diff options
author | Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> | 2009-05-10 09:41:43 -0400 |
---|---|---|
committer | Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> | 2009-05-11 01:54:41 -0400 |
commit | 4f6b828837b4e3836f2c9ac2f0eab9773b6c1327 (patch) | |
tree | 9baa69b0ef44a0c604be831300fd5215c0355be0 | |
parent | 47eb6b9c8fa963c9f49967ad1d9d7ec947d15b68 (diff) |
nilfs2: fix lock order reversal in nilfs_clean_segments ioctl
This is a companion patch to ("nilfs2: fix possible circular locking
for get information ioctls").
This corrects lock order reversal between mm->mmap_sem and
nilfs->ns_segctor_sem in nilfs_clean_segments() which was detected by
lockdep check:
=======================================================
[ INFO: possible circular locking dependency detected ]
2.6.30-rc3-nilfs-00003-g360bdc1 #7
-------------------------------------------------------
mmap/5294 is trying to acquire lock:
(&nilfs->ns_segctor_sem){++++.+}, at: [<d0d0e846>] nilfs_transaction_begin+0xb6/0x10c [nilfs2]
but task is already holding lock:
(&mm->mmap_sem){++++++}, at: [<c043700a>] do_page_fault+0x1d8/0x30a
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&mm->mmap_sem){++++++}:
[<c01470a5>] __lock_acquire+0x1066/0x13b0
[<c01474a9>] lock_acquire+0xba/0xdd
[<c01836bc>] might_fault+0x68/0x88
[<c023c61d>] copy_from_user+0x2a/0x111
[<d0d120d0>] nilfs_ioctl_prepare_clean_segments+0x1d/0xf1 [nilfs2]
[<d0d0e2aa>] nilfs_clean_segments+0x6d/0x1b9 [nilfs2]
[<d0d11f68>] nilfs_ioctl+0x2ad/0x318 [nilfs2]
[<c01a3be7>] vfs_ioctl+0x22/0x69
[<c01a408e>] do_vfs_ioctl+0x460/0x499
[<c01a4107>] sys_ioctl+0x40/0x5a
[<c01031a4>] sysenter_do_call+0x12/0x38
[<ffffffff>] 0xffffffff
-> #0 (&nilfs->ns_segctor_sem){++++.+}:
[<c0146e0b>] __lock_acquire+0xdcc/0x13b0
[<c01474a9>] lock_acquire+0xba/0xdd
[<c0433f1d>] down_read+0x2a/0x3e
[<d0d0e846>] nilfs_transaction_begin+0xb6/0x10c [nilfs2]
[<d0cfe0e5>] nilfs_page_mkwrite+0xe7/0x154 [nilfs2]
[<c0183b0b>] __do_fault+0x165/0x376
[<c01855cd>] handle_mm_fault+0x287/0x5d1
[<c043712d>] do_page_fault+0x2fb/0x30a
[<c0435462>] error_code+0x72/0x78
[<ffffffff>] 0xffffffff
where nilfs_clean_segments() holds:
nilfs->ns_segctor_sem -> copy_from_user()
--> page fault -> mm->mmap_sem
And, page fault path may hold:
page fault -> mm->mmap_sem
--> nilfs_page_mkwrite() -> nilfs->ns_segctor_sem
Even though nilfs_clean_segments() does not perform write access on
given user pages, it may cause deadlock because nilfs->ns_segctor_sem
is shared per device and mm->mmap_sem can be shared with other tasks.
To avoid this problem, this patch moves all calls of copy_from_user()
outside the nilfs->ns_segctor_sem lock in the ioctl.
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
-rw-r--r-- | fs/nilfs2/ioctl.c | 163 | ||||
-rw-r--r-- | fs/nilfs2/nilfs.h | 3 | ||||
-rw-r--r-- | fs/nilfs2/segment.c | 5 | ||||
-rw-r--r-- | fs/nilfs2/segment.h | 3 |
4 files changed, 100 insertions, 74 deletions
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index e3c693d37d69..49489f68eabe 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/smp_lock.h> /* lock_kernel(), unlock_kernel() */ | 25 | #include <linux/smp_lock.h> /* lock_kernel(), unlock_kernel() */ |
26 | #include <linux/capability.h> /* capable() */ | 26 | #include <linux/capability.h> /* capable() */ |
27 | #include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ | 27 | #include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ |
28 | #include <linux/vmalloc.h> | ||
28 | #include <linux/nilfs2_fs.h> | 29 | #include <linux/nilfs2_fs.h> |
29 | #include "nilfs.h" | 30 | #include "nilfs.h" |
30 | #include "segment.h" | 31 | #include "segment.h" |
@@ -297,10 +298,10 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, | |||
297 | return 0; | 298 | return 0; |
298 | } | 299 | } |
299 | 300 | ||
300 | static ssize_t | 301 | static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, |
301 | nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags, | 302 | struct nilfs_argv *argv, void *buf) |
302 | void *buf, size_t size, size_t nmembs) | ||
303 | { | 303 | { |
304 | size_t nmembs = argv->v_nmembs; | ||
304 | struct inode *inode; | 305 | struct inode *inode; |
305 | struct nilfs_vdesc *vdesc; | 306 | struct nilfs_vdesc *vdesc; |
306 | struct buffer_head *bh, *n; | 307 | struct buffer_head *bh, *n; |
@@ -361,19 +362,10 @@ nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags, | |||
361 | return ret; | 362 | return ret; |
362 | } | 363 | } |
363 | 364 | ||
364 | static inline int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, | 365 | static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, |
365 | struct nilfs_argv *argv, | 366 | struct nilfs_argv *argv, void *buf) |
366 | int dir) | ||
367 | { | ||
368 | return nilfs_ioctl_wrap_copy(nilfs, argv, dir, | ||
369 | nilfs_ioctl_do_move_blocks); | ||
370 | } | ||
371 | |||
372 | static ssize_t | ||
373 | nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp, | ||
374 | int flags, void *buf, size_t size, | ||
375 | size_t nmembs) | ||
376 | { | 367 | { |
368 | size_t nmembs = argv->v_nmembs; | ||
377 | struct inode *cpfile = nilfs->ns_cpfile; | 369 | struct inode *cpfile = nilfs->ns_cpfile; |
378 | struct nilfs_period *periods = buf; | 370 | struct nilfs_period *periods = buf; |
379 | int ret, i; | 371 | int ret, i; |
@@ -387,36 +379,21 @@ nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp, | |||
387 | return nmembs; | 379 | return nmembs; |
388 | } | 380 | } |
389 | 381 | ||
390 | static inline int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, | 382 | static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, |
391 | struct nilfs_argv *argv, | 383 | struct nilfs_argv *argv, void *buf) |
392 | int dir) | ||
393 | { | 384 | { |
394 | return nilfs_ioctl_wrap_copy(nilfs, argv, dir, | 385 | size_t nmembs = argv->v_nmembs; |
395 | nilfs_ioctl_do_delete_checkpoints); | 386 | int ret; |
396 | } | ||
397 | 387 | ||
398 | static ssize_t | 388 | ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); |
399 | nilfs_ioctl_do_free_vblocknrs(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
400 | void *buf, size_t size, size_t nmembs) | ||
401 | { | ||
402 | int ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); | ||
403 | 389 | ||
404 | return (ret < 0) ? ret : nmembs; | 390 | return (ret < 0) ? ret : nmembs; |
405 | } | 391 | } |
406 | 392 | ||
407 | static inline int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, | 393 | static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, |
408 | struct nilfs_argv *argv, | 394 | struct nilfs_argv *argv, void *buf) |
409 | int dir) | ||
410 | { | ||
411 | return nilfs_ioctl_wrap_copy(nilfs, argv, dir, | ||
412 | nilfs_ioctl_do_free_vblocknrs); | ||
413 | } | ||
414 | |||
415 | static ssize_t | ||
416 | nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp, | ||
417 | int flags, void *buf, size_t size, | ||
418 | size_t nmembs) | ||
419 | { | 395 | { |
396 | size_t nmembs = argv->v_nmembs; | ||
420 | struct inode *dat = nilfs_dat_inode(nilfs); | 397 | struct inode *dat = nilfs_dat_inode(nilfs); |
421 | struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; | 398 | struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; |
422 | struct nilfs_bdesc *bdescs = buf; | 399 | struct nilfs_bdesc *bdescs = buf; |
@@ -455,18 +432,10 @@ nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp, | |||
455 | return nmembs; | 432 | return nmembs; |
456 | } | 433 | } |
457 | 434 | ||
458 | static inline int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, | 435 | static int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, |
459 | struct nilfs_argv *argv, | 436 | struct nilfs_argv *argv, void *buf) |
460 | int dir) | ||
461 | { | ||
462 | return nilfs_ioctl_wrap_copy(nilfs, argv, dir, | ||
463 | nilfs_ioctl_do_mark_blocks_dirty); | ||
464 | } | ||
465 | |||
466 | static ssize_t | ||
467 | nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
468 | void *buf, size_t size, size_t nmembs) | ||
469 | { | 437 | { |
438 | size_t nmembs = argv->v_nmembs; | ||
470 | struct nilfs_sb_info *sbi = nilfs->ns_writer; | 439 | struct nilfs_sb_info *sbi = nilfs->ns_writer; |
471 | int ret; | 440 | int ret; |
472 | 441 | ||
@@ -481,31 +450,19 @@ nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, | |||
481 | return (ret < 0) ? ret : nmembs; | 450 | return (ret < 0) ? ret : nmembs; |
482 | } | 451 | } |
483 | 452 | ||
484 | static inline int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, | ||
485 | struct nilfs_argv *argv, | ||
486 | int dir) | ||
487 | { | ||
488 | return nilfs_ioctl_wrap_copy(nilfs, argv, dir, | ||
489 | nilfs_ioctl_do_free_segments); | ||
490 | } | ||
491 | |||
492 | int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, | 453 | int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, |
493 | void __user *argp) | 454 | struct nilfs_argv *argv, void **kbufs) |
494 | { | 455 | { |
495 | struct nilfs_argv argv[5]; | ||
496 | const char *msg; | 456 | const char *msg; |
497 | int dir, ret; | 457 | int ret; |
498 | |||
499 | if (copy_from_user(argv, argp, sizeof(argv))) | ||
500 | return -EFAULT; | ||
501 | 458 | ||
502 | dir = _IOC_WRITE; | 459 | ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]); |
503 | ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], dir); | ||
504 | if (ret < 0) { | 460 | if (ret < 0) { |
505 | msg = "cannot read source blocks"; | 461 | msg = "cannot read source blocks"; |
506 | goto failed; | 462 | goto failed; |
507 | } | 463 | } |
508 | ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], dir); | 464 | |
465 | ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]); | ||
509 | if (ret < 0) { | 466 | if (ret < 0) { |
510 | /* | 467 | /* |
511 | * can safely abort because checkpoints can be removed | 468 | * can safely abort because checkpoints can be removed |
@@ -514,7 +471,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, | |||
514 | msg = "cannot delete checkpoints"; | 471 | msg = "cannot delete checkpoints"; |
515 | goto failed; | 472 | goto failed; |
516 | } | 473 | } |
517 | ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], dir); | 474 | ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], kbufs[2]); |
518 | if (ret < 0) { | 475 | if (ret < 0) { |
519 | /* | 476 | /* |
520 | * can safely abort because DAT file is updated atomically | 477 | * can safely abort because DAT file is updated atomically |
@@ -523,7 +480,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, | |||
523 | msg = "cannot delete virtual blocks from DAT file"; | 480 | msg = "cannot delete virtual blocks from DAT file"; |
524 | goto failed; | 481 | goto failed; |
525 | } | 482 | } |
526 | ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], dir); | 483 | ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], kbufs[3]); |
527 | if (ret < 0) { | 484 | if (ret < 0) { |
528 | /* | 485 | /* |
529 | * can safely abort because the operation is nondestructive. | 486 | * can safely abort because the operation is nondestructive. |
@@ -531,7 +488,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, | |||
531 | msg = "cannot mark copying blocks dirty"; | 488 | msg = "cannot mark copying blocks dirty"; |
532 | goto failed; | 489 | goto failed; |
533 | } | 490 | } |
534 | ret = nilfs_ioctl_free_segments(nilfs, &argv[4], dir); | 491 | ret = nilfs_ioctl_free_segments(nilfs, &argv[4], kbufs[4]); |
535 | if (ret < 0) { | 492 | if (ret < 0) { |
536 | /* | 493 | /* |
537 | * can safely abort because this operation is atomic. | 494 | * can safely abort because this operation is atomic. |
@@ -551,9 +508,75 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, | |||
551 | static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, | 508 | static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, |
552 | unsigned int cmd, void __user *argp) | 509 | unsigned int cmd, void __user *argp) |
553 | { | 510 | { |
511 | struct nilfs_argv argv[5]; | ||
512 | const static size_t argsz[5] = { | ||
513 | sizeof(struct nilfs_vdesc), | ||
514 | sizeof(struct nilfs_period), | ||
515 | sizeof(__u64), | ||
516 | sizeof(struct nilfs_bdesc), | ||
517 | sizeof(__u64), | ||
518 | }; | ||
519 | void __user *base; | ||
520 | void *kbufs[5]; | ||
521 | struct the_nilfs *nilfs; | ||
522 | size_t len, nsegs; | ||
523 | int n, ret; | ||
524 | |||
554 | if (!capable(CAP_SYS_ADMIN)) | 525 | if (!capable(CAP_SYS_ADMIN)) |
555 | return -EPERM; | 526 | return -EPERM; |
556 | return nilfs_clean_segments(inode->i_sb, argp); | 527 | |
528 | if (copy_from_user(argv, argp, sizeof(argv))) | ||
529 | return -EFAULT; | ||
530 | |||
531 | nsegs = argv[4].v_nmembs; | ||
532 | if (argv[4].v_size != argsz[4]) | ||
533 | return -EINVAL; | ||
534 | /* | ||
535 | * argv[4] points to segment numbers this ioctl cleans. We | ||
536 | * use kmalloc() for its buffer because memory used for the | ||
537 | * segment numbers is enough small. | ||
538 | */ | ||
539 | kbufs[4] = memdup_user((void __user *)(unsigned long)argv[4].v_base, | ||
540 | nsegs * sizeof(__u64)); | ||
541 | if (IS_ERR(kbufs[4])) | ||
542 | return PTR_ERR(kbufs[4]); | ||
543 | |||
544 | nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
545 | |||
546 | for (n = 0; n < 4; n++) { | ||
547 | ret = -EINVAL; | ||
548 | if (argv[n].v_size != argsz[n]) | ||
549 | goto out_free; | ||
550 | |||
551 | if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment) | ||
552 | goto out_free; | ||
553 | |||
554 | len = argv[n].v_size * argv[n].v_nmembs; | ||
555 | base = (void __user *)(unsigned long)argv[n].v_base; | ||
556 | if (len == 0) { | ||
557 | kbufs[n] = NULL; | ||
558 | continue; | ||
559 | } | ||
560 | |||
561 | kbufs[n] = vmalloc(len); | ||
562 | if (!kbufs[n]) { | ||
563 | ret = -ENOMEM; | ||
564 | goto out_free; | ||
565 | } | ||
566 | if (copy_from_user(kbufs[n], base, len)) { | ||
567 | ret = -EFAULT; | ||
568 | vfree(kbufs[n]); | ||
569 | goto out_free; | ||
570 | } | ||
571 | } | ||
572 | |||
573 | ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); | ||
574 | |||
575 | out_free: | ||
576 | while (--n > 0) | ||
577 | vfree(kbufs[n]); | ||
578 | kfree(kbufs[4]); | ||
579 | return ret; | ||
557 | } | 580 | } |
558 | 581 | ||
559 | static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, | 582 | static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, |
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 3d0c18a16db1..da6fc0bba2e5 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h | |||
@@ -236,7 +236,8 @@ extern int nilfs_sync_file(struct file *, struct dentry *, int); | |||
236 | 236 | ||
237 | /* ioctl.c */ | 237 | /* ioctl.c */ |
238 | long nilfs_ioctl(struct file *, unsigned int, unsigned long); | 238 | long nilfs_ioctl(struct file *, unsigned int, unsigned long); |
239 | int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, void __user *); | 239 | int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *, |
240 | void **); | ||
240 | 241 | ||
241 | /* inode.c */ | 242 | /* inode.c */ |
242 | extern struct inode *nilfs_new_inode(struct inode *, int); | 243 | extern struct inode *nilfs_new_inode(struct inode *, int); |
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index fb70ec3be20e..22c7f65c2403 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c | |||
@@ -2589,7 +2589,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) | |||
2589 | } | 2589 | } |
2590 | } | 2590 | } |
2591 | 2591 | ||
2592 | int nilfs_clean_segments(struct super_block *sb, void __user *argp) | 2592 | int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, |
2593 | void **kbufs) | ||
2593 | { | 2594 | { |
2594 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 2595 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
2595 | struct nilfs_sc_info *sci = NILFS_SC(sbi); | 2596 | struct nilfs_sc_info *sci = NILFS_SC(sbi); |
@@ -2606,7 +2607,7 @@ int nilfs_clean_segments(struct super_block *sb, void __user *argp) | |||
2606 | err = nilfs_init_gcdat_inode(nilfs); | 2607 | err = nilfs_init_gcdat_inode(nilfs); |
2607 | if (unlikely(err)) | 2608 | if (unlikely(err)) |
2608 | goto out_unlock; | 2609 | goto out_unlock; |
2609 | err = nilfs_ioctl_prepare_clean_segments(nilfs, argp); | 2610 | err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); |
2610 | if (unlikely(err)) | 2611 | if (unlikely(err)) |
2611 | goto out_unlock; | 2612 | goto out_unlock; |
2612 | 2613 | ||
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index a98fc1ed0bbb..476bdd5df5be 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h | |||
@@ -222,7 +222,8 @@ extern int nilfs_construct_segment(struct super_block *); | |||
222 | extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, | 222 | extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, |
223 | loff_t, loff_t); | 223 | loff_t, loff_t); |
224 | extern void nilfs_flush_segment(struct super_block *, ino_t); | 224 | extern void nilfs_flush_segment(struct super_block *, ino_t); |
225 | extern int nilfs_clean_segments(struct super_block *, void __user *); | 225 | extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, |
226 | void **); | ||
226 | 227 | ||
227 | extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, | 228 | extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, |
228 | __u64 *, size_t); | 229 | __u64 *, size_t); |