diff options
| -rw-r--r-- | fs/locks.c | 125 | ||||
| -rw-r--r-- | include/linux/fs.h | 1 |
2 files changed, 65 insertions, 61 deletions
diff --git a/fs/locks.c b/fs/locks.c index 8b8388eca05e..49354b9c7dc1 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
| @@ -125,6 +125,7 @@ | |||
| 125 | #include <linux/syscalls.h> | 125 | #include <linux/syscalls.h> |
| 126 | #include <linux/time.h> | 126 | #include <linux/time.h> |
| 127 | #include <linux/rcupdate.h> | 127 | #include <linux/rcupdate.h> |
| 128 | #include <linux/pid_namespace.h> | ||
| 128 | 129 | ||
| 129 | #include <asm/semaphore.h> | 130 | #include <asm/semaphore.h> |
| 130 | #include <asm/uaccess.h> | 131 | #include <asm/uaccess.h> |
| @@ -185,6 +186,7 @@ void locks_init_lock(struct file_lock *fl) | |||
| 185 | fl->fl_fasync = NULL; | 186 | fl->fl_fasync = NULL; |
| 186 | fl->fl_owner = NULL; | 187 | fl->fl_owner = NULL; |
| 187 | fl->fl_pid = 0; | 188 | fl->fl_pid = 0; |
| 189 | fl->fl_nspid = NULL; | ||
| 188 | fl->fl_file = NULL; | 190 | fl->fl_file = NULL; |
| 189 | fl->fl_flags = 0; | 191 | fl->fl_flags = 0; |
| 190 | fl->fl_type = 0; | 192 | fl->fl_type = 0; |
| @@ -553,6 +555,8 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) | |||
| 553 | { | 555 | { |
| 554 | list_add(&fl->fl_link, &file_lock_list); | 556 | list_add(&fl->fl_link, &file_lock_list); |
| 555 | 557 | ||
| 558 | fl->fl_nspid = get_pid(task_tgid(current)); | ||
| 559 | |||
| 556 | /* insert into file's list */ | 560 | /* insert into file's list */ |
| 557 | fl->fl_next = *pos; | 561 | fl->fl_next = *pos; |
| 558 | *pos = fl; | 562 | *pos = fl; |
| @@ -584,6 +588,11 @@ static void locks_delete_lock(struct file_lock **thisfl_p) | |||
| 584 | if (fl->fl_ops && fl->fl_ops->fl_remove) | 588 | if (fl->fl_ops && fl->fl_ops->fl_remove) |
| 585 | fl->fl_ops->fl_remove(fl); | 589 | fl->fl_ops->fl_remove(fl); |
| 586 | 590 | ||
| 591 | if (fl->fl_nspid) { | ||
| 592 | put_pid(fl->fl_nspid); | ||
| 593 | fl->fl_nspid = NULL; | ||
| 594 | } | ||
| 595 | |||
| 587 | locks_wake_up_blocks(fl); | 596 | locks_wake_up_blocks(fl); |
| 588 | locks_free_lock(fl); | 597 | locks_free_lock(fl); |
| 589 | } | 598 | } |
| @@ -634,33 +643,6 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *s | |||
| 634 | return (locks_conflict(caller_fl, sys_fl)); | 643 | return (locks_conflict(caller_fl, sys_fl)); |
| 635 | } | 644 | } |
| 636 | 645 | ||
| 637 | static int interruptible_sleep_on_locked(wait_queue_head_t *fl_wait, int timeout) | ||
| 638 | { | ||
| 639 | int result = 0; | ||
| 640 | DECLARE_WAITQUEUE(wait, current); | ||
| 641 | |||
| 642 | __set_current_state(TASK_INTERRUPTIBLE); | ||
| 643 | add_wait_queue(fl_wait, &wait); | ||
| 644 | if (timeout == 0) | ||
| 645 | schedule(); | ||
| 646 | else | ||
| 647 | result = schedule_timeout(timeout); | ||
| 648 | if (signal_pending(current)) | ||
| 649 | result = -ERESTARTSYS; | ||
| 650 | remove_wait_queue(fl_wait, &wait); | ||
| 651 | __set_current_state(TASK_RUNNING); | ||
| 652 | return result; | ||
| 653 | } | ||
| 654 | |||
| 655 | static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *waiter, int time) | ||
| 656 | { | ||
| 657 | int result; | ||
| 658 | locks_insert_block(blocker, waiter); | ||
| 659 | result = interruptible_sleep_on_locked(&waiter->fl_wait, time); | ||
| 660 | __locks_delete_block(waiter); | ||
| 661 | return result; | ||
| 662 | } | ||
| 663 | |||
| 664 | void | 646 | void |
| 665 | posix_test_lock(struct file *filp, struct file_lock *fl) | 647 | posix_test_lock(struct file *filp, struct file_lock *fl) |
| 666 | { | 648 | { |
| @@ -673,55 +655,67 @@ posix_test_lock(struct file *filp, struct file_lock *fl) | |||
| 673 | if (posix_locks_conflict(fl, cfl)) | 655 | if (posix_locks_conflict(fl, cfl)) |
| 674 | break; | 656 | break; |
| 675 | } | 657 | } |
| 676 | if (cfl) | 658 | if (cfl) { |
| 677 | __locks_copy_lock(fl, cfl); | 659 | __locks_copy_lock(fl, cfl); |
| 678 | else | 660 | if (cfl->fl_nspid) |
| 661 | fl->fl_pid = pid_nr_ns(cfl->fl_nspid, | ||
| 662 | task_active_pid_ns(current)); | ||
| 663 | } else | ||
| 679 | fl->fl_type = F_UNLCK; | 664 | fl->fl_type = F_UNLCK; |
| 680 | unlock_kernel(); | 665 | unlock_kernel(); |
| 681 | return; | 666 | return; |
| 682 | } | 667 | } |
| 683 | |||
| 684 | EXPORT_SYMBOL(posix_test_lock); | 668 | EXPORT_SYMBOL(posix_test_lock); |
| 685 | 669 | ||
| 686 | /* This function tests for deadlock condition before putting a process to | 670 | /* |
| 687 | * sleep. The detection scheme is no longer recursive. Recursive was neat, | 671 | * Deadlock detection: |
| 688 | * but dangerous - we risked stack corruption if the lock data was bad, or | 672 | * |
| 689 | * if the recursion was too deep for any other reason. | 673 | * We attempt to detect deadlocks that are due purely to posix file |
| 674 | * locks. | ||
| 690 | * | 675 | * |
| 691 | * We rely on the fact that a task can only be on one lock's wait queue | 676 | * We assume that a task can be waiting for at most one lock at a time. |
| 692 | * at a time. When we find blocked_task on a wait queue we can re-search | 677 | * So for any acquired lock, the process holding that lock may be |
| 693 | * with blocked_task equal to that queue's owner, until either blocked_task | 678 | * waiting on at most one other lock. That lock in turns may be held by |
| 694 | * isn't found, or blocked_task is found on a queue owned by my_task. | 679 | * someone waiting for at most one other lock. Given a requested lock |
| 680 | * caller_fl which is about to wait for a conflicting lock block_fl, we | ||
| 681 | * follow this chain of waiters to ensure we are not about to create a | ||
| 682 | * cycle. | ||
| 695 | * | 683 | * |
| 696 | * Note: the above assumption may not be true when handling lock requests | 684 | * Since we do this before we ever put a process to sleep on a lock, we |
| 697 | * from a broken NFS client. But broken NFS clients have a lot more to | 685 | * are ensured that there is never a cycle; that is what guarantees that |
| 698 | * worry about than proper deadlock detection anyway... --okir | 686 | * the while() loop in posix_locks_deadlock() eventually completes. |
| 699 | * | 687 | * |
| 700 | * However, the failure of this assumption (also possible in the case of | 688 | * Note: the above assumption may not be true when handling lock |
| 701 | * multiple tasks sharing the same open file table) also means there's no | 689 | * requests from a broken NFS client. It may also fail in the presence |
| 702 | * guarantee that the loop below will terminate. As a hack, we give up | 690 | * of tasks (such as posix threads) sharing the same open file table. |
| 703 | * after a few iterations. | 691 | * |
| 692 | * To handle those cases, we just bail out after a few iterations. | ||
| 704 | */ | 693 | */ |
| 705 | 694 | ||
| 706 | #define MAX_DEADLK_ITERATIONS 10 | 695 | #define MAX_DEADLK_ITERATIONS 10 |
| 707 | 696 | ||
| 697 | /* Find a lock that the owner of the given block_fl is blocking on. */ | ||
| 698 | static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) | ||
| 699 | { | ||
| 700 | struct file_lock *fl; | ||
| 701 | |||
| 702 | list_for_each_entry(fl, &blocked_list, fl_link) { | ||
| 703 | if (posix_same_owner(fl, block_fl)) | ||
| 704 | return fl->fl_next; | ||
| 705 | } | ||
| 706 | return NULL; | ||
| 707 | } | ||
| 708 | |||
| 708 | static int posix_locks_deadlock(struct file_lock *caller_fl, | 709 | static int posix_locks_deadlock(struct file_lock *caller_fl, |
| 709 | struct file_lock *block_fl) | 710 | struct file_lock *block_fl) |
| 710 | { | 711 | { |
| 711 | struct file_lock *fl; | ||
| 712 | int i = 0; | 712 | int i = 0; |
| 713 | 713 | ||
| 714 | next_task: | 714 | while ((block_fl = what_owner_is_waiting_for(block_fl))) { |
| 715 | if (posix_same_owner(caller_fl, block_fl)) | 715 | if (i++ > MAX_DEADLK_ITERATIONS) |
| 716 | return 1; | 716 | return 0; |
| 717 | list_for_each_entry(fl, &blocked_list, fl_link) { | 717 | if (posix_same_owner(caller_fl, block_fl)) |
| 718 | if (posix_same_owner(fl, block_fl)) { | 718 | return 1; |
| 719 | if (i++ > MAX_DEADLK_ITERATIONS) | ||
| 720 | return 0; | ||
| 721 | fl = fl->fl_next; | ||
| 722 | block_fl = fl; | ||
| 723 | goto next_task; | ||
| 724 | } | ||
| 725 | } | 719 | } |
| 726 | return 0; | 720 | return 0; |
| 727 | } | 721 | } |
| @@ -1256,7 +1250,10 @@ restart: | |||
| 1256 | if (break_time == 0) | 1250 | if (break_time == 0) |
| 1257 | break_time++; | 1251 | break_time++; |
| 1258 | } | 1252 | } |
| 1259 | error = locks_block_on_timeout(flock, new_fl, break_time); | 1253 | locks_insert_block(flock, new_fl); |
| 1254 | error = wait_event_interruptible_timeout(new_fl->fl_wait, | ||
| 1255 | !new_fl->fl_next, break_time); | ||
| 1256 | __locks_delete_block(new_fl); | ||
| 1260 | if (error >= 0) { | 1257 | if (error >= 0) { |
| 1261 | if (error == 0) | 1258 | if (error == 0) |
| 1262 | time_out_leases(inode); | 1259 | time_out_leases(inode); |
| @@ -2084,6 +2081,12 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, | |||
| 2084 | int id, char *pfx) | 2081 | int id, char *pfx) |
| 2085 | { | 2082 | { |
| 2086 | struct inode *inode = NULL; | 2083 | struct inode *inode = NULL; |
| 2084 | unsigned int fl_pid; | ||
| 2085 | |||
| 2086 | if (fl->fl_nspid) | ||
| 2087 | fl_pid = pid_nr_ns(fl->fl_nspid, task_active_pid_ns(current)); | ||
| 2088 | else | ||
| 2089 | fl_pid = fl->fl_pid; | ||
| 2087 | 2090 | ||
| 2088 | if (fl->fl_file != NULL) | 2091 | if (fl->fl_file != NULL) |
| 2089 | inode = fl->fl_file->f_path.dentry->d_inode; | 2092 | inode = fl->fl_file->f_path.dentry->d_inode; |
| @@ -2124,16 +2127,16 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, | |||
| 2124 | } | 2127 | } |
| 2125 | if (inode) { | 2128 | if (inode) { |
| 2126 | #ifdef WE_CAN_BREAK_LSLK_NOW | 2129 | #ifdef WE_CAN_BREAK_LSLK_NOW |
| 2127 | seq_printf(f, "%d %s:%ld ", fl->fl_pid, | 2130 | seq_printf(f, "%d %s:%ld ", fl_pid, |
| 2128 | inode->i_sb->s_id, inode->i_ino); | 2131 | inode->i_sb->s_id, inode->i_ino); |
| 2129 | #else | 2132 | #else |
| 2130 | /* userspace relies on this representation of dev_t ;-( */ | 2133 | /* userspace relies on this representation of dev_t ;-( */ |
| 2131 | seq_printf(f, "%d %02x:%02x:%ld ", fl->fl_pid, | 2134 | seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, |
| 2132 | MAJOR(inode->i_sb->s_dev), | 2135 | MAJOR(inode->i_sb->s_dev), |
| 2133 | MINOR(inode->i_sb->s_dev), inode->i_ino); | 2136 | MINOR(inode->i_sb->s_dev), inode->i_ino); |
| 2134 | #endif | 2137 | #endif |
| 2135 | } else { | 2138 | } else { |
| 2136 | seq_printf(f, "%d <none>:0 ", fl->fl_pid); | 2139 | seq_printf(f, "%d <none>:0 ", fl_pid); |
| 2137 | } | 2140 | } |
| 2138 | if (IS_POSIX(fl)) { | 2141 | if (IS_POSIX(fl)) { |
| 2139 | if (fl->fl_end == OFFSET_MAX) | 2142 | if (fl->fl_end == OFFSET_MAX) |
diff --git a/include/linux/fs.h b/include/linux/fs.h index a516b6716870..b7736ab8bb5e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -872,6 +872,7 @@ struct file_lock { | |||
| 872 | struct list_head fl_block; /* circular list of blocked processes */ | 872 | struct list_head fl_block; /* circular list of blocked processes */ |
| 873 | fl_owner_t fl_owner; | 873 | fl_owner_t fl_owner; |
| 874 | unsigned int fl_pid; | 874 | unsigned int fl_pid; |
| 875 | struct pid *fl_nspid; | ||
| 875 | wait_queue_head_t fl_wait; | 876 | wait_queue_head_t fl_wait; |
| 876 | struct file *fl_file; | 877 | struct file *fl_file; |
| 877 | unsigned char fl_flags; | 878 | unsigned char fl_flags; |
