author     Linus Torvalds <torvalds@linux-foundation.org>   2008-02-04 10:58:03 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2008-02-04 10:58:03 -0500
commit     9853832c49dc1685587abeb4e1decd4be690d256
tree       13510327f85b8d8c238728b47ae74e0fa5299e17
parent     b21761ff18c0eba67e8f2886b3c0b9cae79b5249
parent     ab1f16116527e42dec8aee176d673a41a881b809

Merge branch 'locks' of git://linux-nfs.org/~bfields/linux
* 'locks' of git://linux-nfs.org/~bfields/linux:
pid-namespaces-vs-locks-interaction
file locks: Use wait_event_interruptible_timeout()
locks: clarify posix_locks_deadlock
-rw-r--r--  fs/locks.c          | 125
-rw-r--r--  include/linux/fs.h  |   1
2 files changed, 65 insertions(+), 61 deletions(-)
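
The "pid-namespaces-vs-locks-interaction" change below makes each file_lock remember the locking task's struct pid (the new fl_nspid field), so that /proc/locks and F_GETLK can report the lock owner's pid as seen from the reader's pid namespace rather than the raw global number. A minimal sketch of that reference pattern, using a hypothetical struct foo in place of struct file_lock:

	#include <linux/pid.h>
	#include <linux/pid_namespace.h>
	#include <linux/sched.h>

	struct foo {
		struct pid *owner_pid;	/* plays the role of fl_nspid */
	};

	static void foo_record_owner(struct foo *f)
	{
		/* Pin the creating task's tgid; the reference stays valid even
		 * if the task exits and its numeric pid is later reused. */
		f->owner_pid = get_pid(task_tgid(current));
	}

	static pid_t foo_owner_pid_for_reader(struct foo *f)
	{
		/* Translate into the reading task's pid namespace; pid_nr_ns()
		 * returns 0 if the owner is not visible from that namespace. */
		return pid_nr_ns(f->owner_pid, task_active_pid_ns(current));
	}

	static void foo_forget_owner(struct foo *f)
	{
		put_pid(f->owner_pid);
		f->owner_pid = NULL;
	}

The same three steps show up in the patch as locks_insert_lock(), lock_get_status()/posix_test_lock(), and locks_delete_lock().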
diff --git a/fs/locks.c b/fs/locks.c
index 8b8388eca05e..49354b9c7dc1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -125,6 +125,7 @@
 #include <linux/syscalls.h>
 #include <linux/time.h>
 #include <linux/rcupdate.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
@@ -185,6 +186,7 @@ void locks_init_lock(struct file_lock *fl)
 	fl->fl_fasync = NULL;
 	fl->fl_owner = NULL;
 	fl->fl_pid = 0;
+	fl->fl_nspid = NULL;
 	fl->fl_file = NULL;
 	fl->fl_flags = 0;
 	fl->fl_type = 0;
@@ -553,6 +555,8 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
 {
 	list_add(&fl->fl_link, &file_lock_list);
 
+	fl->fl_nspid = get_pid(task_tgid(current));
+
 	/* insert into file's list */
 	fl->fl_next = *pos;
 	*pos = fl;
@@ -584,6 +588,11 @@ static void locks_delete_lock(struct file_lock **thisfl_p)
 	if (fl->fl_ops && fl->fl_ops->fl_remove)
 		fl->fl_ops->fl_remove(fl);
 
+	if (fl->fl_nspid) {
+		put_pid(fl->fl_nspid);
+		fl->fl_nspid = NULL;
+	}
+
 	locks_wake_up_blocks(fl);
 	locks_free_lock(fl);
 }
@@ -634,33 +643,6 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
 	return (locks_conflict(caller_fl, sys_fl));
 }
 
-static int interruptible_sleep_on_locked(wait_queue_head_t *fl_wait, int timeout)
-{
-	int result = 0;
-	DECLARE_WAITQUEUE(wait, current);
-
-	__set_current_state(TASK_INTERRUPTIBLE);
-	add_wait_queue(fl_wait, &wait);
-	if (timeout == 0)
-		schedule();
-	else
-		result = schedule_timeout(timeout);
-	if (signal_pending(current))
-		result = -ERESTARTSYS;
-	remove_wait_queue(fl_wait, &wait);
-	__set_current_state(TASK_RUNNING);
-	return result;
-}
-
-static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *waiter, int time)
-{
-	int result;
-	locks_insert_block(blocker, waiter);
-	result = interruptible_sleep_on_locked(&waiter->fl_wait, time);
-	__locks_delete_block(waiter);
-	return result;
-}
-
 void
 posix_test_lock(struct file *filp, struct file_lock *fl)
 {
@@ -673,55 +655,67 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
 		if (posix_locks_conflict(fl, cfl))
 			break;
 	}
-	if (cfl)
+	if (cfl) {
 		__locks_copy_lock(fl, cfl);
-	else
+		if (cfl->fl_nspid)
+			fl->fl_pid = pid_nr_ns(cfl->fl_nspid,
+					task_active_pid_ns(current));
+	} else
 		fl->fl_type = F_UNLCK;
 	unlock_kernel();
 	return;
 }
-
 EXPORT_SYMBOL(posix_test_lock);
 
-/* This function tests for deadlock condition before putting a process to
- * sleep. The detection scheme is no longer recursive. Recursive was neat,
- * but dangerous - we risked stack corruption if the lock data was bad, or
- * if the recursion was too deep for any other reason.
+/*
+ * Deadlock detection:
+ *
+ * We attempt to detect deadlocks that are due purely to posix file
+ * locks.
  *
- * We rely on the fact that a task can only be on one lock's wait queue
- * at a time. When we find blocked_task on a wait queue we can re-search
- * with blocked_task equal to that queue's owner, until either blocked_task
- * isn't found, or blocked_task is found on a queue owned by my_task.
+ * We assume that a task can be waiting for at most one lock at a time.
+ * So for any acquired lock, the process holding that lock may be
+ * waiting on at most one other lock. That lock in turns may be held by
+ * someone waiting for at most one other lock. Given a requested lock
+ * caller_fl which is about to wait for a conflicting lock block_fl, we
+ * follow this chain of waiters to ensure we are not about to create a
+ * cycle.
  *
- * Note: the above assumption may not be true when handling lock requests
- * from a broken NFS client. But broken NFS clients have a lot more to
- * worry about than proper deadlock detection anyway... --okir
+ * Since we do this before we ever put a process to sleep on a lock, we
+ * are ensured that there is never a cycle; that is what guarantees that
+ * the while() loop in posix_locks_deadlock() eventually completes.
  *
- * However, the failure of this assumption (also possible in the case of
- * multiple tasks sharing the same open file table) also means there's no
- * guarantee that the loop below will terminate. As a hack, we give up
- * after a few iterations.
+ * Note: the above assumption may not be true when handling lock
+ * requests from a broken NFS client. It may also fail in the presence
+ * of tasks (such as posix threads) sharing the same open file table.
+ *
+ * To handle those cases, we just bail out after a few iterations.
  */
 
 #define MAX_DEADLK_ITERATIONS 10
 
+/* Find a lock that the owner of the given block_fl is blocking on. */
+static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
+{
+	struct file_lock *fl;
+
+	list_for_each_entry(fl, &blocked_list, fl_link) {
+		if (posix_same_owner(fl, block_fl))
+			return fl->fl_next;
+	}
+	return NULL;
+}
+
 static int posix_locks_deadlock(struct file_lock *caller_fl,
 				struct file_lock *block_fl)
 {
-	struct file_lock *fl;
 	int i = 0;
 
-next_task:
-	if (posix_same_owner(caller_fl, block_fl))
-		return 1;
-	list_for_each_entry(fl, &blocked_list, fl_link) {
-		if (posix_same_owner(fl, block_fl)) {
-			if (i++ > MAX_DEADLK_ITERATIONS)
-				return 0;
-			fl = fl->fl_next;
-			block_fl = fl;
-			goto next_task;
-		}
+	while ((block_fl = what_owner_is_waiting_for(block_fl))) {
+		if (i++ > MAX_DEADLK_ITERATIONS)
+			return 0;
+		if (posix_same_owner(caller_fl, block_fl))
+			return 1;
 	}
 	return 0;
 }
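
To make the new control flow concrete, here is a standalone model of the chain walk performed by what_owner_is_waiting_for() and posix_locks_deadlock(). It is illustrative only: integer owner ids stand in for fl_owner_t, and the waits_on[] table stands in for the kernel's blocked_list.

	#include <stdio.h>

	#define MAX_OWNERS		16
	#define MAX_DEADLK_ITERATIONS	10

	/* waits_on[a] == b means owner a is blocked on a lock held by owner b;
	 * -1 means owner a is not waiting on anything. */
	static int waits_on[MAX_OWNERS];

	/* caller is about to wait on a lock held by blocker: follow the chain
	 * of waiters and return 1 if it leads back to caller (a cycle). */
	static int would_deadlock(int caller, int blocker)
	{
		int i = 0;

		while ((blocker = waits_on[blocker]) != -1) {
			if (i++ > MAX_DEADLK_ITERATIONS)
				return 0;	/* give up, as the kernel does */
			if (blocker == caller)
				return 1;
		}
		return 0;
	}

	int main(void)
	{
		for (int i = 0; i < MAX_OWNERS; i++)
			waits_on[i] = -1;

		waits_on[2] = 1;			/* owner 2 already waits on owner 1 */
		printf("%d\n", would_deadlock(1, 2));	/* 1: owner 1 would close the cycle */
		printf("%d\n", would_deadlock(3, 2));	/* 0: chain ends at owner 1 */
		return 0;
	}

Because the check runs before the caller is ever put to sleep, a cycle can only be closed through the caller itself, which is why the walk terminates in the normal case.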
@@ -1256,7 +1250,10 @@ restart:
 		if (break_time == 0)
 			break_time++;
 	}
-	error = locks_block_on_timeout(flock, new_fl, break_time);
+	locks_insert_block(flock, new_fl);
+	error = wait_event_interruptible_timeout(new_fl->fl_wait,
+						!new_fl->fl_next, break_time);
+	__locks_delete_block(new_fl);
 	if (error >= 0) {
 		if (error == 0)
 			time_out_leases(inode);
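
The error handling above relies on the standard return convention of wait_event_interruptible_timeout(): -ERESTARTSYS if a signal arrives first, 0 if the timeout expires with the condition still false (so the lease is timed out), and the remaining jiffies (at least 1) if the condition becomes true in time. A sketch with placeholder names (wq, flag, and wait_for_flag are not from fs/locks.c):

	#include <linux/errno.h>
	#include <linux/wait.h>

	static int wait_for_flag(wait_queue_head_t *wq, int *flag, long timeout)
	{
		long ret = wait_event_interruptible_timeout(*wq, *flag, timeout);

		if (ret < 0)
			return ret;		/* -ERESTARTSYS: interrupted by a signal */
		if (ret == 0)
			return -ETIMEDOUT;	/* timeout elapsed, *flag still zero */
		return 0;			/* *flag became non-zero; ret jiffies were left */
	}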
@@ -2084,6 +2081,12 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
 							int id, char *pfx)
 {
 	struct inode *inode = NULL;
+	unsigned int fl_pid;
+
+	if (fl->fl_nspid)
+		fl_pid = pid_nr_ns(fl->fl_nspid, task_active_pid_ns(current));
+	else
+		fl_pid = fl->fl_pid;
 
 	if (fl->fl_file != NULL)
 		inode = fl->fl_file->f_path.dentry->d_inode;
@@ -2124,16 +2127,16 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
 	}
 	if (inode) {
 #ifdef WE_CAN_BREAK_LSLK_NOW
-		seq_printf(f, "%d %s:%ld ", fl->fl_pid,
+		seq_printf(f, "%d %s:%ld ", fl_pid,
 				inode->i_sb->s_id, inode->i_ino);
 #else
 		/* userspace relies on this representation of dev_t ;-( */
-		seq_printf(f, "%d %02x:%02x:%ld ", fl->fl_pid,
+		seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
 				MAJOR(inode->i_sb->s_dev),
 				MINOR(inode->i_sb->s_dev), inode->i_ino);
 #endif
 	} else {
-		seq_printf(f, "%d <none>:0 ", fl->fl_pid);
+		seq_printf(f, "%d <none>:0 ", fl_pid);
 	}
 	if (IS_POSIX(fl)) {
 		if (fl->fl_end == OFFSET_MAX)
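
With fl_pid computed as above, the pid column of /proc/locks is rendered relative to the reader's pid namespace. Illustrative output (pids, device numbers, and inode numbers made up) in the format lock_get_status() emits:

	1: POSIX  ADVISORY  WRITE 1193 08:02:131120 0 EOF
	2: FLOCK  ADVISORY  WRITE 1179 08:02:131090 0 EOF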
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a516b6716870..b7736ab8bb5e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -872,6 +872,7 @@ struct file_lock {
 	struct list_head fl_block;	/* circular list of blocked processes */
 	fl_owner_t fl_owner;
 	unsigned int fl_pid;
+	struct pid *fl_nspid;
 	wait_queue_head_t fl_wait;
 	struct file *fl_file;
 	unsigned char fl_flags;