Diffstat (limited to 'fs/locks.c')

-rw-r--r--  fs/locks.c | 328
1 file changed, 232 insertions, 96 deletions
diff --git a/fs/locks.c b/fs/locks.c
index cb424a4fed71..b27a3005d78d 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -126,6 +126,9 @@
 #include <linux/time.h>
 #include <linux/rcupdate.h>
 #include <linux/pid_namespace.h>
+#include <linux/hashtable.h>
+#include <linux/percpu.h>
+#include <linux/lglock.h>
 
 #include <asm/uaccess.h>
 
@@ -153,30 +156,53 @@ int lease_break_time = 45;
 #define for_each_lock(inode, lockp) \
         for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
 
-static LIST_HEAD(file_lock_list);
-static LIST_HEAD(blocked_list);
-static DEFINE_SPINLOCK(file_lock_lock);
+/*
+ * The global file_lock_list is only used for displaying /proc/locks, so we
+ * keep a list on each CPU, with each list protected by its own spinlock via
+ * the file_lock_lglock. Note that alterations to the list also require that
+ * the relevant i_lock is held.
+ */
+DEFINE_STATIC_LGLOCK(file_lock_lglock);
+static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
 
 /*
- * Protects the two list heads above, plus the inode->i_flock list
+ * The blocked_hash is used to find POSIX lock loops for deadlock detection.
+ * It is protected by blocked_lock_lock.
+ *
+ * We hash locks by lockowner in order to optimize searching for the lock a
+ * particular lockowner is waiting on.
+ *
+ * FIXME: make this value scale via some heuristic? We generally will want more
+ * buckets when we have more lockowners holding locks, but that's a little
+ * difficult to determine without knowing what the workload will look like.
  */
-void lock_flocks(void)
-{
-        spin_lock(&file_lock_lock);
-}
-EXPORT_SYMBOL_GPL(lock_flocks);
+#define BLOCKED_HASH_BITS 7
+static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
 
-void unlock_flocks(void)
-{
-        spin_unlock(&file_lock_lock);
-}
-EXPORT_SYMBOL_GPL(unlock_flocks);
+/*
+ * This lock protects the blocked_hash. Generally, if you're accessing it, you
+ * want to be holding this lock.
+ *
+ * In addition, it also protects the fl->fl_block list, and the fl->fl_next
+ * pointer for file_lock structures that are acting as lock requests (in
+ * contrast to those that are acting as records of acquired locks).
+ *
+ * Note that when we acquire this lock in order to change the above fields,
+ * we often hold the i_lock as well. In certain cases, when reading the fields
+ * protected by this lock, we can skip acquiring it iff we already hold the
+ * i_lock.
+ *
+ * In particular, adding an entry to the fl_block list requires that you hold
+ * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting
+ * an entry from the list however only requires the blocked_lock_lock.
+ */
+static DEFINE_SPINLOCK(blocked_lock_lock);
 
 static struct kmem_cache *filelock_cache __read_mostly;
 
 static void locks_init_lock_heads(struct file_lock *fl)
 {
-        INIT_LIST_HEAD(&fl->fl_link);
+        INIT_HLIST_NODE(&fl->fl_link);
         INIT_LIST_HEAD(&fl->fl_block);
         init_waitqueue_head(&fl->fl_wait);
 }
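A note on the new data structures: DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS) gives 1 << 7 = 128 bucket heads, and the hash key comes from the lock owner (via the posix_owner_key() helper added further down, which prefers the lock manager's lm_owner_key callback and falls back to the raw fl_owner pointer). The payoff is that a deadlock search scans one bucket rather than every blocked lock in the system. A minimal userspace sketch of the idea, not kernel code; the multiplier below is an arbitrary stand-in for the kernel's hash function:

/* Userspace sketch, not kernel code: waiters hashed by owner so a
 * deadlock search scans one bucket rather than the whole list. */
#include <stdint.h>
#include <stdio.h>

#define BLOCKED_HASH_BITS 7
#define BLOCKED_HASH_SIZE (1u << BLOCKED_HASH_BITS) /* 128 buckets */

struct waiter {
        uintptr_t owner;        /* stand-in for posix_owner_key() */
        struct waiter *next;
};

static struct waiter *blocked_hash[BLOCKED_HASH_SIZE];

/* arbitrary multiplicative hash, folded down to BLOCKED_HASH_BITS bits */
static unsigned int hash_owner(uintptr_t key)
{
        return (unsigned int)(((uint64_t)key * 0x9e3779b97f4a7c15ull) >>
                              (64 - BLOCKED_HASH_BITS));
}

static void insert_blocked(struct waiter *w)
{
        unsigned int b = hash_owner(w->owner);

        w->next = blocked_hash[b];
        blocked_hash[b] = w;
}

/* like hash_for_each_possible(): only one bucket is ever searched */
static struct waiter *find_blocked(uintptr_t owner)
{
        struct waiter *w;

        for (w = blocked_hash[hash_owner(owner)]; w; w = w->next)
                if (w->owner == owner)
                        return w;
        return NULL;
}

int main(void)
{
        struct waiter a = { 0x1000, NULL }, b = { 0x2000, NULL };

        insert_blocked(&a);
        insert_blocked(&b);
        printf("owner 0x2000 blocked: %s\n",
               find_blocked(0x2000) ? "yes" : "no");
        return 0;
}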
@@ -210,7 +236,7 @@ void locks_free_lock(struct file_lock *fl)
 {
         BUG_ON(waitqueue_active(&fl->fl_wait));
         BUG_ON(!list_empty(&fl->fl_block));
-        BUG_ON(!list_empty(&fl->fl_link));
+        BUG_ON(!hlist_unhashed(&fl->fl_link));
 
         locks_release_private(fl);
         kmem_cache_free(filelock_cache, fl);
@@ -484,47 +510,118 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
         return fl1->fl_owner == fl2->fl_owner;
 }
 
+/* Must be called with the i_lock held! */
+static inline void
+locks_insert_global_locks(struct file_lock *fl)
+{
+        lg_local_lock(&file_lock_lglock);
+        fl->fl_link_cpu = smp_processor_id();
+        hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list));
+        lg_local_unlock(&file_lock_lglock);
+}
+
+/* Must be called with the i_lock held! */
+static inline void
+locks_delete_global_locks(struct file_lock *fl)
+{
+        /*
+         * Avoid taking lock if already unhashed. This is safe since this check
+         * is done while holding the i_lock, and new insertions into the list
+         * also require that it be held.
+         */
+        if (hlist_unhashed(&fl->fl_link))
+                return;
+        lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu);
+        hlist_del_init(&fl->fl_link);
+        lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu);
+}
+
+static unsigned long
+posix_owner_key(struct file_lock *fl)
+{
+        if (fl->fl_lmops && fl->fl_lmops->lm_owner_key)
+                return fl->fl_lmops->lm_owner_key(fl);
+        return (unsigned long)fl->fl_owner;
+}
+
+static inline void
+locks_insert_global_blocked(struct file_lock *waiter)
+{
+        hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));
+}
+
+static inline void
+locks_delete_global_blocked(struct file_lock *waiter)
+{
+        hash_del(&waiter->fl_link);
+}
+
 /* Remove waiter from blocker's block list.
  * When blocker ends up pointing to itself then the list is empty.
+ *
+ * Must be called with blocked_lock_lock held.
  */
 static void __locks_delete_block(struct file_lock *waiter)
 {
+        locks_delete_global_blocked(waiter);
         list_del_init(&waiter->fl_block);
-        list_del_init(&waiter->fl_link);
         waiter->fl_next = NULL;
 }
 
-/*
- */
-void locks_delete_block(struct file_lock *waiter)
+static void locks_delete_block(struct file_lock *waiter)
 {
-        lock_flocks();
+        spin_lock(&blocked_lock_lock);
         __locks_delete_block(waiter);
-        unlock_flocks();
+        spin_unlock(&blocked_lock_lock);
 }
-EXPORT_SYMBOL(locks_delete_block);
 
 /* Insert waiter into blocker's block list.
  * We use a circular list so that processes can be easily woken up in
  * the order they blocked. The documentation doesn't require this but
  * it seems like the reasonable thing to do.
+ *
+ * Must be called with both the i_lock and blocked_lock_lock held. The fl_block
+ * list itself is protected by the blocked_lock_lock, but by ensuring that the
+ * i_lock is also held on insertions we can avoid taking the blocked_lock_lock
+ * in some cases when we see that the fl_block list is empty.
  */
-static void locks_insert_block(struct file_lock *blocker,
-                               struct file_lock *waiter)
+static void __locks_insert_block(struct file_lock *blocker,
+                                 struct file_lock *waiter)
 {
         BUG_ON(!list_empty(&waiter->fl_block));
-        list_add_tail(&waiter->fl_block, &blocker->fl_block);
         waiter->fl_next = blocker;
+        list_add_tail(&waiter->fl_block, &blocker->fl_block);
         if (IS_POSIX(blocker))
-                list_add(&waiter->fl_link, &blocked_list);
+                locks_insert_global_blocked(waiter);
+}
+
+/* Must be called with i_lock held. */
+static void locks_insert_block(struct file_lock *blocker,
+                               struct file_lock *waiter)
+{
+        spin_lock(&blocked_lock_lock);
+        __locks_insert_block(blocker, waiter);
+        spin_unlock(&blocked_lock_lock);
 }
 
-/* Wake up processes blocked waiting for blocker.
- * If told to wait then schedule the processes until the block list
- * is empty, otherwise empty the block list ourselves.
+/*
+ * Wake up processes blocked waiting for blocker.
+ *
+ * Must be called with the inode->i_lock held!
  */
 static void locks_wake_up_blocks(struct file_lock *blocker)
 {
+        /*
+         * Avoid taking global lock if list is empty. This is safe since new
+         * blocked requests are only added to the list under the i_lock, and
+         * the i_lock is always held here. Note that removal from the fl_block
+         * list does not require the i_lock, so we must recheck list_empty()
+         * after acquiring the blocked_lock_lock.
+         */
+        if (list_empty(&blocker->fl_block))
+                return;
+
+        spin_lock(&blocked_lock_lock);
         while (!list_empty(&blocker->fl_block)) {
                 struct file_lock *waiter;
 
@@ -536,20 +633,23 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
                 else
                         wake_up(&waiter->fl_wait);
         }
+        spin_unlock(&blocked_lock_lock);
 }
 
 /* Insert file lock fl into an inode's lock list at the position indicated
  * by pos. At the same time add the lock to the global file lock list.
+ *
+ * Must be called with the i_lock held!
  */
 static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
 {
-        list_add(&fl->fl_link, &file_lock_list);
-
         fl->fl_nspid = get_pid(task_tgid(current));
 
         /* insert into file's list */
         fl->fl_next = *pos;
         *pos = fl;
+
+        locks_insert_global_locks(fl);
 }
 
 /*
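The list_empty() shortcut in locks_wake_up_blocks() above depends entirely on the documented ordering: waiters are only ever added while both the i_lock and the blocked_lock_lock are held, so a thread already holding the i_lock can test for emptiness without the global lock; it only has to recheck under blocked_lock_lock because deletions take that lock alone. A standalone pthreads sketch of the same protocol (illustrative only; a plain counter stands in for the fl_block list):

/* Userspace sketch (pthreads, not kernel code) of the fast path:
 * inserts take both locks, removals take only the second one, so a
 * holder of "i_lock" may test emptiness locklessly (the count cannot
 * rise under it) but must recheck once "blocked_lock_lock" is held
 * (the count can still fall). */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t i_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t blocked_lock_lock = PTHREAD_MUTEX_INITIALIZER;
static int nr_blocked;                  /* stand-in for the fl_block list */

static void insert_waiter(void)        /* needs both locks, in this order */
{
        pthread_mutex_lock(&i_lock);
        pthread_mutex_lock(&blocked_lock_lock);
        nr_blocked++;
        pthread_mutex_unlock(&blocked_lock_lock);
        pthread_mutex_unlock(&i_lock);
}

static void wake_up_blocks(void)       /* caller holds i_lock */
{
        if (nr_blocked == 0)            /* safe: inserts need i_lock too */
                return;
        pthread_mutex_lock(&blocked_lock_lock);
        while (nr_blocked > 0)          /* recheck: removals skip i_lock */
                nr_blocked--;
        pthread_mutex_unlock(&blocked_lock_lock);
}

int main(void)
{
        insert_waiter();
        pthread_mutex_lock(&i_lock);
        wake_up_blocks();
        pthread_mutex_unlock(&i_lock);
        printf("blocked waiters left: %d\n", nr_blocked);
        return 0;
}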
555 | /* | 655 | /* |
@@ -557,14 +657,17 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) | |||
557 | * Wake up processes that are blocked waiting for this lock, | 657 | * Wake up processes that are blocked waiting for this lock, |
558 | * notify the FS that the lock has been cleared and | 658 | * notify the FS that the lock has been cleared and |
559 | * finally free the lock. | 659 | * finally free the lock. |
660 | * | ||
661 | * Must be called with the i_lock held! | ||
560 | */ | 662 | */ |
561 | static void locks_delete_lock(struct file_lock **thisfl_p) | 663 | static void locks_delete_lock(struct file_lock **thisfl_p) |
562 | { | 664 | { |
563 | struct file_lock *fl = *thisfl_p; | 665 | struct file_lock *fl = *thisfl_p; |
564 | 666 | ||
667 | locks_delete_global_locks(fl); | ||
668 | |||
565 | *thisfl_p = fl->fl_next; | 669 | *thisfl_p = fl->fl_next; |
566 | fl->fl_next = NULL; | 670 | fl->fl_next = NULL; |
567 | list_del_init(&fl->fl_link); | ||
568 | 671 | ||
569 | if (fl->fl_nspid) { | 672 | if (fl->fl_nspid) { |
570 | put_pid(fl->fl_nspid); | 673 | put_pid(fl->fl_nspid); |
@@ -625,8 +728,9 @@ void
 posix_test_lock(struct file *filp, struct file_lock *fl)
 {
         struct file_lock *cfl;
+        struct inode *inode = file_inode(filp);
 
-        lock_flocks();
+        spin_lock(&inode->i_lock);
         for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) {
                 if (!IS_POSIX(cfl))
                         continue;
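For context, posix_test_lock() is the kernel half of fcntl(F_GETLK); per-inode locking suffices here because the walk only touches this inode's i_flock chain. A minimal userspace caller (the /tmp path is just for illustration):

/* Ask whether a write lock on the first 10 bytes would conflict; on
 * conflict, F_GETLK fills the struct in with the blocker's details. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        struct flock fl = {
                .l_type = F_WRLCK,
                .l_whence = SEEK_SET,
                .l_start = 0,
                .l_len = 10,
        };
        int fd = open("/tmp/lockdemo", O_RDWR | O_CREAT, 0644);

        if (fd < 0 || fcntl(fd, F_GETLK, &fl) < 0) {
                perror("F_GETLK");
                return 1;
        }
        if (fl.l_type == F_UNLCK)
                printf("no conflicting lock\n");
        else
                printf("conflict with pid %d\n", (int)fl.l_pid);
        return 0;
}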
@@ -639,7 +743,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
                 fl->fl_pid = pid_vnr(cfl->fl_nspid);
         } else
                 fl->fl_type = F_UNLCK;
-        unlock_flocks();
+        spin_unlock(&inode->i_lock);
         return;
 }
 EXPORT_SYMBOL(posix_test_lock);
@@ -676,13 +780,14 @@ static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
 {
         struct file_lock *fl;
 
-        list_for_each_entry(fl, &blocked_list, fl_link) {
+        hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) {
                 if (posix_same_owner(fl, block_fl))
                         return fl->fl_next;
         }
         return NULL;
 }
 
+/* Must be called with the blocked_lock_lock held! */
 static int posix_locks_deadlock(struct file_lock *caller_fl,
                                 struct file_lock *block_fl)
 {
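posix_locks_deadlock() itself is untouched by this series; only its environment changes (it now runs under blocked_lock_lock, and what_owner_is_waiting_for() probes one hash bucket instead of scanning a global list). The algorithm follows the waits-for chain: ask what the blocker's owner is itself waiting on, and report deadlock if the chain returns to the caller. A self-contained sketch of that walk, with an arbitrary depth cap standing in for the kernel's loop-limit safety valve:

#include <stdio.h>

struct owner {
        const char *name;
        struct owner *waiting_on;       /* edge found via the blocked hash */
};

static int would_deadlock(struct owner *caller, struct owner *blocker)
{
        int depth;

        /* bounded walk, like the kernel's loop-limit safety valve */
        for (depth = 0; blocker && depth < 128; depth++) {
                if (blocker == caller)
                        return 1;       /* chain loops back: EDEADLK */
                blocker = blocker->waiting_on;
        }
        return 0;
}

int main(void)
{
        struct owner a = { "A", NULL }, b = { "B", NULL };

        b.waiting_on = &a;              /* B already waits on A */
        printf("A blocking on B deadlocks: %s\n",
               would_deadlock(&a, &b) ? "yes" : "no");
        return 0;
}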
@@ -718,7 +823,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
                 return -ENOMEM;
         }
 
-        lock_flocks();
+        spin_lock(&inode->i_lock);
         if (request->fl_flags & FL_ACCESS)
                 goto find_conflict;
 
@@ -748,9 +853,9 @@
          * give it the opportunity to lock the file.
          */
         if (found) {
-                unlock_flocks();
+                spin_unlock(&inode->i_lock);
                 cond_resched();
-                lock_flocks();
+                spin_lock(&inode->i_lock);
         }
 
 find_conflict:
@@ -777,7 +882,7 @@ find_conflict:
         error = 0;
 
 out:
-        unlock_flocks();
+        spin_unlock(&inode->i_lock);
         if (new_fl)
                 locks_free_lock(new_fl);
         return error;
@@ -791,7 +896,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
         struct file_lock *left = NULL;
         struct file_lock *right = NULL;
         struct file_lock **before;
-        int error, added = 0;
+        int error;
+        bool added = false;
 
         /*
          * We may need two file_lock structures for this operation,
@@ -806,7 +912,12 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
                 new_fl2 = locks_alloc_lock();
         }
 
-        lock_flocks();
+        spin_lock(&inode->i_lock);
+        /*
+         * New lock request. Walk all POSIX locks and look for conflicts. If
+         * there are any, either return error or put the request on the
+         * blocker's list of waiters and the global blocked_hash.
+         */
         if (request->fl_type != F_UNLCK) {
                 for_each_lock(inode, before) {
                         fl = *before;
@@ -819,11 +930,17 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
                         error = -EAGAIN;
                         if (!(request->fl_flags & FL_SLEEP))
                                 goto out;
+                        /*
+                         * Deadlock detection and insertion into the blocked
+                         * locks list must be done while holding the same lock!
+                         */
                         error = -EDEADLK;
-                        if (posix_locks_deadlock(request, fl))
-                                goto out;
-                        error = FILE_LOCK_DEFERRED;
-                        locks_insert_block(fl, request);
+                        spin_lock(&blocked_lock_lock);
+                        if (likely(!posix_locks_deadlock(request, fl))) {
+                                error = FILE_LOCK_DEFERRED;
+                                __locks_insert_block(fl, request);
+                        }
+                        spin_unlock(&blocked_lock_lock);
                         goto out;
                 }
         }
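Taking blocked_lock_lock across both the deadlock check and __locks_insert_block() is what makes the check sound: two tasks racing to block on each other are serialized, so at least one of them is guaranteed to observe the other already in the blocked_hash. The classic way to see the result from userspace is two processes locking two byte ranges in opposite order; one of the blocking F_SETLKW calls should fail with EDEADLK instead of hanging (a sketch against a throwaway /tmp file):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

static int lock_byte(int fd, off_t off, int cmd)
{
        struct flock fl = {
                .l_type = F_WRLCK,
                .l_whence = SEEK_SET,
                .l_start = off,
                .l_len = 1,
        };
        return fcntl(fd, cmd, &fl);
}

int main(void)
{
        int fd = open("/tmp/deadlock-demo", O_RDWR | O_CREAT, 0644);

        if (fd < 0 || ftruncate(fd, 2) < 0)
                return 1;
        if (fork() == 0) {              /* child: byte 1, then byte 0 */
                lock_byte(fd, 1, F_SETLK);
                sleep(1);
                if (lock_byte(fd, 0, F_SETLKW) < 0)
                        printf("child: %s\n", strerror(errno));
                _exit(0);
        }
        lock_byte(fd, 0, F_SETLK);      /* parent: byte 0, then byte 1 */
        sleep(1);
        if (lock_byte(fd, 1, F_SETLKW) < 0)
                printf("parent: %s\n", strerror(errno));
        wait(NULL);
        return 0;
}

Whichever process loses the race gets EDEADLK; its exit then releases the byte it held, so the other F_SETLKW completes normally.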
@@ -845,7 +962,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
                 before = &fl->fl_next;
         }
 
-        /* Process locks with this owner.  */
+        /* Process locks with this owner. */
         while ((fl = *before) && posix_same_owner(request, fl)) {
                 /* Detect adjacent or overlapping regions (if same lock type)
                  */
@@ -880,7 +997,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
                                 continue;
                         }
                         request = fl;
-                        added = 1;
+                        added = true;
                 }
                 else {
                         /* Processing for different lock types is a bit
@@ -891,7 +1008,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
                         if (fl->fl_start > request->fl_end)
                                 break;
                         if (request->fl_type == F_UNLCK)
-                                added = 1;
+                                added = true;
                         if (fl->fl_start < request->fl_start)
                                 left = fl;
                         /* If the next lock in the list has a higher end
@@ -921,7 +1038,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
                         locks_release_private(fl);
                         locks_copy_private(fl, request);
                         request = fl;
-                        added = 1;
+                        added = true;
                 }
         }
         /* Go on to next lock.
@@ -931,10 +1048,9 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
         }
 
         /*
-         * The above code only modifies existing locks in case of
-         * merging or replacing. If new lock(s) need to be inserted
-         * all modifications are done bellow this, so it's safe yet to
-         * bail out.
+         * The above code only modifies existing locks in case of merging or
+         * replacing. If new lock(s) need to be inserted all modifications are
+         * done below this, so it's safe yet to bail out.
          */
         error = -ENOLCK; /* "no luck" */
         if (right && left == right && !new_fl2)
@@ -974,7 +1090,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
                 locks_wake_up_blocks(left);
         }
 out:
-        unlock_flocks();
+        spin_unlock(&inode->i_lock);
         /*
          * Free any unused locks.
          */
@@ -1049,14 +1165,14 @@ int locks_mandatory_locked(struct inode *inode)
         /*
          * Search the lock list for this inode for any POSIX locks.
         */
-        lock_flocks();
+        spin_lock(&inode->i_lock);
         for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
                 if (!IS_POSIX(fl))
                         continue;
                 if (fl->fl_owner != owner)
                         break;
         }
-        unlock_flocks();
+        spin_unlock(&inode->i_lock);
         return fl ? -EAGAIN : 0;
 }
 
@@ -1199,7 +1315,7 @@ int __break_lease(struct inode *inode, unsigned int mode)
         if (IS_ERR(new_fl))
                 return PTR_ERR(new_fl);
 
-        lock_flocks();
+        spin_lock(&inode->i_lock);
 
         time_out_leases(inode);
 
@@ -1249,11 +1365,11 @@ restart:
                         break_time++;
         }
         locks_insert_block(flock, new_fl);
-        unlock_flocks();
+        spin_unlock(&inode->i_lock);
         error = wait_event_interruptible_timeout(new_fl->fl_wait,
                                                  !new_fl->fl_next, break_time);
-        lock_flocks();
-        __locks_delete_block(new_fl);
+        spin_lock(&inode->i_lock);
+        locks_delete_block(new_fl);
         if (error >= 0) {
                 if (error == 0)
                         time_out_leases(inode);
@@ -1270,7 +1386,7 @@ restart:
         }
 
 out:
-        unlock_flocks();
+        spin_unlock(&inode->i_lock);
         locks_free_lock(new_fl);
         return error;
 }
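__break_lease() above is the path a conflicting open() or truncate() takes against a lease, holding the opener off for at most lease_break_time seconds (45 by default, per the context at the top of the file). A minimal lease round-trip from userspace, as a sketch against a hypothetical /tmp file; the caller must own the file, and a read lease requires a descriptor opened read-only:

#define _GNU_SOURCE             /* exposes F_SETLEASE/F_GETLEASE */
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
        int fd = open("/tmp/lease-demo", O_RDONLY | O_CREAT, 0644);

        if (fd < 0 || fcntl(fd, F_SETLEASE, F_RDLCK) < 0) {
                perror("F_SETLEASE");
                return 1;
        }
        /*
         * Until we release the lease (or lease_break_time expires), a
         * conflicting open() by another process is held off while we
         * receive SIGIO telling us to flush and release.
         */
        printf("lease type now: %d (F_RDLCK is %d)\n",
               fcntl(fd, F_GETLEASE), F_RDLCK);
        fcntl(fd, F_SETLEASE, F_UNLCK);
        return 0;
}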
@@ -1323,9 +1439,10 @@ EXPORT_SYMBOL(lease_get_mtime);
 int fcntl_getlease(struct file *filp)
 {
         struct file_lock *fl;
+        struct inode *inode = file_inode(filp);
         int type = F_UNLCK;
 
-        lock_flocks();
+        spin_lock(&inode->i_lock);
         time_out_leases(file_inode(filp));
         for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl);
                         fl = fl->fl_next) {
@@ -1334,11 +1451,11 @@ int fcntl_getlease(struct file *filp)
                         break;
                 }
         }
-        unlock_flocks();
+        spin_unlock(&inode->i_lock);
         return type;
 }
 
-int generic_add_lease(struct file *filp, long arg, struct file_lock **flp)
+static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp)
 {
         struct file_lock *fl, **before, **my_before = NULL, *lease;
         struct dentry *dentry = filp->f_path.dentry;
@@ -1351,7 +1468,7 @@ int generic_add_lease(struct file *filp, long arg, struct file_lock **flp)
         if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
                 goto out;
         if ((arg == F_WRLCK)
-            && ((dentry->d_count > 1)
+            && ((d_count(dentry) > 1)
                 || (atomic_read(&inode->i_count) > 1)))
                 goto out;
 
@@ -1403,7 +1520,7 @@ out:
         return error;
 }
 
-int generic_delete_lease(struct file *filp, struct file_lock **flp)
+static int generic_delete_lease(struct file *filp, struct file_lock **flp)
 {
         struct file_lock *fl, **before;
         struct dentry *dentry = filp->f_path.dentry;
@@ -1428,7 +1545,7 @@ int generic_delete_lease(struct file *filp, struct file_lock **flp)
  * The (input) flp->fl_lmops->lm_break function is required
  * by break_lease().
  *
- * Called with file_lock_lock held.
+ * Called with inode->i_lock held.
  */
 int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
 {
@@ -1497,11 +1614,12 @@ static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
 
 int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
 {
+        struct inode *inode = file_inode(filp);
         int error;
 
-        lock_flocks();
+        spin_lock(&inode->i_lock);
         error = __vfs_setlease(filp, arg, lease);
-        unlock_flocks();
+        spin_unlock(&inode->i_lock);
 
         return error;
 }
@@ -1519,6 +1637,7 @@ static int do_fcntl_delete_lease(struct file *filp)
 static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
 {
         struct file_lock *fl, *ret;
+        struct inode *inode = file_inode(filp);
         struct fasync_struct *new;
         int error;
 
@@ -1532,10 +1651,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
                 return -ENOMEM;
         }
         ret = fl;
-        lock_flocks();
+        spin_lock(&inode->i_lock);
         error = __vfs_setlease(filp, arg, &ret);
         if (error) {
-                unlock_flocks();
+                spin_unlock(&inode->i_lock);
                 locks_free_lock(fl);
                 goto out_free_fasync;
         }
@@ -1552,7 +1671,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
         new = NULL;
 
         error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-        unlock_flocks();
+        spin_unlock(&inode->i_lock);
 
 out_free_fasync:
         if (new)
@@ -2076,7 +2195,7 @@ void locks_remove_flock(struct file *filp)
                 fl.fl_ops->fl_release_private(&fl);
         }
 
-        lock_flocks();
+        spin_lock(&inode->i_lock);
         before = &inode->i_flock;
 
         while ((fl = *before) != NULL) {
@@ -2094,30 +2213,28 @@ void locks_remove_flock(struct file *filp)
                 }
                 before = &fl->fl_next;
         }
-        unlock_flocks();
+        spin_unlock(&inode->i_lock);
 }
 
 /**
  * posix_unblock_lock - stop waiting for a file lock
- * @filp: how the file was opened
  * @waiter: the lock which was waiting
  *
  * lockd needs to block waiting for locks.
  */
 int
-posix_unblock_lock(struct file *filp, struct file_lock *waiter)
+posix_unblock_lock(struct file_lock *waiter)
 {
         int status = 0;
 
-        lock_flocks();
+        spin_lock(&blocked_lock_lock);
         if (waiter->fl_next)
                 __locks_delete_block(waiter);
         else
                 status = -ENOENT;
-        unlock_flocks();
+        spin_unlock(&blocked_lock_lock);
         return status;
 }
-
 EXPORT_SYMBOL(posix_unblock_lock);
 
 /**
@@ -2140,6 +2257,11 @@ EXPORT_SYMBOL_GPL(vfs_cancel_lock);
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
+struct locks_iterator {
+        int li_cpu;
+        loff_t li_pos;
+};
+
 static void lock_get_status(struct seq_file *f, struct file_lock *fl,
                             loff_t id, char *pfx)
 {
@@ -2213,37 +2335,41 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
 
 static int locks_show(struct seq_file *f, void *v)
 {
+        struct locks_iterator *iter = f->private;
         struct file_lock *fl, *bfl;
 
-        fl = list_entry(v, struct file_lock, fl_link);
+        fl = hlist_entry(v, struct file_lock, fl_link);
 
-        lock_get_status(f, fl, *((loff_t *)f->private), "");
+        lock_get_status(f, fl, iter->li_pos, "");
 
         list_for_each_entry(bfl, &fl->fl_block, fl_block)
-                lock_get_status(f, bfl, *((loff_t *)f->private), " ->");
+                lock_get_status(f, bfl, iter->li_pos, " ->");
 
         return 0;
 }
 
 static void *locks_start(struct seq_file *f, loff_t *pos)
 {
-        loff_t *p = f->private;
+        struct locks_iterator *iter = f->private;
 
-        lock_flocks();
-        *p = (*pos + 1);
-        return seq_list_start(&file_lock_list, *pos);
+        iter->li_pos = *pos + 1;
+        lg_global_lock(&file_lock_lglock);
+        spin_lock(&blocked_lock_lock);
+        return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos);
 }
 
 static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
 {
-        loff_t *p = f->private;
-        ++*p;
-        return seq_list_next(v, &file_lock_list, pos);
+        struct locks_iterator *iter = f->private;
+
+        ++iter->li_pos;
+        return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos);
 }
 
 static void locks_stop(struct seq_file *f, void *v)
 {
-        unlock_flocks();
+        spin_unlock(&blocked_lock_lock);
+        lg_global_unlock(&file_lock_lglock);
 }
 
 static const struct seq_operations locks_seq_operations = {
@@ -2255,7 +2381,8 @@ static const struct seq_operations locks_seq_operations = {
 
 static int locks_open(struct inode *inode, struct file *filp)
 {
-        return seq_open_private(filp, &locks_seq_operations, sizeof(loff_t));
+        return seq_open_private(filp, &locks_seq_operations,
+                                        sizeof(struct locks_iterator));
 }
 
 static const struct file_operations proc_locks_operations = {
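A note on the iterator: /proc/locks now has to stitch the per-CPU lists back into a single sequence. lg_global_lock() freezes all of them for the duration of the read, and the (li_cpu, li_pos) cursor records which CPU's list the walk is in so that the next read(2) resumes in place. A userspace sketch of that two-level walk (not kernel code; four fixed "CPUs", and the walk assumes the first list is non-empty):

#include <stdio.h>

#define NR_CPUS 4

struct node { int val; struct node *next; };
static struct node *file_lock_list[NR_CPUS];    /* one list per "CPU" */

struct locks_iterator { int li_cpu; long li_pos; };

/* advance to the next node, hopping to the next CPU's list as needed */
static struct node *locks_next(struct locks_iterator *iter, struct node *v)
{
        struct node *n = v ? v->next : NULL;

        while (!n && ++iter->li_cpu < NR_CPUS)
                n = file_lock_list[iter->li_cpu];
        iter->li_pos++;
        return n;
}

int main(void)
{
        struct node a = { 1, NULL }, b = { 2, NULL }, c = { 3, NULL };
        struct locks_iterator iter = { 0, 0 };
        struct node *v;

        file_lock_list[0] = &a;
        file_lock_list[2] = &b;
        b.next = &c;                            /* "CPUs" 1 and 3 empty */

        for (v = file_lock_list[0]; v; v = locks_next(&iter, v))
                printf("pos %ld: %d\n", iter.li_pos, v->val);
        return 0;
}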
@@ -2290,7 +2417,8 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
 {
         struct file_lock *fl;
         int result = 1;
-        lock_flocks();
+
+        spin_lock(&inode->i_lock);
         for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
                 if (IS_POSIX(fl)) {
                         if (fl->fl_type == F_RDLCK)
@@ -2307,7 +2435,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
                         result = 0;
                         break;
         }
-        unlock_flocks();
+        spin_unlock(&inode->i_lock);
         return result;
 }
 
@@ -2330,7 +2458,8 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
 {
         struct file_lock *fl;
         int result = 1;
-        lock_flocks();
+
+        spin_lock(&inode->i_lock);
         for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
                 if (IS_POSIX(fl)) {
                         if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
@@ -2345,7 +2474,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
                         result = 0;
                         break;
         }
-        unlock_flocks();
+        spin_unlock(&inode->i_lock);
         return result;
 }
 
@@ -2353,9 +2482,16 @@ EXPORT_SYMBOL(lock_may_write);
 
 static int __init filelock_init(void)
 {
+        int i;
+
         filelock_cache = kmem_cache_create("file_lock_cache",
                         sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
 
+        lg_lock_init(&file_lock_lglock, "file_lock_lglock");
+
+        for_each_possible_cpu(i)
+                INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i));
+
         return 0;
 }
 