diff options
author | Arnd Bergmann <arnd@arndb.de> | 2010-09-18 09:09:31 -0400 |
---|---|---|
committer | Arnd Bergmann <arnd@arndb.de> | 2010-10-05 05:02:04 -0400 |
commit | b89f432133851a01c0d28822f11cbdcc15781a75 (patch) | |
tree | a3e9ba638a9b746985148f4525335d360ec7da56 /fs/locks.c | |
parent | 2e54eb96e2c801f33d95b5dade15212ac4d6c4a5 (diff) |
fs/locks.c: prepare for BKL removal
This prepares for the removal of the big kernel lock from the
file locking code. We still use the BKL as long as fs/lockd
uses it and ceph might sleep, but we can flip the definition
to a private spinlock as soon as that's done.
All users outside of fs/lockd get converted to use
lock_flocks() instead of lock_kernel() where appropriate.
Based on an earlier patch to use a spinlock from Matthew
Wilcox, who has attempted this a few times before; the
earliest patch, from over 10 years ago, turned it into
a semaphore, which ended up being slower than the BKL
and was subsequently reverted.
Someone should do some serious performance testing when
this becomes a spinlock, since this has caused problems
before. Using a spinlock should be at least as good
as the BKL in theory, but who knows...
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Matthew Wilcox <willy@linux.intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Sage Weil <sage@newdream.net>
Cc: linux-kernel@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Diffstat (limited to 'fs/locks.c')
-rw-r--r-- | fs/locks.c | 112 |
1 files changed, 68 insertions, 44 deletions
diff --git a/fs/locks.c b/fs/locks.c index ab24d49fc048..8b2b6ad56a09 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -143,6 +143,22 @@ int lease_break_time = 45; | |||
143 | static LIST_HEAD(file_lock_list); | 143 | static LIST_HEAD(file_lock_list); |
144 | static LIST_HEAD(blocked_list); | 144 | static LIST_HEAD(blocked_list); |
145 | 145 | ||
146 | /* | ||
147 | * Protects the two list heads above, plus the inode->i_flock list | ||
148 | * FIXME: should use a spinlock, once lockd and ceph are ready. | ||
149 | */ | ||
150 | void lock_flocks(void) | ||
151 | { | ||
152 | lock_kernel(); | ||
153 | } | ||
154 | EXPORT_SYMBOL_GPL(lock_flocks); | ||
155 | |||
156 | void unlock_flocks(void) | ||
157 | { | ||
158 | unlock_kernel(); | ||
159 | } | ||
160 | EXPORT_SYMBOL_GPL(unlock_flocks); | ||
161 | |||
146 | static struct kmem_cache *filelock_cache __read_mostly; | 162 | static struct kmem_cache *filelock_cache __read_mostly; |
147 | 163 | ||
148 | /* Allocate an empty lock structure. */ | 164 | /* Allocate an empty lock structure. */ |
@@ -511,9 +527,9 @@ static void __locks_delete_block(struct file_lock *waiter) | |||
511 | */ | 527 | */ |
512 | static void locks_delete_block(struct file_lock *waiter) | 528 | static void locks_delete_block(struct file_lock *waiter) |
513 | { | 529 | { |
514 | lock_kernel(); | 530 | lock_flocks(); |
515 | __locks_delete_block(waiter); | 531 | __locks_delete_block(waiter); |
516 | unlock_kernel(); | 532 | unlock_flocks(); |
517 | } | 533 | } |
518 | 534 | ||
519 | /* Insert waiter into blocker's block list. | 535 | /* Insert waiter into blocker's block list. |
@@ -644,7 +660,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) | |||
644 | { | 660 | { |
645 | struct file_lock *cfl; | 661 | struct file_lock *cfl; |
646 | 662 | ||
647 | lock_kernel(); | 663 | lock_flocks(); |
648 | for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { | 664 | for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { |
649 | if (!IS_POSIX(cfl)) | 665 | if (!IS_POSIX(cfl)) |
650 | continue; | 666 | continue; |
@@ -657,7 +673,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) | |||
657 | fl->fl_pid = pid_vnr(cfl->fl_nspid); | 673 | fl->fl_pid = pid_vnr(cfl->fl_nspid); |
658 | } else | 674 | } else |
659 | fl->fl_type = F_UNLCK; | 675 | fl->fl_type = F_UNLCK; |
660 | unlock_kernel(); | 676 | unlock_flocks(); |
661 | return; | 677 | return; |
662 | } | 678 | } |
663 | EXPORT_SYMBOL(posix_test_lock); | 679 | EXPORT_SYMBOL(posix_test_lock); |
@@ -730,18 +746,16 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
730 | int error = 0; | 746 | int error = 0; |
731 | int found = 0; | 747 | int found = 0; |
732 | 748 | ||
733 | lock_kernel(); | 749 | if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { |
734 | if (request->fl_flags & FL_ACCESS) | ||
735 | goto find_conflict; | ||
736 | |||
737 | if (request->fl_type != F_UNLCK) { | ||
738 | error = -ENOMEM; | ||
739 | new_fl = locks_alloc_lock(); | 750 | new_fl = locks_alloc_lock(); |
740 | if (new_fl == NULL) | 751 | if (!new_fl) |
741 | goto out; | 752 | return -ENOMEM; |
742 | error = 0; | ||
743 | } | 753 | } |
744 | 754 | ||
755 | lock_flocks(); | ||
756 | if (request->fl_flags & FL_ACCESS) | ||
757 | goto find_conflict; | ||
758 | |||
745 | for_each_lock(inode, before) { | 759 | for_each_lock(inode, before) { |
746 | struct file_lock *fl = *before; | 760 | struct file_lock *fl = *before; |
747 | if (IS_POSIX(fl)) | 761 | if (IS_POSIX(fl)) |
@@ -767,8 +781,11 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
767 | * If a higher-priority process was blocked on the old file lock, | 781 | * If a higher-priority process was blocked on the old file lock, |
768 | * give it the opportunity to lock the file. | 782 | * give it the opportunity to lock the file. |
769 | */ | 783 | */ |
770 | if (found) | 784 | if (found) { |
785 | unlock_flocks(); | ||
771 | cond_resched(); | 786 | cond_resched(); |
787 | lock_flocks(); | ||
788 | } | ||
772 | 789 | ||
773 | find_conflict: | 790 | find_conflict: |
774 | for_each_lock(inode, before) { | 791 | for_each_lock(inode, before) { |
@@ -794,7 +811,7 @@ find_conflict: | |||
794 | error = 0; | 811 | error = 0; |
795 | 812 | ||
796 | out: | 813 | out: |
797 | unlock_kernel(); | 814 | unlock_flocks(); |
798 | if (new_fl) | 815 | if (new_fl) |
799 | locks_free_lock(new_fl); | 816 | locks_free_lock(new_fl); |
800 | return error; | 817 | return error; |
@@ -823,7 +840,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
823 | new_fl2 = locks_alloc_lock(); | 840 | new_fl2 = locks_alloc_lock(); |
824 | } | 841 | } |
825 | 842 | ||
826 | lock_kernel(); | 843 | lock_flocks(); |
827 | if (request->fl_type != F_UNLCK) { | 844 | if (request->fl_type != F_UNLCK) { |
828 | for_each_lock(inode, before) { | 845 | for_each_lock(inode, before) { |
829 | fl = *before; | 846 | fl = *before; |
@@ -991,7 +1008,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
991 | locks_wake_up_blocks(left); | 1008 | locks_wake_up_blocks(left); |
992 | } | 1009 | } |
993 | out: | 1010 | out: |
994 | unlock_kernel(); | 1011 | unlock_flocks(); |
995 | /* | 1012 | /* |
996 | * Free any unused locks. | 1013 | * Free any unused locks. |
997 | */ | 1014 | */ |
@@ -1066,14 +1083,14 @@ int locks_mandatory_locked(struct inode *inode) | |||
1066 | /* | 1083 | /* |
1067 | * Search the lock list for this inode for any POSIX locks. | 1084 | * Search the lock list for this inode for any POSIX locks. |
1068 | */ | 1085 | */ |
1069 | lock_kernel(); | 1086 | lock_flocks(); |
1070 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 1087 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
1071 | if (!IS_POSIX(fl)) | 1088 | if (!IS_POSIX(fl)) |
1072 | continue; | 1089 | continue; |
1073 | if (fl->fl_owner != owner) | 1090 | if (fl->fl_owner != owner) |
1074 | break; | 1091 | break; |
1075 | } | 1092 | } |
1076 | unlock_kernel(); | 1093 | unlock_flocks(); |
1077 | return fl ? -EAGAIN : 0; | 1094 | return fl ? -EAGAIN : 0; |
1078 | } | 1095 | } |
1079 | 1096 | ||
@@ -1186,7 +1203,7 @@ int __break_lease(struct inode *inode, unsigned int mode) | |||
1186 | 1203 | ||
1187 | new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); | 1204 | new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); |
1188 | 1205 | ||
1189 | lock_kernel(); | 1206 | lock_flocks(); |
1190 | 1207 | ||
1191 | time_out_leases(inode); | 1208 | time_out_leases(inode); |
1192 | 1209 | ||
@@ -1247,8 +1264,10 @@ restart: | |||
1247 | break_time++; | 1264 | break_time++; |
1248 | } | 1265 | } |
1249 | locks_insert_block(flock, new_fl); | 1266 | locks_insert_block(flock, new_fl); |
1267 | unlock_flocks(); | ||
1250 | error = wait_event_interruptible_timeout(new_fl->fl_wait, | 1268 | error = wait_event_interruptible_timeout(new_fl->fl_wait, |
1251 | !new_fl->fl_next, break_time); | 1269 | !new_fl->fl_next, break_time); |
1270 | lock_flocks(); | ||
1252 | __locks_delete_block(new_fl); | 1271 | __locks_delete_block(new_fl); |
1253 | if (error >= 0) { | 1272 | if (error >= 0) { |
1254 | if (error == 0) | 1273 | if (error == 0) |
@@ -1263,7 +1282,7 @@ restart: | |||
1263 | } | 1282 | } |
1264 | 1283 | ||
1265 | out: | 1284 | out: |
1266 | unlock_kernel(); | 1285 | unlock_flocks(); |
1267 | if (!IS_ERR(new_fl)) | 1286 | if (!IS_ERR(new_fl)) |
1268 | locks_free_lock(new_fl); | 1287 | locks_free_lock(new_fl); |
1269 | return error; | 1288 | return error; |
@@ -1319,7 +1338,7 @@ int fcntl_getlease(struct file *filp) | |||
1319 | struct file_lock *fl; | 1338 | struct file_lock *fl; |
1320 | int type = F_UNLCK; | 1339 | int type = F_UNLCK; |
1321 | 1340 | ||
1322 | lock_kernel(); | 1341 | lock_flocks(); |
1323 | time_out_leases(filp->f_path.dentry->d_inode); | 1342 | time_out_leases(filp->f_path.dentry->d_inode); |
1324 | for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl); | 1343 | for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl); |
1325 | fl = fl->fl_next) { | 1344 | fl = fl->fl_next) { |
@@ -1328,7 +1347,7 @@ int fcntl_getlease(struct file *filp) | |||
1328 | break; | 1347 | break; |
1329 | } | 1348 | } |
1330 | } | 1349 | } |
1331 | unlock_kernel(); | 1350 | unlock_flocks(); |
1332 | return type; | 1351 | return type; |
1333 | } | 1352 | } |
1334 | 1353 | ||
@@ -1341,7 +1360,7 @@ int fcntl_getlease(struct file *filp) | |||
1341 | * The (input) flp->fl_lmops->fl_break function is required | 1360 | * The (input) flp->fl_lmops->fl_break function is required |
1342 | * by break_lease(). | 1361 | * by break_lease(). |
1343 | * | 1362 | * |
1344 | * Called with kernel lock held. | 1363 | * Called with file_lock_lock held. |
1345 | */ | 1364 | */ |
1346 | int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | 1365 | int generic_setlease(struct file *filp, long arg, struct file_lock **flp) |
1347 | { | 1366 | { |
@@ -1436,7 +1455,15 @@ out: | |||
1436 | } | 1455 | } |
1437 | EXPORT_SYMBOL(generic_setlease); | 1456 | EXPORT_SYMBOL(generic_setlease); |
1438 | 1457 | ||
1439 | /** | 1458 | static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) |
1459 | { | ||
1460 | if (filp->f_op && filp->f_op->setlease) | ||
1461 | return filp->f_op->setlease(filp, arg, lease); | ||
1462 | else | ||
1463 | return generic_setlease(filp, arg, lease); | ||
1464 | } | ||
1465 | |||
1466 | /** | ||
1440 | * vfs_setlease - sets a lease on an open file | 1467 | * vfs_setlease - sets a lease on an open file |
1441 | * @filp: file pointer | 1468 | * @filp: file pointer |
1442 | * @arg: type of lease to obtain | 1469 | * @arg: type of lease to obtain |
@@ -1467,12 +1494,9 @@ int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) | |||
1467 | { | 1494 | { |
1468 | int error; | 1495 | int error; |
1469 | 1496 | ||
1470 | lock_kernel(); | 1497 | lock_flocks(); |
1471 | if (filp->f_op && filp->f_op->setlease) | 1498 | error = __vfs_setlease(filp, arg, lease); |
1472 | error = filp->f_op->setlease(filp, arg, lease); | 1499 | unlock_flocks(); |
1473 | else | ||
1474 | error = generic_setlease(filp, arg, lease); | ||
1475 | unlock_kernel(); | ||
1476 | 1500 | ||
1477 | return error; | 1501 | return error; |
1478 | } | 1502 | } |
@@ -1499,9 +1523,9 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | |||
1499 | if (error) | 1523 | if (error) |
1500 | return error; | 1524 | return error; |
1501 | 1525 | ||
1502 | lock_kernel(); | 1526 | lock_flocks(); |
1503 | 1527 | ||
1504 | error = vfs_setlease(filp, arg, &flp); | 1528 | error = __vfs_setlease(filp, arg, &flp); |
1505 | if (error || arg == F_UNLCK) | 1529 | if (error || arg == F_UNLCK) |
1506 | goto out_unlock; | 1530 | goto out_unlock; |
1507 | 1531 | ||
@@ -1516,7 +1540,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | |||
1516 | 1540 | ||
1517 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); | 1541 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); |
1518 | out_unlock: | 1542 | out_unlock: |
1519 | unlock_kernel(); | 1543 | unlock_flocks(); |
1520 | return error; | 1544 | return error; |
1521 | } | 1545 | } |
1522 | 1546 | ||
@@ -2020,7 +2044,7 @@ void locks_remove_flock(struct file *filp) | |||
2020 | fl.fl_ops->fl_release_private(&fl); | 2044 | fl.fl_ops->fl_release_private(&fl); |
2021 | } | 2045 | } |
2022 | 2046 | ||
2023 | lock_kernel(); | 2047 | lock_flocks(); |
2024 | before = &inode->i_flock; | 2048 | before = &inode->i_flock; |
2025 | 2049 | ||
2026 | while ((fl = *before) != NULL) { | 2050 | while ((fl = *before) != NULL) { |
@@ -2038,7 +2062,7 @@ void locks_remove_flock(struct file *filp) | |||
2038 | } | 2062 | } |
2039 | before = &fl->fl_next; | 2063 | before = &fl->fl_next; |
2040 | } | 2064 | } |
2041 | unlock_kernel(); | 2065 | unlock_flocks(); |
2042 | } | 2066 | } |
2043 | 2067 | ||
2044 | /** | 2068 | /** |
@@ -2053,12 +2077,12 @@ posix_unblock_lock(struct file *filp, struct file_lock *waiter) | |||
2053 | { | 2077 | { |
2054 | int status = 0; | 2078 | int status = 0; |
2055 | 2079 | ||
2056 | lock_kernel(); | 2080 | lock_flocks(); |
2057 | if (waiter->fl_next) | 2081 | if (waiter->fl_next) |
2058 | __locks_delete_block(waiter); | 2082 | __locks_delete_block(waiter); |
2059 | else | 2083 | else |
2060 | status = -ENOENT; | 2084 | status = -ENOENT; |
2061 | unlock_kernel(); | 2085 | unlock_flocks(); |
2062 | return status; | 2086 | return status; |
2063 | } | 2087 | } |
2064 | 2088 | ||
@@ -2172,7 +2196,7 @@ static int locks_show(struct seq_file *f, void *v) | |||
2172 | 2196 | ||
2173 | static void *locks_start(struct seq_file *f, loff_t *pos) | 2197 | static void *locks_start(struct seq_file *f, loff_t *pos) |
2174 | { | 2198 | { |
2175 | lock_kernel(); | 2199 | lock_flocks(); |
2176 | f->private = (void *)1; | 2200 | f->private = (void *)1; |
2177 | return seq_list_start(&file_lock_list, *pos); | 2201 | return seq_list_start(&file_lock_list, *pos); |
2178 | } | 2202 | } |
@@ -2184,7 +2208,7 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos) | |||
2184 | 2208 | ||
2185 | static void locks_stop(struct seq_file *f, void *v) | 2209 | static void locks_stop(struct seq_file *f, void *v) |
2186 | { | 2210 | { |
2187 | unlock_kernel(); | 2211 | unlock_flocks(); |
2188 | } | 2212 | } |
2189 | 2213 | ||
2190 | static const struct seq_operations locks_seq_operations = { | 2214 | static const struct seq_operations locks_seq_operations = { |
@@ -2231,7 +2255,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len) | |||
2231 | { | 2255 | { |
2232 | struct file_lock *fl; | 2256 | struct file_lock *fl; |
2233 | int result = 1; | 2257 | int result = 1; |
2234 | lock_kernel(); | 2258 | lock_flocks(); |
2235 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 2259 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
2236 | if (IS_POSIX(fl)) { | 2260 | if (IS_POSIX(fl)) { |
2237 | if (fl->fl_type == F_RDLCK) | 2261 | if (fl->fl_type == F_RDLCK) |
@@ -2248,7 +2272,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len) | |||
2248 | result = 0; | 2272 | result = 0; |
2249 | break; | 2273 | break; |
2250 | } | 2274 | } |
2251 | unlock_kernel(); | 2275 | unlock_flocks(); |
2252 | return result; | 2276 | return result; |
2253 | } | 2277 | } |
2254 | 2278 | ||
@@ -2271,7 +2295,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len) | |||
2271 | { | 2295 | { |
2272 | struct file_lock *fl; | 2296 | struct file_lock *fl; |
2273 | int result = 1; | 2297 | int result = 1; |
2274 | lock_kernel(); | 2298 | lock_flocks(); |
2275 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 2299 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
2276 | if (IS_POSIX(fl)) { | 2300 | if (IS_POSIX(fl)) { |
2277 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) | 2301 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) |
@@ -2286,7 +2310,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len) | |||
2286 | result = 0; | 2310 | result = 0; |
2287 | break; | 2311 | break; |
2288 | } | 2312 | } |
2289 | unlock_kernel(); | 2313 | unlock_flocks(); |
2290 | return result; | 2314 | return result; |
2291 | } | 2315 | } |
2292 | 2316 | ||