about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-15 19:07:40 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-15 19:07:40 -0400
commit 541010e4b8921cd781ff02ae68028501457045b6 (patch)
tree 58bd529d4c6e69899a0aa20afa2d7f1c23326417
parent e457f790d8b05977853aa238bbc667b3bb375671 (diff)
parent 5e7fc436426b1f9e106f511a049de91c82ec2c53 (diff)
Merge branch 'locks' of git://linux-nfs.org/~bfields/linux
* 'locks' of git://linux-nfs.org/~bfields/linux:
  nfsd: remove IS_ISMNDLCK macro
  Rework /proc/locks via seq_files and seq_list helpers
  fs/locks.c: use list_for_each_entry() instead of list_for_each()
  NFS: clean up explicit check for mandatory locks
  AFS: clean up explicit check for mandatory locks
  9PFS: clean up explicit check for mandatory locks
  GFS2: clean up explicit check for mandatory locks
  Cleanup macros for distinguishing mandatory locks
  Documentation: move locks.txt in filesystems/
  locks: add warning about mandatory locking races
  Documentation: move mandatory locking documentation to filesystems/
  locks: Fix potential OOPS in generic_setlease()
  Use list_first_entry in locks_wake_up_blocks
  locks: fix flock_lock_file() comment
  Memory shortage can result in inconsistent flocks state
  locks: kill redundant local variable
  locks: reverse order of posix_locks_conflict() arguments
-rw-r--r-- Documentation/00-INDEX 6
-rw-r--r-- Documentation/filesystems/00-INDEX 4
-rw-r--r-- Documentation/filesystems/locks.txt (renamed from Documentation/locks.txt) 10
-rw-r--r-- Documentation/filesystems/mandatory-locking.txt (renamed from Documentation/mandatory.txt) 21
-rw-r--r-- fs/9p/vfs_file.c 2
-rw-r--r-- fs/afs/flock.c 3
-rw-r--r-- fs/gfs2/ops_file.c 4
-rw-r--r-- fs/locks.c 192
-rw-r--r-- fs/nfs/file.c 3
-rw-r--r-- fs/nfsd/nfs4state.c 2
-rw-r--r-- fs/nfsd/vfs.c 13
-rw-r--r-- fs/proc/proc_misc.c 19
-rw-r--r-- fs/read_write.c 2
-rw-r--r-- include/linux/fs.h 22
14 files changed, 157 insertions, 146 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 43e89b1537d9..cc10ce7dc339 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -145,7 +145,7 @@ fb/
145feature-removal-schedule.txt 145feature-removal-schedule.txt
146 - list of files and features that are going to be removed. 146 - list of files and features that are going to be removed.
147filesystems/ 147filesystems/
148 - directory with info on the various filesystems that Linux supports. 148 - info on the vfs and the various filesystems that Linux supports.
149firmware_class/ 149firmware_class/
150 - request_firmware() hotplug interface info. 150 - request_firmware() hotplug interface info.
151floppy.txt 151floppy.txt
@@ -230,8 +230,6 @@ local_ops.txt
230 - semantics and behavior of local atomic operations. 230 - semantics and behavior of local atomic operations.
231lockdep-design.txt 231lockdep-design.txt
232 - documentation on the runtime locking correctness validator. 232 - documentation on the runtime locking correctness validator.
233locks.txt
234 - info on file locking implementations, flock() vs. fcntl(), etc.
235logo.gif 233logo.gif
236 - full colour GIF image of Linux logo (penguin - Tux). 234 - full colour GIF image of Linux logo (penguin - Tux).
237logo.txt 235logo.txt
@@ -240,8 +238,6 @@ m68k/
240 - directory with info about Linux on Motorola 68k architecture. 238 - directory with info about Linux on Motorola 68k architecture.
241magic-number.txt 239magic-number.txt
242 - list of magic numbers used to mark/protect kernel data structures. 240 - list of magic numbers used to mark/protect kernel data structures.
243mandatory.txt
244 - info on the Linux implementation of Sys V mandatory file locking.
245mca.txt 241mca.txt
246 - info on supporting Micro Channel Architecture (e.g. PS/2) systems. 242 - info on supporting Micro Channel Architecture (e.g. PS/2) systems.
247md.txt 243md.txt
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 59db1bca7027..599593a17067 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -52,6 +52,10 @@ isofs.txt
52 - info and mount options for the ISO 9660 (CDROM) filesystem. 52 - info and mount options for the ISO 9660 (CDROM) filesystem.
53jfs.txt 53jfs.txt
54 - info and mount options for the JFS filesystem. 54 - info and mount options for the JFS filesystem.
55locks.txt
56 - info on file locking implementations, flock() vs. fcntl(), etc.
57mandatory-locking.txt
58 - info on the Linux implementation of Sys V mandatory file locking.
55ncpfs.txt 59ncpfs.txt
56 - info on Novell Netware(tm) filesystem using NCP protocol. 60 - info on Novell Netware(tm) filesystem using NCP protocol.
57ntfs.txt 61ntfs.txt
diff --git a/Documentation/locks.txt b/Documentation/filesystems/locks.txt
index e3b402ef33bd..fab857accbd6 100644
--- a/Documentation/locks.txt
+++ b/Documentation/filesystems/locks.txt
@@ -53,11 +53,11 @@ fcntl(), with all the problems that implies.
531.3 Mandatory Locking As A Mount Option 531.3 Mandatory Locking As A Mount Option
54--------------------------------------- 54---------------------------------------
55 55
56Mandatory locking, as described in 'Documentation/mandatory.txt' was prior 56Mandatory locking, as described in 'Documentation/filesystems/mandatory-locking.txt'
57to this release a general configuration option that was valid for all 57was prior to this release a general configuration option that was valid for
58mounted filesystems. This had a number of inherent dangers, not the least 58all mounted filesystems. This had a number of inherent dangers, not the
59of which was the ability to freeze an NFS server by asking it to read a 59least of which was the ability to freeze an NFS server by asking it to read
60file for which a mandatory lock existed. 60a file for which a mandatory lock existed.
61 61
62From this release of the kernel, mandatory locking can be turned on and off 62From this release of the kernel, mandatory locking can be turned on and off
63on a per-filesystem basis, using the mount options 'mand' and 'nomand'. 63on a per-filesystem basis, using the mount options 'mand' and 'nomand'.
diff --git a/Documentation/mandatory.txt b/Documentation/filesystems/mandatory-locking.txt
index bc449d49eee5..0979d1d2ca8b 100644
--- a/Documentation/mandatory.txt
+++ b/Documentation/filesystems/mandatory-locking.txt
@@ -3,7 +3,26 @@
3 Andy Walker <andy@lysaker.kvaerner.no> 3 Andy Walker <andy@lysaker.kvaerner.no>
4 4
5 15 April 1996 5 15 April 1996
6 6 (Updated September 2007)
7
80. Why you should avoid mandatory locking
9-----------------------------------------
10
11The Linux implementation is prey to a number of difficult-to-fix race
12conditions which in practice make it not dependable:
13
14 - The write system call checks for a mandatory lock only once
15 at its start. It is therefore possible for a lock request to
16 be granted after this check but before the data is modified.
17 A process may then see file data change even while a mandatory
18 lock was held.
19 - Similarly, an exclusive lock may be granted on a file after
20 the kernel has decided to proceed with a read, but before the
21 read has actually completed, and the reading process may see
22 the file data in a state which should not have been visible
23 to it.
24 - Similar races make the claimed mutual exclusion between lock
25 and mmap similarly unreliable.
7 26
81. What is mandatory locking? 271. What is mandatory locking?
9------------------------------ 28------------------------------
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 2a40c2946d0a..716691689fd5 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -105,7 +105,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
105 P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); 105 P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
106 106
107 /* No mandatory locks */ 107 /* No mandatory locks */
108 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) 108 if (__mandatory_lock(inode))
109 return -ENOLCK; 109 return -ENOLCK;
110 110
111 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { 111 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index af6952e39a18..210acafe4a9b 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -524,8 +524,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
524 (long long) fl->fl_start, (long long) fl->fl_end); 524 (long long) fl->fl_start, (long long) fl->fl_end);
525 525
526 /* AFS doesn't support mandatory locks */ 526 /* AFS doesn't support mandatory locks */
527 if ((vnode->vfs_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && 527 if (__mandatory_lock(&vnode->vfs_inode) && fl->fl_type != F_UNLCK)
528 fl->fl_type != F_UNLCK)
529 return -ENOLCK; 528 return -ENOLCK;
530 529
531 if (IS_GETLK(cmd)) 530 if (IS_GETLK(cmd))
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 46a9e10ff17b..7eb4b280ac66 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -535,7 +535,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
535 535
536 if (!(fl->fl_flags & FL_POSIX)) 536 if (!(fl->fl_flags & FL_POSIX))
537 return -ENOLCK; 537 return -ENOLCK;
538 if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) 538 if (__mandatory_lock(&ip->i_inode))
539 return -ENOLCK; 539 return -ENOLCK;
540 540
541 if (sdp->sd_args.ar_localflocks) { 541 if (sdp->sd_args.ar_localflocks) {
@@ -636,7 +636,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
636 636
637 if (!(fl->fl_flags & FL_FLOCK)) 637 if (!(fl->fl_flags & FL_FLOCK))
638 return -ENOLCK; 638 return -ENOLCK;
639 if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) 639 if (__mandatory_lock(&ip->i_inode))
640 return -ENOLCK; 640 return -ENOLCK;
641 641
642 if (sdp->sd_args.ar_localflocks) 642 if (sdp->sd_args.ar_localflocks)
diff --git a/fs/locks.c b/fs/locks.c
index c795eaaf6c4c..7f9a3ea47418 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -534,7 +534,9 @@ static void locks_insert_block(struct file_lock *blocker,
534static void locks_wake_up_blocks(struct file_lock *blocker) 534static void locks_wake_up_blocks(struct file_lock *blocker)
535{ 535{
536 while (!list_empty(&blocker->fl_block)) { 536 while (!list_empty(&blocker->fl_block)) {
537 struct file_lock *waiter = list_entry(blocker->fl_block.next, 537 struct file_lock *waiter;
538
539 waiter = list_first_entry(&blocker->fl_block,
538 struct file_lock, fl_block); 540 struct file_lock, fl_block);
539 __locks_delete_block(waiter); 541 __locks_delete_block(waiter);
540 if (waiter->fl_lmops && waiter->fl_lmops->fl_notify) 542 if (waiter->fl_lmops && waiter->fl_lmops->fl_notify)
@@ -668,7 +670,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
668 for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { 670 for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
669 if (!IS_POSIX(cfl)) 671 if (!IS_POSIX(cfl))
670 continue; 672 continue;
671 if (posix_locks_conflict(cfl, fl)) 673 if (posix_locks_conflict(fl, cfl))
672 break; 674 break;
673 } 675 }
674 if (cfl) 676 if (cfl)
@@ -698,13 +700,12 @@ EXPORT_SYMBOL(posix_test_lock);
698static int posix_locks_deadlock(struct file_lock *caller_fl, 700static int posix_locks_deadlock(struct file_lock *caller_fl,
699 struct file_lock *block_fl) 701 struct file_lock *block_fl)
700{ 702{
701 struct list_head *tmp; 703 struct file_lock *fl;
702 704
703next_task: 705next_task:
704 if (posix_same_owner(caller_fl, block_fl)) 706 if (posix_same_owner(caller_fl, block_fl))
705 return 1; 707 return 1;
706 list_for_each(tmp, &blocked_list) { 708 list_for_each_entry(fl, &blocked_list, fl_link) {
707 struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
708 if (posix_same_owner(fl, block_fl)) { 709 if (posix_same_owner(fl, block_fl)) {
709 fl = fl->fl_next; 710 fl = fl->fl_next;
710 block_fl = fl; 711 block_fl = fl;
@@ -715,8 +716,7 @@ next_task:
715} 716}
716 717
717/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks 718/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
718 * at the head of the list, but that's secret knowledge known only to 719 * after any leases, but before any posix locks.
719 * flock_lock_file and posix_lock_file.
720 * 720 *
721 * Note that if called with an FL_EXISTS argument, the caller may determine 721 * Note that if called with an FL_EXISTS argument, the caller may determine
722 * whether or not a lock was successfully freed by testing the return 722 * whether or not a lock was successfully freed by testing the return
@@ -733,6 +733,15 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
733 lock_kernel(); 733 lock_kernel();
734 if (request->fl_flags & FL_ACCESS) 734 if (request->fl_flags & FL_ACCESS)
735 goto find_conflict; 735 goto find_conflict;
736
737 if (request->fl_type != F_UNLCK) {
738 error = -ENOMEM;
739 new_fl = locks_alloc_lock();
740 if (new_fl == NULL)
741 goto out;
742 error = 0;
743 }
744
736 for_each_lock(inode, before) { 745 for_each_lock(inode, before) {
737 struct file_lock *fl = *before; 746 struct file_lock *fl = *before;
738 if (IS_POSIX(fl)) 747 if (IS_POSIX(fl))
@@ -754,10 +763,6 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
754 goto out; 763 goto out;
755 } 764 }
756 765
757 error = -ENOMEM;
758 new_fl = locks_alloc_lock();
759 if (new_fl == NULL)
760 goto out;
761 /* 766 /*
762 * If a higher-priority process was blocked on the old file lock, 767 * If a higher-priority process was blocked on the old file lock,
763 * give it the opportunity to lock the file. 768 * give it the opportunity to lock the file.
@@ -819,7 +824,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
819 lock_kernel(); 824 lock_kernel();
820 if (request->fl_type != F_UNLCK) { 825 if (request->fl_type != F_UNLCK) {
821 for_each_lock(inode, before) { 826 for_each_lock(inode, before) {
822 struct file_lock *fl = *before; 827 fl = *before;
823 if (!IS_POSIX(fl)) 828 if (!IS_POSIX(fl))
824 continue; 829 continue;
825 if (!posix_locks_conflict(request, fl)) 830 if (!posix_locks_conflict(request, fl))
@@ -1113,7 +1118,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
1113 * If we've been sleeping someone might have 1118 * If we've been sleeping someone might have
1114 * changed the permissions behind our back. 1119 * changed the permissions behind our back.
1115 */ 1120 */
1116 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) 1121 if (__mandatory_lock(inode))
1117 continue; 1122 continue;
1118 } 1123 }
1119 1124
@@ -1337,6 +1342,7 @@ int fcntl_getlease(struct file *filp)
1337int generic_setlease(struct file *filp, long arg, struct file_lock **flp) 1342int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1338{ 1343{
1339 struct file_lock *fl, **before, **my_before = NULL, *lease; 1344 struct file_lock *fl, **before, **my_before = NULL, *lease;
1345 struct file_lock *new_fl = NULL;
1340 struct dentry *dentry = filp->f_path.dentry; 1346 struct dentry *dentry = filp->f_path.dentry;
1341 struct inode *inode = dentry->d_inode; 1347 struct inode *inode = dentry->d_inode;
1342 int error, rdlease_count = 0, wrlease_count = 0; 1348 int error, rdlease_count = 0, wrlease_count = 0;
@@ -1363,6 +1369,11 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1363 || (atomic_read(&inode->i_count) > 1))) 1369 || (atomic_read(&inode->i_count) > 1)))
1364 goto out; 1370 goto out;
1365 1371
1372 error = -ENOMEM;
1373 new_fl = locks_alloc_lock();
1374 if (new_fl == NULL)
1375 goto out;
1376
1366 /* 1377 /*
1367 * At this point, we know that if there is an exclusive 1378 * At this point, we know that if there is an exclusive
1368 * lease on this file, then we hold it on this filp 1379 * lease on this file, then we hold it on this filp
@@ -1405,18 +1416,15 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1405 if (!leases_enable) 1416 if (!leases_enable)
1406 goto out; 1417 goto out;
1407 1418
1408 error = -ENOMEM; 1419 locks_copy_lock(new_fl, lease);
1409 fl = locks_alloc_lock(); 1420 locks_insert_lock(before, new_fl);
1410 if (fl == NULL)
1411 goto out;
1412
1413 locks_copy_lock(fl, lease);
1414 1421
1415 locks_insert_lock(before, fl); 1422 *flp = new_fl;
1423 return 0;
1416 1424
1417 *flp = fl;
1418 error = 0;
1419out: 1425out:
1426 if (new_fl != NULL)
1427 locks_free_lock(new_fl);
1420 return error; 1428 return error;
1421} 1429}
1422EXPORT_SYMBOL(generic_setlease); 1430EXPORT_SYMBOL(generic_setlease);
@@ -1752,9 +1760,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
1752 /* Don't allow mandatory locks on files that may be memory mapped 1760 /* Don't allow mandatory locks on files that may be memory mapped
1753 * and shared. 1761 * and shared.
1754 */ 1762 */
1755 if (IS_MANDLOCK(inode) && 1763 if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
1756 (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
1757 mapping_writably_mapped(filp->f_mapping)) {
1758 error = -EAGAIN; 1764 error = -EAGAIN;
1759 goto out; 1765 goto out;
1760 } 1766 }
@@ -1878,9 +1884,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
1878 /* Don't allow mandatory locks on files that may be memory mapped 1884 /* Don't allow mandatory locks on files that may be memory mapped
1879 * and shared. 1885 * and shared.
1880 */ 1886 */
1881 if (IS_MANDLOCK(inode) && 1887 if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
1882 (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
1883 mapping_writably_mapped(filp->f_mapping)) {
1884 error = -EAGAIN; 1888 error = -EAGAIN;
1885 goto out; 1889 goto out;
1886 } 1890 }
@@ -2062,138 +2066,114 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
2062 2066
2063EXPORT_SYMBOL_GPL(vfs_cancel_lock); 2067EXPORT_SYMBOL_GPL(vfs_cancel_lock);
2064 2068
2065static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx) 2069#ifdef CONFIG_PROC_FS
2070#include <linux/seq_file.h>
2071
2072static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2073 int id, char *pfx)
2066{ 2074{
2067 struct inode *inode = NULL; 2075 struct inode *inode = NULL;
2068 2076
2069 if (fl->fl_file != NULL) 2077 if (fl->fl_file != NULL)
2070 inode = fl->fl_file->f_path.dentry->d_inode; 2078 inode = fl->fl_file->f_path.dentry->d_inode;
2071 2079
2072 out += sprintf(out, "%d:%s ", id, pfx); 2080 seq_printf(f, "%d:%s ", id, pfx);
2073 if (IS_POSIX(fl)) { 2081 if (IS_POSIX(fl)) {
2074 out += sprintf(out, "%6s %s ", 2082 seq_printf(f, "%6s %s ",
2075 (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", 2083 (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ",
2076 (inode == NULL) ? "*NOINODE*" : 2084 (inode == NULL) ? "*NOINODE*" :
2077 (IS_MANDLOCK(inode) && 2085 mandatory_lock(inode) ? "MANDATORY" : "ADVISORY ");
2078 (inode->i_mode & (S_IXGRP | S_ISGID)) == S_ISGID) ?
2079 "MANDATORY" : "ADVISORY ");
2080 } else if (IS_FLOCK(fl)) { 2086 } else if (IS_FLOCK(fl)) {
2081 if (fl->fl_type & LOCK_MAND) { 2087 if (fl->fl_type & LOCK_MAND) {
2082 out += sprintf(out, "FLOCK MSNFS "); 2088 seq_printf(f, "FLOCK MSNFS ");
2083 } else { 2089 } else {
2084 out += sprintf(out, "FLOCK ADVISORY "); 2090 seq_printf(f, "FLOCK ADVISORY ");
2085 } 2091 }
2086 } else if (IS_LEASE(fl)) { 2092 } else if (IS_LEASE(fl)) {
2087 out += sprintf(out, "LEASE "); 2093 seq_printf(f, "LEASE ");
2088 if (fl->fl_type & F_INPROGRESS) 2094 if (fl->fl_type & F_INPROGRESS)
2089 out += sprintf(out, "BREAKING "); 2095 seq_printf(f, "BREAKING ");
2090 else if (fl->fl_file) 2096 else if (fl->fl_file)
2091 out += sprintf(out, "ACTIVE "); 2097 seq_printf(f, "ACTIVE ");
2092 else 2098 else
2093 out += sprintf(out, "BREAKER "); 2099 seq_printf(f, "BREAKER ");
2094 } else { 2100 } else {
2095 out += sprintf(out, "UNKNOWN UNKNOWN "); 2101 seq_printf(f, "UNKNOWN UNKNOWN ");
2096 } 2102 }
2097 if (fl->fl_type & LOCK_MAND) { 2103 if (fl->fl_type & LOCK_MAND) {
2098 out += sprintf(out, "%s ", 2104 seq_printf(f, "%s ",
2099 (fl->fl_type & LOCK_READ) 2105 (fl->fl_type & LOCK_READ)
2100 ? (fl->fl_type & LOCK_WRITE) ? "RW " : "READ " 2106 ? (fl->fl_type & LOCK_WRITE) ? "RW " : "READ "
2101 : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE "); 2107 : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE ");
2102 } else { 2108 } else {
2103 out += sprintf(out, "%s ", 2109 seq_printf(f, "%s ",
2104 (fl->fl_type & F_INPROGRESS) 2110 (fl->fl_type & F_INPROGRESS)
2105 ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ " 2111 ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ "
2106 : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ "); 2112 : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ ");
2107 } 2113 }
2108 if (inode) { 2114 if (inode) {
2109#ifdef WE_CAN_BREAK_LSLK_NOW 2115#ifdef WE_CAN_BREAK_LSLK_NOW
2110 out += sprintf(out, "%d %s:%ld ", fl->fl_pid, 2116 seq_printf(f, "%d %s:%ld ", fl->fl_pid,
2111 inode->i_sb->s_id, inode->i_ino); 2117 inode->i_sb->s_id, inode->i_ino);
2112#else 2118#else
2113 /* userspace relies on this representation of dev_t ;-( */ 2119 /* userspace relies on this representation of dev_t ;-( */
2114 out += sprintf(out, "%d %02x:%02x:%ld ", fl->fl_pid, 2120 seq_printf(f, "%d %02x:%02x:%ld ", fl->fl_pid,
2115 MAJOR(inode->i_sb->s_dev), 2121 MAJOR(inode->i_sb->s_dev),
2116 MINOR(inode->i_sb->s_dev), inode->i_ino); 2122 MINOR(inode->i_sb->s_dev), inode->i_ino);
2117#endif 2123#endif
2118 } else { 2124 } else {
2119 out += sprintf(out, "%d <none>:0 ", fl->fl_pid); 2125 seq_printf(f, "%d <none>:0 ", fl->fl_pid);
2120 } 2126 }
2121 if (IS_POSIX(fl)) { 2127 if (IS_POSIX(fl)) {
2122 if (fl->fl_end == OFFSET_MAX) 2128 if (fl->fl_end == OFFSET_MAX)
2123 out += sprintf(out, "%Ld EOF\n", fl->fl_start); 2129 seq_printf(f, "%Ld EOF\n", fl->fl_start);
2124 else 2130 else
2125 out += sprintf(out, "%Ld %Ld\n", fl->fl_start, 2131 seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end);
2126 fl->fl_end);
2127 } else { 2132 } else {
2128 out += sprintf(out, "0 EOF\n"); 2133 seq_printf(f, "0 EOF\n");
2129 } 2134 }
2130} 2135}
2131 2136
2132static void move_lock_status(char **p, off_t* pos, off_t offset) 2137static int locks_show(struct seq_file *f, void *v)
2133{ 2138{
2134 int len; 2139 struct file_lock *fl, *bfl;
2135 len = strlen(*p); 2140
2136 if(*pos >= offset) { 2141 fl = list_entry(v, struct file_lock, fl_link);
2137 /* the complete line is valid */ 2142
2138 *p += len; 2143 lock_get_status(f, fl, (long)f->private, "");
2139 *pos += len; 2144
2140 return; 2145 list_for_each_entry(bfl, &fl->fl_block, fl_block)
2141 } 2146 lock_get_status(f, bfl, (long)f->private, " ->");
2142 if(*pos+len > offset) { 2147
2143 /* use the second part of the line */ 2148 f->private++;
2144 int i = offset-*pos; 2149 return 0;
2145 memmove(*p,*p+i,len-i);
2146 *p += len-i;
2147 *pos += len;
2148 return;
2149 }
2150 /* discard the complete line */
2151 *pos += len;
2152} 2150}
2153 2151
2154/** 2152static void *locks_start(struct seq_file *f, loff_t *pos)
2155 * get_locks_status - reports lock usage in /proc/locks 2153{
2156 * @buffer: address in userspace to write into 2154 lock_kernel();
2157 * @start: ? 2155 f->private = (void *)1;
2158 * @offset: how far we are through the buffer 2156 return seq_list_start(&file_lock_list, *pos);
2159 * @length: how much to read 2157}
2160 */
2161 2158
2162int get_locks_status(char *buffer, char **start, off_t offset, int length) 2159static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
2163{ 2160{
2164 struct list_head *tmp; 2161 return seq_list_next(v, &file_lock_list, pos);
2165 char *q = buffer; 2162}
2166 off_t pos = 0;
2167 int i = 0;
2168 2163
2169 lock_kernel(); 2164static void locks_stop(struct seq_file *f, void *v)
2170 list_for_each(tmp, &file_lock_list) { 2165{
2171 struct list_head *btmp;
2172 struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
2173 lock_get_status(q, fl, ++i, "");
2174 move_lock_status(&q, &pos, offset);
2175
2176 if(pos >= offset+length)
2177 goto done;
2178
2179 list_for_each(btmp, &fl->fl_block) {
2180 struct file_lock *bfl = list_entry(btmp,
2181 struct file_lock, fl_block);
2182 lock_get_status(q, bfl, i, " ->");
2183 move_lock_status(&q, &pos, offset);
2184
2185 if(pos >= offset+length)
2186 goto done;
2187 }
2188 }
2189done:
2190 unlock_kernel(); 2166 unlock_kernel();
2191 *start = buffer;
2192 if(q-buffer < length)
2193 return (q-buffer);
2194 return length;
2195} 2167}
2196 2168
2169struct seq_operations locks_seq_operations = {
2170 .start = locks_start,
2171 .next = locks_next,
2172 .stop = locks_stop,
2173 .show = locks_show,
2174};
2175#endif
2176
2197/** 2177/**
2198 * lock_may_read - checks that the region is free of locks 2178 * lock_may_read - checks that the region is free of locks
2199 * @inode: the inode that is being read 2179 * @inode: the inode that is being read
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index c664bb921425..08c7c7387fce 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -577,8 +577,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
577 nfs_inc_stats(inode, NFSIOS_VFSLOCK); 577 nfs_inc_stats(inode, NFSIOS_VFSLOCK);
578 578
579 /* No mandatory locks over NFS */ 579 /* No mandatory locks over NFS */
580 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && 580 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
581 fl->fl_type != F_UNLCK)
582 return -ENOLCK; 581 return -ENOLCK;
583 582
584 if (IS_GETLK(cmd)) 583 if (IS_GETLK(cmd))
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6f182d25793d..31673cd251c3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2035,7 +2035,7 @@ static inline int
2035io_during_grace_disallowed(struct inode *inode, int flags) 2035io_during_grace_disallowed(struct inode *inode, int flags)
2036{ 2036{
2037 return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE)) 2037 return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE))
2038 && MANDATORY_LOCK(inode); 2038 && mandatory_lock(inode);
2039} 2039}
2040 2040
2041/* 2041/*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index cec78c82b1f9..2a8d665b134b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -61,12 +61,6 @@
61#define NFSDDBG_FACILITY NFSDDBG_FILEOP 61#define NFSDDBG_FACILITY NFSDDBG_FILEOP
62 62
63 63
64/* We must ignore files (but only files) which might have mandatory
65 * locks on them because there is no way to know if the accesser has
66 * the lock.
67 */
68#define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && MANDATORY_LOCK(i))
69
70/* 64/*
71 * This is a cache of readahead params that help us choose the proper 65 * This is a cache of readahead params that help us choose the proper
72 * readahead strategy. Initially, we set all readahead parameters to 0 66 * readahead strategy. Initially, we set all readahead parameters to 0
@@ -689,7 +683,12 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
689 err = nfserr_perm; 683 err = nfserr_perm;
690 if (IS_APPEND(inode) && (access & MAY_WRITE)) 684 if (IS_APPEND(inode) && (access & MAY_WRITE))
691 goto out; 685 goto out;
692 if (IS_ISMNDLK(inode)) 686 /*
687 * We must ignore files (but only files) which might have mandatory
688 * locks on them because there is no way to know if the accesser has
689 * the lock.
690 */
691 if (S_ISREG((inode)->i_mode) && mandatory_lock(inode))
693 goto out; 692 goto out;
694 693
695 if (!inode->i_fop) 694 if (!inode->i_fop)
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index b872a01ad3af..0071939c0095 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -66,7 +66,6 @@ extern int get_stram_list(char *);
66extern int get_filesystem_list(char *); 66extern int get_filesystem_list(char *);
67extern int get_exec_domain_list(char *); 67extern int get_exec_domain_list(char *);
68extern int get_dma_list(char *); 68extern int get_dma_list(char *);
69extern int get_locks_status (char *, char **, off_t, int);
70 69
71static int proc_calc_metrics(char *page, char **start, off_t off, 70static int proc_calc_metrics(char *page, char **start, off_t off,
72 int count, int *eof, int len) 71 int count, int *eof, int len)
@@ -624,16 +623,18 @@ static int cmdline_read_proc(char *page, char **start, off_t off,
624 return proc_calc_metrics(page, start, off, count, eof, len); 623 return proc_calc_metrics(page, start, off, count, eof, len);
625} 624}
626 625
627static int locks_read_proc(char *page, char **start, off_t off, 626static int locks_open(struct inode *inode, struct file *filp)
628 int count, int *eof, void *data)
629{ 627{
630 int len = get_locks_status(page, start, off, count); 628 return seq_open(filp, &locks_seq_operations);
631
632 if (len < count)
633 *eof = 1;
634 return len;
635} 629}
636 630
631static const struct file_operations proc_locks_operations = {
632 .open = locks_open,
633 .read = seq_read,
634 .llseek = seq_lseek,
635 .release = seq_release,
636};
637
637static int execdomains_read_proc(char *page, char **start, off_t off, 638static int execdomains_read_proc(char *page, char **start, off_t off,
638 int count, int *eof, void *data) 639 int count, int *eof, void *data)
639{ 640{
@@ -691,7 +692,6 @@ void __init proc_misc_init(void)
691#endif 692#endif
692 {"filesystems", filesystems_read_proc}, 693 {"filesystems", filesystems_read_proc},
693 {"cmdline", cmdline_read_proc}, 694 {"cmdline", cmdline_read_proc},
694 {"locks", locks_read_proc},
695 {"execdomains", execdomains_read_proc}, 695 {"execdomains", execdomains_read_proc},
696 {NULL,} 696 {NULL,}
697 }; 697 };
@@ -709,6 +709,7 @@ void __init proc_misc_init(void)
709 entry->proc_fops = &proc_kmsg_operations; 709 entry->proc_fops = &proc_kmsg_operations;
710 } 710 }
711#endif 711#endif
712 create_seq_entry("locks", 0, &proc_locks_operations);
712 create_seq_entry("devices", 0, &proc_devinfo_operations); 713 create_seq_entry("devices", 0, &proc_devinfo_operations);
713 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); 714 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
714#ifdef CONFIG_BLOCK 715#ifdef CONFIG_BLOCK
diff --git a/fs/read_write.c b/fs/read_write.c
index 507ddff48a9a..124693e8d3fa 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -205,7 +205,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
205 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 205 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
206 goto Einval; 206 goto Einval;
207 207
208 if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) { 208 if (unlikely(inode->i_flock && mandatory_lock(inode))) {
209 int retval = locks_mandatory_area( 209 int retval = locks_mandatory_area(
210 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 210 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
211 inode, file, pos, count); 211 inode, file, pos, count);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6d760f1ad875..4a6a21077bae 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -883,6 +883,7 @@ extern int vfs_setlease(struct file *, long, struct file_lock **);
883extern int lease_modify(struct file_lock **, int); 883extern int lease_modify(struct file_lock **, int);
884extern int lock_may_read(struct inode *, loff_t start, unsigned long count); 884extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
885extern int lock_may_write(struct inode *, loff_t start, unsigned long count); 885extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
886extern struct seq_operations locks_seq_operations;
886 887
887struct fasync_struct { 888struct fasync_struct {
888 int magic; 889 int magic;
@@ -1375,12 +1376,25 @@ extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size
1375 * Candidates for mandatory locking have the setgid bit set 1376 * Candidates for mandatory locking have the setgid bit set
1376 * but no group execute bit - an otherwise meaningless combination. 1377 * but no group execute bit - an otherwise meaningless combination.
1377 */ 1378 */
1378#define MANDATORY_LOCK(inode) \ 1379
1379 (IS_MANDLOCK(inode) && ((inode)->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) 1380static inline int __mandatory_lock(struct inode *ino)
1381{
1382 return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID;
1383}
1384
1385/*
1386 * ... and these candidates should be on MS_MANDLOCK mounted fs,
1387 * otherwise these will be advisory locks
1388 */
1389
1390static inline int mandatory_lock(struct inode *ino)
1391{
1392 return IS_MANDLOCK(ino) && __mandatory_lock(ino);
1393}
1380 1394
1381static inline int locks_verify_locked(struct inode *inode) 1395static inline int locks_verify_locked(struct inode *inode)
1382{ 1396{
1383 if (MANDATORY_LOCK(inode)) 1397 if (mandatory_lock(inode))
1384 return locks_mandatory_locked(inode); 1398 return locks_mandatory_locked(inode);
1385 return 0; 1399 return 0;
1386} 1400}
@@ -1391,7 +1405,7 @@ static inline int locks_verify_truncate(struct inode *inode,
1391 struct file *filp, 1405 struct file *filp,
1392 loff_t size) 1406 loff_t size)
1393{ 1407{
1394 if (inode->i_flock && MANDATORY_LOCK(inode)) 1408 if (inode->i_flock && mandatory_lock(inode))
1395 return locks_mandatory_area( 1409 return locks_mandatory_area(
1396 FLOCK_VERIFY_WRITE, inode, filp, 1410 FLOCK_VERIFY_WRITE, inode, filp,
1397 size < inode->i_size ? size : inode->i_size, 1411 size < inode->i_size ? size : inode->i_size,