aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Staubach <staubach@redhat.com>2005-07-27 14:45:09 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-07-27 19:26:06 -0400
commitc293621bbf678a3d85e3ed721c3921c8a670610d (patch)
treec72fc522cf3fd2d12f7fd716b7eb4db8d7fcaf52
parent3e5ea098446e19175fdee4c2c4ec9366b0217db4 (diff)
[PATCH] stale POSIX lock handling
I believe that there is a problem with the handling of POSIX locks, which the attached patch should address. The problem appears to be a race between fcntl(2) and close(2). A multithreaded application could close a file descriptor at the same time as it is trying to acquire a lock using the same file descriptor. I would suggest that that multithreaded application is not providing the proper synchronization for itself, but the OS should still behave correctly. SUS3 (Single UNIX Specification Version 3, read: POSIX) indicates that when a file descriptor is closed, that all POSIX locks on the file, owned by the process which closed the file descriptor, should be released. The trick here is when those locks are released. The current code releases all locks which exist when close is processing, but any locks in progress are handled when the last reference to the open file is released. There are three cases to consider. One is the simple case, a multithreaded (mt) process has a file open and races to close it and acquire a lock on it. In this case, the close will release one reference to the open file and when the fcntl is done, it will release the other reference. For this situation, no locks should exist on the file when both the close and fcntl operations are done. The current system will handle this case because the last reference to the open file is being released. The second case is when the mt process has dup(2)'d the file descriptor. The close will release one reference to the file and the fcntl, when done, will release another, but there will still be at least one more reference to the open file. One could argue that the existence of a lock on the file after the close has completed is okay, because it was acquired after the close operation and there is still a way for the application to release the lock on the file, using an existing file descriptor. The third case is when the mt process has forked, after opening the file and either before or after becoming an mt process. In this case, each process would hold a reference to the open file. For each process, this degenerates to first case above. However, the lock continues to exist until both processes have released their references to the open file. This lock could block other lock requests. The changes to release the lock when the last reference to the open file aren't quite right because they would allow the lock to exist as long as there was a reference to the open file. This is too long. The new proposed solution is to add support in the fcntl code path to detect a race with close and then to release the lock which was just acquired when such as race is detected. This causes locks to be released in a timely fashion and for the system to conform to the POSIX semantic specification. This was tested by instrumenting a kernel to detect the handling locks and then running a program which generates case #3 above. A dangling lock could be reliably generated. When the changes to detect the close/fcntl race were added, a dangling lock could no longer be generated. Cc: Matthew Wilcox <willy@debian.org> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/fcntl.c5
-rw-r--r--fs/locks.c81
-rw-r--r--include/linux/fs.h6
3 files changed, 55 insertions, 37 deletions
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 286a9f8f3d49..6fbc9d8fcc36 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -288,7 +288,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
288 break; 288 break;
289 case F_SETLK: 289 case F_SETLK:
290 case F_SETLKW: 290 case F_SETLKW:
291 err = fcntl_setlk(filp, cmd, (struct flock __user *) arg); 291 err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
292 break; 292 break;
293 case F_GETOWN: 293 case F_GETOWN:
294 /* 294 /*
@@ -376,7 +376,8 @@ asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg
376 break; 376 break;
377 case F_SETLK64: 377 case F_SETLK64:
378 case F_SETLKW64: 378 case F_SETLKW64:
379 err = fcntl_setlk64(filp, cmd, (struct flock64 __user *) arg); 379 err = fcntl_setlk64(fd, filp, cmd,
380 (struct flock64 __user *) arg);
380 break; 381 break;
381 default: 382 default:
382 err = do_fcntl(fd, cmd, arg, filp); 383 err = do_fcntl(fd, cmd, arg, filp);
diff --git a/fs/locks.c b/fs/locks.c
index 29fa5da6c117..11956b6179ff 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1591,7 +1591,8 @@ out:
1591/* Apply the lock described by l to an open file descriptor. 1591/* Apply the lock described by l to an open file descriptor.
1592 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 1592 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1593 */ 1593 */
1594int fcntl_setlk(struct file *filp, unsigned int cmd, struct flock __user *l) 1594int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
1595 struct flock __user *l)
1595{ 1596{
1596 struct file_lock *file_lock = locks_alloc_lock(); 1597 struct file_lock *file_lock = locks_alloc_lock();
1597 struct flock flock; 1598 struct flock flock;
@@ -1620,6 +1621,7 @@ int fcntl_setlk(struct file *filp, unsigned int cmd, struct flock __user *l)
1620 goto out; 1621 goto out;
1621 } 1622 }
1622 1623
1624again:
1623 error = flock_to_posix_lock(filp, file_lock, &flock); 1625 error = flock_to_posix_lock(filp, file_lock, &flock);
1624 if (error) 1626 if (error)
1625 goto out; 1627 goto out;
@@ -1648,25 +1650,33 @@ int fcntl_setlk(struct file *filp, unsigned int cmd, struct flock __user *l)
1648 if (error) 1650 if (error)
1649 goto out; 1651 goto out;
1650 1652
1651 if (filp->f_op && filp->f_op->lock != NULL) { 1653 if (filp->f_op && filp->f_op->lock != NULL)
1652 error = filp->f_op->lock(filp, cmd, file_lock); 1654 error = filp->f_op->lock(filp, cmd, file_lock);
1653 goto out; 1655 else {
1654 } 1656 for (;;) {
1657 error = __posix_lock_file(inode, file_lock);
1658 if ((error != -EAGAIN) || (cmd == F_SETLK))
1659 break;
1660 error = wait_event_interruptible(file_lock->fl_wait,
1661 !file_lock->fl_next);
1662 if (!error)
1663 continue;
1655 1664
1656 for (;;) { 1665 locks_delete_block(file_lock);
1657 error = __posix_lock_file(inode, file_lock);
1658 if ((error != -EAGAIN) || (cmd == F_SETLK))
1659 break; 1666 break;
1660 error = wait_event_interruptible(file_lock->fl_wait, 1667 }
1661 !file_lock->fl_next); 1668 }
1662 if (!error)
1663 continue;
1664 1669
1665 locks_delete_block(file_lock); 1670 /*
1666 break; 1671 * Attempt to detect a close/fcntl race and recover by
1672 * releasing the lock that was just acquired.
1673 */
1674 if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
1675 flock.l_type = F_UNLCK;
1676 goto again;
1667 } 1677 }
1668 1678
1669 out: 1679out:
1670 locks_free_lock(file_lock); 1680 locks_free_lock(file_lock);
1671 return error; 1681 return error;
1672} 1682}
@@ -1724,7 +1734,8 @@ out:
1724/* Apply the lock described by l to an open file descriptor. 1734/* Apply the lock described by l to an open file descriptor.
1725 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 1735 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1726 */ 1736 */
1727int fcntl_setlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) 1737int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
1738 struct flock64 __user *l)
1728{ 1739{
1729 struct file_lock *file_lock = locks_alloc_lock(); 1740 struct file_lock *file_lock = locks_alloc_lock();
1730 struct flock64 flock; 1741 struct flock64 flock;
@@ -1753,6 +1764,7 @@ int fcntl_setlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
1753 goto out; 1764 goto out;
1754 } 1765 }
1755 1766
1767again:
1756 error = flock64_to_posix_lock(filp, file_lock, &flock); 1768 error = flock64_to_posix_lock(filp, file_lock, &flock);
1757 if (error) 1769 if (error)
1758 goto out; 1770 goto out;
@@ -1781,22 +1793,30 @@ int fcntl_setlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
1781 if (error) 1793 if (error)
1782 goto out; 1794 goto out;
1783 1795
1784 if (filp->f_op && filp->f_op->lock != NULL) { 1796 if (filp->f_op && filp->f_op->lock != NULL)
1785 error = filp->f_op->lock(filp, cmd, file_lock); 1797 error = filp->f_op->lock(filp, cmd, file_lock);
1786 goto out; 1798 else {
1787 } 1799 for (;;) {
1800 error = __posix_lock_file(inode, file_lock);
1801 if ((error != -EAGAIN) || (cmd == F_SETLK64))
1802 break;
1803 error = wait_event_interruptible(file_lock->fl_wait,
1804 !file_lock->fl_next);
1805 if (!error)
1806 continue;
1788 1807
1789 for (;;) { 1808 locks_delete_block(file_lock);
1790 error = __posix_lock_file(inode, file_lock);
1791 if ((error != -EAGAIN) || (cmd == F_SETLK64))
1792 break; 1809 break;
1793 error = wait_event_interruptible(file_lock->fl_wait, 1810 }
1794 !file_lock->fl_next); 1811 }
1795 if (!error)
1796 continue;
1797 1812
1798 locks_delete_block(file_lock); 1813 /*
1799 break; 1814 * Attempt to detect a close/fcntl race and recover by
1815 * releasing the lock that was just acquired.
1816 */
1817 if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
1818 flock.l_type = F_UNLCK;
1819 goto again;
1800 } 1820 }
1801 1821
1802out: 1822out:
@@ -1888,12 +1908,7 @@ void locks_remove_flock(struct file *filp)
1888 1908
1889 while ((fl = *before) != NULL) { 1909 while ((fl = *before) != NULL) {
1890 if (fl->fl_file == filp) { 1910 if (fl->fl_file == filp) {
1891 /* 1911 if (IS_FLOCK(fl)) {
1892 * We might have a POSIX lock that was created at the same time
1893 * the filp was closed for the last time. Just remove that too,
1894 * regardless of ownership, since nobody can own it.
1895 */
1896 if (IS_FLOCK(fl) || IS_POSIX(fl)) {
1897 locks_delete_lock(before); 1912 locks_delete_lock(before);
1898 continue; 1913 continue;
1899 } 1914 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0f53e0124941..f9adf75fd9b4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -697,11 +697,13 @@ extern struct list_head file_lock_list;
697#include <linux/fcntl.h> 697#include <linux/fcntl.h>
698 698
699extern int fcntl_getlk(struct file *, struct flock __user *); 699extern int fcntl_getlk(struct file *, struct flock __user *);
700extern int fcntl_setlk(struct file *, unsigned int, struct flock __user *); 700extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
701 struct flock __user *);
701 702
702#if BITS_PER_LONG == 32 703#if BITS_PER_LONG == 32
703extern int fcntl_getlk64(struct file *, struct flock64 __user *); 704extern int fcntl_getlk64(struct file *, struct flock64 __user *);
704extern int fcntl_setlk64(struct file *, unsigned int, struct flock64 __user *); 705extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
706 struct flock64 __user *);
705#endif 707#endif
706 708
707extern void send_sigio(struct fown_struct *fown, int fd, int band); 709extern void send_sigio(struct fown_struct *fown, int fd, int band);