aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorMiklos Szeredi <miklos@szeredi.hu>2006-06-22 17:47:22 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-22 18:05:57 -0400
commitc89681ed7d0e4a61d35bdc12c06c6733b718b2cb (patch)
tree170d7c54d578480ba231dd690243aa21067ca253 /fs
parent09d967c6f32b35eab15b45862ae16e4f06259d8e (diff)
[PATCH] remove steal_locks()
This patch removes the steal_locks() function. steal_locks() doesn't work correctly with any filesystem that does it's own lock management, including NFS, CIFS, etc. In addition it has weird semantics on local filesystems in case tasks sharing file-descriptor tables are doing POSIX locking operations in parallel to execve(). The steal_locks() function has an effect on applications doing: clone(CLONE_FILES) /* in child */ lock execve lock POSIX locks acquired before execve (by "child", "parent" or any further task sharing files_struct) will after the execve be owned exclusively by "child". According to Chris Wright some LSB/LTP kind of suite triggers without the stealing behavior, but there's no known real-world application that would also fail. Apps using NPTL are not affected, since all other threads are killed before execve. Apps using LinuxThreads are only affected if they - have multiple threads during exec (LinuxThreads doesn't kill other threads, the app may do it with pthread_kill_other_threads_np()) - rely on POSIX locks being inherited across exec Both conditions are documented, but not their interaction. Apps using clone() natively are affected if they - use clone(CLONE_FILES) - rely on POSIX locks being inherited across exec The above scenarios are unlikely, but possible. If the patch is vetoed, there's a plan B, that involves mostly keeping the weird stealing semantics, but changing the way lock ownership is handled so that network and local filesystems work consistently. That would add more complexity though, so this solution seems to be preferred by most people. Signed-off-by: Miklos Szeredi <miklos@szeredi.hu> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Cc: Matthew Wilcox <willy@debian.org> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Steven French <sfrench@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/binfmt_elf.c1
-rw-r--r--fs/binfmt_misc.c1
-rw-r--r--fs/exec.c1
-rw-r--r--fs/locks.c57
4 files changed, 0 insertions, 60 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 537893a16014..8a04216e8b4d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -759,7 +759,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
759 759
760 /* Discard our unneeded old files struct */ 760 /* Discard our unneeded old files struct */
761 if (files) { 761 if (files) {
762 steal_locks(files);
763 put_files_struct(files); 762 put_files_struct(files);
764 files = NULL; 763 files = NULL;
765 } 764 }
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index d73d75591a39..599f36fd0f67 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -203,7 +203,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
203 goto _error; 203 goto _error;
204 204
205 if (files) { 205 if (files) {
206 steal_locks(files);
207 put_files_struct(files); 206 put_files_struct(files);
208 files = NULL; 207 files = NULL;
209 } 208 }
diff --git a/fs/exec.c b/fs/exec.c
index d07858c0b7c4..0b88bf646143 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -866,7 +866,6 @@ int flush_old_exec(struct linux_binprm * bprm)
866 bprm->mm = NULL; /* We're using it now */ 866 bprm->mm = NULL; /* We're using it now */
867 867
868 /* This is the point of no return */ 868 /* This is the point of no return */
869 steal_locks(files);
870 put_files_struct(files); 869 put_files_struct(files);
871 870
872 current->sas_ss_sp = current->sas_ss_size = 0; 871 current->sas_ss_sp = current->sas_ss_size = 0;
diff --git a/fs/locks.c b/fs/locks.c
index ab61a8b54829..69435c68c1ed 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2206,63 +2206,6 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
2206 2206
2207EXPORT_SYMBOL(lock_may_write); 2207EXPORT_SYMBOL(lock_may_write);
2208 2208
2209static inline void __steal_locks(struct file *file, fl_owner_t from)
2210{
2211 struct inode *inode = file->f_dentry->d_inode;
2212 struct file_lock *fl = inode->i_flock;
2213
2214 while (fl) {
2215 if (fl->fl_file == file && fl->fl_owner == from)
2216 fl->fl_owner = current->files;
2217 fl = fl->fl_next;
2218 }
2219}
2220
2221/* When getting ready for executing a binary, we make sure that current
2222 * has a files_struct on its own. Before dropping the old files_struct,
2223 * we take over ownership of all locks for all file descriptors we own.
2224 * Note that we may accidentally steal a lock for a file that a sibling
2225 * has created since the unshare_files() call.
2226 */
2227void steal_locks(fl_owner_t from)
2228{
2229 struct files_struct *files = current->files;
2230 int i, j;
2231 struct fdtable *fdt;
2232
2233 if (from == files)
2234 return;
2235
2236 lock_kernel();
2237 j = 0;
2238
2239 /*
2240 * We are not taking a ref to the file structures, so
2241 * we need to acquire ->file_lock.
2242 */
2243 spin_lock(&files->file_lock);
2244 fdt = files_fdtable(files);
2245 for (;;) {
2246 unsigned long set;
2247 i = j * __NFDBITS;
2248 if (i >= fdt->max_fdset || i >= fdt->max_fds)
2249 break;
2250 set = fdt->open_fds->fds_bits[j++];
2251 while (set) {
2252 if (set & 1) {
2253 struct file *file = fdt->fd[i];
2254 if (file)
2255 __steal_locks(file, from);
2256 }
2257 i++;
2258 set >>= 1;
2259 }
2260 }
2261 spin_unlock(&files->file_lock);
2262 unlock_kernel();
2263}
2264EXPORT_SYMBOL(steal_locks);
2265
2266static int __init filelock_init(void) 2209static int __init filelock_init(void)
2267{ 2210{
2268 filelock_cache = kmem_cache_create("file_lock_cache", 2211 filelock_cache = kmem_cache_create("file_lock_cache",