aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Jeff Layton <jlayton@redhat.com> 2014-02-03 12:13:10 -0500
committer: Jeff Layton <jlayton@redhat.com> 2014-03-31 08:24:43 -0400
commit: 5d50ffd7c31dab47c6b828841ca1ec70a1b40169 (patch)
tree: 59e96edd1c263f82012387fe7b6f290db4fb8416 /fs
parent: 57b65325fe34ec4c917bc4e555144b4a94d9e1f7 (diff)
locks: add new fcntl cmd values for handling file private locks
Due to some unfortunate history, POSIX locks have very strange and unhelpful semantics. The thing that usually catches people by surprise is that they are dropped whenever the process closes any file descriptor associated with the inode. This is extremely problematic for people developing file servers that need to implement byte-range locks. Developers often need a "lock management" facility to ensure that file descriptors are not closed until all of the locks associated with the inode are finished. Additionally, "classic" POSIX locks are owned by the process. Locks taken between threads within the same process won't conflict with one another, which renders them useless for synchronization between threads. This patchset adds a new type of lock that attempts to address these issues. These locks conflict with classic POSIX read/write locks, but have semantics that are more like BSD locks with respect to inheritance and behavior on close. This is implemented primarily by changing how the fl_owner field is set for these locks. Instead of having them owned by the files_struct of the process, they are instead owned by the filp on which they were acquired. Thus, they are inherited across fork() and are only released when the last reference to a filp is put. These new semantics prevent them from being merged with classic POSIX locks, even if they are acquired by the same process. These locks will also conflict with classic POSIX locks even if they are acquired by the same process or on the same file descriptor. The new locks are managed using a new set of cmd values to the fcntl() syscall. The initial implementation of this converts these values to "classic" cmd values at a fairly high level, and the details are not exposed to the underlying filesystem. We may eventually want to push this handling out to the lower filesystem code but for now I don't see any need for it. 
Also, note that with this implementation the new cmd values are only available via fcntl64() on 32-bit arches. There's little need to add support for legacy apps on a new interface like this. Signed-off-by: Jeff Layton <jlayton@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/compat.c 35
-rw-r--r-- fs/fcntl.c 35
-rw-r--r-- fs/locks.c 54
3 files changed, 104 insertions, 20 deletions
diff --git a/fs/compat.c b/fs/compat.c
index 6af20de2c1a3..f340dcf11f68 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -399,12 +399,28 @@ static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *u
399} 399}
400#endif 400#endif
401 401
402static unsigned int
403convert_fcntl_cmd(unsigned int cmd)
404{
405 switch (cmd) {
406 case F_GETLK64:
407 return F_GETLK;
408 case F_SETLK64:
409 return F_SETLK;
410 case F_SETLKW64:
411 return F_SETLKW;
412 }
413
414 return cmd;
415}
416
402asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, 417asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
403 unsigned long arg) 418 unsigned long arg)
404{ 419{
405 mm_segment_t old_fs; 420 mm_segment_t old_fs;
406 struct flock f; 421 struct flock f;
407 long ret; 422 long ret;
423 unsigned int conv_cmd;
408 424
409 switch (cmd) { 425 switch (cmd) {
410 case F_GETLK: 426 case F_GETLK:
@@ -441,16 +457,18 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
441 case F_GETLK64: 457 case F_GETLK64:
442 case F_SETLK64: 458 case F_SETLK64:
443 case F_SETLKW64: 459 case F_SETLKW64:
460 case F_GETLKP:
461 case F_SETLKP:
462 case F_SETLKPW:
444 ret = get_compat_flock64(&f, compat_ptr(arg)); 463 ret = get_compat_flock64(&f, compat_ptr(arg));
445 if (ret != 0) 464 if (ret != 0)
446 break; 465 break;
447 old_fs = get_fs(); 466 old_fs = get_fs();
448 set_fs(KERNEL_DS); 467 set_fs(KERNEL_DS);
449 ret = sys_fcntl(fd, (cmd == F_GETLK64) ? F_GETLK : 468 conv_cmd = convert_fcntl_cmd(cmd);
450 ((cmd == F_SETLK64) ? F_SETLK : F_SETLKW), 469 ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f);
451 (unsigned long)&f);
452 set_fs(old_fs); 470 set_fs(old_fs);
453 if (cmd == F_GETLK64 && ret == 0) { 471 if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) {
454 /* need to return lock information - see above for commentary */ 472 /* need to return lock information - see above for commentary */
455 if (f.l_start > COMPAT_LOFF_T_MAX) 473 if (f.l_start > COMPAT_LOFF_T_MAX)
456 ret = -EOVERFLOW; 474 ret = -EOVERFLOW;
@@ -471,8 +489,15 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
471asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, 489asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd,
472 unsigned long arg) 490 unsigned long arg)
473{ 491{
474 if ((cmd == F_GETLK64) || (cmd == F_SETLK64) || (cmd == F_SETLKW64)) 492 switch (cmd) {
493 case F_GETLK64:
494 case F_SETLK64:
495 case F_SETLKW64:
496 case F_GETLKP:
497 case F_SETLKP:
498 case F_SETLKPW:
475 return -EINVAL; 499 return -EINVAL;
500 }
476 return compat_sys_fcntl64(fd, cmd, arg); 501 return compat_sys_fcntl64(fd, cmd, arg);
477} 502}
478 503
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 7ef7f2d2b608..9ead1596399a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -272,9 +272,19 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
272 case F_SETFL: 272 case F_SETFL:
273 err = setfl(fd, filp, arg); 273 err = setfl(fd, filp, arg);
274 break; 274 break;
275#if BITS_PER_LONG != 32
276 /* 32-bit arches must use fcntl64() */
277 case F_GETLKP:
278#endif
275 case F_GETLK: 279 case F_GETLK:
276 err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); 280 err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
277 break; 281 break;
282#if BITS_PER_LONG != 32
283 /* 32-bit arches must use fcntl64() */
284 case F_SETLKP:
285 case F_SETLKPW:
286#endif
287 /* Fallthrough */
278 case F_SETLK: 288 case F_SETLK:
279 case F_SETLKW: 289 case F_SETLKW:
280 err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); 290 err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
@@ -388,17 +398,20 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
388 goto out1; 398 goto out1;
389 399
390 switch (cmd) { 400 switch (cmd) {
391 case F_GETLK64: 401 case F_GETLK64:
392 err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg); 402 case F_GETLKP:
393 break; 403 err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
394 case F_SETLK64: 404 break;
395 case F_SETLKW64: 405 case F_SETLK64:
396 err = fcntl_setlk64(fd, f.file, cmd, 406 case F_SETLKW64:
397 (struct flock64 __user *) arg); 407 case F_SETLKP:
398 break; 408 case F_SETLKPW:
399 default: 409 err = fcntl_setlk64(fd, f.file, cmd,
400 err = do_fcntl(fd, cmd, arg, f.file); 410 (struct flock64 __user *) arg);
401 break; 411 break;
412 default:
413 err = do_fcntl(fd, cmd, arg, f.file);
414 break;
402 } 415 }
403out1: 416out1:
404 fdput(f); 417 fdput(f);
diff --git a/fs/locks.c b/fs/locks.c
index ed9fb769b88e..3b54b98236ee 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1930,6 +1930,12 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
1930 if (error) 1930 if (error)
1931 goto out; 1931 goto out;
1932 1932
1933 if (cmd == F_GETLKP) {
1934 cmd = F_GETLK;
1935 file_lock.fl_flags |= FL_FILE_PVT;
1936 file_lock.fl_owner = (fl_owner_t)filp;
1937 }
1938
1933 error = vfs_test_lock(filp, &file_lock); 1939 error = vfs_test_lock(filp, &file_lock);
1934 if (error) 1940 if (error)
1935 goto out; 1941 goto out;
@@ -2049,10 +2055,26 @@ again:
2049 error = flock_to_posix_lock(filp, file_lock, &flock); 2055 error = flock_to_posix_lock(filp, file_lock, &flock);
2050 if (error) 2056 if (error)
2051 goto out; 2057 goto out;
2052 if (cmd == F_SETLKW) { 2058
2059 /*
2060 * If the cmd is requesting file-private locks, then set the
2061 * FL_FILE_PVT flag and override the owner.
2062 */
2063 switch (cmd) {
2064 case F_SETLKP:
2065 cmd = F_SETLK;
2066 file_lock->fl_flags |= FL_FILE_PVT;
2067 file_lock->fl_owner = (fl_owner_t)filp;
2068 break;
2069 case F_SETLKPW:
2070 cmd = F_SETLKW;
2071 file_lock->fl_flags |= FL_FILE_PVT;
2072 file_lock->fl_owner = (fl_owner_t)filp;
2073 /* Fallthrough */
2074 case F_SETLKW:
2053 file_lock->fl_flags |= FL_SLEEP; 2075 file_lock->fl_flags |= FL_SLEEP;
2054 } 2076 }
2055 2077
2056 error = do_lock_file_wait(filp, cmd, file_lock); 2078 error = do_lock_file_wait(filp, cmd, file_lock);
2057 2079
2058 /* 2080 /*
@@ -2098,6 +2120,12 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
2098 if (error) 2120 if (error)
2099 goto out; 2121 goto out;
2100 2122
2123 if (cmd == F_GETLKP) {
2124 cmd = F_GETLK64;
2125 file_lock.fl_flags |= FL_FILE_PVT;
2126 file_lock.fl_owner = (fl_owner_t)filp;
2127 }
2128
2101 error = vfs_test_lock(filp, &file_lock); 2129 error = vfs_test_lock(filp, &file_lock);
2102 if (error) 2130 if (error)
2103 goto out; 2131 goto out;
@@ -2150,10 +2178,26 @@ again:
2150 error = flock64_to_posix_lock(filp, file_lock, &flock); 2178 error = flock64_to_posix_lock(filp, file_lock, &flock);
2151 if (error) 2179 if (error)
2152 goto out; 2180 goto out;
2153 if (cmd == F_SETLKW64) { 2181
2182 /*
2183 * If the cmd is requesting file-private locks, then set the
2184 * FL_FILE_PVT flag and override the owner.
2185 */
2186 switch (cmd) {
2187 case F_SETLKP:
2188 cmd = F_SETLK64;
2189 file_lock->fl_flags |= FL_FILE_PVT;
2190 file_lock->fl_owner = (fl_owner_t)filp;
2191 break;
2192 case F_SETLKPW:
2193 cmd = F_SETLKW64;
2194 file_lock->fl_flags |= FL_FILE_PVT;
2195 file_lock->fl_owner = (fl_owner_t)filp;
2196 /* Fallthrough */
2197 case F_SETLKW64:
2154 file_lock->fl_flags |= FL_SLEEP; 2198 file_lock->fl_flags |= FL_SLEEP;
2155 } 2199 }
2156 2200
2157 error = do_lock_file_wait(filp, cmd, file_lock); 2201 error = do_lock_file_wait(filp, cmd, file_lock);
2158 2202
2159 /* 2203 /*
@@ -2221,6 +2265,8 @@ void locks_remove_file(struct file *filp)
2221 if (!inode->i_flock) 2265 if (!inode->i_flock)
2222 return; 2266 return;
2223 2267
2268 locks_remove_posix(filp, (fl_owner_t)filp);
2269
2224 if (filp->f_op->flock) { 2270 if (filp->f_op->flock) {
2225 struct file_lock fl = { 2271 struct file_lock fl = {
2226 .fl_pid = current->tgid, 2272 .fl_pid = current->tgid,