summaryrefslogtreecommitdiffstats
path: root/fs/read_write.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-03-03 12:36:58 -0500
committerAl Viro <viro@zeniv.linux.org.uk>2014-03-10 11:44:41 -0400
commit9c225f2655e36a470c4f58dbbc99244c5fc7f2d4 (patch)
tree7cb89dbc82ee1b533ff2d097fed6a4248374bd4b /fs/read_write.c
parent1b56e98990bcdbb20b9fab163654b9315bf158e8 (diff)
vfs: atomic f_pos accesses as per POSIX
Our write() system call has always been atomic in the sense that you get the expected thread-safe contiguous write, but we haven't actually guaranteed that concurrent writes are serialized wrt f_pos accesses, so threads (or processes) that share a file descriptor and use "write()" concurrently would quite likely overwrite each others data. This violates POSIX.1-2008/SUSv4 Section XSI 2.9.7 that says: "2.9.7 Thread Interactions with Regular File Operations All of the following functions shall be atomic with respect to each other in the effects specified in POSIX.1-2008 when they operate on regular files or symbolic links: [...]" and one of the effects is the file position update. This unprotected file position behavior is not new behavior, and nobody has ever cared. Until now. Yongzhi Pan reported unexpected behavior to Michael Kerrisk that was due to this. This resolves the issue with a f_pos-specific lock that is taken by read/write/lseek on file descriptors that may be shared across threads or processes. Reported-by: Yongzhi Pan <panyongzhi@gmail.com> Reported-by: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/read_write.c')
-rw-r--r--fs/read_write.c54
1 files changed, 40 insertions, 14 deletions
diff --git a/fs/read_write.c b/fs/read_write.c
index edc5746a902a..932bb3414a96 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -264,10 +264,36 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
264} 264}
265EXPORT_SYMBOL(vfs_llseek); 265EXPORT_SYMBOL(vfs_llseek);
266 266
267/*
268 * We only lock f_pos if we have threads or if the file might be
269 * shared with another process. In both cases we'll have an elevated
270 * file count (done either by fdget() or by fork()).
271 */
272static inline struct fd fdget_pos(int fd)
273{
274 struct fd f = fdget(fd);
275 struct file *file = f.file;
276
277 if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
278 if (file_count(file) > 1) {
279 f.flags |= FDPUT_POS_UNLOCK;
280 mutex_lock(&file->f_pos_lock);
281 }
282 }
283 return f;
284}
285
286static inline void fdput_pos(struct fd f)
287{
288 if (f.flags & FDPUT_POS_UNLOCK)
289 mutex_unlock(&f.file->f_pos_lock);
290 fdput(f);
291}
292
267SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) 293SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
268{ 294{
269 off_t retval; 295 off_t retval;
270 struct fd f = fdget(fd); 296 struct fd f = fdget_pos(fd);
271 if (!f.file) 297 if (!f.file)
272 return -EBADF; 298 return -EBADF;
273 299
@@ -278,7 +304,7 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
278 if (res != (loff_t)retval) 304 if (res != (loff_t)retval)
279 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 305 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
280 } 306 }
281 fdput(f); 307 fdput_pos(f);
282 return retval; 308 return retval;
283} 309}
284 310
@@ -498,7 +524,7 @@ static inline void file_pos_write(struct file *file, loff_t pos)
498 524
499SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 525SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
500{ 526{
501 struct fd f = fdget(fd); 527 struct fd f = fdget_pos(fd);
502 ssize_t ret = -EBADF; 528 ssize_t ret = -EBADF;
503 529
504 if (f.file) { 530 if (f.file) {
@@ -506,7 +532,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
506 ret = vfs_read(f.file, buf, count, &pos); 532 ret = vfs_read(f.file, buf, count, &pos);
507 if (ret >= 0) 533 if (ret >= 0)
508 file_pos_write(f.file, pos); 534 file_pos_write(f.file, pos);
509 fdput(f); 535 fdput_pos(f);
510 } 536 }
511 return ret; 537 return ret;
512} 538}
@@ -514,7 +540,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
514SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 540SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
515 size_t, count) 541 size_t, count)
516{ 542{
517 struct fd f = fdget(fd); 543 struct fd f = fdget_pos(fd);
518 ssize_t ret = -EBADF; 544 ssize_t ret = -EBADF;
519 545
520 if (f.file) { 546 if (f.file) {
@@ -522,7 +548,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
522 ret = vfs_write(f.file, buf, count, &pos); 548 ret = vfs_write(f.file, buf, count, &pos);
523 if (ret >= 0) 549 if (ret >= 0)
524 file_pos_write(f.file, pos); 550 file_pos_write(f.file, pos);
525 fdput(f); 551 fdput_pos(f);
526 } 552 }
527 553
528 return ret; 554 return ret;
@@ -797,7 +823,7 @@ EXPORT_SYMBOL(vfs_writev);
797SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 823SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
798 unsigned long, vlen) 824 unsigned long, vlen)
799{ 825{
800 struct fd f = fdget(fd); 826 struct fd f = fdget_pos(fd);
801 ssize_t ret = -EBADF; 827 ssize_t ret = -EBADF;
802 828
803 if (f.file) { 829 if (f.file) {
@@ -805,7 +831,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
805 ret = vfs_readv(f.file, vec, vlen, &pos); 831 ret = vfs_readv(f.file, vec, vlen, &pos);
806 if (ret >= 0) 832 if (ret >= 0)
807 file_pos_write(f.file, pos); 833 file_pos_write(f.file, pos);
808 fdput(f); 834 fdput_pos(f);
809 } 835 }
810 836
811 if (ret > 0) 837 if (ret > 0)
@@ -817,7 +843,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
817SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 843SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
818 unsigned long, vlen) 844 unsigned long, vlen)
819{ 845{
820 struct fd f = fdget(fd); 846 struct fd f = fdget_pos(fd);
821 ssize_t ret = -EBADF; 847 ssize_t ret = -EBADF;
822 848
823 if (f.file) { 849 if (f.file) {
@@ -825,7 +851,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
825 ret = vfs_writev(f.file, vec, vlen, &pos); 851 ret = vfs_writev(f.file, vec, vlen, &pos);
826 if (ret >= 0) 852 if (ret >= 0)
827 file_pos_write(f.file, pos); 853 file_pos_write(f.file, pos);
828 fdput(f); 854 fdput_pos(f);
829 } 855 }
830 856
831 if (ret > 0) 857 if (ret > 0)
@@ -968,7 +994,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
968 const struct compat_iovec __user *,vec, 994 const struct compat_iovec __user *,vec,
969 compat_ulong_t, vlen) 995 compat_ulong_t, vlen)
970{ 996{
971 struct fd f = fdget(fd); 997 struct fd f = fdget_pos(fd);
972 ssize_t ret; 998 ssize_t ret;
973 loff_t pos; 999 loff_t pos;
974 1000
@@ -978,7 +1004,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
978 ret = compat_readv(f.file, vec, vlen, &pos); 1004 ret = compat_readv(f.file, vec, vlen, &pos);
979 if (ret >= 0) 1005 if (ret >= 0)
980 f.file->f_pos = pos; 1006 f.file->f_pos = pos;
981 fdput(f); 1007 fdput_pos(f);
982 return ret; 1008 return ret;
983} 1009}
984 1010
@@ -1035,7 +1061,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
1035 const struct compat_iovec __user *, vec, 1061 const struct compat_iovec __user *, vec,
1036 compat_ulong_t, vlen) 1062 compat_ulong_t, vlen)
1037{ 1063{
1038 struct fd f = fdget(fd); 1064 struct fd f = fdget_pos(fd);
1039 ssize_t ret; 1065 ssize_t ret;
1040 loff_t pos; 1066 loff_t pos;
1041 1067
@@ -1045,7 +1071,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
1045 ret = compat_writev(f.file, vec, vlen, &pos); 1071 ret = compat_writev(f.file, vec, vlen, &pos);
1046 if (ret >= 0) 1072 if (ret >= 0)
1047 f.file->f_pos = pos; 1073 f.file->f_pos = pos;
1048 fdput(f); 1074 fdput_pos(f);
1049 return ret; 1075 return ret;
1050} 1076}
1051 1077