aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Gerd Hoffmann <kraxel@redhat.com> 2009-04-02 19:59:23 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-04-02 22:05:08 -0400
commit: f3554f4bc69803ac2baaf7cf2aa4339e1f4b693e (patch)
tree: 52505043de254dc3e34dad7110724fcc1f489eb9 /fs
parent: 6949a6318e60aeb9c755679ac7f978aefe8c1722 (diff)
preadv/pwritev: Add preadv and pwritev system calls.
This patch adds the preadv and pwritev system calls. These syscalls are a pretty straightforward combination of pread and readv (and likewise pwrite and writev). They are quite useful for doing vectored I/O in threaded applications. Using lseek+readv instead opens race windows that you would have to plug with locking. Other systems have such system calls too, for example NetBSD; see: http://www.daemon-systems.org/man/preadv.2.html The application-visible interface provided by glibc should look like this to be compatible with the existing implementations in the *BSD family: ssize_t preadv(int d, const struct iovec *iov, int iovcnt, off_t offset); ssize_t pwritev(int d, const struct iovec *iov, int iovcnt, off_t offset); This prototype has one problem though: on 32-bit archs the (64-bit) offset argument is unaligned, which the syscall ABI of several archs does not allow. At least s390 needs a wrapper in glibc to handle this. As we'll need wrappers in glibc anyway, I've decided to push the problem to glibc entirely and use a syscall prototype which works without arch-specific wrappers inside the kernel: the offset argument is explicitly split into two 32-bit values. The patch contains the actual system call implementation and the wire-up in the x86 system call tables. Other archs follow as separate patches. Signed-off-by: Gerd Hoffmann <kraxel@redhat.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: <linux-api@vger.kernel.org> Cc: <linux-arch@vger.kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/compat.c 36
-rw-r--r-- fs/read_write.c 50
2 files changed, 86 insertions, 0 deletions
diff --git a/fs/compat.c b/fs/compat.c
index e04b4660db84..7c1615183d1e 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1232,6 +1232,24 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
1232 return ret; 1232 return ret;
1233} 1233}
1234 1234
1235asmlinkage ssize_t
1236compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
1237 unsigned long vlen, u32 pos_high, u32 pos_low)
1238{
1239 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1240 struct file *file;
1241 ssize_t ret;
1242
1243 if (pos < 0)
1244 return -EINVAL;
1245 file = fget(fd);
1246 if (!file)
1247 return -EBADF;
1248 ret = compat_readv(file, vec, vlen, &pos);
1249 fput(file);
1250 return ret;
1251}
1252
1235static size_t compat_writev(struct file *file, 1253static size_t compat_writev(struct file *file,
1236 const struct compat_iovec __user *vec, 1254 const struct compat_iovec __user *vec,
1237 unsigned long vlen, loff_t *pos) 1255 unsigned long vlen, loff_t *pos)
@@ -1269,6 +1287,24 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
1269 return ret; 1287 return ret;
1270} 1288}
1271 1289
1290asmlinkage ssize_t
1291compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
1292 unsigned long vlen, u32 pos_high, u32 pos_low)
1293{
1294 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1295 struct file *file;
1296 ssize_t ret;
1297
1298 if (pos < 0)
1299 return -EINVAL;
1300 file = fget(fd);
1301 if (!file)
1302 return -EBADF;
1303 ret = compat_writev(file, vec, vlen, &pos);
1304 fput(file);
1305 return ret;
1306}
1307
1272asmlinkage long 1308asmlinkage long
1273compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32, 1309compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
1274 unsigned int nr_segs, unsigned int flags) 1310 unsigned int nr_segs, unsigned int flags)
diff --git a/fs/read_write.c b/fs/read_write.c
index 400fe81c973e..6d5d8ff238aa 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -731,6 +731,56 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
731 return ret; 731 return ret;
732} 732}
733 733
734SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
735 unsigned long, vlen, u32, pos_high, u32, pos_low)
736{
737 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
738 struct file *file;
739 ssize_t ret = -EBADF;
740 int fput_needed;
741
742 if (pos < 0)
743 return -EINVAL;
744
745 file = fget_light(fd, &fput_needed);
746 if (file) {
747 ret = -ESPIPE;
748 if (file->f_mode & FMODE_PREAD)
749 ret = vfs_readv(file, vec, vlen, &pos);
750 fput_light(file, fput_needed);
751 }
752
753 if (ret > 0)
754 add_rchar(current, ret);
755 inc_syscr(current);
756 return ret;
757}
758
759SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
760 unsigned long, vlen, u32, pos_high, u32, pos_low)
761{
762 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
763 struct file *file;
764 ssize_t ret = -EBADF;
765 int fput_needed;
766
767 if (pos < 0)
768 return -EINVAL;
769
770 file = fget_light(fd, &fput_needed);
771 if (file) {
772 ret = -ESPIPE;
773 if (file->f_mode & FMODE_PWRITE)
774 ret = vfs_writev(file, vec, vlen, &pos);
775 fput_light(file, fput_needed);
776 }
777
778 if (ret > 0)
779 add_wchar(current, ret);
780 inc_syscw(current);
781 return ret;
782}
783
734static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 784static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
735 size_t count, loff_t max) 785 size_t count, loff_t max)
736{ 786{