aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGerd Hoffmann <kraxel@redhat.com>2009-04-02 19:59:23 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-04-02 22:05:08 -0400
commitf3554f4bc69803ac2baaf7cf2aa4339e1f4b693e (patch)
tree52505043de254dc3e34dad7110724fcc1f489eb9
parent6949a6318e60aeb9c755679ac7f978aefe8c1722 (diff)
preadv/pwritev: Add preadv and pwritev system calls.
This patch adds preadv and pwritev system calls. These syscalls are a pretty straightforward combination of pread and readv (same for write). They are quite useful for doing vectored I/O in threaded applications. Using lseek+readv instead opens race windows you'll have to plug with locking. Other systems have such system calls too, for example NetBSD, check here: http://www.daemon-systems.org/man/preadv.2.html The application-visible interface provided by glibc should look like this to be compatible to the existing implementations in the *BSD family: ssize_t preadv(int d, const struct iovec *iov, int iovcnt, off_t offset); ssize_t pwritev(int d, const struct iovec *iov, int iovcnt, off_t offset); This prototype has one problem though: On 32bit archs is the (64bit) offset argument unaligned, which the syscall ABI of several archs doesn't allow to do. At least s390 needs a wrapper in glibc to handle this. As we'll need a wrappers in glibc anyway I've decided to push problem to glibc entriely and use a syscall prototype which works without arch-specific wrappers inside the kernel: The offset argument is explicitly splitted into two 32bit values. The patch sports the actual system call implementation and the windup in the x86 system call tables. Other archs follow as separate patches. Signed-off-by: Gerd Hoffmann <kraxel@redhat.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: <linux-api@vger.kernel.org> Cc: <linux-arch@vger.kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/x86/ia32/ia32entry.S2
-rw-r--r--arch/x86/include/asm/unistd_32.h2
-rw-r--r--arch/x86/include/asm/unistd_64.h4
-rw-r--r--arch/x86/kernel/syscall_table_32.S2
-rw-r--r--fs/compat.c36
-rw-r--r--fs/read_write.c50
-rw-r--r--include/linux/compat.h6
-rw-r--r--include/linux/syscalls.h4
8 files changed, 106 insertions, 0 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index db0c803170ab..a505202086e8 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -828,4 +828,6 @@ ia32_sys_call_table:
828 .quad sys_dup3 /* 330 */ 828 .quad sys_dup3 /* 330 */
829 .quad sys_pipe2 829 .quad sys_pipe2
830 .quad sys_inotify_init1 830 .quad sys_inotify_init1
831 .quad compat_sys_preadv
832 .quad compat_sys_pwritev
831ia32_syscall_end: 833ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index f2bba78430a4..6e72d74cf8dc 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -338,6 +338,8 @@
338#define __NR_dup3 330 338#define __NR_dup3 330
339#define __NR_pipe2 331 339#define __NR_pipe2 331
340#define __NR_inotify_init1 332 340#define __NR_inotify_init1 332
341#define __NR_preadv 333
342#define __NR_pwritev 334
341 343
342#ifdef __KERNEL__ 344#ifdef __KERNEL__
343 345
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index d2e415e6666f..f81829462325 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -653,6 +653,10 @@ __SYSCALL(__NR_dup3, sys_dup3)
653__SYSCALL(__NR_pipe2, sys_pipe2) 653__SYSCALL(__NR_pipe2, sys_pipe2)
654#define __NR_inotify_init1 294 654#define __NR_inotify_init1 294
655__SYSCALL(__NR_inotify_init1, sys_inotify_init1) 655__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
656#define __NR_preadv 295
657__SYSCALL(__NR_preadv, sys_preadv)
658#define __NR_pwritev 296
659__SYSCALL(__NR_pwritev, sys_pwritev)
656 660
657 661
658#ifndef __NO_STUBS 662#ifndef __NO_STUBS
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 3bdb64829b82..ff5c8736b491 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -332,3 +332,5 @@ ENTRY(sys_call_table)
332 .long sys_dup3 /* 330 */ 332 .long sys_dup3 /* 330 */
333 .long sys_pipe2 333 .long sys_pipe2
334 .long sys_inotify_init1 334 .long sys_inotify_init1
335 .long sys_preadv
336 .long sys_pwritev
diff --git a/fs/compat.c b/fs/compat.c
index e04b4660db84..7c1615183d1e 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1232,6 +1232,24 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
1232 return ret; 1232 return ret;
1233} 1233}
1234 1234
1235asmlinkage ssize_t
1236compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
1237 unsigned long vlen, u32 pos_high, u32 pos_low)
1238{
1239 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1240 struct file *file;
1241 ssize_t ret;
1242
1243 if (pos < 0)
1244 return -EINVAL;
1245 file = fget(fd);
1246 if (!file)
1247 return -EBADF;
1248 ret = compat_readv(file, vec, vlen, &pos);
1249 fput(file);
1250 return ret;
1251}
1252
1235static size_t compat_writev(struct file *file, 1253static size_t compat_writev(struct file *file,
1236 const struct compat_iovec __user *vec, 1254 const struct compat_iovec __user *vec,
1237 unsigned long vlen, loff_t *pos) 1255 unsigned long vlen, loff_t *pos)
@@ -1269,6 +1287,24 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
1269 return ret; 1287 return ret;
1270} 1288}
1271 1289
1290asmlinkage ssize_t
1291compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
1292 unsigned long vlen, u32 pos_high, u32 pos_low)
1293{
1294 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1295 struct file *file;
1296 ssize_t ret;
1297
1298 if (pos < 0)
1299 return -EINVAL;
1300 file = fget(fd);
1301 if (!file)
1302 return -EBADF;
1303 ret = compat_writev(file, vec, vlen, &pos);
1304 fput(file);
1305 return ret;
1306}
1307
1272asmlinkage long 1308asmlinkage long
1273compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32, 1309compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
1274 unsigned int nr_segs, unsigned int flags) 1310 unsigned int nr_segs, unsigned int flags)
diff --git a/fs/read_write.c b/fs/read_write.c
index 400fe81c973e..6d5d8ff238aa 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -731,6 +731,56 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
731 return ret; 731 return ret;
732} 732}
733 733
734SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
735 unsigned long, vlen, u32, pos_high, u32, pos_low)
736{
737 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
738 struct file *file;
739 ssize_t ret = -EBADF;
740 int fput_needed;
741
742 if (pos < 0)
743 return -EINVAL;
744
745 file = fget_light(fd, &fput_needed);
746 if (file) {
747 ret = -ESPIPE;
748 if (file->f_mode & FMODE_PREAD)
749 ret = vfs_readv(file, vec, vlen, &pos);
750 fput_light(file, fput_needed);
751 }
752
753 if (ret > 0)
754 add_rchar(current, ret);
755 inc_syscr(current);
756 return ret;
757}
758
759SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
760 unsigned long, vlen, u32, pos_high, u32, pos_low)
761{
762 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
763 struct file *file;
764 ssize_t ret = -EBADF;
765 int fput_needed;
766
767 if (pos < 0)
768 return -EINVAL;
769
770 file = fget_light(fd, &fput_needed);
771 if (file) {
772 ret = -ESPIPE;
773 if (file->f_mode & FMODE_PWRITE)
774 ret = vfs_writev(file, vec, vlen, &pos);
775 fput_light(file, fput_needed);
776 }
777
778 if (ret > 0)
779 add_wchar(current, ret);
780 inc_syscw(current);
781 return ret;
782}
783
734static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 784static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
735 size_t count, loff_t max) 785 size_t count, loff_t max)
736{ 786{
diff --git a/include/linux/compat.h b/include/linux/compat.h
index b880864672de..9723edd6455c 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -191,6 +191,12 @@ asmlinkage ssize_t compat_sys_readv(unsigned long fd,
191 const struct compat_iovec __user *vec, unsigned long vlen); 191 const struct compat_iovec __user *vec, unsigned long vlen);
192asmlinkage ssize_t compat_sys_writev(unsigned long fd, 192asmlinkage ssize_t compat_sys_writev(unsigned long fd,
193 const struct compat_iovec __user *vec, unsigned long vlen); 193 const struct compat_iovec __user *vec, unsigned long vlen);
194asmlinkage ssize_t compat_sys_preadv(unsigned long fd,
195 const struct compat_iovec __user *vec,
196 unsigned long vlen, u32 pos_high, u32 pos_low);
197asmlinkage ssize_t compat_sys_pwritev(unsigned long fd,
198 const struct compat_iovec __user *vec,
199 unsigned long vlen, u32 pos_high, u32 pos_low);
194 200
195int compat_do_execve(char * filename, compat_uptr_t __user *argv, 201int compat_do_execve(char * filename, compat_uptr_t __user *argv,
196 compat_uptr_t __user *envp, struct pt_regs * regs); 202 compat_uptr_t __user *envp, struct pt_regs * regs);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index f9f900cfd066..b299a82a05e7 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -461,6 +461,10 @@ asmlinkage long sys_pread64(unsigned int fd, char __user *buf,
461 size_t count, loff_t pos); 461 size_t count, loff_t pos);
462asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, 462asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf,
463 size_t count, loff_t pos); 463 size_t count, loff_t pos);
464asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec,
465 unsigned long vlen, u32 pos_high, u32 pos_low);
466asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec,
467 unsigned long vlen, u32 pos_high, u32 pos_low);
464asmlinkage long sys_getcwd(char __user *buf, unsigned long size); 468asmlinkage long sys_getcwd(char __user *buf, unsigned long size);
465asmlinkage long sys_mkdir(const char __user *pathname, int mode); 469asmlinkage long sys_mkdir(const char __user *pathname, int mode);
466asmlinkage long sys_chdir(const char __user *filename); 470asmlinkage long sys_chdir(const char __user *filename);