aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAmit Arora <aarora@in.ibm.com>2007-07-17 21:42:44 -0400
committerTheodore Ts'o <tytso@mit.edu>2007-07-17 21:42:44 -0400
commit97ac73506c0ba93f30239bb57b4cfc5d73e68a62 (patch)
tree4d02848d6c792a70b413deadcaffd7bf8c8d61de
parentcb32da0416b823b7f4b65e7e85d6cba16ca4d1e1 (diff)
sys_fallocate() implementation on i386, x86_64 and powerpc
fallocate() is a new system call being proposed here which will allow applications to preallocate space to any file(s) in a file system. Each file system implementation that wants to use this feature will need to support an inode operation called ->fallocate(). Applications can use this feature to avoid fragmentation to a certain level and thus get faster access speed. With preallocation, applications also get a guarantee of space for particular file(s) - even if the system later becomes full. Currently, glibc provides an interface called posix_fallocate() which can be used for a similar purpose. Though this has the advantage of working on all file systems, it is quite slow (since it writes zeroes to each block that has to be preallocated). Without a doubt, file systems can do this more efficiently within the kernel, by implementing the proposed fallocate() system call. It is expected that posix_fallocate() will be modified to call this new system call first and, in case the kernel/filesystem does not implement it, it should fall back to the current implementation of writing zeroes to the new blocks. ToDos: 1. Implementation on other architectures (other than i386, x86_64, and ppc). Patches for s390(x) and ia64 are already available from previous posts, but it was decided that they should be added later once fallocate is in the mainline. Hence not including those patches in this take. 2. Changes to glibc, a) to support fallocate() system call b) to make posix_fallocate() and posix_fallocate64() call fallocate() Signed-off-by: Amit Arora <aarora@in.ibm.com>
-rw-r--r--arch/i386/kernel/syscall_table.S1
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c7
-rw-r--r--arch/x86_64/ia32/ia32entry.S1
-rw-r--r--arch/x86_64/ia32/sys_ia32.c8
-rw-r--r--fs/open.c59
-rw-r--r--include/asm-i386/unistd.h3
-rw-r--r--include/asm-powerpc/systbl.h1
-rw-r--r--include/asm-powerpc/unistd.h3
-rw-r--r--include/asm-x86_64/unistd.h2
-rw-r--r--include/linux/falloc.h6
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/syscalls.h1
12 files changed, 92 insertions, 2 deletions
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index bf6adce5226..8344c70adf6 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -323,3 +323,4 @@ ENTRY(sys_call_table)
323 .long sys_signalfd 323 .long sys_signalfd
324 .long sys_timerfd 324 .long sys_timerfd
325 .long sys_eventfd 325 .long sys_eventfd
326 .long sys_fallocate
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index b42cbf1e2d7..bd85b5fd08c 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -773,6 +773,13 @@ asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
773 return sys_truncate(path, (high << 32) | low); 773 return sys_truncate(path, (high << 32) | low);
774} 774}
775 775
776asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
777 u32 lenhi, u32 lenlo)
778{
779 return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
780 ((loff_t)lenhi << 32) | lenlo);
781}
782
776asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high, 783asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high,
777 unsigned long low) 784 unsigned long low)
778{ 785{
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 782dea81943..3f66e970d86 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -719,4 +719,5 @@ ia32_sys_call_table:
719 .quad compat_sys_signalfd 719 .quad compat_sys_signalfd
720 .quad compat_sys_timerfd 720 .quad compat_sys_timerfd
721 .quad sys_eventfd 721 .quad sys_eventfd
722 .quad sys32_fallocate
722ia32_syscall_end: 723ia32_syscall_end:
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index 99a78a3cce7..bee96d61443 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -879,3 +879,11 @@ asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
879 return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, 879 return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
880 len, advice); 880 len, advice);
881} 881}
882
883asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
884 unsigned offset_hi, unsigned len_lo,
885 unsigned len_hi)
886{
887 return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
888 ((u64)len_hi << 32) | len_lo);
889}
diff --git a/fs/open.c b/fs/open.c
index be6a457f422..a6b054edacb 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -26,6 +26,7 @@
26#include <linux/syscalls.h> 26#include <linux/syscalls.h>
27#include <linux/rcupdate.h> 27#include <linux/rcupdate.h>
28#include <linux/audit.h> 28#include <linux/audit.h>
29#include <linux/falloc.h>
29 30
30int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) 31int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
31{ 32{
@@ -352,6 +353,64 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
352} 353}
353#endif 354#endif
354 355
356asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len)
357{
358 struct file *file;
359 struct inode *inode;
360 long ret = -EINVAL;
361
362 if (offset < 0 || len <= 0)
363 goto out;
364
365 /* Return error if mode is not supported */
366 ret = -EOPNOTSUPP;
367 if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
368 goto out;
369
370 ret = -EBADF;
371 file = fget(fd);
372 if (!file)
373 goto out;
374 if (!(file->f_mode & FMODE_WRITE))
375 goto out_fput;
376 /*
377 * Revalidate the write permissions, in case security policy has
378 * changed since the files were opened.
379 */
380 ret = security_file_permission(file, MAY_WRITE);
381 if (ret)
382 goto out_fput;
383
384 inode = file->f_path.dentry->d_inode;
385
386 ret = -ESPIPE;
387 if (S_ISFIFO(inode->i_mode))
388 goto out_fput;
389
390 ret = -ENODEV;
391 /*
392 * Let individual file system decide if it supports preallocation
393 * for directories or not.
394 */
395 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
396 goto out_fput;
397
398 ret = -EFBIG;
399 /* Check for wrap through zero too */
400 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
401 goto out_fput;
402
403 if (inode->i_op && inode->i_op->fallocate)
404 ret = inode->i_op->fallocate(inode, mode, offset, len);
405 else
406 ret = -ENOSYS;
407
408out_fput:
409 fput(file);
410out:
411 return ret;
412}
413
355/* 414/*
356 * access() needs to use the real uid/gid, not the effective uid/gid. 415 * access() needs to use the real uid/gid, not the effective uid/gid.
357 * We do this by temporarily clearing all FS-related capabilities and 416 * We do this by temporarily clearing all FS-related capabilities and
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index e84ace1ec8b..9b15545eb9b 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -329,10 +329,11 @@
329#define __NR_signalfd 321 329#define __NR_signalfd 321
330#define __NR_timerfd 322 330#define __NR_timerfd 322
331#define __NR_eventfd 323 331#define __NR_eventfd 323
332#define __NR_fallocate 324
332 333
333#ifdef __KERNEL__ 334#ifdef __KERNEL__
334 335
335#define NR_syscalls 324 336#define NR_syscalls 325
336 337
337#define __ARCH_WANT_IPC_PARSE_VERSION 338#define __ARCH_WANT_IPC_PARSE_VERSION
338#define __ARCH_WANT_OLD_READDIR 339#define __ARCH_WANT_OLD_READDIR
diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h
index 1cc3f9cb6f4..cc6d8722825 100644
--- a/include/asm-powerpc/systbl.h
+++ b/include/asm-powerpc/systbl.h
@@ -308,6 +308,7 @@ COMPAT_SYS_SPU(move_pages)
308SYSCALL_SPU(getcpu) 308SYSCALL_SPU(getcpu)
309COMPAT_SYS(epoll_pwait) 309COMPAT_SYS(epoll_pwait)
310COMPAT_SYS_SPU(utimensat) 310COMPAT_SYS_SPU(utimensat)
311COMPAT_SYS(fallocate)
311COMPAT_SYS_SPU(signalfd) 312COMPAT_SYS_SPU(signalfd)
312COMPAT_SYS_SPU(timerfd) 313COMPAT_SYS_SPU(timerfd)
313SYSCALL_SPU(eventfd) 314SYSCALL_SPU(eventfd)
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h
index f71c6061f1e..97d82b6a940 100644
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -331,10 +331,11 @@
331#define __NR_timerfd 306 331#define __NR_timerfd 306
332#define __NR_eventfd 307 332#define __NR_eventfd 307
333#define __NR_sync_file_range2 308 333#define __NR_sync_file_range2 308
334#define __NR_fallocate 309
334 335
335#ifdef __KERNEL__ 336#ifdef __KERNEL__
336 337
337#define __NR_syscalls 309 338#define __NR_syscalls 310
338 339
339#define __NR__exit __NR_exit 340#define __NR__exit __NR_exit
340#define NR_syscalls __NR_syscalls 341#define NR_syscalls __NR_syscalls
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 8696f8ad401..fc4e73f5f1f 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -630,6 +630,8 @@ __SYSCALL(__NR_signalfd, sys_signalfd)
630__SYSCALL(__NR_timerfd, sys_timerfd) 630__SYSCALL(__NR_timerfd, sys_timerfd)
631#define __NR_eventfd 284 631#define __NR_eventfd 284
632__SYSCALL(__NR_eventfd, sys_eventfd) 632__SYSCALL(__NR_eventfd, sys_eventfd)
633#define __NR_fallocate 285
634__SYSCALL(__NR_fallocate, sys_fallocate)
633 635
634#ifndef __NO_STUBS 636#ifndef __NO_STUBS
635#define __ARCH_WANT_OLD_READDIR 637#define __ARCH_WANT_OLD_READDIR
diff --git a/include/linux/falloc.h b/include/linux/falloc.h
new file mode 100644
index 00000000000..8e912ab6a07
--- /dev/null
+++ b/include/linux/falloc.h
@@ -0,0 +1,6 @@
1#ifndef _FALLOC_H_
2#define _FALLOC_H_
3
4#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
5
6#endif /* _FALLOC_H_ */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 98205f68047..0b806c5e32e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1147,6 +1147,8 @@ struct inode_operations {
1147 ssize_t (*listxattr) (struct dentry *, char *, size_t); 1147 ssize_t (*listxattr) (struct dentry *, char *, size_t);
1148 int (*removexattr) (struct dentry *, const char *); 1148 int (*removexattr) (struct dentry *, const char *);
1149 void (*truncate_range)(struct inode *, loff_t, loff_t); 1149 void (*truncate_range)(struct inode *, loff_t, loff_t);
1150 long (*fallocate)(struct inode *inode, int mode, loff_t offset,
1151 loff_t len);
1150}; 1152};
1151 1153
1152struct seq_file; 1154struct seq_file;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 83d0ec11235..7a8b1e3322e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -610,6 +610,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
610asmlinkage long sys_timerfd(int ufd, int clockid, int flags, 610asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
611 const struct itimerspec __user *utmr); 611 const struct itimerspec __user *utmr);
612asmlinkage long sys_eventfd(unsigned int count); 612asmlinkage long sys_eventfd(unsigned int count);
613asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
613 614
614int kernel_execve(const char *filename, char *const argv[], char *const envp[]); 615int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
615 616