aboutsummaryrefslogtreecommitdiffstats
path: root/fs/open.c
diff options
context:
space:
mode:
authorAmit Arora <aarora@in.ibm.com>2007-07-17 21:42:44 -0400
committerTheodore Ts'o <tytso@mit.edu>2007-07-17 21:42:44 -0400
commit97ac73506c0ba93f30239bb57b4cfc5d73e68a62 (patch)
tree4d02848d6c792a70b413deadcaffd7bf8c8d61de /fs/open.c
parentcb32da0416b823b7f4b65e7e85d6cba16ca4d1e1 (diff)
sys_fallocate() implementation on i386, x86_64 and powerpc
fallocate() is a new system call being proposed here which will allow applications to preallocate space to any file(s) in a file system. Each file system implementation that wants to use this feature will need to support an inode operation called ->fallocate(). Applications can use this feature to avoid fragmentation to a certain level and thus get faster access speed. With preallocation, applications also get a guarantee of space for particular file(s) - even if the system later becomes full. Currently, glibc provides an interface called posix_fallocate() which can be used for a similar purpose. Though this has the advantage of working on all file systems, it is quite slow (since it writes zeroes to each block that has to be preallocated). Without a doubt, file systems can do this more efficiently within the kernel, by implementing the proposed fallocate() system call. It is expected that posix_fallocate() will be modified to call this new system call first and, in case the kernel/filesystem does not implement it, it should fall back to the current implementation of writing zeroes to the new blocks. ToDos: 1. Implementation on other architectures (other than i386, x86_64, and ppc). Patches for s390(x) and ia64 are already available from previous posts, but it was decided that they should be added later once fallocate is in the mainline. Hence those patches are not included in this take. 2. Changes to glibc, a) to support fallocate() system call b) to make posix_fallocate() and posix_fallocate64() call fallocate() Signed-off-by: Amit Arora <aarora@in.ibm.com>
Diffstat (limited to 'fs/open.c')
-rw-r--r--fs/open.c59
1 files changed, 59 insertions, 0 deletions
diff --git a/fs/open.c b/fs/open.c
index be6a457f4226..a6b054edacba 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -26,6 +26,7 @@
26#include <linux/syscalls.h> 26#include <linux/syscalls.h>
27#include <linux/rcupdate.h> 27#include <linux/rcupdate.h>
28#include <linux/audit.h> 28#include <linux/audit.h>
29#include <linux/falloc.h>
29 30
30int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) 31int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
31{ 32{
@@ -352,6 +353,64 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
352} 353}
353#endif 354#endif
354 355
356asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len)
357{
358 struct file *file;
359 struct inode *inode;
360 long ret = -EINVAL;
361
362 if (offset < 0 || len <= 0)
363 goto out;
364
365 /* Return error if mode is not supported */
366 ret = -EOPNOTSUPP;
367 if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
368 goto out;
369
370 ret = -EBADF;
371 file = fget(fd);
372 if (!file)
373 goto out;
374 if (!(file->f_mode & FMODE_WRITE))
375 goto out_fput;
376 /*
377 * Revalidate the write permissions, in case security policy has
378 * changed since the files were opened.
379 */
380 ret = security_file_permission(file, MAY_WRITE);
381 if (ret)
382 goto out_fput;
383
384 inode = file->f_path.dentry->d_inode;
385
386 ret = -ESPIPE;
387 if (S_ISFIFO(inode->i_mode))
388 goto out_fput;
389
390 ret = -ENODEV;
391 /*
392 * Let individual file system decide if it supports preallocation
393 * for directories or not.
394 */
395 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
396 goto out_fput;
397
398 ret = -EFBIG;
399 /* Check for wrap through zero too */
400 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
401 goto out_fput;
402
403 if (inode->i_op && inode->i_op->fallocate)
404 ret = inode->i_op->fallocate(inode, mode, offset, len);
405 else
406 ret = -ENOSYS;
407
408out_fput:
409 fput(file);
410out:
411 return ret;
412}
413
355/* 414/*
356 * access() needs to use the real uid/gid, not the effective uid/gid. 415 * access() needs to use the real uid/gid, not the effective uid/gid.
357 * We do this by temporarily clearing all FS-related capabilities and 416 * We do this by temporarily clearing all FS-related capabilities and