aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMiklos Szeredi <mszeredi@suse.cz>2012-06-05 09:10:17 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2012-07-14 08:33:04 -0400
commitd18e9008c377dc6a6d2166a6840bf3a23a5867fd (patch)
tree6bbb29aea7e931b603bd4cea3cc74a0eda7b6379
parent54ef487241e863a6046536ac5b1fcd5d7cde86e5 (diff)
vfs: add i_op->atomic_open()
Add a new inode operation which is called on the last component of an open. Using this the filesystem can look up, possibly create and open the file in one atomic operation. If it cannot perform this (e.g. the file type turned out to be wrong) it may signal this by returning NULL instead of an open struct file pointer. i_op->atomic_open() is only called if the last component is negative or needs lookup. Handling cached positive dentries here doesn't add much value: these can be opened using f_op->open(). If the cached file turns out to be invalid, the open can be retried, this time using ->atomic_open() with a fresh dentry. For now leave the old way of using open intents in lookup and revalidate in place. This will be removed once all the users are converted. David Howells noticed that if ->atomic_open() opens the file but does not create it, handle_truncate() will be called on it even if it is not a regular file. Fix this by checking the file type in this case too. Signed-off-by: Miklos Szeredi <mszeredi@suse.cz> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--Documentation/filesystems/Locking4
-rw-r--r--Documentation/filesystems/vfs.txt11
-rw-r--r--fs/internal.h5
-rw-r--r--fs/namei.c203
-rw-r--r--fs/open.c42
-rw-r--r--include/linux/fs.h7
6 files changed, 270 insertions, 2 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 8e2da1e06e3b..8157488c3463 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -62,6 +62,9 @@ ata *);
62 int (*removexattr) (struct dentry *, const char *); 62 int (*removexattr) (struct dentry *, const char *);
63 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); 63 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
64 void (*update_time)(struct inode *, struct timespec *, int); 64 void (*update_time)(struct inode *, struct timespec *, int);
65 struct file * (*atomic_open)(struct inode *, struct dentry *,
66 struct opendata *, unsigned open_flag,
67 umode_t create_mode, bool *created);
65 68
66locking rules: 69locking rules:
67 all may block 70 all may block
@@ -89,6 +92,7 @@ listxattr: no
89removexattr: yes 92removexattr: yes
90fiemap: no 93fiemap: no
91update_time: no 94update_time: no
95atomic_open: yes
92 96
93 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on 97 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
94victim. 98victim.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index efd23f481704..beb6e691f70a 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -364,6 +364,9 @@ struct inode_operations {
364 ssize_t (*listxattr) (struct dentry *, char *, size_t); 364 ssize_t (*listxattr) (struct dentry *, char *, size_t);
365 int (*removexattr) (struct dentry *, const char *); 365 int (*removexattr) (struct dentry *, const char *);
366 void (*update_time)(struct inode *, struct timespec *, int); 366 void (*update_time)(struct inode *, struct timespec *, int);
367 struct file * (*atomic_open)(struct inode *, struct dentry *,
368 struct opendata *, unsigned open_flag,
369 umode_t create_mode, bool *created);
367}; 370};
368 371
369Again, all methods are called without any locks being held, unless 372Again, all methods are called without any locks being held, unless
@@ -476,6 +479,14 @@ otherwise noted.
476 an inode. If this is not defined the VFS will update the inode itself 479 an inode. If this is not defined the VFS will update the inode itself
477 and call mark_inode_dirty_sync. 480 and call mark_inode_dirty_sync.
478 481
482 atomic_open: called on the last component of an open. Using this optional
483 method the filesystem can look up, possibly create and open the file in
484 one atomic operation. If it cannot perform this (e.g. the file type
485 turned out to be wrong) it may signal this by returning NULL instead of
486 an open struct file pointer. This method is only called if the last
487 component is negative or needs lookup. Cached positive dentries are
488 still handled by f_op->open().
489
479The Address Space Object 490The Address Space Object
480======================== 491========================
481 492
diff --git a/fs/internal.h b/fs/internal.h
index d2a23ff61b40..70067775df2e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -85,6 +85,11 @@ extern struct super_block *user_get_super(dev_t);
85struct nameidata; 85struct nameidata;
86extern struct file *nameidata_to_filp(struct nameidata *); 86extern struct file *nameidata_to_filp(struct nameidata *);
87extern void release_open_intent(struct nameidata *); 87extern void release_open_intent(struct nameidata *);
88struct opendata {
89 struct dentry *dentry;
90 struct vfsmount *mnt;
91 struct file **filp;
92};
88struct open_flags { 93struct open_flags {
89 int open_flag; 94 int open_flag;
90 umode_t mode; 95 umode_t mode;
diff --git a/fs/namei.c b/fs/namei.c
index ccb0eb17f528..9e11ae83bff6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2196,6 +2196,176 @@ static inline int open_to_namei_flags(int flag)
2196 return flag; 2196 return flag;
2197} 2197}
2198 2198
2199static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
2200{
2201 int error = security_path_mknod(dir, dentry, mode, 0);
2202 if (error)
2203 return error;
2204
2205 error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
2206 if (error)
2207 return error;
2208
2209 return security_inode_create(dir->dentry->d_inode, dentry, mode);
2210}
2211
2212static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
2213 struct path *path, const struct open_flags *op,
2214 int *want_write, bool need_lookup,
2215 bool *created)
2216{
2217 struct inode *dir = nd->path.dentry->d_inode;
2218 unsigned open_flag = open_to_namei_flags(op->open_flag);
2219 umode_t mode;
2220 int error;
2221 int acc_mode;
2222 struct opendata od;
2223 struct file *filp;
2224 int create_error = 0;
2225 struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
2226
2227 BUG_ON(dentry->d_inode);
2228
2229 /* Don't create child dentry for a dead directory. */
2230 if (unlikely(IS_DEADDIR(dir))) {
2231 filp = ERR_PTR(-ENOENT);
2232 goto out;
2233 }
2234
2235 mode = op->mode & S_IALLUGO;
2236 if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
2237 mode &= ~current_umask();
2238
2239 if (open_flag & O_EXCL) {
2240 open_flag &= ~O_TRUNC;
2241 *created = true;
2242 }
2243
2244 /*
2245 * Checking write permission is tricky, because we don't know if we are
2246 * going to actually need it: O_CREAT opens should work as long as the
2247 * file exists. But checking existence breaks atomicity. The trick is
2248 * to check access and if not granted clear O_CREAT from the flags.
2249 *
2250 * Another problem is returning the "right" error value (e.g. for an
2251 * O_EXCL open we want to return EEXIST not EROFS).
2252 */
2253 if ((open_flag & (O_CREAT | O_TRUNC)) ||
2254 (open_flag & O_ACCMODE) != O_RDONLY) {
2255 error = mnt_want_write(nd->path.mnt);
2256 if (!error) {
2257 *want_write = 1;
2258 } else if (!(open_flag & O_CREAT)) {
2259 /*
2260 * No O_CREAT -> atomicity not a requirement -> fall
2261 * back to lookup + open
2262 */
2263 goto no_open;
2264 } else if (open_flag & (O_EXCL | O_TRUNC)) {
2265 /* Fall back and fail with the right error */
2266 create_error = error;
2267 goto no_open;
2268 } else {
2269 /* No side effects, safe to clear O_CREAT */
2270 create_error = error;
2271 open_flag &= ~O_CREAT;
2272 }
2273 }
2274
2275 if (open_flag & O_CREAT) {
2276 error = may_o_create(&nd->path, dentry, op->mode);
2277 if (error) {
2278 create_error = error;
2279 if (open_flag & O_EXCL)
2280 goto no_open;
2281 open_flag &= ~O_CREAT;
2282 }
2283 }
2284
2285 if (nd->flags & LOOKUP_DIRECTORY)
2286 open_flag |= O_DIRECTORY;
2287
2288 od.dentry = DENTRY_NOT_SET;
2289 od.mnt = nd->path.mnt;
2290 od.filp = &nd->intent.open.file;
2291 filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode,
2292 created);
2293 if (IS_ERR(filp)) {
2294 if (WARN_ON(od.dentry != DENTRY_NOT_SET))
2295 dput(od.dentry);
2296
2297 if (create_error && PTR_ERR(filp) == -ENOENT)
2298 filp = ERR_PTR(create_error);
2299 goto out;
2300 }
2301
2302 acc_mode = op->acc_mode;
2303 if (*created) {
2304 fsnotify_create(dir, dentry);
2305 acc_mode = MAY_OPEN;
2306 }
2307
2308 if (!filp) {
2309 if (WARN_ON(od.dentry == DENTRY_NOT_SET)) {
2310 filp = ERR_PTR(-EIO);
2311 goto out;
2312 }
2313 if (od.dentry) {
2314 dput(dentry);
2315 dentry = od.dentry;
2316 }
2317 goto looked_up;
2318 }
2319
2320 /*
2321 * We didn't have the inode before the open, so check open permission
2322 * here.
2323 */
2324 error = may_open(&filp->f_path, acc_mode, open_flag);
2325 if (error)
2326 goto out_fput;
2327
2328 error = open_check_o_direct(filp);
2329 if (error)
2330 goto out_fput;
2331
2332out:
2333 dput(dentry);
2334 return filp;
2335
2336out_fput:
2337 fput(filp);
2338 filp = ERR_PTR(error);
2339 goto out;
2340
2341no_open:
2342 if (need_lookup) {
2343 dentry = lookup_real(dir, dentry, nd);
2344 if (IS_ERR(dentry))
2345 return ERR_CAST(dentry);
2346
2347 if (create_error) {
2348 int open_flag = op->open_flag;
2349
2350 filp = ERR_PTR(create_error);
2351 if ((open_flag & O_EXCL)) {
2352 if (!dentry->d_inode)
2353 goto out;
2354 } else if (!dentry->d_inode) {
2355 goto out;
2356 } else if ((open_flag & O_TRUNC) &&
2357 S_ISREG(dentry->d_inode->i_mode)) {
2358 goto out;
2359 }
2360 /* will fail later, go on to get the right error */
2361 }
2362 }
2363looked_up:
2364 path->dentry = dentry;
2365 path->mnt = nd->path.mnt;
2366 return NULL;
2367}
2368
2199/* 2369/*
2200 * Lookup, maybe create and open the last component 2370 * Lookup, maybe create and open the last component
2201 * 2371 *
@@ -2219,6 +2389,15 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
2219 if (IS_ERR(dentry)) 2389 if (IS_ERR(dentry))
2220 return ERR_CAST(dentry); 2390 return ERR_CAST(dentry);
2221 2391
2392 /* Cached positive dentry: will open in f_op->open */
2393 if (!need_lookup && dentry->d_inode)
2394 goto out_no_open;
2395
2396 if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
2397 return atomic_open(nd, dentry, path, op, want_write,
2398 need_lookup, created);
2399 }
2400
2222 if (need_lookup) { 2401 if (need_lookup) {
2223 BUG_ON(dentry->d_inode); 2402 BUG_ON(dentry->d_inode);
2224 2403
@@ -2251,6 +2430,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
2251 if (error) 2430 if (error)
2252 goto out_dput; 2431 goto out_dput;
2253 } 2432 }
2433out_no_open:
2254 path->dentry = dentry; 2434 path->dentry = dentry;
2255 path->mnt = nd->path.mnt; 2435 path->mnt = nd->path.mnt;
2256 return NULL; 2436 return NULL;
@@ -2344,8 +2524,16 @@ retry_lookup:
2344 filp = lookup_open(nd, path, op, &want_write, &created); 2524 filp = lookup_open(nd, path, op, &want_write, &created);
2345 mutex_unlock(&dir->d_inode->i_mutex); 2525 mutex_unlock(&dir->d_inode->i_mutex);
2346 2526
2347 if (IS_ERR(filp)) 2527 if (filp) {
2348 goto out; 2528 if (IS_ERR(filp))
2529 goto out;
2530
2531 if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode))
2532 will_truncate = 0;
2533
2534 audit_inode(pathname, filp->f_path.dentry);
2535 goto opened;
2536 }
2349 2537
2350 if (created) { 2538 if (created) {
2351 /* Don't check for write permission, don't truncate */ 2539 /* Don't check for write permission, don't truncate */
@@ -2361,6 +2549,16 @@ retry_lookup:
2361 */ 2549 */
2362 audit_inode(pathname, path->dentry); 2550 audit_inode(pathname, path->dentry);
2363 2551
2552 /*
2553 * If atomic_open() acquired write access it is dropped now due to
2554 * possible mount and symlink following (this might be optimized away if
2555 * necessary...)
2556 */
2557 if (want_write) {
2558 mnt_drop_write(nd->path.mnt);
2559 want_write = 0;
2560 }
2561
2364 error = -EEXIST; 2562 error = -EEXIST;
2365 if (open_flag & O_EXCL) 2563 if (open_flag & O_EXCL)
2366 goto exit_dput; 2564 goto exit_dput;
@@ -2444,6 +2642,7 @@ common:
2444 retried = true; 2642 retried = true;
2445 goto retry_lookup; 2643 goto retry_lookup;
2446 } 2644 }
2645opened:
2447 if (!IS_ERR(filp)) { 2646 if (!IS_ERR(filp)) {
2448 error = ima_file_check(filp, op->acc_mode); 2647 error = ima_file_check(filp, op->acc_mode);
2449 if (error) { 2648 if (error) {
diff --git a/fs/open.c b/fs/open.c
index 1540632d8387..13bece4f36a4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -811,6 +811,48 @@ out_err:
811EXPORT_SYMBOL_GPL(lookup_instantiate_filp); 811EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
812 812
813/** 813/**
814 * finish_open - finish opening a file
815 * @od: opaque open data
816 * @dentry: pointer to dentry
817 * @open: open callback
818 *
819 * This can be used to finish opening a file passed to i_op->atomic_open().
820 *
821 * If the open callback is set to NULL, then the standard f_op->open()
822 * filesystem callback is substituted.
823 */
824struct file *finish_open(struct opendata *od, struct dentry *dentry,
825 int (*open)(struct inode *, struct file *))
826{
827 struct file *res;
828
829 mntget(od->mnt);
830 dget(dentry);
831
832 res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred());
833 if (!IS_ERR(res))
834 *od->filp = NULL;
835
836 return res;
837}
838EXPORT_SYMBOL(finish_open);
839
840/**
841 * finish_no_open - finish ->atomic_open() without opening the file
842 *
843 * @od: opaque open data
844 * @dentry: dentry or NULL (as returned from ->lookup())
845 *
846 * This can be used to set the result of a successful lookup in ->atomic_open().
847 * The filesystem's atomic_open() method shall return NULL after calling this.
848 */
849void finish_no_open(struct opendata *od, struct dentry *dentry)
850{
851 od->dentry = dentry;
852}
853EXPORT_SYMBOL(finish_no_open);
854
855/**
814 * nameidata_to_filp - convert a nameidata to an open filp. 856 * nameidata_to_filp - convert a nameidata to an open filp.
815 * @nd: pointer to nameidata 857 * @nd: pointer to nameidata
816 * @flags: open flags 858 * @flags: open flags
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f06db6bd5a74..0314635cf833 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -427,6 +427,7 @@ struct kstatfs;
427struct vm_area_struct; 427struct vm_area_struct;
428struct vfsmount; 428struct vfsmount;
429struct cred; 429struct cred;
430struct opendata;
430 431
431extern void __init inode_init(void); 432extern void __init inode_init(void);
432extern void __init inode_init_early(void); 433extern void __init inode_init_early(void);
@@ -1693,6 +1694,9 @@ struct inode_operations {
1693 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, 1694 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
1694 u64 len); 1695 u64 len);
1695 int (*update_time)(struct inode *, struct timespec *, int); 1696 int (*update_time)(struct inode *, struct timespec *, int);
1697 struct file * (*atomic_open)(struct inode *, struct dentry *,
1698 struct opendata *, unsigned open_flag,
1699 umode_t create_mode, bool *created);
1696} ____cacheline_aligned; 1700} ____cacheline_aligned;
1697 1701
1698struct seq_file; 1702struct seq_file;
@@ -2061,6 +2065,9 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
2061 const struct cred *); 2065 const struct cred *);
2062extern int filp_close(struct file *, fl_owner_t id); 2066extern int filp_close(struct file *, fl_owner_t id);
2063extern char * getname(const char __user *); 2067extern char * getname(const char __user *);
2068extern struct file *finish_open(struct opendata *od, struct dentry *dentry,
2069 int (*open)(struct inode *, struct file *));
2070extern void finish_no_open(struct opendata *od, struct dentry *dentry);
2064 2071
2065/* fs/ioctl.c */ 2072/* fs/ioctl.c */
2066 2073