diff options
-rw-r--r-- | Documentation/filesystems/Locking | 4 | ||||
-rw-r--r-- | Documentation/filesystems/vfs.txt | 11 | ||||
-rw-r--r-- | fs/internal.h | 5 | ||||
-rw-r--r-- | fs/namei.c | 203 | ||||
-rw-r--r-- | fs/open.c | 42 | ||||
-rw-r--r-- | include/linux/fs.h | 7 |
6 files changed, 270 insertions, 2 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8e2da1e06e3b..8157488c3463 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -62,6 +62,9 @@ ata *); | |||
62 | int (*removexattr) (struct dentry *, const char *); | 62 | int (*removexattr) (struct dentry *, const char *); |
63 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); | 63 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); |
64 | void (*update_time)(struct inode *, struct timespec *, int); | 64 | void (*update_time)(struct inode *, struct timespec *, int); |
65 | struct file * (*atomic_open)(struct inode *, struct dentry *, | ||
66 | struct opendata *, unsigned open_flag, | ||
67 | umode_t create_mode, bool *created); | ||
65 | 68 | ||
66 | locking rules: | 69 | locking rules: |
67 | all may block | 70 | all may block |
@@ -89,6 +92,7 @@ listxattr: no | |||
89 | removexattr: yes | 92 | removexattr: yes |
90 | fiemap: no | 93 | fiemap: no |
91 | update_time: no | 94 | update_time: no |
95 | atomic_open: yes | ||
92 | 96 | ||
93 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on | 97 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on |
94 | victim. | 98 | victim. |
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index efd23f481704..beb6e691f70a 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -364,6 +364,9 @@ struct inode_operations { | |||
364 | ssize_t (*listxattr) (struct dentry *, char *, size_t); | 364 | ssize_t (*listxattr) (struct dentry *, char *, size_t); |
365 | int (*removexattr) (struct dentry *, const char *); | 365 | int (*removexattr) (struct dentry *, const char *); |
366 | void (*update_time)(struct inode *, struct timespec *, int); | 366 | void (*update_time)(struct inode *, struct timespec *, int); |
367 | struct file * (*atomic_open)(struct inode *, struct dentry *, | ||
368 | struct opendata *, unsigned open_flag, | ||
369 | umode_t create_mode, bool *created); | ||
367 | }; | 370 | }; |
368 | 371 | ||
369 | Again, all methods are called without any locks being held, unless | 372 | Again, all methods are called without any locks being held, unless |
@@ -476,6 +479,14 @@ otherwise noted. | |||
476 | an inode. If this is not defined the VFS will update the inode itself | 479 | an inode. If this is not defined the VFS will update the inode itself |
477 | and call mark_inode_dirty_sync. | 480 | and call mark_inode_dirty_sync. |
478 | 481 | ||
482 | atomic_open: called on the last component of an open. Using this optional | ||
483 | method the filesystem can look up, possibly create and open the file in | ||
484 | one atomic operation. If it cannot perform this (e.g. the file type | ||
485 | turned out to be wrong) it may signal this by returning NULL instead of | ||
486 | an open struct file pointer. This method is only called if the last | ||
487 | component is negative or needs lookup. Cached positive dentries are | ||
488 | still handled by f_op->open(). | ||
489 | |||
479 | The Address Space Object | 490 | The Address Space Object |
480 | ======================== | 491 | ======================== |
481 | 492 | ||
diff --git a/fs/internal.h b/fs/internal.h index d2a23ff61b40..70067775df2e 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -85,6 +85,11 @@ extern struct super_block *user_get_super(dev_t); | |||
85 | struct nameidata; | 85 | struct nameidata; |
86 | extern struct file *nameidata_to_filp(struct nameidata *); | 86 | extern struct file *nameidata_to_filp(struct nameidata *); |
87 | extern void release_open_intent(struct nameidata *); | 87 | extern void release_open_intent(struct nameidata *); |
88 | struct opendata { | ||
89 | struct dentry *dentry; | ||
90 | struct vfsmount *mnt; | ||
91 | struct file **filp; | ||
92 | }; | ||
88 | struct open_flags { | 93 | struct open_flags { |
89 | int open_flag; | 94 | int open_flag; |
90 | umode_t mode; | 95 | umode_t mode; |
diff --git a/fs/namei.c b/fs/namei.c index ccb0eb17f528..9e11ae83bff6 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -2196,6 +2196,176 @@ static inline int open_to_namei_flags(int flag) | |||
2196 | return flag; | 2196 | return flag; |
2197 | } | 2197 | } |
2198 | 2198 | ||
2199 | static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) | ||
2200 | { | ||
2201 | int error = security_path_mknod(dir, dentry, mode, 0); | ||
2202 | if (error) | ||
2203 | return error; | ||
2204 | |||
2205 | error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); | ||
2206 | if (error) | ||
2207 | return error; | ||
2208 | |||
2209 | return security_inode_create(dir->dentry->d_inode, dentry, mode); | ||
2210 | } | ||
2211 | |||
2212 | static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, | ||
2213 | struct path *path, const struct open_flags *op, | ||
2214 | int *want_write, bool need_lookup, | ||
2215 | bool *created) | ||
2216 | { | ||
2217 | struct inode *dir = nd->path.dentry->d_inode; | ||
2218 | unsigned open_flag = open_to_namei_flags(op->open_flag); | ||
2219 | umode_t mode; | ||
2220 | int error; | ||
2221 | int acc_mode; | ||
2222 | struct opendata od; | ||
2223 | struct file *filp; | ||
2224 | int create_error = 0; | ||
2225 | struct dentry *const DENTRY_NOT_SET = (void *) -1UL; | ||
2226 | |||
2227 | BUG_ON(dentry->d_inode); | ||
2228 | |||
2229 | /* Don't create child dentry for a dead directory. */ | ||
2230 | if (unlikely(IS_DEADDIR(dir))) { | ||
2231 | filp = ERR_PTR(-ENOENT); | ||
2232 | goto out; | ||
2233 | } | ||
2234 | |||
2235 | mode = op->mode & S_IALLUGO; | ||
2236 | if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) | ||
2237 | mode &= ~current_umask(); | ||
2238 | |||
2239 | if (open_flag & O_EXCL) { | ||
2240 | open_flag &= ~O_TRUNC; | ||
2241 | *created = true; | ||
2242 | } | ||
2243 | |||
2244 | /* | ||
2245 | * Checking write permission is tricky, because we don't know if we are | ||
2246 | * going to actually need it: O_CREAT opens should work as long as the | ||
2247 | * file exists. But checking existence breaks atomicity. The trick is | ||
2248 | * to check access and if not granted clear O_CREAT from the flags. | ||
2249 | * | ||
2250 | * Another problem is returning the "right" error value (e.g. for an | ||
2251 | * O_EXCL open we want to return EEXIST not EROFS). | ||
2252 | */ | ||
2253 | if ((open_flag & (O_CREAT | O_TRUNC)) || | ||
2254 | (open_flag & O_ACCMODE) != O_RDONLY) { | ||
2255 | error = mnt_want_write(nd->path.mnt); | ||
2256 | if (!error) { | ||
2257 | *want_write = 1; | ||
2258 | } else if (!(open_flag & O_CREAT)) { | ||
2259 | /* | ||
2260 | * No O_CREAT -> atomicity not a requirement -> fall | ||
2261 | * back to lookup + open | ||
2262 | */ | ||
2263 | goto no_open; | ||
2264 | } else if (open_flag & (O_EXCL | O_TRUNC)) { | ||
2265 | /* Fall back and fail with the right error */ | ||
2266 | create_error = error; | ||
2267 | goto no_open; | ||
2268 | } else { | ||
2269 | /* No side effects, safe to clear O_CREAT */ | ||
2270 | create_error = error; | ||
2271 | open_flag &= ~O_CREAT; | ||
2272 | } | ||
2273 | } | ||
2274 | |||
2275 | if (open_flag & O_CREAT) { | ||
2276 | error = may_o_create(&nd->path, dentry, op->mode); | ||
2277 | if (error) { | ||
2278 | create_error = error; | ||
2279 | if (open_flag & O_EXCL) | ||
2280 | goto no_open; | ||
2281 | open_flag &= ~O_CREAT; | ||
2282 | } | ||
2283 | } | ||
2284 | |||
2285 | if (nd->flags & LOOKUP_DIRECTORY) | ||
2286 | open_flag |= O_DIRECTORY; | ||
2287 | |||
2288 | od.dentry = DENTRY_NOT_SET; | ||
2289 | od.mnt = nd->path.mnt; | ||
2290 | od.filp = &nd->intent.open.file; | ||
2291 | filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode, | ||
2292 | created); | ||
2293 | if (IS_ERR(filp)) { | ||
2294 | if (WARN_ON(od.dentry != DENTRY_NOT_SET)) | ||
2295 | dput(od.dentry); | ||
2296 | |||
2297 | if (create_error && PTR_ERR(filp) == -ENOENT) | ||
2298 | filp = ERR_PTR(create_error); | ||
2299 | goto out; | ||
2300 | } | ||
2301 | |||
2302 | acc_mode = op->acc_mode; | ||
2303 | if (*created) { | ||
2304 | fsnotify_create(dir, dentry); | ||
2305 | acc_mode = MAY_OPEN; | ||
2306 | } | ||
2307 | |||
2308 | if (!filp) { | ||
2309 | if (WARN_ON(od.dentry == DENTRY_NOT_SET)) { | ||
2310 | filp = ERR_PTR(-EIO); | ||
2311 | goto out; | ||
2312 | } | ||
2313 | if (od.dentry) { | ||
2314 | dput(dentry); | ||
2315 | dentry = od.dentry; | ||
2316 | } | ||
2317 | goto looked_up; | ||
2318 | } | ||
2319 | |||
2320 | /* | ||
2321 | * We didn't have the inode before the open, so check open permission | ||
2322 | * here. | ||
2323 | */ | ||
2324 | error = may_open(&filp->f_path, acc_mode, open_flag); | ||
2325 | if (error) | ||
2326 | goto out_fput; | ||
2327 | |||
2328 | error = open_check_o_direct(filp); | ||
2329 | if (error) | ||
2330 | goto out_fput; | ||
2331 | |||
2332 | out: | ||
2333 | dput(dentry); | ||
2334 | return filp; | ||
2335 | |||
2336 | out_fput: | ||
2337 | fput(filp); | ||
2338 | filp = ERR_PTR(error); | ||
2339 | goto out; | ||
2340 | |||
2341 | no_open: | ||
2342 | if (need_lookup) { | ||
2343 | dentry = lookup_real(dir, dentry, nd); | ||
2344 | if (IS_ERR(dentry)) | ||
2345 | return ERR_CAST(dentry); | ||
2346 | |||
2347 | if (create_error) { | ||
2348 | int open_flag = op->open_flag; | ||
2349 | |||
2350 | filp = ERR_PTR(create_error); | ||
2351 | if ((open_flag & O_EXCL)) { | ||
2352 | if (!dentry->d_inode) | ||
2353 | goto out; | ||
2354 | } else if (!dentry->d_inode) { | ||
2355 | goto out; | ||
2356 | } else if ((open_flag & O_TRUNC) && | ||
2357 | S_ISREG(dentry->d_inode->i_mode)) { | ||
2358 | goto out; | ||
2359 | } | ||
2360 | /* will fail later, go on to get the right error */ | ||
2361 | } | ||
2362 | } | ||
2363 | looked_up: | ||
2364 | path->dentry = dentry; | ||
2365 | path->mnt = nd->path.mnt; | ||
2366 | return NULL; | ||
2367 | } | ||
2368 | |||
2199 | /* | 2369 | /* |
2200 | * Lookup, maybe create and open the last component | 2370 | * Lookup, maybe create and open the last component |
2201 | * | 2371 | * |
@@ -2219,6 +2389,15 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, | |||
2219 | if (IS_ERR(dentry)) | 2389 | if (IS_ERR(dentry)) |
2220 | return ERR_CAST(dentry); | 2390 | return ERR_CAST(dentry); |
2221 | 2391 | ||
2392 | /* Cached positive dentry: will open in f_op->open */ | ||
2393 | if (!need_lookup && dentry->d_inode) | ||
2394 | goto out_no_open; | ||
2395 | |||
2396 | if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { | ||
2397 | return atomic_open(nd, dentry, path, op, want_write, | ||
2398 | need_lookup, created); | ||
2399 | } | ||
2400 | |||
2222 | if (need_lookup) { | 2401 | if (need_lookup) { |
2223 | BUG_ON(dentry->d_inode); | 2402 | BUG_ON(dentry->d_inode); |
2224 | 2403 | ||
@@ -2251,6 +2430,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, | |||
2251 | if (error) | 2430 | if (error) |
2252 | goto out_dput; | 2431 | goto out_dput; |
2253 | } | 2432 | } |
2433 | out_no_open: | ||
2254 | path->dentry = dentry; | 2434 | path->dentry = dentry; |
2255 | path->mnt = nd->path.mnt; | 2435 | path->mnt = nd->path.mnt; |
2256 | return NULL; | 2436 | return NULL; |
@@ -2344,8 +2524,16 @@ retry_lookup: | |||
2344 | filp = lookup_open(nd, path, op, &want_write, &created); | 2524 | filp = lookup_open(nd, path, op, &want_write, &created); |
2345 | mutex_unlock(&dir->d_inode->i_mutex); | 2525 | mutex_unlock(&dir->d_inode->i_mutex); |
2346 | 2526 | ||
2347 | if (IS_ERR(filp)) | 2527 | if (filp) { |
2348 | goto out; | 2528 | if (IS_ERR(filp)) |
2529 | goto out; | ||
2530 | |||
2531 | if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) | ||
2532 | will_truncate = 0; | ||
2533 | |||
2534 | audit_inode(pathname, filp->f_path.dentry); | ||
2535 | goto opened; | ||
2536 | } | ||
2349 | 2537 | ||
2350 | if (created) { | 2538 | if (created) { |
2351 | /* Don't check for write permission, don't truncate */ | 2539 | /* Don't check for write permission, don't truncate */ |
@@ -2361,6 +2549,16 @@ retry_lookup: | |||
2361 | */ | 2549 | */ |
2362 | audit_inode(pathname, path->dentry); | 2550 | audit_inode(pathname, path->dentry); |
2363 | 2551 | ||
2552 | /* | ||
2553 | * If atomic_open() acquired write access it is dropped now due to | ||
2554 | * possible mount and symlink following (this might be optimized away if | ||
2555 | * necessary...) | ||
2556 | */ | ||
2557 | if (want_write) { | ||
2558 | mnt_drop_write(nd->path.mnt); | ||
2559 | want_write = 0; | ||
2560 | } | ||
2561 | |||
2364 | error = -EEXIST; | 2562 | error = -EEXIST; |
2365 | if (open_flag & O_EXCL) | 2563 | if (open_flag & O_EXCL) |
2366 | goto exit_dput; | 2564 | goto exit_dput; |
@@ -2444,6 +2642,7 @@ common: | |||
2444 | retried = true; | 2642 | retried = true; |
2445 | goto retry_lookup; | 2643 | goto retry_lookup; |
2446 | } | 2644 | } |
2645 | opened: | ||
2447 | if (!IS_ERR(filp)) { | 2646 | if (!IS_ERR(filp)) { |
2448 | error = ima_file_check(filp, op->acc_mode); | 2647 | error = ima_file_check(filp, op->acc_mode); |
2449 | if (error) { | 2648 | if (error) { |
@@ -811,6 +811,48 @@ out_err: | |||
811 | EXPORT_SYMBOL_GPL(lookup_instantiate_filp); | 811 | EXPORT_SYMBOL_GPL(lookup_instantiate_filp); |
812 | 812 | ||
813 | /** | 813 | /** |
814 | * finish_open - finish opening a file | ||
815 | * @od: opaque open data | ||
816 | * @dentry: pointer to dentry | ||
817 | * @open: open callback | ||
818 | * | ||
819 | * This can be used to finish opening a file passed to i_op->atomic_open(). | ||
820 | * | ||
821 | * If the open callback is set to NULL, then the standard f_op->open() | ||
822 | * filesystem callback is substituted. | ||
823 | */ | ||
824 | struct file *finish_open(struct opendata *od, struct dentry *dentry, | ||
825 | int (*open)(struct inode *, struct file *)) | ||
826 | { | ||
827 | struct file *res; | ||
828 | |||
829 | mntget(od->mnt); | ||
830 | dget(dentry); | ||
831 | |||
832 | res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred()); | ||
833 | if (!IS_ERR(res)) | ||
834 | *od->filp = NULL; | ||
835 | |||
836 | return res; | ||
837 | } | ||
838 | EXPORT_SYMBOL(finish_open); | ||
839 | |||
840 | /** | ||
841 | * finish_no_open - finish ->atomic_open() without opening the file | ||
842 | * | ||
843 | * @od: opaque open data | ||
844 | * @dentry: dentry or NULL (as returned from ->lookup()) | ||
845 | * | ||
846 | * This can be used to set the result of a successful lookup in ->atomic_open(). | ||
847 | * The filesystem's atomic_open() method shall return NULL after calling this. | ||
848 | */ | ||
849 | void finish_no_open(struct opendata *od, struct dentry *dentry) | ||
850 | { | ||
851 | od->dentry = dentry; | ||
852 | } | ||
853 | EXPORT_SYMBOL(finish_no_open); | ||
854 | |||
855 | /** | ||
814 | * nameidata_to_filp - convert a nameidata to an open filp. | 856 | * nameidata_to_filp - convert a nameidata to an open filp. |
815 | * @nd: pointer to nameidata | 857 | * @nd: pointer to nameidata |
816 | * @flags: open flags | 858 | * @flags: open flags |
diff --git a/include/linux/fs.h b/include/linux/fs.h index f06db6bd5a74..0314635cf833 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -427,6 +427,7 @@ struct kstatfs; | |||
427 | struct vm_area_struct; | 427 | struct vm_area_struct; |
428 | struct vfsmount; | 428 | struct vfsmount; |
429 | struct cred; | 429 | struct cred; |
430 | struct opendata; | ||
430 | 431 | ||
431 | extern void __init inode_init(void); | 432 | extern void __init inode_init(void); |
432 | extern void __init inode_init_early(void); | 433 | extern void __init inode_init_early(void); |
@@ -1693,6 +1694,9 @@ struct inode_operations { | |||
1693 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, | 1694 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, |
1694 | u64 len); | 1695 | u64 len); |
1695 | int (*update_time)(struct inode *, struct timespec *, int); | 1696 | int (*update_time)(struct inode *, struct timespec *, int); |
1697 | struct file * (*atomic_open)(struct inode *, struct dentry *, | ||
1698 | struct opendata *, unsigned open_flag, | ||
1699 | umode_t create_mode, bool *created); | ||
1696 | } ____cacheline_aligned; | 1700 | } ____cacheline_aligned; |
1697 | 1701 | ||
1698 | struct seq_file; | 1702 | struct seq_file; |
@@ -2061,6 +2065,9 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, | |||
2061 | const struct cred *); | 2065 | const struct cred *); |
2062 | extern int filp_close(struct file *, fl_owner_t id); | 2066 | extern int filp_close(struct file *, fl_owner_t id); |
2063 | extern char * getname(const char __user *); | 2067 | extern char * getname(const char __user *); |
2068 | extern struct file *finish_open(struct opendata *od, struct dentry *dentry, | ||
2069 | int (*open)(struct inode *, struct file *)); | ||
2070 | extern void finish_no_open(struct opendata *od, struct dentry *dentry); | ||
2064 | 2071 | ||
2065 | /* fs/ioctl.c */ | 2072 | /* fs/ioctl.c */ |
2066 | 2073 | ||