diff options
36 files changed, 645 insertions, 465 deletions
diff --git a/Documentation/filesystems/nfs/Exporting b/Documentation/filesystems/nfs/Exporting index e543b1a619cc..c8f036a9b13f 100644 --- a/Documentation/filesystems/nfs/Exporting +++ b/Documentation/filesystems/nfs/Exporting | |||
@@ -66,23 +66,31 @@ b/ A per-superblock list "s_anon" of dentries which are the roots of | |||
66 | 66 | ||
67 | c/ Helper routines to allocate anonymous dentries, and to help attach | 67 | c/ Helper routines to allocate anonymous dentries, and to help attach |
68 | loose directory dentries at lookup time. They are: | 68 | loose directory dentries at lookup time. They are: |
69 | d_alloc_anon(inode) will return a dentry for the given inode. | 69 | d_obtain_alias(inode) will return a dentry for the given inode. |
70 | If the inode already has a dentry, one of those is returned. | 70 | If the inode already has a dentry, one of those is returned. |
71 | If it doesn't, a new anonymous (IS_ROOT and | 71 | If it doesn't, a new anonymous (IS_ROOT and |
72 | DCACHE_DISCONNECTED) dentry is allocated and attached. | 72 | DCACHE_DISCONNECTED) dentry is allocated and attached. |
73 | In the case of a directory, care is taken that only one dentry | 73 | In the case of a directory, care is taken that only one dentry |
74 | can ever be attached. | 74 | can ever be attached. |
75 | d_splice_alias(inode, dentry) will make sure that there is a | 75 | d_splice_alias(inode, dentry) or d_materialise_unique(dentry, inode) |
76 | dentry with the same name and parent as the given dentry, and | 76 | will introduce a new dentry into the tree; either the passed-in |
77 | which refers to the given inode. | 77 | dentry or a preexisting alias for the given inode (such as an |
78 | If the inode is a directory and already has a dentry, then that | 78 | anonymous one created by d_obtain_alias), if appropriate. The two |
79 | dentry is d_moved over the given dentry. | 79 | functions differ in their handling of directories with preexisting |
80 | If the passed dentry gets attached, care is taken that this is | 80 | aliases: |
81 | mutually exclusive to a d_alloc_anon operation. | 81 | d_splice_alias will use any existing IS_ROOT dentry, but it will |
82 | If the passed dentry is used, NULL is returned, else the used | 82 | return -EIO rather than try to move a dentry with a different |
83 | dentry is returned. This corresponds to the calling pattern of | 83 | parent. This is appropriate for local filesystems, which |
84 | ->lookup. | 84 | should never see such an alias unless the filesystem is |
85 | 85 | corrupted somehow (for example, if two on-disk directory | |
86 | entries refer to the same directory). | ||
87 | d_materialise_unique will attempt to move any dentry. This is | ||
88 | appropriate for distributed filesystems, where finding a | ||
89 | directory other than where we last cached it may be a normal | ||
90 | consequence of concurrent operations on other hosts. | ||
91 | Both functions return NULL when the passed-in dentry is used, | ||
92 | following the calling convention of ->lookup. | ||
93 | |||
86 | 94 | ||
87 | Filesystem Issues | 95 | Filesystem Issues |
88 | ----------------- | 96 | ----------------- |
@@ -120,12 +128,12 @@ struct which has the following members: | |||
120 | 128 | ||
121 | fh_to_dentry (mandatory) | 129 | fh_to_dentry (mandatory) |
122 | Given a filehandle fragment, this should find the implied object and | 130 | Given a filehandle fragment, this should find the implied object and |
123 | create a dentry for it (possibly with d_alloc_anon). | 131 | create a dentry for it (possibly with d_obtain_alias). |
124 | 132 | ||
125 | fh_to_parent (optional but strongly recommended) | 133 | fh_to_parent (optional but strongly recommended) |
126 | Given a filehandle fragment, this should find the parent of the | 134 | Given a filehandle fragment, this should find the parent of the |
127 | implied object and create a dentry for it (possibly with d_alloc_anon). | 135 | implied object and create a dentry for it (possibly with |
128 | May fail if the filehandle fragment is too small. | 136 | d_obtain_alias). May fail if the filehandle fragment is too small. |
129 | 137 | ||
130 | get_parent (optional but strongly recommended) | 138 | get_parent (optional but strongly recommended) |
131 | When given a dentry for a directory, this should return a dentry for | 139 | When given a dentry for a directory, this should return a dentry for |
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index a1d0d7a30165..61d65cc65c54 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -1053,7 +1053,8 @@ struct dentry_operations { | |||
1053 | If the 'rcu_walk' parameter is true, then the caller is doing a | 1053 | If the 'rcu_walk' parameter is true, then the caller is doing a |
1054 | pathwalk in RCU-walk mode. Sleeping is not permitted in this mode, | 1054 | pathwalk in RCU-walk mode. Sleeping is not permitted in this mode, |
1055 | and the caller can be asked to leave it and call again by returning | 1055 | and the caller can be asked to leave it and call again by returning |
1056 | -ECHILD. | 1056 | -ECHILD. -EISDIR may also be returned to tell pathwalk to |
1057 | ignore d_automount or any mounts. | ||
1057 | 1058 | ||
1058 | This function is only used if DCACHE_MANAGE_TRANSIT is set on the | 1059 | This function is only used if DCACHE_MANAGE_TRANSIT is set on the |
1059 | dentry being transited from. | 1060 | dentry being transited from. |
diff --git a/fs/Makefile b/fs/Makefile index 4030cbfbc9af..90c88529892b 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ | |||
11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ |
12 | seq_file.o xattr.o libfs.o fs-writeback.o \ | 12 | seq_file.o xattr.o libfs.o fs-writeback.o \ |
13 | pnode.o splice.o sync.o utimes.o \ | 13 | pnode.o splice.o sync.o utimes.o \ |
14 | stack.o fs_struct.o statfs.o | 14 | stack.o fs_struct.o statfs.o fs_pin.o |
15 | 15 | ||
16 | ifeq ($(CONFIG_BLOCK),y) | 16 | ifeq ($(CONFIG_BLOCK),y) |
17 | obj-y += buffer.o block_dev.o direct-io.o mpage.o | 17 | obj-y += buffer.o block_dev.o direct-io.o mpage.o |
diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 7c93953030fb..afd2b4408adf 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c | |||
@@ -218,8 +218,9 @@ static int bad_inode_mknod (struct inode *dir, struct dentry *dentry, | |||
218 | return -EIO; | 218 | return -EIO; |
219 | } | 219 | } |
220 | 220 | ||
221 | static int bad_inode_rename (struct inode *old_dir, struct dentry *old_dentry, | 221 | static int bad_inode_rename2(struct inode *old_dir, struct dentry *old_dentry, |
222 | struct inode *new_dir, struct dentry *new_dentry) | 222 | struct inode *new_dir, struct dentry *new_dentry, |
223 | unsigned int flags) | ||
223 | { | 224 | { |
224 | return -EIO; | 225 | return -EIO; |
225 | } | 226 | } |
@@ -279,7 +280,7 @@ static const struct inode_operations bad_inode_ops = | |||
279 | .mkdir = bad_inode_mkdir, | 280 | .mkdir = bad_inode_mkdir, |
280 | .rmdir = bad_inode_rmdir, | 281 | .rmdir = bad_inode_rmdir, |
281 | .mknod = bad_inode_mknod, | 282 | .mknod = bad_inode_mknod, |
282 | .rename = bad_inode_rename, | 283 | .rename2 = bad_inode_rename2, |
283 | .readlink = bad_inode_readlink, | 284 | .readlink = bad_inode_readlink, |
284 | /* follow_link must be no-op, otherwise unmounting this inode | 285 | /* follow_link must be no-op, otherwise unmounting this inode |
285 | won't work */ | 286 | won't work */ |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3668048e16f8..3183742d6f0d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -8476,6 +8476,16 @@ out_notrans: | |||
8476 | return ret; | 8476 | return ret; |
8477 | } | 8477 | } |
8478 | 8478 | ||
8479 | static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry, | ||
8480 | struct inode *new_dir, struct dentry *new_dentry, | ||
8481 | unsigned int flags) | ||
8482 | { | ||
8483 | if (flags & ~RENAME_NOREPLACE) | ||
8484 | return -EINVAL; | ||
8485 | |||
8486 | return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
8487 | } | ||
8488 | |||
8479 | static void btrfs_run_delalloc_work(struct btrfs_work *work) | 8489 | static void btrfs_run_delalloc_work(struct btrfs_work *work) |
8480 | { | 8490 | { |
8481 | struct btrfs_delalloc_work *delalloc_work; | 8491 | struct btrfs_delalloc_work *delalloc_work; |
@@ -9019,7 +9029,7 @@ static const struct inode_operations btrfs_dir_inode_operations = { | |||
9019 | .link = btrfs_link, | 9029 | .link = btrfs_link, |
9020 | .mkdir = btrfs_mkdir, | 9030 | .mkdir = btrfs_mkdir, |
9021 | .rmdir = btrfs_rmdir, | 9031 | .rmdir = btrfs_rmdir, |
9022 | .rename = btrfs_rename, | 9032 | .rename2 = btrfs_rename2, |
9023 | .symlink = btrfs_symlink, | 9033 | .symlink = btrfs_symlink, |
9024 | .setattr = btrfs_setattr, | 9034 | .setattr = btrfs_setattr, |
9025 | .mknod = btrfs_mknod, | 9035 | .mknod = btrfs_mknod, |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8e16bca69c56..67b48b9a03e0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -851,7 +851,6 @@ static struct dentry *get_default_root(struct super_block *sb, | |||
851 | struct btrfs_path *path; | 851 | struct btrfs_path *path; |
852 | struct btrfs_key location; | 852 | struct btrfs_key location; |
853 | struct inode *inode; | 853 | struct inode *inode; |
854 | struct dentry *dentry; | ||
855 | u64 dir_id; | 854 | u64 dir_id; |
856 | int new = 0; | 855 | int new = 0; |
857 | 856 | ||
@@ -922,13 +921,7 @@ setup_root: | |||
922 | return dget(sb->s_root); | 921 | return dget(sb->s_root); |
923 | } | 922 | } |
924 | 923 | ||
925 | dentry = d_obtain_alias(inode); | 924 | return d_obtain_root(inode); |
926 | if (!IS_ERR(dentry)) { | ||
927 | spin_lock(&dentry->d_lock); | ||
928 | dentry->d_flags &= ~DCACHE_DISCONNECTED; | ||
929 | spin_unlock(&dentry->d_lock); | ||
930 | } | ||
931 | return dentry; | ||
932 | } | 925 | } |
933 | 926 | ||
934 | static int btrfs_fill_super(struct super_block *sb, | 927 | static int btrfs_fill_super(struct super_block *sb, |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 06150fd745ac..f6e12377335c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -755,7 +755,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, | |||
755 | goto out; | 755 | goto out; |
756 | } | 756 | } |
757 | } else { | 757 | } else { |
758 | root = d_obtain_alias(inode); | 758 | root = d_obtain_root(inode); |
759 | } | 759 | } |
760 | ceph_init_dentry(root); | 760 | ceph_init_dentry(root); |
761 | dout("open_root_inode success, root dentry is %p\n", root); | 761 | dout("open_root_inode success, root dentry is %p\n", root); |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 888398067420..ac4f260155c8 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -848,7 +848,7 @@ const struct inode_operations cifs_dir_inode_ops = { | |||
848 | .link = cifs_hardlink, | 848 | .link = cifs_hardlink, |
849 | .mkdir = cifs_mkdir, | 849 | .mkdir = cifs_mkdir, |
850 | .rmdir = cifs_rmdir, | 850 | .rmdir = cifs_rmdir, |
851 | .rename = cifs_rename, | 851 | .rename2 = cifs_rename2, |
852 | .permission = cifs_permission, | 852 | .permission = cifs_permission, |
853 | /* revalidate:cifs_revalidate, */ | 853 | /* revalidate:cifs_revalidate, */ |
854 | .setattr = cifs_setattr, | 854 | .setattr = cifs_setattr, |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 560480263336..b0fafa499505 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -68,8 +68,8 @@ extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); | |||
68 | extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t); | 68 | extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t); |
69 | extern int cifs_mkdir(struct inode *, struct dentry *, umode_t); | 69 | extern int cifs_mkdir(struct inode *, struct dentry *, umode_t); |
70 | extern int cifs_rmdir(struct inode *, struct dentry *); | 70 | extern int cifs_rmdir(struct inode *, struct dentry *); |
71 | extern int cifs_rename(struct inode *, struct dentry *, struct inode *, | 71 | extern int cifs_rename2(struct inode *, struct dentry *, struct inode *, |
72 | struct dentry *); | 72 | struct dentry *, unsigned int); |
73 | extern int cifs_revalidate_file_attr(struct file *filp); | 73 | extern int cifs_revalidate_file_attr(struct file *filp); |
74 | extern int cifs_revalidate_dentry_attr(struct dentry *); | 74 | extern int cifs_revalidate_dentry_attr(struct dentry *); |
75 | extern int cifs_revalidate_file(struct file *filp); | 75 | extern int cifs_revalidate_file(struct file *filp); |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 41de3935caa0..426d6c6ad8bf 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -1627,8 +1627,9 @@ do_rename_exit: | |||
1627 | } | 1627 | } |
1628 | 1628 | ||
1629 | int | 1629 | int |
1630 | cifs_rename(struct inode *source_dir, struct dentry *source_dentry, | 1630 | cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, |
1631 | struct inode *target_dir, struct dentry *target_dentry) | 1631 | struct inode *target_dir, struct dentry *target_dentry, |
1632 | unsigned int flags) | ||
1632 | { | 1633 | { |
1633 | char *from_name = NULL; | 1634 | char *from_name = NULL; |
1634 | char *to_name = NULL; | 1635 | char *to_name = NULL; |
@@ -1640,6 +1641,9 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry, | |||
1640 | unsigned int xid; | 1641 | unsigned int xid; |
1641 | int rc, tmprc; | 1642 | int rc, tmprc; |
1642 | 1643 | ||
1644 | if (flags & ~RENAME_NOREPLACE) | ||
1645 | return -EINVAL; | ||
1646 | |||
1643 | cifs_sb = CIFS_SB(source_dir->i_sb); | 1647 | cifs_sb = CIFS_SB(source_dir->i_sb); |
1644 | tlink = cifs_sb_tlink(cifs_sb); | 1648 | tlink = cifs_sb_tlink(cifs_sb); |
1645 | if (IS_ERR(tlink)) | 1649 | if (IS_ERR(tlink)) |
@@ -1667,6 +1671,12 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry, | |||
1667 | rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, | 1671 | rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, |
1668 | to_name); | 1672 | to_name); |
1669 | 1673 | ||
1674 | /* | ||
1675 | * No-replace is the natural behavior for CIFS, so skip unlink hacks. | ||
1676 | */ | ||
1677 | if (flags & RENAME_NOREPLACE) | ||
1678 | goto cifs_rename_exit; | ||
1679 | |||
1670 | if (rc == -EEXIST && tcon->unix_ext) { | 1680 | if (rc == -EEXIST && tcon->unix_ext) { |
1671 | /* | 1681 | /* |
1672 | * Are src and dst hardlinks of same inode? We can only tell | 1682 | * Are src and dst hardlinks of same inode? We can only tell |
diff --git a/fs/dcache.c b/fs/dcache.c index 06f65857a855..d30ce699ae4b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -731,8 +731,6 @@ EXPORT_SYMBOL(dget_parent); | |||
731 | /** | 731 | /** |
732 | * d_find_alias - grab a hashed alias of inode | 732 | * d_find_alias - grab a hashed alias of inode |
733 | * @inode: inode in question | 733 | * @inode: inode in question |
734 | * @want_discon: flag, used by d_splice_alias, to request | ||
735 | * that only a DISCONNECTED alias be returned. | ||
736 | * | 734 | * |
737 | * If inode has a hashed alias, or is a directory and has any alias, | 735 | * If inode has a hashed alias, or is a directory and has any alias, |
738 | * acquire the reference to alias and return it. Otherwise return NULL. | 736 | * acquire the reference to alias and return it. Otherwise return NULL. |
@@ -741,10 +739,9 @@ EXPORT_SYMBOL(dget_parent); | |||
741 | * of a filesystem. | 739 | * of a filesystem. |
742 | * | 740 | * |
743 | * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer | 741 | * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer |
744 | * any other hashed alias over that one unless @want_discon is set, | 742 | * any other hashed alias over that one. |
745 | * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. | ||
746 | */ | 743 | */ |
747 | static struct dentry *__d_find_alias(struct inode *inode, int want_discon) | 744 | static struct dentry *__d_find_alias(struct inode *inode) |
748 | { | 745 | { |
749 | struct dentry *alias, *discon_alias; | 746 | struct dentry *alias, *discon_alias; |
750 | 747 | ||
@@ -756,7 +753,7 @@ again: | |||
756 | if (IS_ROOT(alias) && | 753 | if (IS_ROOT(alias) && |
757 | (alias->d_flags & DCACHE_DISCONNECTED)) { | 754 | (alias->d_flags & DCACHE_DISCONNECTED)) { |
758 | discon_alias = alias; | 755 | discon_alias = alias; |
759 | } else if (!want_discon) { | 756 | } else { |
760 | __dget_dlock(alias); | 757 | __dget_dlock(alias); |
761 | spin_unlock(&alias->d_lock); | 758 | spin_unlock(&alias->d_lock); |
762 | return alias; | 759 | return alias; |
@@ -768,12 +765,9 @@ again: | |||
768 | alias = discon_alias; | 765 | alias = discon_alias; |
769 | spin_lock(&alias->d_lock); | 766 | spin_lock(&alias->d_lock); |
770 | if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { | 767 | if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { |
771 | if (IS_ROOT(alias) && | 768 | __dget_dlock(alias); |
772 | (alias->d_flags & DCACHE_DISCONNECTED)) { | 769 | spin_unlock(&alias->d_lock); |
773 | __dget_dlock(alias); | 770 | return alias; |
774 | spin_unlock(&alias->d_lock); | ||
775 | return alias; | ||
776 | } | ||
777 | } | 771 | } |
778 | spin_unlock(&alias->d_lock); | 772 | spin_unlock(&alias->d_lock); |
779 | goto again; | 773 | goto again; |
@@ -787,7 +781,7 @@ struct dentry *d_find_alias(struct inode *inode) | |||
787 | 781 | ||
788 | if (!hlist_empty(&inode->i_dentry)) { | 782 | if (!hlist_empty(&inode->i_dentry)) { |
789 | spin_lock(&inode->i_lock); | 783 | spin_lock(&inode->i_lock); |
790 | de = __d_find_alias(inode, 0); | 784 | de = __d_find_alias(inode); |
791 | spin_unlock(&inode->i_lock); | 785 | spin_unlock(&inode->i_lock); |
792 | } | 786 | } |
793 | return de; | 787 | return de; |
@@ -1781,25 +1775,7 @@ struct dentry *d_find_any_alias(struct inode *inode) | |||
1781 | } | 1775 | } |
1782 | EXPORT_SYMBOL(d_find_any_alias); | 1776 | EXPORT_SYMBOL(d_find_any_alias); |
1783 | 1777 | ||
1784 | /** | 1778 | static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) |
1785 | * d_obtain_alias - find or allocate a dentry for a given inode | ||
1786 | * @inode: inode to allocate the dentry for | ||
1787 | * | ||
1788 | * Obtain a dentry for an inode resulting from NFS filehandle conversion or | ||
1789 | * similar open by handle operations. The returned dentry may be anonymous, | ||
1790 | * or may have a full name (if the inode was already in the cache). | ||
1791 | * | ||
1792 | * When called on a directory inode, we must ensure that the inode only ever | ||
1793 | * has one dentry. If a dentry is found, that is returned instead of | ||
1794 | * allocating a new one. | ||
1795 | * | ||
1796 | * On successful return, the reference to the inode has been transferred | ||
1797 | * to the dentry. In case of an error the reference on the inode is released. | ||
1798 | * To make it easier to use in export operations a %NULL or IS_ERR inode may | ||
1799 | * be passed in and will be the error will be propagate to the return value, | ||
1800 | * with a %NULL @inode replaced by ERR_PTR(-ESTALE). | ||
1801 | */ | ||
1802 | struct dentry *d_obtain_alias(struct inode *inode) | ||
1803 | { | 1779 | { |
1804 | static const struct qstr anonstring = QSTR_INIT("/", 1); | 1780 | static const struct qstr anonstring = QSTR_INIT("/", 1); |
1805 | struct dentry *tmp; | 1781 | struct dentry *tmp; |
@@ -1830,7 +1806,10 @@ struct dentry *d_obtain_alias(struct inode *inode) | |||
1830 | } | 1806 | } |
1831 | 1807 | ||
1832 | /* attach a disconnected dentry */ | 1808 | /* attach a disconnected dentry */ |
1833 | add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED; | 1809 | add_flags = d_flags_for_inode(inode); |
1810 | |||
1811 | if (disconnected) | ||
1812 | add_flags |= DCACHE_DISCONNECTED; | ||
1834 | 1813 | ||
1835 | spin_lock(&tmp->d_lock); | 1814 | spin_lock(&tmp->d_lock); |
1836 | tmp->d_inode = inode; | 1815 | tmp->d_inode = inode; |
@@ -1851,59 +1830,51 @@ struct dentry *d_obtain_alias(struct inode *inode) | |||
1851 | iput(inode); | 1830 | iput(inode); |
1852 | return res; | 1831 | return res; |
1853 | } | 1832 | } |
1854 | EXPORT_SYMBOL(d_obtain_alias); | ||
1855 | 1833 | ||
1856 | /** | 1834 | /** |
1857 | * d_splice_alias - splice a disconnected dentry into the tree if one exists | 1835 | * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode |
1858 | * @inode: the inode which may have a disconnected dentry | 1836 | * @inode: inode to allocate the dentry for |
1859 | * @dentry: a negative dentry which we want to point to the inode. | ||
1860 | * | ||
1861 | * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and | ||
1862 | * DCACHE_DISCONNECTED), then d_move that in place of the given dentry | ||
1863 | * and return it, else simply d_add the inode to the dentry and return NULL. | ||
1864 | * | 1837 | * |
1865 | * This is needed in the lookup routine of any filesystem that is exportable | 1838 | * Obtain a dentry for an inode resulting from NFS filehandle conversion or |
1866 | * (via knfsd) so that we can build dcache paths to directories effectively. | 1839 | * similar open by handle operations. The returned dentry may be anonymous, |
1840 | * or may have a full name (if the inode was already in the cache). | ||
1867 | * | 1841 | * |
1868 | * If a dentry was found and moved, then it is returned. Otherwise NULL | 1842 | * When called on a directory inode, we must ensure that the inode only ever |
1869 | * is returned. This matches the expected return value of ->lookup. | 1843 | * has one dentry. If a dentry is found, that is returned instead of |
1844 | * allocating a new one. | ||
1870 | * | 1845 | * |
1871 | * Cluster filesystems may call this function with a negative, hashed dentry. | 1846 | * On successful return, the reference to the inode has been transferred |
1872 | * In that case, we know that the inode will be a regular file, and also this | 1847 | * to the dentry. In case of an error the reference on the inode is released. |
1873 | * will only occur during atomic_open. So we need to check for the dentry | 1848 | * To make it easier to use in export operations a %NULL or IS_ERR inode may |
1874 | * being already hashed only in the final case. | 1849 | * be passed in and the error will be propagated to the return value, |
1850 | * with a %NULL @inode replaced by ERR_PTR(-ESTALE). | ||
1875 | */ | 1851 | */ |
1876 | struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | 1852 | struct dentry *d_obtain_alias(struct inode *inode) |
1877 | { | 1853 | { |
1878 | struct dentry *new = NULL; | 1854 | return __d_obtain_alias(inode, 1); |
1879 | 1855 | } | |
1880 | if (IS_ERR(inode)) | 1856 | EXPORT_SYMBOL(d_obtain_alias); |
1881 | return ERR_CAST(inode); | ||
1882 | 1857 | ||
1883 | if (inode && S_ISDIR(inode->i_mode)) { | 1858 | /** |
1884 | spin_lock(&inode->i_lock); | 1859 | * d_obtain_root - find or allocate a dentry for a given inode |
1885 | new = __d_find_alias(inode, 1); | 1860 | * @inode: inode to allocate the dentry for |
1886 | if (new) { | 1861 | * |
1887 | BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); | 1862 | * Obtain an IS_ROOT dentry for the root of a filesystem. |
1888 | spin_unlock(&inode->i_lock); | 1863 | * |
1889 | security_d_instantiate(new, inode); | 1864 | * We must ensure that directory inodes only ever have one dentry. If a |
1890 | d_move(new, dentry); | 1865 | * dentry is found, that is returned instead of allocating a new one. |
1891 | iput(inode); | 1866 | * |
1892 | } else { | 1867 | * On successful return, the reference to the inode has been transferred |
1893 | /* already taking inode->i_lock, so d_add() by hand */ | 1868 | * to the dentry. In case of an error the reference on the inode is |
1894 | __d_instantiate(dentry, inode); | 1869 | * released. A %NULL or IS_ERR inode may be passed in and the |
1895 | spin_unlock(&inode->i_lock); | 1870 | * error will be propagated to the return value, with a %NULL @inode |
1896 | security_d_instantiate(dentry, inode); | 1871 | * replaced by ERR_PTR(-ESTALE). |
1897 | d_rehash(dentry); | 1872 | */ |
1898 | } | 1873 | struct dentry *d_obtain_root(struct inode *inode) |
1899 | } else { | 1874 | { |
1900 | d_instantiate(dentry, inode); | 1875 | return __d_obtain_alias(inode, 0); |
1901 | if (d_unhashed(dentry)) | ||
1902 | d_rehash(dentry); | ||
1903 | } | ||
1904 | return new; | ||
1905 | } | 1876 | } |
1906 | EXPORT_SYMBOL(d_splice_alias); | 1877 | EXPORT_SYMBOL(d_obtain_root); |
1907 | 1878 | ||
1908 | /** | 1879 | /** |
1909 | * d_add_ci - lookup or allocate new dentry with case-exact name | 1880 | * d_add_ci - lookup or allocate new dentry with case-exact name |
@@ -2697,6 +2668,75 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) | |||
2697 | } | 2668 | } |
2698 | 2669 | ||
2699 | /** | 2670 | /** |
2671 | * d_splice_alias - splice a disconnected dentry into the tree if one exists | ||
2672 | * @inode: the inode which may have a disconnected dentry | ||
2673 | * @dentry: a negative dentry which we want to point to the inode. | ||
2674 | * | ||
2675 | * If inode is a directory and has an IS_ROOT alias, then d_move that in | ||
2676 | * place of the given dentry and return it, else simply d_add the inode | ||
2677 | * to the dentry and return NULL. | ||
2678 | * | ||
2679 | * If a non-IS_ROOT directory is found, the filesystem is corrupt, and | ||
2680 | * we should error out: directories can't have multiple aliases. | ||
2681 | * | ||
2682 | * This is needed in the lookup routine of any filesystem that is exportable | ||
2683 | * (via knfsd) so that we can build dcache paths to directories effectively. | ||
2684 | * | ||
2685 | * If a dentry was found and moved, then it is returned. Otherwise NULL | ||
2686 | * is returned. This matches the expected return value of ->lookup. | ||
2687 | * | ||
2688 | * Cluster filesystems may call this function with a negative, hashed dentry. | ||
2689 | * In that case, we know that the inode will be a regular file, and also this | ||
2690 | * will only occur during atomic_open. So we need to check for the dentry | ||
2691 | * being already hashed only in the final case. | ||
2692 | */ | ||
2693 | struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | ||
2694 | { | ||
2695 | struct dentry *new = NULL; | ||
2696 | |||
2697 | if (IS_ERR(inode)) | ||
2698 | return ERR_CAST(inode); | ||
2699 | |||
2700 | if (inode && S_ISDIR(inode->i_mode)) { | ||
2701 | spin_lock(&inode->i_lock); | ||
2702 | new = __d_find_any_alias(inode); | ||
2703 | if (new) { | ||
2704 | if (!IS_ROOT(new)) { | ||
2705 | spin_unlock(&inode->i_lock); | ||
2706 | dput(new); | ||
2707 | return ERR_PTR(-EIO); | ||
2708 | } | ||
2709 | if (d_ancestor(new, dentry)) { | ||
2710 | spin_unlock(&inode->i_lock); | ||
2711 | dput(new); | ||
2712 | return ERR_PTR(-EIO); | ||
2713 | } | ||
2714 | write_seqlock(&rename_lock); | ||
2715 | __d_materialise_dentry(dentry, new); | ||
2716 | write_sequnlock(&rename_lock); | ||
2717 | __d_drop(new); | ||
2718 | _d_rehash(new); | ||
2719 | spin_unlock(&new->d_lock); | ||
2720 | spin_unlock(&inode->i_lock); | ||
2721 | security_d_instantiate(new, inode); | ||
2722 | iput(inode); | ||
2723 | } else { | ||
2724 | /* already taking inode->i_lock, so d_add() by hand */ | ||
2725 | __d_instantiate(dentry, inode); | ||
2726 | spin_unlock(&inode->i_lock); | ||
2727 | security_d_instantiate(dentry, inode); | ||
2728 | d_rehash(dentry); | ||
2729 | } | ||
2730 | } else { | ||
2731 | d_instantiate(dentry, inode); | ||
2732 | if (d_unhashed(dentry)) | ||
2733 | d_rehash(dentry); | ||
2734 | } | ||
2735 | return new; | ||
2736 | } | ||
2737 | EXPORT_SYMBOL(d_splice_alias); | ||
2738 | |||
2739 | /** | ||
2700 | * d_materialise_unique - introduce an inode into the tree | 2740 | * d_materialise_unique - introduce an inode into the tree |
2701 | * @dentry: candidate dentry | 2741 | * @dentry: candidate dentry |
2702 | * @inode: inode to bind to the dentry, to which aliases may be attached | 2742 | * @inode: inode to bind to the dentry, to which aliases may be attached |
@@ -2724,7 +2764,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) | |||
2724 | struct dentry *alias; | 2764 | struct dentry *alias; |
2725 | 2765 | ||
2726 | /* Does an aliased dentry already exist? */ | 2766 | /* Does an aliased dentry already exist? */ |
2727 | alias = __d_find_alias(inode, 0); | 2767 | alias = __d_find_alias(inode); |
2728 | if (alias) { | 2768 | if (alias) { |
2729 | actual = alias; | 2769 | actual = alias; |
2730 | write_seqlock(&rename_lock); | 2770 | write_seqlock(&rename_lock); |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 17e39b047de5..c3116404ab49 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -158,7 +158,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) | |||
158 | { | 158 | { |
159 | ssize_t ret; | 159 | ssize_t ret; |
160 | 160 | ||
161 | ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE, | 161 | ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES, |
162 | &sdio->from); | 162 | &sdio->from); |
163 | 163 | ||
164 | if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { | 164 | if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 3520ab8a6639..b147a67baa0d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -3455,7 +3455,6 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
3455 | .rmdir = ext4_rmdir, | 3455 | .rmdir = ext4_rmdir, |
3456 | .mknod = ext4_mknod, | 3456 | .mknod = ext4_mknod, |
3457 | .tmpfile = ext4_tmpfile, | 3457 | .tmpfile = ext4_tmpfile, |
3458 | .rename = ext4_rename, | ||
3459 | .rename2 = ext4_rename2, | 3458 | .rename2 = ext4_rename2, |
3460 | .setattr = ext4_setattr, | 3459 | .setattr = ext4_setattr, |
3461 | .setxattr = generic_setxattr, | 3460 | .setxattr = generic_setxattr, |
diff --git a/fs/fs_pin.c b/fs/fs_pin.c new file mode 100644 index 000000000000..9368236ca100 --- /dev/null +++ b/fs/fs_pin.c | |||
@@ -0,0 +1,78 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/slab.h> | ||
3 | #include <linux/fs_pin.h> | ||
4 | #include "internal.h" | ||
5 | #include "mount.h" | ||
6 | |||
7 | static void pin_free_rcu(struct rcu_head *head) | ||
8 | { | ||
9 | kfree(container_of(head, struct fs_pin, rcu)); | ||
10 | } | ||
11 | |||
12 | static DEFINE_SPINLOCK(pin_lock); | ||
13 | |||
14 | void pin_put(struct fs_pin *p) | ||
15 | { | ||
16 | if (atomic_long_dec_and_test(&p->count)) | ||
17 | call_rcu(&p->rcu, pin_free_rcu); | ||
18 | } | ||
19 | |||
20 | void pin_remove(struct fs_pin *pin) | ||
21 | { | ||
22 | spin_lock(&pin_lock); | ||
23 | hlist_del(&pin->m_list); | ||
24 | hlist_del(&pin->s_list); | ||
25 | spin_unlock(&pin_lock); | ||
26 | } | ||
27 | |||
28 | void pin_insert(struct fs_pin *pin, struct vfsmount *m) | ||
29 | { | ||
30 | spin_lock(&pin_lock); | ||
31 | hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins); | ||
32 | hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins); | ||
33 | spin_unlock(&pin_lock); | ||
34 | } | ||
35 | |||
36 | void mnt_pin_kill(struct mount *m) | ||
37 | { | ||
38 | while (1) { | ||
39 | struct hlist_node *p; | ||
40 | struct fs_pin *pin; | ||
41 | rcu_read_lock(); | ||
42 | p = ACCESS_ONCE(m->mnt_pins.first); | ||
43 | if (!p) { | ||
44 | rcu_read_unlock(); | ||
45 | break; | ||
46 | } | ||
47 | pin = hlist_entry(p, struct fs_pin, m_list); | ||
48 | if (!atomic_long_inc_not_zero(&pin->count)) { | ||
49 | rcu_read_unlock(); | ||
50 | cpu_relax(); | ||
51 | continue; | ||
52 | } | ||
53 | rcu_read_unlock(); | ||
54 | pin->kill(pin); | ||
55 | } | ||
56 | } | ||
57 | |||
58 | void sb_pin_kill(struct super_block *sb) | ||
59 | { | ||
60 | while (1) { | ||
61 | struct hlist_node *p; | ||
62 | struct fs_pin *pin; | ||
63 | rcu_read_lock(); | ||
64 | p = ACCESS_ONCE(sb->s_pins.first); | ||
65 | if (!p) { | ||
66 | rcu_read_unlock(); | ||
67 | break; | ||
68 | } | ||
69 | pin = hlist_entry(p, struct fs_pin, s_list); | ||
70 | if (!atomic_long_inc_not_zero(&pin->count)) { | ||
71 | rcu_read_unlock(); | ||
72 | cpu_relax(); | ||
73 | continue; | ||
74 | } | ||
75 | rcu_read_unlock(); | ||
76 | pin->kill(pin); | ||
77 | } | ||
78 | } | ||
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 0c6048247a34..de1d84af9f7c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -845,12 +845,6 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, | |||
845 | return err; | 845 | return err; |
846 | } | 846 | } |
847 | 847 | ||
848 | static int fuse_rename(struct inode *olddir, struct dentry *oldent, | ||
849 | struct inode *newdir, struct dentry *newent) | ||
850 | { | ||
851 | return fuse_rename2(olddir, oldent, newdir, newent, 0); | ||
852 | } | ||
853 | |||
854 | static int fuse_link(struct dentry *entry, struct inode *newdir, | 848 | static int fuse_link(struct dentry *entry, struct inode *newdir, |
855 | struct dentry *newent) | 849 | struct dentry *newent) |
856 | { | 850 | { |
@@ -2024,7 +2018,6 @@ static const struct inode_operations fuse_dir_inode_operations = { | |||
2024 | .symlink = fuse_symlink, | 2018 | .symlink = fuse_symlink, |
2025 | .unlink = fuse_unlink, | 2019 | .unlink = fuse_unlink, |
2026 | .rmdir = fuse_rmdir, | 2020 | .rmdir = fuse_rmdir, |
2027 | .rename = fuse_rename, | ||
2028 | .rename2 = fuse_rename2, | 2021 | .rename2 = fuse_rename2, |
2029 | .link = fuse_link, | 2022 | .link = fuse_link, |
2030 | .setattr = fuse_setattr, | 2023 | .setattr = fuse_setattr, |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 40ac2628ddcf..912061ac4baf 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -1303,10 +1303,10 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, | |||
1303 | while (nbytes < *nbytesp && req->num_pages < req->max_pages) { | 1303 | while (nbytes < *nbytesp && req->num_pages < req->max_pages) { |
1304 | unsigned npages; | 1304 | unsigned npages; |
1305 | size_t start; | 1305 | size_t start; |
1306 | unsigned n = req->max_pages - req->num_pages; | ||
1307 | ssize_t ret = iov_iter_get_pages(ii, | 1306 | ssize_t ret = iov_iter_get_pages(ii, |
1308 | &req->pages[req->num_pages], | 1307 | &req->pages[req->num_pages], |
1309 | n * PAGE_SIZE, &start); | 1308 | req->max_pages - req->num_pages, |
1309 | &start); | ||
1310 | if (ret < 0) | 1310 | if (ret < 0) |
1311 | return ret; | 1311 | return ret; |
1312 | 1312 | ||
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 9c88da0e855a..4fcd40d6f308 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h | |||
@@ -89,6 +89,7 @@ extern int do_mknod(const char *file, int mode, unsigned int major, | |||
89 | extern int link_file(const char *from, const char *to); | 89 | extern int link_file(const char *from, const char *to); |
90 | extern int hostfs_do_readlink(char *file, char *buf, int size); | 90 | extern int hostfs_do_readlink(char *file, char *buf, int size); |
91 | extern int rename_file(char *from, char *to); | 91 | extern int rename_file(char *from, char *to); |
92 | extern int rename2_file(char *from, char *to, unsigned int flags); | ||
92 | extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, | 93 | extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, |
93 | long long *bfree_out, long long *bavail_out, | 94 | long long *bfree_out, long long *bavail_out, |
94 | long long *files_out, long long *ffree_out, | 95 | long long *files_out, long long *ffree_out, |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index bb529f3b7f2b..fd62cae0fdcb 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -741,21 +741,31 @@ static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
741 | return err; | 741 | return err; |
742 | } | 742 | } |
743 | 743 | ||
744 | static int hostfs_rename(struct inode *from_ino, struct dentry *from, | 744 | static int hostfs_rename2(struct inode *old_dir, struct dentry *old_dentry, |
745 | struct inode *to_ino, struct dentry *to) | 745 | struct inode *new_dir, struct dentry *new_dentry, |
746 | unsigned int flags) | ||
746 | { | 747 | { |
747 | char *from_name, *to_name; | 748 | char *old_name, *new_name; |
748 | int err; | 749 | int err; |
749 | 750 | ||
750 | if ((from_name = dentry_name(from)) == NULL) | 751 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) |
752 | return -EINVAL; | ||
753 | |||
754 | old_name = dentry_name(old_dentry); | ||
755 | if (old_name == NULL) | ||
751 | return -ENOMEM; | 756 | return -ENOMEM; |
752 | if ((to_name = dentry_name(to)) == NULL) { | 757 | new_name = dentry_name(new_dentry); |
753 | __putname(from_name); | 758 | if (new_name == NULL) { |
759 | __putname(old_name); | ||
754 | return -ENOMEM; | 760 | return -ENOMEM; |
755 | } | 761 | } |
756 | err = rename_file(from_name, to_name); | 762 | if (!flags) |
757 | __putname(from_name); | 763 | err = rename_file(old_name, new_name); |
758 | __putname(to_name); | 764 | else |
765 | err = rename2_file(old_name, new_name, flags); | ||
766 | |||
767 | __putname(old_name); | ||
768 | __putname(new_name); | ||
759 | return err; | 769 | return err; |
760 | } | 770 | } |
761 | 771 | ||
@@ -867,7 +877,7 @@ static const struct inode_operations hostfs_dir_iops = { | |||
867 | .mkdir = hostfs_mkdir, | 877 | .mkdir = hostfs_mkdir, |
868 | .rmdir = hostfs_rmdir, | 878 | .rmdir = hostfs_rmdir, |
869 | .mknod = hostfs_mknod, | 879 | .mknod = hostfs_mknod, |
870 | .rename = hostfs_rename, | 880 | .rename2 = hostfs_rename2, |
871 | .permission = hostfs_permission, | 881 | .permission = hostfs_permission, |
872 | .setattr = hostfs_setattr, | 882 | .setattr = hostfs_setattr, |
873 | }; | 883 | }; |
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 67838f3aa20a..9765dab95cbd 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <sys/time.h> | 14 | #include <sys/time.h> |
15 | #include <sys/types.h> | 15 | #include <sys/types.h> |
16 | #include <sys/vfs.h> | 16 | #include <sys/vfs.h> |
17 | #include <sys/syscall.h> | ||
17 | #include "hostfs.h" | 18 | #include "hostfs.h" |
18 | #include <utime.h> | 19 | #include <utime.h> |
19 | 20 | ||
@@ -360,6 +361,33 @@ int rename_file(char *from, char *to) | |||
360 | return 0; | 361 | return 0; |
361 | } | 362 | } |
362 | 363 | ||
364 | int rename2_file(char *from, char *to, unsigned int flags) | ||
365 | { | ||
366 | int err; | ||
367 | |||
368 | #ifndef SYS_renameat2 | ||
369 | # ifdef __x86_64__ | ||
370 | # define SYS_renameat2 316 | ||
371 | # endif | ||
372 | # ifdef __i386__ | ||
373 | # define SYS_renameat2 353 | ||
374 | # endif | ||
375 | #endif | ||
376 | |||
377 | #ifdef SYS_renameat2 | ||
378 | err = syscall(SYS_renameat2, AT_FDCWD, from, AT_FDCWD, to, flags); | ||
379 | if (err < 0) { | ||
380 | if (errno != ENOSYS) | ||
381 | return -errno; | ||
382 | else | ||
383 | return -EINVAL; | ||
384 | } | ||
385 | return 0; | ||
386 | #else | ||
387 | return -EINVAL; | ||
388 | #endif | ||
389 | } | ||
390 | |||
363 | int do_statfs(char *root, long *bsize_out, long long *blocks_out, | 391 | int do_statfs(char *root, long *bsize_out, long long *blocks_out, |
364 | long long *bfree_out, long long *bavail_out, | 392 | long long *bfree_out, long long *bavail_out, |
365 | long long *files_out, long long *ffree_out, | 393 | long long *files_out, long long *ffree_out, |
diff --git a/fs/internal.h b/fs/internal.h index 465742407466..e325b4f9c799 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -131,7 +131,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, | |||
131 | /* | 131 | /* |
132 | * read_write.c | 132 | * read_write.c |
133 | */ | 133 | */ |
134 | extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); | ||
135 | extern int rw_verify_area(int, struct file *, const loff_t *, size_t); | 134 | extern int rw_verify_area(int, struct file *, const loff_t *, size_t); |
136 | 135 | ||
137 | /* | 136 | /* |
@@ -144,3 +143,9 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | |||
144 | * pipe.c | 143 | * pipe.c |
145 | */ | 144 | */ |
146 | extern const struct file_operations pipefifo_fops; | 145 | extern const struct file_operations pipefifo_fops; |
146 | |||
147 | /* | ||
148 | * fs_pin.c | ||
149 | */ | ||
150 | extern void sb_pin_kill(struct super_block *sb); | ||
151 | extern void mnt_pin_kill(struct mount *m); | ||
diff --git a/fs/mount.h b/fs/mount.h index d55297f2fa05..6740a6215529 100644 --- a/fs/mount.h +++ b/fs/mount.h | |||
@@ -55,7 +55,7 @@ struct mount { | |||
55 | int mnt_id; /* mount identifier */ | 55 | int mnt_id; /* mount identifier */ |
56 | int mnt_group_id; /* peer group identifier */ | 56 | int mnt_group_id; /* peer group identifier */ |
57 | int mnt_expiry_mark; /* true if marked for expiry */ | 57 | int mnt_expiry_mark; /* true if marked for expiry */ |
58 | int mnt_pinned; | 58 | struct hlist_head mnt_pins; |
59 | struct path mnt_ex_mountpoint; | 59 | struct path mnt_ex_mountpoint; |
60 | }; | 60 | }; |
61 | 61 | ||
diff --git a/fs/namei.c b/fs/namei.c index 9eb787e5c167..a996bb48dfab 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1091,10 +1091,10 @@ int follow_down_one(struct path *path) | |||
1091 | } | 1091 | } |
1092 | EXPORT_SYMBOL(follow_down_one); | 1092 | EXPORT_SYMBOL(follow_down_one); |
1093 | 1093 | ||
1094 | static inline bool managed_dentry_might_block(struct dentry *dentry) | 1094 | static inline int managed_dentry_rcu(struct dentry *dentry) |
1095 | { | 1095 | { |
1096 | return (dentry->d_flags & DCACHE_MANAGE_TRANSIT && | 1096 | return (dentry->d_flags & DCACHE_MANAGE_TRANSIT) ? |
1097 | dentry->d_op->d_manage(dentry, true) < 0); | 1097 | dentry->d_op->d_manage(dentry, true) : 0; |
1098 | } | 1098 | } |
1099 | 1099 | ||
1100 | /* | 1100 | /* |
@@ -1110,11 +1110,18 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, | |||
1110 | * Don't forget we might have a non-mountpoint managed dentry | 1110 | * Don't forget we might have a non-mountpoint managed dentry |
1111 | * that wants to block transit. | 1111 | * that wants to block transit. |
1112 | */ | 1112 | */ |
1113 | if (unlikely(managed_dentry_might_block(path->dentry))) | 1113 | switch (managed_dentry_rcu(path->dentry)) { |
1114 | case -ECHILD: | ||
1115 | default: | ||
1114 | return false; | 1116 | return false; |
1117 | case -EISDIR: | ||
1118 | return true; | ||
1119 | case 0: | ||
1120 | break; | ||
1121 | } | ||
1115 | 1122 | ||
1116 | if (!d_mountpoint(path->dentry)) | 1123 | if (!d_mountpoint(path->dentry)) |
1117 | return true; | 1124 | return !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT); |
1118 | 1125 | ||
1119 | mounted = __lookup_mnt(path->mnt, path->dentry); | 1126 | mounted = __lookup_mnt(path->mnt, path->dentry); |
1120 | if (!mounted) | 1127 | if (!mounted) |
@@ -1130,7 +1137,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, | |||
1130 | */ | 1137 | */ |
1131 | *inode = path->dentry->d_inode; | 1138 | *inode = path->dentry->d_inode; |
1132 | } | 1139 | } |
1133 | return read_seqretry(&mount_lock, nd->m_seq); | 1140 | return read_seqretry(&mount_lock, nd->m_seq) && |
1141 | !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT); | ||
1134 | } | 1142 | } |
1135 | 1143 | ||
1136 | static int follow_dotdot_rcu(struct nameidata *nd) | 1144 | static int follow_dotdot_rcu(struct nameidata *nd) |
@@ -1402,11 +1410,8 @@ static int lookup_fast(struct nameidata *nd, | |||
1402 | } | 1410 | } |
1403 | path->mnt = mnt; | 1411 | path->mnt = mnt; |
1404 | path->dentry = dentry; | 1412 | path->dentry = dentry; |
1405 | if (unlikely(!__follow_mount_rcu(nd, path, inode))) | 1413 | if (likely(__follow_mount_rcu(nd, path, inode))) |
1406 | goto unlazy; | 1414 | return 0; |
1407 | if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) | ||
1408 | goto unlazy; | ||
1409 | return 0; | ||
1410 | unlazy: | 1415 | unlazy: |
1411 | if (unlazy_walk(nd, dentry)) | 1416 | if (unlazy_walk(nd, dentry)) |
1412 | return -ECHILD; | 1417 | return -ECHILD; |
@@ -4019,7 +4024,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname | |||
4019 | * The worst of all namespace operations - renaming directory. "Perverted" | 4024 | * The worst of all namespace operations - renaming directory. "Perverted" |
4020 | * doesn't even start to describe it. Somebody in UCB had a heck of a trip... | 4025 | * doesn't even start to describe it. Somebody in UCB had a heck of a trip... |
4021 | * Problems: | 4026 | * Problems: |
4022 | * a) we can get into loop creation. Check is done in is_subdir(). | 4027 | * a) we can get into loop creation. |
4023 | * b) race potential - two innocent renames can create a loop together. | 4028 | * b) race potential - two innocent renames can create a loop together. |
4024 | * That's where 4.4 screws up. Current fix: serialization on | 4029 | * That's where 4.4 screws up. Current fix: serialization on |
4025 | * sb->s_vfs_rename_mutex. We might be more accurate, but that's another | 4030 | * sb->s_vfs_rename_mutex. We might be more accurate, but that's another |
@@ -4075,7 +4080,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4075 | if (error) | 4080 | if (error) |
4076 | return error; | 4081 | return error; |
4077 | 4082 | ||
4078 | if (!old_dir->i_op->rename) | 4083 | if (!old_dir->i_op->rename && !old_dir->i_op->rename2) |
4079 | return -EPERM; | 4084 | return -EPERM; |
4080 | 4085 | ||
4081 | if (flags && !old_dir->i_op->rename2) | 4086 | if (flags && !old_dir->i_op->rename2) |
@@ -4134,10 +4139,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4134 | if (error) | 4139 | if (error) |
4135 | goto out; | 4140 | goto out; |
4136 | } | 4141 | } |
4137 | if (!flags) { | 4142 | if (!old_dir->i_op->rename2) { |
4138 | error = old_dir->i_op->rename(old_dir, old_dentry, | 4143 | error = old_dir->i_op->rename(old_dir, old_dentry, |
4139 | new_dir, new_dentry); | 4144 | new_dir, new_dentry); |
4140 | } else { | 4145 | } else { |
4146 | WARN_ON(old_dir->i_op->rename != NULL); | ||
4141 | error = old_dir->i_op->rename2(old_dir, old_dentry, | 4147 | error = old_dir->i_op->rename2(old_dir, old_dentry, |
4142 | new_dir, new_dentry, flags); | 4148 | new_dir, new_dentry, flags); |
4143 | } | 4149 | } |
diff --git a/fs/namespace.c b/fs/namespace.c index 0acabea58319..a01c7730e9af 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/namei.h> | 16 | #include <linux/namei.h> |
17 | #include <linux/security.h> | 17 | #include <linux/security.h> |
18 | #include <linux/idr.h> | 18 | #include <linux/idr.h> |
19 | #include <linux/acct.h> /* acct_auto_close_mnt */ | ||
20 | #include <linux/init.h> /* init_rootfs */ | 19 | #include <linux/init.h> /* init_rootfs */ |
21 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ | 20 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ |
22 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ | 21 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ |
@@ -779,6 +778,20 @@ static void attach_mnt(struct mount *mnt, | |||
779 | list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); | 778 | list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); |
780 | } | 779 | } |
781 | 780 | ||
781 | static void attach_shadowed(struct mount *mnt, | ||
782 | struct mount *parent, | ||
783 | struct mount *shadows) | ||
784 | { | ||
785 | if (shadows) { | ||
786 | hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash); | ||
787 | list_add(&mnt->mnt_child, &shadows->mnt_child); | ||
788 | } else { | ||
789 | hlist_add_head_rcu(&mnt->mnt_hash, | ||
790 | m_hash(&parent->mnt, mnt->mnt_mountpoint)); | ||
791 | list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); | ||
792 | } | ||
793 | } | ||
794 | |||
782 | /* | 795 | /* |
783 | * vfsmount lock must be held for write | 796 | * vfsmount lock must be held for write |
784 | */ | 797 | */ |
@@ -797,12 +810,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows) | |||
797 | 810 | ||
798 | list_splice(&head, n->list.prev); | 811 | list_splice(&head, n->list.prev); |
799 | 812 | ||
800 | if (shadows) | 813 | attach_shadowed(mnt, parent, shadows); |
801 | hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash); | ||
802 | else | ||
803 | hlist_add_head_rcu(&mnt->mnt_hash, | ||
804 | m_hash(&parent->mnt, mnt->mnt_mountpoint)); | ||
805 | list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); | ||
806 | touch_mnt_namespace(n); | 814 | touch_mnt_namespace(n); |
807 | } | 815 | } |
808 | 816 | ||
@@ -951,7 +959,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, | |||
951 | 959 | ||
952 | static void mntput_no_expire(struct mount *mnt) | 960 | static void mntput_no_expire(struct mount *mnt) |
953 | { | 961 | { |
954 | put_again: | ||
955 | rcu_read_lock(); | 962 | rcu_read_lock(); |
956 | mnt_add_count(mnt, -1); | 963 | mnt_add_count(mnt, -1); |
957 | if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ | 964 | if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ |
@@ -964,14 +971,6 @@ put_again: | |||
964 | unlock_mount_hash(); | 971 | unlock_mount_hash(); |
965 | return; | 972 | return; |
966 | } | 973 | } |
967 | if (unlikely(mnt->mnt_pinned)) { | ||
968 | mnt_add_count(mnt, mnt->mnt_pinned + 1); | ||
969 | mnt->mnt_pinned = 0; | ||
970 | rcu_read_unlock(); | ||
971 | unlock_mount_hash(); | ||
972 | acct_auto_close_mnt(&mnt->mnt); | ||
973 | goto put_again; | ||
974 | } | ||
975 | if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { | 974 | if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { |
976 | rcu_read_unlock(); | 975 | rcu_read_unlock(); |
977 | unlock_mount_hash(); | 976 | unlock_mount_hash(); |
@@ -994,6 +993,8 @@ put_again: | |||
994 | * so mnt_get_writers() below is safe. | 993 | * so mnt_get_writers() below is safe. |
995 | */ | 994 | */ |
996 | WARN_ON(mnt_get_writers(mnt)); | 995 | WARN_ON(mnt_get_writers(mnt)); |
996 | if (unlikely(mnt->mnt_pins.first)) | ||
997 | mnt_pin_kill(mnt); | ||
997 | fsnotify_vfsmount_delete(&mnt->mnt); | 998 | fsnotify_vfsmount_delete(&mnt->mnt); |
998 | dput(mnt->mnt.mnt_root); | 999 | dput(mnt->mnt.mnt_root); |
999 | deactivate_super(mnt->mnt.mnt_sb); | 1000 | deactivate_super(mnt->mnt.mnt_sb); |
@@ -1021,25 +1022,15 @@ struct vfsmount *mntget(struct vfsmount *mnt) | |||
1021 | } | 1022 | } |
1022 | EXPORT_SYMBOL(mntget); | 1023 | EXPORT_SYMBOL(mntget); |
1023 | 1024 | ||
1024 | void mnt_pin(struct vfsmount *mnt) | 1025 | struct vfsmount *mnt_clone_internal(struct path *path) |
1025 | { | ||
1026 | lock_mount_hash(); | ||
1027 | real_mount(mnt)->mnt_pinned++; | ||
1028 | unlock_mount_hash(); | ||
1029 | } | ||
1030 | EXPORT_SYMBOL(mnt_pin); | ||
1031 | |||
1032 | void mnt_unpin(struct vfsmount *m) | ||
1033 | { | 1026 | { |
1034 | struct mount *mnt = real_mount(m); | 1027 | struct mount *p; |
1035 | lock_mount_hash(); | 1028 | p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE); |
1036 | if (mnt->mnt_pinned) { | 1029 | if (IS_ERR(p)) |
1037 | mnt_add_count(mnt, 1); | 1030 | return ERR_CAST(p); |
1038 | mnt->mnt_pinned--; | 1031 | p->mnt.mnt_flags |= MNT_INTERNAL; |
1039 | } | 1032 | return &p->mnt; |
1040 | unlock_mount_hash(); | ||
1041 | } | 1033 | } |
1042 | EXPORT_SYMBOL(mnt_unpin); | ||
1043 | 1034 | ||
1044 | static inline void mangle(struct seq_file *m, const char *s) | 1035 | static inline void mangle(struct seq_file *m, const char *s) |
1045 | { | 1036 | { |
@@ -1505,6 +1496,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, | |||
1505 | continue; | 1496 | continue; |
1506 | 1497 | ||
1507 | for (s = r; s; s = next_mnt(s, r)) { | 1498 | for (s = r; s; s = next_mnt(s, r)) { |
1499 | struct mount *t = NULL; | ||
1508 | if (!(flag & CL_COPY_UNBINDABLE) && | 1500 | if (!(flag & CL_COPY_UNBINDABLE) && |
1509 | IS_MNT_UNBINDABLE(s)) { | 1501 | IS_MNT_UNBINDABLE(s)) { |
1510 | s = skip_mnt_tree(s); | 1502 | s = skip_mnt_tree(s); |
@@ -1526,7 +1518,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, | |||
1526 | goto out; | 1518 | goto out; |
1527 | lock_mount_hash(); | 1519 | lock_mount_hash(); |
1528 | list_add_tail(&q->mnt_list, &res->mnt_list); | 1520 | list_add_tail(&q->mnt_list, &res->mnt_list); |
1529 | attach_mnt(q, parent, p->mnt_mp); | 1521 | mnt_set_mountpoint(parent, p->mnt_mp, q); |
1522 | if (!list_empty(&parent->mnt_mounts)) { | ||
1523 | t = list_last_entry(&parent->mnt_mounts, | ||
1524 | struct mount, mnt_child); | ||
1525 | if (t->mnt_mp != p->mnt_mp) | ||
1526 | t = NULL; | ||
1527 | } | ||
1528 | attach_shadowed(q, parent, t); | ||
1530 | unlock_mount_hash(); | 1529 | unlock_mount_hash(); |
1531 | } | 1530 | } |
1532 | } | 1531 | } |
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index b94f80420a58..880618a8b048 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c | |||
@@ -112,7 +112,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, | |||
112 | * if the dentry tree reaches them; however if the dentry already | 112 | * if the dentry tree reaches them; however if the dentry already |
113 | * exists, we'll pick it up at this point and use it as the root | 113 | * exists, we'll pick it up at this point and use it as the root |
114 | */ | 114 | */ |
115 | ret = d_obtain_alias(inode); | 115 | ret = d_obtain_root(inode); |
116 | if (IS_ERR(ret)) { | 116 | if (IS_ERR(ret)) { |
117 | dprintk("nfs_get_root: get root dentry failed\n"); | 117 | dprintk("nfs_get_root: get root dentry failed\n"); |
118 | goto out; | 118 | goto out; |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index c519927b7b5e..228f5bdf0772 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -942,7 +942,7 @@ static int nilfs_get_root_dentry(struct super_block *sb, | |||
942 | iput(inode); | 942 | iput(inode); |
943 | } | 943 | } |
944 | } else { | 944 | } else { |
945 | dentry = d_obtain_alias(inode); | 945 | dentry = d_obtain_root(inode); |
946 | if (IS_ERR(dentry)) { | 946 | if (IS_ERR(dentry)) { |
947 | ret = PTR_ERR(dentry); | 947 | ret = PTR_ERR(dentry); |
948 | goto failed_dentry; | 948 | goto failed_dentry; |
diff --git a/fs/super.c b/fs/super.c index d20d5b11dedf..a371ce6aa919 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -22,7 +22,6 @@ | |||
22 | 22 | ||
23 | #include <linux/export.h> | 23 | #include <linux/export.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/acct.h> | ||
26 | #include <linux/blkdev.h> | 25 | #include <linux/blkdev.h> |
27 | #include <linux/mount.h> | 26 | #include <linux/mount.h> |
28 | #include <linux/security.h> | 27 | #include <linux/security.h> |
@@ -702,12 +701,22 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
702 | return -EACCES; | 701 | return -EACCES; |
703 | #endif | 702 | #endif |
704 | 703 | ||
705 | if (flags & MS_RDONLY) | ||
706 | acct_auto_close(sb); | ||
707 | shrink_dcache_sb(sb); | ||
708 | |||
709 | remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); | 704 | remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); |
710 | 705 | ||
706 | if (remount_ro) { | ||
707 | if (sb->s_pins.first) { | ||
708 | up_write(&sb->s_umount); | ||
709 | sb_pin_kill(sb); | ||
710 | down_write(&sb->s_umount); | ||
711 | if (!sb->s_root) | ||
712 | return 0; | ||
713 | if (sb->s_writers.frozen != SB_UNFROZEN) | ||
714 | return -EBUSY; | ||
715 | remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); | ||
716 | } | ||
717 | } | ||
718 | shrink_dcache_sb(sb); | ||
719 | |||
711 | /* If we are remounting RDONLY and current sb is read/write, | 720 | /* If we are remounting RDONLY and current sb is read/write, |
712 | make sure there are no rw files opened */ | 721 | make sure there are no rw files opened */ |
713 | if (remount_ro) { | 722 | if (remount_ro) { |
diff --git a/include/linux/acct.h b/include/linux/acct.h index 4a5b7cb56079..dccc2d4fe7de 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h | |||
@@ -24,14 +24,10 @@ struct super_block; | |||
24 | struct pacct_struct; | 24 | struct pacct_struct; |
25 | struct pid_namespace; | 25 | struct pid_namespace; |
26 | extern int acct_parm[]; /* for sysctl */ | 26 | extern int acct_parm[]; /* for sysctl */ |
27 | extern void acct_auto_close_mnt(struct vfsmount *m); | ||
28 | extern void acct_auto_close(struct super_block *sb); | ||
29 | extern void acct_collect(long exitcode, int group_dead); | 27 | extern void acct_collect(long exitcode, int group_dead); |
30 | extern void acct_process(void); | 28 | extern void acct_process(void); |
31 | extern void acct_exit_ns(struct pid_namespace *); | 29 | extern void acct_exit_ns(struct pid_namespace *); |
32 | #else | 30 | #else |
33 | #define acct_auto_close_mnt(x) do { } while (0) | ||
34 | #define acct_auto_close(x) do { } while (0) | ||
35 | #define acct_collect(x,y) do { } while (0) | 31 | #define acct_collect(x,y) do { } while (0) |
36 | #define acct_process() do { } while (0) | 32 | #define acct_process() do { } while (0) |
37 | #define acct_exit_ns(ns) do { } while (0) | 33 | #define acct_exit_ns(ns) do { } while (0) |
diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 3c7ec327ebd2..e4ae2ad48d07 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h | |||
@@ -249,6 +249,7 @@ extern struct dentry * d_splice_alias(struct inode *, struct dentry *); | |||
249 | extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); | 249 | extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); |
250 | extern struct dentry *d_find_any_alias(struct inode *inode); | 250 | extern struct dentry *d_find_any_alias(struct inode *inode); |
251 | extern struct dentry * d_obtain_alias(struct inode *); | 251 | extern struct dentry * d_obtain_alias(struct inode *); |
252 | extern struct dentry * d_obtain_root(struct inode *); | ||
252 | extern void shrink_dcache_sb(struct super_block *); | 253 | extern void shrink_dcache_sb(struct super_block *); |
253 | extern void shrink_dcache_parent(struct dentry *); | 254 | extern void shrink_dcache_parent(struct dentry *); |
254 | extern void shrink_dcache_for_umount(struct super_block *); | 255 | extern void shrink_dcache_for_umount(struct super_block *); |
diff --git a/include/linux/fs.h b/include/linux/fs.h index f0890e4a7c25..94187721ad41 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -1275,6 +1275,7 @@ struct super_block { | |||
1275 | 1275 | ||
1276 | /* AIO completions deferred from interrupt context */ | 1276 | /* AIO completions deferred from interrupt context */ |
1277 | struct workqueue_struct *s_dio_done_wq; | 1277 | struct workqueue_struct *s_dio_done_wq; |
1278 | struct hlist_head s_pins; | ||
1278 | 1279 | ||
1279 | /* | 1280 | /* |
1280 | * Keep the lru lists last in the structure so they always sit on their | 1281 | * Keep the lru lists last in the structure so they always sit on their |
@@ -2360,6 +2361,7 @@ extern int do_pipe_flags(int *, int); | |||
2360 | 2361 | ||
2361 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); | 2362 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); |
2362 | extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); | 2363 | extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); |
2364 | extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); | ||
2363 | extern struct file * open_exec(const char *); | 2365 | extern struct file * open_exec(const char *); |
2364 | 2366 | ||
2365 | /* fs/dcache.c -- generic fs support functions */ | 2367 | /* fs/dcache.c -- generic fs support functions */ |
diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h new file mode 100644 index 000000000000..f66525e72ccf --- /dev/null +++ b/include/linux/fs_pin.h | |||
@@ -0,0 +1,17 @@ | |||
1 | #include <linux/fs.h> | ||
2 | |||
3 | struct fs_pin { | ||
4 | atomic_long_t count; | ||
5 | union { | ||
6 | struct { | ||
7 | struct hlist_node s_list; | ||
8 | struct hlist_node m_list; | ||
9 | }; | ||
10 | struct rcu_head rcu; | ||
11 | }; | ||
12 | void (*kill)(struct fs_pin *); | ||
13 | }; | ||
14 | |||
15 | void pin_put(struct fs_pin *); | ||
16 | void pin_remove(struct fs_pin *); | ||
17 | void pin_insert(struct fs_pin *, struct vfsmount *); | ||
diff --git a/include/linux/mount.h b/include/linux/mount.h index b0c1e6574e7f..9262e4bf0cc3 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h | |||
@@ -69,6 +69,7 @@ struct vfsmount { | |||
69 | }; | 69 | }; |
70 | 70 | ||
71 | struct file; /* forward dec */ | 71 | struct file; /* forward dec */ |
72 | struct path; | ||
72 | 73 | ||
73 | extern int mnt_want_write(struct vfsmount *mnt); | 74 | extern int mnt_want_write(struct vfsmount *mnt); |
74 | extern int mnt_want_write_file(struct file *file); | 75 | extern int mnt_want_write_file(struct file *file); |
@@ -77,8 +78,7 @@ extern void mnt_drop_write(struct vfsmount *mnt); | |||
77 | extern void mnt_drop_write_file(struct file *file); | 78 | extern void mnt_drop_write_file(struct file *file); |
78 | extern void mntput(struct vfsmount *mnt); | 79 | extern void mntput(struct vfsmount *mnt); |
79 | extern struct vfsmount *mntget(struct vfsmount *mnt); | 80 | extern struct vfsmount *mntget(struct vfsmount *mnt); |
80 | extern void mnt_pin(struct vfsmount *mnt); | 81 | extern struct vfsmount *mnt_clone_internal(struct path *path); |
81 | extern void mnt_unpin(struct vfsmount *mnt); | ||
82 | extern int __mnt_is_readonly(struct vfsmount *mnt); | 82 | extern int __mnt_is_readonly(struct vfsmount *mnt); |
83 | 83 | ||
84 | struct file_system_type; | 84 | struct file_system_type; |
diff --git a/include/linux/uio.h b/include/linux/uio.h index 09a7cffc224e..48d64e6ab292 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h | |||
@@ -84,7 +84,7 @@ unsigned long iov_iter_alignment(const struct iov_iter *i); | |||
84 | void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, | 84 | void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, |
85 | unsigned long nr_segs, size_t count); | 85 | unsigned long nr_segs, size_t count); |
86 | ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, | 86 | ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, |
87 | size_t maxsize, size_t *start); | 87 | unsigned maxpages, size_t *start); |
88 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, | 88 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, |
89 | size_t maxsize, size_t *start); | 89 | size_t maxsize, size_t *start); |
90 | int iov_iter_npages(const struct iov_iter *i, int maxpages); | 90 | int iov_iter_npages(const struct iov_iter *i, int maxpages); |
diff --git a/kernel/acct.c b/kernel/acct.c index 51793520566f..b4c667d22e79 100644 --- a/kernel/acct.c +++ b/kernel/acct.c | |||
@@ -59,6 +59,7 @@ | |||
59 | #include <asm/div64.h> | 59 | #include <asm/div64.h> |
60 | #include <linux/blkdev.h> /* sector_div */ | 60 | #include <linux/blkdev.h> /* sector_div */ |
61 | #include <linux/pid_namespace.h> | 61 | #include <linux/pid_namespace.h> |
62 | #include <linux/fs_pin.h> | ||
62 | 63 | ||
63 | /* | 64 | /* |
64 | * These constants control the amount of freespace that suspend and | 65 | * These constants control the amount of freespace that suspend and |
@@ -75,172 +76,190 @@ int acct_parm[3] = {4, 2, 30}; | |||
75 | /* | 76 | /* |
76 | * External references and all of the globals. | 77 | * External references and all of the globals. |
77 | */ | 78 | */ |
78 | static void do_acct_process(struct bsd_acct_struct *acct, | 79 | static void do_acct_process(struct bsd_acct_struct *acct); |
79 | struct pid_namespace *ns, struct file *); | ||
80 | 80 | ||
81 | /* | ||
82 | * This structure is used so that all the data protected by lock | ||
83 | * can be placed in the same cache line as the lock. This primes | ||
84 | * the cache line to have the data after getting the lock. | ||
85 | */ | ||
86 | struct bsd_acct_struct { | 81 | struct bsd_acct_struct { |
82 | struct fs_pin pin; | ||
83 | struct mutex lock; | ||
87 | int active; | 84 | int active; |
88 | unsigned long needcheck; | 85 | unsigned long needcheck; |
89 | struct file *file; | 86 | struct file *file; |
90 | struct pid_namespace *ns; | 87 | struct pid_namespace *ns; |
91 | struct list_head list; | 88 | struct work_struct work; |
89 | struct completion done; | ||
92 | }; | 90 | }; |
93 | 91 | ||
94 | static DEFINE_SPINLOCK(acct_lock); | ||
95 | static LIST_HEAD(acct_list); | ||
96 | |||
97 | /* | 92 | /* |
98 | * Check the amount of free space and suspend/resume accordingly. | 93 | * Check the amount of free space and suspend/resume accordingly. |
99 | */ | 94 | */ |
100 | static int check_free_space(struct bsd_acct_struct *acct, struct file *file) | 95 | static int check_free_space(struct bsd_acct_struct *acct) |
101 | { | 96 | { |
102 | struct kstatfs sbuf; | 97 | struct kstatfs sbuf; |
103 | int res; | 98 | |
104 | int act; | 99 | if (time_is_before_jiffies(acct->needcheck)) |
105 | u64 resume; | ||
106 | u64 suspend; | ||
107 | |||
108 | spin_lock(&acct_lock); | ||
109 | res = acct->active; | ||
110 | if (!file || time_is_before_jiffies(acct->needcheck)) | ||
111 | goto out; | 100 | goto out; |
112 | spin_unlock(&acct_lock); | ||
113 | 101 | ||
114 | /* May block */ | 102 | /* May block */ |
115 | if (vfs_statfs(&file->f_path, &sbuf)) | 103 | if (vfs_statfs(&acct->file->f_path, &sbuf)) |
116 | return res; | ||
117 | suspend = sbuf.f_blocks * SUSPEND; | ||
118 | resume = sbuf.f_blocks * RESUME; | ||
119 | |||
120 | do_div(suspend, 100); | ||
121 | do_div(resume, 100); | ||
122 | |||
123 | if (sbuf.f_bavail <= suspend) | ||
124 | act = -1; | ||
125 | else if (sbuf.f_bavail >= resume) | ||
126 | act = 1; | ||
127 | else | ||
128 | act = 0; | ||
129 | |||
130 | /* | ||
131 | * If some joker switched acct->file under us we'ld better be | ||
132 | * silent and _not_ touch anything. | ||
133 | */ | ||
134 | spin_lock(&acct_lock); | ||
135 | if (file != acct->file) { | ||
136 | if (act) | ||
137 | res = act > 0; | ||
138 | goto out; | 104 | goto out; |
139 | } | ||
140 | 105 | ||
141 | if (acct->active) { | 106 | if (acct->active) { |
142 | if (act < 0) { | 107 | u64 suspend = sbuf.f_blocks * SUSPEND; |
108 | do_div(suspend, 100); | ||
109 | if (sbuf.f_bavail <= suspend) { | ||
143 | acct->active = 0; | 110 | acct->active = 0; |
144 | pr_info("Process accounting paused\n"); | 111 | pr_info("Process accounting paused\n"); |
145 | } | 112 | } |
146 | } else { | 113 | } else { |
147 | if (act > 0) { | 114 | u64 resume = sbuf.f_blocks * RESUME; |
115 | do_div(resume, 100); | ||
116 | if (sbuf.f_bavail >= resume) { | ||
148 | acct->active = 1; | 117 | acct->active = 1; |
149 | pr_info("Process accounting resumed\n"); | 118 | pr_info("Process accounting resumed\n"); |
150 | } | 119 | } |
151 | } | 120 | } |
152 | 121 | ||
153 | acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; | 122 | acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; |
154 | res = acct->active; | ||
155 | out: | 123 | out: |
156 | spin_unlock(&acct_lock); | 124 | return acct->active; |
125 | } | ||
126 | |||
127 | static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) | ||
128 | { | ||
129 | struct bsd_acct_struct *res; | ||
130 | again: | ||
131 | smp_rmb(); | ||
132 | rcu_read_lock(); | ||
133 | res = ACCESS_ONCE(ns->bacct); | ||
134 | if (!res) { | ||
135 | rcu_read_unlock(); | ||
136 | return NULL; | ||
137 | } | ||
138 | if (!atomic_long_inc_not_zero(&res->pin.count)) { | ||
139 | rcu_read_unlock(); | ||
140 | cpu_relax(); | ||
141 | goto again; | ||
142 | } | ||
143 | rcu_read_unlock(); | ||
144 | mutex_lock(&res->lock); | ||
145 | if (!res->ns) { | ||
146 | mutex_unlock(&res->lock); | ||
147 | pin_put(&res->pin); | ||
148 | goto again; | ||
149 | } | ||
157 | return res; | 150 | return res; |
158 | } | 151 | } |
159 | 152 | ||
160 | /* | 153 | static void close_work(struct work_struct *work) |
161 | * Close the old accounting file (if currently open) and then replace | ||
162 | * it with file (if non-NULL). | ||
163 | * | ||
164 | * NOTE: acct_lock MUST be held on entry and exit. | ||
165 | */ | ||
166 | static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, | ||
167 | struct pid_namespace *ns) | ||
168 | { | 154 | { |
169 | struct file *old_acct = NULL; | 155 | struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); |
170 | struct pid_namespace *old_ns = NULL; | 156 | struct file *file = acct->file; |
171 | 157 | if (file->f_op->flush) | |
172 | if (acct->file) { | 158 | file->f_op->flush(file, NULL); |
173 | old_acct = acct->file; | 159 | __fput_sync(file); |
174 | old_ns = acct->ns; | 160 | complete(&acct->done); |
175 | acct->active = 0; | 161 | } |
176 | acct->file = NULL; | 162 | |
163 | static void acct_kill(struct bsd_acct_struct *acct, | ||
164 | struct bsd_acct_struct *new) | ||
165 | { | ||
166 | if (acct) { | ||
167 | struct pid_namespace *ns = acct->ns; | ||
168 | do_acct_process(acct); | ||
169 | INIT_WORK(&acct->work, close_work); | ||
170 | init_completion(&acct->done); | ||
171 | schedule_work(&acct->work); | ||
172 | wait_for_completion(&acct->done); | ||
173 | pin_remove(&acct->pin); | ||
174 | ns->bacct = new; | ||
177 | acct->ns = NULL; | 175 | acct->ns = NULL; |
178 | list_del(&acct->list); | 176 | atomic_long_dec(&acct->pin.count); |
179 | } | 177 | mutex_unlock(&acct->lock); |
180 | if (file) { | 178 | pin_put(&acct->pin); |
181 | acct->file = file; | ||
182 | acct->ns = ns; | ||
183 | acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; | ||
184 | acct->active = 1; | ||
185 | list_add(&acct->list, &acct_list); | ||
186 | } | 179 | } |
187 | if (old_acct) { | 180 | } |
188 | mnt_unpin(old_acct->f_path.mnt); | 181 | |
189 | spin_unlock(&acct_lock); | 182 | static void acct_pin_kill(struct fs_pin *pin) |
190 | do_acct_process(acct, old_ns, old_acct); | 183 | { |
191 | filp_close(old_acct, NULL); | 184 | struct bsd_acct_struct *acct; |
192 | spin_lock(&acct_lock); | 185 | acct = container_of(pin, struct bsd_acct_struct, pin); |
186 | mutex_lock(&acct->lock); | ||
187 | if (!acct->ns) { | ||
188 | mutex_unlock(&acct->lock); | ||
189 | pin_put(pin); | ||
190 | acct = NULL; | ||
193 | } | 191 | } |
192 | acct_kill(acct, NULL); | ||
194 | } | 193 | } |
195 | 194 | ||
196 | static int acct_on(struct filename *pathname) | 195 | static int acct_on(struct filename *pathname) |
197 | { | 196 | { |
198 | struct file *file; | 197 | struct file *file; |
199 | struct vfsmount *mnt; | 198 | struct vfsmount *mnt, *internal; |
200 | struct pid_namespace *ns; | 199 | struct pid_namespace *ns = task_active_pid_ns(current); |
201 | struct bsd_acct_struct *acct = NULL; | 200 | struct bsd_acct_struct *acct, *old; |
201 | int err; | ||
202 | |||
203 | acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); | ||
204 | if (!acct) | ||
205 | return -ENOMEM; | ||
202 | 206 | ||
203 | /* Difference from BSD - they don't do O_APPEND */ | 207 | /* Difference from BSD - they don't do O_APPEND */ |
204 | file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0); | 208 | file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0); |
205 | if (IS_ERR(file)) | 209 | if (IS_ERR(file)) { |
210 | kfree(acct); | ||
206 | return PTR_ERR(file); | 211 | return PTR_ERR(file); |
212 | } | ||
207 | 213 | ||
208 | if (!S_ISREG(file_inode(file)->i_mode)) { | 214 | if (!S_ISREG(file_inode(file)->i_mode)) { |
215 | kfree(acct); | ||
209 | filp_close(file, NULL); | 216 | filp_close(file, NULL); |
210 | return -EACCES; | 217 | return -EACCES; |
211 | } | 218 | } |
212 | 219 | ||
213 | if (!file->f_op->write) { | 220 | if (!file->f_op->write) { |
221 | kfree(acct); | ||
214 | filp_close(file, NULL); | 222 | filp_close(file, NULL); |
215 | return -EIO; | 223 | return -EIO; |
216 | } | 224 | } |
217 | 225 | internal = mnt_clone_internal(&file->f_path); | |
218 | ns = task_active_pid_ns(current); | 226 | if (IS_ERR(internal)) { |
219 | if (ns->bacct == NULL) { | 227 | kfree(acct); |
220 | acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); | 228 | filp_close(file, NULL); |
221 | if (acct == NULL) { | 229 | return PTR_ERR(internal); |
222 | filp_close(file, NULL); | ||
223 | return -ENOMEM; | ||
224 | } | ||
225 | } | 230 | } |
226 | 231 | err = mnt_want_write(internal); | |
227 | spin_lock(&acct_lock); | 232 | if (err) { |
228 | if (ns->bacct == NULL) { | 233 | mntput(internal); |
229 | ns->bacct = acct; | 234 | kfree(acct); |
230 | acct = NULL; | 235 | filp_close(file, NULL); |
236 | return err; | ||
231 | } | 237 | } |
232 | |||
233 | mnt = file->f_path.mnt; | 238 | mnt = file->f_path.mnt; |
234 | mnt_pin(mnt); | 239 | file->f_path.mnt = internal; |
235 | acct_file_reopen(ns->bacct, file, ns); | 240 | |
236 | spin_unlock(&acct_lock); | 241 | atomic_long_set(&acct->pin.count, 1); |
237 | 242 | acct->pin.kill = acct_pin_kill; | |
238 | mntput(mnt); /* it's pinned, now give up active reference */ | 243 | acct->file = file; |
239 | kfree(acct); | 244 | acct->needcheck = jiffies; |
240 | 245 | acct->ns = ns; | |
246 | mutex_init(&acct->lock); | ||
247 | mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ | ||
248 | pin_insert(&acct->pin, mnt); | ||
249 | |||
250 | old = acct_get(ns); | ||
251 | if (old) | ||
252 | acct_kill(old, acct); | ||
253 | else | ||
254 | ns->bacct = acct; | ||
255 | mutex_unlock(&acct->lock); | ||
256 | mnt_drop_write(mnt); | ||
257 | mntput(mnt); | ||
241 | return 0; | 258 | return 0; |
242 | } | 259 | } |
243 | 260 | ||
261 | static DEFINE_MUTEX(acct_on_mutex); | ||
262 | |||
244 | /** | 263 | /** |
245 | * sys_acct - enable/disable process accounting | 264 | * sys_acct - enable/disable process accounting |
246 | * @name: file name for accounting records or NULL to shutdown accounting | 265 | * @name: file name for accounting records or NULL to shutdown accounting |
@@ -264,78 +283,20 @@ SYSCALL_DEFINE1(acct, const char __user *, name) | |||
264 | 283 | ||
265 | if (IS_ERR(tmp)) | 284 | if (IS_ERR(tmp)) |
266 | return PTR_ERR(tmp); | 285 | return PTR_ERR(tmp); |
286 | mutex_lock(&acct_on_mutex); | ||
267 | error = acct_on(tmp); | 287 | error = acct_on(tmp); |
288 | mutex_unlock(&acct_on_mutex); | ||
268 | putname(tmp); | 289 | putname(tmp); |
269 | } else { | 290 | } else { |
270 | struct bsd_acct_struct *acct; | 291 | acct_kill(acct_get(task_active_pid_ns(current)), NULL); |
271 | |||
272 | acct = task_active_pid_ns(current)->bacct; | ||
273 | if (acct == NULL) | ||
274 | return 0; | ||
275 | |||
276 | spin_lock(&acct_lock); | ||
277 | acct_file_reopen(acct, NULL, NULL); | ||
278 | spin_unlock(&acct_lock); | ||
279 | } | 292 | } |
280 | 293 | ||
281 | return error; | 294 | return error; |
282 | } | 295 | } |
283 | 296 | ||
284 | /** | ||
285 | * acct_auto_close - turn off a filesystem's accounting if it is on | ||
286 | * @m: vfsmount being shut down | ||
287 | * | ||
288 | * If the accounting is turned on for a file in the subtree pointed to | ||
289 | * to by m, turn accounting off. Done when m is about to die. | ||
290 | */ | ||
291 | void acct_auto_close_mnt(struct vfsmount *m) | ||
292 | { | ||
293 | struct bsd_acct_struct *acct; | ||
294 | |||
295 | spin_lock(&acct_lock); | ||
296 | restart: | ||
297 | list_for_each_entry(acct, &acct_list, list) | ||
298 | if (acct->file && acct->file->f_path.mnt == m) { | ||
299 | acct_file_reopen(acct, NULL, NULL); | ||
300 | goto restart; | ||
301 | } | ||
302 | spin_unlock(&acct_lock); | ||
303 | } | ||
304 | |||
305 | /** | ||
306 | * acct_auto_close - turn off a filesystem's accounting if it is on | ||
307 | * @sb: super block for the filesystem | ||
308 | * | ||
309 | * If the accounting is turned on for a file in the filesystem pointed | ||
310 | * to by sb, turn accounting off. | ||
311 | */ | ||
312 | void acct_auto_close(struct super_block *sb) | ||
313 | { | ||
314 | struct bsd_acct_struct *acct; | ||
315 | |||
316 | spin_lock(&acct_lock); | ||
317 | restart: | ||
318 | list_for_each_entry(acct, &acct_list, list) | ||
319 | if (acct->file && acct->file->f_path.dentry->d_sb == sb) { | ||
320 | acct_file_reopen(acct, NULL, NULL); | ||
321 | goto restart; | ||
322 | } | ||
323 | spin_unlock(&acct_lock); | ||
324 | } | ||
325 | |||
326 | void acct_exit_ns(struct pid_namespace *ns) | 297 | void acct_exit_ns(struct pid_namespace *ns) |
327 | { | 298 | { |
328 | struct bsd_acct_struct *acct = ns->bacct; | 299 | acct_kill(acct_get(ns), NULL); |
329 | |||
330 | if (acct == NULL) | ||
331 | return; | ||
332 | |||
333 | spin_lock(&acct_lock); | ||
334 | if (acct->file != NULL) | ||
335 | acct_file_reopen(acct, NULL, NULL); | ||
336 | spin_unlock(&acct_lock); | ||
337 | |||
338 | kfree(acct); | ||
339 | } | 300 | } |
340 | 301 | ||
341 | /* | 302 | /* |
@@ -450,38 +411,20 @@ static u32 encode_float(u64 value) | |||
450 | * do_exit() or when switching to a different output file. | 411 | * do_exit() or when switching to a different output file. |
451 | */ | 412 | */ |
452 | 413 | ||
453 | /* | 414 | static void fill_ac(acct_t *ac) |
454 | * do_acct_process does all actual work. Caller holds the reference to file. | ||
455 | */ | ||
456 | static void do_acct_process(struct bsd_acct_struct *acct, | ||
457 | struct pid_namespace *ns, struct file *file) | ||
458 | { | 415 | { |
459 | struct pacct_struct *pacct = &current->signal->pacct; | 416 | struct pacct_struct *pacct = &current->signal->pacct; |
460 | acct_t ac; | ||
461 | mm_segment_t fs; | ||
462 | unsigned long flim; | ||
463 | u64 elapsed, run_time; | 417 | u64 elapsed, run_time; |
464 | struct tty_struct *tty; | 418 | struct tty_struct *tty; |
465 | const struct cred *orig_cred; | ||
466 | |||
467 | /* Perform file operations on behalf of whoever enabled accounting */ | ||
468 | orig_cred = override_creds(file->f_cred); | ||
469 | |||
470 | /* | ||
471 | * First check to see if there is enough free_space to continue | ||
472 | * the process accounting system. | ||
473 | */ | ||
474 | if (!check_free_space(acct, file)) | ||
475 | goto out; | ||
476 | 419 | ||
477 | /* | 420 | /* |
478 | * Fill the accounting struct with the needed info as recorded | 421 | * Fill the accounting struct with the needed info as recorded |
479 | * by the different kernel functions. | 422 | * by the different kernel functions. |
480 | */ | 423 | */ |
481 | memset(&ac, 0, sizeof(acct_t)); | 424 | memset(ac, 0, sizeof(acct_t)); |
482 | 425 | ||
483 | ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER; | 426 | ac->ac_version = ACCT_VERSION | ACCT_BYTEORDER; |
484 | strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm)); | 427 | strlcpy(ac->ac_comm, current->comm, sizeof(ac->ac_comm)); |
485 | 428 | ||
486 | /* calculate run_time in nsec*/ | 429 | /* calculate run_time in nsec*/ |
487 | run_time = ktime_get_ns(); | 430 | run_time = ktime_get_ns(); |
@@ -489,9 +432,9 @@ static void do_acct_process(struct bsd_acct_struct *acct, | |||
489 | /* convert nsec -> AHZ */ | 432 | /* convert nsec -> AHZ */ |
490 | elapsed = nsec_to_AHZ(run_time); | 433 | elapsed = nsec_to_AHZ(run_time); |
491 | #if ACCT_VERSION == 3 | 434 | #if ACCT_VERSION == 3 |
492 | ac.ac_etime = encode_float(elapsed); | 435 | ac->ac_etime = encode_float(elapsed); |
493 | #else | 436 | #else |
494 | ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ? | 437 | ac->ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ? |
495 | (unsigned long) elapsed : (unsigned long) -1l); | 438 | (unsigned long) elapsed : (unsigned long) -1l); |
496 | #endif | 439 | #endif |
497 | #if ACCT_VERSION == 1 || ACCT_VERSION == 2 | 440 | #if ACCT_VERSION == 1 || ACCT_VERSION == 2 |
@@ -499,18 +442,58 @@ static void do_acct_process(struct bsd_acct_struct *acct, | |||
499 | /* new enlarged etime field */ | 442 | /* new enlarged etime field */ |
500 | comp2_t etime = encode_comp2_t(elapsed); | 443 | comp2_t etime = encode_comp2_t(elapsed); |
501 | 444 | ||
502 | ac.ac_etime_hi = etime >> 16; | 445 | ac->ac_etime_hi = etime >> 16; |
503 | ac.ac_etime_lo = (u16) etime; | 446 | ac->ac_etime_lo = (u16) etime; |
504 | } | 447 | } |
505 | #endif | 448 | #endif |
506 | do_div(elapsed, AHZ); | 449 | do_div(elapsed, AHZ); |
507 | ac.ac_btime = get_seconds() - elapsed; | 450 | ac->ac_btime = get_seconds() - elapsed; |
451 | #if ACCT_VERSION==2 | ||
452 | ac->ac_ahz = AHZ; | ||
453 | #endif | ||
454 | |||
455 | spin_lock_irq(&current->sighand->siglock); | ||
456 | tty = current->signal->tty; /* Safe as we hold the siglock */ | ||
457 | ac->ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0; | ||
458 | ac->ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); | ||
459 | ac->ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); | ||
460 | ac->ac_flag = pacct->ac_flag; | ||
461 | ac->ac_mem = encode_comp_t(pacct->ac_mem); | ||
462 | ac->ac_minflt = encode_comp_t(pacct->ac_minflt); | ||
463 | ac->ac_majflt = encode_comp_t(pacct->ac_majflt); | ||
464 | ac->ac_exitcode = pacct->ac_exitcode; | ||
465 | spin_unlock_irq(&current->sighand->siglock); | ||
466 | } | ||
467 | /* | ||
468 | * do_acct_process does all actual work. Caller holds the reference to file. | ||
469 | */ | ||
470 | static void do_acct_process(struct bsd_acct_struct *acct) | ||
471 | { | ||
472 | acct_t ac; | ||
473 | unsigned long flim; | ||
474 | const struct cred *orig_cred; | ||
475 | struct pid_namespace *ns = acct->ns; | ||
476 | struct file *file = acct->file; | ||
477 | |||
478 | /* | ||
479 | * Accounting records are not subject to resource limits. | ||
480 | */ | ||
481 | flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | ||
482 | current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; | ||
483 | /* Perform file operations on behalf of whoever enabled accounting */ | ||
484 | orig_cred = override_creds(file->f_cred); | ||
485 | |||
486 | /* | ||
487 | * First check to see if there is enough free_space to continue | ||
488 | * the process accounting system. | ||
489 | */ | ||
490 | if (!check_free_space(acct)) | ||
491 | goto out; | ||
492 | |||
493 | fill_ac(&ac); | ||
508 | /* we really need to bite the bullet and change layout */ | 494 | /* we really need to bite the bullet and change layout */ |
509 | ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid); | 495 | ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid); |
510 | ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid); | 496 | ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid); |
511 | #if ACCT_VERSION == 2 | ||
512 | ac.ac_ahz = AHZ; | ||
513 | #endif | ||
514 | #if ACCT_VERSION == 1 || ACCT_VERSION == 2 | 497 | #if ACCT_VERSION == 1 || ACCT_VERSION == 2 |
515 | /* backward-compatible 16 bit fields */ | 498 | /* backward-compatible 16 bit fields */ |
516 | ac.ac_uid16 = ac.ac_uid; | 499 | ac.ac_uid16 = ac.ac_uid; |
@@ -522,45 +505,18 @@ static void do_acct_process(struct bsd_acct_struct *acct, | |||
522 | ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns); | 505 | ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns); |
523 | rcu_read_unlock(); | 506 | rcu_read_unlock(); |
524 | #endif | 507 | #endif |
525 | |||
526 | spin_lock_irq(&current->sighand->siglock); | ||
527 | tty = current->signal->tty; /* Safe as we hold the siglock */ | ||
528 | ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0; | ||
529 | ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); | ||
530 | ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); | ||
531 | ac.ac_flag = pacct->ac_flag; | ||
532 | ac.ac_mem = encode_comp_t(pacct->ac_mem); | ||
533 | ac.ac_minflt = encode_comp_t(pacct->ac_minflt); | ||
534 | ac.ac_majflt = encode_comp_t(pacct->ac_majflt); | ||
535 | ac.ac_exitcode = pacct->ac_exitcode; | ||
536 | spin_unlock_irq(&current->sighand->siglock); | ||
537 | ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ | ||
538 | ac.ac_rw = encode_comp_t(ac.ac_io / 1024); | ||
539 | ac.ac_swaps = encode_comp_t(0); | ||
540 | |||
541 | /* | 508 | /* |
542 | * Get freeze protection. If the fs is frozen, just skip the write | 509 | * Get freeze protection. If the fs is frozen, just skip the write |
543 | * as we could deadlock the system otherwise. | 510 | * as we could deadlock the system otherwise. |
544 | */ | 511 | */ |
545 | if (!file_start_write_trylock(file)) | 512 | if (file_start_write_trylock(file)) { |
546 | goto out; | 513 | /* it's been opened O_APPEND, so position is irrelevant */ |
547 | /* | 514 | loff_t pos = 0; |
548 | * Kernel segment override to datasegment and write it | 515 | __kernel_write(file, (char *)&ac, sizeof(acct_t), &pos); |
549 | * to the accounting file. | 516 | file_end_write(file); |
550 | */ | 517 | } |
551 | fs = get_fs(); | ||
552 | set_fs(KERNEL_DS); | ||
553 | /* | ||
554 | * Accounting records are not subject to resource limits. | ||
555 | */ | ||
556 | flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | ||
557 | current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; | ||
558 | file->f_op->write(file, (char *)&ac, | ||
559 | sizeof(acct_t), &file->f_pos); | ||
560 | current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; | ||
561 | set_fs(fs); | ||
562 | file_end_write(file); | ||
563 | out: | 518 | out: |
519 | current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; | ||
564 | revert_creds(orig_cred); | 520 | revert_creds(orig_cred); |
565 | } | 521 | } |
566 | 522 | ||
@@ -609,34 +565,20 @@ void acct_collect(long exitcode, int group_dead) | |||
609 | spin_unlock_irq(&current->sighand->siglock); | 565 | spin_unlock_irq(&current->sighand->siglock); |
610 | } | 566 | } |
611 | 567 | ||
612 | static void acct_process_in_ns(struct pid_namespace *ns) | 568 | static void slow_acct_process(struct pid_namespace *ns) |
613 | { | 569 | { |
614 | struct file *file = NULL; | 570 | for ( ; ns; ns = ns->parent) { |
615 | struct bsd_acct_struct *acct; | 571 | struct bsd_acct_struct *acct = acct_get(ns); |
616 | 572 | if (acct) { | |
617 | acct = ns->bacct; | 573 | do_acct_process(acct); |
618 | /* | 574 | mutex_unlock(&acct->lock); |
619 | * accelerate the common fastpath: | 575 | pin_put(&acct->pin); |
620 | */ | 576 | } |
621 | if (!acct || !acct->file) | ||
622 | return; | ||
623 | |||
624 | spin_lock(&acct_lock); | ||
625 | file = acct->file; | ||
626 | if (unlikely(!file)) { | ||
627 | spin_unlock(&acct_lock); | ||
628 | return; | ||
629 | } | 577 | } |
630 | get_file(file); | ||
631 | spin_unlock(&acct_lock); | ||
632 | |||
633 | do_acct_process(acct, ns, file); | ||
634 | fput(file); | ||
635 | } | 578 | } |
636 | 579 | ||
637 | /** | 580 | /** |
638 | * acct_process - now just a wrapper around acct_process_in_ns, | 581 | * acct_process |
639 | * which in turn is a wrapper around do_acct_process. | ||
640 | * | 582 | * |
641 | * handles process accounting for an exiting task | 583 | * handles process accounting for an exiting task |
642 | */ | 584 | */ |
@@ -649,6 +591,10 @@ void acct_process(void) | |||
649 | * alive and holds its namespace, which in turn holds | 591 | * alive and holds its namespace, which in turn holds |
650 | * its parent. | 592 | * its parent. |
651 | */ | 593 | */ |
652 | for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) | 594 | for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) { |
653 | acct_process_in_ns(ns); | 595 | if (ns->bacct) |
596 | break; | ||
597 | } | ||
598 | if (unlikely(ns)) | ||
599 | slow_acct_process(ns); | ||
654 | } | 600 | } |
diff --git a/mm/filemap.c b/mm/filemap.c index f501b56ec2c6..90effcdf948d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -2602,7 +2602,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
2602 | * that this differs from normal direct-io semantics, which | 2602 | * that this differs from normal direct-io semantics, which |
2603 | * will return -EFOO even if some bytes were written. | 2603 | * will return -EFOO even if some bytes were written. |
2604 | */ | 2604 | */ |
2605 | if (unlikely(status < 0) && !written) { | 2605 | if (unlikely(status < 0)) { |
2606 | err = status; | 2606 | err = status; |
2607 | goto out; | 2607 | goto out; |
2608 | } | 2608 | } |
diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 7b5dbd1517b5..ab88dc0ea1d3 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c | |||
@@ -310,7 +310,7 @@ void iov_iter_init(struct iov_iter *i, int direction, | |||
310 | EXPORT_SYMBOL(iov_iter_init); | 310 | EXPORT_SYMBOL(iov_iter_init); |
311 | 311 | ||
312 | static ssize_t get_pages_iovec(struct iov_iter *i, | 312 | static ssize_t get_pages_iovec(struct iov_iter *i, |
313 | struct page **pages, size_t maxsize, | 313 | struct page **pages, unsigned maxpages, |
314 | size_t *start) | 314 | size_t *start) |
315 | { | 315 | { |
316 | size_t offset = i->iov_offset; | 316 | size_t offset = i->iov_offset; |
@@ -323,10 +323,10 @@ static ssize_t get_pages_iovec(struct iov_iter *i, | |||
323 | len = iov->iov_len - offset; | 323 | len = iov->iov_len - offset; |
324 | if (len > i->count) | 324 | if (len > i->count) |
325 | len = i->count; | 325 | len = i->count; |
326 | if (len > maxsize) | ||
327 | len = maxsize; | ||
328 | addr = (unsigned long)iov->iov_base + offset; | 326 | addr = (unsigned long)iov->iov_base + offset; |
329 | len += *start = addr & (PAGE_SIZE - 1); | 327 | len += *start = addr & (PAGE_SIZE - 1); |
328 | if (len > maxpages * PAGE_SIZE) | ||
329 | len = maxpages * PAGE_SIZE; | ||
330 | addr &= ~(PAGE_SIZE - 1); | 330 | addr &= ~(PAGE_SIZE - 1); |
331 | n = (len + PAGE_SIZE - 1) / PAGE_SIZE; | 331 | n = (len + PAGE_SIZE - 1) / PAGE_SIZE; |
332 | res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); | 332 | res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); |
@@ -588,15 +588,14 @@ static unsigned long alignment_bvec(const struct iov_iter *i) | |||
588 | } | 588 | } |
589 | 589 | ||
590 | static ssize_t get_pages_bvec(struct iov_iter *i, | 590 | static ssize_t get_pages_bvec(struct iov_iter *i, |
591 | struct page **pages, size_t maxsize, | 591 | struct page **pages, unsigned maxpages, |
592 | size_t *start) | 592 | size_t *start) |
593 | { | 593 | { |
594 | const struct bio_vec *bvec = i->bvec; | 594 | const struct bio_vec *bvec = i->bvec; |
595 | size_t len = bvec->bv_len - i->iov_offset; | 595 | size_t len = bvec->bv_len - i->iov_offset; |
596 | if (len > i->count) | 596 | if (len > i->count) |
597 | len = i->count; | 597 | len = i->count; |
598 | if (len > maxsize) | 598 | /* can't be more than PAGE_SIZE */ |
599 | len = maxsize; | ||
600 | *start = bvec->bv_offset + i->iov_offset; | 599 | *start = bvec->bv_offset + i->iov_offset; |
601 | 600 | ||
602 | get_page(*pages = bvec->bv_page); | 601 | get_page(*pages = bvec->bv_page); |
@@ -712,13 +711,13 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) | |||
712 | EXPORT_SYMBOL(iov_iter_alignment); | 711 | EXPORT_SYMBOL(iov_iter_alignment); |
713 | 712 | ||
714 | ssize_t iov_iter_get_pages(struct iov_iter *i, | 713 | ssize_t iov_iter_get_pages(struct iov_iter *i, |
715 | struct page **pages, size_t maxsize, | 714 | struct page **pages, unsigned maxpages, |
716 | size_t *start) | 715 | size_t *start) |
717 | { | 716 | { |
718 | if (i->type & ITER_BVEC) | 717 | if (i->type & ITER_BVEC) |
719 | return get_pages_bvec(i, pages, maxsize, start); | 718 | return get_pages_bvec(i, pages, maxpages, start); |
720 | else | 719 | else |
721 | return get_pages_iovec(i, pages, maxsize, start); | 720 | return get_pages_iovec(i, pages, maxpages, start); |
722 | } | 721 | } |
723 | EXPORT_SYMBOL(iov_iter_get_pages); | 722 | EXPORT_SYMBOL(iov_iter_get_pages); |
724 | 723 | ||
diff --git a/mm/shmem.c b/mm/shmem.c index a42add14331c..0e5fb225007c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -2323,17 +2323,45 @@ static int shmem_rmdir(struct inode *dir, struct dentry *dentry) | |||
2323 | return shmem_unlink(dir, dentry); | 2323 | return shmem_unlink(dir, dentry); |
2324 | } | 2324 | } |
2325 | 2325 | ||
2326 | static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) | ||
2327 | { | ||
2328 | bool old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode); | ||
2329 | bool new_is_dir = S_ISDIR(new_dentry->d_inode->i_mode); | ||
2330 | |||
2331 | if (old_dir != new_dir && old_is_dir != new_is_dir) { | ||
2332 | if (old_is_dir) { | ||
2333 | drop_nlink(old_dir); | ||
2334 | inc_nlink(new_dir); | ||
2335 | } else { | ||
2336 | drop_nlink(new_dir); | ||
2337 | inc_nlink(old_dir); | ||
2338 | } | ||
2339 | } | ||
2340 | old_dir->i_ctime = old_dir->i_mtime = | ||
2341 | new_dir->i_ctime = new_dir->i_mtime = | ||
2342 | old_dentry->d_inode->i_ctime = | ||
2343 | new_dentry->d_inode->i_ctime = CURRENT_TIME; | ||
2344 | |||
2345 | return 0; | ||
2346 | } | ||
2347 | |||
2326 | /* | 2348 | /* |
2327 | * The VFS layer already does all the dentry stuff for rename, | 2349 | * The VFS layer already does all the dentry stuff for rename, |
2328 | * we just have to decrement the usage count for the target if | 2350 | * we just have to decrement the usage count for the target if |
2329 | * it exists so that the VFS layer correctly free's it when it | 2351 | * it exists so that the VFS layer correctly free's it when it |
2330 | * gets overwritten. | 2352 | * gets overwritten. |
2331 | */ | 2353 | */ |
2332 | static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) | 2354 | static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) |
2333 | { | 2355 | { |
2334 | struct inode *inode = old_dentry->d_inode; | 2356 | struct inode *inode = old_dentry->d_inode; |
2335 | int they_are_dirs = S_ISDIR(inode->i_mode); | 2357 | int they_are_dirs = S_ISDIR(inode->i_mode); |
2336 | 2358 | ||
2359 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | ||
2360 | return -EINVAL; | ||
2361 | |||
2362 | if (flags & RENAME_EXCHANGE) | ||
2363 | return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry); | ||
2364 | |||
2337 | if (!simple_empty(new_dentry)) | 2365 | if (!simple_empty(new_dentry)) |
2338 | return -ENOTEMPTY; | 2366 | return -ENOTEMPTY; |
2339 | 2367 | ||
@@ -3087,7 +3115,7 @@ static const struct inode_operations shmem_dir_inode_operations = { | |||
3087 | .mkdir = shmem_mkdir, | 3115 | .mkdir = shmem_mkdir, |
3088 | .rmdir = shmem_rmdir, | 3116 | .rmdir = shmem_rmdir, |
3089 | .mknod = shmem_mknod, | 3117 | .mknod = shmem_mknod, |
3090 | .rename = shmem_rename, | 3118 | .rename2 = shmem_rename2, |
3091 | .tmpfile = shmem_tmpfile, | 3119 | .tmpfile = shmem_tmpfile, |
3092 | #endif | 3120 | #endif |
3093 | #ifdef CONFIG_TMPFS_XATTR | 3121 | #ifdef CONFIG_TMPFS_XATTR |