author     Thomas Gleixner <tglx@linutronix.de>    2013-07-12 06:34:42 -0400
committer  Thomas Gleixner <tglx@linutronix.de>    2013-07-12 06:34:42 -0400
commit     f2006e27396f55276f24434f56e208d86e7f9908 (patch)
tree       71896db916d33888b4286f80117d3cac0da40e6d /fs
parent     e399eb56a6110e13f97e644658648602e2b08de7 (diff)
parent     9903883f1dd6e86f286b7bfa6e4b423f98c1cd9e (diff)
Merge branch 'linus' into timers/urgent
Get upstream changes so we can apply fixes against them
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'fs')
301 files changed, 9187 insertions, 5085 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 55abfd62654a..6489e1fc1afd 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -31,3 +31,16 @@ config 9P_FS_POSIX_ACL
 	  If you don't know what Access Control Lists are, say N
 
 endif
+
+
+config 9P_FS_SECURITY
+	bool "9P Security Labels"
+	depends on 9P_FS
+	help
+	  Security labels support alternative access control models
+	  implemented by security modules like SELinux. This option
+	  enables an extended attribute handler for file security
+	  labels in the 9P filesystem.
+
+	  If you are not using a security module that requires using
+	  extended attributes for file security labels, say N.
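The help text above is the whole user-visible story: with 9P_FS_SECURITY enabled, security labels on a 9p mount are just extended attributes in the "security." namespace. A minimal userspace sketch of that usage follows; the mount point /mnt/9p, the file name, and the SELinux label value are illustrative assumptions, while getxattr()/setxattr() are the standard libc wrappers from <sys/xattr.h>.

/* Sketch: read and write a "security." xattr on a file that lives on a
 * 9p mount. Paths and the label value below are made-up examples. */
#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "/mnt/9p/test.txt";                 /* hypothetical 9p mount */
	const char *label_val = "system_u:object_r:etc_t:s0";  /* illustrative label */
	char label[256];
	ssize_t len;

	/* Stored under the "security." prefix, which the new 9p handler
	 * recognizes and forwards to the server-side xattr store. */
	if (setxattr(path, "security.selinux", label_val, strlen(label_val), 0) != 0)
		perror("setxattr");

	len = getxattr(path, "security.selinux", label, sizeof(label) - 1);
	if (len < 0) {
		perror("getxattr");
		return 1;
	}
	label[len] = '\0';
	printf("security.selinux = %s\n", label);
	return 0;
}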
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index ab8c12780634..ff7be98f84f2 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -11,7 +11,9 @@ obj-$(CONFIG_9P_FS) := 9p.o
 	v9fs.o \
 	fid.o \
 	xattr.o \
-	xattr_user.o
+	xattr_user.o \
+	xattr_trusted.o
 
 9p-$(CONFIG_9P_FSCACHE) += cache.o
 9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o
+9p-$(CONFIG_9P_FS_SECURITY) += xattr_security.o
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index d86edc8d3fd0..25b018efb8ab 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1054,13 +1054,11 @@ static int
 v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		 struct kstat *stat)
 {
-	int err;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid;
 	struct p9_wstat *st;
 
 	p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
-	err = -EPERM;
 	v9ses = v9fs_dentry2v9ses(dentry);
 	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
 		generic_fillattr(dentry->d_inode, stat);
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index c45e016b190f..3c28cdfb8c47 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -167,9 +167,13 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
 
 const struct xattr_handler *v9fs_xattr_handlers[] = {
 	&v9fs_xattr_user_handler,
+	&v9fs_xattr_trusted_handler,
 #ifdef CONFIG_9P_FS_POSIX_ACL
 	&v9fs_xattr_acl_access_handler,
 	&v9fs_xattr_acl_default_handler,
 #endif
+#ifdef CONFIG_9P_FS_SECURITY
+	&v9fs_xattr_security_handler,
+#endif
 	NULL
 };
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
index eec348a3df71..d3e2ea3840be 100644
--- a/fs/9p/xattr.h
+++ b/fs/9p/xattr.h
@@ -20,6 +20,8 @@
 
 extern const struct xattr_handler *v9fs_xattr_handlers[];
 extern struct xattr_handler v9fs_xattr_user_handler;
+extern struct xattr_handler v9fs_xattr_trusted_handler;
+extern struct xattr_handler v9fs_xattr_security_handler;
 extern const struct xattr_handler v9fs_xattr_acl_access_handler;
 extern const struct xattr_handler v9fs_xattr_acl_default_handler;
 
diff --git a/fs/9p/xattr_security.c b/fs/9p/xattr_security.c
new file mode 100644
index 000000000000..cb247a142a6e
--- /dev/null
+++ b/fs/9p/xattr_security.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include "xattr.h"
+
+static int v9fs_xattr_security_get(struct dentry *dentry, const char *name,
+				   void *buffer, size_t size, int type)
+{
+	int retval;
+	char *full_name;
+	size_t name_len;
+	size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+
+	name_len = strlen(name);
+	full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
+	if (!full_name)
+		return -ENOMEM;
+	memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len);
+	memcpy(full_name+prefix_len, name, name_len);
+	full_name[prefix_len + name_len] = '\0';
+
+	retval = v9fs_xattr_get(dentry, full_name, buffer, size);
+	kfree(full_name);
+	return retval;
+}
+
+static int v9fs_xattr_security_set(struct dentry *dentry, const char *name,
+				   const void *value, size_t size, int flags, int type)
+{
+	int retval;
+	char *full_name;
+	size_t name_len;
+	size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+
+	name_len = strlen(name);
+	full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
+	if (!full_name)
+		return -ENOMEM;
+	memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len);
+	memcpy(full_name + prefix_len, name, name_len);
+	full_name[prefix_len + name_len] = '\0';
+
+	retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
+	kfree(full_name);
+	return retval;
+}
+
+struct xattr_handler v9fs_xattr_security_handler = {
+	.prefix	= XATTR_SECURITY_PREFIX,
+	.get	= v9fs_xattr_security_get,
+	.set	= v9fs_xattr_security_set,
+};
diff --git a/fs/9p/xattr_trusted.c b/fs/9p/xattr_trusted.c
new file mode 100644
index 000000000000..e30d33b8a3fb
--- /dev/null
+++ b/fs/9p/xattr_trusted.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include "xattr.h"
+
+static int v9fs_xattr_trusted_get(struct dentry *dentry, const char *name,
+				  void *buffer, size_t size, int type)
+{
+	int retval;
+	char *full_name;
+	size_t name_len;
+	size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+
+	name_len = strlen(name);
+	full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
+	if (!full_name)
+		return -ENOMEM;
+	memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len);
+	memcpy(full_name+prefix_len, name, name_len);
+	full_name[prefix_len + name_len] = '\0';
+
+	retval = v9fs_xattr_get(dentry, full_name, buffer, size);
+	kfree(full_name);
+	return retval;
+}
+
+static int v9fs_xattr_trusted_set(struct dentry *dentry, const char *name,
+				  const void *value, size_t size, int flags, int type)
+{
+	int retval;
+	char *full_name;
+	size_t name_len;
+	size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+
+	name_len = strlen(name);
+	full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
+	if (!full_name)
+		return -ENOMEM;
+	memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len);
+	memcpy(full_name + prefix_len, name, name_len);
+	full_name[prefix_len + name_len] = '\0';
+
+	retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
+	kfree(full_name);
+	return retval;
+}
+
+struct xattr_handler v9fs_xattr_trusted_handler = {
+	.prefix	= XATTR_TRUSTED_PREFIX,
+	.get	= v9fs_xattr_trusted_get,
+	.set	= v9fs_xattr_trusted_set,
+};
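xattr_trusted.c and xattr_security.c above differ only in the namespace prefix they prepend before calling v9fs_xattr_get()/v9fs_xattr_set(). A hypothetical consolidation sketch (not part of this patch) could hoist that shared work into one helper sitting next to the existing code in fs/9p, at the cost of one strlen() per call instead of two near-identical files to keep in sync:

/* Hypothetical helper (illustrative only): build "<prefix><name>" once and
 * delegate to the existing v9fs xattr plumbing shown in the diff above. */
static int v9fs_xattr_prefixed_get(struct dentry *dentry, const char *prefix,
				   const char *name, void *buffer, size_t size)
{
	size_t prefix_len = strlen(prefix);
	size_t name_len;
	char *full_name;
	int retval;

	if (name == NULL || strcmp(name, "") == 0)
		return -EINVAL;

	name_len = strlen(name);
	full_name = kmalloc(prefix_len + name_len + 1, GFP_KERNEL);
	if (!full_name)
		return -ENOMEM;
	memcpy(full_name, prefix, prefix_len);
	memcpy(full_name + prefix_len, name, name_len);
	full_name[prefix_len + name_len] = '\0';

	retval = v9fs_xattr_get(dentry, full_name, buffer, size);
	kfree(full_name);
	return retval;
}

/* Each per-namespace .get callback then reduces to a one-line wrapper: */
static int v9fs_xattr_trusted_get(struct dentry *dentry, const char *name,
				  void *buffer, size_t size, int type)
{
	return v9fs_xattr_prefixed_get(dentry, XATTR_TRUSTED_PREFIX,
				       name, buffer, size);
}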
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index ade28bb058e3..0d138c0de293 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -191,8 +191,7 @@ const struct file_operations adfs_dir_operations = {
 };
 
 static int
-adfs_hash(const struct dentry *parent, const struct inode *inode,
-		struct qstr *qstr)
+adfs_hash(const struct dentry *parent, struct qstr *qstr)
 {
 	const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen;
 	const unsigned char *name;
@@ -228,8 +227,7 @@ adfs_hash(const struct dentry *parent, const struct inode *inode,
  * requirements of the underlying filesystem.
  */
 static int
-adfs_compare(const struct dentry *parent, const struct inode *pinode,
-		const struct dentry *dentry, const struct inode *inode,
+adfs_compare(const struct dentry *parent, const struct dentry *dentry,
 		unsigned int len, const char *str, const struct qstr *name)
 {
 	int i;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index ff65884a7839..c36cbb4537a2 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -13,18 +13,12 @@
 typedef int (*toupper_t)(int);
 
 static int affs_toupper(int ch);
-static int affs_hash_dentry(const struct dentry *,
-		const struct inode *, struct qstr *);
-static int affs_compare_dentry(const struct dentry *parent,
-		const struct inode *pinode,
-		const struct dentry *dentry, const struct inode *inode,
+static int affs_hash_dentry(const struct dentry *, struct qstr *);
+static int affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
 		unsigned int len, const char *str, const struct qstr *name);
 static int affs_intl_toupper(int ch);
-static int affs_intl_hash_dentry(const struct dentry *,
-		const struct inode *, struct qstr *);
-static int affs_intl_compare_dentry(const struct dentry *parent,
-		const struct inode *pinode,
-		const struct dentry *dentry, const struct inode *inode,
+static int affs_intl_hash_dentry(const struct dentry *, struct qstr *);
+static int affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
 		unsigned int len, const char *str, const struct qstr *name);
 
 const struct dentry_operations affs_dentry_operations = {
@@ -86,14 +80,12 @@ __affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
 }
 
 static int
-affs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
-		struct qstr *qstr)
+affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
 {
 	return __affs_hash_dentry(qstr, affs_toupper);
 }
 static int
-affs_intl_hash_dentry(const struct dentry *dentry, const struct inode *inode,
-		struct qstr *qstr)
+affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
 {
 	return __affs_hash_dentry(qstr, affs_intl_toupper);
 }
@@ -131,15 +123,13 @@ static inline int __affs_compare_dentry(unsigned int len,
 }
 
 static int
-affs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
-		const struct dentry *dentry, const struct inode *inode,
+affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
 		unsigned int len, const char *str, const struct qstr *name)
 {
 	return __affs_compare_dentry(len, str, name, affs_toupper);
 }
 static int
-affs_intl_compare_dentry(const struct dentry *parent,const struct inode *pinode,
-		const struct dentry *dentry, const struct inode *inode,
+affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
 		unsigned int len, const char *str, const struct qstr *name)
 {
 	return __affs_compare_dentry(len, str, name, affs_intl_toupper);
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 2497bf306c70..a8cf2cff836c 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -252,7 +252,8 @@ static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
  */
 static int afs_do_setlk(struct file *file, struct file_lock *fl)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
+	struct inode *inode = file_inode(file);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
 	afs_lock_type_t type;
 	struct key *key = file->private_data;
 	int ret;
@@ -273,7 +274,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 
 	type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
 
-	lock_flocks();
+	spin_lock(&inode->i_lock);
 
 	/* make sure we've got a callback on this file and that our view of the
 	 * data version is up to date */
@@ -420,7 +421,7 @@ given_lock:
 	afs_vnode_fetch_status(vnode, NULL, key);
 
 error:
-	unlock_flocks();
+	spin_unlock(&inode->i_lock);
 	_leave(" = %d", ret);
 	return ret;
 
diff --git a/fs/aio.c b/fs/aio.c
@@ -39,6 +39,8 @@
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
 
+#include "internal.h"
+
 #define AIO_RING_MAGIC 0xa10a10a1
 #define AIO_RING_COMPAT_FEATURES 1
 #define AIO_RING_INCOMPAT_FEATURES 0
@@ -623,7 +625,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
 	/*
 	 * Add a completion event to the ring buffer. Must be done holding
-	 * ctx->ctx_lock to prevent other code from messing with the tail
+	 * ctx->completion_lock to prevent other code from messing with the tail
 	 * pointer since we might be called from irq context.
 	 */
 	spin_lock_irqsave(&ctx->completion_lock, flags);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 13ddec92341c..3d9d3f5d5dda 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -109,7 +109,7 @@ cont:
 
 		spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
 		/* Already gone or negative dentry (under construction) - try next */
-		if (q->d_count == 0 || !simple_positive(q)) {
+		if (!d_count(q) || !simple_positive(q)) {
 			spin_unlock(&q->d_lock);
 			next = q->d_u.d_child.next;
 			goto cont;
@@ -267,7 +267,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 			else
 				ino_count++;
 
-			if (p->d_count > ino_count) {
+			if (d_count(p) > ino_count) {
 				top_ino->last_used = jiffies;
 				dput(p);
 				return 1;
@@ -409,7 +409,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 		if (!exp_leaves) {
 			/* Path walk currently on this dentry? */
 			ino_count = atomic_read(&ino->count) + 1;
-			if (dentry->d_count > ino_count)
+			if (d_count(dentry) > ino_count)
 				goto next;
 
 			if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
@@ -423,7 +423,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 		} else {
 			/* Path walk currently on this dentry? */
 			ino_count = atomic_read(&ino->count) + 1;
-			if (dentry->d_count > ino_count)
+			if (d_count(dentry) > ino_count)
 				goto next;
 
 			expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index ca8e55548d98..92ef341ba0cf 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -179,7 +179,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 		spin_lock(&active->d_lock);
 
 		/* Already gone? */
-		if (active->d_count == 0)
+		if (!d_count(active))
 			goto next;
 
 		qstr = &active->d_name;
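The autofs4 hunks above (both expire.c and root.c) stop reading dentry->d_count directly and go through the d_count() accessor instead. For reference, a sketch of that helper roughly as it looked around this series (a trivial inline in include/linux/dcache.h; treat the exact wording as an approximation):

/* Sketch: read the dentry reference count through an accessor rather than
 * dereferencing dentry->d_count at every call site, so callers like
 * autofs4 stay insulated if the underlying representation changes. */
static inline unsigned d_count(const struct dentry *dentry)
{
	return dentry->d_count;
}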
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index bce87694f7b0..89dec7f789a4 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -255,8 +255,6 @@ static int load_aout_binary(struct linux_binprm * bprm)
 		(current->mm->start_data = N_DATADDR(ex));
 	current->mm->brk = ex.a_bss +
 		(current->mm->start_brk = N_BSSADDR(ex));
-	current->mm->free_area_cache = current->mm->mmap_base;
-	current->mm->cached_hole_size = 0;
 
 	retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
 	if (retval < 0) {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f8a0b0efda44..100edcc5e312 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -738,8 +738,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
 
 	/* Do this so that we can load the interpreter, if need be. We will
 	   change some of these later */
-	current->mm->free_area_cache = current->mm->mmap_base;
-	current->mm->cached_hole_size = 0;
 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 				 executable_stack);
 	if (retval < 0) {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2091db8cdd78..c7bda5cd3da7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -58,17 +58,24 @@ static void bdev_inode_switch_bdi(struct inode *inode,
 			struct backing_dev_info *dst)
 {
 	struct backing_dev_info *old = inode->i_data.backing_dev_info;
+	bool wakeup_bdi = false;
 
 	if (unlikely(dst == old))		/* deadlock avoidance */
 		return;
 	bdi_lock_two(&old->wb, &dst->wb);
 	spin_lock(&inode->i_lock);
 	inode->i_data.backing_dev_info = dst;
-	if (inode->i_state & I_DIRTY)
+	if (inode->i_state & I_DIRTY) {
+		if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb))
+			wakeup_bdi = true;
 		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
+	}
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&old->wb.list_lock);
 	spin_unlock(&dst->wb.list_lock);
+
+	if (wakeup_bdi)
+		bdi_wakeup_thread_delayed(dst);
 }
 
 /* Kill _all_ buffers and pagecache , dirty or not.. */
@@ -325,31 +332,10 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
 static loff_t block_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *bd_inode = file->f_mapping->host;
-	loff_t size;
 	loff_t retval;
 
 	mutex_lock(&bd_inode->i_mutex);
-	size = i_size_read(bd_inode);
-
-	retval = -EINVAL;
-	switch (whence) {
-		case SEEK_END:
-			offset += size;
-			break;
-		case SEEK_CUR:
-			offset += file->f_pos;
-		case SEEK_SET:
-			break;
-		default:
-			goto out;
-	}
-	if (offset >= 0 && offset <= size) {
-		if (offset != file->f_pos) {
-			file->f_pos = offset;
-		}
-		retval = offset;
-	}
-out:
+	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
 	mutex_unlock(&bd_inode->i_mutex);
 	return retval;
 }
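The block_llseek() hunk above replaces the open-coded whence switch with the fixed_size_llseek() helper, passing the block device size as the bound. A sketch of how any fixed-size file could lean on the same helper follows; the device name, the 4 KiB size, and the fops fields are illustrative assumptions.

#include <linux/fs.h>
#include <linux/module.h>

/* Sketch: delegate seeking to fixed_size_llseek(), which handles
 * SEEK_SET/SEEK_CUR/SEEK_END against the size passed in and updates
 * f_pos -- the logic block_llseek() used to open-code. */
static loff_t mydev_llseek(struct file *file, loff_t offset, int whence)
{
	return fixed_size_llseek(file, offset, whence, 4096);	/* 4 KiB device */
}

static const struct file_operations mydev_fops = {
	.owner	= THIS_MODULE,
	.llseek	= mydev_llseek,
	/* .read / .write omitted from this sketch */
};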
@@ -1583,6 +1569,7 @@ static const struct address_space_operations def_blk_aops = {
 	.writepages	= generic_writepages,
 	.releasepage	= blkdev_releasepage,
 	.direct_IO	= blkdev_direct_IO,
+	.is_dirty_writeback = buffer_check_dirty_writeback,
 };
 
 const struct file_operations def_blk_fops = {
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 290e347b6db3..eaf133384a8f 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -255,13 +255,11 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
  * to a logical address
  */
 static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
-				  int search_commit_root,
-				  u64 time_seq,
-				  struct __prelim_ref *ref,
-				  struct ulist *parents,
-				  const u64 *extent_item_pos)
+				  struct btrfs_path *path, u64 time_seq,
+				  struct __prelim_ref *ref,
+				  struct ulist *parents,
+				  const u64 *extent_item_pos)
 {
-	struct btrfs_path *path;
 	struct btrfs_root *root;
 	struct btrfs_key root_key;
 	struct extent_buffer *eb;
@@ -269,11 +267,6 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	int root_level;
 	int level = ref->level;
 
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
-	path->search_commit_root = !!search_commit_root;
-
 	root_key.objectid = ref->root_id;
 	root_key.type = BTRFS_ROOT_ITEM_KEY;
 	root_key.offset = (u64)-1;
@@ -314,7 +307,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 				time_seq, ref->wanted_disk_byte,
 				extent_item_pos);
 out:
-	btrfs_free_path(path);
+	path->lowest_level = 0;
+	btrfs_release_path(path);
 	return ret;
 }
 
@@ -322,7 +316,7 @@ out:
  * resolve all indirect backrefs from the list
  */
 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
-				   int search_commit_root, u64 time_seq,
+				   struct btrfs_path *path, u64 time_seq,
 				   struct list_head *head,
 				   const u64 *extent_item_pos)
 {
@@ -349,9 +343,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 			continue;
 		if (ref->count == 0)
 			continue;
-		err = __resolve_indirect_ref(fs_info, search_commit_root,
-					     time_seq, ref, parents,
-					     extent_item_pos);
+		err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
+					     parents, extent_item_pos);
 		if (err == -ENOMEM)
 			goto out;
 		if (err)
@@ -604,6 +597,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 	int slot;
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
+	struct btrfs_key found_key;
 	unsigned long ptr;
 	unsigned long end;
 	struct btrfs_extent_item *ei;
@@ -621,17 +615,21 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 
 	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
 	flags = btrfs_extent_flags(leaf, ei);
+	btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
 	ptr = (unsigned long)(ei + 1);
 	end = (unsigned long)ei + item_size;
 
-	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+	if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
+	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 		struct btrfs_tree_block_info *info;
 
 		info = (struct btrfs_tree_block_info *)ptr;
 		*info_level = btrfs_tree_block_level(leaf, info);
 		ptr += sizeof(struct btrfs_tree_block_info);
 		BUG_ON(ptr > end);
+	} else if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
+		*info_level = found_key.offset;
 	} else {
 		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
 	}
@@ -795,7 +793,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_head *head;
 	int info_level = 0;
 	int ret;
-	int search_commit_root = (trans == BTRFS_BACKREF_SEARCH_COMMIT_ROOT);
 	struct list_head prefs_delayed;
 	struct list_head prefs;
 	struct __prelim_ref *ref;
@@ -804,13 +801,17 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	INIT_LIST_HEAD(&prefs_delayed);
 
 	key.objectid = bytenr;
-	key.type = BTRFS_EXTENT_ITEM_KEY;
 	key.offset = (u64)-1;
+	if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
+		key.type = BTRFS_METADATA_ITEM_KEY;
+	else
+		key.type = BTRFS_EXTENT_ITEM_KEY;
 
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	path->search_commit_root = !!search_commit_root;
+	if (!trans)
+		path->search_commit_root = 1;
 
 	/*
 	 * grab both a lock on the path and a lock on the delayed ref head.
@@ -825,7 +826,7 @@ again:
 		goto out;
 	BUG_ON(ret == 0);
 
-	if (trans != BTRFS_BACKREF_SEARCH_COMMIT_ROOT) {
+	if (trans) {
 		/*
 		 * look if there are updates for this ref queued and lock the
 		 * head
@@ -869,7 +870,8 @@ again:
 		slot = path->slots[0];
 		btrfs_item_key_to_cpu(leaf, &key, slot);
 		if (key.objectid == bytenr &&
-		    key.type == BTRFS_EXTENT_ITEM_KEY) {
+		    (key.type == BTRFS_EXTENT_ITEM_KEY ||
+		     key.type == BTRFS_METADATA_ITEM_KEY)) {
 			ret = __add_inline_refs(fs_info, path, bytenr,
 						&info_level, &prefs);
 			if (ret)
@@ -890,8 +892,8 @@ again:
 
 	__merge_refs(&prefs, 1);
 
-	ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq,
-				      &prefs, extent_item_pos);
+	ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
+				      extent_item_pos);
 	if (ret)
 		goto out;
 
@@ -1283,12 +1285,16 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 {
 	int ret;
 	u64 flags;
+	u64 size = 0;
 	u32 item_size;
 	struct extent_buffer *eb;
 	struct btrfs_extent_item *ei;
 	struct btrfs_key key;
 
-	key.type = BTRFS_EXTENT_ITEM_KEY;
+	if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
+		key.type = BTRFS_METADATA_ITEM_KEY;
+	else
+		key.type = BTRFS_EXTENT_ITEM_KEY;
 	key.objectid = logical;
 	key.offset = (u64)-1;
 
@@ -1301,9 +1307,15 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 		return ret;
 
 	btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
-	if (found_key->type != BTRFS_EXTENT_ITEM_KEY ||
+	if (found_key->type == BTRFS_METADATA_ITEM_KEY)
+		size = fs_info->extent_root->leafsize;
+	else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
+		size = found_key->offset;
+
+	if ((found_key->type != BTRFS_EXTENT_ITEM_KEY &&
+	     found_key->type != BTRFS_METADATA_ITEM_KEY) ||
 	    found_key->objectid > logical ||
-	    found_key->objectid + found_key->offset <= logical) {
+	    found_key->objectid + size <= logical) {
 		pr_debug("logical %llu is not within any extent\n",
 			 (unsigned long long)logical);
 		return -ENOENT;
@@ -1459,7 +1471,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 				iterate_extent_inodes_t *iterate, void *ctx)
 {
 	int ret;
-	struct btrfs_trans_handle *trans;
+	struct btrfs_trans_handle *trans = NULL;
 	struct ulist *refs = NULL;
 	struct ulist *roots = NULL;
 	struct ulist_node *ref_node = NULL;
@@ -1471,9 +1483,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	pr_debug("resolving all inodes for extent %llu\n",
 		 extent_item_objectid);
 
-	if (search_commit_root) {
-		trans = BTRFS_BACKREF_SEARCH_COMMIT_ROOT;
-	} else {
+	if (!search_commit_root) {
 		trans = btrfs_join_transaction(fs_info->extent_root);
 		if (IS_ERR(trans))
 			return PTR_ERR(trans);
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 0f446d7ca2c0..8f2e76702932 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -23,8 +23,6 @@
 #include "ulist.h"
 #include "extent_io.h"
 
-#define BTRFS_BACKREF_SEARCH_COMMIT_ROOT ((struct btrfs_trans_handle *)0)
-
 struct inode_fs_paths {
 	struct btrfs_path *btrfs_path;
 	struct btrfs_root *fs_root;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 02fae7f7e42c..5bf4c39e2ad6 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1089,7 +1089,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 		btrfs_set_node_ptr_generation(parent, parent_slot,
 					      trans->transid);
 		btrfs_mark_buffer_dirty(parent);
-		tree_mod_log_free_eb(root->fs_info, buf);
+		if (last_ref)
+			tree_mod_log_free_eb(root->fs_info, buf);
 		btrfs_free_tree_block(trans, root, buf, parent_start,
 				      last_ref);
 	}
@@ -1161,8 +1162,8 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
 * time_seq).
 */
 static void
-__tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
-		      struct tree_mod_elem *first_tm)
+__tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
+		      u64 time_seq, struct tree_mod_elem *first_tm)
 {
 	u32 n;
 	struct rb_node *next;
@@ -1172,6 +1173,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
 	unsigned long p_size = sizeof(struct btrfs_key_ptr);
 
 	n = btrfs_header_nritems(eb);
+	tree_mod_log_read_lock(fs_info);
 	while (tm && tm->seq >= time_seq) {
 		/*
 		 * all the operations are recorded with the operator used for
@@ -1226,6 +1228,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
 		if (tm->index != first_tm->index)
 			break;
 	}
+	tree_mod_log_read_unlock(fs_info);
 	btrfs_set_header_nritems(eb, n);
 }
 
@@ -1274,7 +1277,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
 
 	extent_buffer_get(eb_rewin);
 	btrfs_tree_read_lock(eb_rewin);
-	__tree_mod_log_rewind(eb_rewin, time_seq, tm);
+	__tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
 	WARN_ON(btrfs_header_nritems(eb_rewin) >
 		BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
 
@@ -1350,7 +1353,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 		btrfs_set_header_generation(eb, old_generation);
 	}
 	if (tm)
-		__tree_mod_log_rewind(eb, time_seq, tm);
+		__tree_mod_log_rewind(root->fs_info, eb, time_seq, tm);
 	else
 		WARN_ON(btrfs_header_level(eb) != 0);
 	WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root));
@@ -2178,12 +2181,8 @@ static void reada_for_search(struct btrfs_root *root,
 	}
 }
 
-/*
- * returns -EAGAIN if it had to drop the path, or zero if everything was in
- * cache
- */
-static noinline int reada_for_balance(struct btrfs_root *root,
-				      struct btrfs_path *path, int level)
+static noinline void reada_for_balance(struct btrfs_root *root,
+				       struct btrfs_path *path, int level)
 {
 	int slot;
 	int nritems;
@@ -2192,12 +2191,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
 	u64 gen;
 	u64 block1 = 0;
 	u64 block2 = 0;
-	int ret = 0;
 	int blocksize;
 
 	parent = path->nodes[level + 1];
 	if (!parent)
-		return 0;
+		return;
 
 	nritems = btrfs_header_nritems(parent);
 	slot = path->slots[level + 1];
@@ -2224,28 +2222,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
 			block2 = 0;
 		free_extent_buffer(eb);
 	}
-	if (block1 || block2) {
-		ret = -EAGAIN;
-
-		/* release the whole path */
-		btrfs_release_path(path);
-
-		/* read the blocks */
-		if (block1)
-			readahead_tree_block(root, block1, blocksize, 0);
-		if (block2)
-			readahead_tree_block(root, block2, blocksize, 0);
 
-		if (block1) {
-			eb = read_tree_block(root, block1, blocksize, 0);
-			free_extent_buffer(eb);
-		}
-		if (block2) {
-			eb = read_tree_block(root, block2, blocksize, 0);
-			free_extent_buffer(eb);
-		}
-	}
-	return ret;
+	if (block1)
+		readahead_tree_block(root, block1, blocksize, 0);
+	if (block2)
+		readahead_tree_block(root, block2, blocksize, 0);
 }
 
 
@@ -2359,35 +2340,28 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 	tmp = btrfs_find_tree_block(root, blocknr, blocksize);
 	if (tmp) {
 		/* first we do an atomic uptodate check */
-		if (btrfs_buffer_uptodate(tmp, 0, 1) > 0) {
-			if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
-				/*
-				 * we found an up to date block without
-				 * sleeping, return
-				 * right away
-				 */
-				*eb_ret = tmp;
-				return 0;
-			}
-			/* the pages were up to date, but we failed
-			 * the generation number check. Do a full
-			 * read for the generation number that is correct.
-			 * We must do this without dropping locks so
-			 * we can trust our generation number
-			 */
-			free_extent_buffer(tmp);
-			btrfs_set_path_blocking(p);
+		if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
+			*eb_ret = tmp;
+			return 0;
+		}
 
-			/* now we're allowed to do a blocking uptodate check */
-			tmp = read_tree_block(root, blocknr, blocksize, gen);
-			if (tmp && btrfs_buffer_uptodate(tmp, gen, 0) > 0) {
-				*eb_ret = tmp;
-				return 0;
-			}
-			free_extent_buffer(tmp);
-			btrfs_release_path(p);
-			return -EIO;
-		}
+		/* the pages were up to date, but we failed
+		 * the generation number check. Do a full
+		 * read for the generation number that is correct.
+		 * We must do this without dropping locks so
+		 * we can trust our generation number
+		 */
+		btrfs_set_path_blocking(p);
+
+		/* now we're allowed to do a blocking uptodate check */
+		ret = btrfs_read_buffer(tmp, gen);
+		if (!ret) {
+			*eb_ret = tmp;
+			return 0;
+		}
+		free_extent_buffer(tmp);
+		btrfs_release_path(p);
+		return -EIO;
 	}
 
 	/*
@@ -2448,11 +2422,8 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
 			goto again;
 		}
 
-		sret = reada_for_balance(root, p, level);
-		if (sret)
-			goto again;
-
 		btrfs_set_path_blocking(p);
+		reada_for_balance(root, p, level);
 		sret = split_node(trans, root, p, level);
 		btrfs_clear_path_blocking(p, NULL, 0);
 
@@ -2472,11 +2443,8 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
 			goto again;
 		}
 
-		sret = reada_for_balance(root, p, level);
-		if (sret)
-			goto again;
-
 		btrfs_set_path_blocking(p);
+		reada_for_balance(root, p, level);
 		sret = balance_level(trans, root, p, level);
 		btrfs_clear_path_blocking(p, NULL, 0);
 
@@ -3143,7 +3111,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 */
 static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
-			   struct btrfs_path *path, int level, int log_removal)
+			   struct btrfs_path *path, int level)
 {
 	u64 lower_gen;
 	struct extent_buffer *lower;
@@ -3194,7 +3162,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(c);
 
 	old = root->node;
-	tree_mod_log_set_root_pointer(root, c, log_removal);
+	tree_mod_log_set_root_pointer(root, c, 0);
 	rcu_assign_pointer(root->node, c);
 
 	/* the super has an extra ref to root->node */
@@ -3278,14 +3246,14 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 		/*
 		 * trying to split the root, lets make a new one
 		 *
-		 * tree mod log: We pass 0 as log_removal parameter to
+		 * tree mod log: We don't log_removal old root in
 		 * insert_new_root, because that root buffer will be kept as a
 		 * normal node. We are going to log removal of half of the
 		 * elements below with tree_mod_log_eb_copy. We're holding a
 		 * tree lock on the buffer, which is why we cannot race with
 		 * other tree_mod_log users.
 		 */
-		ret = insert_new_root(trans, root, path, level + 1, 0);
+		ret = insert_new_root(trans, root, path, level + 1);
 		if (ret)
 			return ret;
 	} else {
@@ -3986,7 +3954,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 		return -EOVERFLOW;
 
 	/* first try to make some room by pushing left and right */
-	if (data_size) {
+	if (data_size && path->nodes[1]) {
 		wret = push_leaf_right(trans, root, path, data_size,
 				       data_size, 0, 0);
 		if (wret < 0)
@@ -4005,7 +3973,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 	}
 
 	if (!path->nodes[1]) {
-		ret = insert_new_root(trans, root, path, 1, 1);
+		ret = insert_new_root(trans, root, path, 1);
 		if (ret)
 			return ret;
 	}
@@ -4430,7 +4398,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
 }
 
 /*
- * make the item pointed to by the path bigger, data_size is the new size.
+ * make the item pointed to by the path bigger, data_size is the added size.
 */
 void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
 		       u32 data_size)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d6dd49b51ba8..e795bf135e80 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -961,8 +961,8 @@ struct btrfs_dev_replace_item { | |||
| 961 | #define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) | 961 | #define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) |
| 962 | #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) | 962 | #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) |
| 963 | #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) | 963 | #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) |
| 964 | #define BTRFS_BLOCK_GROUP_RAID5 (1 << 7) | 964 | #define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) |
| 965 | #define BTRFS_BLOCK_GROUP_RAID6 (1 << 8) | 965 | #define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) |
| 966 | #define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE | 966 | #define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE |
| 967 | 967 | ||
| 968 | enum btrfs_raid_types { | 968 | enum btrfs_raid_types { |
| @@ -1102,6 +1102,18 @@ struct btrfs_space_info { | |||
| 1102 | account */ | 1102 | account */ |
| 1103 | 1103 | ||
| 1104 | /* | 1104 | /* |
| 1105 | * bytes_pinned is kept in line with what is actually pinned, as in | ||
| 1106 | * we've called update_block_group and dropped the bytes_used counter | ||
| 1107 | * and increased the bytes_pinned counter. However this means that | ||
| 1108 | * bytes_pinned does not reflect the bytes that will be pinned once the | ||
| 1109 | * delayed refs are flushed, so this counter is inc'ed everytime we call | ||
| 1110 | * btrfs_free_extent so it is a realtime count of what will be freed | ||
| 1111 | * once the transaction is committed. It will be zero'ed everytime the | ||
| 1112 | * transaction commits. | ||
| 1113 | */ | ||
| 1114 | struct percpu_counter total_bytes_pinned; | ||
| 1115 | |||
| 1116 | /* | ||
| 1105 | * we bump reservation progress every time we decrement | 1117 | * we bump reservation progress every time we decrement |
| 1106 | * bytes_reserved. This way people waiting for reservations | 1118 | * bytes_reserved. This way people waiting for reservations |
| 1107 | * know something good has happened and they can check | 1119 | * know something good has happened and they can check |
| @@ -1437,25 +1449,22 @@ struct btrfs_fs_info { | |||
| 1437 | atomic_t open_ioctl_trans; | 1449 | atomic_t open_ioctl_trans; |
| 1438 | 1450 | ||
| 1439 | /* | 1451 | /* |
| 1440 | * this is used by the balancing code to wait for all the pending | 1452 | * this is used to protect the following list -- ordered_roots. |
| 1441 | * ordered extents | ||
| 1442 | */ | 1453 | */ |
| 1443 | spinlock_t ordered_extent_lock; | 1454 | spinlock_t ordered_root_lock; |
| 1444 | 1455 | ||
| 1445 | /* | 1456 | /* |
| 1446 | * all of the data=ordered extents pending writeback | 1457 | * all fs/file tree roots in which there are data=ordered extents |
| 1458 | * pending writeback are added into this list. | ||
| 1459 | * | ||
| 1447 | * these can span multiple transactions and basically include | 1460 | * these can span multiple transactions and basically include |
| 1448 | * every dirty data page that isn't from nodatacow | 1461 | * every dirty data page that isn't from nodatacow |
| 1449 | */ | 1462 | */ |
| 1450 | struct list_head ordered_extents; | 1463 | struct list_head ordered_roots; |
| 1451 | 1464 | ||
| 1452 | spinlock_t delalloc_lock; | 1465 | spinlock_t delalloc_root_lock; |
| 1453 | /* | 1466 | /* all fs/file tree roots that have delalloc inodes. */ |
| 1454 | * all of the inodes that have delalloc bytes. It is possible for | 1467 | struct list_head delalloc_roots; |
| 1455 | * this list to be empty even when there is still dirty data=ordered | ||
| 1456 | * extents waiting to finish IO. | ||
| 1457 | */ | ||
| 1458 | struct list_head delalloc_inodes; | ||
| 1459 | 1468 | ||
| 1460 | /* | 1469 | /* |
| 1461 | * there is a pool of worker threads for checksumming during writes | 1470 | * there is a pool of worker threads for checksumming during writes |
| @@ -1498,8 +1507,6 @@ struct btrfs_fs_info { | |||
| 1498 | int do_barriers; | 1507 | int do_barriers; |
| 1499 | int closing; | 1508 | int closing; |
| 1500 | int log_root_recovering; | 1509 | int log_root_recovering; |
| 1501 | int enospc_unlink; | ||
| 1502 | int trans_no_join; | ||
| 1503 | 1510 | ||
| 1504 | u64 total_pinned; | 1511 | u64 total_pinned; |
| 1505 | 1512 | ||
| @@ -1594,6 +1601,12 @@ struct btrfs_fs_info { | |||
| 1594 | struct rb_root qgroup_tree; | 1601 | struct rb_root qgroup_tree; |
| 1595 | spinlock_t qgroup_lock; | 1602 | spinlock_t qgroup_lock; |
| 1596 | 1603 | ||
| 1604 | /* | ||
| 1605 | * used to avoid frequently calling ulist_alloc()/ulist_free() | ||
| 1606 | * when doing qgroup accounting, it must be protected by qgroup_lock. | ||
| 1607 | */ | ||
| 1608 | struct ulist *qgroup_ulist; | ||
| 1609 | |||
| 1597 | /* protect user change for quota operations */ | 1610 | /* protect user change for quota operations */ |
| 1598 | struct mutex qgroup_ioctl_lock; | 1611 | struct mutex qgroup_ioctl_lock; |
| 1599 | 1612 | ||
| @@ -1607,6 +1620,8 @@ struct btrfs_fs_info { | |||
| 1607 | struct mutex qgroup_rescan_lock; /* protects the progress item */ | 1620 | struct mutex qgroup_rescan_lock; /* protects the progress item */ |
| 1608 | struct btrfs_key qgroup_rescan_progress; | 1621 | struct btrfs_key qgroup_rescan_progress; |
| 1609 | struct btrfs_workers qgroup_rescan_workers; | 1622 | struct btrfs_workers qgroup_rescan_workers; |
| 1623 | struct completion qgroup_rescan_completion; | ||
| 1624 | struct btrfs_work qgroup_rescan_work; | ||
| 1610 | 1625 | ||
| 1611 | /* filesystem state */ | 1626 | /* filesystem state */ |
| 1612 | unsigned long fs_state; | 1627 | unsigned long fs_state; |
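The completion/work pair added here is the usual kernel idiom for letting one task block until a background worker finishes. A simplified sketch of how the two new fields fit together; the function names are illustrative, and the real rescan code also tracks whether a rescan is actually in flight:

```c
#include <linux/completion.h>

/* Sketch: the rescan worker signals everyone waiting for it to finish. */
static void qgroup_rescan_done_sketch(struct btrfs_fs_info *fs_info)
{
	complete_all(&fs_info->qgroup_rescan_completion);
}

/* Sketch: quota disable (or similar) blocks until the worker has finished. */
static void qgroup_rescan_wait_sketch(struct btrfs_fs_info *fs_info)
{
	wait_for_completion(&fs_info->qgroup_rescan_completion);
}
```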
| @@ -1739,6 +1754,31 @@ struct btrfs_root { | |||
| 1739 | int force_cow; | 1754 | int force_cow; |
| 1740 | 1755 | ||
| 1741 | spinlock_t root_item_lock; | 1756 | spinlock_t root_item_lock; |
| 1757 | atomic_t refs; | ||
| 1758 | |||
| 1759 | spinlock_t delalloc_lock; | ||
| 1760 | /* | ||
| 1761 | * all of the inodes that have delalloc bytes. It is possible for | ||
| 1762 | * this list to be empty even when there is still dirty data=ordered | ||
| 1763 | * extents waiting to finish IO. | ||
| 1764 | */ | ||
| 1765 | struct list_head delalloc_inodes; | ||
| 1766 | struct list_head delalloc_root; | ||
| 1767 | u64 nr_delalloc_inodes; | ||
| 1768 | /* | ||
| 1769 | * this is used by the balancing code to wait for all the pending | ||
| 1770 | * ordered extents | ||
| 1771 | */ | ||
| 1772 | spinlock_t ordered_extent_lock; | ||
| 1773 | |||
| 1774 | /* | ||
| 1775 | * all of the data=ordered extents pending writeback | ||
| 1776 | * these can span multiple transactions and basically include | ||
| 1777 | * every dirty data page that isn't from nodatacow | ||
| 1778 | */ | ||
| 1779 | struct list_head ordered_extents; | ||
| 1780 | struct list_head ordered_root; | ||
| 1781 | u64 nr_ordered_extents; | ||
| 1742 | }; | 1782 | }; |
| 1743 | 1783 | ||
| 1744 | struct btrfs_ioctl_defrag_range_args { | 1784 | struct btrfs_ioctl_defrag_range_args { |
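With these fields the delalloc and ordered-extent bookkeeping moves from a single global list in btrfs_fs_info to one list per root; each root then links itself onto the fs-wide delalloc_roots/ordered_roots lists seen earlier in this header. A hedged sketch of the registration step for delalloc (the helper name and the lock nesting are assumptions; the fields and locks are the ones declared above):

```c
/*
 * Sketch only: when the first delalloc inode appears on a root, the root
 * is added to the fs-wide list so flushers can find it.
 */
static void add_delalloc_inode_sketch(struct btrfs_root *root,
				      struct btrfs_inode *inode)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	spin_lock(&root->delalloc_lock);
	list_add_tail(&inode->delalloc_inodes, &root->delalloc_inodes);
	if (root->nr_delalloc_inodes++ == 0) {
		spin_lock(&fs_info->delalloc_root_lock);
		list_add_tail(&root->delalloc_root, &fs_info->delalloc_roots);
		spin_unlock(&fs_info->delalloc_root_lock);
	}
	spin_unlock(&root->delalloc_lock);
}
```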
| @@ -3028,6 +3068,8 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root, | |||
| 3028 | num_items; | 3068 | num_items; |
| 3029 | } | 3069 | } |
| 3030 | 3070 | ||
| 3071 | int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, | ||
| 3072 | struct btrfs_root *root); | ||
| 3031 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | 3073 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); |
| 3032 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 3074 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
| 3033 | struct btrfs_root *root, unsigned long count); | 3075 | struct btrfs_root *root, unsigned long count); |
| @@ -3039,6 +3081,8 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
| 3039 | u64 bytenr, u64 num, int reserved); | 3081 | u64 bytenr, u64 num, int reserved); |
| 3040 | int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, | 3082 | int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, |
| 3041 | u64 bytenr, u64 num_bytes); | 3083 | u64 bytenr, u64 num_bytes); |
| 3084 | int btrfs_exclude_logged_extents(struct btrfs_root *root, | ||
| 3085 | struct extent_buffer *eb); | ||
| 3042 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 3086 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
| 3043 | struct btrfs_root *root, | 3087 | struct btrfs_root *root, |
| 3044 | u64 objectid, u64 offset, u64 bytenr); | 3088 | u64 objectid, u64 offset, u64 bytenr); |
| @@ -3155,6 +3199,9 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, | |||
| 3155 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | 3199 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
| 3156 | struct btrfs_block_rsv *dst_rsv, | 3200 | struct btrfs_block_rsv *dst_rsv, |
| 3157 | u64 num_bytes); | 3201 | u64 num_bytes); |
| 3202 | int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, | ||
| 3203 | struct btrfs_block_rsv *dest, u64 num_bytes, | ||
| 3204 | int min_factor); | ||
| 3158 | void btrfs_block_rsv_release(struct btrfs_root *root, | 3205 | void btrfs_block_rsv_release(struct btrfs_root *root, |
| 3159 | struct btrfs_block_rsv *block_rsv, | 3206 | struct btrfs_block_rsv *block_rsv, |
| 3160 | u64 num_bytes); | 3207 | u64 num_bytes); |
| @@ -3311,6 +3358,18 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) | |||
| 3311 | smp_mb(); | 3358 | smp_mb(); |
| 3312 | return fs_info->closing; | 3359 | return fs_info->closing; |
| 3313 | } | 3360 | } |
| 3361 | |||
| 3362 | /* | ||
| 3363 | * If we remount the fs to be R/O or umount the fs, the cleaner needn't do | ||
| 3364 | * anything except sleeping. This function is used to check the status of | ||
| 3365 | * the fs. | ||
| 3366 | */ | ||
| 3367 | static inline int btrfs_need_cleaner_sleep(struct btrfs_root *root) | ||
| 3368 | { | ||
| 3369 | return (root->fs_info->sb->s_flags & MS_RDONLY || | ||
| 3370 | btrfs_fs_closing(root->fs_info)); | ||
| 3371 | } | ||
| 3372 | |||
| 3314 | static inline void free_fs_info(struct btrfs_fs_info *fs_info) | 3373 | static inline void free_fs_info(struct btrfs_fs_info *fs_info) |
| 3315 | { | 3374 | { |
| 3316 | kfree(fs_info->balance_ctl); | 3375 | kfree(fs_info->balance_ctl); |
| @@ -3357,9 +3416,9 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans, | |||
| 3357 | struct btrfs_root_item *item); | 3416 | struct btrfs_root_item *item); |
| 3358 | void btrfs_read_root_item(struct extent_buffer *eb, int slot, | 3417 | void btrfs_read_root_item(struct extent_buffer *eb, int slot, |
| 3359 | struct btrfs_root_item *item); | 3418 | struct btrfs_root_item *item); |
| 3360 | int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct | 3419 | int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key, |
| 3361 | btrfs_root_item *item, struct btrfs_key *key); | 3420 | struct btrfs_path *path, struct btrfs_root_item *root_item, |
| 3362 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); | 3421 | struct btrfs_key *root_key); |
| 3363 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); | 3422 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); |
| 3364 | void btrfs_set_root_node(struct btrfs_root_item *item, | 3423 | void btrfs_set_root_node(struct btrfs_root_item *item, |
| 3365 | struct extent_buffer *node); | 3424 | struct extent_buffer *node); |
| @@ -3493,6 +3552,10 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work); | |||
| 3493 | struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, | 3552 | struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, |
| 3494 | size_t pg_offset, u64 start, u64 len, | 3553 | size_t pg_offset, u64 start, u64 len, |
| 3495 | int create); | 3554 | int create); |
| 3555 | noinline int can_nocow_extent(struct btrfs_trans_handle *trans, | ||
| 3556 | struct inode *inode, u64 offset, u64 *len, | ||
| 3557 | u64 *orig_start, u64 *orig_block_len, | ||
| 3558 | u64 *ram_bytes); | ||
| 3496 | 3559 | ||
| 3497 | /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ | 3560 | /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ |
| 3498 | #if defined(ClearPageFsMisc) && !defined(ClearPageChecked) | 3561 | #if defined(ClearPageFsMisc) && !defined(ClearPageChecked) |
| @@ -3530,6 +3593,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
| 3530 | u32 min_type); | 3593 | u32 min_type); |
| 3531 | 3594 | ||
| 3532 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 3595 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
| 3596 | int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info, | ||
| 3597 | int delay_iput); | ||
| 3533 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 3598 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
| 3534 | struct extent_state **cached_state); | 3599 | struct extent_state **cached_state); |
| 3535 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 3600 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
| @@ -3814,6 +3879,8 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, | |||
| 3814 | int btrfs_quota_disable(struct btrfs_trans_handle *trans, | 3879 | int btrfs_quota_disable(struct btrfs_trans_handle *trans, |
| 3815 | struct btrfs_fs_info *fs_info); | 3880 | struct btrfs_fs_info *fs_info); |
| 3816 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); | 3881 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); |
| 3882 | void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); | ||
| 3883 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); | ||
| 3817 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | 3884 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, |
| 3818 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | 3885 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); |
| 3819 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | 3886 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index eb34438ddedb..375510913fe7 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
| @@ -535,20 +535,6 @@ static struct btrfs_delayed_item *__btrfs_next_delayed_item( | |||
| 535 | return next; | 535 | return next; |
| 536 | } | 536 | } |
| 537 | 537 | ||
| 538 | static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root, | ||
| 539 | u64 root_id) | ||
| 540 | { | ||
| 541 | struct btrfs_key root_key; | ||
| 542 | |||
| 543 | if (root->objectid == root_id) | ||
| 544 | return root; | ||
| 545 | |||
| 546 | root_key.objectid = root_id; | ||
| 547 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 548 | root_key.offset = (u64)-1; | ||
| 549 | return btrfs_read_fs_root_no_name(root->fs_info, &root_key); | ||
| 550 | } | ||
| 551 | |||
| 552 | static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, | 538 | static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, |
| 553 | struct btrfs_root *root, | 539 | struct btrfs_root *root, |
| 554 | struct btrfs_delayed_item *item) | 540 | struct btrfs_delayed_item *item) |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 65241f32d3f8..4253ad580e39 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
| @@ -400,7 +400,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
| 400 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; | 400 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; |
| 401 | btrfs_dev_replace_unlock(dev_replace); | 401 | btrfs_dev_replace_unlock(dev_replace); |
| 402 | 402 | ||
| 403 | btrfs_wait_ordered_extents(root, 0); | 403 | btrfs_wait_all_ordered_extents(root->fs_info, 0); |
| 404 | 404 | ||
| 405 | /* force writing the updated state information to disk */ | 405 | /* force writing the updated state information to disk */ |
| 406 | trans = btrfs_start_transaction(root, 0); | 406 | trans = btrfs_start_transaction(root, 0); |
| @@ -470,12 +470,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 470 | * flush all outstanding I/O and inode extent mappings before the | 470 | * flush all outstanding I/O and inode extent mappings before the |
| 471 | * copy operation is declared as being finished | 471 | * copy operation is declared as being finished |
| 472 | */ | 472 | */ |
| 473 | ret = btrfs_start_delalloc_inodes(root, 0); | 473 | ret = btrfs_start_all_delalloc_inodes(root->fs_info, 0); |
| 474 | if (ret) { | 474 | if (ret) { |
| 475 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 475 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
| 476 | return ret; | 476 | return ret; |
| 477 | } | 477 | } |
| 478 | btrfs_wait_ordered_extents(root, 0); | 478 | btrfs_wait_all_ordered_extents(root->fs_info, 0); |
| 479 | 479 | ||
| 480 | trans = btrfs_start_transaction(root, 0); | 480 | trans = btrfs_start_transaction(root, 0); |
| 481 | if (IS_ERR(trans)) { | 481 | if (IS_ERR(trans)) { |
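Dev-replace now flushes and waits across the whole filesystem instead of a single root, which is why both call sites switch to the fs_info-based helpers. The sequence used above, condensed into one sketch (error handling trimmed):

```c
/* Sketch: flush dirty data everywhere, then wait for the resulting I/O. */
static int flush_and_wait_all_sketch(struct btrfs_fs_info *fs_info)
{
	int ret;

	ret = btrfs_start_all_delalloc_inodes(fs_info, 0);
	if (ret)
		return ret;
	btrfs_wait_all_ordered_extents(fs_info, 0);
	return 0;
}
```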
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b0292b3ead54..6b092a1c4e37 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -1192,6 +1192,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1192 | root->objectid = objectid; | 1192 | root->objectid = objectid; |
| 1193 | root->last_trans = 0; | 1193 | root->last_trans = 0; |
| 1194 | root->highest_objectid = 0; | 1194 | root->highest_objectid = 0; |
| 1195 | root->nr_delalloc_inodes = 0; | ||
| 1196 | root->nr_ordered_extents = 0; | ||
| 1195 | root->name = NULL; | 1197 | root->name = NULL; |
| 1196 | root->inode_tree = RB_ROOT; | 1198 | root->inode_tree = RB_ROOT; |
| 1197 | INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); | 1199 | INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); |
| @@ -1200,10 +1202,16 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1200 | 1202 | ||
| 1201 | INIT_LIST_HEAD(&root->dirty_list); | 1203 | INIT_LIST_HEAD(&root->dirty_list); |
| 1202 | INIT_LIST_HEAD(&root->root_list); | 1204 | INIT_LIST_HEAD(&root->root_list); |
| 1205 | INIT_LIST_HEAD(&root->delalloc_inodes); | ||
| 1206 | INIT_LIST_HEAD(&root->delalloc_root); | ||
| 1207 | INIT_LIST_HEAD(&root->ordered_extents); | ||
| 1208 | INIT_LIST_HEAD(&root->ordered_root); | ||
| 1203 | INIT_LIST_HEAD(&root->logged_list[0]); | 1209 | INIT_LIST_HEAD(&root->logged_list[0]); |
| 1204 | INIT_LIST_HEAD(&root->logged_list[1]); | 1210 | INIT_LIST_HEAD(&root->logged_list[1]); |
| 1205 | spin_lock_init(&root->orphan_lock); | 1211 | spin_lock_init(&root->orphan_lock); |
| 1206 | spin_lock_init(&root->inode_lock); | 1212 | spin_lock_init(&root->inode_lock); |
| 1213 | spin_lock_init(&root->delalloc_lock); | ||
| 1214 | spin_lock_init(&root->ordered_extent_lock); | ||
| 1207 | spin_lock_init(&root->accounting_lock); | 1215 | spin_lock_init(&root->accounting_lock); |
| 1208 | spin_lock_init(&root->log_extents_lock[0]); | 1216 | spin_lock_init(&root->log_extents_lock[0]); |
| 1209 | spin_lock_init(&root->log_extents_lock[1]); | 1217 | spin_lock_init(&root->log_extents_lock[1]); |
| @@ -1217,6 +1225,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1217 | atomic_set(&root->log_writers, 0); | 1225 | atomic_set(&root->log_writers, 0); |
| 1218 | atomic_set(&root->log_batch, 0); | 1226 | atomic_set(&root->log_batch, 0); |
| 1219 | atomic_set(&root->orphan_inodes, 0); | 1227 | atomic_set(&root->orphan_inodes, 0); |
| 1228 | atomic_set(&root->refs, 1); | ||
| 1220 | root->log_transid = 0; | 1229 | root->log_transid = 0; |
| 1221 | root->last_log_commit = 0; | 1230 | root->last_log_commit = 0; |
| 1222 | extent_io_tree_init(&root->dirty_log_pages, | 1231 | extent_io_tree_init(&root->dirty_log_pages, |
| @@ -1235,39 +1244,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1235 | spin_lock_init(&root->root_item_lock); | 1244 | spin_lock_init(&root->root_item_lock); |
| 1236 | } | 1245 | } |
| 1237 | 1246 | ||
| 1238 | static int __must_check find_and_setup_root(struct btrfs_root *tree_root, | ||
| 1239 | struct btrfs_fs_info *fs_info, | ||
| 1240 | u64 objectid, | ||
| 1241 | struct btrfs_root *root) | ||
| 1242 | { | ||
| 1243 | int ret; | ||
| 1244 | u32 blocksize; | ||
| 1245 | u64 generation; | ||
| 1246 | |||
| 1247 | __setup_root(tree_root->nodesize, tree_root->leafsize, | ||
| 1248 | tree_root->sectorsize, tree_root->stripesize, | ||
| 1249 | root, fs_info, objectid); | ||
| 1250 | ret = btrfs_find_last_root(tree_root, objectid, | ||
| 1251 | &root->root_item, &root->root_key); | ||
| 1252 | if (ret > 0) | ||
| 1253 | return -ENOENT; | ||
| 1254 | else if (ret < 0) | ||
| 1255 | return ret; | ||
| 1256 | |||
| 1257 | generation = btrfs_root_generation(&root->root_item); | ||
| 1258 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | ||
| 1259 | root->commit_root = NULL; | ||
| 1260 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | ||
| 1261 | blocksize, generation); | ||
| 1262 | if (!root->node || !btrfs_buffer_uptodate(root->node, generation, 0)) { | ||
| 1263 | free_extent_buffer(root->node); | ||
| 1264 | root->node = NULL; | ||
| 1265 | return -EIO; | ||
| 1266 | } | ||
| 1267 | root->commit_root = btrfs_root_node(root); | ||
| 1268 | return 0; | ||
| 1269 | } | ||
| 1270 | |||
| 1271 | static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) | 1247 | static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) |
| 1272 | { | 1248 | { |
| 1273 | struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); | 1249 | struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); |
| @@ -1452,70 +1428,73 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
| 1452 | return 0; | 1428 | return 0; |
| 1453 | } | 1429 | } |
| 1454 | 1430 | ||
| 1455 | struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | 1431 | struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, |
| 1456 | struct btrfs_key *location) | 1432 | struct btrfs_key *key) |
| 1457 | { | 1433 | { |
| 1458 | struct btrfs_root *root; | 1434 | struct btrfs_root *root; |
| 1459 | struct btrfs_fs_info *fs_info = tree_root->fs_info; | 1435 | struct btrfs_fs_info *fs_info = tree_root->fs_info; |
| 1460 | struct btrfs_path *path; | 1436 | struct btrfs_path *path; |
| 1461 | struct extent_buffer *l; | ||
| 1462 | u64 generation; | 1437 | u64 generation; |
| 1463 | u32 blocksize; | 1438 | u32 blocksize; |
| 1464 | int ret = 0; | 1439 | int ret; |
| 1465 | int slot; | ||
| 1466 | 1440 | ||
| 1467 | root = btrfs_alloc_root(fs_info); | 1441 | path = btrfs_alloc_path(); |
| 1468 | if (!root) | 1442 | if (!path) |
| 1469 | return ERR_PTR(-ENOMEM); | 1443 | return ERR_PTR(-ENOMEM); |
| 1470 | if (location->offset == (u64)-1) { | 1444 | |
| 1471 | ret = find_and_setup_root(tree_root, fs_info, | 1445 | root = btrfs_alloc_root(fs_info); |
| 1472 | location->objectid, root); | 1446 | if (!root) { |
| 1473 | if (ret) { | 1447 | ret = -ENOMEM; |
| 1474 | kfree(root); | 1448 | goto alloc_fail; |
| 1475 | return ERR_PTR(ret); | ||
| 1476 | } | ||
| 1477 | goto out; | ||
| 1478 | } | 1449 | } |
| 1479 | 1450 | ||
| 1480 | __setup_root(tree_root->nodesize, tree_root->leafsize, | 1451 | __setup_root(tree_root->nodesize, tree_root->leafsize, |
| 1481 | tree_root->sectorsize, tree_root->stripesize, | 1452 | tree_root->sectorsize, tree_root->stripesize, |
| 1482 | root, fs_info, location->objectid); | 1453 | root, fs_info, key->objectid); |
| 1483 | 1454 | ||
| 1484 | path = btrfs_alloc_path(); | 1455 | ret = btrfs_find_root(tree_root, key, path, |
| 1485 | if (!path) { | 1456 | &root->root_item, &root->root_key); |
| 1486 | kfree(root); | ||
| 1487 | return ERR_PTR(-ENOMEM); | ||
| 1488 | } | ||
| 1489 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | ||
| 1490 | if (ret == 0) { | ||
| 1491 | l = path->nodes[0]; | ||
| 1492 | slot = path->slots[0]; | ||
| 1493 | btrfs_read_root_item(l, slot, &root->root_item); | ||
| 1494 | memcpy(&root->root_key, location, sizeof(*location)); | ||
| 1495 | } | ||
| 1496 | btrfs_free_path(path); | ||
| 1497 | if (ret) { | 1457 | if (ret) { |
| 1498 | kfree(root); | ||
| 1499 | if (ret > 0) | 1458 | if (ret > 0) |
| 1500 | ret = -ENOENT; | 1459 | ret = -ENOENT; |
| 1501 | return ERR_PTR(ret); | 1460 | goto find_fail; |
| 1502 | } | 1461 | } |
| 1503 | 1462 | ||
| 1504 | generation = btrfs_root_generation(&root->root_item); | 1463 | generation = btrfs_root_generation(&root->root_item); |
| 1505 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1464 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
| 1506 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1465 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
| 1507 | blocksize, generation); | 1466 | blocksize, generation); |
| 1508 | if (!root->node || !extent_buffer_uptodate(root->node)) { | 1467 | if (!root->node) { |
| 1509 | ret = (!root->node) ? -ENOMEM : -EIO; | 1468 | ret = -ENOMEM; |
| 1510 | 1469 | goto find_fail; | |
| 1511 | free_extent_buffer(root->node); | 1470 | } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) { |
| 1512 | kfree(root); | 1471 | ret = -EIO; |
| 1513 | return ERR_PTR(ret); | 1472 | goto read_fail; |
| 1514 | } | 1473 | } |
| 1515 | |||
| 1516 | root->commit_root = btrfs_root_node(root); | 1474 | root->commit_root = btrfs_root_node(root); |
| 1517 | out: | 1475 | out: |
| 1518 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { | 1476 | btrfs_free_path(path); |
| 1477 | return root; | ||
| 1478 | |||
| 1479 | read_fail: | ||
| 1480 | free_extent_buffer(root->node); | ||
| 1481 | find_fail: | ||
| 1482 | kfree(root); | ||
| 1483 | alloc_fail: | ||
| 1484 | root = ERR_PTR(ret); | ||
| 1485 | goto out; | ||
| 1486 | } | ||
| 1487 | |||
| 1488 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | ||
| 1489 | struct btrfs_key *location) | ||
| 1490 | { | ||
| 1491 | struct btrfs_root *root; | ||
| 1492 | |||
| 1493 | root = btrfs_read_tree_root(tree_root, location); | ||
| 1494 | if (IS_ERR(root)) | ||
| 1495 | return root; | ||
| 1496 | |||
| 1497 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
| 1519 | root->ref_cows = 1; | 1498 | root->ref_cows = 1; |
| 1520 | btrfs_check_and_init_root_item(&root->root_item); | 1499 | btrfs_check_and_init_root_item(&root->root_item); |
| 1521 | } | 1500 | } |
| @@ -1523,6 +1502,66 @@ out: | |||
| 1523 | return root; | 1502 | return root; |
| 1524 | } | 1503 | } |
| 1525 | 1504 | ||
| 1505 | int btrfs_init_fs_root(struct btrfs_root *root) | ||
| 1506 | { | ||
| 1507 | int ret; | ||
| 1508 | |||
| 1509 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); | ||
| 1510 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), | ||
| 1511 | GFP_NOFS); | ||
| 1512 | if (!root->free_ino_pinned || !root->free_ino_ctl) { | ||
| 1513 | ret = -ENOMEM; | ||
| 1514 | goto fail; | ||
| 1515 | } | ||
| 1516 | |||
| 1517 | btrfs_init_free_ino_ctl(root); | ||
| 1518 | mutex_init(&root->fs_commit_mutex); | ||
| 1519 | spin_lock_init(&root->cache_lock); | ||
| 1520 | init_waitqueue_head(&root->cache_wait); | ||
| 1521 | |||
| 1522 | ret = get_anon_bdev(&root->anon_dev); | ||
| 1523 | if (ret) | ||
| 1524 | goto fail; | ||
| 1525 | return 0; | ||
| 1526 | fail: | ||
| 1527 | kfree(root->free_ino_ctl); | ||
| 1528 | kfree(root->free_ino_pinned); | ||
| 1529 | return ret; | ||
| 1530 | } | ||
| 1531 | |||
| 1532 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | ||
| 1533 | u64 root_id) | ||
| 1534 | { | ||
| 1535 | struct btrfs_root *root; | ||
| 1536 | |||
| 1537 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
| 1538 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | ||
| 1539 | (unsigned long)root_id); | ||
| 1540 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
| 1541 | return root; | ||
| 1542 | } | ||
| 1543 | |||
| 1544 | int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, | ||
| 1545 | struct btrfs_root *root) | ||
| 1546 | { | ||
| 1547 | int ret; | ||
| 1548 | |||
| 1549 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
| 1550 | if (ret) | ||
| 1551 | return ret; | ||
| 1552 | |||
| 1553 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
| 1554 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | ||
| 1555 | (unsigned long)root->root_key.objectid, | ||
| 1556 | root); | ||
| 1557 | if (ret == 0) | ||
| 1558 | root->in_radix = 1; | ||
| 1559 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
| 1560 | radix_tree_preload_end(); | ||
| 1561 | |||
| 1562 | return ret; | ||
| 1563 | } | ||
| 1564 | |||
| 1526 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | 1565 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, |
| 1527 | struct btrfs_key *location) | 1566 | struct btrfs_key *location) |
| 1528 | { | 1567 | { |
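The three helpers above split the old monolithic lookup into a radix-tree lookup, one-time in-memory initialisation, and insertion into the radix tree; the next hunk rebuilds btrfs_read_fs_root_no_name() from exactly these pieces. As a summary, the caller-side flow is roughly the following sketch (cleanup of a half-built root on the error paths is omitted for brevity):

```c
/* Sketch of the lookup / read / init / insert sequence used by callers. */
static struct btrfs_root *get_fs_root_sketch(struct btrfs_fs_info *fs_info,
					     struct btrfs_key *location)
{
	struct btrfs_root *root;
	int ret;

again:
	root = btrfs_lookup_fs_root(fs_info, location->objectid);
	if (root)
		return root;			/* already cached */

	root = btrfs_read_fs_root(fs_info->tree_root, location);
	if (IS_ERR(root))
		return root;

	ret = btrfs_init_fs_root(root);
	if (ret)
		return ERR_PTR(ret);

	ret = btrfs_insert_fs_root(fs_info, root);
	if (ret == -EEXIST)
		goto again;			/* raced with another lookup */
	if (ret)
		return ERR_PTR(ret);
	return root;
}
```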
| @@ -1543,58 +1582,30 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | |||
| 1543 | return fs_info->quota_root ? fs_info->quota_root : | 1582 | return fs_info->quota_root ? fs_info->quota_root : |
| 1544 | ERR_PTR(-ENOENT); | 1583 | ERR_PTR(-ENOENT); |
| 1545 | again: | 1584 | again: |
| 1546 | spin_lock(&fs_info->fs_roots_radix_lock); | 1585 | root = btrfs_lookup_fs_root(fs_info, location->objectid); |
| 1547 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | ||
| 1548 | (unsigned long)location->objectid); | ||
| 1549 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
| 1550 | if (root) | 1586 | if (root) |
| 1551 | return root; | 1587 | return root; |
| 1552 | 1588 | ||
| 1553 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | 1589 | root = btrfs_read_fs_root(fs_info->tree_root, location); |
| 1554 | if (IS_ERR(root)) | 1590 | if (IS_ERR(root)) |
| 1555 | return root; | 1591 | return root; |
| 1556 | 1592 | ||
| 1557 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); | 1593 | if (btrfs_root_refs(&root->root_item) == 0) { |
| 1558 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), | 1594 | ret = -ENOENT; |
| 1559 | GFP_NOFS); | ||
| 1560 | if (!root->free_ino_pinned || !root->free_ino_ctl) { | ||
| 1561 | ret = -ENOMEM; | ||
| 1562 | goto fail; | 1595 | goto fail; |
| 1563 | } | 1596 | } |
| 1564 | 1597 | ||
| 1565 | btrfs_init_free_ino_ctl(root); | 1598 | ret = btrfs_init_fs_root(root); |
| 1566 | mutex_init(&root->fs_commit_mutex); | ||
| 1567 | spin_lock_init(&root->cache_lock); | ||
| 1568 | init_waitqueue_head(&root->cache_wait); | ||
| 1569 | |||
| 1570 | ret = get_anon_bdev(&root->anon_dev); | ||
| 1571 | if (ret) | 1599 | if (ret) |
| 1572 | goto fail; | 1600 | goto fail; |
| 1573 | 1601 | ||
| 1574 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
| 1575 | ret = -ENOENT; | ||
| 1576 | goto fail; | ||
| 1577 | } | ||
| 1578 | |||
| 1579 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | 1602 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); |
| 1580 | if (ret < 0) | 1603 | if (ret < 0) |
| 1581 | goto fail; | 1604 | goto fail; |
| 1582 | if (ret == 0) | 1605 | if (ret == 0) |
| 1583 | root->orphan_item_inserted = 1; | 1606 | root->orphan_item_inserted = 1; |
| 1584 | 1607 | ||
| 1585 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 1608 | ret = btrfs_insert_fs_root(fs_info, root); |
| 1586 | if (ret) | ||
| 1587 | goto fail; | ||
| 1588 | |||
| 1589 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
| 1590 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | ||
| 1591 | (unsigned long)root->root_key.objectid, | ||
| 1592 | root); | ||
| 1593 | if (ret == 0) | ||
| 1594 | root->in_radix = 1; | ||
| 1595 | |||
| 1596 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
| 1597 | radix_tree_preload_end(); | ||
| 1598 | if (ret) { | 1609 | if (ret) { |
| 1599 | if (ret == -EEXIST) { | 1610 | if (ret == -EEXIST) { |
| 1600 | free_fs_root(root); | 1611 | free_fs_root(root); |
| @@ -1602,10 +1613,6 @@ again: | |||
| 1602 | } | 1613 | } |
| 1603 | goto fail; | 1614 | goto fail; |
| 1604 | } | 1615 | } |
| 1605 | |||
| 1606 | ret = btrfs_find_dead_roots(fs_info->tree_root, | ||
| 1607 | root->root_key.objectid); | ||
| 1608 | WARN_ON(ret); | ||
| 1609 | return root; | 1616 | return root; |
| 1610 | fail: | 1617 | fail: |
| 1611 | free_fs_root(root); | 1618 | free_fs_root(root); |
| @@ -1677,21 +1684,37 @@ static void end_workqueue_fn(struct btrfs_work *work) | |||
| 1677 | static int cleaner_kthread(void *arg) | 1684 | static int cleaner_kthread(void *arg) |
| 1678 | { | 1685 | { |
| 1679 | struct btrfs_root *root = arg; | 1686 | struct btrfs_root *root = arg; |
| 1687 | int again; | ||
| 1680 | 1688 | ||
| 1681 | do { | 1689 | do { |
| 1682 | int again = 0; | 1690 | again = 0; |
| 1683 | 1691 | ||
| 1684 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && | 1692 | /* Make the cleaner go to sleep early. */ |
| 1685 | down_read_trylock(&root->fs_info->sb->s_umount)) { | 1693 | if (btrfs_need_cleaner_sleep(root)) |
| 1686 | if (mutex_trylock(&root->fs_info->cleaner_mutex)) { | 1694 | goto sleep; |
| 1687 | btrfs_run_delayed_iputs(root); | 1695 | |
| 1688 | again = btrfs_clean_one_deleted_snapshot(root); | 1696 | if (!mutex_trylock(&root->fs_info->cleaner_mutex)) |
| 1689 | mutex_unlock(&root->fs_info->cleaner_mutex); | 1697 | goto sleep; |
| 1690 | } | 1698 | |
| 1691 | btrfs_run_defrag_inodes(root->fs_info); | 1699 | /* |
| 1692 | up_read(&root->fs_info->sb->s_umount); | 1700 | * Avoid the problem that we change the status of the fs |
| 1701 | * during the above check and trylock. | ||
| 1702 | */ | ||
| 1703 | if (btrfs_need_cleaner_sleep(root)) { | ||
| 1704 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
| 1705 | goto sleep; | ||
| 1693 | } | 1706 | } |
| 1694 | 1707 | ||
| 1708 | btrfs_run_delayed_iputs(root); | ||
| 1709 | again = btrfs_clean_one_deleted_snapshot(root); | ||
| 1710 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
| 1711 | |||
| 1712 | /* | ||
| 1713 | * The defragger has dealt with the R/O remount and umount, | ||
| 1714 | * needn't do anything special here. | ||
| 1715 | */ | ||
| 1716 | btrfs_run_defrag_inodes(root->fs_info); | ||
| 1717 | sleep: | ||
| 1695 | if (!try_to_freeze() && !again) { | 1718 | if (!try_to_freeze() && !again) { |
| 1696 | set_current_state(TASK_INTERRUPTIBLE); | 1719 | set_current_state(TASK_INTERRUPTIBLE); |
| 1697 | if (!kthread_should_stop()) | 1720 | if (!kthread_should_stop()) |
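The restructured cleaner loop checks btrfs_need_cleaner_sleep() once before the trylock and once again after it, because a remount to read-only or an unmount can slip in between. Stripped of the btrfs specifics, the shape of the new loop body is roughly:

```c
/* Sketch of the check / trylock / re-check pattern used above. */
static int cleaner_pass_sketch(struct btrfs_root *root)
{
	if (btrfs_need_cleaner_sleep(root))
		return 0;				/* go back to sleep */

	if (!mutex_trylock(&root->fs_info->cleaner_mutex))
		return 0;

	if (btrfs_need_cleaner_sleep(root)) {		/* state changed meanwhile */
		mutex_unlock(&root->fs_info->cleaner_mutex);
		return 0;
	}

	btrfs_run_delayed_iputs(root);
	mutex_unlock(&root->fs_info->cleaner_mutex);
	return 1;
}
```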
| @@ -1725,7 +1748,7 @@ static int transaction_kthread(void *arg) | |||
| 1725 | } | 1748 | } |
| 1726 | 1749 | ||
| 1727 | now = get_seconds(); | 1750 | now = get_seconds(); |
| 1728 | if (!cur->blocked && | 1751 | if (cur->state < TRANS_STATE_BLOCKED && |
| 1729 | (now < cur->start_time || now - cur->start_time < 30)) { | 1752 | (now < cur->start_time || now - cur->start_time < 30)) { |
| 1730 | spin_unlock(&root->fs_info->trans_lock); | 1753 | spin_unlock(&root->fs_info->trans_lock); |
| 1731 | delay = HZ * 5; | 1754 | delay = HZ * 5; |
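The in_commit/blocked/commit_done flags are being folded into a single state field, so transaction_kthread can simply compare against TRANS_STATE_BLOCKED. That comparison only works if the states are declared in the order a transaction passes through them; the ordering it relies on, reconstructed from the states referenced in this diff (an assumption, since transaction.h is not part of this excerpt and intermediate states may exist), looks like:

```c
/* Assumed ordering of the new transaction states (sketch, not the header). */
enum btrfs_trans_state_sketch {
	TRANS_STATE_RUNNING,		/* open for new joiners */
	TRANS_STATE_BLOCKED,		/* new joins must wait */
	TRANS_STATE_COMMIT_START,	/* commit decided, flushing */
	TRANS_STATE_UNBLOCKED,		/* writers may continue */
	TRANS_STATE_COMPLETED,		/* commit fully done */
};
```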
| @@ -2035,11 +2058,11 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info) | |||
| 2035 | list_del(&gang[0]->root_list); | 2058 | list_del(&gang[0]->root_list); |
| 2036 | 2059 | ||
| 2037 | if (gang[0]->in_radix) { | 2060 | if (gang[0]->in_radix) { |
| 2038 | btrfs_free_fs_root(fs_info, gang[0]); | 2061 | btrfs_drop_and_free_fs_root(fs_info, gang[0]); |
| 2039 | } else { | 2062 | } else { |
| 2040 | free_extent_buffer(gang[0]->node); | 2063 | free_extent_buffer(gang[0]->node); |
| 2041 | free_extent_buffer(gang[0]->commit_root); | 2064 | free_extent_buffer(gang[0]->commit_root); |
| 2042 | kfree(gang[0]); | 2065 | btrfs_put_fs_root(gang[0]); |
| 2043 | } | 2066 | } |
| 2044 | } | 2067 | } |
| 2045 | 2068 | ||
| @@ -2050,7 +2073,7 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info) | |||
| 2050 | if (!ret) | 2073 | if (!ret) |
| 2051 | break; | 2074 | break; |
| 2052 | for (i = 0; i < ret; i++) | 2075 | for (i = 0; i < ret; i++) |
| 2053 | btrfs_free_fs_root(fs_info, gang[i]); | 2076 | btrfs_drop_and_free_fs_root(fs_info, gang[i]); |
| 2054 | } | 2077 | } |
| 2055 | } | 2078 | } |
| 2056 | 2079 | ||
| @@ -2082,14 +2105,8 @@ int open_ctree(struct super_block *sb, | |||
| 2082 | int backup_index = 0; | 2105 | int backup_index = 0; |
| 2083 | 2106 | ||
| 2084 | tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); | 2107 | tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); |
| 2085 | extent_root = fs_info->extent_root = btrfs_alloc_root(fs_info); | ||
| 2086 | csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info); | ||
| 2087 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); | 2108 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); |
| 2088 | dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); | 2109 | if (!tree_root || !chunk_root) { |
| 2089 | quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info); | ||
| 2090 | |||
| 2091 | if (!tree_root || !extent_root || !csum_root || | ||
| 2092 | !chunk_root || !dev_root || !quota_root) { | ||
| 2093 | err = -ENOMEM; | 2110 | err = -ENOMEM; |
| 2094 | goto fail; | 2111 | goto fail; |
| 2095 | } | 2112 | } |
| @@ -2132,9 +2149,9 @@ int open_ctree(struct super_block *sb, | |||
| 2132 | INIT_LIST_HEAD(&fs_info->trans_list); | 2149 | INIT_LIST_HEAD(&fs_info->trans_list); |
| 2133 | INIT_LIST_HEAD(&fs_info->dead_roots); | 2150 | INIT_LIST_HEAD(&fs_info->dead_roots); |
| 2134 | INIT_LIST_HEAD(&fs_info->delayed_iputs); | 2151 | INIT_LIST_HEAD(&fs_info->delayed_iputs); |
| 2135 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 2152 | INIT_LIST_HEAD(&fs_info->delalloc_roots); |
| 2136 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | 2153 | INIT_LIST_HEAD(&fs_info->caching_block_groups); |
| 2137 | spin_lock_init(&fs_info->delalloc_lock); | 2154 | spin_lock_init(&fs_info->delalloc_root_lock); |
| 2138 | spin_lock_init(&fs_info->trans_lock); | 2155 | spin_lock_init(&fs_info->trans_lock); |
| 2139 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 2156 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
| 2140 | spin_lock_init(&fs_info->delayed_iput_lock); | 2157 | spin_lock_init(&fs_info->delayed_iput_lock); |
| @@ -2170,7 +2187,6 @@ int open_ctree(struct super_block *sb, | |||
| 2170 | fs_info->max_inline = 8192 * 1024; | 2187 | fs_info->max_inline = 8192 * 1024; |
| 2171 | fs_info->metadata_ratio = 0; | 2188 | fs_info->metadata_ratio = 0; |
| 2172 | fs_info->defrag_inodes = RB_ROOT; | 2189 | fs_info->defrag_inodes = RB_ROOT; |
| 2173 | fs_info->trans_no_join = 0; | ||
| 2174 | fs_info->free_chunk_space = 0; | 2190 | fs_info->free_chunk_space = 0; |
| 2175 | fs_info->tree_mod_log = RB_ROOT; | 2191 | fs_info->tree_mod_log = RB_ROOT; |
| 2176 | 2192 | ||
| @@ -2181,8 +2197,8 @@ int open_ctree(struct super_block *sb, | |||
| 2181 | fs_info->thread_pool_size = min_t(unsigned long, | 2197 | fs_info->thread_pool_size = min_t(unsigned long, |
| 2182 | num_online_cpus() + 2, 8); | 2198 | num_online_cpus() + 2, 8); |
| 2183 | 2199 | ||
| 2184 | INIT_LIST_HEAD(&fs_info->ordered_extents); | 2200 | INIT_LIST_HEAD(&fs_info->ordered_roots); |
| 2185 | spin_lock_init(&fs_info->ordered_extent_lock); | 2201 | spin_lock_init(&fs_info->ordered_root_lock); |
| 2186 | fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), | 2202 | fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), |
| 2187 | GFP_NOFS); | 2203 | GFP_NOFS); |
| 2188 | if (!fs_info->delayed_root) { | 2204 | if (!fs_info->delayed_root) { |
| @@ -2275,6 +2291,7 @@ int open_ctree(struct super_block *sb, | |||
| 2275 | fs_info->qgroup_seq = 1; | 2291 | fs_info->qgroup_seq = 1; |
| 2276 | fs_info->quota_enabled = 0; | 2292 | fs_info->quota_enabled = 0; |
| 2277 | fs_info->pending_quota_state = 0; | 2293 | fs_info->pending_quota_state = 0; |
| 2294 | fs_info->qgroup_ulist = NULL; | ||
| 2278 | mutex_init(&fs_info->qgroup_rescan_lock); | 2295 | mutex_init(&fs_info->qgroup_rescan_lock); |
| 2279 | 2296 | ||
| 2280 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | 2297 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); |
| @@ -2639,33 +2656,44 @@ retry_root_backup: | |||
| 2639 | btrfs_set_root_node(&tree_root->root_item, tree_root->node); | 2656 | btrfs_set_root_node(&tree_root->root_item, tree_root->node); |
| 2640 | tree_root->commit_root = btrfs_root_node(tree_root); | 2657 | tree_root->commit_root = btrfs_root_node(tree_root); |
| 2641 | 2658 | ||
| 2642 | ret = find_and_setup_root(tree_root, fs_info, | 2659 | location.objectid = BTRFS_EXTENT_TREE_OBJECTID; |
| 2643 | BTRFS_EXTENT_TREE_OBJECTID, extent_root); | 2660 | location.type = BTRFS_ROOT_ITEM_KEY; |
| 2644 | if (ret) | 2661 | location.offset = 0; |
| 2662 | |||
| 2663 | extent_root = btrfs_read_tree_root(tree_root, &location); | ||
| 2664 | if (IS_ERR(extent_root)) { | ||
| 2665 | ret = PTR_ERR(extent_root); | ||
| 2645 | goto recovery_tree_root; | 2666 | goto recovery_tree_root; |
| 2667 | } | ||
| 2646 | extent_root->track_dirty = 1; | 2668 | extent_root->track_dirty = 1; |
| 2669 | fs_info->extent_root = extent_root; | ||
| 2647 | 2670 | ||
| 2648 | ret = find_and_setup_root(tree_root, fs_info, | 2671 | location.objectid = BTRFS_DEV_TREE_OBJECTID; |
| 2649 | BTRFS_DEV_TREE_OBJECTID, dev_root); | 2672 | dev_root = btrfs_read_tree_root(tree_root, &location); |
| 2650 | if (ret) | 2673 | if (IS_ERR(dev_root)) { |
| 2674 | ret = PTR_ERR(dev_root); | ||
| 2651 | goto recovery_tree_root; | 2675 | goto recovery_tree_root; |
| 2676 | } | ||
| 2652 | dev_root->track_dirty = 1; | 2677 | dev_root->track_dirty = 1; |
| 2678 | fs_info->dev_root = dev_root; | ||
| 2679 | btrfs_init_devices_late(fs_info); | ||
| 2653 | 2680 | ||
| 2654 | ret = find_and_setup_root(tree_root, fs_info, | 2681 | location.objectid = BTRFS_CSUM_TREE_OBJECTID; |
| 2655 | BTRFS_CSUM_TREE_OBJECTID, csum_root); | 2682 | csum_root = btrfs_read_tree_root(tree_root, &location); |
| 2656 | if (ret) | 2683 | if (IS_ERR(csum_root)) { |
| 2684 | ret = PTR_ERR(csum_root); | ||
| 2657 | goto recovery_tree_root; | 2685 | goto recovery_tree_root; |
| 2686 | } | ||
| 2658 | csum_root->track_dirty = 1; | 2687 | csum_root->track_dirty = 1; |
| 2688 | fs_info->csum_root = csum_root; | ||
| 2659 | 2689 | ||
| 2660 | ret = find_and_setup_root(tree_root, fs_info, | 2690 | location.objectid = BTRFS_QUOTA_TREE_OBJECTID; |
| 2661 | BTRFS_QUOTA_TREE_OBJECTID, quota_root); | 2691 | quota_root = btrfs_read_tree_root(tree_root, &location); |
| 2662 | if (ret) { | 2692 | if (!IS_ERR(quota_root)) { |
| 2663 | kfree(quota_root); | ||
| 2664 | quota_root = fs_info->quota_root = NULL; | ||
| 2665 | } else { | ||
| 2666 | quota_root->track_dirty = 1; | 2693 | quota_root->track_dirty = 1; |
| 2667 | fs_info->quota_enabled = 1; | 2694 | fs_info->quota_enabled = 1; |
| 2668 | fs_info->pending_quota_state = 1; | 2695 | fs_info->pending_quota_state = 1; |
| 2696 | fs_info->quota_root = quota_root; | ||
| 2669 | } | 2697 | } |
| 2670 | 2698 | ||
| 2671 | fs_info->generation = generation; | 2699 | fs_info->generation = generation; |
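open_ctree() now reads the extent, dev, csum and quota roots through btrfs_read_tree_root() instead of find_and_setup_root(), repeating the same set-key / read / mark-dirty steps for each. Purely as an illustration of that pattern (this helper is not part of the patch):

```c
/* Sketch: the read-and-track-dirty pattern repeated for each tree root. */
static struct btrfs_root *read_tracked_root_sketch(struct btrfs_root *tree_root,
						   u64 objectid)
{
	struct btrfs_key location = {
		.objectid = objectid,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *root = btrfs_read_tree_root(tree_root, &location);

	if (!IS_ERR(root))
		root->track_dirty = 1;
	return root;
}
```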
| @@ -2818,11 +2846,9 @@ retry_root_backup: | |||
| 2818 | 2846 | ||
| 2819 | location.objectid = BTRFS_FS_TREE_OBJECTID; | 2847 | location.objectid = BTRFS_FS_TREE_OBJECTID; |
| 2820 | location.type = BTRFS_ROOT_ITEM_KEY; | 2848 | location.type = BTRFS_ROOT_ITEM_KEY; |
| 2821 | location.offset = (u64)-1; | 2849 | location.offset = 0; |
| 2822 | 2850 | ||
| 2823 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); | 2851 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); |
| 2824 | if (!fs_info->fs_root) | ||
| 2825 | goto fail_qgroup; | ||
| 2826 | if (IS_ERR(fs_info->fs_root)) { | 2852 | if (IS_ERR(fs_info->fs_root)) { |
| 2827 | err = PTR_ERR(fs_info->fs_root); | 2853 | err = PTR_ERR(fs_info->fs_root); |
| 2828 | goto fail_qgroup; | 2854 | goto fail_qgroup; |
| @@ -2854,6 +2880,8 @@ retry_root_backup: | |||
| 2854 | return ret; | 2880 | return ret; |
| 2855 | } | 2881 | } |
| 2856 | 2882 | ||
| 2883 | btrfs_qgroup_rescan_resume(fs_info); | ||
| 2884 | |||
| 2857 | return 0; | 2885 | return 0; |
| 2858 | 2886 | ||
| 2859 | fail_qgroup: | 2887 | fail_qgroup: |
| @@ -3259,7 +3287,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( | |||
| 3259 | BTRFS_BLOCK_GROUP_RAID10)) { | 3287 | BTRFS_BLOCK_GROUP_RAID10)) { |
| 3260 | num_tolerated_disk_barrier_failures = 1; | 3288 | num_tolerated_disk_barrier_failures = 1; |
| 3261 | } else if (flags & | 3289 | } else if (flags & |
| 3262 | BTRFS_BLOCK_GROUP_RAID5) { | 3290 | BTRFS_BLOCK_GROUP_RAID6) { |
| 3263 | num_tolerated_disk_barrier_failures = 2; | 3291 | num_tolerated_disk_barrier_failures = 2; |
| 3264 | } | 3292 | } |
| 3265 | } | 3293 | } |
| @@ -3367,7 +3395,9 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
| 3367 | return ret; | 3395 | return ret; |
| 3368 | } | 3396 | } |
| 3369 | 3397 | ||
| 3370 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | 3398 | /* Drop a fs root from the radix tree and free it. */ |
| 3399 | void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, | ||
| 3400 | struct btrfs_root *root) | ||
| 3371 | { | 3401 | { |
| 3372 | spin_lock(&fs_info->fs_roots_radix_lock); | 3402 | spin_lock(&fs_info->fs_roots_radix_lock); |
| 3373 | radix_tree_delete(&fs_info->fs_roots_radix, | 3403 | radix_tree_delete(&fs_info->fs_roots_radix, |
| @@ -3398,7 +3428,12 @@ static void free_fs_root(struct btrfs_root *root) | |||
| 3398 | kfree(root->free_ino_ctl); | 3428 | kfree(root->free_ino_ctl); |
| 3399 | kfree(root->free_ino_pinned); | 3429 | kfree(root->free_ino_pinned); |
| 3400 | kfree(root->name); | 3430 | kfree(root->name); |
| 3401 | kfree(root); | 3431 | btrfs_put_fs_root(root); |
| 3432 | } | ||
| 3433 | |||
| 3434 | void btrfs_free_fs_root(struct btrfs_root *root) | ||
| 3435 | { | ||
| 3436 | free_fs_root(root); | ||
| 3402 | } | 3437 | } |
| 3403 | 3438 | ||
| 3404 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | 3439 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) |
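free_fs_root() now ends in btrfs_put_fs_root() rather than kfree(), pairing with the atomic_set(&root->refs, 1) added in __setup_root() and the btrfs_grab_fs_root() call that appears later in this diff. One plausible shape for that grab/put pair over the new refs counter (an assumption; the real helpers are not shown in this excerpt):

```c
/* Sketch of refcount helpers matching the grab/put calls in this diff. */
static inline struct btrfs_root *grab_fs_root_sketch(struct btrfs_root *root)
{
	if (atomic_inc_not_zero(&root->refs))
		return root;
	return NULL;
}

static inline void put_fs_root_sketch(struct btrfs_root *root)
{
	if (atomic_dec_and_test(&root->refs))
		kfree(root);
}
```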
| @@ -3654,7 +3689,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, | |||
| 3654 | INIT_LIST_HEAD(&splice); | 3689 | INIT_LIST_HEAD(&splice); |
| 3655 | 3690 | ||
| 3656 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 3691 | mutex_lock(&root->fs_info->ordered_operations_mutex); |
| 3657 | spin_lock(&root->fs_info->ordered_extent_lock); | 3692 | spin_lock(&root->fs_info->ordered_root_lock); |
| 3658 | 3693 | ||
| 3659 | list_splice_init(&t->ordered_operations, &splice); | 3694 | list_splice_init(&t->ordered_operations, &splice); |
| 3660 | while (!list_empty(&splice)) { | 3695 | while (!list_empty(&splice)) { |
| @@ -3662,14 +3697,14 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, | |||
| 3662 | ordered_operations); | 3697 | ordered_operations); |
| 3663 | 3698 | ||
| 3664 | list_del_init(&btrfs_inode->ordered_operations); | 3699 | list_del_init(&btrfs_inode->ordered_operations); |
| 3665 | spin_unlock(&root->fs_info->ordered_extent_lock); | 3700 | spin_unlock(&root->fs_info->ordered_root_lock); |
| 3666 | 3701 | ||
| 3667 | btrfs_invalidate_inodes(btrfs_inode->root); | 3702 | btrfs_invalidate_inodes(btrfs_inode->root); |
| 3668 | 3703 | ||
| 3669 | spin_lock(&root->fs_info->ordered_extent_lock); | 3704 | spin_lock(&root->fs_info->ordered_root_lock); |
| 3670 | } | 3705 | } |
| 3671 | 3706 | ||
| 3672 | spin_unlock(&root->fs_info->ordered_extent_lock); | 3707 | spin_unlock(&root->fs_info->ordered_root_lock); |
| 3673 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | 3708 | mutex_unlock(&root->fs_info->ordered_operations_mutex); |
| 3674 | } | 3709 | } |
| 3675 | 3710 | ||
| @@ -3677,15 +3712,36 @@ static void btrfs_destroy_ordered_extents(struct btrfs_root *root) | |||
| 3677 | { | 3712 | { |
| 3678 | struct btrfs_ordered_extent *ordered; | 3713 | struct btrfs_ordered_extent *ordered; |
| 3679 | 3714 | ||
| 3680 | spin_lock(&root->fs_info->ordered_extent_lock); | 3715 | spin_lock(&root->ordered_extent_lock); |
| 3681 | /* | 3716 | /* |
| 3682 | * This will just short circuit the ordered completion stuff which will | 3717 | * This will just short circuit the ordered completion stuff which will |
| 3683 | * make sure the ordered extent gets properly cleaned up. | 3718 | * make sure the ordered extent gets properly cleaned up. |
| 3684 | */ | 3719 | */ |
| 3685 | list_for_each_entry(ordered, &root->fs_info->ordered_extents, | 3720 | list_for_each_entry(ordered, &root->ordered_extents, |
| 3686 | root_extent_list) | 3721 | root_extent_list) |
| 3687 | set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); | 3722 | set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); |
| 3688 | spin_unlock(&root->fs_info->ordered_extent_lock); | 3723 | spin_unlock(&root->ordered_extent_lock); |
| 3724 | } | ||
| 3725 | |||
| 3726 | static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) | ||
| 3727 | { | ||
| 3728 | struct btrfs_root *root; | ||
| 3729 | struct list_head splice; | ||
| 3730 | |||
| 3731 | INIT_LIST_HEAD(&splice); | ||
| 3732 | |||
| 3733 | spin_lock(&fs_info->ordered_root_lock); | ||
| 3734 | list_splice_init(&fs_info->ordered_roots, &splice); | ||
| 3735 | while (!list_empty(&splice)) { | ||
| 3736 | root = list_first_entry(&splice, struct btrfs_root, | ||
| 3737 | ordered_root); | ||
| 3738 | list_del_init(&root->ordered_root); | ||
| 3739 | |||
| 3740 | btrfs_destroy_ordered_extents(root); | ||
| 3741 | |||
| 3742 | cond_resched_lock(&fs_info->ordered_root_lock); | ||
| 3743 | } | ||
| 3744 | spin_unlock(&fs_info->ordered_root_lock); | ||
| 3689 | } | 3745 | } |
| 3690 | 3746 | ||
| 3691 | int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | 3747 | int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, |
| @@ -3707,6 +3763,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
| 3707 | 3763 | ||
| 3708 | while ((node = rb_first(&delayed_refs->root)) != NULL) { | 3764 | while ((node = rb_first(&delayed_refs->root)) != NULL) { |
| 3709 | struct btrfs_delayed_ref_head *head = NULL; | 3765 | struct btrfs_delayed_ref_head *head = NULL; |
| 3766 | bool pin_bytes = false; | ||
| 3710 | 3767 | ||
| 3711 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | 3768 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); |
| 3712 | atomic_set(&ref->refs, 1); | 3769 | atomic_set(&ref->refs, 1); |
| @@ -3727,8 +3784,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
| 3727 | } | 3784 | } |
| 3728 | 3785 | ||
| 3729 | if (head->must_insert_reserved) | 3786 | if (head->must_insert_reserved) |
| 3730 | btrfs_pin_extent(root, ref->bytenr, | 3787 | pin_bytes = true; |
| 3731 | ref->num_bytes, 1); | ||
| 3732 | btrfs_free_delayed_extent_op(head->extent_op); | 3788 | btrfs_free_delayed_extent_op(head->extent_op); |
| 3733 | delayed_refs->num_heads--; | 3789 | delayed_refs->num_heads--; |
| 3734 | if (list_empty(&head->cluster)) | 3790 | if (list_empty(&head->cluster)) |
| @@ -3739,9 +3795,13 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
| 3739 | ref->in_tree = 0; | 3795 | ref->in_tree = 0; |
| 3740 | rb_erase(&ref->rb_node, &delayed_refs->root); | 3796 | rb_erase(&ref->rb_node, &delayed_refs->root); |
| 3741 | delayed_refs->num_entries--; | 3797 | delayed_refs->num_entries--; |
| 3742 | if (head) | ||
| 3743 | mutex_unlock(&head->mutex); | ||
| 3744 | spin_unlock(&delayed_refs->lock); | 3798 | spin_unlock(&delayed_refs->lock); |
| 3799 | if (head) { | ||
| 3800 | if (pin_bytes) | ||
| 3801 | btrfs_pin_extent(root, ref->bytenr, | ||
| 3802 | ref->num_bytes, 1); | ||
| 3803 | mutex_unlock(&head->mutex); | ||
| 3804 | } | ||
| 3745 | btrfs_put_delayed_ref(ref); | 3805 | btrfs_put_delayed_ref(ref); |
| 3746 | 3806 | ||
| 3747 | cond_resched(); | 3807 | cond_resched(); |
| @@ -3778,24 +3838,49 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | |||
| 3778 | 3838 | ||
| 3779 | INIT_LIST_HEAD(&splice); | 3839 | INIT_LIST_HEAD(&splice); |
| 3780 | 3840 | ||
| 3781 | spin_lock(&root->fs_info->delalloc_lock); | 3841 | spin_lock(&root->delalloc_lock); |
| 3782 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | 3842 | list_splice_init(&root->delalloc_inodes, &splice); |
| 3783 | 3843 | ||
| 3784 | while (!list_empty(&splice)) { | 3844 | while (!list_empty(&splice)) { |
| 3785 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | 3845 | btrfs_inode = list_first_entry(&splice, struct btrfs_inode, |
| 3786 | delalloc_inodes); | 3846 | delalloc_inodes); |
| 3787 | 3847 | ||
| 3788 | list_del_init(&btrfs_inode->delalloc_inodes); | 3848 | list_del_init(&btrfs_inode->delalloc_inodes); |
| 3789 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, | 3849 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, |
| 3790 | &btrfs_inode->runtime_flags); | 3850 | &btrfs_inode->runtime_flags); |
| 3791 | spin_unlock(&root->fs_info->delalloc_lock); | 3851 | spin_unlock(&root->delalloc_lock); |
| 3792 | 3852 | ||
| 3793 | btrfs_invalidate_inodes(btrfs_inode->root); | 3853 | btrfs_invalidate_inodes(btrfs_inode->root); |
| 3794 | 3854 | ||
| 3795 | spin_lock(&root->fs_info->delalloc_lock); | 3855 | spin_lock(&root->delalloc_lock); |
| 3796 | } | 3856 | } |
| 3797 | 3857 | ||
| 3798 | spin_unlock(&root->fs_info->delalloc_lock); | 3858 | spin_unlock(&root->delalloc_lock); |
| 3859 | } | ||
| 3860 | |||
| 3861 | static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info) | ||
| 3862 | { | ||
| 3863 | struct btrfs_root *root; | ||
| 3864 | struct list_head splice; | ||
| 3865 | |||
| 3866 | INIT_LIST_HEAD(&splice); | ||
| 3867 | |||
| 3868 | spin_lock(&fs_info->delalloc_root_lock); | ||
| 3869 | list_splice_init(&fs_info->delalloc_roots, &splice); | ||
| 3870 | while (!list_empty(&splice)) { | ||
| 3871 | root = list_first_entry(&splice, struct btrfs_root, | ||
| 3872 | delalloc_root); | ||
| 3873 | list_del_init(&root->delalloc_root); | ||
| 3874 | root = btrfs_grab_fs_root(root); | ||
| 3875 | BUG_ON(!root); | ||
| 3876 | spin_unlock(&fs_info->delalloc_root_lock); | ||
| 3877 | |||
| 3878 | btrfs_destroy_delalloc_inodes(root); | ||
| 3879 | btrfs_put_fs_root(root); | ||
| 3880 | |||
| 3881 | spin_lock(&fs_info->delalloc_root_lock); | ||
| 3882 | } | ||
| 3883 | spin_unlock(&fs_info->delalloc_root_lock); | ||
| 3799 | } | 3884 | } |
| 3800 | 3885 | ||
| 3801 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | 3886 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, |
| @@ -3879,19 +3964,14 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, | |||
| 3879 | btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, | 3964 | btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, |
| 3880 | cur_trans->dirty_pages.dirty_bytes); | 3965 | cur_trans->dirty_pages.dirty_bytes); |
| 3881 | 3966 | ||
| 3882 | /* FIXME: cleanup wait for commit */ | 3967 | cur_trans->state = TRANS_STATE_COMMIT_START; |
| 3883 | cur_trans->in_commit = 1; | ||
| 3884 | cur_trans->blocked = 1; | ||
| 3885 | wake_up(&root->fs_info->transaction_blocked_wait); | 3968 | wake_up(&root->fs_info->transaction_blocked_wait); |
| 3886 | 3969 | ||
| 3887 | btrfs_evict_pending_snapshots(cur_trans); | 3970 | btrfs_evict_pending_snapshots(cur_trans); |
| 3888 | 3971 | ||
| 3889 | cur_trans->blocked = 0; | 3972 | cur_trans->state = TRANS_STATE_UNBLOCKED; |
| 3890 | wake_up(&root->fs_info->transaction_wait); | 3973 | wake_up(&root->fs_info->transaction_wait); |
| 3891 | 3974 | ||
| 3892 | cur_trans->commit_done = 1; | ||
| 3893 | wake_up(&cur_trans->commit_wait); | ||
| 3894 | |||
| 3895 | btrfs_destroy_delayed_inodes(root); | 3975 | btrfs_destroy_delayed_inodes(root); |
| 3896 | btrfs_assert_delayed_root_empty(root); | 3976 | btrfs_assert_delayed_root_empty(root); |
| 3897 | 3977 | ||
| @@ -3900,6 +3980,9 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, | |||
| 3900 | btrfs_destroy_pinned_extent(root, | 3980 | btrfs_destroy_pinned_extent(root, |
| 3901 | root->fs_info->pinned_extents); | 3981 | root->fs_info->pinned_extents); |
| 3902 | 3982 | ||
| 3983 | cur_trans->state = TRANS_STATE_COMPLETED; | ||
| 3984 | wake_up(&cur_trans->commit_wait); | ||
| 3985 | |||
| 3903 | /* | 3986 | /* |
| 3904 | memset(cur_trans, 0, sizeof(*cur_trans)); | 3987 | memset(cur_trans, 0, sizeof(*cur_trans)); |
| 3905 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 3988 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
| @@ -3915,7 +3998,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
| 3915 | 3998 | ||
| 3916 | spin_lock(&root->fs_info->trans_lock); | 3999 | spin_lock(&root->fs_info->trans_lock); |
| 3917 | list_splice_init(&root->fs_info->trans_list, &list); | 4000 | list_splice_init(&root->fs_info->trans_list, &list); |
| 3918 | root->fs_info->trans_no_join = 1; | 4001 | root->fs_info->running_transaction = NULL; |
| 3919 | spin_unlock(&root->fs_info->trans_lock); | 4002 | spin_unlock(&root->fs_info->trans_lock); |
| 3920 | 4003 | ||
| 3921 | while (!list_empty(&list)) { | 4004 | while (!list_empty(&list)) { |
| @@ -3923,37 +4006,31 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
| 3923 | 4006 | ||
| 3924 | btrfs_destroy_ordered_operations(t, root); | 4007 | btrfs_destroy_ordered_operations(t, root); |
| 3925 | 4008 | ||
| 3926 | btrfs_destroy_ordered_extents(root); | 4009 | btrfs_destroy_all_ordered_extents(root->fs_info); |
| 3927 | 4010 | ||
| 3928 | btrfs_destroy_delayed_refs(t, root); | 4011 | btrfs_destroy_delayed_refs(t, root); |
| 3929 | 4012 | ||
| 3930 | /* FIXME: cleanup wait for commit */ | 4013 | /* |
| 3931 | t->in_commit = 1; | 4014 | * FIXME: cleanup wait for commit |
| 3932 | t->blocked = 1; | 4015 | * We needn't acquire the lock here, because we are during |
| 4016 | * the umount, there is no other task which will change it. | ||
| 4017 | */ | ||
| 4018 | t->state = TRANS_STATE_COMMIT_START; | ||
| 3933 | smp_mb(); | 4019 | smp_mb(); |
| 3934 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) | 4020 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) |
| 3935 | wake_up(&root->fs_info->transaction_blocked_wait); | 4021 | wake_up(&root->fs_info->transaction_blocked_wait); |
| 3936 | 4022 | ||
| 3937 | btrfs_evict_pending_snapshots(t); | 4023 | btrfs_evict_pending_snapshots(t); |
| 3938 | 4024 | ||
| 3939 | t->blocked = 0; | 4025 | t->state = TRANS_STATE_UNBLOCKED; |
| 3940 | smp_mb(); | 4026 | smp_mb(); |
| 3941 | if (waitqueue_active(&root->fs_info->transaction_wait)) | 4027 | if (waitqueue_active(&root->fs_info->transaction_wait)) |
| 3942 | wake_up(&root->fs_info->transaction_wait); | 4028 | wake_up(&root->fs_info->transaction_wait); |
| 3943 | 4029 | ||
| 3944 | t->commit_done = 1; | ||
| 3945 | smp_mb(); | ||
| 3946 | if (waitqueue_active(&t->commit_wait)) | ||
| 3947 | wake_up(&t->commit_wait); | ||
| 3948 | |||
| 3949 | btrfs_destroy_delayed_inodes(root); | 4030 | btrfs_destroy_delayed_inodes(root); |
| 3950 | btrfs_assert_delayed_root_empty(root); | 4031 | btrfs_assert_delayed_root_empty(root); |
| 3951 | 4032 | ||
| 3952 | btrfs_destroy_delalloc_inodes(root); | 4033 | btrfs_destroy_all_delalloc_inodes(root->fs_info); |
| 3953 | |||
| 3954 | spin_lock(&root->fs_info->trans_lock); | ||
| 3955 | root->fs_info->running_transaction = NULL; | ||
| 3956 | spin_unlock(&root->fs_info->trans_lock); | ||
| 3957 | 4034 | ||
| 3958 | btrfs_destroy_marked_extents(root, &t->dirty_pages, | 4035 | btrfs_destroy_marked_extents(root, &t->dirty_pages, |
| 3959 | EXTENT_DIRTY); | 4036 | EXTENT_DIRTY); |
| @@ -3961,15 +4038,17 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
| 3961 | btrfs_destroy_pinned_extent(root, | 4038 | btrfs_destroy_pinned_extent(root, |
| 3962 | root->fs_info->pinned_extents); | 4039 | root->fs_info->pinned_extents); |
| 3963 | 4040 | ||
| 4041 | t->state = TRANS_STATE_COMPLETED; | ||
| 4042 | smp_mb(); | ||
| 4043 | if (waitqueue_active(&t->commit_wait)) | ||
| 4044 | wake_up(&t->commit_wait); | ||
| 4045 | |||
| 3964 | atomic_set(&t->use_count, 0); | 4046 | atomic_set(&t->use_count, 0); |
| 3965 | list_del_init(&t->list); | 4047 | list_del_init(&t->list); |
| 3966 | memset(t, 0, sizeof(*t)); | 4048 | memset(t, 0, sizeof(*t)); |
| 3967 | kmem_cache_free(btrfs_transaction_cachep, t); | 4049 | kmem_cache_free(btrfs_transaction_cachep, t); |
| 3968 | } | 4050 | } |
| 3969 | 4051 | ||
| 3970 | spin_lock(&root->fs_info->trans_lock); | ||
| 3971 | root->fs_info->trans_no_join = 0; | ||
| 3972 | spin_unlock(&root->fs_info->trans_lock); | ||
| 3973 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 4052 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
| 3974 | 4053 | ||
| 3975 | return 0; | 4054 | return 0; |
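
The two cleanup hunks above replace the old in_commit/blocked/commit_done flags on struct btrfs_transaction with a single ordered state field, so the error path only has to advance one value and wake the matching waitqueue at each step. A rough, userspace-only sketch of that progression (the TRANS_STATE_* names follow the patch; everything else here is illustrative, not kernel code):

#include <stdio.h>

/* one ordered state replaces three independent flags */
enum demo_trans_state {
	TRANS_STATE_RUNNING,
	TRANS_STATE_COMMIT_START,	/* old: in_commit = 1, blocked = 1 */
	TRANS_STATE_UNBLOCKED,		/* old: blocked = 0 */
	TRANS_STATE_COMPLETED,		/* old: commit_done = 1 */
};

static const char *demo_wakeup(enum demo_trans_state state)
{
	/* each transition has exactly one waitqueue to wake */
	switch (state) {
	case TRANS_STATE_COMMIT_START:	return "transaction_blocked_wait";
	case TRANS_STATE_UNBLOCKED:	return "transaction_wait";
	case TRANS_STATE_COMPLETED:	return "commit_wait";
	default:			return "none";
	}
}

int main(void)
{
	enum demo_trans_state s;

	/* userspace illustration only: walk the states a commit goes through */
	for (s = TRANS_STATE_RUNNING; s <= TRANS_STATE_COMPLETED; s++)
		printf("state %d wakes %s\n", s, demo_wakeup(s));
	return 0;
}
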
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index be69ce1b07a2..b71acd6e1e5b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
| @@ -63,14 +63,40 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); | |||
| 63 | int btrfs_commit_super(struct btrfs_root *root); | 63 | int btrfs_commit_super(struct btrfs_root *root); |
| 64 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 64 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
| 65 | u64 bytenr, u32 blocksize); | 65 | u64 bytenr, u32 blocksize); |
| 66 | struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | 66 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, |
| 67 | struct btrfs_key *location); | 67 | struct btrfs_key *location); |
| 68 | int btrfs_init_fs_root(struct btrfs_root *root); | ||
| 69 | int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, | ||
| 70 | struct btrfs_root *root); | ||
| 68 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | 71 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, |
| 69 | struct btrfs_key *location); | 72 | struct btrfs_key *location); |
| 70 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); | 73 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); |
| 71 | void btrfs_btree_balance_dirty(struct btrfs_root *root); | 74 | void btrfs_btree_balance_dirty(struct btrfs_root *root); |
| 72 | void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root); | 75 | void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root); |
| 73 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); | 76 | void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, |
| 77 | struct btrfs_root *root); | ||
| 78 | void btrfs_free_fs_root(struct btrfs_root *root); | ||
| 79 | |||
| 80 | /* | ||
| 81 | * This function grabs a reference on the root so that it is not freed | ||
| 82 | * while we access it. But it doesn't ensure that the tree is not dropped. | ||
| 83 | * | ||
| 84 | * If you want to ensure the whole tree is safe, you should use | ||
| 85 | * fs_info->subvol_srcu | ||
| 86 | */ | ||
| 87 | static inline struct btrfs_root *btrfs_grab_fs_root(struct btrfs_root *root) | ||
| 88 | { | ||
| 89 | if (atomic_inc_not_zero(&root->refs)) | ||
| 90 | return root; | ||
| 91 | return NULL; | ||
| 92 | } | ||
| 93 | |||
| 94 | static inline void btrfs_put_fs_root(struct btrfs_root *root) | ||
| 95 | { | ||
| 96 | if (atomic_dec_and_test(&root->refs)) | ||
| 97 | kfree(root); | ||
| 98 | } | ||
| 99 | |||
| 74 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); | 100 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); |
| 75 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, | 101 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, |
| 76 | int atomic); | 102 | int atomic); |
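
The btrfs_grab_fs_root()/btrfs_put_fs_root() helpers added above are a plain "increment unless zero" reference count on struct btrfs_root, with the last put freeing the root. A hedged userspace sketch of that pattern (the demo_* names are hypothetical; the kernel uses atomic_inc_not_zero()/atomic_dec_and_test()):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_root {
	atomic_int refs;	/* starts at 1 for the owning structure */
};

static struct demo_root *demo_grab(struct demo_root *root)
{
	int old = atomic_load(&root->refs);

	/* inc-not-zero: only take a reference while the object is still live */
	while (old != 0) {
		if (atomic_compare_exchange_weak(&root->refs, &old, old + 1))
			return root;
	}
	return NULL;
}

static void demo_put(struct demo_root *root)
{
	/* the final put frees the object */
	if (atomic_fetch_sub(&root->refs, 1) == 1)
		free(root);
}

int main(void)
{
	/* illustrative usage only, not kernel code */
	struct demo_root *root = calloc(1, sizeof(*root));

	atomic_init(&root->refs, 1);
	if (demo_grab(root))
		demo_put(root);	/* drop the extra reference */
	demo_put(root);		/* drop the initial reference, frees root */
	printf("grab/put demo done\n");
	return 0;
}
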
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 81ee29eeb7ca..4b8691607373 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
| @@ -82,11 +82,6 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
| 82 | goto fail; | 82 | goto fail; |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
| 86 | err = -ENOENT; | ||
| 87 | goto fail; | ||
| 88 | } | ||
| 89 | |||
| 90 | key.objectid = objectid; | 85 | key.objectid = objectid; |
| 91 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 86 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); |
| 92 | key.offset = 0; | 87 | key.offset = 0; |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index df472ab1b5ac..0236de711989 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
| 25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
| 26 | #include <linux/ratelimit.h> | 26 | #include <linux/ratelimit.h> |
| 27 | #include <linux/percpu_counter.h> | ||
| 27 | #include "compat.h" | 28 | #include "compat.h" |
| 28 | #include "hash.h" | 29 | #include "hash.h" |
| 29 | #include "ctree.h" | 30 | #include "ctree.h" |
| @@ -2526,6 +2527,51 @@ static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq, | |||
| 2526 | return 0; | 2527 | return 0; |
| 2527 | } | 2528 | } |
| 2528 | 2529 | ||
| 2530 | static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) | ||
| 2531 | { | ||
| 2532 | u64 num_bytes; | ||
| 2533 | |||
| 2534 | num_bytes = heads * (sizeof(struct btrfs_extent_item) + | ||
| 2535 | sizeof(struct btrfs_extent_inline_ref)); | ||
| 2536 | if (!btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) | ||
| 2537 | num_bytes += heads * sizeof(struct btrfs_tree_block_info); | ||
| 2538 | |||
| 2539 | /* | ||
| 2540 | * We don't ever fill up leaves all the way so multiply by 2 just to be | ||
| 2541 | * closer to what we're really going to want to use. | ||
| 2542 | */ | ||
| 2543 | return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); | ||
| 2544 | } | ||
| 2545 | |||
| 2546 | int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, | ||
| 2547 | struct btrfs_root *root) | ||
| 2548 | { | ||
| 2549 | struct btrfs_block_rsv *global_rsv; | ||
| 2550 | u64 num_heads = trans->transaction->delayed_refs.num_heads_ready; | ||
| 2551 | u64 num_bytes; | ||
| 2552 | int ret = 0; | ||
| 2553 | |||
| 2554 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | ||
| 2555 | num_heads = heads_to_leaves(root, num_heads); | ||
| 2556 | if (num_heads > 1) | ||
| 2557 | num_bytes += (num_heads - 1) * root->leafsize; | ||
| 2558 | num_bytes <<= 1; | ||
| 2559 | global_rsv = &root->fs_info->global_block_rsv; | ||
| 2560 | |||
| 2561 | /* | ||
| 2562 | * If we can't allocate any more chunks, let's make sure we have _lots_ of | ||
| 2563 | * wiggle room since running delayed refs can create more delayed refs. | ||
| 2564 | */ | ||
| 2565 | if (global_rsv->space_info->full) | ||
| 2566 | num_bytes <<= 1; | ||
| 2567 | |||
| 2568 | spin_lock(&global_rsv->lock); | ||
| 2569 | if (global_rsv->reserved <= num_bytes) | ||
| 2570 | ret = 1; | ||
| 2571 | spin_unlock(&global_rsv->lock); | ||
| 2572 | return ret; | ||
| 2573 | } | ||
| 2574 | |||
| 2529 | /* | 2575 | /* |
| 2530 | * this starts processing the delayed reference count updates and | 2576 | * this starts processing the delayed reference count updates and |
| 2531 | * extent insertions we have queued up so far. count can be | 2577 | * extent insertions we have queued up so far. count can be |
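
btrfs_should_throttle_delayed_refs() above estimates how many bytes of leaf churn the pending delayed ref heads could cause and throttles once the global reserve can no longer absorb it. A back-of-the-envelope version of that estimate (all sizes below are assumptions chosen for illustration; the kernel derives them from the actual leaf size and item layouts):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* illustrative numbers, not read from a real filesystem */
	uint64_t leaf_size = 16384;	/* bytes of data per leaf */
	uint64_t bytes_per_head = 37;	/* extent item + inline ref, roughly */
	uint64_t heads_ready = 20000;	/* pending delayed ref heads */
	uint64_t global_reserved = 32ULL << 20;

	/* leaves are never packed full, so double the raw estimate */
	uint64_t leaves = (heads_ready * bytes_per_head * 2) / leaf_size;
	uint64_t num_bytes = leaves > 1 ? (leaves - 1) * leaf_size : 0;

	num_bytes <<= 1;	/* extra wiggle room, as in the patch */
	printf("estimated %llu bytes of leaf churn, throttle=%d\n",
	       (unsigned long long)num_bytes, global_reserved <= num_bytes);
	return 0;
}
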
| @@ -2573,7 +2619,8 @@ progress: | |||
| 2573 | old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); | 2619 | old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); |
| 2574 | if (old) { | 2620 | if (old) { |
| 2575 | DEFINE_WAIT(__wait); | 2621 | DEFINE_WAIT(__wait); |
| 2576 | if (delayed_refs->num_entries < 16348) | 2622 | if (delayed_refs->flushing || |
| 2623 | !btrfs_should_throttle_delayed_refs(trans, root)) | ||
| 2577 | return 0; | 2624 | return 0; |
| 2578 | 2625 | ||
| 2579 | prepare_to_wait(&delayed_refs->wait, &__wait, | 2626 | prepare_to_wait(&delayed_refs->wait, &__wait, |
| @@ -2608,7 +2655,7 @@ again: | |||
| 2608 | 2655 | ||
| 2609 | while (1) { | 2656 | while (1) { |
| 2610 | if (!(run_all || run_most) && | 2657 | if (!(run_all || run_most) && |
| 2611 | delayed_refs->num_heads_ready < 64) | 2658 | !btrfs_should_throttle_delayed_refs(trans, root)) |
| 2612 | break; | 2659 | break; |
| 2613 | 2660 | ||
| 2614 | /* | 2661 | /* |
| @@ -2629,6 +2676,7 @@ again: | |||
| 2629 | spin_unlock(&delayed_refs->lock); | 2676 | spin_unlock(&delayed_refs->lock); |
| 2630 | btrfs_abort_transaction(trans, root, ret); | 2677 | btrfs_abort_transaction(trans, root, ret); |
| 2631 | atomic_dec(&delayed_refs->procs_running_refs); | 2678 | atomic_dec(&delayed_refs->procs_running_refs); |
| 2679 | wake_up(&delayed_refs->wait); | ||
| 2632 | return ret; | 2680 | return ret; |
| 2633 | } | 2681 | } |
| 2634 | 2682 | ||
| @@ -3310,6 +3358,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
| 3310 | struct btrfs_space_info *found; | 3358 | struct btrfs_space_info *found; |
| 3311 | int i; | 3359 | int i; |
| 3312 | int factor; | 3360 | int factor; |
| 3361 | int ret; | ||
| 3313 | 3362 | ||
| 3314 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | 3363 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | |
| 3315 | BTRFS_BLOCK_GROUP_RAID10)) | 3364 | BTRFS_BLOCK_GROUP_RAID10)) |
| @@ -3333,6 +3382,12 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
| 3333 | if (!found) | 3382 | if (!found) |
| 3334 | return -ENOMEM; | 3383 | return -ENOMEM; |
| 3335 | 3384 | ||
| 3385 | ret = percpu_counter_init(&found->total_bytes_pinned, 0); | ||
| 3386 | if (ret) { | ||
| 3387 | kfree(found); | ||
| 3388 | return ret; | ||
| 3389 | } | ||
| 3390 | |||
| 3336 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) | 3391 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
| 3337 | INIT_LIST_HEAD(&found->block_groups[i]); | 3392 | INIT_LIST_HEAD(&found->block_groups[i]); |
| 3338 | init_rwsem(&found->groups_sem); | 3393 | init_rwsem(&found->groups_sem); |
| @@ -3565,10 +3620,11 @@ alloc: | |||
| 3565 | } | 3620 | } |
| 3566 | 3621 | ||
| 3567 | /* | 3622 | /* |
| 3568 | * If we have less pinned bytes than we want to allocate then | 3623 | * If we don't have enough pinned space to deal with this |
| 3569 | * don't bother committing the transaction, it won't help us. | 3624 | * allocation, don't bother committing the transaction. |
| 3570 | */ | 3625 | */ |
| 3571 | if (data_sinfo->bytes_pinned < bytes) | 3626 | if (percpu_counter_compare(&data_sinfo->total_bytes_pinned, |
| 3627 | bytes) < 0) | ||
| 3572 | committed = 1; | 3628 | committed = 1; |
| 3573 | spin_unlock(&data_sinfo->lock); | 3629 | spin_unlock(&data_sinfo->lock); |
| 3574 | 3630 | ||
| @@ -3577,6 +3633,7 @@ commit_trans: | |||
| 3577 | if (!committed && | 3633 | if (!committed && |
| 3578 | !atomic_read(&root->fs_info->open_ioctl_trans)) { | 3634 | !atomic_read(&root->fs_info->open_ioctl_trans)) { |
| 3579 | committed = 1; | 3635 | committed = 1; |
| 3636 | |||
| 3580 | trans = btrfs_join_transaction(root); | 3637 | trans = btrfs_join_transaction(root); |
| 3581 | if (IS_ERR(trans)) | 3638 | if (IS_ERR(trans)) |
| 3582 | return PTR_ERR(trans); | 3639 | return PTR_ERR(trans); |
| @@ -3609,6 +3666,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) | |||
| 3609 | 3666 | ||
| 3610 | data_sinfo = root->fs_info->data_sinfo; | 3667 | data_sinfo = root->fs_info->data_sinfo; |
| 3611 | spin_lock(&data_sinfo->lock); | 3668 | spin_lock(&data_sinfo->lock); |
| 3669 | WARN_ON(data_sinfo->bytes_may_use < bytes); | ||
| 3612 | data_sinfo->bytes_may_use -= bytes; | 3670 | data_sinfo->bytes_may_use -= bytes; |
| 3613 | trace_btrfs_space_reservation(root->fs_info, "space_info", | 3671 | trace_btrfs_space_reservation(root->fs_info, "space_info", |
| 3614 | data_sinfo->flags, bytes, 0); | 3672 | data_sinfo->flags, bytes, 0); |
| @@ -3886,12 +3944,11 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | |||
| 3886 | unsigned long nr_pages) | 3944 | unsigned long nr_pages) |
| 3887 | { | 3945 | { |
| 3888 | struct super_block *sb = root->fs_info->sb; | 3946 | struct super_block *sb = root->fs_info->sb; |
| 3889 | int started; | ||
| 3890 | 3947 | ||
| 3891 | /* If we can not start writeback, just sync all the delalloc file. */ | 3948 | if (down_read_trylock(&sb->s_umount)) { |
| 3892 | started = try_to_writeback_inodes_sb_nr(sb, nr_pages, | 3949 | writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE); |
| 3893 | WB_REASON_FS_FREE_SPACE); | 3950 | up_read(&sb->s_umount); |
| 3894 | if (!started) { | 3951 | } else { |
| 3895 | /* | 3952 | /* |
| 3896 | * We needn't worry the filesystem going from r/w to r/o though | 3953 | * We needn't worry the filesystem going from r/w to r/o though |
| 3897 | * we don't acquire ->s_umount mutex, because the filesystem | 3954 | * we don't acquire ->s_umount mutex, because the filesystem |
| @@ -3899,9 +3956,9 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | |||
| 3899 | * the filesystem is readonly(all dirty pages are written to | 3956 | * the filesystem is readonly(all dirty pages are written to |
| 3900 | * the disk). | 3957 | * the disk). |
| 3901 | */ | 3958 | */ |
| 3902 | btrfs_start_delalloc_inodes(root, 0); | 3959 | btrfs_start_all_delalloc_inodes(root->fs_info, 0); |
| 3903 | if (!current->journal_info) | 3960 | if (!current->journal_info) |
| 3904 | btrfs_wait_ordered_extents(root, 0); | 3961 | btrfs_wait_all_ordered_extents(root->fs_info, 0); |
| 3905 | } | 3962 | } |
| 3906 | } | 3963 | } |
| 3907 | 3964 | ||
| @@ -3931,7 +3988,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 3931 | if (delalloc_bytes == 0) { | 3988 | if (delalloc_bytes == 0) { |
| 3932 | if (trans) | 3989 | if (trans) |
| 3933 | return; | 3990 | return; |
| 3934 | btrfs_wait_ordered_extents(root, 0); | 3991 | btrfs_wait_all_ordered_extents(root->fs_info, 0); |
| 3935 | return; | 3992 | return; |
| 3936 | } | 3993 | } |
| 3937 | 3994 | ||
| @@ -3959,7 +4016,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 3959 | 4016 | ||
| 3960 | loops++; | 4017 | loops++; |
| 3961 | if (wait_ordered && !trans) { | 4018 | if (wait_ordered && !trans) { |
| 3962 | btrfs_wait_ordered_extents(root, 0); | 4019 | btrfs_wait_all_ordered_extents(root->fs_info, 0); |
| 3963 | } else { | 4020 | } else { |
| 3964 | time_left = schedule_timeout_killable(1); | 4021 | time_left = schedule_timeout_killable(1); |
| 3965 | if (time_left) | 4022 | if (time_left) |
| @@ -3997,7 +4054,8 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
| 3997 | 4054 | ||
| 3998 | /* See if there is enough pinned space to make this reservation */ | 4055 | /* See if there is enough pinned space to make this reservation */ |
| 3999 | spin_lock(&space_info->lock); | 4056 | spin_lock(&space_info->lock); |
| 4000 | if (space_info->bytes_pinned >= bytes) { | 4057 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
| 4058 | bytes) >= 0) { | ||
| 4001 | spin_unlock(&space_info->lock); | 4059 | spin_unlock(&space_info->lock); |
| 4002 | goto commit; | 4060 | goto commit; |
| 4003 | } | 4061 | } |
| @@ -4012,7 +4070,8 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
| 4012 | 4070 | ||
| 4013 | spin_lock(&space_info->lock); | 4071 | spin_lock(&space_info->lock); |
| 4014 | spin_lock(&delayed_rsv->lock); | 4072 | spin_lock(&delayed_rsv->lock); |
| 4015 | if (space_info->bytes_pinned + delayed_rsv->size < bytes) { | 4073 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
| 4074 | bytes - delayed_rsv->size) >= 0) { | ||
| 4016 | spin_unlock(&delayed_rsv->lock); | 4075 | spin_unlock(&delayed_rsv->lock); |
| 4017 | spin_unlock(&space_info->lock); | 4076 | spin_unlock(&space_info->lock); |
| 4018 | return -ENOSPC; | 4077 | return -ENOSPC; |
| @@ -4297,6 +4356,31 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | |||
| 4297 | spin_unlock(&block_rsv->lock); | 4356 | spin_unlock(&block_rsv->lock); |
| 4298 | } | 4357 | } |
| 4299 | 4358 | ||
| 4359 | int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, | ||
| 4360 | struct btrfs_block_rsv *dest, u64 num_bytes, | ||
| 4361 | int min_factor) | ||
| 4362 | { | ||
| 4363 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
| 4364 | u64 min_bytes; | ||
| 4365 | |||
| 4366 | if (global_rsv->space_info != dest->space_info) | ||
| 4367 | return -ENOSPC; | ||
| 4368 | |||
| 4369 | spin_lock(&global_rsv->lock); | ||
| 4370 | min_bytes = div_factor(global_rsv->size, min_factor); | ||
| 4371 | if (global_rsv->reserved < min_bytes + num_bytes) { | ||
| 4372 | spin_unlock(&global_rsv->lock); | ||
| 4373 | return -ENOSPC; | ||
| 4374 | } | ||
| 4375 | global_rsv->reserved -= num_bytes; | ||
| 4376 | if (global_rsv->reserved < global_rsv->size) | ||
| 4377 | global_rsv->full = 0; | ||
| 4378 | spin_unlock(&global_rsv->lock); | ||
| 4379 | |||
| 4380 | block_rsv_add_bytes(dest, num_bytes, 1); | ||
| 4381 | return 0; | ||
| 4382 | } | ||
| 4383 | |||
| 4300 | static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, | 4384 | static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, |
| 4301 | struct btrfs_block_rsv *block_rsv, | 4385 | struct btrfs_block_rsv *block_rsv, |
| 4302 | struct btrfs_block_rsv *dest, u64 num_bytes) | 4386 | struct btrfs_block_rsv *dest, u64 num_bytes) |
| @@ -5030,14 +5114,14 @@ static int update_block_group(struct btrfs_root *root, | |||
| 5030 | int factor; | 5114 | int factor; |
| 5031 | 5115 | ||
| 5032 | /* block accounting for super block */ | 5116 | /* block accounting for super block */ |
| 5033 | spin_lock(&info->delalloc_lock); | 5117 | spin_lock(&info->delalloc_root_lock); |
| 5034 | old_val = btrfs_super_bytes_used(info->super_copy); | 5118 | old_val = btrfs_super_bytes_used(info->super_copy); |
| 5035 | if (alloc) | 5119 | if (alloc) |
| 5036 | old_val += num_bytes; | 5120 | old_val += num_bytes; |
| 5037 | else | 5121 | else |
| 5038 | old_val -= num_bytes; | 5122 | old_val -= num_bytes; |
| 5039 | btrfs_set_super_bytes_used(info->super_copy, old_val); | 5123 | btrfs_set_super_bytes_used(info->super_copy, old_val); |
| 5040 | spin_unlock(&info->delalloc_lock); | 5124 | spin_unlock(&info->delalloc_root_lock); |
| 5041 | 5125 | ||
| 5042 | while (total) { | 5126 | while (total) { |
| 5043 | cache = btrfs_lookup_block_group(info, bytenr); | 5127 | cache = btrfs_lookup_block_group(info, bytenr); |
| @@ -5189,6 +5273,80 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, | |||
| 5189 | return ret; | 5273 | return ret; |
| 5190 | } | 5274 | } |
| 5191 | 5275 | ||
| 5276 | static int __exclude_logged_extent(struct btrfs_root *root, u64 start, u64 num_bytes) | ||
| 5277 | { | ||
| 5278 | int ret; | ||
| 5279 | struct btrfs_block_group_cache *block_group; | ||
| 5280 | struct btrfs_caching_control *caching_ctl; | ||
| 5281 | |||
| 5282 | block_group = btrfs_lookup_block_group(root->fs_info, start); | ||
| 5283 | if (!block_group) | ||
| 5284 | return -EINVAL; | ||
| 5285 | |||
| 5286 | cache_block_group(block_group, 0); | ||
| 5287 | caching_ctl = get_caching_control(block_group); | ||
| 5288 | |||
| 5289 | if (!caching_ctl) { | ||
| 5290 | /* Logic error */ | ||
| 5291 | BUG_ON(!block_group_cache_done(block_group)); | ||
| 5292 | ret = btrfs_remove_free_space(block_group, start, num_bytes); | ||
| 5293 | } else { | ||
| 5294 | mutex_lock(&caching_ctl->mutex); | ||
| 5295 | |||
| 5296 | if (start >= caching_ctl->progress) { | ||
| 5297 | ret = add_excluded_extent(root, start, num_bytes); | ||
| 5298 | } else if (start + num_bytes <= caching_ctl->progress) { | ||
| 5299 | ret = btrfs_remove_free_space(block_group, | ||
| 5300 | start, num_bytes); | ||
| 5301 | } else { | ||
| 5302 | num_bytes = caching_ctl->progress - start; | ||
| 5303 | ret = btrfs_remove_free_space(block_group, | ||
| 5304 | start, num_bytes); | ||
| 5305 | if (ret) | ||
| 5306 | goto out_lock; | ||
| 5307 | |||
| 5308 | num_bytes = (start + num_bytes) - | ||
| 5309 | caching_ctl->progress; | ||
| 5310 | start = caching_ctl->progress; | ||
| 5311 | ret = add_excluded_extent(root, start, num_bytes); | ||
| 5312 | } | ||
| 5313 | out_lock: | ||
| 5314 | mutex_unlock(&caching_ctl->mutex); | ||
| 5315 | put_caching_control(caching_ctl); | ||
| 5316 | } | ||
| 5317 | btrfs_put_block_group(block_group); | ||
| 5318 | return ret; | ||
| 5319 | } | ||
| 5320 | |||
| 5321 | int btrfs_exclude_logged_extents(struct btrfs_root *log, | ||
| 5322 | struct extent_buffer *eb) | ||
| 5323 | { | ||
| 5324 | struct btrfs_file_extent_item *item; | ||
| 5325 | struct btrfs_key key; | ||
| 5326 | int found_type; | ||
| 5327 | int i; | ||
| 5328 | |||
| 5329 | if (!btrfs_fs_incompat(log->fs_info, MIXED_GROUPS)) | ||
| 5330 | return 0; | ||
| 5331 | |||
| 5332 | for (i = 0; i < btrfs_header_nritems(eb); i++) { | ||
| 5333 | btrfs_item_key_to_cpu(eb, &key, i); | ||
| 5334 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
| 5335 | continue; | ||
| 5336 | item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); | ||
| 5337 | found_type = btrfs_file_extent_type(eb, item); | ||
| 5338 | if (found_type == BTRFS_FILE_EXTENT_INLINE) | ||
| 5339 | continue; | ||
| 5340 | if (btrfs_file_extent_disk_bytenr(eb, item) == 0) | ||
| 5341 | continue; | ||
| 5342 | key.objectid = btrfs_file_extent_disk_bytenr(eb, item); | ||
| 5343 | key.offset = btrfs_file_extent_disk_num_bytes(eb, item); | ||
| 5344 | __exclude_logged_extent(log, key.objectid, key.offset); | ||
| 5345 | } | ||
| 5346 | |||
| 5347 | return 0; | ||
| 5348 | } | ||
| 5349 | |||
| 5192 | /** | 5350 | /** |
| 5193 | * btrfs_update_reserved_bytes - update the block_group and space info counters | 5351 | * btrfs_update_reserved_bytes - update the block_group and space info counters |
| 5194 | * @cache: The cache we are manipulating | 5352 | * @cache: The cache we are manipulating |
| @@ -5251,6 +5409,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
| 5251 | struct btrfs_caching_control *next; | 5409 | struct btrfs_caching_control *next; |
| 5252 | struct btrfs_caching_control *caching_ctl; | 5410 | struct btrfs_caching_control *caching_ctl; |
| 5253 | struct btrfs_block_group_cache *cache; | 5411 | struct btrfs_block_group_cache *cache; |
| 5412 | struct btrfs_space_info *space_info; | ||
| 5254 | 5413 | ||
| 5255 | down_write(&fs_info->extent_commit_sem); | 5414 | down_write(&fs_info->extent_commit_sem); |
| 5256 | 5415 | ||
| @@ -5273,6 +5432,9 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
| 5273 | 5432 | ||
| 5274 | up_write(&fs_info->extent_commit_sem); | 5433 | up_write(&fs_info->extent_commit_sem); |
| 5275 | 5434 | ||
| 5435 | list_for_each_entry_rcu(space_info, &fs_info->space_info, list) | ||
| 5436 | percpu_counter_set(&space_info->total_bytes_pinned, 0); | ||
| 5437 | |||
| 5276 | update_global_block_rsv(fs_info); | 5438 | update_global_block_rsv(fs_info); |
| 5277 | } | 5439 | } |
| 5278 | 5440 | ||
| @@ -5370,6 +5532,27 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 5370 | return 0; | 5532 | return 0; |
| 5371 | } | 5533 | } |
| 5372 | 5534 | ||
| 5535 | static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes, | ||
| 5536 | u64 owner, u64 root_objectid) | ||
| 5537 | { | ||
| 5538 | struct btrfs_space_info *space_info; | ||
| 5539 | u64 flags; | ||
| 5540 | |||
| 5541 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | ||
| 5542 | if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID) | ||
| 5543 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
| 5544 | else | ||
| 5545 | flags = BTRFS_BLOCK_GROUP_METADATA; | ||
| 5546 | } else { | ||
| 5547 | flags = BTRFS_BLOCK_GROUP_DATA; | ||
| 5548 | } | ||
| 5549 | |||
| 5550 | space_info = __find_space_info(fs_info, flags); | ||
| 5551 | BUG_ON(!space_info); /* Logic bug */ | ||
| 5552 | percpu_counter_add(&space_info->total_bytes_pinned, num_bytes); | ||
| 5553 | } | ||
| 5554 | |||
| 5555 | |||
| 5373 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 5556 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
| 5374 | struct btrfs_root *root, | 5557 | struct btrfs_root *root, |
| 5375 | u64 bytenr, u64 num_bytes, u64 parent, | 5558 | u64 bytenr, u64 num_bytes, u64 parent, |
| @@ -5590,6 +5773,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5590 | goto out; | 5773 | goto out; |
| 5591 | } | 5774 | } |
| 5592 | } | 5775 | } |
| 5776 | add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid, | ||
| 5777 | root_objectid); | ||
| 5593 | } else { | 5778 | } else { |
| 5594 | if (found_extent) { | 5779 | if (found_extent) { |
| 5595 | BUG_ON(is_data && refs_to_drop != | 5780 | BUG_ON(is_data && refs_to_drop != |
| @@ -5713,6 +5898,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
| 5713 | u64 parent, int last_ref) | 5898 | u64 parent, int last_ref) |
| 5714 | { | 5899 | { |
| 5715 | struct btrfs_block_group_cache *cache = NULL; | 5900 | struct btrfs_block_group_cache *cache = NULL; |
| 5901 | int pin = 1; | ||
| 5716 | int ret; | 5902 | int ret; |
| 5717 | 5903 | ||
| 5718 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 5904 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
| @@ -5745,8 +5931,14 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
| 5745 | 5931 | ||
| 5746 | btrfs_add_free_space(cache, buf->start, buf->len); | 5932 | btrfs_add_free_space(cache, buf->start, buf->len); |
| 5747 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); | 5933 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); |
| 5934 | pin = 0; | ||
| 5748 | } | 5935 | } |
| 5749 | out: | 5936 | out: |
| 5937 | if (pin) | ||
| 5938 | add_pinned_bytes(root->fs_info, buf->len, | ||
| 5939 | btrfs_header_level(buf), | ||
| 5940 | root->root_key.objectid); | ||
| 5941 | |||
| 5750 | /* | 5942 | /* |
| 5751 | * Deleting the buffer, clear the corrupt flag since it doesn't matter | 5943 | * Deleting the buffer, clear the corrupt flag since it doesn't matter |
| 5752 | * anymore. | 5944 | * anymore. |
| @@ -5763,6 +5955,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 5763 | int ret; | 5955 | int ret; |
| 5764 | struct btrfs_fs_info *fs_info = root->fs_info; | 5956 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 5765 | 5957 | ||
| 5958 | add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); | ||
| 5959 | |||
| 5766 | /* | 5960 | /* |
| 5767 | * tree log blocks never actually go into the extent allocation | 5961 | * tree log blocks never actually go into the extent allocation |
| 5768 | * tree, just update pinning info and exit early. | 5962 | * tree, just update pinning info and exit early. |
| @@ -6560,52 +6754,26 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
| 6560 | { | 6754 | { |
| 6561 | int ret; | 6755 | int ret; |
| 6562 | struct btrfs_block_group_cache *block_group; | 6756 | struct btrfs_block_group_cache *block_group; |
| 6563 | struct btrfs_caching_control *caching_ctl; | ||
| 6564 | u64 start = ins->objectid; | ||
| 6565 | u64 num_bytes = ins->offset; | ||
| 6566 | |||
| 6567 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | ||
| 6568 | cache_block_group(block_group, 0); | ||
| 6569 | caching_ctl = get_caching_control(block_group); | ||
| 6570 | |||
| 6571 | if (!caching_ctl) { | ||
| 6572 | BUG_ON(!block_group_cache_done(block_group)); | ||
| 6573 | ret = btrfs_remove_free_space(block_group, start, num_bytes); | ||
| 6574 | if (ret) | ||
| 6575 | goto out; | ||
| 6576 | } else { | ||
| 6577 | mutex_lock(&caching_ctl->mutex); | ||
| 6578 | 6757 | ||
| 6579 | if (start >= caching_ctl->progress) { | 6758 | /* |
| 6580 | ret = add_excluded_extent(root, start, num_bytes); | 6759 | * Mixed block groups will exclude before processing the log so we only |
| 6581 | } else if (start + num_bytes <= caching_ctl->progress) { | 6760 | * need to do the exclude dance if this fs isn't mixed. |
| 6582 | ret = btrfs_remove_free_space(block_group, | 6761 | */ |
| 6583 | start, num_bytes); | 6762 | if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) { |
| 6584 | } else { | 6763 | ret = __exclude_logged_extent(root, ins->objectid, ins->offset); |
| 6585 | num_bytes = caching_ctl->progress - start; | ||
| 6586 | ret = btrfs_remove_free_space(block_group, | ||
| 6587 | start, num_bytes); | ||
| 6588 | if (ret) | ||
| 6589 | goto out_lock; | ||
| 6590 | |||
| 6591 | start = caching_ctl->progress; | ||
| 6592 | num_bytes = ins->objectid + ins->offset - | ||
| 6593 | caching_ctl->progress; | ||
| 6594 | ret = add_excluded_extent(root, start, num_bytes); | ||
| 6595 | } | ||
| 6596 | out_lock: | ||
| 6597 | mutex_unlock(&caching_ctl->mutex); | ||
| 6598 | put_caching_control(caching_ctl); | ||
| 6599 | if (ret) | 6764 | if (ret) |
| 6600 | goto out; | 6765 | return ret; |
| 6601 | } | 6766 | } |
| 6602 | 6767 | ||
| 6768 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | ||
| 6769 | if (!block_group) | ||
| 6770 | return -EINVAL; | ||
| 6771 | |||
| 6603 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, | 6772 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, |
| 6604 | RESERVE_ALLOC_NO_ACCOUNT); | 6773 | RESERVE_ALLOC_NO_ACCOUNT); |
| 6605 | BUG_ON(ret); /* logic error */ | 6774 | BUG_ON(ret); /* logic error */ |
| 6606 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 6775 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
| 6607 | 0, owner, offset, ins, 1); | 6776 | 0, owner, offset, ins, 1); |
| 6608 | out: | ||
| 6609 | btrfs_put_block_group(block_group); | 6777 | btrfs_put_block_group(block_group); |
| 6610 | return ret; | 6778 | return ret; |
| 6611 | } | 6779 | } |
| @@ -7384,7 +7552,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
| 7384 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | 7552 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); |
| 7385 | 7553 | ||
| 7386 | while (1) { | 7554 | while (1) { |
| 7387 | if (!for_reloc && btrfs_fs_closing(root->fs_info)) { | 7555 | if (!for_reloc && btrfs_need_cleaner_sleep(root)) { |
| 7388 | pr_debug("btrfs: drop snapshot early exit\n"); | 7556 | pr_debug("btrfs: drop snapshot early exit\n"); |
| 7389 | err = -EAGAIN; | 7557 | err = -EAGAIN; |
| 7390 | goto out_end_trans; | 7558 | goto out_end_trans; |
| @@ -7447,8 +7615,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
| 7447 | } | 7615 | } |
| 7448 | 7616 | ||
| 7449 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { | 7617 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
| 7450 | ret = btrfs_find_last_root(tree_root, root->root_key.objectid, | 7618 | ret = btrfs_find_root(tree_root, &root->root_key, path, |
| 7451 | NULL, NULL); | 7619 | NULL, NULL); |
| 7452 | if (ret < 0) { | 7620 | if (ret < 0) { |
| 7453 | btrfs_abort_transaction(trans, tree_root, ret); | 7621 | btrfs_abort_transaction(trans, tree_root, ret); |
| 7454 | err = ret; | 7622 | err = ret; |
| @@ -7465,11 +7633,11 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
| 7465 | } | 7633 | } |
| 7466 | 7634 | ||
| 7467 | if (root->in_radix) { | 7635 | if (root->in_radix) { |
| 7468 | btrfs_free_fs_root(tree_root->fs_info, root); | 7636 | btrfs_drop_and_free_fs_root(tree_root->fs_info, root); |
| 7469 | } else { | 7637 | } else { |
| 7470 | free_extent_buffer(root->node); | 7638 | free_extent_buffer(root->node); |
| 7471 | free_extent_buffer(root->commit_root); | 7639 | free_extent_buffer(root->commit_root); |
| 7472 | kfree(root); | 7640 | btrfs_put_fs_root(root); |
| 7473 | } | 7641 | } |
| 7474 | out_end_trans: | 7642 | out_end_trans: |
| 7475 | btrfs_end_transaction_throttle(trans, tree_root); | 7643 | btrfs_end_transaction_throttle(trans, tree_root); |
| @@ -7782,6 +7950,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
| 7782 | struct btrfs_space_info *space_info; | 7950 | struct btrfs_space_info *space_info; |
| 7783 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 7951 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
| 7784 | struct btrfs_device *device; | 7952 | struct btrfs_device *device; |
| 7953 | struct btrfs_trans_handle *trans; | ||
| 7785 | u64 min_free; | 7954 | u64 min_free; |
| 7786 | u64 dev_min = 1; | 7955 | u64 dev_min = 1; |
| 7787 | u64 dev_nr = 0; | 7956 | u64 dev_nr = 0; |
| @@ -7868,6 +8037,13 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
| 7868 | do_div(min_free, dev_min); | 8037 | do_div(min_free, dev_min); |
| 7869 | } | 8038 | } |
| 7870 | 8039 | ||
| 8040 | /* We need to do this so that we can look at pending chunks */ | ||
| 8041 | trans = btrfs_join_transaction(root); | ||
| 8042 | if (IS_ERR(trans)) { | ||
| 8043 | ret = PTR_ERR(trans); | ||
| 8044 | goto out; | ||
| 8045 | } | ||
| 8046 | |||
| 7871 | mutex_lock(&root->fs_info->chunk_mutex); | 8047 | mutex_lock(&root->fs_info->chunk_mutex); |
| 7872 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 8048 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
| 7873 | u64 dev_offset; | 8049 | u64 dev_offset; |
| @@ -7878,7 +8054,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
| 7878 | */ | 8054 | */ |
| 7879 | if (device->total_bytes > device->bytes_used + min_free && | 8055 | if (device->total_bytes > device->bytes_used + min_free && |
| 7880 | !device->is_tgtdev_for_dev_replace) { | 8056 | !device->is_tgtdev_for_dev_replace) { |
| 7881 | ret = find_free_dev_extent(device, min_free, | 8057 | ret = find_free_dev_extent(trans, device, min_free, |
| 7882 | &dev_offset, NULL); | 8058 | &dev_offset, NULL); |
| 7883 | if (!ret) | 8059 | if (!ret) |
| 7884 | dev_nr++; | 8060 | dev_nr++; |
| @@ -7890,6 +8066,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
| 7890 | } | 8066 | } |
| 7891 | } | 8067 | } |
| 7892 | mutex_unlock(&root->fs_info->chunk_mutex); | 8068 | mutex_unlock(&root->fs_info->chunk_mutex); |
| 8069 | btrfs_end_transaction(trans, root); | ||
| 7893 | out: | 8070 | out: |
| 7894 | btrfs_put_block_group(block_group); | 8071 | btrfs_put_block_group(block_group); |
| 7895 | return ret; | 8072 | return ret; |
| @@ -8032,6 +8209,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
| 8032 | dump_space_info(space_info, 0, 0); | 8209 | dump_space_info(space_info, 0, 0); |
| 8033 | } | 8210 | } |
| 8034 | } | 8211 | } |
| 8212 | percpu_counter_destroy(&space_info->total_bytes_pinned); | ||
| 8035 | list_del(&space_info->list); | 8213 | list_del(&space_info->list); |
| 8036 | kfree(space_info); | 8214 | kfree(space_info); |
| 8037 | } | 8215 | } |
| @@ -8254,6 +8432,10 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | |||
| 8254 | sizeof(item)); | 8432 | sizeof(item)); |
| 8255 | if (ret) | 8433 | if (ret) |
| 8256 | btrfs_abort_transaction(trans, extent_root, ret); | 8434 | btrfs_abort_transaction(trans, extent_root, ret); |
| 8435 | ret = btrfs_finish_chunk_alloc(trans, extent_root, | ||
| 8436 | key.objectid, key.offset); | ||
| 8437 | if (ret) | ||
| 8438 | btrfs_abort_transaction(trans, extent_root, ret); | ||
| 8257 | } | 8439 | } |
| 8258 | } | 8440 | } |
| 8259 | 8441 | ||
| @@ -8591,8 +8773,15 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
| 8591 | if (end - start >= range->minlen) { | 8773 | if (end - start >= range->minlen) { |
| 8592 | if (!block_group_cache_done(cache)) { | 8774 | if (!block_group_cache_done(cache)) { |
| 8593 | ret = cache_block_group(cache, 0); | 8775 | ret = cache_block_group(cache, 0); |
| 8594 | if (!ret) | 8776 | if (ret) { |
| 8595 | wait_block_group_cache_done(cache); | 8777 | btrfs_put_block_group(cache); |
| 8778 | break; | ||
| 8779 | } | ||
| 8780 | ret = wait_block_group_cache_done(cache); | ||
| 8781 | if (ret) { | ||
| 8782 | btrfs_put_block_group(cache); | ||
| 8783 | break; | ||
| 8784 | } | ||
| 8596 | } | 8785 | } |
| 8597 | ret = btrfs_trim_block_group(cache, | 8786 | ret = btrfs_trim_block_group(cache, |
| 8598 | &group_trimmed, | 8787 | &group_trimmed, |
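
Several hunks above thread a new total_bytes_pinned counter through fs/btrfs/extent-tree.c: it is set up in update_space_info(), bumped by add_pinned_bytes() when extents head for the pinned state, consulted before forcing a transaction commit, reset in btrfs_prepare_extent_commit(), and torn down in btrfs_free_block_groups(). A minimal userspace approximation of that lifecycle (the kernel uses a struct percpu_counter; the demo_* code below is only a sketch):

#include <stdio.h>
#include <stdint.h>

struct demo_space_info {
	int64_t total_bytes_pinned;	/* kernel: struct percpu_counter */
};

static void demo_add_pinned(struct demo_space_info *si, int64_t bytes)
{
	si->total_bytes_pinned += bytes;	/* percpu_counter_add() in the patch */
}

static int demo_worth_committing(struct demo_space_info *si, int64_t needed)
{
	/* enough pinned space would come back if we committed */
	return si->total_bytes_pinned >= needed;
}

static void demo_commit(struct demo_space_info *si)
{
	si->total_bytes_pinned = 0;	/* percpu_counter_set(..., 0) at commit */
}

int main(void)
{
	/* made-up values for illustration */
	struct demo_space_info si = { 0 };

	demo_add_pinned(&si, 1 << 20);
	printf("worth committing for 512K: %d\n",
	       demo_worth_committing(&si, 512 << 10));
	demo_commit(&si);
	printf("after commit: %lld pinned\n", (long long)si.total_bytes_pinned);
	return 0;
}
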
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6bca9472f313..583d98bd065e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -77,10 +77,29 @@ void btrfs_leak_debug_check(void) | |||
| 77 | kmem_cache_free(extent_buffer_cache, eb); | 77 | kmem_cache_free(extent_buffer_cache, eb); |
| 78 | } | 78 | } |
| 79 | } | 79 | } |
| 80 | |||
| 81 | #define btrfs_debug_check_extent_io_range(inode, start, end) \ | ||
| 82 | __btrfs_debug_check_extent_io_range(__func__, (inode), (start), (end)) | ||
| 83 | static inline void __btrfs_debug_check_extent_io_range(const char *caller, | ||
| 84 | struct inode *inode, u64 start, u64 end) | ||
| 85 | { | ||
| 86 | u64 isize = i_size_read(inode); | ||
| 87 | |||
| 88 | if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { | ||
| 89 | printk_ratelimited(KERN_DEBUG | ||
| 90 | "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", | ||
| 91 | caller, | ||
| 92 | (unsigned long long)btrfs_ino(inode), | ||
| 93 | (unsigned long long)isize, | ||
| 94 | (unsigned long long)start, | ||
| 95 | (unsigned long long)end); | ||
| 96 | } | ||
| 97 | } | ||
| 80 | #else | 98 | #else |
| 81 | #define btrfs_leak_debug_add(new, head) do {} while (0) | 99 | #define btrfs_leak_debug_add(new, head) do {} while (0) |
| 82 | #define btrfs_leak_debug_del(entry) do {} while (0) | 100 | #define btrfs_leak_debug_del(entry) do {} while (0) |
| 83 | #define btrfs_leak_debug_check() do {} while (0) | 101 | #define btrfs_leak_debug_check() do {} while (0) |
| 102 | #define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0) | ||
| 84 | #endif | 103 | #endif |
| 85 | 104 | ||
| 86 | #define BUFFER_LRU_MAX 64 | 105 | #define BUFFER_LRU_MAX 64 |
| @@ -522,6 +541,11 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 522 | int err; | 541 | int err; |
| 523 | int clear = 0; | 542 | int clear = 0; |
| 524 | 543 | ||
| 544 | btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); | ||
| 545 | |||
| 546 | if (bits & EXTENT_DELALLOC) | ||
| 547 | bits |= EXTENT_NORESERVE; | ||
| 548 | |||
| 525 | if (delete) | 549 | if (delete) |
| 526 | bits |= ~EXTENT_CTLBITS; | 550 | bits |= ~EXTENT_CTLBITS; |
| 527 | bits |= EXTENT_FIRST_DELALLOC; | 551 | bits |= EXTENT_FIRST_DELALLOC; |
| @@ -677,6 +701,8 @@ static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 677 | struct extent_state *state; | 701 | struct extent_state *state; |
| 678 | struct rb_node *node; | 702 | struct rb_node *node; |
| 679 | 703 | ||
| 704 | btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); | ||
| 705 | |||
| 680 | spin_lock(&tree->lock); | 706 | spin_lock(&tree->lock); |
| 681 | again: | 707 | again: |
| 682 | while (1) { | 708 | while (1) { |
| @@ -769,6 +795,8 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 769 | u64 last_start; | 795 | u64 last_start; |
| 770 | u64 last_end; | 796 | u64 last_end; |
| 771 | 797 | ||
| 798 | btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); | ||
| 799 | |||
| 772 | bits |= EXTENT_FIRST_DELALLOC; | 800 | bits |= EXTENT_FIRST_DELALLOC; |
| 773 | again: | 801 | again: |
| 774 | if (!prealloc && (mask & __GFP_WAIT)) { | 802 | if (!prealloc && (mask & __GFP_WAIT)) { |
| @@ -989,6 +1017,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 989 | u64 last_start; | 1017 | u64 last_start; |
| 990 | u64 last_end; | 1018 | u64 last_end; |
| 991 | 1019 | ||
| 1020 | btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); | ||
| 1021 | |||
| 992 | again: | 1022 | again: |
| 993 | if (!prealloc && (mask & __GFP_WAIT)) { | 1023 | if (!prealloc && (mask & __GFP_WAIT)) { |
| 994 | prealloc = alloc_extent_state(mask); | 1024 | prealloc = alloc_extent_state(mask); |
| @@ -2450,11 +2480,12 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
| 2450 | struct extent_state *cached = NULL; | 2480 | struct extent_state *cached = NULL; |
| 2451 | struct extent_state *state; | 2481 | struct extent_state *state; |
| 2452 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); | 2482 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); |
| 2483 | struct inode *inode = page->mapping->host; | ||
| 2453 | 2484 | ||
| 2454 | pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " | 2485 | pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " |
| 2455 | "mirror=%lu\n", (u64)bio->bi_sector, err, | 2486 | "mirror=%lu\n", (u64)bio->bi_sector, err, |
| 2456 | io_bio->mirror_num); | 2487 | io_bio->mirror_num); |
| 2457 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 2488 | tree = &BTRFS_I(inode)->io_tree; |
| 2458 | 2489 | ||
| 2459 | /* We always issue full-page reads, but if some block | 2490 | /* We always issue full-page reads, but if some block |
| 2460 | * in a page fails to read, blk_update_request() will | 2491 | * in a page fails to read, blk_update_request() will |
| @@ -2528,6 +2559,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
| 2528 | unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); | 2559 | unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); |
| 2529 | 2560 | ||
| 2530 | if (uptodate) { | 2561 | if (uptodate) { |
| 2562 | loff_t i_size = i_size_read(inode); | ||
| 2563 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
| 2564 | unsigned offset; | ||
| 2565 | |||
| 2566 | /* Zero out the end if this page straddles i_size */ | ||
| 2567 | offset = i_size & (PAGE_CACHE_SIZE-1); | ||
| 2568 | if (page->index == end_index && offset) | ||
| 2569 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); | ||
| 2531 | SetPageUptodate(page); | 2570 | SetPageUptodate(page); |
| 2532 | } else { | 2571 | } else { |
| 2533 | ClearPageUptodate(page); | 2572 | ClearPageUptodate(page); |
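
The end_bio_extent_readpage() change above zeroes the tail of the last page when the file size is not page aligned, so stale bytes past EOF never become visible through the page cache. The arithmetic is just a shift and a mask; for example, assuming 4K pages (an assumption for this sketch, not a btrfs requirement):

#include <stdio.h>
#include <stdint.h>

#define DEMO_PAGE_SHIFT 12
#define DEMO_PAGE_SIZE  (1UL << DEMO_PAGE_SHIFT)

int main(void)
{
	uint64_t i_size = 10000;				/* file size in bytes */
	uint64_t end_index = i_size >> DEMO_PAGE_SHIFT;		/* last page index: 2 */
	unsigned offset = i_size & (DEMO_PAGE_SIZE - 1);	/* valid bytes in it: 1808 */

	/* a read completing page index 2 must zero bytes [1808, 4096) */
	printf("end_index=%llu zero from %u to %lu\n",
	       (unsigned long long)end_index, offset, DEMO_PAGE_SIZE);
	return 0;
}
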
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 41fb81e7ec53..3b8c4e26e1da 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #define EXTENT_FIRST_DELALLOC (1 << 12) | 19 | #define EXTENT_FIRST_DELALLOC (1 << 12) |
| 20 | #define EXTENT_NEED_WAIT (1 << 13) | 20 | #define EXTENT_NEED_WAIT (1 << 13) |
| 21 | #define EXTENT_DAMAGED (1 << 14) | 21 | #define EXTENT_DAMAGED (1 << 14) |
| 22 | #define EXTENT_NORESERVE (1 << 15) | ||
| 22 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 23 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
| 23 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | 24 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) |
| 24 | 25 | ||
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b193bf324a41..a7bfc9541803 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
| @@ -34,8 +34,7 @@ | |||
| 34 | 34 | ||
| 35 | #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ | 35 | #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ |
| 36 | sizeof(struct btrfs_ordered_sum)) / \ | 36 | sizeof(struct btrfs_ordered_sum)) / \ |
| 37 | sizeof(struct btrfs_sector_sum) * \ | 37 | sizeof(u32) * (r)->sectorsize) |
| 38 | (r)->sectorsize - (r)->sectorsize) | ||
| 39 | 38 | ||
| 40 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 39 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
| 41 | struct btrfs_root *root, | 40 | struct btrfs_root *root, |
| @@ -297,7 +296,6 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
| 297 | struct btrfs_path *path; | 296 | struct btrfs_path *path; |
| 298 | struct extent_buffer *leaf; | 297 | struct extent_buffer *leaf; |
| 299 | struct btrfs_ordered_sum *sums; | 298 | struct btrfs_ordered_sum *sums; |
| 300 | struct btrfs_sector_sum *sector_sum; | ||
| 301 | struct btrfs_csum_item *item; | 299 | struct btrfs_csum_item *item; |
| 302 | LIST_HEAD(tmplist); | 300 | LIST_HEAD(tmplist); |
| 303 | unsigned long offset; | 301 | unsigned long offset; |
| @@ -368,34 +366,28 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
| 368 | struct btrfs_csum_item); | 366 | struct btrfs_csum_item); |
| 369 | while (start < csum_end) { | 367 | while (start < csum_end) { |
| 370 | size = min_t(size_t, csum_end - start, | 368 | size = min_t(size_t, csum_end - start, |
| 371 | MAX_ORDERED_SUM_BYTES(root)); | 369 | MAX_ORDERED_SUM_BYTES(root)); |
| 372 | sums = kzalloc(btrfs_ordered_sum_size(root, size), | 370 | sums = kzalloc(btrfs_ordered_sum_size(root, size), |
| 373 | GFP_NOFS); | 371 | GFP_NOFS); |
| 374 | if (!sums) { | 372 | if (!sums) { |
| 375 | ret = -ENOMEM; | 373 | ret = -ENOMEM; |
| 376 | goto fail; | 374 | goto fail; |
| 377 | } | 375 | } |
| 378 | 376 | ||
| 379 | sector_sum = sums->sums; | ||
| 380 | sums->bytenr = start; | 377 | sums->bytenr = start; |
| 381 | sums->len = size; | 378 | sums->len = (int)size; |
| 382 | 379 | ||
| 383 | offset = (start - key.offset) >> | 380 | offset = (start - key.offset) >> |
| 384 | root->fs_info->sb->s_blocksize_bits; | 381 | root->fs_info->sb->s_blocksize_bits; |
| 385 | offset *= csum_size; | 382 | offset *= csum_size; |
| 383 | size >>= root->fs_info->sb->s_blocksize_bits; | ||
| 386 | 384 | ||
| 387 | while (size > 0) { | 385 | read_extent_buffer(path->nodes[0], |
| 388 | read_extent_buffer(path->nodes[0], | 386 | sums->sums, |
| 389 | &sector_sum->sum, | 387 | ((unsigned long)item) + offset, |
| 390 | ((unsigned long)item) + | 388 | csum_size * size); |
| 391 | offset, csum_size); | 389 | |
| 392 | sector_sum->bytenr = start; | 390 | start += root->sectorsize * size; |
| 393 | |||
| 394 | size -= root->sectorsize; | ||
| 395 | start += root->sectorsize; | ||
| 396 | offset += csum_size; | ||
| 397 | sector_sum++; | ||
| 398 | } | ||
| 399 | list_add_tail(&sums->list, &tmplist); | 391 | list_add_tail(&sums->list, &tmplist); |
| 400 | } | 392 | } |
| 401 | path->slots[0]++; | 393 | path->slots[0]++; |
| @@ -417,23 +409,20 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | |||
| 417 | struct bio *bio, u64 file_start, int contig) | 409 | struct bio *bio, u64 file_start, int contig) |
| 418 | { | 410 | { |
| 419 | struct btrfs_ordered_sum *sums; | 411 | struct btrfs_ordered_sum *sums; |
| 420 | struct btrfs_sector_sum *sector_sum; | ||
| 421 | struct btrfs_ordered_extent *ordered; | 412 | struct btrfs_ordered_extent *ordered; |
| 422 | char *data; | 413 | char *data; |
| 423 | struct bio_vec *bvec = bio->bi_io_vec; | 414 | struct bio_vec *bvec = bio->bi_io_vec; |
| 424 | int bio_index = 0; | 415 | int bio_index = 0; |
| 416 | int index; | ||
| 425 | unsigned long total_bytes = 0; | 417 | unsigned long total_bytes = 0; |
| 426 | unsigned long this_sum_bytes = 0; | 418 | unsigned long this_sum_bytes = 0; |
| 427 | u64 offset; | 419 | u64 offset; |
| 428 | u64 disk_bytenr; | ||
| 429 | 420 | ||
| 430 | WARN_ON(bio->bi_vcnt <= 0); | 421 | WARN_ON(bio->bi_vcnt <= 0); |
| 431 | sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS); | 422 | sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS); |
| 432 | if (!sums) | 423 | if (!sums) |
| 433 | return -ENOMEM; | 424 | return -ENOMEM; |
| 434 | 425 | ||
| 435 | sector_sum = sums->sums; | ||
| 436 | disk_bytenr = (u64)bio->bi_sector << 9; | ||
| 437 | sums->len = bio->bi_size; | 426 | sums->len = bio->bi_size; |
| 438 | INIT_LIST_HEAD(&sums->list); | 427 | INIT_LIST_HEAD(&sums->list); |
| 439 | 428 | ||
| @@ -444,7 +433,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | |||
| 444 | 433 | ||
| 445 | ordered = btrfs_lookup_ordered_extent(inode, offset); | 434 | ordered = btrfs_lookup_ordered_extent(inode, offset); |
| 446 | BUG_ON(!ordered); /* Logic error */ | 435 | BUG_ON(!ordered); /* Logic error */ |
| 447 | sums->bytenr = ordered->start; | 436 | sums->bytenr = (u64)bio->bi_sector << 9; |
| 437 | index = 0; | ||
| 448 | 438 | ||
| 449 | while (bio_index < bio->bi_vcnt) { | 439 | while (bio_index < bio->bi_vcnt) { |
| 450 | if (!contig) | 440 | if (!contig) |
| @@ -463,28 +453,27 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | |||
| 463 | sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), | 453 | sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), |
| 464 | GFP_NOFS); | 454 | GFP_NOFS); |
| 465 | BUG_ON(!sums); /* -ENOMEM */ | 455 | BUG_ON(!sums); /* -ENOMEM */ |
| 466 | sector_sum = sums->sums; | ||
| 467 | sums->len = bytes_left; | 456 | sums->len = bytes_left; |
| 468 | ordered = btrfs_lookup_ordered_extent(inode, offset); | 457 | ordered = btrfs_lookup_ordered_extent(inode, offset); |
| 469 | BUG_ON(!ordered); /* Logic error */ | 458 | BUG_ON(!ordered); /* Logic error */ |
| 470 | sums->bytenr = ordered->start; | 459 | sums->bytenr = ((u64)bio->bi_sector << 9) + |
| 460 | total_bytes; | ||
| 461 | index = 0; | ||
| 471 | } | 462 | } |
| 472 | 463 | ||
| 473 | data = kmap_atomic(bvec->bv_page); | 464 | data = kmap_atomic(bvec->bv_page); |
| 474 | sector_sum->sum = ~(u32)0; | 465 | sums->sums[index] = ~(u32)0; |
| 475 | sector_sum->sum = btrfs_csum_data(data + bvec->bv_offset, | 466 | sums->sums[index] = btrfs_csum_data(data + bvec->bv_offset, |
| 476 | sector_sum->sum, | 467 | sums->sums[index], |
| 477 | bvec->bv_len); | 468 | bvec->bv_len); |
| 478 | kunmap_atomic(data); | 469 | kunmap_atomic(data); |
| 479 | btrfs_csum_final(sector_sum->sum, | 470 | btrfs_csum_final(sums->sums[index], |
| 480 | (char *)§or_sum->sum); | 471 | (char *)(sums->sums + index)); |
| 481 | sector_sum->bytenr = disk_bytenr; | ||
| 482 | 472 | ||
| 483 | sector_sum++; | ||
| 484 | bio_index++; | 473 | bio_index++; |
| 474 | index++; | ||
| 485 | total_bytes += bvec->bv_len; | 475 | total_bytes += bvec->bv_len; |
| 486 | this_sum_bytes += bvec->bv_len; | 476 | this_sum_bytes += bvec->bv_len; |
| 487 | disk_bytenr += bvec->bv_len; | ||
| 488 | offset += bvec->bv_len; | 477 | offset += bvec->bv_len; |
| 489 | bvec++; | 478 | bvec++; |
| 490 | } | 479 | } |
| @@ -672,62 +661,46 @@ out: | |||
| 672 | return ret; | 661 | return ret; |
| 673 | } | 662 | } |
| 674 | 663 | ||
| 675 | static u64 btrfs_sector_sum_left(struct btrfs_ordered_sum *sums, | ||
| 676 | struct btrfs_sector_sum *sector_sum, | ||
| 677 | u64 total_bytes, u64 sectorsize) | ||
| 678 | { | ||
| 679 | u64 tmp = sectorsize; | ||
| 680 | u64 next_sector = sector_sum->bytenr; | ||
| 681 | struct btrfs_sector_sum *next = sector_sum + 1; | ||
| 682 | |||
| 683 | while ((tmp + total_bytes) < sums->len) { | ||
| 684 | if (next_sector + sectorsize != next->bytenr) | ||
| 685 | break; | ||
| 686 | tmp += sectorsize; | ||
| 687 | next_sector = next->bytenr; | ||
| 688 | next++; | ||
| 689 | } | ||
| 690 | return tmp; | ||
| 691 | } | ||
| 692 | |||
| 693 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | 664 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, |
| 694 | struct btrfs_root *root, | 665 | struct btrfs_root *root, |
| 695 | struct btrfs_ordered_sum *sums) | 666 | struct btrfs_ordered_sum *sums) |
| 696 | { | 667 | { |
| 697 | u64 bytenr; | ||
| 698 | int ret; | ||
| 699 | struct btrfs_key file_key; | 668 | struct btrfs_key file_key; |
| 700 | struct btrfs_key found_key; | 669 | struct btrfs_key found_key; |
| 701 | u64 next_offset; | ||
| 702 | u64 total_bytes = 0; | ||
| 703 | int found_next; | ||
| 704 | struct btrfs_path *path; | 670 | struct btrfs_path *path; |
| 705 | struct btrfs_csum_item *item; | 671 | struct btrfs_csum_item *item; |
| 706 | struct btrfs_csum_item *item_end; | 672 | struct btrfs_csum_item *item_end; |
| 707 | struct extent_buffer *leaf = NULL; | 673 | struct extent_buffer *leaf = NULL; |
| 674 | u64 next_offset; | ||
| 675 | u64 total_bytes = 0; | ||
| 708 | u64 csum_offset; | 676 | u64 csum_offset; |
| 709 | struct btrfs_sector_sum *sector_sum; | 677 | u64 bytenr; |
| 710 | u32 nritems; | 678 | u32 nritems; |
| 711 | u32 ins_size; | 679 | u32 ins_size; |
| 680 | int index = 0; | ||
| 681 | int found_next; | ||
| 682 | int ret; | ||
| 712 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); | 683 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
| 713 | 684 | ||
| 714 | path = btrfs_alloc_path(); | 685 | path = btrfs_alloc_path(); |
| 715 | if (!path) | 686 | if (!path) |
| 716 | return -ENOMEM; | 687 | return -ENOMEM; |
| 717 | |||
| 718 | sector_sum = sums->sums; | ||
| 719 | again: | 688 | again: |
| 720 | next_offset = (u64)-1; | 689 | next_offset = (u64)-1; |
| 721 | found_next = 0; | 690 | found_next = 0; |
| 691 | bytenr = sums->bytenr + total_bytes; | ||
| 722 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 692 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
| 723 | file_key.offset = sector_sum->bytenr; | 693 | file_key.offset = bytenr; |
| 724 | bytenr = sector_sum->bytenr; | ||
| 725 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY); | 694 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY); |
| 726 | 695 | ||
| 727 | item = btrfs_lookup_csum(trans, root, path, sector_sum->bytenr, 1); | 696 | item = btrfs_lookup_csum(trans, root, path, bytenr, 1); |
| 728 | if (!IS_ERR(item)) { | 697 | if (!IS_ERR(item)) { |
| 729 | leaf = path->nodes[0]; | ||
| 730 | ret = 0; | 698 | ret = 0; |
| 699 | leaf = path->nodes[0]; | ||
| 700 | item_end = btrfs_item_ptr(leaf, path->slots[0], | ||
| 701 | struct btrfs_csum_item); | ||
| 702 | item_end = (struct btrfs_csum_item *)((char *)item_end + | ||
| 703 | btrfs_item_size_nr(leaf, path->slots[0])); | ||
| 731 | goto found; | 704 | goto found; |
| 732 | } | 705 | } |
| 733 | ret = PTR_ERR(item); | 706 | ret = PTR_ERR(item); |
| @@ -807,8 +780,7 @@ again: | |||
| 807 | 780 | ||
| 808 | free_space = btrfs_leaf_free_space(root, leaf) - | 781 | free_space = btrfs_leaf_free_space(root, leaf) - |
| 809 | sizeof(struct btrfs_item) - csum_size; | 782 | sizeof(struct btrfs_item) - csum_size; |
| 810 | tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes, | 783 | tmp = sums->len - total_bytes; |
| 811 | root->sectorsize); | ||
| 812 | tmp >>= root->fs_info->sb->s_blocksize_bits; | 784 | tmp >>= root->fs_info->sb->s_blocksize_bits; |
| 813 | WARN_ON(tmp < 1); | 785 | WARN_ON(tmp < 1); |
| 814 | 786 | ||
| @@ -822,6 +794,7 @@ again: | |||
| 822 | diff *= csum_size; | 794 | diff *= csum_size; |
| 823 | 795 | ||
| 824 | btrfs_extend_item(root, path, diff); | 796 | btrfs_extend_item(root, path, diff); |
| 797 | ret = 0; | ||
| 825 | goto csum; | 798 | goto csum; |
| 826 | } | 799 | } |
| 827 | 800 | ||
| @@ -831,8 +804,7 @@ insert: | |||
| 831 | if (found_next) { | 804 | if (found_next) { |
| 832 | u64 tmp; | 805 | u64 tmp; |
| 833 | 806 | ||
| 834 | tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes, | 807 | tmp = sums->len - total_bytes; |
| 835 | root->sectorsize); | ||
| 836 | tmp >>= root->fs_info->sb->s_blocksize_bits; | 808 | tmp >>= root->fs_info->sb->s_blocksize_bits; |
| 837 | tmp = min(tmp, (next_offset - file_key.offset) >> | 809 | tmp = min(tmp, (next_offset - file_key.offset) >> |
| 838 | root->fs_info->sb->s_blocksize_bits); | 810 | root->fs_info->sb->s_blocksize_bits); |
| @@ -853,31 +825,25 @@ insert: | |||
| 853 | WARN_ON(1); | 825 | WARN_ON(1); |
| 854 | goto fail_unlock; | 826 | goto fail_unlock; |
| 855 | } | 827 | } |
| 856 | csum: | ||
| 857 | leaf = path->nodes[0]; | 828 | leaf = path->nodes[0]; |
| 829 | csum: | ||
| 858 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); | 830 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); |
| 859 | ret = 0; | 831 | item_end = (struct btrfs_csum_item *)((unsigned char *)item + |
| 832 | btrfs_item_size_nr(leaf, path->slots[0])); | ||
| 860 | item = (struct btrfs_csum_item *)((unsigned char *)item + | 833 | item = (struct btrfs_csum_item *)((unsigned char *)item + |
| 861 | csum_offset * csum_size); | 834 | csum_offset * csum_size); |
| 862 | found: | 835 | found: |
| 863 | item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); | 836 | ins_size = (u32)(sums->len - total_bytes) >> |
| 864 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + | 837 | root->fs_info->sb->s_blocksize_bits; |
| 865 | btrfs_item_size_nr(leaf, path->slots[0])); | 838 | ins_size *= csum_size; |
| 866 | next_sector: | 839 | ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item, |
| 867 | 840 | ins_size); | |
| 868 | write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size); | 841 | write_extent_buffer(leaf, sums->sums + index, (unsigned long)item, |
| 869 | 842 | ins_size); | |
| 870 | total_bytes += root->sectorsize; | 843 | |
| 871 | sector_sum++; | 844 | ins_size /= csum_size; |
| 872 | if (total_bytes < sums->len) { | 845 | total_bytes += ins_size * root->sectorsize; |
| 873 | item = (struct btrfs_csum_item *)((char *)item + | 846 | index += ins_size; |
| 874 | csum_size); | ||
| 875 | if (item < item_end && bytenr + PAGE_CACHE_SIZE == | ||
| 876 | sector_sum->bytenr) { | ||
| 877 | bytenr = sector_sum->bytenr; | ||
| 878 | goto next_sector; | ||
| 879 | } | ||
| 880 | } | ||
| 881 | 847 | ||
| 882 | btrfs_mark_buffer_dirty(path->nodes[0]); | 848 | btrfs_mark_buffer_dirty(path->nodes[0]); |
| 883 | if (total_bytes < sums->len) { | 849 | if (total_bytes < sums->len) { |
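The btrfs_csum_file_blocks() rework above drops the per-sector `next_sector` loop: the new code computes how many checksum bytes are still pending from `sums->len - total_bytes`, clamps that to the room left between `item` and `item_end`, and copies the whole run with a single write_extent_buffer() call. The standalone sketch below only replays that `ins_size` arithmetic in user space so the clamping is easy to follow; the 4 KiB sector size and 4-byte crc32c checksum size are illustrative assumptions, not values taken from the diff.

```c
#include <stdio.h>
#include <stdint.h>

/* Illustrative constants: 4 KiB sectors, 4-byte crc32c checksums. */
#define BLOCKSIZE_BITS	12
#define CSUM_SIZE	4

static uint32_t min_u32(uint32_t a, uint32_t b) { return a < b ? a : b; }

int main(void)
{
	uint64_t sums_len    = 1 << 20;	/* 1 MiB of data still needs checksums      */
	uint64_t total_bytes = 0;	/* bytes whose checksums are already stored */
	uint32_t item_room   = 256;	/* free bytes left in the current csum item */

	/* Mirrors the new "found:" path: how many csum bytes could we write? */
	uint32_t ins_size = (uint32_t)((sums_len - total_bytes) >> BLOCKSIZE_BITS);
	ins_size *= CSUM_SIZE;

	/* Clamp to the room between 'item' and 'item_end' in the leaf. */
	ins_size = min_u32(item_room, ins_size);

	/* Account for what actually fit, exactly like the kernel code. */
	uint32_t csums_written = ins_size / CSUM_SIZE;
	total_bytes += (uint64_t)csums_written << BLOCKSIZE_BITS;

	printf("wrote %u checksums (%u bytes), %llu data bytes covered\n",
	       (unsigned)csums_written, (unsigned)ins_size,
	       (unsigned long long)total_bytes);
	return 0;
}
```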
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4205ba752d40..a005fe2c072a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -309,10 +309,6 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, | |||
| 309 | ret = PTR_ERR(inode_root); | 309 | ret = PTR_ERR(inode_root); |
| 310 | goto cleanup; | 310 | goto cleanup; |
| 311 | } | 311 | } |
| 312 | if (btrfs_root_refs(&inode_root->root_item) == 0) { | ||
| 313 | ret = -ENOENT; | ||
| 314 | goto cleanup; | ||
| 315 | } | ||
| 316 | 312 | ||
| 317 | key.objectid = defrag->ino; | 313 | key.objectid = defrag->ino; |
| 318 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 314 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); |
| @@ -1317,6 +1313,56 @@ fail: | |||
| 1317 | 1313 | ||
| 1318 | } | 1314 | } |
| 1319 | 1315 | ||
| 1316 | static noinline int check_can_nocow(struct inode *inode, loff_t pos, | ||
| 1317 | size_t *write_bytes) | ||
| 1318 | { | ||
| 1319 | struct btrfs_trans_handle *trans; | ||
| 1320 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1321 | struct btrfs_ordered_extent *ordered; | ||
| 1322 | u64 lockstart, lockend; | ||
| 1323 | u64 num_bytes; | ||
| 1324 | int ret; | ||
| 1325 | |||
| 1326 | lockstart = round_down(pos, root->sectorsize); | ||
| 1327 | lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1; | ||
| 1328 | |||
| 1329 | while (1) { | ||
| 1330 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | ||
| 1331 | ordered = btrfs_lookup_ordered_range(inode, lockstart, | ||
| 1332 | lockend - lockstart + 1); | ||
| 1333 | if (!ordered) { | ||
| 1334 | break; | ||
| 1335 | } | ||
| 1336 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | ||
| 1337 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 1338 | btrfs_put_ordered_extent(ordered); | ||
| 1339 | } | ||
| 1340 | |||
| 1341 | trans = btrfs_join_transaction(root); | ||
| 1342 | if (IS_ERR(trans)) { | ||
| 1343 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | ||
| 1344 | return PTR_ERR(trans); | ||
| 1345 | } | ||
| 1346 | |||
| 1347 | num_bytes = lockend - lockstart + 1; | ||
| 1348 | ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL, | ||
| 1349 | NULL); | ||
| 1350 | btrfs_end_transaction(trans, root); | ||
| 1351 | if (ret <= 0) { | ||
| 1352 | ret = 0; | ||
| 1353 | } else { | ||
| 1354 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
| 1355 | EXTENT_DIRTY | EXTENT_DELALLOC | | ||
| 1356 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, | ||
| 1357 | NULL, GFP_NOFS); | ||
| 1358 | *write_bytes = min_t(size_t, *write_bytes, num_bytes); | ||
| 1359 | } | ||
| 1360 | |||
| 1361 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | ||
| 1362 | |||
| 1363 | return ret; | ||
| 1364 | } | ||
| 1365 | |||
| 1320 | static noinline ssize_t __btrfs_buffered_write(struct file *file, | 1366 | static noinline ssize_t __btrfs_buffered_write(struct file *file, |
| 1321 | struct iov_iter *i, | 1367 | struct iov_iter *i, |
| 1322 | loff_t pos) | 1368 | loff_t pos) |
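The new check_can_nocow() helper above first aligns the write to sector boundaries (round_down for the start, round_up for the length), waits out any ordered extents in that range, and then asks can_nocow_extent() whether the blocks can be overwritten in place. The snippet below only demonstrates the lockstart/lockend alignment arithmetic, using plain macros equivalent to the kernel's round_down()/round_up(); the 4096-byte sector size is an assumption for illustration.

```c
#include <stdio.h>
#include <stdint.h>

/* Power-of-two rounding, equivalent to the kernel's round_down()/round_up(). */
#define ROUND_DOWN(x, a)	((x) & ~((uint64_t)(a) - 1))
#define ROUND_UP(x, a)		(ROUND_DOWN((x) + (a) - 1, (a)))

int main(void)
{
	uint64_t sectorsize  = 4096;	/* assumed; comes from root->sectorsize */
	uint64_t pos         = 10000;	/* file position of the write           */
	uint64_t write_bytes = 3000;	/* length of the write                  */

	/* Same range check_can_nocow() locks before probing the extent. */
	uint64_t lockstart = ROUND_DOWN(pos, sectorsize);
	uint64_t lockend   = lockstart + ROUND_UP(write_bytes, sectorsize) - 1;

	printf("lock range: [%llu, %llu] (%llu bytes)\n",
	       (unsigned long long)lockstart,
	       (unsigned long long)lockend,
	       (unsigned long long)(lockend - lockstart + 1));
	return 0;
}
```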
| @@ -1324,10 +1370,12 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
| 1324 | struct inode *inode = file_inode(file); | 1370 | struct inode *inode = file_inode(file); |
| 1325 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1371 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1326 | struct page **pages = NULL; | 1372 | struct page **pages = NULL; |
| 1373 | u64 release_bytes = 0; | ||
| 1327 | unsigned long first_index; | 1374 | unsigned long first_index; |
| 1328 | size_t num_written = 0; | 1375 | size_t num_written = 0; |
| 1329 | int nrptrs; | 1376 | int nrptrs; |
| 1330 | int ret = 0; | 1377 | int ret = 0; |
| 1378 | bool only_release_metadata = false; | ||
| 1331 | bool force_page_uptodate = false; | 1379 | bool force_page_uptodate = false; |
| 1332 | 1380 | ||
| 1333 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / | 1381 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / |
| @@ -1348,6 +1396,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
| 1348 | offset); | 1396 | offset); |
| 1349 | size_t num_pages = (write_bytes + offset + | 1397 | size_t num_pages = (write_bytes + offset + |
| 1350 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1398 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
| 1399 | size_t reserve_bytes; | ||
| 1351 | size_t dirty_pages; | 1400 | size_t dirty_pages; |
| 1352 | size_t copied; | 1401 | size_t copied; |
| 1353 | 1402 | ||
| @@ -1362,11 +1411,41 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
| 1362 | break; | 1411 | break; |
| 1363 | } | 1412 | } |
| 1364 | 1413 | ||
| 1365 | ret = btrfs_delalloc_reserve_space(inode, | 1414 | reserve_bytes = num_pages << PAGE_CACHE_SHIFT; |
| 1366 | num_pages << PAGE_CACHE_SHIFT); | 1415 | ret = btrfs_check_data_free_space(inode, reserve_bytes); |
| 1416 | if (ret == -ENOSPC && | ||
| 1417 | (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | | ||
| 1418 | BTRFS_INODE_PREALLOC))) { | ||
| 1419 | ret = check_can_nocow(inode, pos, &write_bytes); | ||
| 1420 | if (ret > 0) { | ||
| 1421 | only_release_metadata = true; | ||
| 1422 | /* | ||
| 1423 | * our prealloc extent may be smaller than | ||
| 1424 | * write_bytes, so scale down. | ||
| 1425 | */ | ||
| 1426 | num_pages = (write_bytes + offset + | ||
| 1427 | PAGE_CACHE_SIZE - 1) >> | ||
| 1428 | PAGE_CACHE_SHIFT; | ||
| 1429 | reserve_bytes = num_pages << PAGE_CACHE_SHIFT; | ||
| 1430 | ret = 0; | ||
| 1431 | } else { | ||
| 1432 | ret = -ENOSPC; | ||
| 1433 | } | ||
| 1434 | } | ||
| 1435 | |||
| 1367 | if (ret) | 1436 | if (ret) |
| 1368 | break; | 1437 | break; |
| 1369 | 1438 | ||
| 1439 | ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); | ||
| 1440 | if (ret) { | ||
| 1441 | if (!only_release_metadata) | ||
| 1442 | btrfs_free_reserved_data_space(inode, | ||
| 1443 | reserve_bytes); | ||
| 1444 | break; | ||
| 1445 | } | ||
| 1446 | |||
| 1447 | release_bytes = reserve_bytes; | ||
| 1448 | |||
| 1370 | /* | 1449 | /* |
| 1371 | * This is going to setup the pages array with the number of | 1450 | * This is going to setup the pages array with the number of |
| 1372 | * pages we want, so we don't really need to worry about the | 1451 | * pages we want, so we don't really need to worry about the |
| @@ -1375,11 +1454,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
| 1375 | ret = prepare_pages(root, file, pages, num_pages, | 1454 | ret = prepare_pages(root, file, pages, num_pages, |
| 1376 | pos, first_index, write_bytes, | 1455 | pos, first_index, write_bytes, |
| 1377 | force_page_uptodate); | 1456 | force_page_uptodate); |
| 1378 | if (ret) { | 1457 | if (ret) |
| 1379 | btrfs_delalloc_release_space(inode, | ||
| 1380 | num_pages << PAGE_CACHE_SHIFT); | ||
| 1381 | break; | 1458 | break; |
| 1382 | } | ||
| 1383 | 1459 | ||
| 1384 | copied = btrfs_copy_from_user(pos, num_pages, | 1460 | copied = btrfs_copy_from_user(pos, num_pages, |
| 1385 | write_bytes, pages, i); | 1461 | write_bytes, pages, i); |
| @@ -1409,30 +1485,46 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
| 1409 | * managed to copy. | 1485 | * managed to copy. |
| 1410 | */ | 1486 | */ |
| 1411 | if (num_pages > dirty_pages) { | 1487 | if (num_pages > dirty_pages) { |
| 1488 | release_bytes = (num_pages - dirty_pages) << | ||
| 1489 | PAGE_CACHE_SHIFT; | ||
| 1412 | if (copied > 0) { | 1490 | if (copied > 0) { |
| 1413 | spin_lock(&BTRFS_I(inode)->lock); | 1491 | spin_lock(&BTRFS_I(inode)->lock); |
| 1414 | BTRFS_I(inode)->outstanding_extents++; | 1492 | BTRFS_I(inode)->outstanding_extents++; |
| 1415 | spin_unlock(&BTRFS_I(inode)->lock); | 1493 | spin_unlock(&BTRFS_I(inode)->lock); |
| 1416 | } | 1494 | } |
| 1417 | btrfs_delalloc_release_space(inode, | 1495 | if (only_release_metadata) |
| 1418 | (num_pages - dirty_pages) << | 1496 | btrfs_delalloc_release_metadata(inode, |
| 1419 | PAGE_CACHE_SHIFT); | 1497 | release_bytes); |
| 1498 | else | ||
| 1499 | btrfs_delalloc_release_space(inode, | ||
| 1500 | release_bytes); | ||
| 1420 | } | 1501 | } |
| 1421 | 1502 | ||
| 1503 | release_bytes = dirty_pages << PAGE_CACHE_SHIFT; | ||
| 1422 | if (copied > 0) { | 1504 | if (copied > 0) { |
| 1423 | ret = btrfs_dirty_pages(root, inode, pages, | 1505 | ret = btrfs_dirty_pages(root, inode, pages, |
| 1424 | dirty_pages, pos, copied, | 1506 | dirty_pages, pos, copied, |
| 1425 | NULL); | 1507 | NULL); |
| 1426 | if (ret) { | 1508 | if (ret) { |
| 1427 | btrfs_delalloc_release_space(inode, | ||
| 1428 | dirty_pages << PAGE_CACHE_SHIFT); | ||
| 1429 | btrfs_drop_pages(pages, num_pages); | 1509 | btrfs_drop_pages(pages, num_pages); |
| 1430 | break; | 1510 | break; |
| 1431 | } | 1511 | } |
| 1432 | } | 1512 | } |
| 1433 | 1513 | ||
| 1514 | release_bytes = 0; | ||
| 1434 | btrfs_drop_pages(pages, num_pages); | 1515 | btrfs_drop_pages(pages, num_pages); |
| 1435 | 1516 | ||
| 1517 | if (only_release_metadata && copied > 0) { | ||
| 1518 | u64 lockstart = round_down(pos, root->sectorsize); | ||
| 1519 | u64 lockend = lockstart + | ||
| 1520 | (dirty_pages << PAGE_CACHE_SHIFT) - 1; | ||
| 1521 | |||
| 1522 | set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
| 1523 | lockend, EXTENT_NORESERVE, NULL, | ||
| 1524 | NULL, GFP_NOFS); | ||
| 1525 | only_release_metadata = false; | ||
| 1526 | } | ||
| 1527 | |||
| 1436 | cond_resched(); | 1528 | cond_resched(); |
| 1437 | 1529 | ||
| 1438 | balance_dirty_pages_ratelimited(inode->i_mapping); | 1530 | balance_dirty_pages_ratelimited(inode->i_mapping); |
| @@ -1445,6 +1537,13 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
| 1445 | 1537 | ||
| 1446 | kfree(pages); | 1538 | kfree(pages); |
| 1447 | 1539 | ||
| 1540 | if (release_bytes) { | ||
| 1541 | if (only_release_metadata) | ||
| 1542 | btrfs_delalloc_release_metadata(inode, release_bytes); | ||
| 1543 | else | ||
| 1544 | btrfs_delalloc_release_space(inode, release_bytes); | ||
| 1545 | } | ||
| 1546 | |||
| 1448 | return num_written ? num_written : ret; | 1547 | return num_written ? num_written : ret; |
| 1449 | } | 1548 | } |
| 1450 | 1549 | ||
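Taken together, the __btrfs_buffered_write() hunks above keep a single `release_bytes` counter and an `only_release_metadata` flag: when the data-space reservation fails with ENOSPC but a NOCOW or prealloc extent lets the write proceed, only metadata was reserved, so only metadata may be handed back on error paths or short copies. The user-space model below mirrors that pairing; `release_metadata()` and `release_data_and_metadata()` are hypothetical stand-ins for btrfs_delalloc_release_metadata() and btrfs_delalloc_release_space().

```c
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-ins for the two btrfs release paths. */
static void release_metadata(uint64_t bytes)
{
	printf("release %llu bytes of metadata reservation\n",
	       (unsigned long long)bytes);
}

static void release_data_and_metadata(uint64_t bytes)
{
	printf("release %llu bytes of data + metadata reservation\n",
	       (unsigned long long)bytes);
}

/*
 * Mirror of the cleanup rule in __btrfs_buffered_write(): whatever is still
 * accounted in release_bytes goes back through exactly one of the two paths,
 * depending on how it was reserved in the first place.
 */
static void release_leftover(uint64_t release_bytes, bool only_release_metadata)
{
	if (!release_bytes)
		return;
	if (only_release_metadata)
		release_metadata(release_bytes);
	else
		release_data_and_metadata(release_bytes);
}

int main(void)
{
	/* NOCOW fallback: data space was never reserved, metadata only. */
	release_leftover(128 * 1024, true);
	/* Normal path: both data and metadata were reserved up front. */
	release_leftover(128 * 1024, false);
	return 0;
}
```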
| @@ -2175,12 +2274,6 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 2175 | goto out_reserve_fail; | 2274 | goto out_reserve_fail; |
| 2176 | } | 2275 | } |
| 2177 | 2276 | ||
| 2178 | /* | ||
| 2179 | * wait for ordered IO before we have any locks. We'll loop again | ||
| 2180 | * below with the locks held. | ||
| 2181 | */ | ||
| 2182 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
| 2183 | |||
| 2184 | mutex_lock(&inode->i_mutex); | 2277 | mutex_lock(&inode->i_mutex); |
| 2185 | ret = inode_newsize_ok(inode, alloc_end); | 2278 | ret = inode_newsize_ok(inode, alloc_end); |
| 2186 | if (ret) | 2279 | if (ret) |
| @@ -2191,8 +2284,23 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 2191 | alloc_start); | 2284 | alloc_start); |
| 2192 | if (ret) | 2285 | if (ret) |
| 2193 | goto out; | 2286 | goto out; |
| 2287 | } else { | ||
| 2288 | /* | ||
| 2289 | * If we are fallocating from the end of the file onward we | ||
| 2290 | * need to zero out the end of the page if i_size lands in the | ||
| 2291 | * middle of a page. | ||
| 2292 | */ | ||
| 2293 | ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); | ||
| 2294 | if (ret) | ||
| 2295 | goto out; | ||
| 2194 | } | 2296 | } |
| 2195 | 2297 | ||
| 2298 | /* | ||
| 2299 | * wait for ordered IO before we have any locks. We'll loop again | ||
| 2300 | * below with the locks held. | ||
| 2301 | */ | ||
| 2302 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
| 2303 | |||
| 2196 | locked_end = alloc_end - 1; | 2304 | locked_end = alloc_end - 1; |
| 2197 | while (1) { | 2305 | while (1) { |
| 2198 | struct btrfs_ordered_extent *ordered; | 2306 | struct btrfs_ordered_extent *ordered; |
| @@ -2425,20 +2533,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) | |||
| 2425 | } | 2533 | } |
| 2426 | } | 2534 | } |
| 2427 | 2535 | ||
| 2428 | if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) { | 2536 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
| 2429 | offset = -EINVAL; | ||
| 2430 | goto out; | ||
| 2431 | } | ||
| 2432 | if (offset > inode->i_sb->s_maxbytes) { | ||
| 2433 | offset = -EINVAL; | ||
| 2434 | goto out; | ||
| 2435 | } | ||
| 2436 | |||
| 2437 | /* Special lock needed here? */ | ||
| 2438 | if (offset != file->f_pos) { | ||
| 2439 | file->f_pos = offset; | ||
| 2440 | file->f_version = 0; | ||
| 2441 | } | ||
| 2442 | out: | 2537 | out: |
| 2443 | mutex_unlock(&inode->i_mutex); | 2538 | mutex_unlock(&inode->i_mutex); |
| 2444 | return offset; | 2539 | return offset; |
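The llseek hunk above replaces the open-coded offset validation and f_pos update with a single vfs_setpos() call. As a rough sketch of the logic the removed lines implemented (and that the generic helper now performs), here is a simplified user-space model; it is an approximation for illustration, not the actual VFS code.

```c
#include <stdio.h>
#include <stdint.h>
#include <errno.h>

struct fake_file {
	long long f_pos;
	uint64_t  f_version;
	int       unsigned_offset_ok;	/* models FMODE_UNSIGNED_OFFSET */
};

/* Simplified model of what vfs_setpos() does for btrfs_file_llseek(). */
static long long fake_setpos(struct fake_file *file, long long offset,
			     long long maxbytes)
{
	if (offset < 0 && !file->unsigned_offset_ok)
		return -EINVAL;
	if (offset > maxbytes)
		return -EINVAL;
	if (offset != file->f_pos) {
		file->f_pos = offset;
		file->f_version = 0;	/* force stale position state to be dropped */
	}
	return offset;
}

int main(void)
{
	struct fake_file f = { .f_pos = 0, .f_version = 7, .unsigned_offset_ok = 0 };

	printf("seek to 4096 -> %lld\n", fake_setpos(&f, 4096, 1LL << 40));
	printf("seek to -1   -> %lld\n", fake_setpos(&f, -1, 1LL << 40));
	return 0;
}
```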
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e53009657f0e..b21a3cd667d8 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
| @@ -213,7 +213,7 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, | |||
| 213 | else | 213 | else |
| 214 | ret = 0; | 214 | ret = 0; |
| 215 | spin_unlock(&rsv->lock); | 215 | spin_unlock(&rsv->lock); |
| 216 | return 0; | 216 | return ret; |
| 217 | } | 217 | } |
| 218 | 218 | ||
| 219 | int btrfs_truncate_free_space_cache(struct btrfs_root *root, | 219 | int btrfs_truncate_free_space_cache(struct btrfs_root *root, |
| @@ -3150,6 +3150,8 @@ again: | |||
| 3150 | return 0; | 3150 | return 0; |
| 3151 | } | 3151 | } |
| 3152 | 3152 | ||
| 3153 | #define test_msg(fmt, ...) printk(KERN_INFO "btrfs: selftest: " fmt, ##__VA_ARGS__) | ||
| 3154 | |||
| 3153 | /* | 3155 | /* |
| 3154 | * This test just does basic sanity checking, making sure we can add an exten | 3156 | * This test just does basic sanity checking, making sure we can add an exten |
| 3155 | * entry and remove space from either end and the middle, and make sure we can | 3157 | * entry and remove space from either end and the middle, and make sure we can |
| @@ -3159,63 +3161,63 @@ static int test_extents(struct btrfs_block_group_cache *cache) | |||
| 3159 | { | 3161 | { |
| 3160 | int ret = 0; | 3162 | int ret = 0; |
| 3161 | 3163 | ||
| 3162 | printk(KERN_ERR "Running extent only tests\n"); | 3164 | test_msg("Running extent only tests\n"); |
| 3163 | 3165 | ||
| 3164 | /* First just make sure we can remove an entire entry */ | 3166 | /* First just make sure we can remove an entire entry */ |
| 3165 | ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); | 3167 | ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); |
| 3166 | if (ret) { | 3168 | if (ret) { |
| 3167 | printk(KERN_ERR "Error adding initial extents %d\n", ret); | 3169 | test_msg("Error adding initial extents %d\n", ret); |
| 3168 | return ret; | 3170 | return ret; |
| 3169 | } | 3171 | } |
| 3170 | 3172 | ||
| 3171 | ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); | 3173 | ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); |
| 3172 | if (ret) { | 3174 | if (ret) { |
| 3173 | printk(KERN_ERR "Error removing extent %d\n", ret); | 3175 | test_msg("Error removing extent %d\n", ret); |
| 3174 | return ret; | 3176 | return ret; |
| 3175 | } | 3177 | } |
| 3176 | 3178 | ||
| 3177 | if (check_exists(cache, 0, 4 * 1024 * 1024)) { | 3179 | if (check_exists(cache, 0, 4 * 1024 * 1024)) { |
| 3178 | printk(KERN_ERR "Full remove left some lingering space\n"); | 3180 | test_msg("Full remove left some lingering space\n"); |
| 3179 | return -1; | 3181 | return -1; |
| 3180 | } | 3182 | } |
| 3181 | 3183 | ||
| 3182 | /* Ok edge and middle cases now */ | 3184 | /* Ok edge and middle cases now */ |
| 3183 | ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); | 3185 | ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); |
| 3184 | if (ret) { | 3186 | if (ret) { |
| 3185 | printk(KERN_ERR "Error adding half extent %d\n", ret); | 3187 | test_msg("Error adding half extent %d\n", ret); |
| 3186 | return ret; | 3188 | return ret; |
| 3187 | } | 3189 | } |
| 3188 | 3190 | ||
| 3189 | ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024); | 3191 | ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024); |
| 3190 | if (ret) { | 3192 | if (ret) { |
| 3191 | printk(KERN_ERR "Error removing tail end %d\n", ret); | 3193 | test_msg("Error removing tail end %d\n", ret); |
| 3192 | return ret; | 3194 | return ret; |
| 3193 | } | 3195 | } |
| 3194 | 3196 | ||
| 3195 | ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); | 3197 | ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); |
| 3196 | if (ret) { | 3198 | if (ret) { |
| 3197 | printk(KERN_ERR "Error removing front end %d\n", ret); | 3199 | test_msg("Error removing front end %d\n", ret); |
| 3198 | return ret; | 3200 | return ret; |
| 3199 | } | 3201 | } |
| 3200 | 3202 | ||
| 3201 | ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); | 3203 | ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); |
| 3202 | if (ret) { | 3204 | if (ret) { |
| 3203 | printk(KERN_ERR "Error removing middle peice %d\n", ret); | 3205 | test_msg("Error removing middle piece %d\n", ret); |
| 3204 | return ret; | 3206 | return ret; |
| 3205 | } | 3207 | } |
| 3206 | 3208 | ||
| 3207 | if (check_exists(cache, 0, 1 * 1024 * 1024)) { | 3209 | if (check_exists(cache, 0, 1 * 1024 * 1024)) { |
| 3208 | printk(KERN_ERR "Still have space at the front\n"); | 3210 | test_msg("Still have space at the front\n"); |
| 3209 | return -1; | 3211 | return -1; |
| 3210 | } | 3212 | } |
| 3211 | 3213 | ||
| 3212 | if (check_exists(cache, 2 * 1024 * 1024, 4096)) { | 3214 | if (check_exists(cache, 2 * 1024 * 1024, 4096)) { |
| 3213 | printk(KERN_ERR "Still have space in the middle\n"); | 3215 | test_msg("Still have space in the middle\n"); |
| 3214 | return -1; | 3216 | return -1; |
| 3215 | } | 3217 | } |
| 3216 | 3218 | ||
| 3217 | if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) { | 3219 | if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) { |
| 3218 | printk(KERN_ERR "Still have space at the end\n"); | 3220 | test_msg("Still have space at the end\n"); |
| 3219 | return -1; | 3221 | return -1; |
| 3220 | } | 3222 | } |
| 3221 | 3223 | ||
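The test_msg() macro introduced at the top of the selftest block simply prefixes the output with "btrfs: selftest: " and drops the log level from KERN_ERR to KERN_INFO; the remaining hunks in this file are a mechanical conversion of the printk() calls to it. A minimal user-space analogue of the same variadic-macro pattern, assuming a GNU-compatible compiler for `##__VA_ARGS__`:

```c
#include <stdio.h>

/* Same shape as the kernel's test_msg(): fixed prefix + forwarded varargs. */
#define test_msg(fmt, ...) \
	fprintf(stderr, "btrfs: selftest: " fmt, ##__VA_ARGS__)

int main(void)
{
	int ret = -12;

	test_msg("Running extent only tests\n");
	test_msg("Error adding initial extents %d\n", ret);
	return 0;
}
```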
| @@ -3230,34 +3232,34 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) | |||
| 3230 | u64 next_bitmap_offset; | 3232 | u64 next_bitmap_offset; |
| 3231 | int ret; | 3233 | int ret; |
| 3232 | 3234 | ||
| 3233 | printk(KERN_ERR "Running bitmap only tests\n"); | 3235 | test_msg("Running bitmap only tests\n"); |
| 3234 | 3236 | ||
| 3235 | ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); | 3237 | ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); |
| 3236 | if (ret) { | 3238 | if (ret) { |
| 3237 | printk(KERN_ERR "Couldn't create a bitmap entry %d\n", ret); | 3239 | test_msg("Couldn't create a bitmap entry %d\n", ret); |
| 3238 | return ret; | 3240 | return ret; |
| 3239 | } | 3241 | } |
| 3240 | 3242 | ||
| 3241 | ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); | 3243 | ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); |
| 3242 | if (ret) { | 3244 | if (ret) { |
| 3243 | printk(KERN_ERR "Error removing bitmap full range %d\n", ret); | 3245 | test_msg("Error removing bitmap full range %d\n", ret); |
| 3244 | return ret; | 3246 | return ret; |
| 3245 | } | 3247 | } |
| 3246 | 3248 | ||
| 3247 | if (check_exists(cache, 0, 4 * 1024 * 1024)) { | 3249 | if (check_exists(cache, 0, 4 * 1024 * 1024)) { |
| 3248 | printk(KERN_ERR "Left some space in bitmap\n"); | 3250 | test_msg("Left some space in bitmap\n"); |
| 3249 | return -1; | 3251 | return -1; |
| 3250 | } | 3252 | } |
| 3251 | 3253 | ||
| 3252 | ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); | 3254 | ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); |
| 3253 | if (ret) { | 3255 | if (ret) { |
| 3254 | printk(KERN_ERR "Couldn't add to our bitmap entry %d\n", ret); | 3256 | test_msg("Couldn't add to our bitmap entry %d\n", ret); |
| 3255 | return ret; | 3257 | return ret; |
| 3256 | } | 3258 | } |
| 3257 | 3259 | ||
| 3258 | ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024); | 3260 | ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024); |
| 3259 | if (ret) { | 3261 | if (ret) { |
| 3260 | printk(KERN_ERR "Couldn't remove middle chunk %d\n", ret); | 3262 | test_msg("Couldn't remove middle chunk %d\n", ret); |
| 3261 | return ret; | 3263 | return ret; |
| 3262 | } | 3264 | } |
| 3263 | 3265 | ||
| @@ -3271,21 +3273,21 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) | |||
| 3271 | ret = add_free_space_entry(cache, next_bitmap_offset - | 3273 | ret = add_free_space_entry(cache, next_bitmap_offset - |
| 3272 | (2 * 1024 * 1024), 4 * 1024 * 1024, 1); | 3274 | (2 * 1024 * 1024), 4 * 1024 * 1024, 1); |
| 3273 | if (ret) { | 3275 | if (ret) { |
| 3274 | printk(KERN_ERR "Couldn't add space that straddles two bitmaps" | 3276 | test_msg("Couldn't add space that straddles two bitmaps %d\n", |
| 3275 | " %d\n", ret); | 3277 | ret); |
| 3276 | return ret; | 3278 | return ret; |
| 3277 | } | 3279 | } |
| 3278 | 3280 | ||
| 3279 | ret = btrfs_remove_free_space(cache, next_bitmap_offset - | 3281 | ret = btrfs_remove_free_space(cache, next_bitmap_offset - |
| 3280 | (1 * 1024 * 1024), 2 * 1024 * 1024); | 3282 | (1 * 1024 * 1024), 2 * 1024 * 1024); |
| 3281 | if (ret) { | 3283 | if (ret) { |
| 3282 | printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret); | 3284 | test_msg("Couldn't remove overlapping space %d\n", ret); |
| 3283 | return ret; | 3285 | return ret; |
| 3284 | } | 3286 | } |
| 3285 | 3287 | ||
| 3286 | if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024), | 3288 | if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024), |
| 3287 | 2 * 1024 * 1024)) { | 3289 | 2 * 1024 * 1024)) { |
| 3288 | printk(KERN_ERR "Left some space when removing overlapping\n"); | 3290 | test_msg("Left some space when removing overlapping\n"); |
| 3289 | return -1; | 3291 | return -1; |
| 3290 | } | 3292 | } |
| 3291 | 3293 | ||
| @@ -3300,7 +3302,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
| 3300 | u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); | 3302 | u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); |
| 3301 | int ret; | 3303 | int ret; |
| 3302 | 3304 | ||
| 3303 | printk(KERN_ERR "Running bitmap and extent tests\n"); | 3305 | test_msg("Running bitmap and extent tests\n"); |
| 3304 | 3306 | ||
| 3305 | /* | 3307 | /* |
| 3306 | * First let's do something simple, an extent at the same offset as the | 3308 | * First let's do something simple, an extent at the same offset as the |
| @@ -3309,42 +3311,42 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
| 3309 | */ | 3311 | */ |
| 3310 | ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1); | 3312 | ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1); |
| 3311 | if (ret) { | 3313 | if (ret) { |
| 3312 | printk(KERN_ERR "Couldn't create bitmap entry %d\n", ret); | 3314 | test_msg("Couldn't create bitmap entry %d\n", ret); |
| 3313 | return ret; | 3315 | return ret; |
| 3314 | } | 3316 | } |
| 3315 | 3317 | ||
| 3316 | ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); | 3318 | ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); |
| 3317 | if (ret) { | 3319 | if (ret) { |
| 3318 | printk(KERN_ERR "Couldn't add extent entry %d\n", ret); | 3320 | test_msg("Couldn't add extent entry %d\n", ret); |
| 3319 | return ret; | 3321 | return ret; |
| 3320 | } | 3322 | } |
| 3321 | 3323 | ||
| 3322 | ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); | 3324 | ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); |
| 3323 | if (ret) { | 3325 | if (ret) { |
| 3324 | printk(KERN_ERR "Couldn't remove extent entry %d\n", ret); | 3326 | test_msg("Couldn't remove extent entry %d\n", ret); |
| 3325 | return ret; | 3327 | return ret; |
| 3326 | } | 3328 | } |
| 3327 | 3329 | ||
| 3328 | if (check_exists(cache, 0, 1 * 1024 * 1024)) { | 3330 | if (check_exists(cache, 0, 1 * 1024 * 1024)) { |
| 3329 | printk(KERN_ERR "Left remnants after our remove\n"); | 3331 | test_msg("Left remnants after our remove\n"); |
| 3330 | return -1; | 3332 | return -1; |
| 3331 | } | 3333 | } |
| 3332 | 3334 | ||
| 3333 | /* Now to add back the extent entry and remove from the bitmap */ | 3335 | /* Now to add back the extent entry and remove from the bitmap */ |
| 3334 | ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); | 3336 | ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); |
| 3335 | if (ret) { | 3337 | if (ret) { |
| 3336 | printk(KERN_ERR "Couldn't re-add extent entry %d\n", ret); | 3338 | test_msg("Couldn't re-add extent entry %d\n", ret); |
| 3337 | return ret; | 3339 | return ret; |
| 3338 | } | 3340 | } |
| 3339 | 3341 | ||
| 3340 | ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024); | 3342 | ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024); |
| 3341 | if (ret) { | 3343 | if (ret) { |
| 3342 | printk(KERN_ERR "Couldn't remove from bitmap %d\n", ret); | 3344 | test_msg("Couldn't remove from bitmap %d\n", ret); |
| 3343 | return ret; | 3345 | return ret; |
| 3344 | } | 3346 | } |
| 3345 | 3347 | ||
| 3346 | if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) { | 3348 | if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) { |
| 3347 | printk(KERN_ERR "Left remnants in the bitmap\n"); | 3349 | test_msg("Left remnants in the bitmap\n"); |
| 3348 | return -1; | 3350 | return -1; |
| 3349 | } | 3351 | } |
| 3350 | 3352 | ||
| @@ -3354,19 +3356,18 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
| 3354 | */ | 3356 | */ |
| 3355 | ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1); | 3357 | ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1); |
| 3356 | if (ret) { | 3358 | if (ret) { |
| 3357 | printk(KERN_ERR "Couldn't add to a bitmap %d\n", ret); | 3359 | test_msg("Couldn't add to a bitmap %d\n", ret); |
| 3358 | return ret; | 3360 | return ret; |
| 3359 | } | 3361 | } |
| 3360 | 3362 | ||
| 3361 | ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024); | 3363 | ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024); |
| 3362 | if (ret) { | 3364 | if (ret) { |
| 3363 | printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret); | 3365 | test_msg("Couldn't remove overlapping space %d\n", ret); |
| 3364 | return ret; | 3366 | return ret; |
| 3365 | } | 3367 | } |
| 3366 | 3368 | ||
| 3367 | if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { | 3369 | if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { |
| 3368 | printk(KERN_ERR "Left over peices after removing " | 3370 | test_msg("Left over peices after removing overlapping\n"); |
| 3369 | "overlapping\n"); | ||
| 3370 | return -1; | 3371 | return -1; |
| 3371 | } | 3372 | } |
| 3372 | 3373 | ||
| @@ -3375,24 +3376,24 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
| 3375 | /* Now with the extent entry offset into the bitmap */ | 3376 | /* Now with the extent entry offset into the bitmap */ |
| 3376 | ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1); | 3377 | ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1); |
| 3377 | if (ret) { | 3378 | if (ret) { |
| 3378 | printk(KERN_ERR "Couldn't add space to the bitmap %d\n", ret); | 3379 | test_msg("Couldn't add space to the bitmap %d\n", ret); |
| 3379 | return ret; | 3380 | return ret; |
| 3380 | } | 3381 | } |
| 3381 | 3382 | ||
| 3382 | ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0); | 3383 | ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0); |
| 3383 | if (ret) { | 3384 | if (ret) { |
| 3384 | printk(KERN_ERR "Couldn't add extent to the cache %d\n", ret); | 3385 | test_msg("Couldn't add extent to the cache %d\n", ret); |
| 3385 | return ret; | 3386 | return ret; |
| 3386 | } | 3387 | } |
| 3387 | 3388 | ||
| 3388 | ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024); | 3389 | ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024); |
| 3389 | if (ret) { | 3390 | if (ret) { |
| 3390 | printk(KERN_ERR "Problem removing overlapping space %d\n", ret); | 3391 | test_msg("Problem removing overlapping space %d\n", ret); |
| 3391 | return ret; | 3392 | return ret; |
| 3392 | } | 3393 | } |
| 3393 | 3394 | ||
| 3394 | if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) { | 3395 | if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) { |
| 3395 | printk(KERN_ERR "Left something behind when removing space"); | 3396 | test_msg("Left something behind when removing space"); |
| 3396 | return -1; | 3397 | return -1; |
| 3397 | } | 3398 | } |
| 3398 | 3399 | ||
| @@ -3410,27 +3411,27 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
| 3410 | ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024, | 3411 | ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024, |
| 3411 | 4 * 1024 * 1024, 1); | 3412 | 4 * 1024 * 1024, 1); |
| 3412 | if (ret) { | 3413 | if (ret) { |
| 3413 | printk(KERN_ERR "Couldn't add bitmap %d\n", ret); | 3414 | test_msg("Couldn't add bitmap %d\n", ret); |
| 3414 | return ret; | 3415 | return ret; |
| 3415 | } | 3416 | } |
| 3416 | 3417 | ||
| 3417 | ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024, | 3418 | ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024, |
| 3418 | 5 * 1024 * 1024, 0); | 3419 | 5 * 1024 * 1024, 0); |
| 3419 | if (ret) { | 3420 | if (ret) { |
| 3420 | printk(KERN_ERR "Couldn't add extent entry %d\n", ret); | 3421 | test_msg("Couldn't add extent entry %d\n", ret); |
| 3421 | return ret; | 3422 | return ret; |
| 3422 | } | 3423 | } |
| 3423 | 3424 | ||
| 3424 | ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024, | 3425 | ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024, |
| 3425 | 5 * 1024 * 1024); | 3426 | 5 * 1024 * 1024); |
| 3426 | if (ret) { | 3427 | if (ret) { |
| 3427 | printk(KERN_ERR "Failed to free our space %d\n", ret); | 3428 | test_msg("Failed to free our space %d\n", ret); |
| 3428 | return ret; | 3429 | return ret; |
| 3429 | } | 3430 | } |
| 3430 | 3431 | ||
| 3431 | if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024, | 3432 | if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024, |
| 3432 | 5 * 1024 * 1024)) { | 3433 | 5 * 1024 * 1024)) { |
| 3433 | printk(KERN_ERR "Left stuff over\n"); | 3434 | test_msg("Left stuff over\n"); |
| 3434 | return -1; | 3435 | return -1; |
| 3435 | } | 3436 | } |
| 3436 | 3437 | ||
| @@ -3444,20 +3445,19 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
| 3444 | */ | 3445 | */ |
| 3445 | ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1); | 3446 | ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1); |
| 3446 | if (ret) { | 3447 | if (ret) { |
| 3447 | printk(KERN_ERR "Couldn't add bitmap entry %d\n", ret); | 3448 | test_msg("Couldn't add bitmap entry %d\n", ret); |
| 3448 | return ret; | 3449 | return ret; |
| 3449 | } | 3450 | } |
| 3450 | 3451 | ||
| 3451 | ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0); | 3452 | ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0); |
| 3452 | if (ret) { | 3453 | if (ret) { |
| 3453 | printk(KERN_ERR "Couldn't add extent entry %d\n", ret); | 3454 | test_msg("Couldn't add extent entry %d\n", ret); |
| 3454 | return ret; | 3455 | return ret; |
| 3455 | } | 3456 | } |
| 3456 | 3457 | ||
| 3457 | ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024); | 3458 | ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024); |
| 3458 | if (ret) { | 3459 | if (ret) { |
| 3459 | printk(KERN_ERR "Error removing bitmap and extent " | 3460 | test_msg("Error removing bitmap and extent overlapping %d\n", ret); |
| 3460 | "overlapping %d\n", ret); | ||
| 3461 | return ret; | 3461 | return ret; |
| 3462 | } | 3462 | } |
| 3463 | 3463 | ||
| @@ -3469,11 +3469,11 @@ void btrfs_test_free_space_cache(void) | |||
| 3469 | { | 3469 | { |
| 3470 | struct btrfs_block_group_cache *cache; | 3470 | struct btrfs_block_group_cache *cache; |
| 3471 | 3471 | ||
| 3472 | printk(KERN_ERR "Running btrfs free space cache tests\n"); | 3472 | test_msg("Running btrfs free space cache tests\n"); |
| 3473 | 3473 | ||
| 3474 | cache = init_test_block_group(); | 3474 | cache = init_test_block_group(); |
| 3475 | if (!cache) { | 3475 | if (!cache) { |
| 3476 | printk(KERN_ERR "Couldn't run the tests\n"); | 3476 | test_msg("Couldn't run the tests\n"); |
| 3477 | return; | 3477 | return; |
| 3478 | } | 3478 | } |
| 3479 | 3479 | ||
| @@ -3487,6 +3487,9 @@ out: | |||
| 3487 | __btrfs_remove_free_space_cache(cache->free_space_ctl); | 3487 | __btrfs_remove_free_space_cache(cache->free_space_ctl); |
| 3488 | kfree(cache->free_space_ctl); | 3488 | kfree(cache->free_space_ctl); |
| 3489 | kfree(cache); | 3489 | kfree(cache); |
| 3490 | printk(KERN_ERR "Free space cache tests finished\n"); | 3490 | test_msg("Free space cache tests finished\n"); |
| 3491 | } | 3491 | } |
| 3492 | #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ | 3492 | #undef test_msg |
| 3493 | #else /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ | ||
| 3494 | void btrfs_test_free_space_cache(void) {} | ||
| 3495 | #endif /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ | ||
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 8b7f19f44961..894116b71304 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h | |||
| @@ -113,8 +113,6 @@ int btrfs_return_cluster_to_free_space( | |||
| 113 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | 113 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, |
| 114 | u64 *trimmed, u64 start, u64 end, u64 minlen); | 114 | u64 *trimmed, u64 start, u64 end, u64 minlen); |
| 115 | 115 | ||
| 116 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
| 117 | void btrfs_test_free_space_cache(void); | 116 | void btrfs_test_free_space_cache(void); |
| 118 | #endif | ||
| 119 | 117 | ||
| 120 | #endif | 118 | #endif |
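The header hunk above removes the #ifdef around the btrfs_test_free_space_cache() declaration, and free-space-cache.c now compiles an empty stub in its #else branch when CONFIG_BTRFS_FS_RUN_SANITY_TESTS is off, so call sites need no guards of their own. A minimal sketch of that "always declare, conditionally stub" pattern, using an invented config symbol purely for illustration:

```c
#include <stdio.h>

/* Toggle to mimic CONFIG_BTRFS_FS_RUN_SANITY_TESTS; symbol invented for the demo. */
/* #define CONFIG_RUN_SANITY_TESTS 1 */

/* Header side: the declaration is always visible, no #ifdef needed by callers. */
void run_sanity_tests(void);

#ifdef CONFIG_RUN_SANITY_TESTS
void run_sanity_tests(void)
{
	printf("running sanity tests\n");
}
#else
/* Tests compiled out: keep an empty stub so call sites stay unconditional. */
void run_sanity_tests(void) {}
#endif

int main(void)
{
	run_sanity_tests();	/* no #ifdef at the call site either */
	return 0;
}
```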
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4f9d16b70d3d..6d1b93c8aafb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -42,6 +42,7 @@ | |||
| 42 | #include <linux/mount.h> | 42 | #include <linux/mount.h> |
| 43 | #include <linux/btrfs.h> | 43 | #include <linux/btrfs.h> |
| 44 | #include <linux/blkdev.h> | 44 | #include <linux/blkdev.h> |
| 45 | #include <linux/posix_acl_xattr.h> | ||
| 45 | #include "compat.h" | 46 | #include "compat.h" |
| 46 | #include "ctree.h" | 47 | #include "ctree.h" |
| 47 | #include "disk-io.h" | 48 | #include "disk-io.h" |
| @@ -57,6 +58,7 @@ | |||
| 57 | #include "free-space-cache.h" | 58 | #include "free-space-cache.h" |
| 58 | #include "inode-map.h" | 59 | #include "inode-map.h" |
| 59 | #include "backref.h" | 60 | #include "backref.h" |
| 61 | #include "hash.h" | ||
| 60 | 62 | ||
| 61 | struct btrfs_iget_args { | 63 | struct btrfs_iget_args { |
| 62 | u64 ino; | 64 | u64 ino; |
| @@ -701,8 +703,12 @@ retry: | |||
| 701 | async_extent->nr_pages = 0; | 703 | async_extent->nr_pages = 0; |
| 702 | async_extent->pages = NULL; | 704 | async_extent->pages = NULL; |
| 703 | 705 | ||
| 704 | if (ret == -ENOSPC) | 706 | if (ret == -ENOSPC) { |
| 707 | unlock_extent(io_tree, async_extent->start, | ||
| 708 | async_extent->start + | ||
| 709 | async_extent->ram_size - 1); | ||
| 705 | goto retry; | 710 | goto retry; |
| 711 | } | ||
| 706 | goto out_free; | 712 | goto out_free; |
| 707 | } | 713 | } |
| 708 | 714 | ||
| @@ -1529,6 +1535,46 @@ static void btrfs_merge_extent_hook(struct inode *inode, | |||
| 1529 | spin_unlock(&BTRFS_I(inode)->lock); | 1535 | spin_unlock(&BTRFS_I(inode)->lock); |
| 1530 | } | 1536 | } |
| 1531 | 1537 | ||
| 1538 | static void btrfs_add_delalloc_inodes(struct btrfs_root *root, | ||
| 1539 | struct inode *inode) | ||
| 1540 | { | ||
| 1541 | spin_lock(&root->delalloc_lock); | ||
| 1542 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | ||
| 1543 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | ||
| 1544 | &root->delalloc_inodes); | ||
| 1545 | set_bit(BTRFS_INODE_IN_DELALLOC_LIST, | ||
| 1546 | &BTRFS_I(inode)->runtime_flags); | ||
| 1547 | root->nr_delalloc_inodes++; | ||
| 1548 | if (root->nr_delalloc_inodes == 1) { | ||
| 1549 | spin_lock(&root->fs_info->delalloc_root_lock); | ||
| 1550 | BUG_ON(!list_empty(&root->delalloc_root)); | ||
| 1551 | list_add_tail(&root->delalloc_root, | ||
| 1552 | &root->fs_info->delalloc_roots); | ||
| 1553 | spin_unlock(&root->fs_info->delalloc_root_lock); | ||
| 1554 | } | ||
| 1555 | } | ||
| 1556 | spin_unlock(&root->delalloc_lock); | ||
| 1557 | } | ||
| 1558 | |||
| 1559 | static void btrfs_del_delalloc_inode(struct btrfs_root *root, | ||
| 1560 | struct inode *inode) | ||
| 1561 | { | ||
| 1562 | spin_lock(&root->delalloc_lock); | ||
| 1563 | if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | ||
| 1564 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | ||
| 1565 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, | ||
| 1566 | &BTRFS_I(inode)->runtime_flags); | ||
| 1567 | root->nr_delalloc_inodes--; | ||
| 1568 | if (!root->nr_delalloc_inodes) { | ||
| 1569 | spin_lock(&root->fs_info->delalloc_root_lock); | ||
| 1570 | BUG_ON(list_empty(&root->delalloc_root)); | ||
| 1571 | list_del_init(&root->delalloc_root); | ||
| 1572 | spin_unlock(&root->fs_info->delalloc_root_lock); | ||
| 1573 | } | ||
| 1574 | } | ||
| 1575 | spin_unlock(&root->delalloc_lock); | ||
| 1576 | } | ||
| 1577 | |||
| 1532 | /* | 1578 | /* |
| 1533 | * extent_io.c set_bit_hook, used to track delayed allocation | 1579 | * extent_io.c set_bit_hook, used to track delayed allocation |
| 1534 | * bytes in this file, and to maintain the list of inodes that | 1580 | * bytes in this file, and to maintain the list of inodes that |
| @@ -1561,16 +1607,8 @@ static void btrfs_set_bit_hook(struct inode *inode, | |||
| 1561 | spin_lock(&BTRFS_I(inode)->lock); | 1607 | spin_lock(&BTRFS_I(inode)->lock); |
| 1562 | BTRFS_I(inode)->delalloc_bytes += len; | 1608 | BTRFS_I(inode)->delalloc_bytes += len; |
| 1563 | if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST, | 1609 | if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST, |
| 1564 | &BTRFS_I(inode)->runtime_flags)) { | 1610 | &BTRFS_I(inode)->runtime_flags)) |
| 1565 | spin_lock(&root->fs_info->delalloc_lock); | 1611 | btrfs_add_delalloc_inodes(root, inode); |
| 1566 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | ||
| 1567 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | ||
| 1568 | &root->fs_info->delalloc_inodes); | ||
| 1569 | set_bit(BTRFS_INODE_IN_DELALLOC_LIST, | ||
| 1570 | &BTRFS_I(inode)->runtime_flags); | ||
| 1571 | } | ||
| 1572 | spin_unlock(&root->fs_info->delalloc_lock); | ||
| 1573 | } | ||
| 1574 | spin_unlock(&BTRFS_I(inode)->lock); | 1612 | spin_unlock(&BTRFS_I(inode)->lock); |
| 1575 | } | 1613 | } |
| 1576 | } | 1614 | } |
| @@ -1604,7 +1642,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
| 1604 | btrfs_delalloc_release_metadata(inode, len); | 1642 | btrfs_delalloc_release_metadata(inode, len); |
| 1605 | 1643 | ||
| 1606 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID | 1644 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID |
| 1607 | && do_list) | 1645 | && do_list && !(state->state & EXTENT_NORESERVE)) |
| 1608 | btrfs_free_reserved_data_space(inode, len); | 1646 | btrfs_free_reserved_data_space(inode, len); |
| 1609 | 1647 | ||
| 1610 | __percpu_counter_add(&root->fs_info->delalloc_bytes, -len, | 1648 | __percpu_counter_add(&root->fs_info->delalloc_bytes, -len, |
| @@ -1613,15 +1651,8 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
| 1613 | BTRFS_I(inode)->delalloc_bytes -= len; | 1651 | BTRFS_I(inode)->delalloc_bytes -= len; |
| 1614 | if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && | 1652 | if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && |
| 1615 | test_bit(BTRFS_INODE_IN_DELALLOC_LIST, | 1653 | test_bit(BTRFS_INODE_IN_DELALLOC_LIST, |
| 1616 | &BTRFS_I(inode)->runtime_flags)) { | 1654 | &BTRFS_I(inode)->runtime_flags)) |
| 1617 | spin_lock(&root->fs_info->delalloc_lock); | 1655 | btrfs_del_delalloc_inode(root, inode); |
| 1618 | if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | ||
| 1619 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | ||
| 1620 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, | ||
| 1621 | &BTRFS_I(inode)->runtime_flags); | ||
| 1622 | } | ||
| 1623 | spin_unlock(&root->fs_info->delalloc_lock); | ||
| 1624 | } | ||
| 1625 | spin_unlock(&BTRFS_I(inode)->lock); | 1656 | spin_unlock(&BTRFS_I(inode)->lock); |
| 1626 | } | 1657 | } |
| 1627 | } | 1658 | } |
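The hunks above move delalloc-inode tracking from one fs_info-wide list onto per-root lists: adding the first delalloc inode to a root also links that root into fs_info->delalloc_roots, and deleting the last one unlinks it, with the set/clear bit hooks reduced to calling the two new helpers. Below is a small user-space model of that two-level bookkeeping that uses plain counters in place of the real list_head and spinlock machinery; the structure and function names are invented for the example.

```c
#include <stdbool.h>
#include <stdio.h>

struct fake_fs_info { int nr_delalloc_roots; };
struct fake_root {
	struct fake_fs_info *fs_info;
	int  nr_delalloc_inodes;
	bool on_global_list;
};

/* Model of btrfs_add_delalloc_inodes(): per-root count, global list on 0 -> 1. */
static void add_delalloc_inode(struct fake_root *root)
{
	if (++root->nr_delalloc_inodes == 1 && !root->on_global_list) {
		root->on_global_list = true;
		root->fs_info->nr_delalloc_roots++;
	}
}

/* Model of btrfs_del_delalloc_inode(): drop the root from the list on 1 -> 0. */
static void del_delalloc_inode(struct fake_root *root)
{
	if (--root->nr_delalloc_inodes == 0 && root->on_global_list) {
		root->on_global_list = false;
		root->fs_info->nr_delalloc_roots--;
	}
}

int main(void)
{
	struct fake_fs_info fs = { 0 };
	struct fake_root subvol = { .fs_info = &fs };

	add_delalloc_inode(&subvol);
	add_delalloc_inode(&subvol);
	del_delalloc_inode(&subvol);
	del_delalloc_inode(&subvol);
	printf("roots with delalloc left: %d\n", fs.nr_delalloc_roots);
	return 0;
}
```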
| @@ -2263,11 +2294,6 @@ static noinline int relink_extent_backref(struct btrfs_path *path, | |||
| 2263 | return 0; | 2294 | return 0; |
| 2264 | return PTR_ERR(root); | 2295 | return PTR_ERR(root); |
| 2265 | } | 2296 | } |
| 2266 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
| 2267 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
| 2268 | /* parse ENOENT to 0 */ | ||
| 2269 | return 0; | ||
| 2270 | } | ||
| 2271 | 2297 | ||
| 2272 | /* step 2: get inode */ | 2298 | /* step 2: get inode */ |
| 2273 | key.objectid = backref->inum; | 2299 | key.objectid = backref->inum; |
| @@ -3215,13 +3241,16 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 3215 | /* 1 for the orphan item deletion. */ | 3241 | /* 1 for the orphan item deletion. */ |
| 3216 | trans = btrfs_start_transaction(root, 1); | 3242 | trans = btrfs_start_transaction(root, 1); |
| 3217 | if (IS_ERR(trans)) { | 3243 | if (IS_ERR(trans)) { |
| 3244 | iput(inode); | ||
| 3218 | ret = PTR_ERR(trans); | 3245 | ret = PTR_ERR(trans); |
| 3219 | goto out; | 3246 | goto out; |
| 3220 | } | 3247 | } |
| 3221 | ret = btrfs_orphan_add(trans, inode); | 3248 | ret = btrfs_orphan_add(trans, inode); |
| 3222 | btrfs_end_transaction(trans, root); | 3249 | btrfs_end_transaction(trans, root); |
| 3223 | if (ret) | 3250 | if (ret) { |
| 3251 | iput(inode); | ||
| 3224 | goto out; | 3252 | goto out; |
| 3253 | } | ||
| 3225 | 3254 | ||
| 3226 | ret = btrfs_truncate(inode); | 3255 | ret = btrfs_truncate(inode); |
| 3227 | if (ret) | 3256 | if (ret) |
| @@ -3274,8 +3303,17 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf, | |||
| 3274 | { | 3303 | { |
| 3275 | u32 nritems = btrfs_header_nritems(leaf); | 3304 | u32 nritems = btrfs_header_nritems(leaf); |
| 3276 | struct btrfs_key found_key; | 3305 | struct btrfs_key found_key; |
| 3306 | static u64 xattr_access = 0; | ||
| 3307 | static u64 xattr_default = 0; | ||
| 3277 | int scanned = 0; | 3308 | int scanned = 0; |
| 3278 | 3309 | ||
| 3310 | if (!xattr_access) { | ||
| 3311 | xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS, | ||
| 3312 | strlen(POSIX_ACL_XATTR_ACCESS)); | ||
| 3313 | xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT, | ||
| 3314 | strlen(POSIX_ACL_XATTR_DEFAULT)); | ||
| 3315 | } | ||
| 3316 | |||
| 3279 | slot++; | 3317 | slot++; |
| 3280 | while (slot < nritems) { | 3318 | while (slot < nritems) { |
| 3281 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 3319 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
| @@ -3285,8 +3323,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf, | |||
| 3285 | return 0; | 3323 | return 0; |
| 3286 | 3324 | ||
| 3287 | /* we found an xattr, assume we've got an acl */ | 3325 | /* we found an xattr, assume we've got an acl */ |
| 3288 | if (found_key.type == BTRFS_XATTR_ITEM_KEY) | 3326 | if (found_key.type == BTRFS_XATTR_ITEM_KEY) { |
| 3289 | return 1; | 3327 | if (found_key.offset == xattr_access || |
| 3328 | found_key.offset == xattr_default) | ||
| 3329 | return 1; | ||
| 3330 | } | ||
| 3290 | 3331 | ||
| 3291 | /* | 3332 | /* |
| 3292 | * we found a key greater than an xattr key, there can't | 3333 | * we found a key greater than an xattr key, there can't |
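The acls_after_inode_item() change above stops treating every xattr item as a potential ACL: an xattr dir item's key offset is the btrfs_name_hash() of its name, so the function lazily computes the hashes of the two POSIX ACL names once (in static variables) and only reports a hit when the offset matches one of them. The sketch below models that lazy-init-and-compare pattern; the FNV-based hash is a stand-in for illustration, not the crc32c-based btrfs_name_hash().

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define POSIX_ACL_XATTR_ACCESS  "system.posix_acl_access"
#define POSIX_ACL_XATTR_DEFAULT "system.posix_acl_default"

/* Stand-in name hash; btrfs really uses a crc32c-based btrfs_name_hash(). */
static uint64_t fake_name_hash(const char *name, size_t len)
{
	uint64_t h = 14695981039346656037ULL;	/* 64-bit FNV-1a, illustration only */

	while (len--)
		h = (h ^ (unsigned char)*name++) * 1099511628211ULL;
	return h;
}

/* Same shape as the kernel hunk: compute the ACL hashes once, then compare. */
static int offset_names_an_acl(uint64_t key_offset)
{
	static uint64_t xattr_access, xattr_default;

	if (!xattr_access) {
		xattr_access = fake_name_hash(POSIX_ACL_XATTR_ACCESS,
					      strlen(POSIX_ACL_XATTR_ACCESS));
		xattr_default = fake_name_hash(POSIX_ACL_XATTR_DEFAULT,
					       strlen(POSIX_ACL_XATTR_DEFAULT));
	}
	return key_offset == xattr_access || key_offset == xattr_default;
}

int main(void)
{
	uint64_t acl = fake_name_hash(POSIX_ACL_XATTR_ACCESS,
				      strlen(POSIX_ACL_XATTR_ACCESS));
	uint64_t sec = fake_name_hash("security.selinux", strlen("security.selinux"));

	printf("ACL xattr matches:   %d\n", offset_names_an_acl(acl));
	printf("other xattr matches: %d\n", offset_names_an_acl(sec));
	return 0;
}
```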
| @@ -3660,53 +3701,20 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
| 3660 | } | 3701 | } |
| 3661 | return ret; | 3702 | return ret; |
| 3662 | } | 3703 | } |
| 3663 | |||
| 3664 | |||
| 3665 | /* helper to check if there is any shared block in the path */ | ||
| 3666 | static int check_path_shared(struct btrfs_root *root, | ||
| 3667 | struct btrfs_path *path) | ||
| 3668 | { | ||
| 3669 | struct extent_buffer *eb; | ||
| 3670 | int level; | ||
| 3671 | u64 refs = 1; | ||
| 3672 | |||
| 3673 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
| 3674 | int ret; | ||
| 3675 | |||
| 3676 | if (!path->nodes[level]) | ||
| 3677 | break; | ||
| 3678 | eb = path->nodes[level]; | ||
| 3679 | if (!btrfs_block_can_be_shared(root, eb)) | ||
| 3680 | continue; | ||
| 3681 | ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1, | ||
| 3682 | &refs, NULL); | ||
| 3683 | if (refs > 1) | ||
| 3684 | return 1; | ||
| 3685 | } | ||
| 3686 | return 0; | ||
| 3687 | } | ||
| 3688 | 3704 | ||
| 3689 | /* | 3705 | /* |
| 3690 | * helper to start transaction for unlink and rmdir. | 3706 | * helper to start transaction for unlink and rmdir. |
| 3691 | * | 3707 | * |
| 3692 | * unlink and rmdir are special in btrfs, they do not always free space. | 3708 | * unlink and rmdir are special in btrfs, they do not always free space, so |
| 3693 | * so in enospc case, we should make sure they will free space before | 3709 | * if we cannot make our reservations the normal way try and see if there is |
| 3694 | * allowing them to use the global metadata reservation. | 3710 | * plenty of slack room in the global reserve to migrate, otherwise we cannot |
| 3711 | * allow the unlink to occur. | ||
| 3695 | */ | 3712 | */ |
| 3696 | static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | 3713 | static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir) |
| 3697 | struct dentry *dentry) | ||
| 3698 | { | 3714 | { |
| 3699 | struct btrfs_trans_handle *trans; | 3715 | struct btrfs_trans_handle *trans; |
| 3700 | struct btrfs_root *root = BTRFS_I(dir)->root; | 3716 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 3701 | struct btrfs_path *path; | ||
| 3702 | struct btrfs_dir_item *di; | ||
| 3703 | struct inode *inode = dentry->d_inode; | ||
| 3704 | u64 index; | ||
| 3705 | int check_link = 1; | ||
| 3706 | int err = -ENOSPC; | ||
| 3707 | int ret; | 3717 | int ret; |
| 3708 | u64 ino = btrfs_ino(inode); | ||
| 3709 | u64 dir_ino = btrfs_ino(dir); | ||
| 3710 | 3718 | ||
| 3711 | /* | 3719 | /* |
| 3712 | * 1 for the possible orphan item | 3720 | * 1 for the possible orphan item |
| @@ -3719,158 +3727,23 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
| 3719 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | 3727 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) |
| 3720 | return trans; | 3728 | return trans; |
| 3721 | 3729 | ||
| 3722 | if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) | 3730 | if (PTR_ERR(trans) == -ENOSPC) { |
| 3723 | return ERR_PTR(-ENOSPC); | 3731 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); |
| 3724 | |||
| 3725 | /* check if there is someone else holds reference */ | ||
| 3726 | if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1) | ||
| 3727 | return ERR_PTR(-ENOSPC); | ||
| 3728 | |||
| 3729 | if (atomic_read(&inode->i_count) > 2) | ||
| 3730 | return ERR_PTR(-ENOSPC); | ||
| 3731 | |||
| 3732 | if (xchg(&root->fs_info->enospc_unlink, 1)) | ||
| 3733 | return ERR_PTR(-ENOSPC); | ||
| 3734 | |||
| 3735 | path = btrfs_alloc_path(); | ||
| 3736 | if (!path) { | ||
| 3737 | root->fs_info->enospc_unlink = 0; | ||
| 3738 | return ERR_PTR(-ENOMEM); | ||
| 3739 | } | ||
| 3740 | 3732 | ||
| 3741 | /* 1 for the orphan item */ | 3733 | trans = btrfs_start_transaction(root, 0); |
| 3742 | trans = btrfs_start_transaction(root, 1); | 3734 | if (IS_ERR(trans)) |
| 3743 | if (IS_ERR(trans)) { | 3735 | return trans; |
| 3744 | btrfs_free_path(path); | 3736 | ret = btrfs_cond_migrate_bytes(root->fs_info, |
| 3745 | root->fs_info->enospc_unlink = 0; | 3737 | &root->fs_info->trans_block_rsv, |
| 3746 | return trans; | 3738 | num_bytes, 5); |
| 3747 | } | 3739 | if (ret) { |
| 3748 | 3740 | btrfs_end_transaction(trans, root); | |
| 3749 | path->skip_locking = 1; | 3741 | return ERR_PTR(ret); |
| 3750 | path->search_commit_root = 1; | ||
| 3751 | |||
| 3752 | ret = btrfs_lookup_inode(trans, root, path, | ||
| 3753 | &BTRFS_I(dir)->location, 0); | ||
| 3754 | if (ret < 0) { | ||
| 3755 | err = ret; | ||
| 3756 | goto out; | ||
| 3757 | } | ||
| 3758 | if (ret == 0) { | ||
| 3759 | if (check_path_shared(root, path)) | ||
| 3760 | goto out; | ||
| 3761 | } else { | ||
| 3762 | check_link = 0; | ||
| 3763 | } | ||
| 3764 | btrfs_release_path(path); | ||
| 3765 | |||
| 3766 | ret = btrfs_lookup_inode(trans, root, path, | ||
| 3767 | &BTRFS_I(inode)->location, 0); | ||
| 3768 | if (ret < 0) { | ||
| 3769 | err = ret; | ||
| 3770 | goto out; | ||
| 3771 | } | ||
| 3772 | if (ret == 0) { | ||
| 3773 | if (check_path_shared(root, path)) | ||
| 3774 | goto out; | ||
| 3775 | } else { | ||
| 3776 | check_link = 0; | ||
| 3777 | } | ||
| 3778 | btrfs_release_path(path); | ||
| 3779 | |||
| 3780 | if (ret == 0 && S_ISREG(inode->i_mode)) { | ||
| 3781 | ret = btrfs_lookup_file_extent(trans, root, path, | ||
| 3782 | ino, (u64)-1, 0); | ||
| 3783 | if (ret < 0) { | ||
| 3784 | err = ret; | ||
| 3785 | goto out; | ||
| 3786 | } | 3742 | } |
| 3787 | BUG_ON(ret == 0); /* Corruption */ | ||
| 3788 | if (check_path_shared(root, path)) | ||
| 3789 | goto out; | ||
| 3790 | btrfs_release_path(path); | ||
| 3791 | } | ||
| 3792 | |||
| 3793 | if (!check_link) { | ||
| 3794 | err = 0; | ||
| 3795 | goto out; | ||
| 3796 | } | ||
| 3797 | |||
| 3798 | di = btrfs_lookup_dir_item(trans, root, path, dir_ino, | ||
| 3799 | dentry->d_name.name, dentry->d_name.len, 0); | ||
| 3800 | if (IS_ERR(di)) { | ||
| 3801 | err = PTR_ERR(di); | ||
| 3802 | goto out; | ||
| 3803 | } | ||
| 3804 | if (di) { | ||
| 3805 | if (check_path_shared(root, path)) | ||
| 3806 | goto out; | ||
| 3807 | } else { | ||
| 3808 | err = 0; | ||
| 3809 | goto out; | ||
| 3810 | } | ||
| 3811 | btrfs_release_path(path); | ||
| 3812 | |||
| 3813 | ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name, | ||
| 3814 | dentry->d_name.len, ino, dir_ino, 0, | ||
| 3815 | &index); | ||
| 3816 | if (ret) { | ||
| 3817 | err = ret; | ||
| 3818 | goto out; | ||
| 3819 | } | ||
| 3820 | |||
| 3821 | if (check_path_shared(root, path)) | ||
| 3822 | goto out; | ||
| 3823 | |||
| 3824 | btrfs_release_path(path); | ||
| 3825 | |||
| 3826 | /* | ||
| 3827 | * This is a commit root search, if we can lookup inode item and other | ||
| 3828 | * relative items in the commit root, it means the transaction of | ||
| 3829 | * dir/file creation has been committed, and the dir index item that we | ||
| 3830 | * delay to insert has also been inserted into the commit root. So | ||
| 3831 | * we needn't worry about the delayed insertion of the dir index item | ||
| 3832 | * here. | ||
| 3833 | */ | ||
| 3834 | di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index, | ||
| 3835 | dentry->d_name.name, dentry->d_name.len, 0); | ||
| 3836 | if (IS_ERR(di)) { | ||
| 3837 | err = PTR_ERR(di); | ||
| 3838 | goto out; | ||
| 3839 | } | ||
| 3840 | BUG_ON(ret == -ENOENT); | ||
| 3841 | if (check_path_shared(root, path)) | ||
| 3842 | goto out; | ||
| 3843 | |||
| 3844 | err = 0; | ||
| 3845 | out: | ||
| 3846 | btrfs_free_path(path); | ||
| 3847 | /* Migrate the orphan reservation over */ | ||
| 3848 | if (!err) | ||
| 3849 | err = btrfs_block_rsv_migrate(trans->block_rsv, | ||
| 3850 | &root->fs_info->global_block_rsv, | ||
| 3851 | trans->bytes_reserved); | ||
| 3852 | |||
| 3853 | if (err) { | ||
| 3854 | btrfs_end_transaction(trans, root); | ||
| 3855 | root->fs_info->enospc_unlink = 0; | ||
| 3856 | return ERR_PTR(err); | ||
| 3857 | } | ||
| 3858 | |||
| 3859 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
| 3860 | return trans; | ||
| 3861 | } | ||
| 3862 | |||
| 3863 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, | ||
| 3864 | struct btrfs_root *root) | ||
| 3865 | { | ||
| 3866 | if (trans->block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL) { | ||
| 3867 | btrfs_block_rsv_release(root, trans->block_rsv, | ||
| 3868 | trans->bytes_reserved); | ||
| 3869 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 3743 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
| 3870 | BUG_ON(!root->fs_info->enospc_unlink); | 3744 | trans->bytes_reserved = num_bytes; |
| 3871 | root->fs_info->enospc_unlink = 0; | ||
| 3872 | } | 3745 | } |
| 3873 | btrfs_end_transaction(trans, root); | 3746 | return trans; |
| 3874 | } | 3747 | } |
| 3875 | 3748 | ||
| 3876 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | 3749 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) |
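The rewritten __unlink_start_trans() above no longer walks the path with check_path_shared(): it simply attempts the normal five-item reservation and, on ENOSPC, starts a zero-reservation transaction and asks btrfs_cond_migrate_bytes() to lend the bytes from the global reserve only if that reserve can spare them. The snippet below is a loose user-space model of that fallback decision; the toy reserve structure and the specific slack rule are invented for illustration and do not reflect the kernel's exact accounting.

```c
#include <stdio.h>
#include <stdint.h>
#include <errno.h>

struct toy_rsv { uint64_t size, reserved; };

/* Loose model of btrfs_cond_migrate_bytes(): lend only if plenty is spare. */
static int cond_migrate_bytes(struct toy_rsv *global, struct toy_rsv *dst,
			      uint64_t bytes)
{
	if (global->reserved < global->size / 2 + bytes)
		return -ENOSPC;		/* global reserve too tight to lend */
	global->reserved -= bytes;
	dst->reserved += bytes;
	return 0;
}

/* Shape of the new unlink path: normal reservation first, then the fallback. */
static int start_unlink_trans(int normal_reservation_err,
			      struct toy_rsv *global, struct toy_rsv *trans_rsv,
			      uint64_t num_bytes)
{
	if (!normal_reservation_err)
		return 0;			/* plain 5-item reservation worked */
	if (normal_reservation_err != -ENOSPC)
		return normal_reservation_err;
	return cond_migrate_bytes(global, trans_rsv, num_bytes);
}

int main(void)
{
	struct toy_rsv global = { .size = 1 << 20, .reserved = 1 << 20 };
	struct toy_rsv trans  = { 0, 0 };

	printf("fallback unlink: %d\n",
	       start_unlink_trans(-ENOSPC, &global, &trans, 64 * 1024));
	printf("trans reserve now holds %llu bytes\n",
	       (unsigned long long)trans.reserved);
	return 0;
}
```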
| @@ -3880,7 +3753,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 3880 | struct inode *inode = dentry->d_inode; | 3753 | struct inode *inode = dentry->d_inode; |
| 3881 | int ret; | 3754 | int ret; |
| 3882 | 3755 | ||
| 3883 | trans = __unlink_start_trans(dir, dentry); | 3756 | trans = __unlink_start_trans(dir); |
| 3884 | if (IS_ERR(trans)) | 3757 | if (IS_ERR(trans)) |
| 3885 | return PTR_ERR(trans); | 3758 | return PTR_ERR(trans); |
| 3886 | 3759 | ||
| @@ -3898,7 +3771,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 3898 | } | 3771 | } |
| 3899 | 3772 | ||
| 3900 | out: | 3773 | out: |
| 3901 | __unlink_end_trans(trans, root); | 3774 | btrfs_end_transaction(trans, root); |
| 3902 | btrfs_btree_balance_dirty(root); | 3775 | btrfs_btree_balance_dirty(root); |
| 3903 | return ret; | 3776 | return ret; |
| 3904 | } | 3777 | } |
| @@ -3995,7 +3868,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 3995 | if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) | 3868 | if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) |
| 3996 | return -EPERM; | 3869 | return -EPERM; |
| 3997 | 3870 | ||
| 3998 | trans = __unlink_start_trans(dir, dentry); | 3871 | trans = __unlink_start_trans(dir); |
| 3999 | if (IS_ERR(trans)) | 3872 | if (IS_ERR(trans)) |
| 4000 | return PTR_ERR(trans); | 3873 | return PTR_ERR(trans); |
| 4001 | 3874 | ||
| @@ -4017,7 +3890,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 4017 | if (!err) | 3890 | if (!err) |
| 4018 | btrfs_i_size_write(inode, 0); | 3891 | btrfs_i_size_write(inode, 0); |
| 4019 | out: | 3892 | out: |
| 4020 | __unlink_end_trans(trans, root); | 3893 | btrfs_end_transaction(trans, root); |
| 4021 | btrfs_btree_balance_dirty(root); | 3894 | btrfs_btree_balance_dirty(root); |
| 4022 | 3895 | ||
| 4023 | return err; | 3896 | return err; |
| @@ -4395,6 +4268,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
| 4395 | u64 hole_size; | 4268 | u64 hole_size; |
| 4396 | int err = 0; | 4269 | int err = 0; |
| 4397 | 4270 | ||
| 4271 | /* | ||
| 4272 | * If our size started in the middle of a page we need to zero out the | ||
| 4273 | * rest of the page before we expand the i_size, otherwise we could | ||
| 4274 | * expose stale data. | ||
| 4275 | */ | ||
| 4276 | err = btrfs_truncate_page(inode, oldsize, 0, 0); | ||
| 4277 | if (err) | ||
| 4278 | return err; | ||
| 4279 | |||
| 4398 | if (size <= hole_start) | 4280 | if (size <= hole_start) |
| 4399 | return 0; | 4281 | return 0; |
| 4400 | 4282 | ||
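This hunk moves the tail-page zeroing into btrfs_cont_expand(): before i_size is raised past oldsize, btrfs_truncate_page(inode, oldsize, 0, 0) clears the remainder of the page that oldsize lands in, so extending the file can never expose stale bytes. Roughly, the range being zeroed is (illustrative arithmetic only, not the kernel implementation):

    /* when oldsize is not page aligned, zero from oldsize to the end of its page */
    u64 page_start = oldsize & ~((u64)PAGE_CACHE_SIZE - 1);
    u64 zero_start = oldsize - page_start;             /* offset inside the page */
    u64 zero_len   = PAGE_CACHE_SIZE - zero_start;     /* bytes cleared up to the page end */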
| @@ -4822,11 +4704,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, | |||
| 4822 | goto out; | 4704 | goto out; |
| 4823 | } | 4705 | } |
| 4824 | 4706 | ||
| 4825 | if (btrfs_root_refs(&new_root->root_item) == 0) { | ||
| 4826 | err = -ENOENT; | ||
| 4827 | goto out; | ||
| 4828 | } | ||
| 4829 | |||
| 4830 | *sub_root = new_root; | 4707 | *sub_root = new_root; |
| 4831 | location->objectid = btrfs_root_dirid(&new_root->root_item); | 4708 | location->objectid = btrfs_root_dirid(&new_root->root_item); |
| 4832 | location->type = BTRFS_INODE_ITEM_KEY; | 4709 | location->type = BTRFS_INODE_ITEM_KEY; |
| @@ -5092,8 +4969,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
| 5092 | if (!(inode->i_sb->s_flags & MS_RDONLY)) | 4969 | if (!(inode->i_sb->s_flags & MS_RDONLY)) |
| 5093 | ret = btrfs_orphan_cleanup(sub_root); | 4970 | ret = btrfs_orphan_cleanup(sub_root); |
| 5094 | up_read(&root->fs_info->cleanup_work_sem); | 4971 | up_read(&root->fs_info->cleanup_work_sem); |
| 5095 | if (ret) | 4972 | if (ret) { |
| 4973 | iput(inode); | ||
| 5096 | inode = ERR_PTR(ret); | 4974 | inode = ERR_PTR(ret); |
| 4975 | } | ||
| 5097 | } | 4976 | } |
| 5098 | 4977 | ||
| 5099 | return inode; | 4978 | return inode; |
| @@ -6501,10 +6380,10 @@ out: | |||
| 6501 | * returns 1 when the nocow is safe, < 1 on error, 0 if the | 6380 | * returns 1 when the nocow is safe, < 1 on error, 0 if the |
| 6502 | * block must be cow'd | 6381 | * block must be cow'd |
| 6503 | */ | 6382 | */ |
| 6504 | static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | 6383 | noinline int can_nocow_extent(struct btrfs_trans_handle *trans, |
| 6505 | struct inode *inode, u64 offset, u64 *len, | 6384 | struct inode *inode, u64 offset, u64 *len, |
| 6506 | u64 *orig_start, u64 *orig_block_len, | 6385 | u64 *orig_start, u64 *orig_block_len, |
| 6507 | u64 *ram_bytes) | 6386 | u64 *ram_bytes) |
| 6508 | { | 6387 | { |
| 6509 | struct btrfs_path *path; | 6388 | struct btrfs_path *path; |
| 6510 | int ret; | 6389 | int ret; |
| @@ -6518,7 +6397,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | |||
| 6518 | u64 num_bytes; | 6397 | u64 num_bytes; |
| 6519 | int slot; | 6398 | int slot; |
| 6520 | int found_type; | 6399 | int found_type; |
| 6521 | 6400 | bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW); | |
| 6522 | path = btrfs_alloc_path(); | 6401 | path = btrfs_alloc_path(); |
| 6523 | if (!path) | 6402 | if (!path) |
| 6524 | return -ENOMEM; | 6403 | return -ENOMEM; |
| @@ -6558,18 +6437,28 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | |||
| 6558 | /* not a regular extent, must cow */ | 6437 | /* not a regular extent, must cow */ |
| 6559 | goto out; | 6438 | goto out; |
| 6560 | } | 6439 | } |
| 6440 | |||
| 6441 | if (!nocow && found_type == BTRFS_FILE_EXTENT_REG) | ||
| 6442 | goto out; | ||
| 6443 | |||
| 6561 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | 6444 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
| 6445 | if (disk_bytenr == 0) | ||
| 6446 | goto out; | ||
| 6447 | |||
| 6448 | if (btrfs_file_extent_compression(leaf, fi) || | ||
| 6449 | btrfs_file_extent_encryption(leaf, fi) || | ||
| 6450 | btrfs_file_extent_other_encoding(leaf, fi)) | ||
| 6451 | goto out; | ||
| 6452 | |||
| 6562 | backref_offset = btrfs_file_extent_offset(leaf, fi); | 6453 | backref_offset = btrfs_file_extent_offset(leaf, fi); |
| 6563 | 6454 | ||
| 6564 | *orig_start = key.offset - backref_offset; | 6455 | if (orig_start) { |
| 6565 | *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); | 6456 | *orig_start = key.offset - backref_offset; |
| 6566 | *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); | 6457 | *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); |
| 6458 | *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); | ||
| 6459 | } | ||
| 6567 | 6460 | ||
| 6568 | extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); | 6461 | extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); |
| 6569 | if (extent_end < offset + *len) { | ||
| 6570 | /* extent doesn't include our full range, must cow */ | ||
| 6571 | goto out; | ||
| 6572 | } | ||
| 6573 | 6462 | ||
| 6574 | if (btrfs_extent_readonly(root, disk_bytenr)) | 6463 | if (btrfs_extent_readonly(root, disk_bytenr)) |
| 6575 | goto out; | 6464 | goto out; |
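In this hunk can_nocow_odirect() becomes can_nocow_extent() and is generalised beyond the direct-IO path: regular extents are only accepted when the inode is flagged NODATACOW (preallocated extents remain eligible regardless), extents without a disk backing (disk_bytenr == 0) and compressed/encrypted/other-encoded extents are rejected outright, and the informational out-parameters are filled in only when orig_start is non-NULL. A sketch of a caller that just wants the yes/no answer, assuming NULL is acceptable for the optional pointers as the new checks suggest:

    ret = can_nocow_extent(trans, inode, start, &len, NULL, NULL, NULL);
    if (ret == 1) {
            /* safe to write in place over the existing extent (nocow) */
    } else if (ret == 0) {
            /* the range must be COW'd */
    } else {
            /* ret < 0: lookup error */
    }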
| @@ -6813,8 +6702,8 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
| 6813 | if (IS_ERR(trans)) | 6702 | if (IS_ERR(trans)) |
| 6814 | goto must_cow; | 6703 | goto must_cow; |
| 6815 | 6704 | ||
| 6816 | if (can_nocow_odirect(trans, inode, start, &len, &orig_start, | 6705 | if (can_nocow_extent(trans, inode, start, &len, &orig_start, |
| 6817 | &orig_block_len, &ram_bytes) == 1) { | 6706 | &orig_block_len, &ram_bytes) == 1) { |
| 6818 | if (type == BTRFS_ORDERED_PREALLOC) { | 6707 | if (type == BTRFS_ORDERED_PREALLOC) { |
| 6819 | free_extent_map(em); | 6708 | free_extent_map(em); |
| 6820 | em = create_pinned_em(inode, start, len, | 6709 | em = create_pinned_em(inode, start, len, |
| @@ -7243,7 +7132,6 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, | |||
| 7243 | { | 7132 | { |
| 7244 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7133 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 7245 | struct btrfs_dio_private *dip; | 7134 | struct btrfs_dio_private *dip; |
| 7246 | struct bio_vec *bvec = dio_bio->bi_io_vec; | ||
| 7247 | struct bio *io_bio; | 7135 | struct bio *io_bio; |
| 7248 | int skip_sum; | 7136 | int skip_sum; |
| 7249 | int write = rw & REQ_WRITE; | 7137 | int write = rw & REQ_WRITE; |
| @@ -7265,16 +7153,9 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, | |||
| 7265 | } | 7153 | } |
| 7266 | 7154 | ||
| 7267 | dip->private = dio_bio->bi_private; | 7155 | dip->private = dio_bio->bi_private; |
| 7268 | io_bio->bi_private = dio_bio->bi_private; | ||
| 7269 | dip->inode = inode; | 7156 | dip->inode = inode; |
| 7270 | dip->logical_offset = file_offset; | 7157 | dip->logical_offset = file_offset; |
| 7271 | 7158 | dip->bytes = dio_bio->bi_size; | |
| 7272 | dip->bytes = 0; | ||
| 7273 | do { | ||
| 7274 | dip->bytes += bvec->bv_len; | ||
| 7275 | bvec++; | ||
| 7276 | } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1)); | ||
| 7277 | |||
| 7278 | dip->disk_bytenr = (u64)dio_bio->bi_sector << 9; | 7159 | dip->disk_bytenr = (u64)dio_bio->bi_sector << 9; |
| 7279 | io_bio->bi_private = dip; | 7160 | io_bio->bi_private = dip; |
| 7280 | dip->errors = 0; | 7161 | dip->errors = 0; |
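Here the manual bio_vec walk is dropped: for the freshly built direct-IO bio, dio_bio->bi_size already holds the total payload, so dip->bytes can take it directly. The deleted loop, in isolation, was just accumulating the same total:

    /* what the removed code computed: the sum of all segment lengths,
     * which for an unsplit, un-advanced bio equals dio_bio->bi_size */
    struct bio_vec *bvec = dio_bio->bi_io_vec;
    unsigned int bytes = 0;
    int i;

    for (i = 0; i < dio_bio->bi_vcnt; i++)
            bytes += bvec[i].bv_len;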
| @@ -7373,8 +7254,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 7373 | atomic_inc(&inode->i_dio_count); | 7254 | atomic_inc(&inode->i_dio_count); |
| 7374 | smp_mb__after_atomic_inc(); | 7255 | smp_mb__after_atomic_inc(); |
| 7375 | 7256 | ||
| 7257 | /* | ||
| 7258 | * The generic stuff only does filemap_write_and_wait_range, which isn't | ||
| 7259 | * enough if we've written compressed pages to this area, so we need to | ||
| 7260 | * call btrfs_wait_ordered_range to make absolutely sure that any | ||
| 7261 | * outstanding dirty pages are on disk. | ||
| 7262 | */ | ||
| 7263 | count = iov_length(iov, nr_segs); | ||
| 7264 | btrfs_wait_ordered_range(inode, offset, count); | ||
| 7265 | |||
| 7376 | if (rw & WRITE) { | 7266 | if (rw & WRITE) { |
| 7377 | count = iov_length(iov, nr_segs); | ||
| 7378 | /* | 7267 | /* |
| 7379 | * If the write DIO is beyond the EOF, we need update | 7268 | * If the write DIO is beyond the EOF, we need update |
| 7380 | * the isize, but it is protected by i_mutex. So we can | 7269 | * the isize, but it is protected by i_mutex. So we can |
| @@ -7694,16 +7583,12 @@ static int btrfs_truncate(struct inode *inode) | |||
| 7694 | { | 7583 | { |
| 7695 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7584 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 7696 | struct btrfs_block_rsv *rsv; | 7585 | struct btrfs_block_rsv *rsv; |
| 7697 | int ret; | 7586 | int ret = 0; |
| 7698 | int err = 0; | 7587 | int err = 0; |
| 7699 | struct btrfs_trans_handle *trans; | 7588 | struct btrfs_trans_handle *trans; |
| 7700 | u64 mask = root->sectorsize - 1; | 7589 | u64 mask = root->sectorsize - 1; |
| 7701 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 7590 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
| 7702 | 7591 | ||
| 7703 | ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); | ||
| 7704 | if (ret) | ||
| 7705 | return ret; | ||
| 7706 | |||
| 7707 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 7592 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
| 7708 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 7593 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
| 7709 | 7594 | ||
| @@ -7961,9 +7846,9 @@ void btrfs_destroy_inode(struct inode *inode) | |||
| 7961 | */ | 7846 | */ |
| 7962 | smp_mb(); | 7847 | smp_mb(); |
| 7963 | if (!list_empty(&BTRFS_I(inode)->ordered_operations)) { | 7848 | if (!list_empty(&BTRFS_I(inode)->ordered_operations)) { |
| 7964 | spin_lock(&root->fs_info->ordered_extent_lock); | 7849 | spin_lock(&root->fs_info->ordered_root_lock); |
| 7965 | list_del_init(&BTRFS_I(inode)->ordered_operations); | 7850 | list_del_init(&BTRFS_I(inode)->ordered_operations); |
| 7966 | spin_unlock(&root->fs_info->ordered_extent_lock); | 7851 | spin_unlock(&root->fs_info->ordered_root_lock); |
| 7967 | } | 7852 | } |
| 7968 | 7853 | ||
| 7969 | if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, | 7854 | if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
| @@ -8333,7 +8218,7 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work) | |||
| 8333 | * some fairly slow code that needs optimization. This walks the list | 8218 | * some fairly slow code that needs optimization. This walks the list |
| 8334 | * of all the inodes with pending delalloc and forces them to disk. | 8219 | * of all the inodes with pending delalloc and forces them to disk. |
| 8335 | */ | 8220 | */ |
| 8336 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | 8221 | static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) |
| 8337 | { | 8222 | { |
| 8338 | struct btrfs_inode *binode; | 8223 | struct btrfs_inode *binode; |
| 8339 | struct inode *inode; | 8224 | struct inode *inode; |
| @@ -8342,30 +8227,23 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8342 | struct list_head splice; | 8227 | struct list_head splice; |
| 8343 | int ret = 0; | 8228 | int ret = 0; |
| 8344 | 8229 | ||
| 8345 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
| 8346 | return -EROFS; | ||
| 8347 | |||
| 8348 | INIT_LIST_HEAD(&works); | 8230 | INIT_LIST_HEAD(&works); |
| 8349 | INIT_LIST_HEAD(&splice); | 8231 | INIT_LIST_HEAD(&splice); |
| 8350 | 8232 | ||
| 8351 | spin_lock(&root->fs_info->delalloc_lock); | 8233 | spin_lock(&root->delalloc_lock); |
| 8352 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | 8234 | list_splice_init(&root->delalloc_inodes, &splice); |
| 8353 | while (!list_empty(&splice)) { | 8235 | while (!list_empty(&splice)) { |
| 8354 | binode = list_entry(splice.next, struct btrfs_inode, | 8236 | binode = list_entry(splice.next, struct btrfs_inode, |
| 8355 | delalloc_inodes); | 8237 | delalloc_inodes); |
| 8356 | 8238 | ||
| 8357 | list_del_init(&binode->delalloc_inodes); | 8239 | list_move_tail(&binode->delalloc_inodes, |
| 8358 | 8240 | &root->delalloc_inodes); | |
| 8359 | inode = igrab(&binode->vfs_inode); | 8241 | inode = igrab(&binode->vfs_inode); |
| 8360 | if (!inode) { | 8242 | if (!inode) { |
| 8361 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, | 8243 | cond_resched_lock(&root->delalloc_lock); |
| 8362 | &binode->runtime_flags); | ||
| 8363 | continue; | 8244 | continue; |
| 8364 | } | 8245 | } |
| 8365 | 8246 | spin_unlock(&root->delalloc_lock); | |
| 8366 | list_add_tail(&binode->delalloc_inodes, | ||
| 8367 | &root->fs_info->delalloc_inodes); | ||
| 8368 | spin_unlock(&root->fs_info->delalloc_lock); | ||
| 8369 | 8247 | ||
| 8370 | work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); | 8248 | work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); |
| 8371 | if (unlikely(!work)) { | 8249 | if (unlikely(!work)) { |
| @@ -8377,16 +8255,39 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8377 | &work->work); | 8255 | &work->work); |
| 8378 | 8256 | ||
| 8379 | cond_resched(); | 8257 | cond_resched(); |
| 8380 | spin_lock(&root->fs_info->delalloc_lock); | 8258 | spin_lock(&root->delalloc_lock); |
| 8381 | } | 8259 | } |
| 8382 | spin_unlock(&root->fs_info->delalloc_lock); | 8260 | spin_unlock(&root->delalloc_lock); |
| 8383 | 8261 | ||
| 8384 | list_for_each_entry_safe(work, next, &works, list) { | 8262 | list_for_each_entry_safe(work, next, &works, list) { |
| 8385 | list_del_init(&work->list); | 8263 | list_del_init(&work->list); |
| 8386 | btrfs_wait_and_free_delalloc_work(work); | 8264 | btrfs_wait_and_free_delalloc_work(work); |
| 8387 | } | 8265 | } |
| 8266 | return 0; | ||
| 8267 | out: | ||
| 8268 | list_for_each_entry_safe(work, next, &works, list) { | ||
| 8269 | list_del_init(&work->list); | ||
| 8270 | btrfs_wait_and_free_delalloc_work(work); | ||
| 8271 | } | ||
| 8272 | |||
| 8273 | if (!list_empty_careful(&splice)) { | ||
| 8274 | spin_lock(&root->delalloc_lock); | ||
| 8275 | list_splice_tail(&splice, &root->delalloc_inodes); | ||
| 8276 | spin_unlock(&root->delalloc_lock); | ||
| 8277 | } | ||
| 8278 | return ret; | ||
| 8279 | } | ||
| 8280 | |||
| 8281 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | ||
| 8282 | { | ||
| 8283 | int ret; | ||
| 8388 | 8284 | ||
| 8389 | /* the filemap_flush will queue IO into the worker threads, but | 8285 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
| 8286 | return -EROFS; | ||
| 8287 | |||
| 8288 | ret = __start_delalloc_inodes(root, delay_iput); | ||
| 8289 | /* | ||
| 8290 | * the filemap_flush will queue IO into the worker threads, but | ||
| 8390 | * we have to make sure the IO is actually started and that | 8291 | * we have to make sure the IO is actually started and that |
| 8391 | * ordered extents get created before we return | 8292 | * ordered extents get created before we return |
| 8392 | */ | 8293 | */ |
| @@ -8398,17 +8299,55 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8398 | atomic_read(&root->fs_info->async_delalloc_pages) == 0)); | 8299 | atomic_read(&root->fs_info->async_delalloc_pages) == 0)); |
| 8399 | } | 8300 | } |
| 8400 | atomic_dec(&root->fs_info->async_submit_draining); | 8301 | atomic_dec(&root->fs_info->async_submit_draining); |
| 8401 | return 0; | 8302 | return ret; |
| 8402 | out: | 8303 | } |
| 8403 | list_for_each_entry_safe(work, next, &works, list) { | 8304 | |
| 8404 | list_del_init(&work->list); | 8305 | int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info, |
| 8405 | btrfs_wait_and_free_delalloc_work(work); | 8306 | int delay_iput) |
| 8307 | { | ||
| 8308 | struct btrfs_root *root; | ||
| 8309 | struct list_head splice; | ||
| 8310 | int ret; | ||
| 8311 | |||
| 8312 | if (fs_info->sb->s_flags & MS_RDONLY) | ||
| 8313 | return -EROFS; | ||
| 8314 | |||
| 8315 | INIT_LIST_HEAD(&splice); | ||
| 8316 | |||
| 8317 | spin_lock(&fs_info->delalloc_root_lock); | ||
| 8318 | list_splice_init(&fs_info->delalloc_roots, &splice); | ||
| 8319 | while (!list_empty(&splice)) { | ||
| 8320 | root = list_first_entry(&splice, struct btrfs_root, | ||
| 8321 | delalloc_root); | ||
| 8322 | root = btrfs_grab_fs_root(root); | ||
| 8323 | BUG_ON(!root); | ||
| 8324 | list_move_tail(&root->delalloc_root, | ||
| 8325 | &fs_info->delalloc_roots); | ||
| 8326 | spin_unlock(&fs_info->delalloc_root_lock); | ||
| 8327 | |||
| 8328 | ret = __start_delalloc_inodes(root, delay_iput); | ||
| 8329 | btrfs_put_fs_root(root); | ||
| 8330 | if (ret) | ||
| 8331 | goto out; | ||
| 8332 | |||
| 8333 | spin_lock(&fs_info->delalloc_root_lock); | ||
| 8406 | } | 8334 | } |
| 8335 | spin_unlock(&fs_info->delalloc_root_lock); | ||
| 8407 | 8336 | ||
| 8337 | atomic_inc(&fs_info->async_submit_draining); | ||
| 8338 | while (atomic_read(&fs_info->nr_async_submits) || | ||
| 8339 | atomic_read(&fs_info->async_delalloc_pages)) { | ||
| 8340 | wait_event(fs_info->async_submit_wait, | ||
| 8341 | (atomic_read(&fs_info->nr_async_submits) == 0 && | ||
| 8342 | atomic_read(&fs_info->async_delalloc_pages) == 0)); | ||
| 8343 | } | ||
| 8344 | atomic_dec(&fs_info->async_submit_draining); | ||
| 8345 | return 0; | ||
| 8346 | out: | ||
| 8408 | if (!list_empty_careful(&splice)) { | 8347 | if (!list_empty_careful(&splice)) { |
| 8409 | spin_lock(&root->fs_info->delalloc_lock); | 8348 | spin_lock(&fs_info->delalloc_root_lock); |
| 8410 | list_splice_tail(&splice, &root->fs_info->delalloc_inodes); | 8349 | list_splice_tail(&splice, &fs_info->delalloc_roots); |
| 8411 | spin_unlock(&root->fs_info->delalloc_lock); | 8350 | spin_unlock(&fs_info->delalloc_root_lock); |
| 8412 | } | 8351 | } |
| 8413 | return ret; | 8352 | return ret; |
| 8414 | } | 8353 | } |
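The delalloc rework above moves tracking from one global fs_info list to per-root state: each btrfs_root carries its own delalloc_inodes list and delalloc_lock, roots with pending delalloc are chained on fs_info->delalloc_roots, and the new btrfs_start_all_delalloc_inodes() walks that chain while pinning each root. A condensed sketch of the pin-then-drop-the-lock iteration pattern (helper names as in the hunk, error handling elided):

    spin_lock(&fs_info->delalloc_root_lock);
    list_splice_init(&fs_info->delalloc_roots, &splice);
    while (!list_empty(&splice)) {
            root = list_first_entry(&splice, struct btrfs_root, delalloc_root);
            root = btrfs_grab_fs_root(root);           /* pin before dropping the lock */
            list_move_tail(&root->delalloc_root, &fs_info->delalloc_roots);
            spin_unlock(&fs_info->delalloc_root_lock);

            __start_delalloc_inodes(root, delay_iput); /* flush this root's dirty inodes */
            btrfs_put_fs_root(root);

            spin_lock(&fs_info->delalloc_root_lock);
    }
    spin_unlock(&fs_info->delalloc_root_lock);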
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0f81d67cdc8d..238a05545ee2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -555,6 +555,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
| 555 | if (!root->ref_cows) | 555 | if (!root->ref_cows) |
| 556 | return -EINVAL; | 556 | return -EINVAL; |
| 557 | 557 | ||
| 558 | ret = btrfs_start_delalloc_inodes(root, 0); | ||
| 559 | if (ret) | ||
| 560 | return ret; | ||
| 561 | |||
| 562 | btrfs_wait_ordered_extents(root, 0); | ||
| 563 | |||
| 558 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 564 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
| 559 | if (!pending_snapshot) | 565 | if (!pending_snapshot) |
| 560 | return -ENOMEM; | 566 | return -ENOMEM; |
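create_snapshot() now flushes before queueing the pending snapshot: delalloc writeback is started and the resulting ordered extents are waited on, so data that was only in the page cache when the ioctl was issued still ends up in the snapshot. In outline:

    ret = btrfs_start_delalloc_inodes(root, 0);   /* start writeback of dirty pages */
    if (ret)
            return ret;
    btrfs_wait_ordered_extents(root, 0);          /* wait until those writes are fully on disk */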
| @@ -2354,14 +2360,6 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) | |||
| 2354 | if (ret) | 2360 | if (ret) |
| 2355 | return ret; | 2361 | return ret; |
| 2356 | 2362 | ||
| 2357 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, | ||
| 2358 | 1)) { | ||
| 2359 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); | ||
| 2360 | mnt_drop_write_file(file); | ||
| 2361 | return -EINVAL; | ||
| 2362 | } | ||
| 2363 | |||
| 2364 | mutex_lock(&root->fs_info->volume_mutex); | ||
| 2365 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 2363 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
| 2366 | if (IS_ERR(vol_args)) { | 2364 | if (IS_ERR(vol_args)) { |
| 2367 | ret = PTR_ERR(vol_args); | 2365 | ret = PTR_ERR(vol_args); |
| @@ -2369,12 +2367,20 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) | |||
| 2369 | } | 2367 | } |
| 2370 | 2368 | ||
| 2371 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 2369 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
| 2372 | ret = btrfs_rm_device(root, vol_args->name); | ||
| 2373 | 2370 | ||
| 2374 | kfree(vol_args); | 2371 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, |
| 2375 | out: | 2372 | 1)) { |
| 2373 | ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; | ||
| 2374 | goto out; | ||
| 2375 | } | ||
| 2376 | |||
| 2377 | mutex_lock(&root->fs_info->volume_mutex); | ||
| 2378 | ret = btrfs_rm_device(root, vol_args->name); | ||
| 2376 | mutex_unlock(&root->fs_info->volume_mutex); | 2379 | mutex_unlock(&root->fs_info->volume_mutex); |
| 2377 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | 2380 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); |
| 2381 | |||
| 2382 | out: | ||
| 2383 | kfree(vol_args); | ||
| 2378 | mnt_drop_write_file(file); | 2384 | mnt_drop_write_file(file); |
| 2379 | return ret; | 2385 | return ret; |
| 2380 | } | 2386 | } |
| @@ -2480,6 +2486,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2480 | int ret; | 2486 | int ret; |
| 2481 | u64 len = olen; | 2487 | u64 len = olen; |
| 2482 | u64 bs = root->fs_info->sb->s_blocksize; | 2488 | u64 bs = root->fs_info->sb->s_blocksize; |
| 2489 | int same_inode = 0; | ||
| 2483 | 2490 | ||
| 2484 | /* | 2491 | /* |
| 2485 | * TODO: | 2492 | * TODO: |
| @@ -2516,7 +2523,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2516 | 2523 | ||
| 2517 | ret = -EINVAL; | 2524 | ret = -EINVAL; |
| 2518 | if (src == inode) | 2525 | if (src == inode) |
| 2519 | goto out_fput; | 2526 | same_inode = 1; |
| 2520 | 2527 | ||
| 2521 | /* the src must be open for reading */ | 2528 | /* the src must be open for reading */ |
| 2522 | if (!(src_file.file->f_mode & FMODE_READ)) | 2529 | if (!(src_file.file->f_mode & FMODE_READ)) |
| @@ -2547,12 +2554,16 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2547 | } | 2554 | } |
| 2548 | path->reada = 2; | 2555 | path->reada = 2; |
| 2549 | 2556 | ||
| 2550 | if (inode < src) { | 2557 | if (!same_inode) { |
| 2551 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | 2558 | if (inode < src) { |
| 2552 | mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); | 2559 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); |
| 2560 | mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); | ||
| 2561 | } else { | ||
| 2562 | mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); | ||
| 2563 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); | ||
| 2564 | } | ||
| 2553 | } else { | 2565 | } else { |
| 2554 | mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); | 2566 | mutex_lock(&src->i_mutex); |
| 2555 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); | ||
| 2556 | } | 2567 | } |
| 2557 | 2568 | ||
| 2558 | /* determine range to clone */ | 2569 | /* determine range to clone */ |
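btrfs_ioctl_clone() now supports cloning a range within a single file, so the locking splits into two cases: two distinct inodes are still taken in pointer order with the parent/child lockdep annotations (so concurrent clones between the same pair cannot deadlock ABBA), while the same-inode case takes i_mutex exactly once. Written out flat, the rule in the hunk is equivalent to:

    if (same_inode) {
            mutex_lock(&src->i_mutex);                          /* one inode, one lock */
    } else if (inode < src) {
            mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
            mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
    } else {
            mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
            mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
    }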
| @@ -2570,6 +2581,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2570 | !IS_ALIGNED(destoff, bs)) | 2581 | !IS_ALIGNED(destoff, bs)) |
| 2571 | goto out_unlock; | 2582 | goto out_unlock; |
| 2572 | 2583 | ||
| 2584 | /* verify if ranges are overlapped within the same file */ | ||
| 2585 | if (same_inode) { | ||
| 2586 | if (destoff + len > off && destoff < off + len) | ||
| 2587 | goto out_unlock; | ||
| 2588 | } | ||
| 2589 | |||
| 2573 | if (destoff > inode->i_size) { | 2590 | if (destoff > inode->i_size) { |
| 2574 | ret = btrfs_cont_expand(inode, inode->i_size, destoff); | 2591 | ret = btrfs_cont_expand(inode, inode->i_size, destoff); |
| 2575 | if (ret) | 2592 | if (ret) |
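For the same-inode case a range guard is also added: the source range [off, off + len) and the destination range [destoff, destoff + len) may not intersect. The check is the usual half-open interval test; a quick worked example:

    /* intervals [off, off+len) and [destoff, destoff+len) overlap iff: */
    if (destoff + len > off && destoff < off + len)
            /* reject: e.g. off = 0, destoff = 4096, len = 8192
             * -> 4096 + 8192 > 0 and 4096 < 8192, so the ranges overlap */
            goto out_unlock;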
| @@ -2846,7 +2863,8 @@ out: | |||
| 2846 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); | 2863 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); |
| 2847 | out_unlock: | 2864 | out_unlock: |
| 2848 | mutex_unlock(&src->i_mutex); | 2865 | mutex_unlock(&src->i_mutex); |
| 2849 | mutex_unlock(&inode->i_mutex); | 2866 | if (!same_inode) |
| 2867 | mutex_unlock(&inode->i_mutex); | ||
| 2850 | vfree(buf); | 2868 | vfree(buf); |
| 2851 | btrfs_free_path(path); | 2869 | btrfs_free_path(path); |
| 2852 | out_fput: | 2870 | out_fput: |
| @@ -2951,11 +2969,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
| 2951 | goto out; | 2969 | goto out; |
| 2952 | } | 2970 | } |
| 2953 | 2971 | ||
| 2954 | if (btrfs_root_refs(&new_root->root_item) == 0) { | ||
| 2955 | ret = -ENOENT; | ||
| 2956 | goto out; | ||
| 2957 | } | ||
| 2958 | |||
| 2959 | path = btrfs_alloc_path(); | 2972 | path = btrfs_alloc_path(); |
| 2960 | if (!path) { | 2973 | if (!path) { |
| 2961 | ret = -ENOMEM; | 2974 | ret = -ENOMEM; |
| @@ -3719,9 +3732,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) | |||
| 3719 | break; | 3732 | break; |
| 3720 | } | 3733 | } |
| 3721 | 3734 | ||
| 3722 | if (copy_to_user(arg, sa, sizeof(*sa))) | ||
| 3723 | ret = -EFAULT; | ||
| 3724 | |||
| 3725 | err = btrfs_commit_transaction(trans, root->fs_info->tree_root); | 3735 | err = btrfs_commit_transaction(trans, root->fs_info->tree_root); |
| 3726 | if (err && !ret) | 3736 | if (err && !ret) |
| 3727 | ret = err; | 3737 | ret = err; |
| @@ -3881,7 +3891,7 @@ drop_write: | |||
| 3881 | 3891 | ||
| 3882 | static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg) | 3892 | static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg) |
| 3883 | { | 3893 | { |
| 3884 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 3894 | struct btrfs_root *root = BTRFS_I(file_inode(file))->root; |
| 3885 | struct btrfs_ioctl_quota_rescan_args *qsa; | 3895 | struct btrfs_ioctl_quota_rescan_args *qsa; |
| 3886 | int ret; | 3896 | int ret; |
| 3887 | 3897 | ||
| @@ -3914,7 +3924,7 @@ drop_write: | |||
| 3914 | 3924 | ||
| 3915 | static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) | 3925 | static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) |
| 3916 | { | 3926 | { |
| 3917 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 3927 | struct btrfs_root *root = BTRFS_I(file_inode(file))->root; |
| 3918 | struct btrfs_ioctl_quota_rescan_args *qsa; | 3928 | struct btrfs_ioctl_quota_rescan_args *qsa; |
| 3919 | int ret = 0; | 3929 | int ret = 0; |
| 3920 | 3930 | ||
| @@ -3937,6 +3947,16 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) | |||
| 3937 | return ret; | 3947 | return ret; |
| 3938 | } | 3948 | } |
| 3939 | 3949 | ||
| 3950 | static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) | ||
| 3951 | { | ||
| 3952 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 3953 | |||
| 3954 | if (!capable(CAP_SYS_ADMIN)) | ||
| 3955 | return -EPERM; | ||
| 3956 | |||
| 3957 | return btrfs_qgroup_wait_for_completion(root->fs_info); | ||
| 3958 | } | ||
| 3959 | |||
| 3940 | static long btrfs_ioctl_set_received_subvol(struct file *file, | 3960 | static long btrfs_ioctl_set_received_subvol(struct file *file, |
| 3941 | void __user *arg) | 3961 | void __user *arg) |
| 3942 | { | 3962 | { |
| @@ -4020,7 +4040,7 @@ out: | |||
| 4020 | 4040 | ||
| 4021 | static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) | 4041 | static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) |
| 4022 | { | 4042 | { |
| 4023 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 4043 | struct btrfs_root *root = BTRFS_I(file_inode(file))->root; |
| 4024 | const char *label = root->fs_info->super_copy->label; | 4044 | const char *label = root->fs_info->super_copy->label; |
| 4025 | size_t len = strnlen(label, BTRFS_LABEL_SIZE); | 4045 | size_t len = strnlen(label, BTRFS_LABEL_SIZE); |
| 4026 | int ret; | 4046 | int ret; |
| @@ -4039,7 +4059,7 @@ static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) | |||
| 4039 | 4059 | ||
| 4040 | static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) | 4060 | static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) |
| 4041 | { | 4061 | { |
| 4042 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 4062 | struct btrfs_root *root = BTRFS_I(file_inode(file))->root; |
| 4043 | struct btrfs_super_block *super_block = root->fs_info->super_copy; | 4063 | struct btrfs_super_block *super_block = root->fs_info->super_copy; |
| 4044 | struct btrfs_trans_handle *trans; | 4064 | struct btrfs_trans_handle *trans; |
| 4045 | char label[BTRFS_LABEL_SIZE]; | 4065 | char label[BTRFS_LABEL_SIZE]; |
| @@ -4179,6 +4199,8 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 4179 | return btrfs_ioctl_quota_rescan(file, argp); | 4199 | return btrfs_ioctl_quota_rescan(file, argp); |
| 4180 | case BTRFS_IOC_QUOTA_RESCAN_STATUS: | 4200 | case BTRFS_IOC_QUOTA_RESCAN_STATUS: |
| 4181 | return btrfs_ioctl_quota_rescan_status(file, argp); | 4201 | return btrfs_ioctl_quota_rescan_status(file, argp); |
| 4202 | case BTRFS_IOC_QUOTA_RESCAN_WAIT: | ||
| 4203 | return btrfs_ioctl_quota_rescan_wait(file, argp); | ||
| 4182 | case BTRFS_IOC_DEV_REPLACE: | 4204 | case BTRFS_IOC_DEV_REPLACE: |
| 4183 | return btrfs_ioctl_dev_replace(root, argp); | 4205 | return btrfs_ioctl_dev_replace(root, argp); |
| 4184 | case BTRFS_IOC_GET_FSLABEL: | 4206 | case BTRFS_IOC_GET_FSLABEL: |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 743b86fa4fcb..f93151a98886 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c | |||
| @@ -31,8 +31,8 @@ | |||
| 31 | 31 | ||
| 32 | struct workspace { | 32 | struct workspace { |
| 33 | void *mem; | 33 | void *mem; |
| 34 | void *buf; /* where compressed data goes */ | 34 | void *buf; /* where decompressed data goes */ |
| 35 | void *cbuf; /* where decompressed data goes */ | 35 | void *cbuf; /* where compressed data goes */ |
| 36 | struct list_head list; | 36 | struct list_head list; |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 1ddd728541ee..81369827e514 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "transaction.h" | 24 | #include "transaction.h" |
| 25 | #include "btrfs_inode.h" | 25 | #include "btrfs_inode.h" |
| 26 | #include "extent_io.h" | 26 | #include "extent_io.h" |
| 27 | #include "disk-io.h" | ||
| 27 | 28 | ||
| 28 | static struct kmem_cache *btrfs_ordered_extent_cache; | 29 | static struct kmem_cache *btrfs_ordered_extent_cache; |
| 29 | 30 | ||
| @@ -184,6 +185,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 184 | u64 start, u64 len, u64 disk_len, | 185 | u64 start, u64 len, u64 disk_len, |
| 185 | int type, int dio, int compress_type) | 186 | int type, int dio, int compress_type) |
| 186 | { | 187 | { |
| 188 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 187 | struct btrfs_ordered_inode_tree *tree; | 189 | struct btrfs_ordered_inode_tree *tree; |
| 188 | struct rb_node *node; | 190 | struct rb_node *node; |
| 189 | struct btrfs_ordered_extent *entry; | 191 | struct btrfs_ordered_extent *entry; |
| @@ -227,10 +229,18 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 227 | ordered_data_tree_panic(inode, -EEXIST, file_offset); | 229 | ordered_data_tree_panic(inode, -EEXIST, file_offset); |
| 228 | spin_unlock_irq(&tree->lock); | 230 | spin_unlock_irq(&tree->lock); |
| 229 | 231 | ||
| 230 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 232 | spin_lock(&root->ordered_extent_lock); |
| 231 | list_add_tail(&entry->root_extent_list, | 233 | list_add_tail(&entry->root_extent_list, |
| 232 | &BTRFS_I(inode)->root->fs_info->ordered_extents); | 234 | &root->ordered_extents); |
| 233 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 235 | root->nr_ordered_extents++; |
| 236 | if (root->nr_ordered_extents == 1) { | ||
| 237 | spin_lock(&root->fs_info->ordered_root_lock); | ||
| 238 | BUG_ON(!list_empty(&root->ordered_root)); | ||
| 239 | list_add_tail(&root->ordered_root, | ||
| 240 | &root->fs_info->ordered_roots); | ||
| 241 | spin_unlock(&root->fs_info->ordered_root_lock); | ||
| 242 | } | ||
| 243 | spin_unlock(&root->ordered_extent_lock); | ||
| 234 | 244 | ||
| 235 | return 0; | 245 | return 0; |
| 236 | } | 246 | } |
| @@ -516,8 +526,9 @@ void btrfs_remove_ordered_extent(struct inode *inode, | |||
| 516 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 526 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
| 517 | spin_unlock_irq(&tree->lock); | 527 | spin_unlock_irq(&tree->lock); |
| 518 | 528 | ||
| 519 | spin_lock(&root->fs_info->ordered_extent_lock); | 529 | spin_lock(&root->ordered_extent_lock); |
| 520 | list_del_init(&entry->root_extent_list); | 530 | list_del_init(&entry->root_extent_list); |
| 531 | root->nr_ordered_extents--; | ||
| 521 | 532 | ||
| 522 | trace_btrfs_ordered_extent_remove(inode, entry); | 533 | trace_btrfs_ordered_extent_remove(inode, entry); |
| 523 | 534 | ||
| @@ -530,7 +541,14 @@ void btrfs_remove_ordered_extent(struct inode *inode, | |||
| 530 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { | 541 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { |
| 531 | list_del_init(&BTRFS_I(inode)->ordered_operations); | 542 | list_del_init(&BTRFS_I(inode)->ordered_operations); |
| 532 | } | 543 | } |
| 533 | spin_unlock(&root->fs_info->ordered_extent_lock); | 544 | |
| 545 | if (!root->nr_ordered_extents) { | ||
| 546 | spin_lock(&root->fs_info->ordered_root_lock); | ||
| 547 | BUG_ON(list_empty(&root->ordered_root)); | ||
| 548 | list_del_init(&root->ordered_root); | ||
| 549 | spin_unlock(&root->fs_info->ordered_root_lock); | ||
| 550 | } | ||
| 551 | spin_unlock(&root->ordered_extent_lock); | ||
| 534 | wake_up(&entry->wait); | 552 | wake_up(&entry->wait); |
| 535 | } | 553 | } |
| 536 | 554 | ||
| @@ -550,7 +568,6 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work) | |||
| 550 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) | 568 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) |
| 551 | { | 569 | { |
| 552 | struct list_head splice, works; | 570 | struct list_head splice, works; |
| 553 | struct list_head *cur; | ||
| 554 | struct btrfs_ordered_extent *ordered, *next; | 571 | struct btrfs_ordered_extent *ordered, *next; |
| 555 | struct inode *inode; | 572 | struct inode *inode; |
| 556 | 573 | ||
| @@ -558,35 +575,34 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) | |||
| 558 | INIT_LIST_HEAD(&works); | 575 | INIT_LIST_HEAD(&works); |
| 559 | 576 | ||
| 560 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 577 | mutex_lock(&root->fs_info->ordered_operations_mutex); |
| 561 | spin_lock(&root->fs_info->ordered_extent_lock); | 578 | spin_lock(&root->ordered_extent_lock); |
| 562 | list_splice_init(&root->fs_info->ordered_extents, &splice); | 579 | list_splice_init(&root->ordered_extents, &splice); |
| 563 | while (!list_empty(&splice)) { | 580 | while (!list_empty(&splice)) { |
| 564 | cur = splice.next; | 581 | ordered = list_first_entry(&splice, struct btrfs_ordered_extent, |
| 565 | ordered = list_entry(cur, struct btrfs_ordered_extent, | 582 | root_extent_list); |
| 566 | root_extent_list); | 583 | list_move_tail(&ordered->root_extent_list, |
| 567 | list_del_init(&ordered->root_extent_list); | 584 | &root->ordered_extents); |
| 568 | atomic_inc(&ordered->refs); | ||
| 569 | |||
| 570 | /* | 585 | /* |
| 571 | * the inode may be getting freed (in sys_unlink path). | 586 | * the inode may be getting freed (in sys_unlink path). |
| 572 | */ | 587 | */ |
| 573 | inode = igrab(ordered->inode); | 588 | inode = igrab(ordered->inode); |
| 589 | if (!inode) { | ||
| 590 | cond_resched_lock(&root->ordered_extent_lock); | ||
| 591 | continue; | ||
| 592 | } | ||
| 574 | 593 | ||
| 575 | spin_unlock(&root->fs_info->ordered_extent_lock); | 594 | atomic_inc(&ordered->refs); |
| 595 | spin_unlock(&root->ordered_extent_lock); | ||
| 576 | 596 | ||
| 577 | if (inode) { | 597 | ordered->flush_work.func = btrfs_run_ordered_extent_work; |
| 578 | ordered->flush_work.func = btrfs_run_ordered_extent_work; | 598 | list_add_tail(&ordered->work_list, &works); |
| 579 | list_add_tail(&ordered->work_list, &works); | 599 | btrfs_queue_worker(&root->fs_info->flush_workers, |
| 580 | btrfs_queue_worker(&root->fs_info->flush_workers, | 600 | &ordered->flush_work); |
| 581 | &ordered->flush_work); | ||
| 582 | } else { | ||
| 583 | btrfs_put_ordered_extent(ordered); | ||
| 584 | } | ||
| 585 | 601 | ||
| 586 | cond_resched(); | 602 | cond_resched(); |
| 587 | spin_lock(&root->fs_info->ordered_extent_lock); | 603 | spin_lock(&root->ordered_extent_lock); |
| 588 | } | 604 | } |
| 589 | spin_unlock(&root->fs_info->ordered_extent_lock); | 605 | spin_unlock(&root->ordered_extent_lock); |
| 590 | 606 | ||
| 591 | list_for_each_entry_safe(ordered, next, &works, work_list) { | 607 | list_for_each_entry_safe(ordered, next, &works, work_list) { |
| 592 | list_del_init(&ordered->work_list); | 608 | list_del_init(&ordered->work_list); |
| @@ -604,6 +620,33 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) | |||
| 604 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | 620 | mutex_unlock(&root->fs_info->ordered_operations_mutex); |
| 605 | } | 621 | } |
| 606 | 622 | ||
| 623 | void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info, | ||
| 624 | int delay_iput) | ||
| 625 | { | ||
| 626 | struct btrfs_root *root; | ||
| 627 | struct list_head splice; | ||
| 628 | |||
| 629 | INIT_LIST_HEAD(&splice); | ||
| 630 | |||
| 631 | spin_lock(&fs_info->ordered_root_lock); | ||
| 632 | list_splice_init(&fs_info->ordered_roots, &splice); | ||
| 633 | while (!list_empty(&splice)) { | ||
| 634 | root = list_first_entry(&splice, struct btrfs_root, | ||
| 635 | ordered_root); | ||
| 636 | root = btrfs_grab_fs_root(root); | ||
| 637 | BUG_ON(!root); | ||
| 638 | list_move_tail(&root->ordered_root, | ||
| 639 | &fs_info->ordered_roots); | ||
| 640 | spin_unlock(&fs_info->ordered_root_lock); | ||
| 641 | |||
| 642 | btrfs_wait_ordered_extents(root, delay_iput); | ||
| 643 | btrfs_put_fs_root(root); | ||
| 644 | |||
| 645 | spin_lock(&fs_info->ordered_root_lock); | ||
| 646 | } | ||
| 647 | spin_unlock(&fs_info->ordered_root_lock); | ||
| 648 | } | ||
| 649 | |||
| 607 | /* | 650 | /* |
| 608 | * this is used during transaction commit to write all the inodes | 651 | * this is used during transaction commit to write all the inodes |
| 609 | * added to the ordered operation list. These files must be fully on | 652 | * added to the ordered operation list. These files must be fully on |
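Ordered-extent tracking gets the same per-root treatment as delalloc: extents now hang off root->ordered_extents under root->ordered_extent_lock, a root is linked onto fs_info->ordered_roots while it has work outstanding, and the new btrfs_wait_all_ordered_extents() above walks those roots with the grab/put pinning pattern. The bookkeeping that keeps the root list accurate, condensed from the hunks above (fs_info->ordered_root_lock is taken around the list updates):

    /* adding the first ordered extent of a root (under root->ordered_extent_lock) */
    root->nr_ordered_extents++;
    if (root->nr_ordered_extents == 1)
            list_add_tail(&root->ordered_root, &root->fs_info->ordered_roots);

    /* removing the last one */
    root->nr_ordered_extents--;
    if (!root->nr_ordered_extents)
            list_del_init(&root->ordered_root);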
| @@ -629,7 +672,7 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, | |||
| 629 | INIT_LIST_HEAD(&works); | 672 | INIT_LIST_HEAD(&works); |
| 630 | 673 | ||
| 631 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 674 | mutex_lock(&root->fs_info->ordered_operations_mutex); |
| 632 | spin_lock(&root->fs_info->ordered_extent_lock); | 675 | spin_lock(&root->fs_info->ordered_root_lock); |
| 633 | list_splice_init(&cur_trans->ordered_operations, &splice); | 676 | list_splice_init(&cur_trans->ordered_operations, &splice); |
| 634 | while (!list_empty(&splice)) { | 677 | while (!list_empty(&splice)) { |
| 635 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | 678 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, |
| @@ -648,17 +691,17 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, | |||
| 648 | if (!wait) | 691 | if (!wait) |
| 649 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | 692 | list_add_tail(&BTRFS_I(inode)->ordered_operations, |
| 650 | &cur_trans->ordered_operations); | 693 | &cur_trans->ordered_operations); |
| 651 | spin_unlock(&root->fs_info->ordered_extent_lock); | 694 | spin_unlock(&root->fs_info->ordered_root_lock); |
| 652 | 695 | ||
| 653 | work = btrfs_alloc_delalloc_work(inode, wait, 1); | 696 | work = btrfs_alloc_delalloc_work(inode, wait, 1); |
| 654 | if (!work) { | 697 | if (!work) { |
| 655 | spin_lock(&root->fs_info->ordered_extent_lock); | 698 | spin_lock(&root->fs_info->ordered_root_lock); |
| 656 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) | 699 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) |
| 657 | list_add_tail(&btrfs_inode->ordered_operations, | 700 | list_add_tail(&btrfs_inode->ordered_operations, |
| 658 | &splice); | 701 | &splice); |
| 659 | list_splice_tail(&splice, | 702 | list_splice_tail(&splice, |
| 660 | &cur_trans->ordered_operations); | 703 | &cur_trans->ordered_operations); |
| 661 | spin_unlock(&root->fs_info->ordered_extent_lock); | 704 | spin_unlock(&root->fs_info->ordered_root_lock); |
| 662 | ret = -ENOMEM; | 705 | ret = -ENOMEM; |
| 663 | goto out; | 706 | goto out; |
| 664 | } | 707 | } |
| @@ -667,9 +710,9 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, | |||
| 667 | &work->work); | 710 | &work->work); |
| 668 | 711 | ||
| 669 | cond_resched(); | 712 | cond_resched(); |
| 670 | spin_lock(&root->fs_info->ordered_extent_lock); | 713 | spin_lock(&root->fs_info->ordered_root_lock); |
| 671 | } | 714 | } |
| 672 | spin_unlock(&root->fs_info->ordered_extent_lock); | 715 | spin_unlock(&root->fs_info->ordered_root_lock); |
| 673 | out: | 716 | out: |
| 674 | list_for_each_entry_safe(work, next, &works, list) { | 717 | list_for_each_entry_safe(work, next, &works, list) { |
| 675 | list_del_init(&work->list); | 718 | list_del_init(&work->list); |
| @@ -989,7 +1032,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
| 989 | u32 *sum, int len) | 1032 | u32 *sum, int len) |
| 990 | { | 1033 | { |
| 991 | struct btrfs_ordered_sum *ordered_sum; | 1034 | struct btrfs_ordered_sum *ordered_sum; |
| 992 | struct btrfs_sector_sum *sector_sums; | ||
| 993 | struct btrfs_ordered_extent *ordered; | 1035 | struct btrfs_ordered_extent *ordered; |
| 994 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; | 1036 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; |
| 995 | unsigned long num_sectors; | 1037 | unsigned long num_sectors; |
| @@ -1007,18 +1049,16 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
| 1007 | disk_bytenr < ordered_sum->bytenr + ordered_sum->len) { | 1049 | disk_bytenr < ordered_sum->bytenr + ordered_sum->len) { |
| 1008 | i = (disk_bytenr - ordered_sum->bytenr) >> | 1050 | i = (disk_bytenr - ordered_sum->bytenr) >> |
| 1009 | inode->i_sb->s_blocksize_bits; | 1051 | inode->i_sb->s_blocksize_bits; |
| 1010 | sector_sums = ordered_sum->sums + i; | ||
| 1011 | num_sectors = ordered_sum->len >> | 1052 | num_sectors = ordered_sum->len >> |
| 1012 | inode->i_sb->s_blocksize_bits; | 1053 | inode->i_sb->s_blocksize_bits; |
| 1013 | for (; i < num_sectors; i++) { | 1054 | num_sectors = min_t(int, len - index, num_sectors - i); |
| 1014 | if (sector_sums[i].bytenr == disk_bytenr) { | 1055 | memcpy(sum + index, ordered_sum->sums + i, |
| 1015 | sum[index] = sector_sums[i].sum; | 1056 | num_sectors); |
| 1016 | index++; | 1057 | |
| 1017 | if (index == len) | 1058 | index += (int)num_sectors; |
| 1018 | goto out; | 1059 | if (index == len) |
| 1019 | disk_bytenr += sectorsize; | 1060 | goto out; |
| 1020 | } | 1061 | disk_bytenr += num_sectors * sectorsize; |
| 1021 | } | ||
| 1022 | } | 1062 | } |
| 1023 | } | 1063 | } |
| 1024 | out: | 1064 | out: |
| @@ -1055,12 +1095,12 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
| 1055 | if (last_mod < root->fs_info->last_trans_committed) | 1095 | if (last_mod < root->fs_info->last_trans_committed) |
| 1056 | return; | 1096 | return; |
| 1057 | 1097 | ||
| 1058 | spin_lock(&root->fs_info->ordered_extent_lock); | 1098 | spin_lock(&root->fs_info->ordered_root_lock); |
| 1059 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { | 1099 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { |
| 1060 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | 1100 | list_add_tail(&BTRFS_I(inode)->ordered_operations, |
| 1061 | &cur_trans->ordered_operations); | 1101 | &cur_trans->ordered_operations); |
| 1062 | } | 1102 | } |
| 1063 | spin_unlock(&root->fs_info->ordered_extent_lock); | 1103 | spin_unlock(&root->fs_info->ordered_root_lock); |
| 1064 | } | 1104 | } |
| 1065 | 1105 | ||
| 1066 | int __init ordered_data_init(void) | 1106 | int __init ordered_data_init(void) |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 58b0e3b0ebad..68844d59ee6f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -26,18 +26,6 @@ struct btrfs_ordered_inode_tree { | |||
| 26 | struct rb_node *last; | 26 | struct rb_node *last; |
| 27 | }; | 27 | }; |
| 28 | 28 | ||
| 29 | /* | ||
| 30 | * these are used to collect checksums done just before bios submission. | ||
| 31 | * They are attached via a list into the ordered extent, and | ||
| 32 | * checksum items are inserted into the tree after all the blocks in | ||
| 33 | * the ordered extent are on disk | ||
| 34 | */ | ||
| 35 | struct btrfs_sector_sum { | ||
| 36 | /* bytenr on disk */ | ||
| 37 | u64 bytenr; | ||
| 38 | u32 sum; | ||
| 39 | }; | ||
| 40 | |||
| 41 | struct btrfs_ordered_sum { | 29 | struct btrfs_ordered_sum { |
| 42 | /* bytenr is the start of this extent on disk */ | 30 | /* bytenr is the start of this extent on disk */ |
| 43 | u64 bytenr; | 31 | u64 bytenr; |
| @@ -45,10 +33,10 @@ struct btrfs_ordered_sum { | |||
| 45 | /* | 33 | /* |
| 46 | * this is the length in bytes covered by the sums array below. | 34 | * this is the length in bytes covered by the sums array below. |
| 47 | */ | 35 | */ |
| 48 | unsigned long len; | 36 | int len; |
| 49 | struct list_head list; | 37 | struct list_head list; |
| 50 | /* last field is a variable length array of btrfs_sector_sums */ | 38 | /* last field is a variable length array of csums */ |
| 51 | struct btrfs_sector_sum sums[]; | 39 | u32 sums[]; |
| 52 | }; | 40 | }; |
| 53 | 41 | ||
| 54 | /* | 42 | /* |
| @@ -149,11 +137,8 @@ struct btrfs_ordered_extent { | |||
| 149 | static inline int btrfs_ordered_sum_size(struct btrfs_root *root, | 137 | static inline int btrfs_ordered_sum_size(struct btrfs_root *root, |
| 150 | unsigned long bytes) | 138 | unsigned long bytes) |
| 151 | { | 139 | { |
| 152 | unsigned long num_sectors = (bytes + root->sectorsize - 1) / | 140 | int num_sectors = (int)DIV_ROUND_UP(bytes, root->sectorsize); |
| 153 | root->sectorsize; | 141 | return sizeof(struct btrfs_ordered_sum) + num_sectors * sizeof(u32); |
| 154 | num_sectors++; | ||
| 155 | return sizeof(struct btrfs_ordered_sum) + | ||
| 156 | num_sectors * sizeof(struct btrfs_sector_sum); | ||
| 157 | } | 142 | } |
| 158 | 143 | ||
| 159 | static inline void | 144 | static inline void |
| @@ -204,6 +189,8 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
| 204 | struct btrfs_root *root, | 189 | struct btrfs_root *root, |
| 205 | struct inode *inode); | 190 | struct inode *inode); |
| 206 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); | 191 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); |
| 192 | void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info, | ||
| 193 | int delay_iput); | ||
| 207 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); | 194 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); |
| 208 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); | 195 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); |
| 209 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); | 196 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); |
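The header change drops struct btrfs_sector_sum (a u64 disk bytenr plus a u32 csum per block) in favour of a flat u32 array; each sector's bytenr is now implied by its position relative to ordered_sum->bytenr, and btrfs_ordered_sum_size() shrinks accordingly. For a 16KiB extent on a 4KiB-sector filesystem the checksum payload changes roughly as follows (illustrative arithmetic, struct padding ignored):

    int num_sectors = DIV_ROUND_UP(16384, 4096);       /* 4 sectors */

    /* old layout: a { u64 bytenr; u32 sum; } record per sector, and the old
     * helper allocated one record more than needed */
    size_t old_payload = (num_sectors + 1) * (sizeof(u64) + sizeof(u32));  /* 60 bytes */

    /* new layout: one u32 checksum per sector */
    size_t new_payload = num_sectors * sizeof(u32);                        /* 16 bytes */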
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 9d49c586995a..1280eff8af56 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
| @@ -98,13 +98,10 @@ struct btrfs_qgroup_list { | |||
| 98 | struct btrfs_qgroup *member; | 98 | struct btrfs_qgroup *member; |
| 99 | }; | 99 | }; |
| 100 | 100 | ||
| 101 | struct qgroup_rescan { | 101 | static int |
| 102 | struct btrfs_work work; | 102 | qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, |
| 103 | struct btrfs_fs_info *fs_info; | 103 | int init_flags); |
| 104 | }; | 104 | static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); |
| 105 | |||
| 106 | static void qgroup_rescan_start(struct btrfs_fs_info *fs_info, | ||
| 107 | struct qgroup_rescan *qscan); | ||
| 108 | 105 | ||
| 109 | /* must be called with qgroup_ioctl_lock held */ | 106 | /* must be called with qgroup_ioctl_lock held */ |
| 110 | static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, | 107 | static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, |
| @@ -255,10 +252,17 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) | |||
| 255 | int slot; | 252 | int slot; |
| 256 | int ret = 0; | 253 | int ret = 0; |
| 257 | u64 flags = 0; | 254 | u64 flags = 0; |
| 255 | u64 rescan_progress = 0; | ||
| 258 | 256 | ||
| 259 | if (!fs_info->quota_enabled) | 257 | if (!fs_info->quota_enabled) |
| 260 | return 0; | 258 | return 0; |
| 261 | 259 | ||
| 260 | fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); | ||
| 261 | if (!fs_info->qgroup_ulist) { | ||
| 262 | ret = -ENOMEM; | ||
| 263 | goto out; | ||
| 264 | } | ||
| 265 | |||
| 262 | path = btrfs_alloc_path(); | 266 | path = btrfs_alloc_path(); |
| 263 | if (!path) { | 267 | if (!path) { |
| 264 | ret = -ENOMEM; | 268 | ret = -ENOMEM; |
| @@ -306,20 +310,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) | |||
| 306 | } | 310 | } |
| 307 | fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, | 311 | fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, |
| 308 | ptr); | 312 | ptr); |
| 309 | fs_info->qgroup_rescan_progress.objectid = | 313 | rescan_progress = btrfs_qgroup_status_rescan(l, ptr); |
| 310 | btrfs_qgroup_status_rescan(l, ptr); | ||
| 311 | if (fs_info->qgroup_flags & | ||
| 312 | BTRFS_QGROUP_STATUS_FLAG_RESCAN) { | ||
| 313 | struct qgroup_rescan *qscan = | ||
| 314 | kmalloc(sizeof(*qscan), GFP_NOFS); | ||
| 315 | if (!qscan) { | ||
| 316 | ret = -ENOMEM; | ||
| 317 | goto out; | ||
| 318 | } | ||
| 319 | fs_info->qgroup_rescan_progress.type = 0; | ||
| 320 | fs_info->qgroup_rescan_progress.offset = 0; | ||
| 321 | qgroup_rescan_start(fs_info, qscan); | ||
| 322 | } | ||
| 323 | goto next1; | 314 | goto next1; |
| 324 | } | 315 | } |
| 325 | 316 | ||
| @@ -421,9 +412,18 @@ out: | |||
| 421 | if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) { | 412 | if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) { |
| 422 | fs_info->quota_enabled = 0; | 413 | fs_info->quota_enabled = 0; |
| 423 | fs_info->pending_quota_state = 0; | 414 | fs_info->pending_quota_state = 0; |
| 415 | } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && | ||
| 416 | ret >= 0) { | ||
| 417 | ret = qgroup_rescan_init(fs_info, rescan_progress, 0); | ||
| 424 | } | 418 | } |
| 425 | btrfs_free_path(path); | 419 | btrfs_free_path(path); |
| 426 | 420 | ||
| 421 | if (ret < 0) { | ||
| 422 | ulist_free(fs_info->qgroup_ulist); | ||
| 423 | fs_info->qgroup_ulist = NULL; | ||
| 424 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
| 425 | } | ||
| 426 | |||
| 427 | return ret < 0 ? ret : 0; | 427 | return ret < 0 ? ret : 0; |
| 428 | } | 428 | } |
| 429 | 429 | ||
| @@ -460,6 +460,7 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) | |||
| 460 | } | 460 | } |
| 461 | kfree(qgroup); | 461 | kfree(qgroup); |
| 462 | } | 462 | } |
| 463 | ulist_free(fs_info->qgroup_ulist); | ||
| 463 | } | 464 | } |
| 464 | 465 | ||
| 465 | static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, | 466 | static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, |
| @@ -819,6 +820,12 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, | |||
| 819 | goto out; | 820 | goto out; |
| 820 | } | 821 | } |
| 821 | 822 | ||
| 823 | fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); | ||
| 824 | if (!fs_info->qgroup_ulist) { | ||
| 825 | ret = -ENOMEM; | ||
| 826 | goto out; | ||
| 827 | } | ||
| 828 | |||
| 822 | /* | 829 | /* |
| 823 | * initially create the quota tree | 830 | * initially create the quota tree |
| 824 | */ | 831 | */ |
| @@ -916,6 +923,10 @@ out_free_root: | |||
| 916 | kfree(quota_root); | 923 | kfree(quota_root); |
| 917 | } | 924 | } |
| 918 | out: | 925 | out: |
| 926 | if (ret) { | ||
| 927 | ulist_free(fs_info->qgroup_ulist); | ||
| 928 | fs_info->qgroup_ulist = NULL; | ||
| 929 | } | ||
| 919 | mutex_unlock(&fs_info->qgroup_ioctl_lock); | 930 | mutex_unlock(&fs_info->qgroup_ioctl_lock); |
| 920 | return ret; | 931 | return ret; |
| 921 | } | 932 | } |
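The qgroup hunks replace the per-operation ulist_alloc(GFP_ATOMIC) temporaries with a single long-lived fs_info->qgroup_ulist: it is allocated when quotas are enabled or the qgroup config is read, reinitialised under the qgroup lock for each accounting or reservation pass, and freed in btrfs_free_qgroup_config(). Its lifetime, in outline (error handling elided):

    /* setup: btrfs_quota_enable() / btrfs_read_qgroup_config() */
    fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);

    /* per accounting/reservation pass, under fs_info->qgroup_lock */
    ulist_reinit(fs_info->qgroup_ulist);
    ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);

    /* teardown: btrfs_free_qgroup_config() */
    ulist_free(fs_info->qgroup_ulist);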
| @@ -1355,7 +1366,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1355 | u64 ref_root; | 1366 | u64 ref_root; |
| 1356 | struct btrfs_qgroup *qgroup; | 1367 | struct btrfs_qgroup *qgroup; |
| 1357 | struct ulist *roots = NULL; | 1368 | struct ulist *roots = NULL; |
| 1358 | struct ulist *tmp = NULL; | ||
| 1359 | u64 seq; | 1369 | u64 seq; |
| 1360 | int ret = 0; | 1370 | int ret = 0; |
| 1361 | int sgn; | 1371 | int sgn; |
| @@ -1428,14 +1438,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1428 | if (ret < 0) | 1438 | if (ret < 0) |
| 1429 | return ret; | 1439 | return ret; |
| 1430 | 1440 | ||
| 1431 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
| 1432 | spin_lock(&fs_info->qgroup_lock); | 1441 | spin_lock(&fs_info->qgroup_lock); |
| 1433 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { | ||
| 1434 | if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { | ||
| 1435 | ret = 0; | ||
| 1436 | goto unlock; | ||
| 1437 | } | ||
| 1438 | } | ||
| 1439 | 1442 | ||
| 1440 | quota_root = fs_info->quota_root; | 1443 | quota_root = fs_info->quota_root; |
| 1441 | if (!quota_root) | 1444 | if (!quota_root) |
| @@ -1448,39 +1451,34 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1448 | /* | 1451 | /* |
| 1449 | * step 1: for each old ref, visit all nodes once and inc refcnt | 1452 | * step 1: for each old ref, visit all nodes once and inc refcnt |
| 1450 | */ | 1453 | */ |
| 1451 | tmp = ulist_alloc(GFP_ATOMIC); | 1454 | ulist_reinit(fs_info->qgroup_ulist); |
| 1452 | if (!tmp) { | ||
| 1453 | ret = -ENOMEM; | ||
| 1454 | goto unlock; | ||
| 1455 | } | ||
| 1456 | seq = fs_info->qgroup_seq; | 1455 | seq = fs_info->qgroup_seq; |
| 1457 | fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ | 1456 | fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ |
| 1458 | 1457 | ||
| 1459 | ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq); | 1458 | ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist, |
| 1459 | seq); | ||
| 1460 | if (ret) | 1460 | if (ret) |
| 1461 | goto unlock; | 1461 | goto unlock; |
| 1462 | 1462 | ||
| 1463 | /* | 1463 | /* |
| 1464 | * step 2: walk from the new root | 1464 | * step 2: walk from the new root |
| 1465 | */ | 1465 | */ |
| 1466 | ret = qgroup_account_ref_step2(fs_info, roots, tmp, seq, sgn, | 1466 | ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist, |
| 1467 | node->num_bytes, qgroup); | 1467 | seq, sgn, node->num_bytes, qgroup); |
| 1468 | if (ret) | 1468 | if (ret) |
| 1469 | goto unlock; | 1469 | goto unlock; |
| 1470 | 1470 | ||
| 1471 | /* | 1471 | /* |
| 1472 | * step 3: walk again from old refs | 1472 | * step 3: walk again from old refs |
| 1473 | */ | 1473 | */ |
| 1474 | ret = qgroup_account_ref_step3(fs_info, roots, tmp, seq, sgn, | 1474 | ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist, |
| 1475 | node->num_bytes); | 1475 | seq, sgn, node->num_bytes); |
| 1476 | if (ret) | 1476 | if (ret) |
| 1477 | goto unlock; | 1477 | goto unlock; |
| 1478 | 1478 | ||
| 1479 | unlock: | 1479 | unlock: |
| 1480 | spin_unlock(&fs_info->qgroup_lock); | 1480 | spin_unlock(&fs_info->qgroup_lock); |
| 1481 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
| 1482 | ulist_free(roots); | 1481 | ulist_free(roots); |
| 1483 | ulist_free(tmp); | ||
| 1484 | 1482 | ||
| 1485 | return ret; | 1483 | return ret; |
| 1486 | } | 1484 | } |
| @@ -1527,9 +1525,12 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | |||
| 1527 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | 1525 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
| 1528 | 1526 | ||
| 1529 | if (!ret && start_rescan_worker) { | 1527 | if (!ret && start_rescan_worker) { |
| 1530 | ret = btrfs_qgroup_rescan(fs_info); | 1528 | ret = qgroup_rescan_init(fs_info, 0, 1); |
| 1531 | if (ret) | 1529 | if (!ret) { |
| 1532 | pr_err("btrfs: start rescan quota failed: %d\n", ret); | 1530 | qgroup_rescan_zero_tracking(fs_info); |
| 1531 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | ||
| 1532 | &fs_info->qgroup_rescan_work); | ||
| 1533 | } | ||
| 1533 | ret = 0; | 1534 | ret = 0; |
| 1534 | } | 1535 | } |
| 1535 | 1536 | ||
| @@ -1720,7 +1721,6 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
| 1720 | struct btrfs_fs_info *fs_info = root->fs_info; | 1721 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 1721 | u64 ref_root = root->root_key.objectid; | 1722 | u64 ref_root = root->root_key.objectid; |
| 1722 | int ret = 0; | 1723 | int ret = 0; |
| 1723 | struct ulist *ulist = NULL; | ||
| 1724 | struct ulist_node *unode; | 1724 | struct ulist_node *unode; |
| 1725 | struct ulist_iterator uiter; | 1725 | struct ulist_iterator uiter; |
| 1726 | 1726 | ||
| @@ -1743,17 +1743,13 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
| 1743 | * in a first step, we check all affected qgroups if any limits would | 1743 | * in a first step, we check all affected qgroups if any limits would |
| 1744 | * be exceeded | 1744 | * be exceeded |
| 1745 | */ | 1745 | */ |
| 1746 | ulist = ulist_alloc(GFP_ATOMIC); | 1746 | ulist_reinit(fs_info->qgroup_ulist); |
| 1747 | if (!ulist) { | 1747 | ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, |
| 1748 | ret = -ENOMEM; | ||
| 1749 | goto out; | ||
| 1750 | } | ||
| 1751 | ret = ulist_add(ulist, qgroup->qgroupid, | ||
| 1752 | (uintptr_t)qgroup, GFP_ATOMIC); | 1748 | (uintptr_t)qgroup, GFP_ATOMIC); |
| 1753 | if (ret < 0) | 1749 | if (ret < 0) |
| 1754 | goto out; | 1750 | goto out; |
| 1755 | ULIST_ITER_INIT(&uiter); | 1751 | ULIST_ITER_INIT(&uiter); |
| 1756 | while ((unode = ulist_next(ulist, &uiter))) { | 1752 | while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { |
| 1757 | struct btrfs_qgroup *qg; | 1753 | struct btrfs_qgroup *qg; |
| 1758 | struct btrfs_qgroup_list *glist; | 1754 | struct btrfs_qgroup_list *glist; |
| 1759 | 1755 | ||
| @@ -1774,7 +1770,8 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
| 1774 | } | 1770 | } |
| 1775 | 1771 | ||
| 1776 | list_for_each_entry(glist, &qg->groups, next_group) { | 1772 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1777 | ret = ulist_add(ulist, glist->group->qgroupid, | 1773 | ret = ulist_add(fs_info->qgroup_ulist, |
| 1774 | glist->group->qgroupid, | ||
| 1778 | (uintptr_t)glist->group, GFP_ATOMIC); | 1775 | (uintptr_t)glist->group, GFP_ATOMIC); |
| 1779 | if (ret < 0) | 1776 | if (ret < 0) |
| 1780 | goto out; | 1777 | goto out; |
| @@ -1785,7 +1782,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
| 1785 | * no limits exceeded, now record the reservation into all qgroups | 1782 | * no limits exceeded, now record the reservation into all qgroups |
| 1786 | */ | 1783 | */ |
| 1787 | ULIST_ITER_INIT(&uiter); | 1784 | ULIST_ITER_INIT(&uiter); |
| 1788 | while ((unode = ulist_next(ulist, &uiter))) { | 1785 | while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { |
| 1789 | struct btrfs_qgroup *qg; | 1786 | struct btrfs_qgroup *qg; |
| 1790 | 1787 | ||
| 1791 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; | 1788 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
| @@ -1795,8 +1792,6 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
| 1795 | 1792 | ||
| 1796 | out: | 1793 | out: |
| 1797 | spin_unlock(&fs_info->qgroup_lock); | 1794 | spin_unlock(&fs_info->qgroup_lock); |
| 1798 | ulist_free(ulist); | ||
| 1799 | |||
| 1800 | return ret; | 1795 | return ret; |
| 1801 | } | 1796 | } |
| 1802 | 1797 | ||
| @@ -1805,7 +1800,6 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
| 1805 | struct btrfs_root *quota_root; | 1800 | struct btrfs_root *quota_root; |
| 1806 | struct btrfs_qgroup *qgroup; | 1801 | struct btrfs_qgroup *qgroup; |
| 1807 | struct btrfs_fs_info *fs_info = root->fs_info; | 1802 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 1808 | struct ulist *ulist = NULL; | ||
| 1809 | struct ulist_node *unode; | 1803 | struct ulist_node *unode; |
| 1810 | struct ulist_iterator uiter; | 1804 | struct ulist_iterator uiter; |
| 1811 | u64 ref_root = root->root_key.objectid; | 1805 | u64 ref_root = root->root_key.objectid; |
| @@ -1827,17 +1821,13 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
| 1827 | if (!qgroup) | 1821 | if (!qgroup) |
| 1828 | goto out; | 1822 | goto out; |
| 1829 | 1823 | ||
| 1830 | ulist = ulist_alloc(GFP_ATOMIC); | 1824 | ulist_reinit(fs_info->qgroup_ulist); |
| 1831 | if (!ulist) { | 1825 | ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, |
| 1832 | btrfs_std_error(fs_info, -ENOMEM); | ||
| 1833 | goto out; | ||
| 1834 | } | ||
| 1835 | ret = ulist_add(ulist, qgroup->qgroupid, | ||
| 1836 | (uintptr_t)qgroup, GFP_ATOMIC); | 1826 | (uintptr_t)qgroup, GFP_ATOMIC); |
| 1837 | if (ret < 0) | 1827 | if (ret < 0) |
| 1838 | goto out; | 1828 | goto out; |
| 1839 | ULIST_ITER_INIT(&uiter); | 1829 | ULIST_ITER_INIT(&uiter); |
| 1840 | while ((unode = ulist_next(ulist, &uiter))) { | 1830 | while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { |
| 1841 | struct btrfs_qgroup *qg; | 1831 | struct btrfs_qgroup *qg; |
| 1842 | struct btrfs_qgroup_list *glist; | 1832 | struct btrfs_qgroup_list *glist; |
| 1843 | 1833 | ||
| @@ -1846,7 +1836,8 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
| 1846 | qg->reserved -= num_bytes; | 1836 | qg->reserved -= num_bytes; |
| 1847 | 1837 | ||
| 1848 | list_for_each_entry(glist, &qg->groups, next_group) { | 1838 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1849 | ret = ulist_add(ulist, glist->group->qgroupid, | 1839 | ret = ulist_add(fs_info->qgroup_ulist, |
| 1840 | glist->group->qgroupid, | ||
| 1850 | (uintptr_t)glist->group, GFP_ATOMIC); | 1841 | (uintptr_t)glist->group, GFP_ATOMIC); |
| 1851 | if (ret < 0) | 1842 | if (ret < 0) |
| 1852 | goto out; | 1843 | goto out; |
| @@ -1855,7 +1846,6 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
| 1855 | 1846 | ||
| 1856 | out: | 1847 | out: |
| 1857 | spin_unlock(&fs_info->qgroup_lock); | 1848 | spin_unlock(&fs_info->qgroup_lock); |
| 1858 | ulist_free(ulist); | ||
| 1859 | } | 1849 | } |
| 1860 | 1850 | ||
| 1861 | void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) | 1851 | void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) |
| @@ -1874,12 +1864,11 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) | |||
| 1874 | * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared. | 1864 | * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared. |
| 1875 | */ | 1865 | */ |
| 1876 | static int | 1866 | static int |
| 1877 | qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path, | 1867 | qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, |
| 1878 | struct btrfs_trans_handle *trans, struct ulist *tmp, | 1868 | struct btrfs_trans_handle *trans, struct ulist *tmp, |
| 1879 | struct extent_buffer *scratch_leaf) | 1869 | struct extent_buffer *scratch_leaf) |
| 1880 | { | 1870 | { |
| 1881 | struct btrfs_key found; | 1871 | struct btrfs_key found; |
| 1882 | struct btrfs_fs_info *fs_info = qscan->fs_info; | ||
| 1883 | struct ulist *roots = NULL; | 1872 | struct ulist *roots = NULL; |
| 1884 | struct ulist_node *unode; | 1873 | struct ulist_node *unode; |
| 1885 | struct ulist_iterator uiter; | 1874 | struct ulist_iterator uiter; |
| @@ -2007,11 +1996,10 @@ out: | |||
| 2007 | 1996 | ||
| 2008 | static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | 1997 | static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) |
| 2009 | { | 1998 | { |
| 2010 | struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan, | 1999 | struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, |
| 2011 | work); | 2000 | qgroup_rescan_work); |
| 2012 | struct btrfs_path *path; | 2001 | struct btrfs_path *path; |
| 2013 | struct btrfs_trans_handle *trans = NULL; | 2002 | struct btrfs_trans_handle *trans = NULL; |
| 2014 | struct btrfs_fs_info *fs_info = qscan->fs_info; | ||
| 2015 | struct ulist *tmp = NULL; | 2003 | struct ulist *tmp = NULL; |
| 2016 | struct extent_buffer *scratch_leaf = NULL; | 2004 | struct extent_buffer *scratch_leaf = NULL; |
| 2017 | int err = -ENOMEM; | 2005 | int err = -ENOMEM; |
| @@ -2036,7 +2024,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | |||
| 2036 | if (!fs_info->quota_enabled) { | 2024 | if (!fs_info->quota_enabled) { |
| 2037 | err = -EINTR; | 2025 | err = -EINTR; |
| 2038 | } else { | 2026 | } else { |
| 2039 | err = qgroup_rescan_leaf(qscan, path, trans, | 2027 | err = qgroup_rescan_leaf(fs_info, path, trans, |
| 2040 | tmp, scratch_leaf); | 2028 | tmp, scratch_leaf); |
| 2041 | } | 2029 | } |
| 2042 | if (err > 0) | 2030 | if (err > 0) |
| @@ -2049,7 +2037,6 @@ out: | |||
| 2049 | kfree(scratch_leaf); | 2037 | kfree(scratch_leaf); |
| 2050 | ulist_free(tmp); | 2038 | ulist_free(tmp); |
| 2051 | btrfs_free_path(path); | 2039 | btrfs_free_path(path); |
| 2052 | kfree(qscan); | ||
| 2053 | 2040 | ||
| 2054 | mutex_lock(&fs_info->qgroup_rescan_lock); | 2041 | mutex_lock(&fs_info->qgroup_rescan_lock); |
| 2055 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; | 2042 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; |
| @@ -2068,47 +2055,74 @@ out: | |||
| 2068 | } else { | 2055 | } else { |
| 2069 | pr_err("btrfs: qgroup scan failed with %d\n", err); | 2056 | pr_err("btrfs: qgroup scan failed with %d\n", err); |
| 2070 | } | 2057 | } |
| 2071 | } | ||
| 2072 | 2058 | ||
| 2073 | static void | 2059 | complete_all(&fs_info->qgroup_rescan_completion); |
| 2074 | qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan) | ||
| 2075 | { | ||
| 2076 | memset(&qscan->work, 0, sizeof(qscan->work)); | ||
| 2077 | qscan->work.func = btrfs_qgroup_rescan_worker; | ||
| 2078 | qscan->fs_info = fs_info; | ||
| 2079 | |||
| 2080 | pr_info("btrfs: qgroup scan started\n"); | ||
| 2081 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work); | ||
| 2082 | } | 2060 | } |
| 2083 | 2061 | ||
| 2084 | int | 2062 | /* |
| 2085 | btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | 2063 | * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all |
| 2064 | * memory required for the rescan context. | ||
| 2065 | */ | ||
| 2066 | static int | ||
| 2067 | qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, | ||
| 2068 | int init_flags) | ||
| 2086 | { | 2069 | { |
| 2087 | int ret = 0; | 2070 | int ret = 0; |
| 2088 | struct rb_node *n; | ||
| 2089 | struct btrfs_qgroup *qgroup; | ||
| 2090 | struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS); | ||
| 2091 | 2071 | ||
| 2092 | if (!qscan) | 2072 | if (!init_flags && |
| 2093 | return -ENOMEM; | 2073 | (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) || |
| 2074 | !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) { | ||
| 2075 | ret = -EINVAL; | ||
| 2076 | goto err; | ||
| 2077 | } | ||
| 2094 | 2078 | ||
| 2095 | mutex_lock(&fs_info->qgroup_rescan_lock); | 2079 | mutex_lock(&fs_info->qgroup_rescan_lock); |
| 2096 | spin_lock(&fs_info->qgroup_lock); | 2080 | spin_lock(&fs_info->qgroup_lock); |
| 2097 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) | 2081 | |
| 2098 | ret = -EINPROGRESS; | 2082 | if (init_flags) { |
| 2099 | else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) | 2083 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) |
| 2100 | ret = -EINVAL; | 2084 | ret = -EINPROGRESS; |
| 2101 | if (ret) { | 2085 | else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) |
| 2102 | spin_unlock(&fs_info->qgroup_lock); | 2086 | ret = -EINVAL; |
| 2103 | mutex_unlock(&fs_info->qgroup_rescan_lock); | 2087 | |
| 2104 | kfree(qscan); | 2088 | if (ret) { |
| 2105 | return ret; | 2089 | spin_unlock(&fs_info->qgroup_lock); |
| 2090 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
| 2091 | goto err; | ||
| 2092 | } | ||
| 2093 | |||
| 2094 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
| 2106 | } | 2095 | } |
| 2107 | 2096 | ||
| 2108 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
| 2109 | memset(&fs_info->qgroup_rescan_progress, 0, | 2097 | memset(&fs_info->qgroup_rescan_progress, 0, |
| 2110 | sizeof(fs_info->qgroup_rescan_progress)); | 2098 | sizeof(fs_info->qgroup_rescan_progress)); |
| 2099 | fs_info->qgroup_rescan_progress.objectid = progress_objectid; | ||
| 2100 | |||
| 2101 | spin_unlock(&fs_info->qgroup_lock); | ||
| 2102 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
| 2103 | |||
| 2104 | init_completion(&fs_info->qgroup_rescan_completion); | ||
| 2105 | |||
| 2106 | memset(&fs_info->qgroup_rescan_work, 0, | ||
| 2107 | sizeof(fs_info->qgroup_rescan_work)); | ||
| 2108 | fs_info->qgroup_rescan_work.func = btrfs_qgroup_rescan_worker; | ||
| 2109 | |||
| 2110 | if (ret) { | ||
| 2111 | err: | ||
| 2112 | pr_info("btrfs: qgroup_rescan_init failed with %d\n", ret); | ||
| 2113 | return ret; | ||
| 2114 | } | ||
| 2115 | |||
| 2116 | return 0; | ||
| 2117 | } | ||
| 2118 | |||
| 2119 | static void | ||
| 2120 | qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) | ||
| 2121 | { | ||
| 2122 | struct rb_node *n; | ||
| 2123 | struct btrfs_qgroup *qgroup; | ||
| 2111 | 2124 | ||
| 2125 | spin_lock(&fs_info->qgroup_lock); | ||
| 2112 | /* clear all current qgroup tracking information */ | 2126 | /* clear all current qgroup tracking information */ |
| 2113 | for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { | 2127 | for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { |
| 2114 | qgroup = rb_entry(n, struct btrfs_qgroup, node); | 2128 | qgroup = rb_entry(n, struct btrfs_qgroup, node); |
| @@ -2118,9 +2132,74 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | |||
| 2118 | qgroup->excl_cmpr = 0; | 2132 | qgroup->excl_cmpr = 0; |
| 2119 | } | 2133 | } |
| 2120 | spin_unlock(&fs_info->qgroup_lock); | 2134 | spin_unlock(&fs_info->qgroup_lock); |
| 2121 | mutex_unlock(&fs_info->qgroup_rescan_lock); | 2135 | } |
| 2136 | |||
| 2137 | int | ||
| 2138 | btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | ||
| 2139 | { | ||
| 2140 | int ret = 0; | ||
| 2141 | struct btrfs_trans_handle *trans; | ||
| 2122 | 2142 | ||
| 2123 | qgroup_rescan_start(fs_info, qscan); | 2143 | ret = qgroup_rescan_init(fs_info, 0, 1); |
| 2144 | if (ret) | ||
| 2145 | return ret; | ||
| 2146 | |||
| 2147 | /* | ||
| 2148 | * We have set the rescan_progress to 0, which means no more | ||
| 2149 | * delayed refs will be accounted by btrfs_qgroup_account_ref. | ||
| 2150 | * However, btrfs_qgroup_account_ref may be running right after its call | ||
| 2151 | * to btrfs_find_all_roots, in which case it would still do the | ||
| 2152 | * accounting. | ||
| 2153 | * To solve this, we're committing the transaction, which will | ||
| 2154 | * ensure we run all delayed refs and only after that, we are | ||
| 2155 | * going to clear all tracking information for a clean start. | ||
| 2156 | */ | ||
| 2157 | |||
| 2158 | trans = btrfs_join_transaction(fs_info->fs_root); | ||
| 2159 | if (IS_ERR(trans)) { | ||
| 2160 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
| 2161 | return PTR_ERR(trans); | ||
| 2162 | } | ||
| 2163 | ret = btrfs_commit_transaction(trans, fs_info->fs_root); | ||
| 2164 | if (ret) { | ||
| 2165 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
| 2166 | return ret; | ||
| 2167 | } | ||
| 2168 | |||
| 2169 | qgroup_rescan_zero_tracking(fs_info); | ||
| 2170 | |||
| 2171 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | ||
| 2172 | &fs_info->qgroup_rescan_work); | ||
| 2124 | 2173 | ||
| 2125 | return 0; | 2174 | return 0; |
| 2126 | } | 2175 | } |
| 2176 | |||
| 2177 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) | ||
| 2178 | { | ||
| 2179 | int running; | ||
| 2180 | int ret = 0; | ||
| 2181 | |||
| 2182 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
| 2183 | spin_lock(&fs_info->qgroup_lock); | ||
| 2184 | running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
| 2185 | spin_unlock(&fs_info->qgroup_lock); | ||
| 2186 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
| 2187 | |||
| 2188 | if (running) | ||
| 2189 | ret = wait_for_completion_interruptible( | ||
| 2190 | &fs_info->qgroup_rescan_completion); | ||
| 2191 | |||
| 2192 | return ret; | ||
| 2193 | } | ||
| 2194 | |||
| 2195 | /* | ||
| 2196 | * this is only called from open_ctree where we're still single threaded, thus | ||
| 2197 | * locking is omitted here. | ||
| 2198 | */ | ||
| 2199 | void | ||
| 2200 | btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) | ||
| 2201 | { | ||
| 2202 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) | ||
| 2203 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | ||
| 2204 | &fs_info->qgroup_rescan_work); | ||
| 2205 | } | ||
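The qgroup.c hunks above replace the per-call ulist_alloc(GFP_ATOMIC) done while fs_info->qgroup_lock is held with a single fs_info->qgroup_ulist that is allocated when quotas are enabled and only ulist_reinit()ed on each use, and they split rescan startup into qgroup_rescan_init() plus qgroup_rescan_zero_tracking() before queueing the worker. A minimal userspace sketch of the "preallocate once, reset under the lock" pattern follows; all names here (scratch, fake_fs_info, ...) are hypothetical stand-ins, not btrfs APIs.

```c
/*
 * Illustrative userspace analogue of the qgroup.c change above: instead of
 * allocating a scratch list while the lock is held, allocate it once (at
 * "quota enable" time) and merely reset it under the lock.
 */
#include <pthread.h>
#include <stdlib.h>

struct scratch {
	unsigned long *vals;
	size_t nr;
	size_t cap;
};

static struct scratch *scratch_alloc(size_t cap)
{
	struct scratch *s = malloc(sizeof(*s));

	if (!s)
		return NULL;
	s->vals = calloc(cap, sizeof(*s->vals));
	if (!s->vals) {
		free(s);
		return NULL;
	}
	s->nr = 0;
	s->cap = cap;
	return s;
}

/* cheap reset: no allocation, so it is safe while a lock is held */
static void scratch_reinit(struct scratch *s)
{
	s->nr = 0;
}

struct fake_fs_info {
	pthread_mutex_t lock;
	struct scratch *qgroup_scratch;	/* allocated once, reused forever */
};

static void account_one_ref(struct fake_fs_info *fs, unsigned long val)
{
	pthread_mutex_lock(&fs->lock);
	scratch_reinit(fs->qgroup_scratch);	/* was: alloc under the lock */
	if (fs->qgroup_scratch->nr < fs->qgroup_scratch->cap)
		fs->qgroup_scratch->vals[fs->qgroup_scratch->nr++] = val;
	pthread_mutex_unlock(&fs->lock);
}

int main(void)
{
	struct fake_fs_info fs = { .lock = PTHREAD_MUTEX_INITIALIZER };

	fs.qgroup_scratch = scratch_alloc(64);	/* "quota enable" time */
	if (!fs.qgroup_scratch)
		return 1;
	account_one_ref(&fs, 42);
	free(fs.qgroup_scratch->vals);
	free(fs.qgroup_scratch);
	return 0;
}
```

Presumably the point is that an atomic allocation under a spinlock can fail at an awkward moment, whereas resetting a buffer that already exists cannot.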
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4febca4fc2de..12096496cc99 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -1305,6 +1305,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, | |||
| 1305 | struct extent_buffer *eb; | 1305 | struct extent_buffer *eb; |
| 1306 | struct btrfs_root_item *root_item; | 1306 | struct btrfs_root_item *root_item; |
| 1307 | struct btrfs_key root_key; | 1307 | struct btrfs_key root_key; |
| 1308 | u64 last_snap = 0; | ||
| 1308 | int ret; | 1309 | int ret; |
| 1309 | 1310 | ||
| 1310 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | 1311 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); |
| @@ -1320,6 +1321,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, | |||
| 1320 | BTRFS_TREE_RELOC_OBJECTID); | 1321 | BTRFS_TREE_RELOC_OBJECTID); |
| 1321 | BUG_ON(ret); | 1322 | BUG_ON(ret); |
| 1322 | 1323 | ||
| 1324 | last_snap = btrfs_root_last_snapshot(&root->root_item); | ||
| 1323 | btrfs_set_root_last_snapshot(&root->root_item, | 1325 | btrfs_set_root_last_snapshot(&root->root_item, |
| 1324 | trans->transid - 1); | 1326 | trans->transid - 1); |
| 1325 | } else { | 1327 | } else { |
| @@ -1345,6 +1347,12 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, | |||
| 1345 | memset(&root_item->drop_progress, 0, | 1347 | memset(&root_item->drop_progress, 0, |
| 1346 | sizeof(struct btrfs_disk_key)); | 1348 | sizeof(struct btrfs_disk_key)); |
| 1347 | root_item->drop_level = 0; | 1349 | root_item->drop_level = 0; |
| 1350 | /* | ||
| 1351 | * abuse rtransid, it is safe because it is impossible to | ||
| 1352 | * receive data into a relocation tree. | ||
| 1353 | */ | ||
| 1354 | btrfs_set_root_rtransid(root_item, last_snap); | ||
| 1355 | btrfs_set_root_otransid(root_item, trans->transid); | ||
| 1348 | } | 1356 | } |
| 1349 | 1357 | ||
| 1350 | btrfs_tree_unlock(eb); | 1358 | btrfs_tree_unlock(eb); |
| @@ -1355,8 +1363,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, | |||
| 1355 | BUG_ON(ret); | 1363 | BUG_ON(ret); |
| 1356 | kfree(root_item); | 1364 | kfree(root_item); |
| 1357 | 1365 | ||
| 1358 | reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | 1366 | reloc_root = btrfs_read_fs_root(root->fs_info->tree_root, &root_key); |
| 1359 | &root_key); | ||
| 1360 | BUG_ON(IS_ERR(reloc_root)); | 1367 | BUG_ON(IS_ERR(reloc_root)); |
| 1361 | reloc_root->last_trans = trans->transid; | 1368 | reloc_root->last_trans = trans->transid; |
| 1362 | return reloc_root; | 1369 | return reloc_root; |
| @@ -2273,8 +2280,12 @@ void free_reloc_roots(struct list_head *list) | |||
| 2273 | static noinline_for_stack | 2280 | static noinline_for_stack |
| 2274 | int merge_reloc_roots(struct reloc_control *rc) | 2281 | int merge_reloc_roots(struct reloc_control *rc) |
| 2275 | { | 2282 | { |
| 2283 | struct btrfs_trans_handle *trans; | ||
| 2276 | struct btrfs_root *root; | 2284 | struct btrfs_root *root; |
| 2277 | struct btrfs_root *reloc_root; | 2285 | struct btrfs_root *reloc_root; |
| 2286 | u64 last_snap; | ||
| 2287 | u64 otransid; | ||
| 2288 | u64 objectid; | ||
| 2278 | LIST_HEAD(reloc_roots); | 2289 | LIST_HEAD(reloc_roots); |
| 2279 | int found = 0; | 2290 | int found = 0; |
| 2280 | int ret = 0; | 2291 | int ret = 0; |
| @@ -2308,12 +2319,44 @@ again: | |||
| 2308 | } else { | 2319 | } else { |
| 2309 | list_del_init(&reloc_root->root_list); | 2320 | list_del_init(&reloc_root->root_list); |
| 2310 | } | 2321 | } |
| 2322 | |||
| 2323 | /* | ||
| 2324 | * we keep the old last snapshot transid in rtransid when we | ||
| 2325 | * created the relocation tree. | ||
| 2326 | */ | ||
| 2327 | last_snap = btrfs_root_rtransid(&reloc_root->root_item); | ||
| 2328 | otransid = btrfs_root_otransid(&reloc_root->root_item); | ||
| 2329 | objectid = reloc_root->root_key.offset; | ||
| 2330 | |||
| 2311 | ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); | 2331 | ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); |
| 2312 | if (ret < 0) { | 2332 | if (ret < 0) { |
| 2313 | if (list_empty(&reloc_root->root_list)) | 2333 | if (list_empty(&reloc_root->root_list)) |
| 2314 | list_add_tail(&reloc_root->root_list, | 2334 | list_add_tail(&reloc_root->root_list, |
| 2315 | &reloc_roots); | 2335 | &reloc_roots); |
| 2316 | goto out; | 2336 | goto out; |
| 2337 | } else if (!ret) { | ||
| 2338 | /* | ||
| 2339 | * recover the last snapshot transid to avoid | ||
| 2340 | * the space balance breaking NOCOW. | ||
| 2341 | */ | ||
| 2342 | root = read_fs_root(rc->extent_root->fs_info, | ||
| 2343 | objectid); | ||
| 2344 | if (IS_ERR(root)) | ||
| 2345 | continue; | ||
| 2346 | |||
| 2347 | if (btrfs_root_refs(&root->root_item) == 0) | ||
| 2348 | continue; | ||
| 2349 | |||
| 2350 | trans = btrfs_join_transaction(root); | ||
| 2351 | BUG_ON(IS_ERR(trans)); | ||
| 2352 | |||
| 2353 | /* Check if the fs/file tree was snapshotted or not. */ | ||
| 2354 | if (btrfs_root_last_snapshot(&root->root_item) == | ||
| 2355 | otransid - 1) | ||
| 2356 | btrfs_set_root_last_snapshot(&root->root_item, | ||
| 2357 | last_snap); | ||
| 2358 | |||
| 2359 | btrfs_end_transaction(trans, root); | ||
| 2317 | } | 2360 | } |
| 2318 | } | 2361 | } |
| 2319 | 2362 | ||
| @@ -3266,6 +3309,8 @@ static int __add_tree_block(struct reloc_control *rc, | |||
| 3266 | struct btrfs_path *path; | 3309 | struct btrfs_path *path; |
| 3267 | struct btrfs_key key; | 3310 | struct btrfs_key key; |
| 3268 | int ret; | 3311 | int ret; |
| 3312 | bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info, | ||
| 3313 | SKINNY_METADATA); | ||
| 3269 | 3314 | ||
| 3270 | if (tree_block_processed(bytenr, blocksize, rc)) | 3315 | if (tree_block_processed(bytenr, blocksize, rc)) |
| 3271 | return 0; | 3316 | return 0; |
| @@ -3276,10 +3321,15 @@ static int __add_tree_block(struct reloc_control *rc, | |||
| 3276 | path = btrfs_alloc_path(); | 3321 | path = btrfs_alloc_path(); |
| 3277 | if (!path) | 3322 | if (!path) |
| 3278 | return -ENOMEM; | 3323 | return -ENOMEM; |
| 3279 | 3324 | again: | |
| 3280 | key.objectid = bytenr; | 3325 | key.objectid = bytenr; |
| 3281 | key.type = BTRFS_EXTENT_ITEM_KEY; | 3326 | if (skinny) { |
| 3282 | key.offset = blocksize; | 3327 | key.type = BTRFS_METADATA_ITEM_KEY; |
| 3328 | key.offset = (u64)-1; | ||
| 3329 | } else { | ||
| 3330 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 3331 | key.offset = blocksize; | ||
| 3332 | } | ||
| 3283 | 3333 | ||
| 3284 | path->search_commit_root = 1; | 3334 | path->search_commit_root = 1; |
| 3285 | path->skip_locking = 1; | 3335 | path->skip_locking = 1; |
| @@ -3287,11 +3337,23 @@ static int __add_tree_block(struct reloc_control *rc, | |||
| 3287 | if (ret < 0) | 3337 | if (ret < 0) |
| 3288 | goto out; | 3338 | goto out; |
| 3289 | 3339 | ||
| 3290 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | 3340 | if (ret > 0 && skinny) { |
| 3291 | if (ret > 0) { | 3341 | if (path->slots[0]) { |
| 3292 | if (key.objectid == bytenr && | 3342 | path->slots[0]--; |
| 3293 | key.type == BTRFS_METADATA_ITEM_KEY) | 3343 | btrfs_item_key_to_cpu(path->nodes[0], &key, |
| 3294 | ret = 0; | 3344 | path->slots[0]); |
| 3345 | if (key.objectid == bytenr && | ||
| 3346 | (key.type == BTRFS_METADATA_ITEM_KEY || | ||
| 3347 | (key.type == BTRFS_EXTENT_ITEM_KEY && | ||
| 3348 | key.offset == blocksize))) | ||
| 3349 | ret = 0; | ||
| 3350 | } | ||
| 3351 | |||
| 3352 | if (ret) { | ||
| 3353 | skinny = false; | ||
| 3354 | btrfs_release_path(path); | ||
| 3355 | goto again; | ||
| 3356 | } | ||
| 3295 | } | 3357 | } |
| 3296 | BUG_ON(ret); | 3358 | BUG_ON(ret); |
| 3297 | 3359 | ||
| @@ -4160,12 +4222,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 4160 | (unsigned long long)rc->block_group->key.objectid, | 4222 | (unsigned long long)rc->block_group->key.objectid, |
| 4161 | (unsigned long long)rc->block_group->flags); | 4223 | (unsigned long long)rc->block_group->flags); |
| 4162 | 4224 | ||
| 4163 | ret = btrfs_start_delalloc_inodes(fs_info->tree_root, 0); | 4225 | ret = btrfs_start_all_delalloc_inodes(fs_info, 0); |
| 4164 | if (ret < 0) { | 4226 | if (ret < 0) { |
| 4165 | err = ret; | 4227 | err = ret; |
| 4166 | goto out; | 4228 | goto out; |
| 4167 | } | 4229 | } |
| 4168 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); | 4230 | btrfs_wait_all_ordered_extents(fs_info, 0); |
| 4169 | 4231 | ||
| 4170 | while (1) { | 4232 | while (1) { |
| 4171 | mutex_lock(&fs_info->cleaner_mutex); | 4233 | mutex_lock(&fs_info->cleaner_mutex); |
| @@ -4277,7 +4339,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
| 4277 | key.type != BTRFS_ROOT_ITEM_KEY) | 4339 | key.type != BTRFS_ROOT_ITEM_KEY) |
| 4278 | break; | 4340 | break; |
| 4279 | 4341 | ||
| 4280 | reloc_root = btrfs_read_fs_root_no_radix(root, &key); | 4342 | reloc_root = btrfs_read_fs_root(root, &key); |
| 4281 | if (IS_ERR(reloc_root)) { | 4343 | if (IS_ERR(reloc_root)) { |
| 4282 | err = PTR_ERR(reloc_root); | 4344 | err = PTR_ERR(reloc_root); |
| 4283 | goto out; | 4345 | goto out; |
| @@ -4396,10 +4458,8 @@ out: | |||
| 4396 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | 4458 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) |
| 4397 | { | 4459 | { |
| 4398 | struct btrfs_ordered_sum *sums; | 4460 | struct btrfs_ordered_sum *sums; |
| 4399 | struct btrfs_sector_sum *sector_sum; | ||
| 4400 | struct btrfs_ordered_extent *ordered; | 4461 | struct btrfs_ordered_extent *ordered; |
| 4401 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4462 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 4402 | size_t offset; | ||
| 4403 | int ret; | 4463 | int ret; |
| 4404 | u64 disk_bytenr; | 4464 | u64 disk_bytenr; |
| 4405 | LIST_HEAD(list); | 4465 | LIST_HEAD(list); |
| @@ -4413,19 +4473,13 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | |||
| 4413 | if (ret) | 4473 | if (ret) |
| 4414 | goto out; | 4474 | goto out; |
| 4415 | 4475 | ||
| 4476 | disk_bytenr = ordered->start; | ||
| 4416 | while (!list_empty(&list)) { | 4477 | while (!list_empty(&list)) { |
| 4417 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); | 4478 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); |
| 4418 | list_del_init(&sums->list); | 4479 | list_del_init(&sums->list); |
| 4419 | 4480 | ||
| 4420 | sector_sum = sums->sums; | 4481 | sums->bytenr = disk_bytenr; |
| 4421 | sums->bytenr = ordered->start; | 4482 | disk_bytenr += sums->len; |
| 4422 | |||
| 4423 | offset = 0; | ||
| 4424 | while (offset < sums->len) { | ||
| 4425 | sector_sum->bytenr += ordered->start - disk_bytenr; | ||
| 4426 | sector_sum++; | ||
| 4427 | offset += root->sectorsize; | ||
| 4428 | } | ||
| 4429 | 4483 | ||
| 4430 | btrfs_add_ordered_sum(inode, ordered, sums); | 4484 | btrfs_add_ordered_sum(inode, ordered, sums); |
| 4431 | } | 4485 | } |
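In __add_tree_block() above, the lookup first tries the skinny metadata key (BTRFS_METADATA_ITEM_KEY with offset -1) and, if the previous slot does not match, releases the path and retries with the legacy BTRFS_EXTENT_ITEM_KEY/blocksize key. The sketch below mirrors that try-new-format-then-fall-back flow over a flat array; the array and helper are purely illustrative stand-ins for the real b-tree search, which also accepts an exact extent item on the first pass.

```c
/*
 * Sketch of the key-format fallback used by __add_tree_block() above: look
 * for the skinny metadata item first, and only if that finds nothing retry
 * with the legacy extent item keyed by block size.
 */
#include <stdbool.h>
#include <stdio.h>

enum { EXTENT_ITEM = 168, METADATA_ITEM = 169 };

struct key {
	unsigned long long objectid;
	int type;
	unsigned long long offset;
};

static bool find_tree_block(const struct key *items, int nr,
			    unsigned long long bytenr,
			    unsigned long long blocksize)
{
	bool skinny = true;	/* assume the SKINNY_METADATA incompat bit */
	int i;

again:
	for (i = 0; i < nr; i++) {
		if (items[i].objectid != bytenr)
			continue;
		if (skinny && items[i].type == METADATA_ITEM)
			return true;
		if (!skinny && items[i].type == EXTENT_ITEM &&
		    items[i].offset == blocksize)
			return true;
	}
	if (skinny) {
		/* nothing found with the new format: retry with the old one */
		skinny = false;
		goto again;
	}
	return false;
}

int main(void)
{
	/* an old-style metadata extent, recorded with the pre-skinny key */
	const struct key items[] = { { 4096, EXTENT_ITEM, 4096 } };

	printf("found: %d\n", find_tree_block(items, 1, 4096, 4096));
	return 0;
}
```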
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 5bf1ed57f178..ffb1036ef10d 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
| @@ -64,52 +64,59 @@ void btrfs_read_root_item(struct extent_buffer *eb, int slot, | |||
| 64 | } | 64 | } |
| 65 | 65 | ||
| 66 | /* | 66 | /* |
| 67 | * lookup the root with the highest offset for a given objectid. The key we do | 67 | * btrfs_find_root - lookup the root by the key. |
| 68 | * find is copied into 'key'. If we find something return 0, otherwise 1, < 0 | 68 | * root: the root of the root tree |
| 69 | * on error. | 69 | * search_key: the key to search |
| 70 | * path: the path we search | ||
| 71 | * root_item: the root item of the tree we look for | ||
| 72 | * root_key: the real key of the tree we look for | ||
| 73 | * | ||
| 74 | * If ->offset of 'search_key' is -1ULL, it means we are not sure of the | ||
| 75 | * offset of the search key; just look up the root with the highest offset | ||
| 76 | * for a given objectid. | ||
| 77 | * | ||
| 78 | * If we find something return 0, otherwise > 0, < 0 on error. | ||
| 70 | */ | 79 | */ |
| 71 | int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, | 80 | int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key, |
| 72 | struct btrfs_root_item *item, struct btrfs_key *key) | 81 | struct btrfs_path *path, struct btrfs_root_item *root_item, |
| 82 | struct btrfs_key *root_key) | ||
| 73 | { | 83 | { |
| 74 | struct btrfs_path *path; | ||
| 75 | struct btrfs_key search_key; | ||
| 76 | struct btrfs_key found_key; | 84 | struct btrfs_key found_key; |
| 77 | struct extent_buffer *l; | 85 | struct extent_buffer *l; |
| 78 | int ret; | 86 | int ret; |
| 79 | int slot; | 87 | int slot; |
| 80 | 88 | ||
| 81 | search_key.objectid = objectid; | 89 | ret = btrfs_search_slot(NULL, root, search_key, path, 0, 0); |
| 82 | search_key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 83 | search_key.offset = (u64)-1; | ||
| 84 | |||
| 85 | path = btrfs_alloc_path(); | ||
| 86 | if (!path) | ||
| 87 | return -ENOMEM; | ||
| 88 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); | ||
| 89 | if (ret < 0) | 90 | if (ret < 0) |
| 90 | goto out; | 91 | return ret; |
| 91 | 92 | ||
| 92 | BUG_ON(ret == 0); | 93 | if (search_key->offset != -1ULL) { /* the search key is exact */ |
| 93 | if (path->slots[0] == 0) { | 94 | if (ret > 0) |
| 94 | ret = 1; | 95 | goto out; |
| 95 | goto out; | 96 | } else { |
| 97 | BUG_ON(ret == 0); /* Logical error */ | ||
| 98 | if (path->slots[0] == 0) | ||
| 99 | goto out; | ||
| 100 | path->slots[0]--; | ||
| 101 | ret = 0; | ||
| 96 | } | 102 | } |
| 103 | |||
| 97 | l = path->nodes[0]; | 104 | l = path->nodes[0]; |
| 98 | slot = path->slots[0] - 1; | 105 | slot = path->slots[0]; |
| 106 | |||
| 99 | btrfs_item_key_to_cpu(l, &found_key, slot); | 107 | btrfs_item_key_to_cpu(l, &found_key, slot); |
| 100 | if (found_key.objectid != objectid || | 108 | if (found_key.objectid != search_key->objectid || |
| 101 | found_key.type != BTRFS_ROOT_ITEM_KEY) { | 109 | found_key.type != BTRFS_ROOT_ITEM_KEY) { |
| 102 | ret = 1; | 110 | ret = 1; |
| 103 | goto out; | 111 | goto out; |
| 104 | } | 112 | } |
| 105 | if (item) | ||
| 106 | btrfs_read_root_item(l, slot, item); | ||
| 107 | if (key) | ||
| 108 | memcpy(key, &found_key, sizeof(found_key)); | ||
| 109 | 113 | ||
| 110 | ret = 0; | 114 | if (root_item) |
| 115 | btrfs_read_root_item(l, slot, root_item); | ||
| 116 | if (root_key) | ||
| 117 | memcpy(root_key, &found_key, sizeof(found_key)); | ||
| 111 | out: | 118 | out: |
| 112 | btrfs_free_path(path); | 119 | btrfs_release_path(path); |
| 113 | return ret; | 120 | return ret; |
| 114 | } | 121 | } |
| 115 | 122 | ||
| @@ -212,86 +219,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 212 | return btrfs_insert_item(trans, root, key, item, sizeof(*item)); | 219 | return btrfs_insert_item(trans, root, key, item, sizeof(*item)); |
| 213 | } | 220 | } |
| 214 | 221 | ||
| 215 | /* | ||
| 216 | * at mount time we want to find all the old transaction snapshots that were in | ||
| 217 | * the process of being deleted if we crashed. This is any root item with an | ||
| 218 | * offset lower than the latest root. They need to be queued for deletion to | ||
| 219 | * finish what was happening when we crashed. | ||
| 220 | */ | ||
| 221 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid) | ||
| 222 | { | ||
| 223 | struct btrfs_root *dead_root; | ||
| 224 | struct btrfs_root_item *ri; | ||
| 225 | struct btrfs_key key; | ||
| 226 | struct btrfs_key found_key; | ||
| 227 | struct btrfs_path *path; | ||
| 228 | int ret; | ||
| 229 | u32 nritems; | ||
| 230 | struct extent_buffer *leaf; | ||
| 231 | int slot; | ||
| 232 | |||
| 233 | key.objectid = objectid; | ||
| 234 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 235 | key.offset = 0; | ||
| 236 | path = btrfs_alloc_path(); | ||
| 237 | if (!path) | ||
| 238 | return -ENOMEM; | ||
| 239 | |||
| 240 | again: | ||
| 241 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 242 | if (ret < 0) | ||
| 243 | goto err; | ||
| 244 | while (1) { | ||
| 245 | leaf = path->nodes[0]; | ||
| 246 | nritems = btrfs_header_nritems(leaf); | ||
| 247 | slot = path->slots[0]; | ||
| 248 | if (slot >= nritems) { | ||
| 249 | ret = btrfs_next_leaf(root, path); | ||
| 250 | if (ret) | ||
| 251 | break; | ||
| 252 | leaf = path->nodes[0]; | ||
| 253 | nritems = btrfs_header_nritems(leaf); | ||
| 254 | slot = path->slots[0]; | ||
| 255 | } | ||
| 256 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
| 257 | if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) | ||
| 258 | goto next; | ||
| 259 | |||
| 260 | if (key.objectid < objectid) | ||
| 261 | goto next; | ||
| 262 | |||
| 263 | if (key.objectid > objectid) | ||
| 264 | break; | ||
| 265 | |||
| 266 | ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item); | ||
| 267 | if (btrfs_disk_root_refs(leaf, ri) != 0) | ||
| 268 | goto next; | ||
| 269 | |||
| 270 | memcpy(&found_key, &key, sizeof(key)); | ||
| 271 | key.offset++; | ||
| 272 | btrfs_release_path(path); | ||
| 273 | dead_root = | ||
| 274 | btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | ||
| 275 | &found_key); | ||
| 276 | if (IS_ERR(dead_root)) { | ||
| 277 | ret = PTR_ERR(dead_root); | ||
| 278 | goto err; | ||
| 279 | } | ||
| 280 | |||
| 281 | ret = btrfs_add_dead_root(dead_root); | ||
| 282 | if (ret) | ||
| 283 | goto err; | ||
| 284 | goto again; | ||
| 285 | next: | ||
| 286 | slot++; | ||
| 287 | path->slots[0]++; | ||
| 288 | } | ||
| 289 | ret = 0; | ||
| 290 | err: | ||
| 291 | btrfs_free_path(path); | ||
| 292 | return ret; | ||
| 293 | } | ||
| 294 | |||
| 295 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | 222 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root) |
| 296 | { | 223 | { |
| 297 | struct extent_buffer *leaf; | 224 | struct extent_buffer *leaf; |
| @@ -301,6 +228,10 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
| 301 | struct btrfs_root *root; | 228 | struct btrfs_root *root; |
| 302 | int err = 0; | 229 | int err = 0; |
| 303 | int ret; | 230 | int ret; |
| 231 | bool can_recover = true; | ||
| 232 | |||
| 233 | if (tree_root->fs_info->sb->s_flags & MS_RDONLY) | ||
| 234 | can_recover = false; | ||
| 304 | 235 | ||
| 305 | path = btrfs_alloc_path(); | 236 | path = btrfs_alloc_path(); |
| 306 | if (!path) | 237 | if (!path) |
| @@ -340,20 +271,52 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
| 340 | root_key.objectid = key.offset; | 271 | root_key.objectid = key.offset; |
| 341 | key.offset++; | 272 | key.offset++; |
| 342 | 273 | ||
| 343 | root = btrfs_read_fs_root_no_name(tree_root->fs_info, | 274 | root = btrfs_read_fs_root(tree_root, &root_key); |
| 344 | &root_key); | 275 | err = PTR_RET(root); |
| 345 | if (!IS_ERR(root)) | 276 | if (err && err != -ENOENT) { |
| 277 | break; | ||
| 278 | } else if (err == -ENOENT) { | ||
| 279 | struct btrfs_trans_handle *trans; | ||
| 280 | |||
| 281 | btrfs_release_path(path); | ||
| 282 | |||
| 283 | trans = btrfs_join_transaction(tree_root); | ||
| 284 | if (IS_ERR(trans)) { | ||
| 285 | err = PTR_ERR(trans); | ||
| 286 | btrfs_error(tree_root->fs_info, err, | ||
| 287 | "Failed to start trans to delete " | ||
| 288 | "orphan item"); | ||
| 289 | break; | ||
| 290 | } | ||
| 291 | err = btrfs_del_orphan_item(trans, tree_root, | ||
| 292 | root_key.objectid); | ||
| 293 | btrfs_end_transaction(trans, tree_root); | ||
| 294 | if (err) { | ||
| 295 | btrfs_error(tree_root->fs_info, err, | ||
| 296 | "Failed to delete root orphan " | ||
| 297 | "item"); | ||
| 298 | break; | ||
| 299 | } | ||
| 346 | continue; | 300 | continue; |
| 301 | } | ||
| 347 | 302 | ||
| 348 | ret = PTR_ERR(root); | 303 | if (btrfs_root_refs(&root->root_item) == 0) { |
| 349 | if (ret != -ENOENT) { | 304 | btrfs_add_dead_root(root); |
| 350 | err = ret; | 305 | continue; |
| 306 | } | ||
| 307 | |||
| 308 | err = btrfs_init_fs_root(root); | ||
| 309 | if (err) { | ||
| 310 | btrfs_free_fs_root(root); | ||
| 351 | break; | 311 | break; |
| 352 | } | 312 | } |
| 353 | 313 | ||
| 354 | ret = btrfs_find_dead_roots(tree_root, root_key.objectid); | 314 | root->orphan_item_inserted = 1; |
| 355 | if (ret) { | 315 | |
| 356 | err = ret; | 316 | err = btrfs_insert_fs_root(root->fs_info, root); |
| 317 | if (err) { | ||
| 318 | BUG_ON(err == -EEXIST); | ||
| 319 | btrfs_free_fs_root(root); | ||
| 357 | break; | 320 | break; |
| 358 | } | 321 | } |
| 359 | } | 322 | } |
| @@ -368,8 +331,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 368 | { | 331 | { |
| 369 | struct btrfs_path *path; | 332 | struct btrfs_path *path; |
| 370 | int ret; | 333 | int ret; |
| 371 | struct btrfs_root_item *ri; | ||
| 372 | struct extent_buffer *leaf; | ||
| 373 | 334 | ||
| 374 | path = btrfs_alloc_path(); | 335 | path = btrfs_alloc_path(); |
| 375 | if (!path) | 336 | if (!path) |
| @@ -379,8 +340,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 379 | goto out; | 340 | goto out; |
| 380 | 341 | ||
| 381 | BUG_ON(ret != 0); | 342 | BUG_ON(ret != 0); |
| 382 | leaf = path->nodes[0]; | ||
| 383 | ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); | ||
| 384 | 343 | ||
| 385 | ret = btrfs_del_item(trans, root, path); | 344 | ret = btrfs_del_item(trans, root, path); |
| 386 | out: | 345 | out: |
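The rewritten btrfs_find_root() above folds two lookups into one: a normal exact-key search, and the old btrfs_find_last_root() behaviour ("highest offset for this objectid"), selected by passing an offset of -1ULL. A toy version over a sorted array is sketched below; the array and the find_root() helper are assumptions made for illustration, not the real b-tree code.

```c
/*
 * Toy version of the btrfs_find_root() semantics: an offset of -1ULL means
 * "the root item with the highest offset for this objectid", anything else
 * is an exact-key lookup.
 */
#include <stdio.h>

struct root_key {
	unsigned long long objectid;
	unsigned long long offset;
};

/* keys[] is sorted by (objectid, offset); returns a slot index or -1 */
static int find_root(const struct root_key *keys, int nr,
		     const struct root_key *search)
{
	int i, best = -1;

	for (i = 0; i < nr; i++) {
		if (keys[i].objectid != search->objectid)
			continue;
		if (search->offset == (unsigned long long)-1)
			best = i;	/* keep going: want the highest offset */
		else if (keys[i].offset == search->offset)
			return i;	/* exact hit */
	}
	return best;
}

int main(void)
{
	const struct root_key keys[] = { { 256, 5 }, { 256, 9 }, { 257, 0 } };
	const struct root_key latest = { 256, (unsigned long long)-1 };
	const struct root_key exact = { 256, 5 };

	printf("latest for 256 -> slot %d\n", find_root(keys, 3, &latest));
	printf("exact (256,5)  -> slot %d\n", find_root(keys, 3, &exact));
	return 0;
}
```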
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 79bd479317cb..4ba2a69a60ad 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -2126,8 +2126,7 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
| 2126 | u8 *csum) | 2126 | u8 *csum) |
| 2127 | { | 2127 | { |
| 2128 | struct btrfs_ordered_sum *sum = NULL; | 2128 | struct btrfs_ordered_sum *sum = NULL; |
| 2129 | int ret = 0; | 2129 | unsigned long index; |
| 2130 | unsigned long i; | ||
| 2131 | unsigned long num_sectors; | 2130 | unsigned long num_sectors; |
| 2132 | 2131 | ||
| 2133 | while (!list_empty(&sctx->csum_list)) { | 2132 | while (!list_empty(&sctx->csum_list)) { |
| @@ -2146,19 +2145,14 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
| 2146 | if (!sum) | 2145 | if (!sum) |
| 2147 | return 0; | 2146 | return 0; |
| 2148 | 2147 | ||
| 2148 | index = ((u32)(logical - sum->bytenr)) / sctx->sectorsize; | ||
| 2149 | num_sectors = sum->len / sctx->sectorsize; | 2149 | num_sectors = sum->len / sctx->sectorsize; |
| 2150 | for (i = 0; i < num_sectors; ++i) { | 2150 | memcpy(csum, sum->sums + index, sctx->csum_size); |
| 2151 | if (sum->sums[i].bytenr == logical) { | 2151 | if (index == num_sectors - 1) { |
| 2152 | memcpy(csum, &sum->sums[i].sum, sctx->csum_size); | ||
| 2153 | ret = 1; | ||
| 2154 | break; | ||
| 2155 | } | ||
| 2156 | } | ||
| 2157 | if (ret && i == num_sectors - 1) { | ||
| 2158 | list_del(&sum->list); | 2152 | list_del(&sum->list); |
| 2159 | kfree(sum); | 2153 | kfree(sum); |
| 2160 | } | 2154 | } |
| 2161 | return ret; | 2155 | return 1; |
| 2162 | } | 2156 | } |
| 2163 | 2157 | ||
| 2164 | /* scrub extent tries to collect up to 64 kB for each bio */ | 2158 | /* scrub extent tries to collect up to 64 kB for each bio */ |
| @@ -2505,6 +2499,7 @@ again: | |||
| 2505 | if (ret) | 2499 | if (ret) |
| 2506 | goto out; | 2500 | goto out; |
| 2507 | 2501 | ||
| 2502 | scrub_free_csums(sctx); | ||
| 2508 | if (extent_logical + extent_len < | 2503 | if (extent_logical + extent_len < |
| 2509 | key.objectid + bytes) { | 2504 | key.objectid + bytes) { |
| 2510 | logical += increment; | 2505 | logical += increment; |
| @@ -3204,16 +3199,18 @@ out: | |||
| 3204 | 3199 | ||
| 3205 | static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) | 3200 | static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) |
| 3206 | { | 3201 | { |
| 3207 | unsigned long index; | ||
| 3208 | struct scrub_copy_nocow_ctx *nocow_ctx = ctx; | 3202 | struct scrub_copy_nocow_ctx *nocow_ctx = ctx; |
| 3209 | int ret = 0; | 3203 | struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; |
| 3210 | struct btrfs_key key; | 3204 | struct btrfs_key key; |
| 3211 | struct inode *inode = NULL; | 3205 | struct inode *inode; |
| 3206 | struct page *page; | ||
| 3212 | struct btrfs_root *local_root; | 3207 | struct btrfs_root *local_root; |
| 3213 | u64 physical_for_dev_replace; | 3208 | u64 physical_for_dev_replace; |
| 3214 | u64 len; | 3209 | u64 len; |
| 3215 | struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; | 3210 | unsigned long index; |
| 3216 | int srcu_index; | 3211 | int srcu_index; |
| 3212 | int ret; | ||
| 3213 | int err; | ||
| 3217 | 3214 | ||
| 3218 | key.objectid = root; | 3215 | key.objectid = root; |
| 3219 | key.type = BTRFS_ROOT_ITEM_KEY; | 3216 | key.type = BTRFS_ROOT_ITEM_KEY; |
| @@ -3227,6 +3224,11 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) | |||
| 3227 | return PTR_ERR(local_root); | 3224 | return PTR_ERR(local_root); |
| 3228 | } | 3225 | } |
| 3229 | 3226 | ||
| 3227 | if (btrfs_root_refs(&local_root->root_item) == 0) { | ||
| 3228 | srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); | ||
| 3229 | return -ENOENT; | ||
| 3230 | } | ||
| 3231 | |||
| 3230 | key.type = BTRFS_INODE_ITEM_KEY; | 3232 | key.type = BTRFS_INODE_ITEM_KEY; |
| 3231 | key.objectid = inum; | 3233 | key.objectid = inum; |
| 3232 | key.offset = 0; | 3234 | key.offset = 0; |
| @@ -3235,19 +3237,21 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) | |||
| 3235 | if (IS_ERR(inode)) | 3237 | if (IS_ERR(inode)) |
| 3236 | return PTR_ERR(inode); | 3238 | return PTR_ERR(inode); |
| 3237 | 3239 | ||
| 3240 | /* Avoid truncate/dio/punch hole.. */ | ||
| 3241 | mutex_lock(&inode->i_mutex); | ||
| 3242 | inode_dio_wait(inode); | ||
| 3243 | |||
| 3244 | ret = 0; | ||
| 3238 | physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; | 3245 | physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; |
| 3239 | len = nocow_ctx->len; | 3246 | len = nocow_ctx->len; |
| 3240 | while (len >= PAGE_CACHE_SIZE) { | 3247 | while (len >= PAGE_CACHE_SIZE) { |
| 3241 | struct page *page = NULL; | ||
| 3242 | int ret_sub; | ||
| 3243 | |||
| 3244 | index = offset >> PAGE_CACHE_SHIFT; | 3248 | index = offset >> PAGE_CACHE_SHIFT; |
| 3245 | 3249 | again: | |
| 3246 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | 3250 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); |
| 3247 | if (!page) { | 3251 | if (!page) { |
| 3248 | pr_err("find_or_create_page() failed\n"); | 3252 | pr_err("find_or_create_page() failed\n"); |
| 3249 | ret = -ENOMEM; | 3253 | ret = -ENOMEM; |
| 3250 | goto next_page; | 3254 | goto out; |
| 3251 | } | 3255 | } |
| 3252 | 3256 | ||
| 3253 | if (PageUptodate(page)) { | 3257 | if (PageUptodate(page)) { |
| @@ -3255,39 +3259,49 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) | |||
| 3255 | goto next_page; | 3259 | goto next_page; |
| 3256 | } else { | 3260 | } else { |
| 3257 | ClearPageError(page); | 3261 | ClearPageError(page); |
| 3258 | ret_sub = extent_read_full_page(&BTRFS_I(inode)-> | 3262 | err = extent_read_full_page(&BTRFS_I(inode)-> |
| 3259 | io_tree, | 3263 | io_tree, |
| 3260 | page, btrfs_get_extent, | 3264 | page, btrfs_get_extent, |
| 3261 | nocow_ctx->mirror_num); | 3265 | nocow_ctx->mirror_num); |
| 3262 | if (ret_sub) { | 3266 | if (err) { |
| 3263 | ret = ret_sub; | 3267 | ret = err; |
| 3264 | goto next_page; | 3268 | goto next_page; |
| 3265 | } | 3269 | } |
| 3266 | wait_on_page_locked(page); | 3270 | |
| 3271 | lock_page(page); | ||
| 3272 | /* | ||
| 3273 | * If the page has been removed from the page cache, | ||
| 3274 | * the data on it is meaningless, because it may be an | ||
| 3275 | * old one; the new data may be written into the new | ||
| 3276 | * page in the page cache. | ||
| 3277 | */ | ||
| 3278 | if (page->mapping != inode->i_mapping) { | ||
| 3279 | page_cache_release(page); | ||
| 3280 | goto again; | ||
| 3281 | } | ||
| 3267 | if (!PageUptodate(page)) { | 3282 | if (!PageUptodate(page)) { |
| 3268 | ret = -EIO; | 3283 | ret = -EIO; |
| 3269 | goto next_page; | 3284 | goto next_page; |
| 3270 | } | 3285 | } |
| 3271 | } | 3286 | } |
| 3272 | ret_sub = write_page_nocow(nocow_ctx->sctx, | 3287 | err = write_page_nocow(nocow_ctx->sctx, |
| 3273 | physical_for_dev_replace, page); | 3288 | physical_for_dev_replace, page); |
| 3274 | if (ret_sub) { | 3289 | if (err) |
| 3275 | ret = ret_sub; | 3290 | ret = err; |
| 3276 | goto next_page; | ||
| 3277 | } | ||
| 3278 | |||
| 3279 | next_page: | 3291 | next_page: |
| 3280 | if (page) { | 3292 | unlock_page(page); |
| 3281 | unlock_page(page); | 3293 | page_cache_release(page); |
| 3282 | put_page(page); | 3294 | |
| 3283 | } | 3295 | if (ret) |
| 3296 | break; | ||
| 3297 | |||
| 3284 | offset += PAGE_CACHE_SIZE; | 3298 | offset += PAGE_CACHE_SIZE; |
| 3285 | physical_for_dev_replace += PAGE_CACHE_SIZE; | 3299 | physical_for_dev_replace += PAGE_CACHE_SIZE; |
| 3286 | len -= PAGE_CACHE_SIZE; | 3300 | len -= PAGE_CACHE_SIZE; |
| 3287 | } | 3301 | } |
| 3288 | 3302 | out: | |
| 3289 | if (inode) | 3303 | mutex_unlock(&inode->i_mutex); |
| 3290 | iput(inode); | 3304 | iput(inode); |
| 3291 | return ret; | 3305 | return ret; |
| 3292 | } | 3306 | } |
| 3293 | 3307 | ||
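The scrub.c hunk above drops the per-sector btrfs_sector_sum scan: with the checksums of an ordered extent stored as one contiguous array, the entry for a logical address is reached by arithmetic on the offset from the group's start rather than by walking every sector. A small standalone sketch of that indexing follows; the 4-byte crc32c size, the struct layout and the helper name are assumptions made for this example, not the kernel's exact code.

```c
/*
 * Standalone sketch of a flattened checksum lookup: compute the sector
 * index from the logical offset and copy that entry out of a flat array.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SECTORSIZE	4096u
#define CSUM_SIZE	4u		/* crc32c */

struct ordered_sum {
	uint64_t bytenr;	/* logical start covered by this group */
	uint32_t len;		/* bytes covered */
	uint8_t sums[];		/* (len / SECTORSIZE) entries of CSUM_SIZE */
};

/* copy the checksum of the sector starting at 'logical'; returns 1 on hit */
static int find_csum(const struct ordered_sum *sum, uint64_t logical,
		     uint8_t *csum)
{
	uint32_t index;

	if (logical < sum->bytenr || logical >= sum->bytenr + sum->len)
		return 0;
	index = (uint32_t)(logical - sum->bytenr) / SECTORSIZE;
	memcpy(csum, sum->sums + (size_t)index * CSUM_SIZE, CSUM_SIZE);
	return 1;
}

int main(void)
{
	uint32_t nsectors = 2;
	struct ordered_sum *sum = malloc(sizeof(*sum) + nsectors * CSUM_SIZE);
	uint8_t csum[CSUM_SIZE];

	if (!sum)
		return 1;
	sum->bytenr = 1 << 20;
	sum->len = nsectors * SECTORSIZE;
	memset(sum->sums, 0xab, nsectors * CSUM_SIZE);
	printf("hit: %d\n", find_csum(sum, (1 << 20) + SECTORSIZE, csum));
	free(sum);
	return 0;
}
```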
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index ff40f1c00ce3..d3f3b43cae0b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
| @@ -158,7 +158,7 @@ static void fs_path_reset(struct fs_path *p) | |||
| 158 | } | 158 | } |
| 159 | } | 159 | } |
| 160 | 160 | ||
| 161 | static struct fs_path *fs_path_alloc(struct send_ctx *sctx) | 161 | static struct fs_path *fs_path_alloc(void) |
| 162 | { | 162 | { |
| 163 | struct fs_path *p; | 163 | struct fs_path *p; |
| 164 | 164 | ||
| @@ -173,11 +173,11 @@ static struct fs_path *fs_path_alloc(struct send_ctx *sctx) | |||
| 173 | return p; | 173 | return p; |
| 174 | } | 174 | } |
| 175 | 175 | ||
| 176 | static struct fs_path *fs_path_alloc_reversed(struct send_ctx *sctx) | 176 | static struct fs_path *fs_path_alloc_reversed(void) |
| 177 | { | 177 | { |
| 178 | struct fs_path *p; | 178 | struct fs_path *p; |
| 179 | 179 | ||
| 180 | p = fs_path_alloc(sctx); | 180 | p = fs_path_alloc(); |
| 181 | if (!p) | 181 | if (!p) |
| 182 | return NULL; | 182 | return NULL; |
| 183 | p->reversed = 1; | 183 | p->reversed = 1; |
| @@ -185,7 +185,7 @@ static struct fs_path *fs_path_alloc_reversed(struct send_ctx *sctx) | |||
| 185 | return p; | 185 | return p; |
| 186 | } | 186 | } |
| 187 | 187 | ||
| 188 | static void fs_path_free(struct send_ctx *sctx, struct fs_path *p) | 188 | static void fs_path_free(struct fs_path *p) |
| 189 | { | 189 | { |
| 190 | if (!p) | 190 | if (!p) |
| 191 | return; | 191 | return; |
| @@ -753,8 +753,7 @@ typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index, | |||
| 753 | * | 753 | * |
| 754 | * path must point to the INODE_REF or INODE_EXTREF when called. | 754 | * path must point to the INODE_REF or INODE_EXTREF when called. |
| 755 | */ | 755 | */ |
| 756 | static int iterate_inode_ref(struct send_ctx *sctx, | 756 | static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path, |
| 757 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 758 | struct btrfs_key *found_key, int resolve, | 757 | struct btrfs_key *found_key, int resolve, |
| 759 | iterate_inode_ref_t iterate, void *ctx) | 758 | iterate_inode_ref_t iterate, void *ctx) |
| 760 | { | 759 | { |
| @@ -777,13 +776,13 @@ static int iterate_inode_ref(struct send_ctx *sctx, | |||
| 777 | unsigned long elem_size; | 776 | unsigned long elem_size; |
| 778 | unsigned long ptr; | 777 | unsigned long ptr; |
| 779 | 778 | ||
| 780 | p = fs_path_alloc_reversed(sctx); | 779 | p = fs_path_alloc_reversed(); |
| 781 | if (!p) | 780 | if (!p) |
| 782 | return -ENOMEM; | 781 | return -ENOMEM; |
| 783 | 782 | ||
| 784 | tmp_path = alloc_path_for_send(); | 783 | tmp_path = alloc_path_for_send(); |
| 785 | if (!tmp_path) { | 784 | if (!tmp_path) { |
| 786 | fs_path_free(sctx, p); | 785 | fs_path_free(p); |
| 787 | return -ENOMEM; | 786 | return -ENOMEM; |
| 788 | } | 787 | } |
| 789 | 788 | ||
| @@ -858,7 +857,7 @@ static int iterate_inode_ref(struct send_ctx *sctx, | |||
| 858 | 857 | ||
| 859 | out: | 858 | out: |
| 860 | btrfs_free_path(tmp_path); | 859 | btrfs_free_path(tmp_path); |
| 861 | fs_path_free(sctx, p); | 860 | fs_path_free(p); |
| 862 | return ret; | 861 | return ret; |
| 863 | } | 862 | } |
| 864 | 863 | ||
| @@ -874,8 +873,7 @@ typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key, | |||
| 874 | * | 873 | * |
| 875 | * path must point to the dir item when called. | 874 | * path must point to the dir item when called. |
| 876 | */ | 875 | */ |
| 877 | static int iterate_dir_item(struct send_ctx *sctx, | 876 | static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, |
| 878 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 879 | struct btrfs_key *found_key, | 877 | struct btrfs_key *found_key, |
| 880 | iterate_dir_item_t iterate, void *ctx) | 878 | iterate_dir_item_t iterate, void *ctx) |
| 881 | { | 879 | { |
| @@ -990,7 +988,7 @@ static int __copy_first_ref(int num, u64 dir, int index, | |||
| 990 | * Retrieve the first path of an inode. If an inode has more than one | 988 | * Retrieve the first path of an inode. If an inode has more than one |
| 991 | * ref/hardlink, this is ignored. | 989 | * ref/hardlink, this is ignored. |
| 992 | */ | 990 | */ |
| 993 | static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root, | 991 | static int get_inode_path(struct btrfs_root *root, |
| 994 | u64 ino, struct fs_path *path) | 992 | u64 ino, struct fs_path *path) |
| 995 | { | 993 | { |
| 996 | int ret; | 994 | int ret; |
| @@ -1022,8 +1020,8 @@ static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root, | |||
| 1022 | goto out; | 1020 | goto out; |
| 1023 | } | 1021 | } |
| 1024 | 1022 | ||
| 1025 | ret = iterate_inode_ref(sctx, root, p, &found_key, 1, | 1023 | ret = iterate_inode_ref(root, p, &found_key, 1, |
| 1026 | __copy_first_ref, path); | 1024 | __copy_first_ref, path); |
| 1027 | if (ret < 0) | 1025 | if (ret < 0) |
| 1028 | goto out; | 1026 | goto out; |
| 1029 | ret = 0; | 1027 | ret = 0; |
| @@ -1314,8 +1312,7 @@ out: | |||
| 1314 | return ret; | 1312 | return ret; |
| 1315 | } | 1313 | } |
| 1316 | 1314 | ||
| 1317 | static int read_symlink(struct send_ctx *sctx, | 1315 | static int read_symlink(struct btrfs_root *root, |
| 1318 | struct btrfs_root *root, | ||
| 1319 | u64 ino, | 1316 | u64 ino, |
| 1320 | struct fs_path *dest) | 1317 | struct fs_path *dest) |
| 1321 | { | 1318 | { |
| @@ -1562,8 +1559,7 @@ out: | |||
| 1562 | * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir, | 1559 | * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir, |
| 1563 | * generation of the parent dir and the name of the dir entry. | 1560 | * generation of the parent dir and the name of the dir entry. |
| 1564 | */ | 1561 | */ |
| 1565 | static int get_first_ref(struct send_ctx *sctx, | 1562 | static int get_first_ref(struct btrfs_root *root, u64 ino, |
| 1566 | struct btrfs_root *root, u64 ino, | ||
| 1567 | u64 *dir, u64 *dir_gen, struct fs_path *name) | 1563 | u64 *dir, u64 *dir_gen, struct fs_path *name) |
| 1568 | { | 1564 | { |
| 1569 | int ret; | 1565 | int ret; |
| @@ -1628,8 +1624,7 @@ out: | |||
| 1628 | return ret; | 1624 | return ret; |
| 1629 | } | 1625 | } |
| 1630 | 1626 | ||
| 1631 | static int is_first_ref(struct send_ctx *sctx, | 1627 | static int is_first_ref(struct btrfs_root *root, |
| 1632 | struct btrfs_root *root, | ||
| 1633 | u64 ino, u64 dir, | 1628 | u64 ino, u64 dir, |
| 1634 | const char *name, int name_len) | 1629 | const char *name, int name_len) |
| 1635 | { | 1630 | { |
| @@ -1638,11 +1633,11 @@ static int is_first_ref(struct send_ctx *sctx, | |||
| 1638 | u64 tmp_dir; | 1633 | u64 tmp_dir; |
| 1639 | u64 tmp_dir_gen; | 1634 | u64 tmp_dir_gen; |
| 1640 | 1635 | ||
| 1641 | tmp_name = fs_path_alloc(sctx); | 1636 | tmp_name = fs_path_alloc(); |
| 1642 | if (!tmp_name) | 1637 | if (!tmp_name) |
| 1643 | return -ENOMEM; | 1638 | return -ENOMEM; |
| 1644 | 1639 | ||
| 1645 | ret = get_first_ref(sctx, root, ino, &tmp_dir, &tmp_dir_gen, tmp_name); | 1640 | ret = get_first_ref(root, ino, &tmp_dir, &tmp_dir_gen, tmp_name); |
| 1646 | if (ret < 0) | 1641 | if (ret < 0) |
| 1647 | goto out; | 1642 | goto out; |
| 1648 | 1643 | ||
| @@ -1654,7 +1649,7 @@ static int is_first_ref(struct send_ctx *sctx, | |||
| 1654 | ret = !memcmp(tmp_name->start, name, name_len); | 1649 | ret = !memcmp(tmp_name->start, name, name_len); |
| 1655 | 1650 | ||
| 1656 | out: | 1651 | out: |
| 1657 | fs_path_free(sctx, tmp_name); | 1652 | fs_path_free(tmp_name); |
| 1658 | return ret; | 1653 | return ret; |
| 1659 | } | 1654 | } |
| 1660 | 1655 | ||
| @@ -1783,11 +1778,11 @@ static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) | |||
| 1783 | if (!sctx->parent_root) | 1778 | if (!sctx->parent_root) |
| 1784 | goto out; | 1779 | goto out; |
| 1785 | 1780 | ||
| 1786 | name = fs_path_alloc(sctx); | 1781 | name = fs_path_alloc(); |
| 1787 | if (!name) | 1782 | if (!name) |
| 1788 | return -ENOMEM; | 1783 | return -ENOMEM; |
| 1789 | 1784 | ||
| 1790 | ret = get_first_ref(sctx, sctx->parent_root, ino, &dir, &dir_gen, name); | 1785 | ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name); |
| 1791 | if (ret < 0) | 1786 | if (ret < 0) |
| 1792 | goto out; | 1787 | goto out; |
| 1793 | 1788 | ||
| @@ -1795,7 +1790,7 @@ static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) | |||
| 1795 | name->start, fs_path_len(name)); | 1790 | name->start, fs_path_len(name)); |
| 1796 | 1791 | ||
| 1797 | out: | 1792 | out: |
| 1798 | fs_path_free(sctx, name); | 1793 | fs_path_free(name); |
| 1799 | return ret; | 1794 | return ret; |
| 1800 | } | 1795 | } |
| 1801 | 1796 | ||
| @@ -1979,11 +1974,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1979 | * send_root or parent_root for ref lookup. | 1974 | * send_root or parent_root for ref lookup. |
| 1980 | */ | 1975 | */ |
| 1981 | if (ino < sctx->send_progress) | 1976 | if (ino < sctx->send_progress) |
| 1982 | ret = get_first_ref(sctx, sctx->send_root, ino, | 1977 | ret = get_first_ref(sctx->send_root, ino, |
| 1983 | parent_ino, parent_gen, dest); | 1978 | parent_ino, parent_gen, dest); |
| 1984 | else | 1979 | else |
| 1985 | ret = get_first_ref(sctx, sctx->parent_root, ino, | 1980 | ret = get_first_ref(sctx->parent_root, ino, |
| 1986 | parent_ino, parent_gen, dest); | 1981 | parent_ino, parent_gen, dest); |
| 1987 | if (ret < 0) | 1982 | if (ret < 0) |
| 1988 | goto out; | 1983 | goto out; |
| 1989 | 1984 | ||
| @@ -2070,7 +2065,7 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
| 2070 | u64 parent_gen = 0; | 2065 | u64 parent_gen = 0; |
| 2071 | int stop = 0; | 2066 | int stop = 0; |
| 2072 | 2067 | ||
| 2073 | name = fs_path_alloc(sctx); | 2068 | name = fs_path_alloc(); |
| 2074 | if (!name) { | 2069 | if (!name) { |
| 2075 | ret = -ENOMEM; | 2070 | ret = -ENOMEM; |
| 2076 | goto out; | 2071 | goto out; |
| @@ -2098,7 +2093,7 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
| 2098 | } | 2093 | } |
| 2099 | 2094 | ||
| 2100 | out: | 2095 | out: |
| 2101 | fs_path_free(sctx, name); | 2096 | fs_path_free(name); |
| 2102 | if (!ret) | 2097 | if (!ret) |
| 2103 | fs_path_unreverse(dest); | 2098 | fs_path_unreverse(dest); |
| 2104 | return ret; | 2099 | return ret; |
| @@ -2263,7 +2258,7 @@ static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) | |||
| 2263 | 2258 | ||
| 2264 | verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); | 2259 | verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); |
| 2265 | 2260 | ||
| 2266 | p = fs_path_alloc(sctx); | 2261 | p = fs_path_alloc(); |
| 2267 | if (!p) | 2262 | if (!p) |
| 2268 | return -ENOMEM; | 2263 | return -ENOMEM; |
| 2269 | 2264 | ||
| @@ -2281,7 +2276,7 @@ verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); | |||
| 2281 | 2276 | ||
| 2282 | tlv_put_failure: | 2277 | tlv_put_failure: |
| 2283 | out: | 2278 | out: |
| 2284 | fs_path_free(sctx, p); | 2279 | fs_path_free(p); |
| 2285 | return ret; | 2280 | return ret; |
| 2286 | } | 2281 | } |
| 2287 | 2282 | ||
| @@ -2292,7 +2287,7 @@ static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) | |||
| 2292 | 2287 | ||
| 2293 | verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); | 2288 | verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); |
| 2294 | 2289 | ||
| 2295 | p = fs_path_alloc(sctx); | 2290 | p = fs_path_alloc(); |
| 2296 | if (!p) | 2291 | if (!p) |
| 2297 | return -ENOMEM; | 2292 | return -ENOMEM; |
| 2298 | 2293 | ||
| @@ -2310,7 +2305,7 @@ verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); | |||
| 2310 | 2305 | ||
| 2311 | tlv_put_failure: | 2306 | tlv_put_failure: |
| 2312 | out: | 2307 | out: |
| 2313 | fs_path_free(sctx, p); | 2308 | fs_path_free(p); |
| 2314 | return ret; | 2309 | return ret; |
| 2315 | } | 2310 | } |
| 2316 | 2311 | ||
| @@ -2321,7 +2316,7 @@ static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) | |||
| 2321 | 2316 | ||
| 2322 | verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); | 2317 | verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); |
| 2323 | 2318 | ||
| 2324 | p = fs_path_alloc(sctx); | 2319 | p = fs_path_alloc(); |
| 2325 | if (!p) | 2320 | if (!p) |
| 2326 | return -ENOMEM; | 2321 | return -ENOMEM; |
| 2327 | 2322 | ||
| @@ -2340,7 +2335,7 @@ verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); | |||
| 2340 | 2335 | ||
| 2341 | tlv_put_failure: | 2336 | tlv_put_failure: |
| 2342 | out: | 2337 | out: |
| 2343 | fs_path_free(sctx, p); | 2338 | fs_path_free(p); |
| 2344 | return ret; | 2339 | return ret; |
| 2345 | } | 2340 | } |
| 2346 | 2341 | ||
| @@ -2356,7 +2351,7 @@ static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) | |||
| 2356 | 2351 | ||
| 2357 | verbose_printk("btrfs: send_utimes %llu\n", ino); | 2352 | verbose_printk("btrfs: send_utimes %llu\n", ino); |
| 2358 | 2353 | ||
| 2359 | p = fs_path_alloc(sctx); | 2354 | p = fs_path_alloc(); |
| 2360 | if (!p) | 2355 | if (!p) |
| 2361 | return -ENOMEM; | 2356 | return -ENOMEM; |
| 2362 | 2357 | ||
| @@ -2397,7 +2392,7 @@ verbose_printk("btrfs: send_utimes %llu\n", ino); | |||
| 2397 | 2392 | ||
| 2398 | tlv_put_failure: | 2393 | tlv_put_failure: |
| 2399 | out: | 2394 | out: |
| 2400 | fs_path_free(sctx, p); | 2395 | fs_path_free(p); |
| 2401 | btrfs_free_path(path); | 2396 | btrfs_free_path(path); |
| 2402 | return ret; | 2397 | return ret; |
| 2403 | } | 2398 | } |
| @@ -2418,7 +2413,7 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino) | |||
| 2418 | 2413 | ||
| 2419 | verbose_printk("btrfs: send_create_inode %llu\n", ino); | 2414 | verbose_printk("btrfs: send_create_inode %llu\n", ino); |
| 2420 | 2415 | ||
| 2421 | p = fs_path_alloc(sctx); | 2416 | p = fs_path_alloc(); |
| 2422 | if (!p) | 2417 | if (!p) |
| 2423 | return -ENOMEM; | 2418 | return -ENOMEM; |
| 2424 | 2419 | ||
| @@ -2459,7 +2454,7 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino); | |||
| 2459 | 2454 | ||
| 2460 | if (S_ISLNK(mode)) { | 2455 | if (S_ISLNK(mode)) { |
| 2461 | fs_path_reset(p); | 2456 | fs_path_reset(p); |
| 2462 | ret = read_symlink(sctx, sctx->send_root, ino, p); | 2457 | ret = read_symlink(sctx->send_root, ino, p); |
| 2463 | if (ret < 0) | 2458 | if (ret < 0) |
| 2464 | goto out; | 2459 | goto out; |
| 2465 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); | 2460 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); |
| @@ -2476,7 +2471,7 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino); | |||
| 2476 | 2471 | ||
| 2477 | tlv_put_failure: | 2472 | tlv_put_failure: |
| 2478 | out: | 2473 | out: |
| 2479 | fs_path_free(sctx, p); | 2474 | fs_path_free(p); |
| 2480 | return ret; | 2475 | return ret; |
| 2481 | } | 2476 | } |
| 2482 | 2477 | ||
| @@ -2615,13 +2610,13 @@ static int record_ref(struct list_head *head, u64 dir, | |||
| 2615 | return 0; | 2610 | return 0; |
| 2616 | } | 2611 | } |
| 2617 | 2612 | ||
| 2618 | static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head) | 2613 | static void __free_recorded_refs(struct list_head *head) |
| 2619 | { | 2614 | { |
| 2620 | struct recorded_ref *cur; | 2615 | struct recorded_ref *cur; |
| 2621 | 2616 | ||
| 2622 | while (!list_empty(head)) { | 2617 | while (!list_empty(head)) { |
| 2623 | cur = list_entry(head->next, struct recorded_ref, list); | 2618 | cur = list_entry(head->next, struct recorded_ref, list); |
| 2624 | fs_path_free(sctx, cur->full_path); | 2619 | fs_path_free(cur->full_path); |
| 2625 | list_del(&cur->list); | 2620 | list_del(&cur->list); |
| 2626 | kfree(cur); | 2621 | kfree(cur); |
| 2627 | } | 2622 | } |
| @@ -2629,8 +2624,8 @@ static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head) | |||
| 2629 | 2624 | ||
| 2630 | static void free_recorded_refs(struct send_ctx *sctx) | 2625 | static void free_recorded_refs(struct send_ctx *sctx) |
| 2631 | { | 2626 | { |
| 2632 | __free_recorded_refs(sctx, &sctx->new_refs); | 2627 | __free_recorded_refs(&sctx->new_refs); |
| 2633 | __free_recorded_refs(sctx, &sctx->deleted_refs); | 2628 | __free_recorded_refs(&sctx->deleted_refs); |
| 2634 | } | 2629 | } |
| 2635 | 2630 | ||
| 2636 | /* | 2631 | /* |
| @@ -2644,7 +2639,7 @@ static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, | |||
| 2644 | int ret; | 2639 | int ret; |
| 2645 | struct fs_path *orphan; | 2640 | struct fs_path *orphan; |
| 2646 | 2641 | ||
| 2647 | orphan = fs_path_alloc(sctx); | 2642 | orphan = fs_path_alloc(); |
| 2648 | if (!orphan) | 2643 | if (!orphan) |
| 2649 | return -ENOMEM; | 2644 | return -ENOMEM; |
| 2650 | 2645 | ||
| @@ -2655,7 +2650,7 @@ static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, | |||
| 2655 | ret = send_rename(sctx, path, orphan); | 2650 | ret = send_rename(sctx, path, orphan); |
| 2656 | 2651 | ||
| 2657 | out: | 2652 | out: |
| 2658 | fs_path_free(sctx, orphan); | 2653 | fs_path_free(orphan); |
| 2659 | return ret; | 2654 | return ret; |
| 2660 | } | 2655 | } |
| 2661 | 2656 | ||
| @@ -2746,7 +2741,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2746 | */ | 2741 | */ |
| 2747 | BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); | 2742 | BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); |
| 2748 | 2743 | ||
| 2749 | valid_path = fs_path_alloc(sctx); | 2744 | valid_path = fs_path_alloc(); |
| 2750 | if (!valid_path) { | 2745 | if (!valid_path) { |
| 2751 | ret = -ENOMEM; | 2746 | ret = -ENOMEM; |
| 2752 | goto out; | 2747 | goto out; |
| @@ -2843,9 +2838,9 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2843 | if (ret < 0) | 2838 | if (ret < 0) |
| 2844 | goto out; | 2839 | goto out; |
| 2845 | if (ret) { | 2840 | if (ret) { |
| 2846 | ret = is_first_ref(sctx, sctx->parent_root, | 2841 | ret = is_first_ref(sctx->parent_root, |
| 2847 | ow_inode, cur->dir, cur->name, | 2842 | ow_inode, cur->dir, cur->name, |
| 2848 | cur->name_len); | 2843 | cur->name_len); |
| 2849 | if (ret < 0) | 2844 | if (ret < 0) |
| 2850 | goto out; | 2845 | goto out; |
| 2851 | if (ret) { | 2846 | if (ret) { |
| @@ -3024,7 +3019,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3024 | out: | 3019 | out: |
| 3025 | free_recorded_refs(sctx); | 3020 | free_recorded_refs(sctx); |
| 3026 | ulist_free(check_dirs); | 3021 | ulist_free(check_dirs); |
| 3027 | fs_path_free(sctx, valid_path); | 3022 | fs_path_free(valid_path); |
| 3028 | return ret; | 3023 | return ret; |
| 3029 | } | 3024 | } |
| 3030 | 3025 | ||
| @@ -3037,7 +3032,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
| 3037 | struct fs_path *p; | 3032 | struct fs_path *p; |
| 3038 | u64 gen; | 3033 | u64 gen; |
| 3039 | 3034 | ||
| 3040 | p = fs_path_alloc(sctx); | 3035 | p = fs_path_alloc(); |
| 3041 | if (!p) | 3036 | if (!p) |
| 3042 | return -ENOMEM; | 3037 | return -ENOMEM; |
| 3043 | 3038 | ||
| @@ -3057,7 +3052,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
| 3057 | 3052 | ||
| 3058 | out: | 3053 | out: |
| 3059 | if (ret) | 3054 | if (ret) |
| 3060 | fs_path_free(sctx, p); | 3055 | fs_path_free(p); |
| 3061 | return ret; | 3056 | return ret; |
| 3062 | } | 3057 | } |
| 3063 | 3058 | ||
| @@ -3070,7 +3065,7 @@ static int __record_deleted_ref(int num, u64 dir, int index, | |||
| 3070 | struct fs_path *p; | 3065 | struct fs_path *p; |
| 3071 | u64 gen; | 3066 | u64 gen; |
| 3072 | 3067 | ||
| 3073 | p = fs_path_alloc(sctx); | 3068 | p = fs_path_alloc(); |
| 3074 | if (!p) | 3069 | if (!p) |
| 3075 | return -ENOMEM; | 3070 | return -ENOMEM; |
| 3076 | 3071 | ||
| @@ -3090,7 +3085,7 @@ static int __record_deleted_ref(int num, u64 dir, int index, | |||
| 3090 | 3085 | ||
| 3091 | out: | 3086 | out: |
| 3092 | if (ret) | 3087 | if (ret) |
| 3093 | fs_path_free(sctx, p); | 3088 | fs_path_free(p); |
| 3094 | return ret; | 3089 | return ret; |
| 3095 | } | 3090 | } |
| 3096 | 3091 | ||
| @@ -3098,8 +3093,8 @@ static int record_new_ref(struct send_ctx *sctx) | |||
| 3098 | { | 3093 | { |
| 3099 | int ret; | 3094 | int ret; |
| 3100 | 3095 | ||
| 3101 | ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path, | 3096 | ret = iterate_inode_ref(sctx->send_root, sctx->left_path, |
| 3102 | sctx->cmp_key, 0, __record_new_ref, sctx); | 3097 | sctx->cmp_key, 0, __record_new_ref, sctx); |
| 3103 | if (ret < 0) | 3098 | if (ret < 0) |
| 3104 | goto out; | 3099 | goto out; |
| 3105 | ret = 0; | 3100 | ret = 0; |
| @@ -3112,8 +3107,8 @@ static int record_deleted_ref(struct send_ctx *sctx) | |||
| 3112 | { | 3107 | { |
| 3113 | int ret; | 3108 | int ret; |
| 3114 | 3109 | ||
| 3115 | ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path, | 3110 | ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, |
| 3116 | sctx->cmp_key, 0, __record_deleted_ref, sctx); | 3111 | sctx->cmp_key, 0, __record_deleted_ref, sctx); |
| 3117 | if (ret < 0) | 3112 | if (ret < 0) |
| 3118 | goto out; | 3113 | goto out; |
| 3119 | ret = 0; | 3114 | ret = 0; |
| @@ -3142,8 +3137,7 @@ static int __find_iref(int num, u64 dir, int index, | |||
| 3142 | return 0; | 3137 | return 0; |
| 3143 | } | 3138 | } |
| 3144 | 3139 | ||
| 3145 | static int find_iref(struct send_ctx *sctx, | 3140 | static int find_iref(struct btrfs_root *root, |
| 3146 | struct btrfs_root *root, | ||
| 3147 | struct btrfs_path *path, | 3141 | struct btrfs_path *path, |
| 3148 | struct btrfs_key *key, | 3142 | struct btrfs_key *key, |
| 3149 | u64 dir, struct fs_path *name) | 3143 | u64 dir, struct fs_path *name) |
| @@ -3155,7 +3149,7 @@ static int find_iref(struct send_ctx *sctx, | |||
| 3155 | ctx.name = name; | 3149 | ctx.name = name; |
| 3156 | ctx.found_idx = -1; | 3150 | ctx.found_idx = -1; |
| 3157 | 3151 | ||
| 3158 | ret = iterate_inode_ref(sctx, root, path, key, 0, __find_iref, &ctx); | 3152 | ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); |
| 3159 | if (ret < 0) | 3153 | if (ret < 0) |
| 3160 | return ret; | 3154 | return ret; |
| 3161 | 3155 | ||
| @@ -3172,7 +3166,7 @@ static int __record_changed_new_ref(int num, u64 dir, int index, | |||
| 3172 | int ret; | 3166 | int ret; |
| 3173 | struct send_ctx *sctx = ctx; | 3167 | struct send_ctx *sctx = ctx; |
| 3174 | 3168 | ||
| 3175 | ret = find_iref(sctx, sctx->parent_root, sctx->right_path, | 3169 | ret = find_iref(sctx->parent_root, sctx->right_path, |
| 3176 | sctx->cmp_key, dir, name); | 3170 | sctx->cmp_key, dir, name); |
| 3177 | if (ret == -ENOENT) | 3171 | if (ret == -ENOENT) |
| 3178 | ret = __record_new_ref(num, dir, index, name, sctx); | 3172 | ret = __record_new_ref(num, dir, index, name, sctx); |
| @@ -3189,7 +3183,7 @@ static int __record_changed_deleted_ref(int num, u64 dir, int index, | |||
| 3189 | int ret; | 3183 | int ret; |
| 3190 | struct send_ctx *sctx = ctx; | 3184 | struct send_ctx *sctx = ctx; |
| 3191 | 3185 | ||
| 3192 | ret = find_iref(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key, | 3186 | ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, |
| 3193 | dir, name); | 3187 | dir, name); |
| 3194 | if (ret == -ENOENT) | 3188 | if (ret == -ENOENT) |
| 3195 | ret = __record_deleted_ref(num, dir, index, name, sctx); | 3189 | ret = __record_deleted_ref(num, dir, index, name, sctx); |
| @@ -3203,11 +3197,11 @@ static int record_changed_ref(struct send_ctx *sctx) | |||
| 3203 | { | 3197 | { |
| 3204 | int ret = 0; | 3198 | int ret = 0; |
| 3205 | 3199 | ||
| 3206 | ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path, | 3200 | ret = iterate_inode_ref(sctx->send_root, sctx->left_path, |
| 3207 | sctx->cmp_key, 0, __record_changed_new_ref, sctx); | 3201 | sctx->cmp_key, 0, __record_changed_new_ref, sctx); |
| 3208 | if (ret < 0) | 3202 | if (ret < 0) |
| 3209 | goto out; | 3203 | goto out; |
| 3210 | ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path, | 3204 | ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, |
| 3211 | sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); | 3205 | sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); |
| 3212 | if (ret < 0) | 3206 | if (ret < 0) |
| 3213 | goto out; | 3207 | goto out; |
| @@ -3266,8 +3260,7 @@ static int process_all_refs(struct send_ctx *sctx, | |||
| 3266 | found_key.type != BTRFS_INODE_EXTREF_KEY)) | 3260 | found_key.type != BTRFS_INODE_EXTREF_KEY)) |
| 3267 | break; | 3261 | break; |
| 3268 | 3262 | ||
| 3269 | ret = iterate_inode_ref(sctx, root, path, &found_key, 0, cb, | 3263 | ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); |
| 3270 | sctx); | ||
| 3271 | btrfs_release_path(path); | 3264 | btrfs_release_path(path); |
| 3272 | if (ret < 0) | 3265 | if (ret < 0) |
| 3273 | goto out; | 3266 | goto out; |
| @@ -3335,7 +3328,7 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key, | |||
| 3335 | struct fs_path *p; | 3328 | struct fs_path *p; |
| 3336 | posix_acl_xattr_header dummy_acl; | 3329 | posix_acl_xattr_header dummy_acl; |
| 3337 | 3330 | ||
| 3338 | p = fs_path_alloc(sctx); | 3331 | p = fs_path_alloc(); |
| 3339 | if (!p) | 3332 | if (!p) |
| 3340 | return -ENOMEM; | 3333 | return -ENOMEM; |
| 3341 | 3334 | ||
| @@ -3362,7 +3355,7 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key, | |||
| 3362 | ret = send_set_xattr(sctx, p, name, name_len, data, data_len); | 3355 | ret = send_set_xattr(sctx, p, name, name_len, data, data_len); |
| 3363 | 3356 | ||
| 3364 | out: | 3357 | out: |
| 3365 | fs_path_free(sctx, p); | 3358 | fs_path_free(p); |
| 3366 | return ret; | 3359 | return ret; |
| 3367 | } | 3360 | } |
| 3368 | 3361 | ||
| @@ -3375,7 +3368,7 @@ static int __process_deleted_xattr(int num, struct btrfs_key *di_key, | |||
| 3375 | struct send_ctx *sctx = ctx; | 3368 | struct send_ctx *sctx = ctx; |
| 3376 | struct fs_path *p; | 3369 | struct fs_path *p; |
| 3377 | 3370 | ||
| 3378 | p = fs_path_alloc(sctx); | 3371 | p = fs_path_alloc(); |
| 3379 | if (!p) | 3372 | if (!p) |
| 3380 | return -ENOMEM; | 3373 | return -ENOMEM; |
| 3381 | 3374 | ||
| @@ -3386,7 +3379,7 @@ static int __process_deleted_xattr(int num, struct btrfs_key *di_key, | |||
| 3386 | ret = send_remove_xattr(sctx, p, name, name_len); | 3379 | ret = send_remove_xattr(sctx, p, name, name_len); |
| 3387 | 3380 | ||
| 3388 | out: | 3381 | out: |
| 3389 | fs_path_free(sctx, p); | 3382 | fs_path_free(p); |
| 3390 | return ret; | 3383 | return ret; |
| 3391 | } | 3384 | } |
| 3392 | 3385 | ||
| @@ -3394,8 +3387,8 @@ static int process_new_xattr(struct send_ctx *sctx) | |||
| 3394 | { | 3387 | { |
| 3395 | int ret = 0; | 3388 | int ret = 0; |
| 3396 | 3389 | ||
| 3397 | ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path, | 3390 | ret = iterate_dir_item(sctx->send_root, sctx->left_path, |
| 3398 | sctx->cmp_key, __process_new_xattr, sctx); | 3391 | sctx->cmp_key, __process_new_xattr, sctx); |
| 3399 | 3392 | ||
| 3400 | return ret; | 3393 | return ret; |
| 3401 | } | 3394 | } |
| @@ -3404,8 +3397,8 @@ static int process_deleted_xattr(struct send_ctx *sctx) | |||
| 3404 | { | 3397 | { |
| 3405 | int ret; | 3398 | int ret; |
| 3406 | 3399 | ||
| 3407 | ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path, | 3400 | ret = iterate_dir_item(sctx->parent_root, sctx->right_path, |
| 3408 | sctx->cmp_key, __process_deleted_xattr, sctx); | 3401 | sctx->cmp_key, __process_deleted_xattr, sctx); |
| 3409 | 3402 | ||
| 3410 | return ret; | 3403 | return ret; |
| 3411 | } | 3404 | } |
| @@ -3429,17 +3422,15 @@ static int __find_xattr(int num, struct btrfs_key *di_key, | |||
| 3429 | strncmp(name, ctx->name, name_len) == 0) { | 3422 | strncmp(name, ctx->name, name_len) == 0) { |
| 3430 | ctx->found_idx = num; | 3423 | ctx->found_idx = num; |
| 3431 | ctx->found_data_len = data_len; | 3424 | ctx->found_data_len = data_len; |
| 3432 | ctx->found_data = kmalloc(data_len, GFP_NOFS); | 3425 | ctx->found_data = kmemdup(data, data_len, GFP_NOFS); |
| 3433 | if (!ctx->found_data) | 3426 | if (!ctx->found_data) |
| 3434 | return -ENOMEM; | 3427 | return -ENOMEM; |
| 3435 | memcpy(ctx->found_data, data, data_len); | ||
| 3436 | return 1; | 3428 | return 1; |
| 3437 | } | 3429 | } |
| 3438 | return 0; | 3430 | return 0; |
| 3439 | } | 3431 | } |
| 3440 | 3432 | ||
| 3441 | static int find_xattr(struct send_ctx *sctx, | 3433 | static int find_xattr(struct btrfs_root *root, |
| 3442 | struct btrfs_root *root, | ||
| 3443 | struct btrfs_path *path, | 3434 | struct btrfs_path *path, |
| 3444 | struct btrfs_key *key, | 3435 | struct btrfs_key *key, |
| 3445 | const char *name, int name_len, | 3436 | const char *name, int name_len, |
| @@ -3454,7 +3445,7 @@ static int find_xattr(struct send_ctx *sctx, | |||
| 3454 | ctx.found_data = NULL; | 3445 | ctx.found_data = NULL; |
| 3455 | ctx.found_data_len = 0; | 3446 | ctx.found_data_len = 0; |
| 3456 | 3447 | ||
| 3457 | ret = iterate_dir_item(sctx, root, path, key, __find_xattr, &ctx); | 3448 | ret = iterate_dir_item(root, path, key, __find_xattr, &ctx); |
| 3458 | if (ret < 0) | 3449 | if (ret < 0) |
| 3459 | return ret; | 3450 | return ret; |
| 3460 | 3451 | ||
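In __find_xattr() above, the separate kmalloc() plus memcpy() collapses into kmemdup(), which allocates and copies in one step and fails the same way (NULL on allocation failure). A userspace analogue of that simplification, since kmemdup() itself is kernel-only and dup_mem() here is just a stand-in:

```c
/* Userspace analogue of the kmalloc()+memcpy() -> kmemdup() simplification. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void *dup_mem(const void *src, size_t len)
{
	void *dst = malloc(len);

	if (!dst)
		return NULL;
	return memcpy(dst, src, len);	/* allocate and copy in one helper */
}

int main(void)
{
	const char data[] = "user.comment=hello";
	char *copy = dup_mem(data, sizeof(data));

	if (!copy)
		return 1;	/* mirrors the -ENOMEM path */
	printf("%s\n", copy);
	free(copy);
	return 0;
}
```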
| @@ -3480,9 +3471,9 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key, | |||
| 3480 | char *found_data = NULL; | 3471 | char *found_data = NULL; |
| 3481 | int found_data_len = 0; | 3472 | int found_data_len = 0; |
| 3482 | 3473 | ||
| 3483 | ret = find_xattr(sctx, sctx->parent_root, sctx->right_path, | 3474 | ret = find_xattr(sctx->parent_root, sctx->right_path, |
| 3484 | sctx->cmp_key, name, name_len, &found_data, | 3475 | sctx->cmp_key, name, name_len, &found_data, |
| 3485 | &found_data_len); | 3476 | &found_data_len); |
| 3486 | if (ret == -ENOENT) { | 3477 | if (ret == -ENOENT) { |
| 3487 | ret = __process_new_xattr(num, di_key, name, name_len, data, | 3478 | ret = __process_new_xattr(num, di_key, name, name_len, data, |
| 3488 | data_len, type, ctx); | 3479 | data_len, type, ctx); |
| @@ -3508,8 +3499,8 @@ static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key, | |||
| 3508 | int ret; | 3499 | int ret; |
| 3509 | struct send_ctx *sctx = ctx; | 3500 | struct send_ctx *sctx = ctx; |
| 3510 | 3501 | ||
| 3511 | ret = find_xattr(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key, | 3502 | ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key, |
| 3512 | name, name_len, NULL, NULL); | 3503 | name, name_len, NULL, NULL); |
| 3513 | if (ret == -ENOENT) | 3504 | if (ret == -ENOENT) |
| 3514 | ret = __process_deleted_xattr(num, di_key, name, name_len, data, | 3505 | ret = __process_deleted_xattr(num, di_key, name, name_len, data, |
| 3515 | data_len, type, ctx); | 3506 | data_len, type, ctx); |
| @@ -3523,11 +3514,11 @@ static int process_changed_xattr(struct send_ctx *sctx) | |||
| 3523 | { | 3514 | { |
| 3524 | int ret = 0; | 3515 | int ret = 0; |
| 3525 | 3516 | ||
| 3526 | ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path, | 3517 | ret = iterate_dir_item(sctx->send_root, sctx->left_path, |
| 3527 | sctx->cmp_key, __process_changed_new_xattr, sctx); | 3518 | sctx->cmp_key, __process_changed_new_xattr, sctx); |
| 3528 | if (ret < 0) | 3519 | if (ret < 0) |
| 3529 | goto out; | 3520 | goto out; |
| 3530 | ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path, | 3521 | ret = iterate_dir_item(sctx->parent_root, sctx->right_path, |
| 3531 | sctx->cmp_key, __process_changed_deleted_xattr, sctx); | 3522 | sctx->cmp_key, __process_changed_deleted_xattr, sctx); |
| 3532 | 3523 | ||
| 3533 | out: | 3524 | out: |
| @@ -3572,8 +3563,8 @@ static int process_all_new_xattrs(struct send_ctx *sctx) | |||
| 3572 | goto out; | 3563 | goto out; |
| 3573 | } | 3564 | } |
| 3574 | 3565 | ||
| 3575 | ret = iterate_dir_item(sctx, root, path, &found_key, | 3566 | ret = iterate_dir_item(root, path, &found_key, |
| 3576 | __process_new_xattr, sctx); | 3567 | __process_new_xattr, sctx); |
| 3577 | if (ret < 0) | 3568 | if (ret < 0) |
| 3578 | goto out; | 3569 | goto out; |
| 3579 | 3570 | ||
| @@ -3598,7 +3589,7 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) | |||
| 3598 | int num_read = 0; | 3589 | int num_read = 0; |
| 3599 | mm_segment_t old_fs; | 3590 | mm_segment_t old_fs; |
| 3600 | 3591 | ||
| 3601 | p = fs_path_alloc(sctx); | 3592 | p = fs_path_alloc(); |
| 3602 | if (!p) | 3593 | if (!p) |
| 3603 | return -ENOMEM; | 3594 | return -ENOMEM; |
| 3604 | 3595 | ||
| @@ -3640,7 +3631,7 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); | |||
| 3640 | 3631 | ||
| 3641 | tlv_put_failure: | 3632 | tlv_put_failure: |
| 3642 | out: | 3633 | out: |
| 3643 | fs_path_free(sctx, p); | 3634 | fs_path_free(p); |
| 3644 | set_fs(old_fs); | 3635 | set_fs(old_fs); |
| 3645 | if (ret < 0) | 3636 | if (ret < 0) |
| 3646 | return ret; | 3637 | return ret; |
| @@ -3663,7 +3654,7 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " | |||
| 3663 | clone_root->root->objectid, clone_root->ino, | 3654 | clone_root->root->objectid, clone_root->ino, |
| 3664 | clone_root->offset); | 3655 | clone_root->offset); |
| 3665 | 3656 | ||
| 3666 | p = fs_path_alloc(sctx); | 3657 | p = fs_path_alloc(); |
| 3667 | if (!p) | 3658 | if (!p) |
| 3668 | return -ENOMEM; | 3659 | return -ENOMEM; |
| 3669 | 3660 | ||
| @@ -3686,8 +3677,7 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " | |||
| 3686 | goto out; | 3677 | goto out; |
| 3687 | ret = get_cur_path(sctx, clone_root->ino, gen, p); | 3678 | ret = get_cur_path(sctx, clone_root->ino, gen, p); |
| 3688 | } else { | 3679 | } else { |
| 3689 | ret = get_inode_path(sctx, clone_root->root, | 3680 | ret = get_inode_path(clone_root->root, clone_root->ino, p); |
| 3690 | clone_root->ino, p); | ||
| 3691 | } | 3681 | } |
| 3692 | if (ret < 0) | 3682 | if (ret < 0) |
| 3693 | goto out; | 3683 | goto out; |
| @@ -3704,7 +3694,7 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " | |||
| 3704 | 3694 | ||
| 3705 | tlv_put_failure: | 3695 | tlv_put_failure: |
| 3706 | out: | 3696 | out: |
| 3707 | fs_path_free(sctx, p); | 3697 | fs_path_free(p); |
| 3708 | return ret; | 3698 | return ret; |
| 3709 | } | 3699 | } |
| 3710 | 3700 | ||
| @@ -3717,7 +3707,7 @@ static int send_update_extent(struct send_ctx *sctx, | |||
| 3717 | int ret = 0; | 3707 | int ret = 0; |
| 3718 | struct fs_path *p; | 3708 | struct fs_path *p; |
| 3719 | 3709 | ||
| 3720 | p = fs_path_alloc(sctx); | 3710 | p = fs_path_alloc(); |
| 3721 | if (!p) | 3711 | if (!p) |
| 3722 | return -ENOMEM; | 3712 | return -ENOMEM; |
| 3723 | 3713 | ||
| @@ -3737,7 +3727,7 @@ static int send_update_extent(struct send_ctx *sctx, | |||
| 3737 | 3727 | ||
| 3738 | tlv_put_failure: | 3728 | tlv_put_failure: |
| 3739 | out: | 3729 | out: |
| 3740 | fs_path_free(sctx, p); | 3730 | fs_path_free(p); |
| 3741 | return ret; | 3731 | return ret; |
| 3742 | } | 3732 | } |
| 3743 | 3733 | ||
| @@ -4579,6 +4569,41 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
| 4579 | send_root = BTRFS_I(file_inode(mnt_file))->root; | 4569 | send_root = BTRFS_I(file_inode(mnt_file))->root; |
| 4580 | fs_info = send_root->fs_info; | 4570 | fs_info = send_root->fs_info; |
| 4581 | 4571 | ||
| 4572 | /* | ||
| 4573 | * This is done when we look up the root; it should already be complete | ||
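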
| 4574 | * by the time we get here. | ||
| 4575 | */ | ||
| 4576 | WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); | ||
| 4577 | |||
| 4578 | /* | ||
| 4579 | * If we just created this root we need to make sure that the orphan | ||
| 4580 | * cleanup has been done and committed since we search the commit root, | ||
| 4581 | * so check its commit root transid with our otransid and if they match | ||
| 4582 | * commit the transaction to make sure everything is updated. | ||
| 4583 | */ | ||
| 4584 | down_read(&send_root->fs_info->extent_commit_sem); | ||
| 4585 | if (btrfs_header_generation(send_root->commit_root) == | ||
| 4586 | btrfs_root_otransid(&send_root->root_item)) { | ||
| 4587 | struct btrfs_trans_handle *trans; | ||
| 4588 | |||
| 4589 | up_read(&send_root->fs_info->extent_commit_sem); | ||
| 4590 | |||
| 4591 | trans = btrfs_attach_transaction_barrier(send_root); | ||
| 4592 | if (IS_ERR(trans)) { | ||
| 4593 | if (PTR_ERR(trans) != -ENOENT) { | ||
| 4594 | ret = PTR_ERR(trans); | ||
| 4595 | goto out; | ||
| 4596 | } | ||
| 4597 | /* ENOENT means there's no transaction */ | ||
| 4598 | } else { | ||
| 4599 | ret = btrfs_commit_transaction(trans, send_root); | ||
| 4600 | if (ret) | ||
| 4601 | goto out; | ||
| 4602 | } | ||
| 4603 | } else { | ||
| 4604 | up_read(&send_root->fs_info->extent_commit_sem); | ||
| 4605 | } | ||
| 4606 | |||
| 4582 | arg = memdup_user(arg_, sizeof(*arg)); | 4607 | arg = memdup_user(arg_, sizeof(*arg)); |
| 4583 | if (IS_ERR(arg)) { | 4608 | if (IS_ERR(arg)) { |
| 4584 | ret = PTR_ERR(arg); | 4609 | ret = PTR_ERR(arg); |
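The block added to btrfs_ioctl_send() above handles freshly created subvolumes: when the commit root's generation still equals the root's otransid, orphan cleanup may not have reached the commit root that send searches, so any running transaction is committed first; btrfs_attach_transaction_barrier() returning -ENOENT simply means nothing is running and there is nothing to commit. A simplified sketch of that attach-or-skip decision (the helper names and error plumbing here are illustrative, not the btrfs API):

```c
/*
 * Sketch of "attach a transaction if one is running, otherwise carry on".
 * attach_transaction()/commit_transaction() are illustrative stand-ins.
 */
#include <errno.h>
#include <stdio.h>

struct trans;

static struct trans *attach_transaction(int *err)
{
	*err = -ENOENT;		/* pretend no transaction is currently running */
	return NULL;
}

static int commit_transaction(struct trans *t)
{
	(void)t;
	return 0;
}

static int commit_if_running(void)
{
	int err;
	struct trans *t = attach_transaction(&err);

	if (!t) {
		if (err != -ENOENT)
			return err;	/* a real failure propagates */
		return 0;		/* -ENOENT: nothing running, nothing to do */
	}
	return commit_transaction(t);
}

int main(void)
{
	printf("commit_if_running() = %d\n", commit_if_running());
	return 0;
}
```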
| @@ -4663,10 +4688,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
| 4663 | key.type = BTRFS_ROOT_ITEM_KEY; | 4688 | key.type = BTRFS_ROOT_ITEM_KEY; |
| 4664 | key.offset = (u64)-1; | 4689 | key.offset = (u64)-1; |
| 4665 | clone_root = btrfs_read_fs_root_no_name(fs_info, &key); | 4690 | clone_root = btrfs_read_fs_root_no_name(fs_info, &key); |
| 4666 | if (!clone_root) { | ||
| 4667 | ret = -EINVAL; | ||
| 4668 | goto out; | ||
| 4669 | } | ||
| 4670 | if (IS_ERR(clone_root)) { | 4691 | if (IS_ERR(clone_root)) { |
| 4671 | ret = PTR_ERR(clone_root); | 4692 | ret = PTR_ERR(clone_root); |
| 4672 | goto out; | 4693 | goto out; |
| @@ -4682,8 +4703,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
| 4682 | key.type = BTRFS_ROOT_ITEM_KEY; | 4703 | key.type = BTRFS_ROOT_ITEM_KEY; |
| 4683 | key.offset = (u64)-1; | 4704 | key.offset = (u64)-1; |
| 4684 | sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); | 4705 | sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); |
| 4685 | if (!sctx->parent_root) { | 4706 | if (IS_ERR(sctx->parent_root)) { |
| 4686 | ret = -EINVAL; | 4707 | ret = PTR_ERR(sctx->parent_root); |
| 4687 | goto out; | 4708 | goto out; |
| 4688 | } | 4709 | } |
| 4689 | } | 4710 | } |
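The last two send.c hunks fix the error handling around btrfs_read_fs_root_no_name(): it reports failure through an ERR_PTR()-encoded pointer rather than NULL, so the NULL checks were dead code and the parent_root path now tests IS_ERR() and propagates PTR_ERR(). A minimal userspace re-creation of that idiom (the real macros live in include/linux/err.h):

```c
/* Re-creation of ERR_PTR()/IS_ERR()/PTR_ERR() to show why a lookup helper
 * that encodes errors in the pointer never needs a NULL check at callers. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error)      { return (void *)error; }
static inline long  PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int   IS_ERR(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

struct root { int id; };

static struct root *read_root(int want_error)
{
	static struct root r = { .id = 5 };

	if (want_error)
		return ERR_PTR(-ENOENT);	/* failure is encoded, never NULL */
	return &r;
}

int main(void)
{
	struct root *root = read_root(1);

	if (IS_ERR(root)) {			/* the only check callers need */
		printf("lookup failed: %ld\n", PTR_ERR(root));
		return 1;
	}
	printf("got root %d\n", root->id);
	return 0;
}
```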
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f0857e092a3c..8eb6191d86da 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -51,7 +51,6 @@ | |||
| 51 | #include "print-tree.h" | 51 | #include "print-tree.h" |
| 52 | #include "xattr.h" | 52 | #include "xattr.h" |
| 53 | #include "volumes.h" | 53 | #include "volumes.h" |
| 54 | #include "version.h" | ||
| 55 | #include "export.h" | 54 | #include "export.h" |
| 56 | #include "compression.h" | 55 | #include "compression.h" |
| 57 | #include "rcu-string.h" | 56 | #include "rcu-string.h" |
| @@ -266,6 +265,9 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, | |||
| 266 | return; | 265 | return; |
| 267 | } | 266 | } |
| 268 | ACCESS_ONCE(trans->transaction->aborted) = errno; | 267 | ACCESS_ONCE(trans->transaction->aborted) = errno; |
| 268 | /* Wake up anybody who may be waiting on this transaction */ | ||
| 269 | wake_up(&root->fs_info->transaction_wait); | ||
| 270 | wake_up(&root->fs_info->transaction_blocked_wait); | ||
| 269 | __btrfs_std_error(root->fs_info, function, line, errno, NULL); | 271 | __btrfs_std_error(root->fs_info, function, line, errno, NULL); |
| 270 | } | 272 | } |
| 271 | /* | 273 | /* |
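__btrfs_abort_transaction() now wakes both transaction wait queues after setting ->aborted, pairing with the wait conditions later in this series that also test ->aborted, so sleepers notice the abort instead of waiting for a state change that will never come. An illustrative pthread sketch of that set-flag-then-wake pairing (not kernel code; build with -pthread):

```c
/* The waiter's condition includes "aborted"; the aborter sets the flag under
 * the lock and broadcasts, so no sleeper waits forever. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int state;	/* 0 = running, 2 = unblocked */
static int aborted;

static void *waiter(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (state < 2 && !aborted)		/* like: state >= UNBLOCKED || aborted */
		pthread_cond_wait(&cond, &lock);
	printf("waiter woke up: state=%d aborted=%d\n", state, aborted);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter, NULL);
	pthread_mutex_lock(&lock);
	aborted = 1;				/* like setting ->aborted = errno */
	pthread_cond_broadcast(&cond);		/* like the two wake_up() calls */
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	return 0;
}
```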
| @@ -776,9 +778,6 @@ find_root: | |||
| 776 | if (IS_ERR(new_root)) | 778 | if (IS_ERR(new_root)) |
| 777 | return ERR_CAST(new_root); | 779 | return ERR_CAST(new_root); |
| 778 | 780 | ||
| 779 | if (btrfs_root_refs(&new_root->root_item) == 0) | ||
| 780 | return ERR_PTR(-ENOENT); | ||
| 781 | |||
| 782 | dir_id = btrfs_root_dirid(&new_root->root_item); | 781 | dir_id = btrfs_root_dirid(&new_root->root_item); |
| 783 | setup_root: | 782 | setup_root: |
| 784 | location.objectid = dir_id; | 783 | location.objectid = dir_id; |
| @@ -866,7 +865,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
| 866 | return 0; | 865 | return 0; |
| 867 | } | 866 | } |
| 868 | 867 | ||
| 869 | btrfs_wait_ordered_extents(root, 1); | 868 | btrfs_wait_all_ordered_extents(fs_info, 1); |
| 870 | 869 | ||
| 871 | trans = btrfs_attach_transaction_barrier(root); | 870 | trans = btrfs_attach_transaction_barrier(root); |
| 872 | if (IS_ERR(trans)) { | 871 | if (IS_ERR(trans)) { |
| @@ -1685,6 +1684,18 @@ static void btrfs_interface_exit(void) | |||
| 1685 | printk(KERN_INFO "btrfs: misc_deregister failed for control device\n"); | 1684 | printk(KERN_INFO "btrfs: misc_deregister failed for control device\n"); |
| 1686 | } | 1685 | } |
| 1687 | 1686 | ||
| 1687 | static void btrfs_print_info(void) | ||
| 1688 | { | ||
| 1689 | printk(KERN_INFO "Btrfs loaded" | ||
| 1690 | #ifdef CONFIG_BTRFS_DEBUG | ||
| 1691 | ", debug=on" | ||
| 1692 | #endif | ||
| 1693 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | ||
| 1694 | ", integrity-checker=on" | ||
| 1695 | #endif | ||
| 1696 | "\n"); | ||
| 1697 | } | ||
| 1698 | |||
| 1688 | static int __init init_btrfs_fs(void) | 1699 | static int __init init_btrfs_fs(void) |
| 1689 | { | 1700 | { |
| 1690 | int err; | 1701 | int err; |
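The old BTRFS_BUILD_VERSION banner (and the version.h include dropped above) gives way to btrfs_print_info(), which builds the load message out of adjacent string literals selected by #ifdef. A tiny standalone illustration of that compile-time concatenation trick, with a made-up config symbol:

```c
/* Adjacent string literals are concatenated at compile time, so optional
 * features simply contribute an extra literal when their symbol is defined. */
#include <stdio.h>

#define MYFS_DEBUG 1	/* stand-in for a Kconfig option */

int main(void)
{
	printf("myfs loaded"
#ifdef MYFS_DEBUG
	       ", debug=on"
#endif
	       "\n");
	return 0;
}
```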
| @@ -1733,11 +1744,9 @@ static int __init init_btrfs_fs(void) | |||
| 1733 | 1744 | ||
| 1734 | btrfs_init_lockdep(); | 1745 | btrfs_init_lockdep(); |
| 1735 | 1746 | ||
| 1736 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 1747 | btrfs_print_info(); |
| 1737 | btrfs_test_free_space_cache(); | 1748 | btrfs_test_free_space_cache(); |
| 1738 | #endif | ||
| 1739 | 1749 | ||
| 1740 | printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION); | ||
| 1741 | return 0; | 1750 | return 0; |
| 1742 | 1751 | ||
| 1743 | unregister_ioctl: | 1752 | unregister_ioctl: |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0544587d74f4..d58cce77fc6c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -34,12 +34,43 @@ | |||
| 34 | 34 | ||
| 35 | #define BTRFS_ROOT_TRANS_TAG 0 | 35 | #define BTRFS_ROOT_TRANS_TAG 0 |
| 36 | 36 | ||
| 37 | static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { | ||
| 38 | [TRANS_STATE_RUNNING] = 0U, | ||
| 39 | [TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE | | ||
| 40 | __TRANS_START), | ||
| 41 | [TRANS_STATE_COMMIT_START] = (__TRANS_USERSPACE | | ||
| 42 | __TRANS_START | | ||
| 43 | __TRANS_ATTACH), | ||
| 44 | [TRANS_STATE_COMMIT_DOING] = (__TRANS_USERSPACE | | ||
| 45 | __TRANS_START | | ||
| 46 | __TRANS_ATTACH | | ||
| 47 | __TRANS_JOIN), | ||
| 48 | [TRANS_STATE_UNBLOCKED] = (__TRANS_USERSPACE | | ||
| 49 | __TRANS_START | | ||
| 50 | __TRANS_ATTACH | | ||
| 51 | __TRANS_JOIN | | ||
| 52 | __TRANS_JOIN_NOLOCK), | ||
| 53 | [TRANS_STATE_COMPLETED] = (__TRANS_USERSPACE | | ||
| 54 | __TRANS_START | | ||
| 55 | __TRANS_ATTACH | | ||
| 56 | __TRANS_JOIN | | ||
| 57 | __TRANS_JOIN_NOLOCK), | ||
| 58 | }; | ||
| 59 | |||
| 37 | static void put_transaction(struct btrfs_transaction *transaction) | 60 | static void put_transaction(struct btrfs_transaction *transaction) |
| 38 | { | 61 | { |
| 39 | WARN_ON(atomic_read(&transaction->use_count) == 0); | 62 | WARN_ON(atomic_read(&transaction->use_count) == 0); |
| 40 | if (atomic_dec_and_test(&transaction->use_count)) { | 63 | if (atomic_dec_and_test(&transaction->use_count)) { |
| 41 | BUG_ON(!list_empty(&transaction->list)); | 64 | BUG_ON(!list_empty(&transaction->list)); |
| 42 | WARN_ON(transaction->delayed_refs.root.rb_node); | 65 | WARN_ON(transaction->delayed_refs.root.rb_node); |
| 66 | while (!list_empty(&transaction->pending_chunks)) { | ||
| 67 | struct extent_map *em; | ||
| 68 | |||
| 69 | em = list_first_entry(&transaction->pending_chunks, | ||
| 70 | struct extent_map, list); | ||
| 71 | list_del_init(&em->list); | ||
| 72 | free_extent_map(em); | ||
| 73 | } | ||
| 43 | kmem_cache_free(btrfs_transaction_cachep, transaction); | 74 | kmem_cache_free(btrfs_transaction_cachep, transaction); |
| 44 | } | 75 | } |
| 45 | } | 76 | } |
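This hunk replaces the old in_commit/blocked flags with a single transaction state plus btrfs_blocked_trans_types[]: for each state, a bitmask of handle types that may no longer join once the transaction has reached it, so join_transaction() can answer "may this handle join?" with one mask test. It also teaches put_transaction() to free any extent maps still queued on the new pending_chunks list. A small standalone model of the table-driven check (the states and type bits here are made up for the example):

```c
/* Minimal model of the state -> "blocked join types" lookup used above. */
#include <stdio.h>

enum state { RUNNING, BLOCKED, COMMIT_START, COMMIT_DOING, UNBLOCKED, COMPLETED, STATE_MAX };

#define T_START		(1U << 0)
#define T_ATTACH	(1U << 1)
#define T_JOIN		(1U << 2)
#define T_JOIN_NOLOCK	(1U << 3)

static const unsigned int blocked_types[STATE_MAX] = {
	[RUNNING]	= 0U,
	[BLOCKED]	= T_START,
	[COMMIT_START]	= T_START | T_ATTACH,
	[COMMIT_DOING]	= T_START | T_ATTACH | T_JOIN,
	[UNBLOCKED]	= T_START | T_ATTACH | T_JOIN | T_JOIN_NOLOCK,
	[COMPLETED]	= T_START | T_ATTACH | T_JOIN | T_JOIN_NOLOCK,
};

static int may_join(enum state s, unsigned int type)
{
	return !(blocked_types[s] & type);	/* one mask test, no flag soup */
}

int main(void)
{
	printf("JOIN during COMMIT_START: %s\n", may_join(COMMIT_START, T_JOIN) ? "ok" : "busy");
	printf("JOIN during COMMIT_DOING: %s\n", may_join(COMMIT_DOING, T_JOIN) ? "ok" : "busy");
	return 0;
}
```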
| @@ -50,18 +81,35 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
| 50 | root->commit_root = btrfs_root_node(root); | 81 | root->commit_root = btrfs_root_node(root); |
| 51 | } | 82 | } |
| 52 | 83 | ||
| 53 | static inline int can_join_transaction(struct btrfs_transaction *trans, | 84 | static inline void extwriter_counter_inc(struct btrfs_transaction *trans, |
| 54 | int type) | 85 | unsigned int type) |
| 86 | { | ||
| 87 | if (type & TRANS_EXTWRITERS) | ||
| 88 | atomic_inc(&trans->num_extwriters); | ||
| 89 | } | ||
| 90 | |||
| 91 | static inline void extwriter_counter_dec(struct btrfs_transaction *trans, | ||
| 92 | unsigned int type) | ||
| 93 | { | ||
| 94 | if (type & TRANS_EXTWRITERS) | ||
| 95 | atomic_dec(&trans->num_extwriters); | ||
| 96 | } | ||
| 97 | |||
| 98 | static inline void extwriter_counter_init(struct btrfs_transaction *trans, | ||
| 99 | unsigned int type) | ||
| 100 | { | ||
| 101 | atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 1 : 0)); | ||
| 102 | } | ||
| 103 | |||
| 104 | static inline int extwriter_counter_read(struct btrfs_transaction *trans) | ||
| 55 | { | 105 | { |
| 56 | return !(trans->in_commit && | 106 | return atomic_read(&trans->num_extwriters); |
| 57 | type != TRANS_JOIN && | ||
| 58 | type != TRANS_JOIN_NOLOCK); | ||
| 59 | } | 107 | } |
| 60 | 108 | ||
| 61 | /* | 109 | /* |
| 62 | * either allocate a new transaction or hop into the existing one | 110 | * either allocate a new transaction or hop into the existing one |
| 63 | */ | 111 | */ |
| 64 | static noinline int join_transaction(struct btrfs_root *root, int type) | 112 | static noinline int join_transaction(struct btrfs_root *root, unsigned int type) |
| 65 | { | 113 | { |
| 66 | struct btrfs_transaction *cur_trans; | 114 | struct btrfs_transaction *cur_trans; |
| 67 | struct btrfs_fs_info *fs_info = root->fs_info; | 115 | struct btrfs_fs_info *fs_info = root->fs_info; |
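The num_joined counter gives way to num_extwriters, which counts only handles whose type falls in TRANS_EXTWRITERS; the extwriter_counter_* helpers above touch it solely for those types. A sketch of a counter gated on a type mask, using C11 atomics in place of the kernel's atomic_t (the specific type bits are illustrative):

```c
/* A counter that only tracks "external writer" handle types. */
#include <stdatomic.h>
#include <stdio.h>

#define T_USERSPACE	(1U << 0)
#define T_START		(1U << 1)
#define T_ATTACH	(1U << 2)
#define T_JOIN		(1U << 3)
#define T_EXTWRITERS	(T_USERSPACE | T_START | T_ATTACH)	/* assumed set */

static atomic_uint num_extwriters;

static void extwriter_inc(unsigned int type)
{
	if (type & T_EXTWRITERS)
		atomic_fetch_add(&num_extwriters, 1);
}

static void extwriter_dec(unsigned int type)
{
	if (type & T_EXTWRITERS)
		atomic_fetch_sub(&num_extwriters, 1);
}

int main(void)
{
	extwriter_inc(T_START);	/* counted */
	extwriter_inc(T_JOIN);	/* internal joiner, not counted */
	printf("external writers: %u\n", atomic_load(&num_extwriters));
	extwriter_dec(T_START);
	return 0;
}
```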
| @@ -74,32 +122,19 @@ loop: | |||
| 74 | return -EROFS; | 122 | return -EROFS; |
| 75 | } | 123 | } |
| 76 | 124 | ||
| 77 | if (fs_info->trans_no_join) { | ||
| 78 | /* | ||
| 79 | * If we are JOIN_NOLOCK we're already committing a current | ||
| 80 | * transaction, we just need a handle to deal with something | ||
| 81 | * when committing the transaction, such as inode cache and | ||
| 82 | * space cache. It is a special case. | ||
| 83 | */ | ||
| 84 | if (type != TRANS_JOIN_NOLOCK) { | ||
| 85 | spin_unlock(&fs_info->trans_lock); | ||
| 86 | return -EBUSY; | ||
| 87 | } | ||
| 88 | } | ||
| 89 | |||
| 90 | cur_trans = fs_info->running_transaction; | 125 | cur_trans = fs_info->running_transaction; |
| 91 | if (cur_trans) { | 126 | if (cur_trans) { |
| 92 | if (cur_trans->aborted) { | 127 | if (cur_trans->aborted) { |
| 93 | spin_unlock(&fs_info->trans_lock); | 128 | spin_unlock(&fs_info->trans_lock); |
| 94 | return cur_trans->aborted; | 129 | return cur_trans->aborted; |
| 95 | } | 130 | } |
| 96 | if (!can_join_transaction(cur_trans, type)) { | 131 | if (btrfs_blocked_trans_types[cur_trans->state] & type) { |
| 97 | spin_unlock(&fs_info->trans_lock); | 132 | spin_unlock(&fs_info->trans_lock); |
| 98 | return -EBUSY; | 133 | return -EBUSY; |
| 99 | } | 134 | } |
| 100 | atomic_inc(&cur_trans->use_count); | 135 | atomic_inc(&cur_trans->use_count); |
| 101 | atomic_inc(&cur_trans->num_writers); | 136 | atomic_inc(&cur_trans->num_writers); |
| 102 | cur_trans->num_joined++; | 137 | extwriter_counter_inc(cur_trans, type); |
| 103 | spin_unlock(&fs_info->trans_lock); | 138 | spin_unlock(&fs_info->trans_lock); |
| 104 | return 0; | 139 | return 0; |
| 105 | } | 140 | } |
| @@ -112,6 +147,12 @@ loop: | |||
| 112 | if (type == TRANS_ATTACH) | 147 | if (type == TRANS_ATTACH) |
| 113 | return -ENOENT; | 148 | return -ENOENT; |
| 114 | 149 | ||
| 150 | /* | ||
| 151 | * JOIN_NOLOCK only happens during the transaction commit, so | ||
| 152 | * it is impossible that ->running_transaction is NULL | ||
| 153 | */ | ||
| 154 | BUG_ON(type == TRANS_JOIN_NOLOCK); | ||
| 155 | |||
| 115 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); | 156 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); |
| 116 | if (!cur_trans) | 157 | if (!cur_trans) |
| 117 | return -ENOMEM; | 158 | return -ENOMEM; |
| @@ -120,7 +161,7 @@ loop: | |||
| 120 | if (fs_info->running_transaction) { | 161 | if (fs_info->running_transaction) { |
| 121 | /* | 162 | /* |
| 122 | * someone started a transaction after we unlocked. Make sure | 163 | * someone started a transaction after we unlocked. Make sure |
| 123 | * to redo the trans_no_join checks above | 164 | * to redo the checks above |
| 124 | */ | 165 | */ |
| 125 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 166 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
| 126 | goto loop; | 167 | goto loop; |
| @@ -131,17 +172,15 @@ loop: | |||
| 131 | } | 172 | } |
| 132 | 173 | ||
| 133 | atomic_set(&cur_trans->num_writers, 1); | 174 | atomic_set(&cur_trans->num_writers, 1); |
| 134 | cur_trans->num_joined = 0; | 175 | extwriter_counter_init(cur_trans, type); |
| 135 | init_waitqueue_head(&cur_trans->writer_wait); | 176 | init_waitqueue_head(&cur_trans->writer_wait); |
| 136 | init_waitqueue_head(&cur_trans->commit_wait); | 177 | init_waitqueue_head(&cur_trans->commit_wait); |
| 137 | cur_trans->in_commit = 0; | 178 | cur_trans->state = TRANS_STATE_RUNNING; |
| 138 | cur_trans->blocked = 0; | ||
| 139 | /* | 179 | /* |
| 140 | * One for this trans handle, one so it will live on until we | 180 | * One for this trans handle, one so it will live on until we |
| 141 | * commit the transaction. | 181 | * commit the transaction. |
| 142 | */ | 182 | */ |
| 143 | atomic_set(&cur_trans->use_count, 2); | 183 | atomic_set(&cur_trans->use_count, 2); |
| 144 | cur_trans->commit_done = 0; | ||
| 145 | cur_trans->start_time = get_seconds(); | 184 | cur_trans->start_time = get_seconds(); |
| 146 | 185 | ||
| 147 | cur_trans->delayed_refs.root = RB_ROOT; | 186 | cur_trans->delayed_refs.root = RB_ROOT; |
| @@ -164,7 +203,6 @@ loop: | |||
| 164 | "creating a fresh transaction\n"); | 203 | "creating a fresh transaction\n"); |
| 165 | atomic64_set(&fs_info->tree_mod_seq, 0); | 204 | atomic64_set(&fs_info->tree_mod_seq, 0); |
| 166 | 205 | ||
| 167 | spin_lock_init(&cur_trans->commit_lock); | ||
| 168 | spin_lock_init(&cur_trans->delayed_refs.lock); | 206 | spin_lock_init(&cur_trans->delayed_refs.lock); |
| 169 | atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0); | 207 | atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0); |
| 170 | atomic_set(&cur_trans->delayed_refs.ref_seq, 0); | 208 | atomic_set(&cur_trans->delayed_refs.ref_seq, 0); |
| @@ -172,6 +210,7 @@ loop: | |||
| 172 | 210 | ||
| 173 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 211 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
| 174 | INIT_LIST_HEAD(&cur_trans->ordered_operations); | 212 | INIT_LIST_HEAD(&cur_trans->ordered_operations); |
| 213 | INIT_LIST_HEAD(&cur_trans->pending_chunks); | ||
| 175 | list_add_tail(&cur_trans->list, &fs_info->trans_list); | 214 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
| 176 | extent_io_tree_init(&cur_trans->dirty_pages, | 215 | extent_io_tree_init(&cur_trans->dirty_pages, |
| 177 | fs_info->btree_inode->i_mapping); | 216 | fs_info->btree_inode->i_mapping); |
| @@ -269,6 +308,13 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | |||
| 269 | return 0; | 308 | return 0; |
| 270 | } | 309 | } |
| 271 | 310 | ||
| 311 | static inline int is_transaction_blocked(struct btrfs_transaction *trans) | ||
| 312 | { | ||
| 313 | return (trans->state >= TRANS_STATE_BLOCKED && | ||
| 314 | trans->state < TRANS_STATE_UNBLOCKED && | ||
| 315 | !trans->aborted); | ||
| 316 | } | ||
| 317 | |||
| 272 | /* wait for commit against the current transaction to become unblocked | 318 | /* wait for commit against the current transaction to become unblocked |
| 273 | * when this is done, it is safe to start a new transaction, but the current | 319 | * when this is done, it is safe to start a new transaction, but the current |
| 274 | * transaction might not be fully on disk. | 320 | * transaction might not be fully on disk. |
| @@ -279,12 +325,13 @@ static void wait_current_trans(struct btrfs_root *root) | |||
| 279 | 325 | ||
| 280 | spin_lock(&root->fs_info->trans_lock); | 326 | spin_lock(&root->fs_info->trans_lock); |
| 281 | cur_trans = root->fs_info->running_transaction; | 327 | cur_trans = root->fs_info->running_transaction; |
| 282 | if (cur_trans && cur_trans->blocked) { | 328 | if (cur_trans && is_transaction_blocked(cur_trans)) { |
| 283 | atomic_inc(&cur_trans->use_count); | 329 | atomic_inc(&cur_trans->use_count); |
| 284 | spin_unlock(&root->fs_info->trans_lock); | 330 | spin_unlock(&root->fs_info->trans_lock); |
| 285 | 331 | ||
| 286 | wait_event(root->fs_info->transaction_wait, | 332 | wait_event(root->fs_info->transaction_wait, |
| 287 | !cur_trans->blocked); | 333 | cur_trans->state >= TRANS_STATE_UNBLOCKED || |
| 334 | cur_trans->aborted); | ||
| 288 | put_transaction(cur_trans); | 335 | put_transaction(cur_trans); |
| 289 | } else { | 336 | } else { |
| 290 | spin_unlock(&root->fs_info->trans_lock); | 337 | spin_unlock(&root->fs_info->trans_lock); |
| @@ -307,7 +354,7 @@ static int may_wait_transaction(struct btrfs_root *root, int type) | |||
| 307 | } | 354 | } |
| 308 | 355 | ||
| 309 | static struct btrfs_trans_handle * | 356 | static struct btrfs_trans_handle * |
| 310 | start_transaction(struct btrfs_root *root, u64 num_items, int type, | 357 | start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, |
| 311 | enum btrfs_reserve_flush_enum flush) | 358 | enum btrfs_reserve_flush_enum flush) |
| 312 | { | 359 | { |
| 313 | struct btrfs_trans_handle *h; | 360 | struct btrfs_trans_handle *h; |
| @@ -320,7 +367,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type, | |||
| 320 | return ERR_PTR(-EROFS); | 367 | return ERR_PTR(-EROFS); |
| 321 | 368 | ||
| 322 | if (current->journal_info) { | 369 | if (current->journal_info) { |
| 323 | WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK); | 370 | WARN_ON(type & TRANS_EXTWRITERS); |
| 324 | h = current->journal_info; | 371 | h = current->journal_info; |
| 325 | h->use_count++; | 372 | h->use_count++; |
| 326 | WARN_ON(h->use_count > 2); | 373 | WARN_ON(h->use_count > 2); |
| @@ -366,7 +413,7 @@ again: | |||
| 366 | * If we are ATTACH, it means we just want to catch the current | 413 | * If we are ATTACH, it means we just want to catch the current |
| 367 | * transaction and commit it, so we needn't do sb_start_intwrite(). | 414 | * transaction and commit it, so we needn't do sb_start_intwrite(). |
| 368 | */ | 415 | */ |
| 369 | if (type < TRANS_JOIN_NOLOCK) | 416 | if (type & __TRANS_FREEZABLE) |
| 370 | sb_start_intwrite(root->fs_info->sb); | 417 | sb_start_intwrite(root->fs_info->sb); |
| 371 | 418 | ||
| 372 | if (may_wait_transaction(root, type)) | 419 | if (may_wait_transaction(root, type)) |
| @@ -408,7 +455,8 @@ again: | |||
| 408 | INIT_LIST_HEAD(&h->new_bgs); | 455 | INIT_LIST_HEAD(&h->new_bgs); |
| 409 | 456 | ||
| 410 | smp_mb(); | 457 | smp_mb(); |
| 411 | if (cur_trans->blocked && may_wait_transaction(root, type)) { | 458 | if (cur_trans->state >= TRANS_STATE_BLOCKED && |
| 459 | may_wait_transaction(root, type)) { | ||
| 412 | btrfs_commit_transaction(h, root); | 460 | btrfs_commit_transaction(h, root); |
| 413 | goto again; | 461 | goto again; |
| 414 | } | 462 | } |
| @@ -429,7 +477,7 @@ got_it: | |||
| 429 | return h; | 477 | return h; |
| 430 | 478 | ||
| 431 | join_fail: | 479 | join_fail: |
| 432 | if (type < TRANS_JOIN_NOLOCK) | 480 | if (type & __TRANS_FREEZABLE) |
| 433 | sb_end_intwrite(root->fs_info->sb); | 481 | sb_end_intwrite(root->fs_info->sb); |
| 434 | kmem_cache_free(btrfs_trans_handle_cachep, h); | 482 | kmem_cache_free(btrfs_trans_handle_cachep, h); |
| 435 | alloc_fail: | 483 | alloc_fail: |
| @@ -490,7 +538,7 @@ struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) | |||
| 490 | } | 538 | } |
| 491 | 539 | ||
| 492 | /* | 540 | /* |
| 493 | * btrfs_attach_transaction() - catch the running transaction | 541 | * btrfs_attach_transaction_barrier() - catch the running transaction |
| 494 | * | 542 | * |
| 495 | * It is similar to the above function, the difference is that this one | 543 | * It is similar to the above function, the difference is that this one |
| 496 | * will wait for all the inactive transactions until they fully | 544 | * will wait for all the inactive transactions until they fully |
| @@ -512,7 +560,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root) | |||
| 512 | static noinline void wait_for_commit(struct btrfs_root *root, | 560 | static noinline void wait_for_commit(struct btrfs_root *root, |
| 513 | struct btrfs_transaction *commit) | 561 | struct btrfs_transaction *commit) |
| 514 | { | 562 | { |
| 515 | wait_event(commit->commit_wait, commit->commit_done); | 563 | wait_event(commit->commit_wait, commit->state == TRANS_STATE_COMPLETED); |
| 516 | } | 564 | } |
| 517 | 565 | ||
| 518 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | 566 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) |
| @@ -548,8 +596,8 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
| 548 | spin_lock(&root->fs_info->trans_lock); | 596 | spin_lock(&root->fs_info->trans_lock); |
| 549 | list_for_each_entry_reverse(t, &root->fs_info->trans_list, | 597 | list_for_each_entry_reverse(t, &root->fs_info->trans_list, |
| 550 | list) { | 598 | list) { |
| 551 | if (t->in_commit) { | 599 | if (t->state >= TRANS_STATE_COMMIT_START) { |
| 552 | if (t->commit_done) | 600 | if (t->state == TRANS_STATE_COMPLETED) |
| 553 | break; | 601 | break; |
| 554 | cur_trans = t; | 602 | cur_trans = t; |
| 555 | atomic_inc(&cur_trans->use_count); | 603 | atomic_inc(&cur_trans->use_count); |
| @@ -576,10 +624,11 @@ void btrfs_throttle(struct btrfs_root *root) | |||
| 576 | static int should_end_transaction(struct btrfs_trans_handle *trans, | 624 | static int should_end_transaction(struct btrfs_trans_handle *trans, |
| 577 | struct btrfs_root *root) | 625 | struct btrfs_root *root) |
| 578 | { | 626 | { |
| 579 | int ret; | 627 | if (root->fs_info->global_block_rsv.space_info->full && |
| 628 | btrfs_should_throttle_delayed_refs(trans, root)) | ||
| 629 | return 1; | ||
| 580 | 630 | ||
| 581 | ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5); | 631 | return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5); |
| 582 | return ret ? 1 : 0; | ||
| 583 | } | 632 | } |
| 584 | 633 | ||
| 585 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | 634 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, |
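should_end_transaction() now also asks for an early end when the global block reserve is full and delayed refs need throttling; the `!!` in the final return collapses whatever non-zero value btrfs_block_rsv_check() produced into 0 or 1, exactly what the old `ret ? 1 : 0` did. A trivial demonstration of that normalization idiom:

```c
/* "!!expr" normalizes any non-zero value to 1 without a temporary. */
#include <stdio.h>

static int check(void)
{
	return 42;	/* pretend the reserve check returned some non-zero value */
}

int main(void)
{
	printf("normalized: %d\n", !!check());	/* prints 1 */
	return 0;
}
```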
| @@ -590,7 +639,8 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
| 590 | int err; | 639 | int err; |
| 591 | 640 | ||
| 592 | smp_mb(); | 641 | smp_mb(); |
| 593 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | 642 | if (cur_trans->state >= TRANS_STATE_BLOCKED || |
| 643 | cur_trans->delayed_refs.flushing) | ||
| 594 | return 1; | 644 | return 1; |
| 595 | 645 | ||
| 596 | updates = trans->delayed_ref_updates; | 646 | updates = trans->delayed_ref_updates; |
| @@ -609,7 +659,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 609 | { | 659 | { |
| 610 | struct btrfs_transaction *cur_trans = trans->transaction; | 660 | struct btrfs_transaction *cur_trans = trans->transaction; |
| 611 | struct btrfs_fs_info *info = root->fs_info; | 661 | struct btrfs_fs_info *info = root->fs_info; |
| 612 | int count = 0; | 662 | unsigned long cur = trans->delayed_ref_updates; |
| 613 | int lock = (trans->type != TRANS_JOIN_NOLOCK); | 663 | int lock = (trans->type != TRANS_JOIN_NOLOCK); |
| 614 | int err = 0; | 664 | int err = 0; |
| 615 | 665 | ||
| @@ -638,17 +688,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 638 | if (!list_empty(&trans->new_bgs)) | 688 | if (!list_empty(&trans->new_bgs)) |
| 639 | btrfs_create_pending_block_groups(trans, root); | 689 | btrfs_create_pending_block_groups(trans, root); |
| 640 | 690 | ||
| 641 | while (count < 1) { | 691 | trans->delayed_ref_updates = 0; |
| 642 | unsigned long cur = trans->delayed_ref_updates; | 692 | if (btrfs_should_throttle_delayed_refs(trans, root)) { |
| 693 | cur = max_t(unsigned long, cur, 1); | ||
| 643 | trans->delayed_ref_updates = 0; | 694 | trans->delayed_ref_updates = 0; |
| 644 | if (cur && | 695 | btrfs_run_delayed_refs(trans, root, cur); |
| 645 | trans->transaction->delayed_refs.num_heads_ready > 64) { | ||
| 646 | trans->delayed_ref_updates = 0; | ||
| 647 | btrfs_run_delayed_refs(trans, root, cur); | ||
| 648 | } else { | ||
| 649 | break; | ||
| 650 | } | ||
| 651 | count++; | ||
| 652 | } | 696 | } |
| 653 | 697 | ||
| 654 | btrfs_trans_release_metadata(trans, root); | 698 | btrfs_trans_release_metadata(trans, root); |
| @@ -658,12 +702,15 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 658 | btrfs_create_pending_block_groups(trans, root); | 702 | btrfs_create_pending_block_groups(trans, root); |
| 659 | 703 | ||
| 660 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && | 704 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && |
| 661 | should_end_transaction(trans, root)) { | 705 | should_end_transaction(trans, root) && |
| 662 | trans->transaction->blocked = 1; | 706 | ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { |
| 663 | smp_wmb(); | 707 | spin_lock(&info->trans_lock); |
| 708 | if (cur_trans->state == TRANS_STATE_RUNNING) | ||
| 709 | cur_trans->state = TRANS_STATE_BLOCKED; | ||
| 710 | spin_unlock(&info->trans_lock); | ||
| 664 | } | 711 | } |
| 665 | 712 | ||
| 666 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { | 713 | if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { |
| 667 | if (throttle) { | 714 | if (throttle) { |
| 668 | /* | 715 | /* |
| 669 | * We may race with somebody else here so end up having | 716 | * We may race with somebody else here so end up having |
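The end-of-transaction path above now flips RUNNING to BLOCKED under trans_lock, after a cheap ACCESS_ONCE() peek, instead of writing a bare flag and pairing it with smp_wmb(). A simplified userspace sketch of that peek-then-lock-then-recheck pattern (a plain mutex stands in for trans_lock; not kernel code):

```c
/* Cheap unlocked peek, then take the lock and re-check before changing state. */
#include <pthread.h>
#include <stdio.h>

enum state { RUNNING, BLOCKED };

static pthread_mutex_t trans_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile enum state cur_state = RUNNING;

static void maybe_block(void)
{
	if (cur_state != RUNNING)	/* unlocked peek: usually avoids the lock */
		return;

	pthread_mutex_lock(&trans_lock);
	if (cur_state == RUNNING)	/* re-check now that we hold the lock */
		cur_state = BLOCKED;
	pthread_mutex_unlock(&trans_lock);
}

int main(void)
{
	maybe_block();
	printf("state = %s\n", cur_state == BLOCKED ? "BLOCKED" : "RUNNING");
	return 0;
}
```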
| @@ -677,12 +724,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 677 | } | 724 | } |
| 678 | } | 725 | } |
| 679 | 726 | ||
| 680 | if (trans->type < TRANS_JOIN_NOLOCK) | 727 | if (trans->type & __TRANS_FREEZABLE) |
| 681 | sb_end_intwrite(root->fs_info->sb); | 728 | sb_end_intwrite(root->fs_info->sb); |
| 682 | 729 | ||
| 683 | WARN_ON(cur_trans != info->running_transaction); | 730 | WARN_ON(cur_trans != info->running_transaction); |
| 684 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); | 731 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); |
| 685 | atomic_dec(&cur_trans->num_writers); | 732 | atomic_dec(&cur_trans->num_writers); |
| 733 | extwriter_counter_dec(cur_trans, trans->type); | ||
| 686 | 734 | ||
| 687 | smp_mb(); | 735 | smp_mb(); |
| 688 | if (waitqueue_active(&cur_trans->writer_wait)) | 736 | if (waitqueue_active(&cur_trans->writer_wait)) |
| @@ -736,9 +784,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
| 736 | struct extent_state *cached_state = NULL; | 784 | struct extent_state *cached_state = NULL; |
| 737 | u64 start = 0; | 785 | u64 start = 0; |
| 738 | u64 end; | 786 | u64 end; |
| 739 | struct blk_plug plug; | ||
| 740 | 787 | ||
| 741 | blk_start_plug(&plug); | ||
| 742 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 788 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
| 743 | mark, &cached_state)) { | 789 | mark, &cached_state)) { |
| 744 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, | 790 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, |
| @@ -752,7 +798,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
| 752 | } | 798 | } |
| 753 | if (err) | 799 | if (err) |
| 754 | werr = err; | 800 | werr = err; |
| 755 | blk_finish_plug(&plug); | ||
| 756 | return werr; | 801 | return werr; |
| 757 | } | 802 | } |
| 758 | 803 | ||
| @@ -797,8 +842,11 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | |||
| 797 | { | 842 | { |
| 798 | int ret; | 843 | int ret; |
| 799 | int ret2; | 844 | int ret2; |
| 845 | struct blk_plug plug; | ||
| 800 | 846 | ||
| 847 | blk_start_plug(&plug); | ||
| 801 | ret = btrfs_write_marked_extents(root, dirty_pages, mark); | 848 | ret = btrfs_write_marked_extents(root, dirty_pages, mark); |
| 849 | blk_finish_plug(&plug); | ||
| 802 | ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark); | 850 | ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark); |
| 803 | 851 | ||
| 804 | if (ret) | 852 | if (ret) |
| @@ -1318,20 +1366,26 @@ static void update_super_roots(struct btrfs_root *root) | |||
| 1318 | 1366 | ||
| 1319 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | 1367 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) |
| 1320 | { | 1368 | { |
| 1369 | struct btrfs_transaction *trans; | ||
| 1321 | int ret = 0; | 1370 | int ret = 0; |
| 1371 | |||
| 1322 | spin_lock(&info->trans_lock); | 1372 | spin_lock(&info->trans_lock); |
| 1323 | if (info->running_transaction) | 1373 | trans = info->running_transaction; |
| 1324 | ret = info->running_transaction->in_commit; | 1374 | if (trans) |
| 1375 | ret = (trans->state >= TRANS_STATE_COMMIT_START); | ||
| 1325 | spin_unlock(&info->trans_lock); | 1376 | spin_unlock(&info->trans_lock); |
| 1326 | return ret; | 1377 | return ret; |
| 1327 | } | 1378 | } |
| 1328 | 1379 | ||
| 1329 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) | 1380 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) |
| 1330 | { | 1381 | { |
| 1382 | struct btrfs_transaction *trans; | ||
| 1331 | int ret = 0; | 1383 | int ret = 0; |
| 1384 | |||
| 1332 | spin_lock(&info->trans_lock); | 1385 | spin_lock(&info->trans_lock); |
| 1333 | if (info->running_transaction) | 1386 | trans = info->running_transaction; |
| 1334 | ret = info->running_transaction->blocked; | 1387 | if (trans) |
| 1388 | ret = is_transaction_blocked(trans); | ||
| 1335 | spin_unlock(&info->trans_lock); | 1389 | spin_unlock(&info->trans_lock); |
| 1336 | return ret; | 1390 | return ret; |
| 1337 | } | 1391 | } |
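
Both query helpers above now read the single state field under trans_lock instead of the old in_commit and blocked flags. is_transaction_blocked() is not shown in this excerpt; a hedged sketch that is consistent with the btrfs_trans_state enum added in transaction.h below:

```c
/* Sketch -- the real predicate may differ in detail. */
static inline int is_transaction_blocked(struct btrfs_transaction *trans)
{
	return (trans->state >= TRANS_STATE_BLOCKED &&
		trans->state < TRANS_STATE_UNBLOCKED &&
		!trans->aborted);
}
```
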
| @@ -1343,7 +1397,9 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info) | |||
| 1343 | static void wait_current_trans_commit_start(struct btrfs_root *root, | 1397 | static void wait_current_trans_commit_start(struct btrfs_root *root, |
| 1344 | struct btrfs_transaction *trans) | 1398 | struct btrfs_transaction *trans) |
| 1345 | { | 1399 | { |
| 1346 | wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit); | 1400 | wait_event(root->fs_info->transaction_blocked_wait, |
| 1401 | trans->state >= TRANS_STATE_COMMIT_START || | ||
| 1402 | trans->aborted); | ||
| 1347 | } | 1403 | } |
| 1348 | 1404 | ||
| 1349 | /* | 1405 | /* |
| @@ -1354,7 +1410,8 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, | |||
| 1354 | struct btrfs_transaction *trans) | 1410 | struct btrfs_transaction *trans) |
| 1355 | { | 1411 | { |
| 1356 | wait_event(root->fs_info->transaction_wait, | 1412 | wait_event(root->fs_info->transaction_wait, |
| 1357 | trans->commit_done || (trans->in_commit && !trans->blocked)); | 1413 | trans->state >= TRANS_STATE_UNBLOCKED || |
| 1414 | trans->aborted); | ||
| 1358 | } | 1415 | } |
| 1359 | 1416 | ||
| 1360 | /* | 1417 | /* |
| @@ -1450,26 +1507,31 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, | |||
| 1450 | 1507 | ||
| 1451 | spin_lock(&root->fs_info->trans_lock); | 1508 | spin_lock(&root->fs_info->trans_lock); |
| 1452 | 1509 | ||
| 1453 | if (list_empty(&cur_trans->list)) { | 1510 | /* |
| 1454 | spin_unlock(&root->fs_info->trans_lock); | 1511 | * If the transaction is removed from the list, it means this |
| 1455 | btrfs_end_transaction(trans, root); | 1512 | * transaction has been committed successfully, so it is impossible |
| 1456 | return; | 1513 | * to call the cleanup function. |
| 1457 | } | 1514 | */ |
| 1515 | BUG_ON(list_empty(&cur_trans->list)); | ||
| 1458 | 1516 | ||
| 1459 | list_del_init(&cur_trans->list); | 1517 | list_del_init(&cur_trans->list); |
| 1460 | if (cur_trans == root->fs_info->running_transaction) { | 1518 | if (cur_trans == root->fs_info->running_transaction) { |
| 1461 | root->fs_info->trans_no_join = 1; | 1519 | cur_trans->state = TRANS_STATE_COMMIT_DOING; |
| 1462 | spin_unlock(&root->fs_info->trans_lock); | 1520 | spin_unlock(&root->fs_info->trans_lock); |
| 1463 | wait_event(cur_trans->writer_wait, | 1521 | wait_event(cur_trans->writer_wait, |
| 1464 | atomic_read(&cur_trans->num_writers) == 1); | 1522 | atomic_read(&cur_trans->num_writers) == 1); |
| 1465 | 1523 | ||
| 1466 | spin_lock(&root->fs_info->trans_lock); | 1524 | spin_lock(&root->fs_info->trans_lock); |
| 1467 | root->fs_info->running_transaction = NULL; | ||
| 1468 | } | 1525 | } |
| 1469 | spin_unlock(&root->fs_info->trans_lock); | 1526 | spin_unlock(&root->fs_info->trans_lock); |
| 1470 | 1527 | ||
| 1471 | btrfs_cleanup_one_transaction(trans->transaction, root); | 1528 | btrfs_cleanup_one_transaction(trans->transaction, root); |
| 1472 | 1529 | ||
| 1530 | spin_lock(&root->fs_info->trans_lock); | ||
| 1531 | if (cur_trans == root->fs_info->running_transaction) | ||
| 1532 | root->fs_info->running_transaction = NULL; | ||
| 1533 | spin_unlock(&root->fs_info->trans_lock); | ||
| 1534 | |||
| 1473 | put_transaction(cur_trans); | 1535 | put_transaction(cur_trans); |
| 1474 | put_transaction(cur_trans); | 1536 | put_transaction(cur_trans); |
| 1475 | 1537 | ||
| @@ -1481,33 +1543,13 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, | |||
| 1481 | current->journal_info = NULL; | 1543 | current->journal_info = NULL; |
| 1482 | 1544 | ||
| 1483 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1545 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 1484 | |||
| 1485 | spin_lock(&root->fs_info->trans_lock); | ||
| 1486 | root->fs_info->trans_no_join = 0; | ||
| 1487 | spin_unlock(&root->fs_info->trans_lock); | ||
| 1488 | } | 1546 | } |
| 1489 | 1547 | ||
| 1490 | static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | 1548 | static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, |
| 1491 | struct btrfs_root *root) | 1549 | struct btrfs_root *root) |
| 1492 | { | 1550 | { |
| 1493 | int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); | ||
| 1494 | int snap_pending = 0; | ||
| 1495 | int ret; | 1551 | int ret; |
| 1496 | 1552 | ||
| 1497 | if (!flush_on_commit) { | ||
| 1498 | spin_lock(&root->fs_info->trans_lock); | ||
| 1499 | if (!list_empty(&trans->transaction->pending_snapshots)) | ||
| 1500 | snap_pending = 1; | ||
| 1501 | spin_unlock(&root->fs_info->trans_lock); | ||
| 1502 | } | ||
| 1503 | |||
| 1504 | if (flush_on_commit || snap_pending) { | ||
| 1505 | ret = btrfs_start_delalloc_inodes(root, 1); | ||
| 1506 | if (ret) | ||
| 1507 | return ret; | ||
| 1508 | btrfs_wait_ordered_extents(root, 1); | ||
| 1509 | } | ||
| 1510 | |||
| 1511 | ret = btrfs_run_delayed_items(trans, root); | 1553 | ret = btrfs_run_delayed_items(trans, root); |
| 1512 | if (ret) | 1554 | if (ret) |
| 1513 | return ret; | 1555 | return ret; |
| @@ -1531,23 +1573,25 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | |||
| 1531 | return ret; | 1573 | return ret; |
| 1532 | } | 1574 | } |
| 1533 | 1575 | ||
| 1534 | /* | 1576 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) |
| 1535 | * btrfs_transaction state sequence: | 1577 | { |
| 1536 | * in_commit = 0, blocked = 0 (initial) | 1578 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) |
| 1537 | * in_commit = 1, blocked = 1 | 1579 | return btrfs_start_all_delalloc_inodes(fs_info, 1); |
| 1538 | * blocked = 0 | 1580 | return 0; |
| 1539 | * commit_done = 1 | 1581 | } |
| 1540 | */ | 1582 | |
| 1583 | static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) | ||
| 1584 | { | ||
| 1585 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) | ||
| 1586 | btrfs_wait_all_ordered_extents(fs_info, 1); | ||
| 1587 | } | ||
| 1588 | |||
| 1541 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 1589 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
| 1542 | struct btrfs_root *root) | 1590 | struct btrfs_root *root) |
| 1543 | { | 1591 | { |
| 1544 | unsigned long joined = 0; | ||
| 1545 | struct btrfs_transaction *cur_trans = trans->transaction; | 1592 | struct btrfs_transaction *cur_trans = trans->transaction; |
| 1546 | struct btrfs_transaction *prev_trans = NULL; | 1593 | struct btrfs_transaction *prev_trans = NULL; |
| 1547 | DEFINE_WAIT(wait); | ||
| 1548 | int ret; | 1594 | int ret; |
| 1549 | int should_grow = 0; | ||
| 1550 | unsigned long now = get_seconds(); | ||
| 1551 | 1595 | ||
| 1552 | ret = btrfs_run_ordered_operations(trans, root, 0); | 1596 | ret = btrfs_run_ordered_operations(trans, root, 0); |
| 1553 | if (ret) { | 1597 | if (ret) { |
| @@ -1586,6 +1630,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1586 | * start sending their work down. | 1630 | * start sending their work down. |
| 1587 | */ | 1631 | */ |
| 1588 | cur_trans->delayed_refs.flushing = 1; | 1632 | cur_trans->delayed_refs.flushing = 1; |
| 1633 | smp_wmb(); | ||
| 1589 | 1634 | ||
| 1590 | if (!list_empty(&trans->new_bgs)) | 1635 | if (!list_empty(&trans->new_bgs)) |
| 1591 | btrfs_create_pending_block_groups(trans, root); | 1636 | btrfs_create_pending_block_groups(trans, root); |
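
The smp_wmb() added after setting delayed_refs.flushing makes that store visible before any later stores from the committing task; readers of such a flag normally pair it with a read-side barrier. The snippet below shows the generic publish/consume pairing the barrier belongs to, with hypothetical variable names; it is not the actual btrfs reader, which lies outside this excerpt:

```c
/* writer: publish data, then the flag */
shared_data = compute();
smp_wmb();			/* order the data store before the flag store */
data_ready = 1;

/* reader on another CPU: observe the flag, then the data */
if (data_ready) {
	smp_rmb();		/* pairs with the writer's smp_wmb() */
	consume(shared_data);
}
```
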
| @@ -1596,9 +1641,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1596 | return ret; | 1641 | return ret; |
| 1597 | } | 1642 | } |
| 1598 | 1643 | ||
| 1599 | spin_lock(&cur_trans->commit_lock); | 1644 | spin_lock(&root->fs_info->trans_lock); |
| 1600 | if (cur_trans->in_commit) { | 1645 | if (cur_trans->state >= TRANS_STATE_COMMIT_START) { |
| 1601 | spin_unlock(&cur_trans->commit_lock); | 1646 | spin_unlock(&root->fs_info->trans_lock); |
| 1602 | atomic_inc(&cur_trans->use_count); | 1647 | atomic_inc(&cur_trans->use_count); |
| 1603 | ret = btrfs_end_transaction(trans, root); | 1648 | ret = btrfs_end_transaction(trans, root); |
| 1604 | 1649 | ||
| @@ -1609,16 +1654,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1609 | return ret; | 1654 | return ret; |
| 1610 | } | 1655 | } |
| 1611 | 1656 | ||
| 1612 | trans->transaction->in_commit = 1; | 1657 | cur_trans->state = TRANS_STATE_COMMIT_START; |
| 1613 | trans->transaction->blocked = 1; | ||
| 1614 | spin_unlock(&cur_trans->commit_lock); | ||
| 1615 | wake_up(&root->fs_info->transaction_blocked_wait); | 1658 | wake_up(&root->fs_info->transaction_blocked_wait); |
| 1616 | 1659 | ||
| 1617 | spin_lock(&root->fs_info->trans_lock); | ||
| 1618 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 1660 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
| 1619 | prev_trans = list_entry(cur_trans->list.prev, | 1661 | prev_trans = list_entry(cur_trans->list.prev, |
| 1620 | struct btrfs_transaction, list); | 1662 | struct btrfs_transaction, list); |
| 1621 | if (!prev_trans->commit_done) { | 1663 | if (prev_trans->state != TRANS_STATE_COMPLETED) { |
| 1622 | atomic_inc(&prev_trans->use_count); | 1664 | atomic_inc(&prev_trans->use_count); |
| 1623 | spin_unlock(&root->fs_info->trans_lock); | 1665 | spin_unlock(&root->fs_info->trans_lock); |
| 1624 | 1666 | ||
| @@ -1632,42 +1674,32 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1632 | spin_unlock(&root->fs_info->trans_lock); | 1674 | spin_unlock(&root->fs_info->trans_lock); |
| 1633 | } | 1675 | } |
| 1634 | 1676 | ||
| 1635 | if (!btrfs_test_opt(root, SSD) && | 1677 | extwriter_counter_dec(cur_trans, trans->type); |
| 1636 | (now < cur_trans->start_time || now - cur_trans->start_time < 1)) | ||
| 1637 | should_grow = 1; | ||
| 1638 | |||
| 1639 | do { | ||
| 1640 | joined = cur_trans->num_joined; | ||
| 1641 | |||
| 1642 | WARN_ON(cur_trans != trans->transaction); | ||
| 1643 | |||
| 1644 | ret = btrfs_flush_all_pending_stuffs(trans, root); | ||
| 1645 | if (ret) | ||
| 1646 | goto cleanup_transaction; | ||
| 1647 | 1678 | ||
| 1648 | prepare_to_wait(&cur_trans->writer_wait, &wait, | 1679 | ret = btrfs_start_delalloc_flush(root->fs_info); |
| 1649 | TASK_UNINTERRUPTIBLE); | 1680 | if (ret) |
| 1681 | goto cleanup_transaction; | ||
| 1650 | 1682 | ||
| 1651 | if (atomic_read(&cur_trans->num_writers) > 1) | 1683 | ret = btrfs_flush_all_pending_stuffs(trans, root); |
| 1652 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); | 1684 | if (ret) |
| 1653 | else if (should_grow) | 1685 | goto cleanup_transaction; |
| 1654 | schedule_timeout(1); | ||
| 1655 | 1686 | ||
| 1656 | finish_wait(&cur_trans->writer_wait, &wait); | 1687 | wait_event(cur_trans->writer_wait, |
| 1657 | } while (atomic_read(&cur_trans->num_writers) > 1 || | 1688 | extwriter_counter_read(cur_trans) == 0); |
| 1658 | (should_grow && cur_trans->num_joined != joined)); | ||
| 1659 | 1689 | ||
| 1690 | /* some pending stuffs might be added after the previous flush. */ | ||
| 1660 | ret = btrfs_flush_all_pending_stuffs(trans, root); | 1691 | ret = btrfs_flush_all_pending_stuffs(trans, root); |
| 1661 | if (ret) | 1692 | if (ret) |
| 1662 | goto cleanup_transaction; | 1693 | goto cleanup_transaction; |
| 1663 | 1694 | ||
| 1695 | btrfs_wait_delalloc_flush(root->fs_info); | ||
| 1664 | /* | 1696 | /* |
| 1665 | * Ok now we need to make sure to block out any other joins while we | 1697 | * Ok now we need to make sure to block out any other joins while we |
| 1666 | * commit the transaction. We could have started a join before setting | 1698 | * commit the transaction. We could have started a join before setting |
| 1667 | * no_join so make sure to wait for num_writers to == 1 again. | 1699 | * COMMIT_DOING so make sure to wait for num_writers to == 1 again. |
| 1668 | */ | 1700 | */ |
| 1669 | spin_lock(&root->fs_info->trans_lock); | 1701 | spin_lock(&root->fs_info->trans_lock); |
| 1670 | root->fs_info->trans_no_join = 1; | 1702 | cur_trans->state = TRANS_STATE_COMMIT_DOING; |
| 1671 | spin_unlock(&root->fs_info->trans_lock); | 1703 | spin_unlock(&root->fs_info->trans_lock); |
| 1672 | wait_event(cur_trans->writer_wait, | 1704 | wait_event(cur_trans->writer_wait, |
| 1673 | atomic_read(&cur_trans->num_writers) == 1); | 1705 | atomic_read(&cur_trans->num_writers) == 1); |
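
Taken together, the hunk above replaces the old prepare_to_wait()/schedule_timeout() polling loop with a single ordering: the committer drops its own external-writer reference, kicks off the optional delalloc flush, flushes pending work, then sleeps until every remaining external writer has detached before flushing once more. Condensed from the lines shown, with error handling elided:

```c
extwriter_counter_dec(cur_trans, trans->type);		/* stop counting ourselves */

ret = btrfs_start_delalloc_flush(root->fs_info);	/* no-op unless FLUSHONCOMMIT */
ret = btrfs_flush_all_pending_stuffs(trans, root);

wait_event(cur_trans->writer_wait,
	   extwriter_counter_read(cur_trans) == 0);	/* all external writers gone */

ret = btrfs_flush_all_pending_stuffs(trans, root);	/* work queued while we slept */
btrfs_wait_delalloc_flush(root->fs_info);
```
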
| @@ -1794,10 +1826,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1794 | memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, | 1826 | memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, |
| 1795 | sizeof(*root->fs_info->super_copy)); | 1827 | sizeof(*root->fs_info->super_copy)); |
| 1796 | 1828 | ||
| 1797 | trans->transaction->blocked = 0; | ||
| 1798 | spin_lock(&root->fs_info->trans_lock); | 1829 | spin_lock(&root->fs_info->trans_lock); |
| 1830 | cur_trans->state = TRANS_STATE_UNBLOCKED; | ||
| 1799 | root->fs_info->running_transaction = NULL; | 1831 | root->fs_info->running_transaction = NULL; |
| 1800 | root->fs_info->trans_no_join = 0; | ||
| 1801 | spin_unlock(&root->fs_info->trans_lock); | 1832 | spin_unlock(&root->fs_info->trans_lock); |
| 1802 | mutex_unlock(&root->fs_info->reloc_mutex); | 1833 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1803 | 1834 | ||
| @@ -1825,10 +1856,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1825 | 1856 | ||
| 1826 | btrfs_finish_extent_commit(trans, root); | 1857 | btrfs_finish_extent_commit(trans, root); |
| 1827 | 1858 | ||
| 1828 | cur_trans->commit_done = 1; | ||
| 1829 | |||
| 1830 | root->fs_info->last_trans_committed = cur_trans->transid; | 1859 | root->fs_info->last_trans_committed = cur_trans->transid; |
| 1831 | 1860 | /* | |
| 1861 | * We needn't acquire the lock here because there is no other task | ||
| 1862 | * which can change it. | ||
| 1863 | */ | ||
| 1864 | cur_trans->state = TRANS_STATE_COMPLETED; | ||
| 1832 | wake_up(&cur_trans->commit_wait); | 1865 | wake_up(&cur_trans->commit_wait); |
| 1833 | 1866 | ||
| 1834 | spin_lock(&root->fs_info->trans_lock); | 1867 | spin_lock(&root->fs_info->trans_lock); |
| @@ -1838,7 +1871,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1838 | put_transaction(cur_trans); | 1871 | put_transaction(cur_trans); |
| 1839 | put_transaction(cur_trans); | 1872 | put_transaction(cur_trans); |
| 1840 | 1873 | ||
| 1841 | if (trans->type < TRANS_JOIN_NOLOCK) | 1874 | if (trans->type & __TRANS_FREEZABLE) |
| 1842 | sb_end_intwrite(root->fs_info->sb); | 1875 | sb_end_intwrite(root->fs_info->sb); |
| 1843 | 1876 | ||
| 1844 | trace_btrfs_transaction_commit(root); | 1877 | trace_btrfs_transaction_commit(root); |
| @@ -1885,11 +1918,6 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) | |||
| 1885 | int ret; | 1918 | int ret; |
| 1886 | struct btrfs_fs_info *fs_info = root->fs_info; | 1919 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 1887 | 1920 | ||
| 1888 | if (fs_info->sb->s_flags & MS_RDONLY) { | ||
| 1889 | pr_debug("btrfs: cleaner called for RO fs!\n"); | ||
| 1890 | return 0; | ||
| 1891 | } | ||
| 1892 | |||
| 1893 | spin_lock(&fs_info->trans_lock); | 1921 | spin_lock(&fs_info->trans_lock); |
| 1894 | if (list_empty(&fs_info->dead_roots)) { | 1922 | if (list_empty(&fs_info->dead_roots)) { |
| 1895 | spin_unlock(&fs_info->trans_lock); | 1923 | spin_unlock(&fs_info->trans_lock); |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 24c97335a59f..005b0375d18c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
| @@ -22,21 +22,33 @@ | |||
| 22 | #include "delayed-ref.h" | 22 | #include "delayed-ref.h" |
| 23 | #include "ctree.h" | 23 | #include "ctree.h" |
| 24 | 24 | ||
| 25 | enum btrfs_trans_state { | ||
| 26 | TRANS_STATE_RUNNING = 0, | ||
| 27 | TRANS_STATE_BLOCKED = 1, | ||
| 28 | TRANS_STATE_COMMIT_START = 2, | ||
| 29 | TRANS_STATE_COMMIT_DOING = 3, | ||
| 30 | TRANS_STATE_UNBLOCKED = 4, | ||
| 31 | TRANS_STATE_COMPLETED = 5, | ||
| 32 | TRANS_STATE_MAX = 6, | ||
| 33 | }; | ||
| 34 | |||
| 25 | struct btrfs_transaction { | 35 | struct btrfs_transaction { |
| 26 | u64 transid; | 36 | u64 transid; |
| 27 | /* | 37 | /* |
| 38 | * total external writers(USERSPACE/START/ATTACH) in this | ||
| 39 | * transaction, it must be zero before the transaction is | ||
| 40 | * being committed | ||
| 41 | */ | ||
| 42 | atomic_t num_extwriters; | ||
| 43 | /* | ||
| 28 | * total writers in this transaction, it must be zero before the | 44 | * total writers in this transaction, it must be zero before the |
| 29 | * transaction can end | 45 | * transaction can end |
| 30 | */ | 46 | */ |
| 31 | atomic_t num_writers; | 47 | atomic_t num_writers; |
| 32 | atomic_t use_count; | 48 | atomic_t use_count; |
| 33 | 49 | ||
| 34 | unsigned long num_joined; | 50 | /* Be protected by fs_info->trans_lock when we want to change it. */ |
| 35 | 51 | enum btrfs_trans_state state; | |
| 36 | spinlock_t commit_lock; | ||
| 37 | int in_commit; | ||
| 38 | int commit_done; | ||
| 39 | int blocked; | ||
| 40 | struct list_head list; | 52 | struct list_head list; |
| 41 | struct extent_io_tree dirty_pages; | 53 | struct extent_io_tree dirty_pages; |
| 42 | unsigned long start_time; | 54 | unsigned long start_time; |
| @@ -44,17 +56,27 @@ struct btrfs_transaction { | |||
| 44 | wait_queue_head_t commit_wait; | 56 | wait_queue_head_t commit_wait; |
| 45 | struct list_head pending_snapshots; | 57 | struct list_head pending_snapshots; |
| 46 | struct list_head ordered_operations; | 58 | struct list_head ordered_operations; |
| 59 | struct list_head pending_chunks; | ||
| 47 | struct btrfs_delayed_ref_root delayed_refs; | 60 | struct btrfs_delayed_ref_root delayed_refs; |
| 48 | int aborted; | 61 | int aborted; |
| 49 | }; | 62 | }; |
| 50 | 63 | ||
| 51 | enum btrfs_trans_type { | 64 | #define __TRANS_FREEZABLE (1U << 0) |
| 52 | TRANS_START, | 65 | |
| 53 | TRANS_JOIN, | 66 | #define __TRANS_USERSPACE (1U << 8) |
| 54 | TRANS_USERSPACE, | 67 | #define __TRANS_START (1U << 9) |
| 55 | TRANS_JOIN_NOLOCK, | 68 | #define __TRANS_ATTACH (1U << 10) |
| 56 | TRANS_ATTACH, | 69 | #define __TRANS_JOIN (1U << 11) |
| 57 | }; | 70 | #define __TRANS_JOIN_NOLOCK (1U << 12) |
| 71 | |||
| 72 | #define TRANS_USERSPACE (__TRANS_USERSPACE | __TRANS_FREEZABLE) | ||
| 73 | #define TRANS_START (__TRANS_START | __TRANS_FREEZABLE) | ||
| 74 | #define TRANS_ATTACH (__TRANS_ATTACH) | ||
| 75 | #define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE) | ||
| 76 | #define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK) | ||
| 77 | |||
| 78 | #define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \ | ||
| 79 | __TRANS_ATTACH) | ||
| 58 | 80 | ||
| 59 | struct btrfs_trans_handle { | 81 | struct btrfs_trans_handle { |
| 60 | u64 transid; | 82 | u64 transid; |
| @@ -70,7 +92,7 @@ struct btrfs_trans_handle { | |||
| 70 | short aborted; | 92 | short aborted; |
| 71 | short adding_csums; | 93 | short adding_csums; |
| 72 | bool allocating_chunk; | 94 | bool allocating_chunk; |
| 73 | enum btrfs_trans_type type; | 95 | unsigned int type; |
| 74 | /* | 96 | /* |
| 75 | * this root is only needed to validate that the root passed to | 97 | * this root is only needed to validate that the root passed to |
| 76 | * start_transaction is the same as the one passed to end_transaction. | 98 | * start_transaction is the same as the one passed to end_transaction. |
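
With the btrfs_trans_state enum and the __TRANS_* bits above, a handle's type is a bitmask rather than an ordered enumerator, so call sites test properties with a bitwise AND (as in the `trans->type & __TRANS_FREEZABLE` hunk in transaction.c) instead of comparisons like `type < TRANS_JOIN_NOLOCK`. A small, purely illustrative pair of helpers showing how the masks compose; they are not part of the patch:

```c
/* Hypothetical helpers, for illustration only. */
static inline bool trans_type_is_freezable(unsigned int type)
{
	return type & __TRANS_FREEZABLE;	/* needs sb_start/end_intwrite() */
}

static inline bool trans_type_is_extwriter(unsigned int type)
{
	return type & TRANS_EXTWRITERS;		/* counted in num_extwriters */
}

/*
 * TRANS_START  -> freezable and an external writer
 * TRANS_JOIN   -> freezable, but not an external writer
 * TRANS_ATTACH -> an external writer, but not freezable
 */
```
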
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c276ac9a0ec3..2c6791493637 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | 18 | ||
| 19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
| 20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
| 21 | #include <linux/blkdev.h> | ||
| 21 | #include <linux/list_sort.h> | 22 | #include <linux/list_sort.h> |
| 22 | #include "ctree.h" | 23 | #include "ctree.h" |
| 23 | #include "transaction.h" | 24 | #include "transaction.h" |
| @@ -279,11 +280,23 @@ static int process_one_buffer(struct btrfs_root *log, | |||
| 279 | { | 280 | { |
| 280 | int ret = 0; | 281 | int ret = 0; |
| 281 | 282 | ||
| 283 | /* | ||
| 284 | * If this fs is mixed then we need to be able to process the leaves to | ||
| 285 | * pin down any logged extents, so we have to read the block. | ||
| 286 | */ | ||
| 287 | if (btrfs_fs_incompat(log->fs_info, MIXED_GROUPS)) { | ||
| 288 | ret = btrfs_read_buffer(eb, gen); | ||
| 289 | if (ret) | ||
| 290 | return ret; | ||
| 291 | } | ||
| 292 | |||
| 282 | if (wc->pin) | 293 | if (wc->pin) |
| 283 | ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root, | 294 | ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root, |
| 284 | eb->start, eb->len); | 295 | eb->start, eb->len); |
| 285 | 296 | ||
| 286 | if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) { | 297 | if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) { |
| 298 | if (wc->pin && btrfs_header_level(eb) == 0) | ||
| 299 | ret = btrfs_exclude_logged_extents(log, eb); | ||
| 287 | if (wc->write) | 300 | if (wc->write) |
| 288 | btrfs_write_tree_block(eb); | 301 | btrfs_write_tree_block(eb); |
| 289 | if (wc->wait) | 302 | if (wc->wait) |
| @@ -2016,13 +2029,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
| 2016 | eb, i, &key); | 2029 | eb, i, &key); |
| 2017 | if (ret) | 2030 | if (ret) |
| 2018 | break; | 2031 | break; |
| 2019 | } else if (key.type == BTRFS_INODE_REF_KEY) { | 2032 | } else if (key.type == BTRFS_INODE_REF_KEY || |
| 2020 | ret = add_inode_ref(wc->trans, root, log, path, | 2033 | key.type == BTRFS_INODE_EXTREF_KEY) { |
| 2021 | eb, i, &key); | ||
| 2022 | if (ret && ret != -ENOENT) | ||
| 2023 | break; | ||
| 2024 | ret = 0; | ||
| 2025 | } else if (key.type == BTRFS_INODE_EXTREF_KEY) { | ||
| 2026 | ret = add_inode_ref(wc->trans, root, log, path, | 2034 | ret = add_inode_ref(wc->trans, root, log, path, |
| 2027 | eb, i, &key); | 2035 | eb, i, &key); |
| 2028 | if (ret && ret != -ENOENT) | 2036 | if (ret && ret != -ENOENT) |
| @@ -2358,6 +2366,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2358 | struct btrfs_root *log = root->log_root; | 2366 | struct btrfs_root *log = root->log_root; |
| 2359 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; | 2367 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; |
| 2360 | unsigned long log_transid = 0; | 2368 | unsigned long log_transid = 0; |
| 2369 | struct blk_plug plug; | ||
| 2361 | 2370 | ||
| 2362 | mutex_lock(&root->log_mutex); | 2371 | mutex_lock(&root->log_mutex); |
| 2363 | log_transid = root->log_transid; | 2372 | log_transid = root->log_transid; |
| @@ -2401,8 +2410,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2401 | /* we start IO on all the marked extents here, but we don't actually | 2410 | /* we start IO on all the marked extents here, but we don't actually |
| 2402 | * wait for them until later. | 2411 | * wait for them until later. |
| 2403 | */ | 2412 | */ |
| 2413 | blk_start_plug(&plug); | ||
| 2404 | ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); | 2414 | ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); |
| 2405 | if (ret) { | 2415 | if (ret) { |
| 2416 | blk_finish_plug(&plug); | ||
| 2406 | btrfs_abort_transaction(trans, root, ret); | 2417 | btrfs_abort_transaction(trans, root, ret); |
| 2407 | btrfs_free_logged_extents(log, log_transid); | 2418 | btrfs_free_logged_extents(log, log_transid); |
| 2408 | mutex_unlock(&root->log_mutex); | 2419 | mutex_unlock(&root->log_mutex); |
| @@ -2437,6 +2448,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2437 | } | 2448 | } |
| 2438 | 2449 | ||
| 2439 | if (ret) { | 2450 | if (ret) { |
| 2451 | blk_finish_plug(&plug); | ||
| 2440 | if (ret != -ENOSPC) { | 2452 | if (ret != -ENOSPC) { |
| 2441 | btrfs_abort_transaction(trans, root, ret); | 2453 | btrfs_abort_transaction(trans, root, ret); |
| 2442 | mutex_unlock(&log_root_tree->log_mutex); | 2454 | mutex_unlock(&log_root_tree->log_mutex); |
| @@ -2452,6 +2464,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2452 | 2464 | ||
| 2453 | index2 = log_root_tree->log_transid % 2; | 2465 | index2 = log_root_tree->log_transid % 2; |
| 2454 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2466 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
| 2467 | blk_finish_plug(&plug); | ||
| 2455 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2468 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2456 | wait_log_commit(trans, log_root_tree, | 2469 | wait_log_commit(trans, log_root_tree, |
| 2457 | log_root_tree->log_transid); | 2470 | log_root_tree->log_transid); |
| @@ -2474,6 +2487,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2474 | * check the full commit flag again | 2487 | * check the full commit flag again |
| 2475 | */ | 2488 | */ |
| 2476 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2489 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { |
| 2490 | blk_finish_plug(&plug); | ||
| 2477 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2491 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2478 | btrfs_free_logged_extents(log, log_transid); | 2492 | btrfs_free_logged_extents(log, log_transid); |
| 2479 | mutex_unlock(&log_root_tree->log_mutex); | 2493 | mutex_unlock(&log_root_tree->log_mutex); |
| @@ -2481,9 +2495,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2481 | goto out_wake_log_root; | 2495 | goto out_wake_log_root; |
| 2482 | } | 2496 | } |
| 2483 | 2497 | ||
| 2484 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, | 2498 | ret = btrfs_write_marked_extents(log_root_tree, |
| 2485 | &log_root_tree->dirty_log_pages, | 2499 | &log_root_tree->dirty_log_pages, |
| 2486 | EXTENT_DIRTY | EXTENT_NEW); | 2500 | EXTENT_DIRTY | EXTENT_NEW); |
| 2501 | blk_finish_plug(&plug); | ||
| 2487 | if (ret) { | 2502 | if (ret) { |
| 2488 | btrfs_abort_transaction(trans, root, ret); | 2503 | btrfs_abort_transaction(trans, root, ret); |
| 2489 | btrfs_free_logged_extents(log, log_transid); | 2504 | btrfs_free_logged_extents(log, log_transid); |
| @@ -2491,6 +2506,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2491 | goto out_wake_log_root; | 2506 | goto out_wake_log_root; |
| 2492 | } | 2507 | } |
| 2493 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2508 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2509 | btrfs_wait_marked_extents(log_root_tree, | ||
| 2510 | &log_root_tree->dirty_log_pages, | ||
| 2511 | EXTENT_NEW | EXTENT_DIRTY); | ||
| 2494 | btrfs_wait_logged_extents(log, log_transid); | 2512 | btrfs_wait_logged_extents(log, log_transid); |
| 2495 | 2513 | ||
| 2496 | btrfs_set_super_log_root(root->fs_info->super_for_commit, | 2514 | btrfs_set_super_log_root(root->fs_info->super_for_commit, |
| @@ -4016,8 +4034,7 @@ again: | |||
| 4016 | if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) | 4034 | if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) |
| 4017 | break; | 4035 | break; |
| 4018 | 4036 | ||
| 4019 | log = btrfs_read_fs_root_no_radix(log_root_tree, | 4037 | log = btrfs_read_fs_root(log_root_tree, &found_key); |
| 4020 | &found_key); | ||
| 4021 | if (IS_ERR(log)) { | 4038 | if (IS_ERR(log)) { |
| 4022 | ret = PTR_ERR(log); | 4039 | ret = PTR_ERR(log); |
| 4023 | btrfs_error(fs_info, ret, | 4040 | btrfs_error(fs_info, ret, |
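
btrfs_sync_log() now opens a block plug before submitting the log-tree writes, so every path that leaves the submission phase has to release it; that is why each early exit above gained a blk_finish_plug() call, and why the final write switches from write-and-wait to write, unplug, then wait. A heavily condensed sketch of the resulting shape (the error label is illustrative, and unrelated logic is omitted):

```c
struct blk_plug plug;

blk_start_plug(&plug);
ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
if (ret) {
	blk_finish_plug(&plug);			/* never bail out with the plug held */
	goto out_abort;
}
/* ... more submission while still plugged ... */
ret = btrfs_write_marked_extents(log_root_tree,
				 &log_root_tree->dirty_log_pages,
				 EXTENT_DIRTY | EXTENT_NEW);
blk_finish_plug(&plug);				/* unplug before sleeping in the waits */
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
btrfs_wait_marked_extents(log_root_tree, &log_root_tree->dirty_log_pages,
			  EXTENT_NEW | EXTENT_DIRTY);
```
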
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index 7b417e20efe2..b0a523b2c60e 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c | |||
| @@ -205,6 +205,10 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, | |||
| 205 | u64 new_alloced = ulist->nodes_alloced + 128; | 205 | u64 new_alloced = ulist->nodes_alloced + 128; |
| 206 | struct ulist_node *new_nodes; | 206 | struct ulist_node *new_nodes; |
| 207 | void *old = NULL; | 207 | void *old = NULL; |
| 208 | int i; | ||
| 209 | |||
| 210 | for (i = 0; i < ulist->nnodes; i++) | ||
| 211 | rb_erase(&ulist->nodes[i].rb_node, &ulist->root); | ||
| 208 | 212 | ||
| 209 | /* | 213 | /* |
| 210 | * if nodes_alloced == ULIST_SIZE no memory has been allocated | 214 | * if nodes_alloced == ULIST_SIZE no memory has been allocated |
| @@ -224,6 +228,17 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, | |||
| 224 | 228 | ||
| 225 | ulist->nodes = new_nodes; | 229 | ulist->nodes = new_nodes; |
| 226 | ulist->nodes_alloced = new_alloced; | 230 | ulist->nodes_alloced = new_alloced; |
| 231 | |||
| 232 | /* | ||
| 233 | * krealloc actually uses memcpy, which does not copy rb_node | ||
| 234 | * pointers, so we have to do it ourselves. Otherwise we may | ||
| 235 | * be bitten by crashes. | ||
| 236 | */ | ||
| 237 | for (i = 0; i < ulist->nnodes; i++) { | ||
| 238 | ret = ulist_rbtree_insert(ulist, &ulist->nodes[i]); | ||
| 239 | if (ret < 0) | ||
| 240 | return ret; | ||
| 241 | } | ||
| 227 | } | 242 | } |
| 228 | ulist->nodes[ulist->nnodes].val = val; | 243 | ulist->nodes[ulist->nnodes].val = val; |
| 229 | ulist->nodes[ulist->nnodes].aux = aux; | 244 | ulist->nodes[ulist->nnodes].aux = aux; |
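
The two ulist hunks above deal with a subtle property of krealloc(): when the node array grows and moves, the rb_node structs embedded in it move too, but the parent/child pointers stored in the tree still reference the old addresses. The fix is to unlink every node before the resize and re-insert it afterwards. Condensed from the lines shown (the krealloc() call itself sits between the two loops in the original and is reproduced here only schematically):

```c
/* detach all nodes before the array can move */
for (i = 0; i < ulist->nnodes; i++)
	rb_erase(&ulist->nodes[i].rb_node, &ulist->root);

new_nodes = krealloc(ulist->nodes, new_alloced * sizeof(*new_nodes), GFP_NOFS);
ulist->nodes = new_nodes;		/* embedded rb_nodes now live at new addresses */

/* relink every node so the tree points at the relocated memory */
for (i = 0; i < ulist->nnodes; i++) {
	ret = ulist_rbtree_insert(ulist, &ulist->nodes[i]);
	if (ret < 0)
		return ret;
}
```
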
diff --git a/fs/btrfs/version.h b/fs/btrfs/version.h deleted file mode 100644 index 9bf3946d5ef2..000000000000 --- a/fs/btrfs/version.h +++ /dev/null | |||
| @@ -1,4 +0,0 @@ | |||
| 1 | #ifndef __BTRFS_VERSION_H | ||
| 2 | #define __BTRFS_VERSION_H | ||
| 3 | #define BTRFS_BUILD_VERSION "Btrfs" | ||
| 4 | #endif | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8bffb9174afb..78b871753cb6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -982,6 +982,35 @@ out: | |||
| 982 | return ret; | 982 | return ret; |
| 983 | } | 983 | } |
| 984 | 984 | ||
| 985 | static int contains_pending_extent(struct btrfs_trans_handle *trans, | ||
| 986 | struct btrfs_device *device, | ||
| 987 | u64 *start, u64 len) | ||
| 988 | { | ||
| 989 | struct extent_map *em; | ||
| 990 | int ret = 0; | ||
| 991 | |||
| 992 | list_for_each_entry(em, &trans->transaction->pending_chunks, list) { | ||
| 993 | struct map_lookup *map; | ||
| 994 | int i; | ||
| 995 | |||
| 996 | map = (struct map_lookup *)em->bdev; | ||
| 997 | for (i = 0; i < map->num_stripes; i++) { | ||
| 998 | if (map->stripes[i].dev != device) | ||
| 999 | continue; | ||
| 1000 | if (map->stripes[i].physical >= *start + len || | ||
| 1001 | map->stripes[i].physical + em->orig_block_len <= | ||
| 1002 | *start) | ||
| 1003 | continue; | ||
| 1004 | *start = map->stripes[i].physical + | ||
| 1005 | em->orig_block_len; | ||
| 1006 | ret = 1; | ||
| 1007 | } | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | return ret; | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | |||
| 985 | /* | 1014 | /* |
| 986 | * find_free_dev_extent - find free space in the specified device | 1015 | * find_free_dev_extent - find free space in the specified device |
| 987 | * @device: the device which we search the free space in | 1016 | * @device: the device which we search the free space in |
| @@ -1002,7 +1031,8 @@ out: | |||
| 1002 | * But if we don't find suitable free space, it is used to store the size of | 1031 | * But if we don't find suitable free space, it is used to store the size of |
| 1003 | * the max free space. | 1032 | * the max free space. |
| 1004 | */ | 1033 | */ |
| 1005 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | 1034 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
| 1035 | struct btrfs_device *device, u64 num_bytes, | ||
| 1006 | u64 *start, u64 *len) | 1036 | u64 *start, u64 *len) |
| 1007 | { | 1037 | { |
| 1008 | struct btrfs_key key; | 1038 | struct btrfs_key key; |
| @@ -1026,21 +1056,22 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | |||
| 1026 | */ | 1056 | */ |
| 1027 | search_start = max(root->fs_info->alloc_start, 1024ull * 1024); | 1057 | search_start = max(root->fs_info->alloc_start, 1024ull * 1024); |
| 1028 | 1058 | ||
| 1059 | path = btrfs_alloc_path(); | ||
| 1060 | if (!path) | ||
| 1061 | return -ENOMEM; | ||
| 1062 | again: | ||
| 1029 | max_hole_start = search_start; | 1063 | max_hole_start = search_start; |
| 1030 | max_hole_size = 0; | 1064 | max_hole_size = 0; |
| 1031 | hole_size = 0; | 1065 | hole_size = 0; |
| 1032 | 1066 | ||
| 1033 | if (search_start >= search_end || device->is_tgtdev_for_dev_replace) { | 1067 | if (search_start >= search_end || device->is_tgtdev_for_dev_replace) { |
| 1034 | ret = -ENOSPC; | 1068 | ret = -ENOSPC; |
| 1035 | goto error; | 1069 | goto out; |
| 1036 | } | 1070 | } |
| 1037 | 1071 | ||
| 1038 | path = btrfs_alloc_path(); | ||
| 1039 | if (!path) { | ||
| 1040 | ret = -ENOMEM; | ||
| 1041 | goto error; | ||
| 1042 | } | ||
| 1043 | path->reada = 2; | 1072 | path->reada = 2; |
| 1073 | path->search_commit_root = 1; | ||
| 1074 | path->skip_locking = 1; | ||
| 1044 | 1075 | ||
| 1045 | key.objectid = device->devid; | 1076 | key.objectid = device->devid; |
| 1046 | key.offset = search_start; | 1077 | key.offset = search_start; |
| @@ -1081,6 +1112,15 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | |||
| 1081 | if (key.offset > search_start) { | 1112 | if (key.offset > search_start) { |
| 1082 | hole_size = key.offset - search_start; | 1113 | hole_size = key.offset - search_start; |
| 1083 | 1114 | ||
| 1115 | /* | ||
| 1116 | * Have to check before we set max_hole_start, otherwise | ||
| 1117 | * we could end up sending back this offset anyway. | ||
| 1118 | */ | ||
| 1119 | if (contains_pending_extent(trans, device, | ||
| 1120 | &search_start, | ||
| 1121 | hole_size)) | ||
| 1122 | hole_size = 0; | ||
| 1123 | |||
| 1084 | if (hole_size > max_hole_size) { | 1124 | if (hole_size > max_hole_size) { |
| 1085 | max_hole_start = search_start; | 1125 | max_hole_start = search_start; |
| 1086 | max_hole_size = hole_size; | 1126 | max_hole_size = hole_size; |
| @@ -1124,6 +1164,11 @@ next: | |||
| 1124 | max_hole_size = hole_size; | 1164 | max_hole_size = hole_size; |
| 1125 | } | 1165 | } |
| 1126 | 1166 | ||
| 1167 | if (contains_pending_extent(trans, device, &search_start, hole_size)) { | ||
| 1168 | btrfs_release_path(path); | ||
| 1169 | goto again; | ||
| 1170 | } | ||
| 1171 | |||
| 1127 | /* See above. */ | 1172 | /* See above. */ |
| 1128 | if (hole_size < num_bytes) | 1173 | if (hole_size < num_bytes) |
| 1129 | ret = -ENOSPC; | 1174 | ret = -ENOSPC; |
| @@ -1132,7 +1177,6 @@ next: | |||
| 1132 | 1177 | ||
| 1133 | out: | 1178 | out: |
| 1134 | btrfs_free_path(path); | 1179 | btrfs_free_path(path); |
| 1135 | error: | ||
| 1136 | *start = max_hole_start; | 1180 | *start = max_hole_start; |
| 1137 | if (len) | 1181 | if (len) |
| 1138 | *len = max_hole_size; | 1182 | *len = max_hole_size; |
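
Chunks allocated earlier in the same transaction now exist only as entries on trans->transaction->pending_chunks (their dev extent items are not written until the chunk is finished), so find_free_dev_extent() searches the commit root and must explicitly skip holes that overlap a pending stripe, restarting the scan when the trailing hole is rejected. Condensed from the hunks above:

```c
again:
	max_hole_start = search_start;
	max_hole_size = 0;

	/* ... walk dev extent items in the commit root, and for each gap: ... */
	if (contains_pending_extent(trans, device, &search_start, hole_size))
		hole_size = 0;		/* gap is already claimed by a pending chunk */

	/* the hole after the last item needs the same treatment */
	if (contains_pending_extent(trans, device, &search_start, hole_size)) {
		btrfs_release_path(path);
		goto again;		/* search_start was pushed past the pending stripe */
	}
```
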
| @@ -1244,47 +1288,22 @@ out: | |||
| 1244 | return ret; | 1288 | return ret; |
| 1245 | } | 1289 | } |
| 1246 | 1290 | ||
| 1247 | static noinline int find_next_chunk(struct btrfs_root *root, | 1291 | static u64 find_next_chunk(struct btrfs_fs_info *fs_info) |
| 1248 | u64 objectid, u64 *offset) | ||
| 1249 | { | 1292 | { |
| 1250 | struct btrfs_path *path; | 1293 | struct extent_map_tree *em_tree; |
| 1251 | int ret; | 1294 | struct extent_map *em; |
| 1252 | struct btrfs_key key; | 1295 | struct rb_node *n; |
| 1253 | struct btrfs_chunk *chunk; | 1296 | u64 ret = 0; |
| 1254 | struct btrfs_key found_key; | ||
| 1255 | |||
| 1256 | path = btrfs_alloc_path(); | ||
| 1257 | if (!path) | ||
| 1258 | return -ENOMEM; | ||
| 1259 | |||
| 1260 | key.objectid = objectid; | ||
| 1261 | key.offset = (u64)-1; | ||
| 1262 | key.type = BTRFS_CHUNK_ITEM_KEY; | ||
| 1263 | |||
| 1264 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 1265 | if (ret < 0) | ||
| 1266 | goto error; | ||
| 1267 | |||
| 1268 | BUG_ON(ret == 0); /* Corruption */ | ||
| 1269 | 1297 | ||
| 1270 | ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); | 1298 | em_tree = &fs_info->mapping_tree.map_tree; |
| 1271 | if (ret) { | 1299 | read_lock(&em_tree->lock); |
| 1272 | *offset = 0; | 1300 | n = rb_last(&em_tree->map); |
| 1273 | } else { | 1301 | if (n) { |
| 1274 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | 1302 | em = rb_entry(n, struct extent_map, rb_node); |
| 1275 | path->slots[0]); | 1303 | ret = em->start + em->len; |
| 1276 | if (found_key.objectid != objectid) | ||
| 1277 | *offset = 0; | ||
| 1278 | else { | ||
| 1279 | chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 1280 | struct btrfs_chunk); | ||
| 1281 | *offset = found_key.offset + | ||
| 1282 | btrfs_chunk_length(path->nodes[0], chunk); | ||
| 1283 | } | ||
| 1284 | } | 1304 | } |
| 1285 | ret = 0; | 1305 | read_unlock(&em_tree->lock); |
| 1286 | error: | 1306 | |
| 1287 | btrfs_free_path(path); | ||
| 1288 | return ret; | 1307 | return ret; |
| 1289 | } | 1308 | } |
| 1290 | 1309 | ||
| @@ -1462,31 +1481,23 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1462 | btrfs_dev_replace_unlock(&root->fs_info->dev_replace); | 1481 | btrfs_dev_replace_unlock(&root->fs_info->dev_replace); |
| 1463 | 1482 | ||
| 1464 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { | 1483 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { |
| 1465 | printk(KERN_ERR "btrfs: unable to go below four devices " | 1484 | ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET; |
| 1466 | "on raid10\n"); | ||
| 1467 | ret = -EINVAL; | ||
| 1468 | goto out; | 1485 | goto out; |
| 1469 | } | 1486 | } |
| 1470 | 1487 | ||
| 1471 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) { | 1488 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) { |
| 1472 | printk(KERN_ERR "btrfs: unable to go below two " | 1489 | ret = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET; |
| 1473 | "devices on raid1\n"); | ||
| 1474 | ret = -EINVAL; | ||
| 1475 | goto out; | 1490 | goto out; |
| 1476 | } | 1491 | } |
| 1477 | 1492 | ||
| 1478 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID5) && | 1493 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID5) && |
| 1479 | root->fs_info->fs_devices->rw_devices <= 2) { | 1494 | root->fs_info->fs_devices->rw_devices <= 2) { |
| 1480 | printk(KERN_ERR "btrfs: unable to go below two " | 1495 | ret = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET; |
| 1481 | "devices on raid5\n"); | ||
| 1482 | ret = -EINVAL; | ||
| 1483 | goto out; | 1496 | goto out; |
| 1484 | } | 1497 | } |
| 1485 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID6) && | 1498 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID6) && |
| 1486 | root->fs_info->fs_devices->rw_devices <= 3) { | 1499 | root->fs_info->fs_devices->rw_devices <= 3) { |
| 1487 | printk(KERN_ERR "btrfs: unable to go below three " | 1500 | ret = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET; |
| 1488 | "devices on raid6\n"); | ||
| 1489 | ret = -EINVAL; | ||
| 1490 | goto out; | 1501 | goto out; |
| 1491 | } | 1502 | } |
| 1492 | 1503 | ||
| @@ -1512,8 +1523,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1512 | bh = NULL; | 1523 | bh = NULL; |
| 1513 | disk_super = NULL; | 1524 | disk_super = NULL; |
| 1514 | if (!device) { | 1525 | if (!device) { |
| 1515 | printk(KERN_ERR "btrfs: no missing devices found to " | 1526 | ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; |
| 1516 | "remove\n"); | ||
| 1517 | goto out; | 1527 | goto out; |
| 1518 | } | 1528 | } |
| 1519 | } else { | 1529 | } else { |
| @@ -1535,15 +1545,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1535 | } | 1545 | } |
| 1536 | 1546 | ||
| 1537 | if (device->is_tgtdev_for_dev_replace) { | 1547 | if (device->is_tgtdev_for_dev_replace) { |
| 1538 | pr_err("btrfs: unable to remove the dev_replace target dev\n"); | 1548 | ret = BTRFS_ERROR_DEV_TGT_REPLACE; |
| 1539 | ret = -EINVAL; | ||
| 1540 | goto error_brelse; | 1549 | goto error_brelse; |
| 1541 | } | 1550 | } |
| 1542 | 1551 | ||
| 1543 | if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { | 1552 | if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { |
| 1544 | printk(KERN_ERR "btrfs: unable to remove the only writeable " | 1553 | ret = BTRFS_ERROR_DEV_ONLY_WRITABLE; |
| 1545 | "device\n"); | ||
| 1546 | ret = -EINVAL; | ||
| 1547 | goto error_brelse; | 1554 | goto error_brelse; |
| 1548 | } | 1555 | } |
| 1549 | 1556 | ||
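
The device-removal checks above now fail with distinct BTRFS_ERROR_DEV_* codes instead of printing to the kernel log and returning -EINVAL, so the caller can report the exact reason. How those codes reach user space is not part of this excerpt; the helper below is purely hypothetical and only illustrates the mapping, reusing the messages from the removed printk() calls:

```c
/* Hypothetical -- not from the patch. */
static const char *rm_device_error_str(int err)
{
	switch (err) {
	case BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET:
		return "unable to go below four devices on raid10";
	case BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET:
		return "unable to go below two devices on raid1";
	case BTRFS_ERROR_DEV_TGT_REPLACE:
		return "unable to remove the dev_replace target dev";
	case BTRFS_ERROR_DEV_ONLY_WRITABLE:
		return "unable to remove the only writeable device";
	case BTRFS_ERROR_DEV_MISSING_NOT_FOUND:
		return "no missing devices found to remove";
	default:
		return "unknown error";
	}
}
```
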
| @@ -3295,10 +3302,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) | |||
| 3295 | } | 3302 | } |
| 3296 | 3303 | ||
| 3297 | tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); | 3304 | tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); |
| 3298 | if (IS_ERR(tsk)) | 3305 | return PTR_RET(tsk); |
| 3299 | return PTR_ERR(tsk); | ||
| 3300 | |||
| 3301 | return 0; | ||
| 3302 | } | 3306 | } |
| 3303 | 3307 | ||
| 3304 | int btrfs_recover_balance(struct btrfs_fs_info *fs_info) | 3308 | int btrfs_recover_balance(struct btrfs_fs_info *fs_info) |
| @@ -3681,10 +3685,8 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) | |||
| 3681 | } | 3685 | } |
| 3682 | 3686 | ||
| 3683 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 3687 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
| 3684 | struct btrfs_root *extent_root, | 3688 | struct btrfs_root *extent_root, u64 start, |
| 3685 | struct map_lookup **map_ret, | 3689 | u64 type) |
| 3686 | u64 *num_bytes_out, u64 *stripe_size_out, | ||
| 3687 | u64 start, u64 type) | ||
| 3688 | { | 3690 | { |
| 3689 | struct btrfs_fs_info *info = extent_root->fs_info; | 3691 | struct btrfs_fs_info *info = extent_root->fs_info; |
| 3690 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | 3692 | struct btrfs_fs_devices *fs_devices = info->fs_devices; |
| @@ -3791,7 +3793,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 3791 | if (total_avail == 0) | 3793 | if (total_avail == 0) |
| 3792 | continue; | 3794 | continue; |
| 3793 | 3795 | ||
| 3794 | ret = find_free_dev_extent(device, | 3796 | ret = find_free_dev_extent(trans, device, |
| 3795 | max_stripe_size * dev_stripes, | 3797 | max_stripe_size * dev_stripes, |
| 3796 | &dev_offset, &max_avail); | 3798 | &dev_offset, &max_avail); |
| 3797 | if (ret && ret != -ENOSPC) | 3799 | if (ret && ret != -ENOSPC) |
| @@ -3903,12 +3905,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 3903 | map->type = type; | 3905 | map->type = type; |
| 3904 | map->sub_stripes = sub_stripes; | 3906 | map->sub_stripes = sub_stripes; |
| 3905 | 3907 | ||
| 3906 | *map_ret = map; | ||
| 3907 | num_bytes = stripe_size * data_stripes; | 3908 | num_bytes = stripe_size * data_stripes; |
| 3908 | 3909 | ||
| 3909 | *stripe_size_out = stripe_size; | ||
| 3910 | *num_bytes_out = num_bytes; | ||
| 3911 | |||
| 3912 | trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes); | 3910 | trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes); |
| 3913 | 3911 | ||
| 3914 | em = alloc_extent_map(); | 3912 | em = alloc_extent_map(); |
| @@ -3921,38 +3919,26 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 3921 | em->len = num_bytes; | 3919 | em->len = num_bytes; |
| 3922 | em->block_start = 0; | 3920 | em->block_start = 0; |
| 3923 | em->block_len = em->len; | 3921 | em->block_len = em->len; |
| 3922 | em->orig_block_len = stripe_size; | ||
| 3924 | 3923 | ||
| 3925 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; | 3924 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; |
| 3926 | write_lock(&em_tree->lock); | 3925 | write_lock(&em_tree->lock); |
| 3927 | ret = add_extent_mapping(em_tree, em, 0); | 3926 | ret = add_extent_mapping(em_tree, em, 0); |
| 3927 | if (!ret) { | ||
| 3928 | list_add_tail(&em->list, &trans->transaction->pending_chunks); | ||
| 3929 | atomic_inc(&em->refs); | ||
| 3930 | } | ||
| 3928 | write_unlock(&em_tree->lock); | 3931 | write_unlock(&em_tree->lock); |
| 3929 | if (ret) { | 3932 | if (ret) { |
| 3930 | free_extent_map(em); | 3933 | free_extent_map(em); |
| 3931 | goto error; | 3934 | goto error; |
| 3932 | } | 3935 | } |
| 3933 | 3936 | ||
| 3934 | for (i = 0; i < map->num_stripes; ++i) { | ||
| 3935 | struct btrfs_device *device; | ||
| 3936 | u64 dev_offset; | ||
| 3937 | |||
| 3938 | device = map->stripes[i].dev; | ||
| 3939 | dev_offset = map->stripes[i].physical; | ||
| 3940 | |||
| 3941 | ret = btrfs_alloc_dev_extent(trans, device, | ||
| 3942 | info->chunk_root->root_key.objectid, | ||
| 3943 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, | ||
| 3944 | start, dev_offset, stripe_size); | ||
| 3945 | if (ret) | ||
| 3946 | goto error_dev_extent; | ||
| 3947 | } | ||
| 3948 | |||
| 3949 | ret = btrfs_make_block_group(trans, extent_root, 0, type, | 3937 | ret = btrfs_make_block_group(trans, extent_root, 0, type, |
| 3950 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, | 3938 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, |
| 3951 | start, num_bytes); | 3939 | start, num_bytes); |
| 3952 | if (ret) { | 3940 | if (ret) |
| 3953 | i = map->num_stripes - 1; | 3941 | goto error_del_extent; |
| 3954 | goto error_dev_extent; | ||
| 3955 | } | ||
| 3956 | 3942 | ||
| 3957 | free_extent_map(em); | 3943 | free_extent_map(em); |
| 3958 | check_raid56_incompat_flag(extent_root->fs_info, type); | 3944 | check_raid56_incompat_flag(extent_root->fs_info, type); |
| @@ -3960,18 +3946,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 3960 | kfree(devices_info); | 3946 | kfree(devices_info); |
| 3961 | return 0; | 3947 | return 0; |
| 3962 | 3948 | ||
| 3963 | error_dev_extent: | 3949 | error_del_extent: |
| 3964 | for (; i >= 0; i--) { | ||
| 3965 | struct btrfs_device *device; | ||
| 3966 | int err; | ||
| 3967 | |||
| 3968 | device = map->stripes[i].dev; | ||
| 3969 | err = btrfs_free_dev_extent(trans, device, start); | ||
| 3970 | if (err) { | ||
| 3971 | btrfs_abort_transaction(trans, extent_root, err); | ||
| 3972 | break; | ||
| 3973 | } | ||
| 3974 | } | ||
| 3975 | write_lock(&em_tree->lock); | 3950 | write_lock(&em_tree->lock); |
| 3976 | remove_extent_mapping(em_tree, em); | 3951 | remove_extent_mapping(em_tree, em); |
| 3977 | write_unlock(&em_tree->lock); | 3952 | write_unlock(&em_tree->lock); |
| @@ -3986,33 +3961,68 @@ error: | |||
| 3986 | return ret; | 3961 | return ret; |
| 3987 | } | 3962 | } |
| 3988 | 3963 | ||
| 3989 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | 3964 | int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, |
| 3990 | struct btrfs_root *extent_root, | 3965 | struct btrfs_root *extent_root, |
| 3991 | struct map_lookup *map, u64 chunk_offset, | 3966 | u64 chunk_offset, u64 chunk_size) |
| 3992 | u64 chunk_size, u64 stripe_size) | ||
| 3993 | { | 3967 | { |
| 3994 | u64 dev_offset; | ||
| 3995 | struct btrfs_key key; | 3968 | struct btrfs_key key; |
| 3996 | struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; | 3969 | struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; |
| 3997 | struct btrfs_device *device; | 3970 | struct btrfs_device *device; |
| 3998 | struct btrfs_chunk *chunk; | 3971 | struct btrfs_chunk *chunk; |
| 3999 | struct btrfs_stripe *stripe; | 3972 | struct btrfs_stripe *stripe; |
| 4000 | size_t item_size = btrfs_chunk_item_size(map->num_stripes); | 3973 | struct extent_map_tree *em_tree; |
| 4001 | int index = 0; | 3974 | struct extent_map *em; |
| 3975 | struct map_lookup *map; | ||
| 3976 | size_t item_size; | ||
| 3977 | u64 dev_offset; | ||
| 3978 | u64 stripe_size; | ||
| 3979 | int i = 0; | ||
| 4002 | int ret; | 3980 | int ret; |
| 4003 | 3981 | ||
| 3982 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; | ||
| 3983 | read_lock(&em_tree->lock); | ||
| 3984 | em = lookup_extent_mapping(em_tree, chunk_offset, chunk_size); | ||
| 3985 | read_unlock(&em_tree->lock); | ||
| 3986 | |||
| 3987 | if (!em) { | ||
| 3988 | btrfs_crit(extent_root->fs_info, "unable to find logical " | ||
| 3989 | "%Lu len %Lu", chunk_offset, chunk_size); | ||
| 3990 | return -EINVAL; | ||
| 3991 | } | ||
| 3992 | |||
| 3993 | if (em->start != chunk_offset || em->len != chunk_size) { | ||
| 3994 | btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted" | ||
| 3995 | " %Lu-%Lu, found %Lu-%Lu\n", chunk_offset, | ||
| 3996 | chunk_size, em->start, em->len); | ||
| 3997 | free_extent_map(em); | ||
| 3998 | return -EINVAL; | ||
| 3999 | } | ||
| 4000 | |||
| 4001 | map = (struct map_lookup *)em->bdev; | ||
| 4002 | item_size = btrfs_chunk_item_size(map->num_stripes); | ||
| 4003 | stripe_size = em->orig_block_len; | ||
| 4004 | |||
| 4004 | chunk = kzalloc(item_size, GFP_NOFS); | 4005 | chunk = kzalloc(item_size, GFP_NOFS); |
| 4005 | if (!chunk) | 4006 | if (!chunk) { |
| 4006 | return -ENOMEM; | 4007 | ret = -ENOMEM; |
| 4008 | goto out; | ||
| 4009 | } | ||
| 4010 | |||
| 4011 | for (i = 0; i < map->num_stripes; i++) { | ||
| 4012 | device = map->stripes[i].dev; | ||
| 4013 | dev_offset = map->stripes[i].physical; | ||
| 4007 | 4014 | ||
| 4008 | index = 0; | ||
| 4009 | while (index < map->num_stripes) { | ||
| 4010 | device = map->stripes[index].dev; | ||
| 4011 | device->bytes_used += stripe_size; | 4015 | device->bytes_used += stripe_size; |
| 4012 | ret = btrfs_update_device(trans, device); | 4016 | ret = btrfs_update_device(trans, device); |
| 4013 | if (ret) | 4017 | if (ret) |
| 4014 | goto out_free; | 4018 | goto out; |
| 4015 | index++; | 4019 | ret = btrfs_alloc_dev_extent(trans, device, |
| 4020 | chunk_root->root_key.objectid, | ||
| 4021 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, | ||
| 4022 | chunk_offset, dev_offset, | ||
| 4023 | stripe_size); | ||
| 4024 | if (ret) | ||
| 4025 | goto out; | ||
| 4016 | } | 4026 | } |
| 4017 | 4027 | ||
| 4018 | spin_lock(&extent_root->fs_info->free_chunk_lock); | 4028 | spin_lock(&extent_root->fs_info->free_chunk_lock); |
| @@ -4020,17 +4030,15 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 4020 | map->num_stripes); | 4030 | map->num_stripes); |
| 4021 | spin_unlock(&extent_root->fs_info->free_chunk_lock); | 4031 | spin_unlock(&extent_root->fs_info->free_chunk_lock); |
| 4022 | 4032 | ||
| 4023 | index = 0; | ||
| 4024 | stripe = &chunk->stripe; | 4033 | stripe = &chunk->stripe; |
| 4025 | while (index < map->num_stripes) { | 4034 | for (i = 0; i < map->num_stripes; i++) { |
| 4026 | device = map->stripes[index].dev; | 4035 | device = map->stripes[i].dev; |
| 4027 | dev_offset = map->stripes[index].physical; | 4036 | dev_offset = map->stripes[i].physical; |
| 4028 | 4037 | ||
| 4029 | btrfs_set_stack_stripe_devid(stripe, device->devid); | 4038 | btrfs_set_stack_stripe_devid(stripe, device->devid); |
| 4030 | btrfs_set_stack_stripe_offset(stripe, dev_offset); | 4039 | btrfs_set_stack_stripe_offset(stripe, dev_offset); |
| 4031 | memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); | 4040 | memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); |
| 4032 | stripe++; | 4041 | stripe++; |
| 4033 | index++; | ||
| 4034 | } | 4042 | } |
| 4035 | 4043 | ||
| 4036 | btrfs_set_stack_chunk_length(chunk, chunk_size); | 4044 | btrfs_set_stack_chunk_length(chunk, chunk_size); |
| @@ -4048,7 +4056,6 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 4048 | key.offset = chunk_offset; | 4056 | key.offset = chunk_offset; |
| 4049 | 4057 | ||
| 4050 | ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size); | 4058 | ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size); |
| 4051 | |||
| 4052 | if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) { | 4059 | if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) { |
| 4053 | /* | 4060 | /* |
| 4054 | * TODO: Cleanup of inserted chunk root in case of | 4061 | * TODO: Cleanup of inserted chunk root in case of |
| @@ -4058,8 +4065,9 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 4058 | item_size); | 4065 | item_size); |
| 4059 | } | 4066 | } |
| 4060 | 4067 | ||
| 4061 | out_free: | 4068 | out: |
| 4062 | kfree(chunk); | 4069 | kfree(chunk); |
| 4070 | free_extent_map(em); | ||
| 4063 | return ret; | 4071 | return ret; |
| 4064 | } | 4072 | } |
| 4065 | 4073 | ||
| @@ -4074,27 +4082,9 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 4074 | struct btrfs_root *extent_root, u64 type) | 4082 | struct btrfs_root *extent_root, u64 type) |
| 4075 | { | 4083 | { |
| 4076 | u64 chunk_offset; | 4084 | u64 chunk_offset; |
| 4077 | u64 chunk_size; | ||
| 4078 | u64 stripe_size; | ||
| 4079 | struct map_lookup *map; | ||
| 4080 | struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; | ||
| 4081 | int ret; | ||
| 4082 | |||
| 4083 | ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, | ||
| 4084 | &chunk_offset); | ||
| 4085 | if (ret) | ||
| 4086 | return ret; | ||
| 4087 | 4085 | ||
| 4088 | ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, | 4086 | chunk_offset = find_next_chunk(extent_root->fs_info); |
| 4089 | &stripe_size, chunk_offset, type); | 4087 | return __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type); |
| 4090 | if (ret) | ||
| 4091 | return ret; | ||
| 4092 | |||
| 4093 | ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, | ||
| 4094 | chunk_size, stripe_size); | ||
| 4095 | if (ret) | ||
| 4096 | return ret; | ||
| 4097 | return 0; | ||
| 4098 | } | 4088 | } |
| 4099 | 4089 | ||
| 4100 | static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | 4090 | static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, |
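
After this restructuring, chunk allocation is split in two: __btrfs_alloc_chunk() only picks the logical start (via find_next_chunk()), builds the in-memory mapping, queues it on pending_chunks and sets up the block group, while btrfs_finish_chunk_alloc() later writes the dev extents and the chunk item once the new block groups exist. A hedged outline of the flow; the assumption that the finish step runs while pending block groups are persisted is not shown in this excerpt:

```c
/* allocation time */
chunk_offset = find_next_chunk(extent_root->fs_info);
ret = __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type);
/* extent map inserted and listed on trans->transaction->pending_chunks;
 * nothing has been written to the chunk tree yet */

/* later (assumed: when the pending block groups are persisted) */
ret = btrfs_finish_chunk_alloc(trans, extent_root, chunk_offset, chunk_size);
/* dev extents inserted, chunk item written, system chunk array updated */
```
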
| @@ -4103,66 +4093,31 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
| 4103 | { | 4093 | { |
| 4104 | u64 chunk_offset; | 4094 | u64 chunk_offset; |
| 4105 | u64 sys_chunk_offset; | 4095 | u64 sys_chunk_offset; |
| 4106 | u64 chunk_size; | ||
| 4107 | u64 sys_chunk_size; | ||
| 4108 | u64 stripe_size; | ||
| 4109 | u64 sys_stripe_size; | ||
| 4110 | u64 alloc_profile; | 4096 | u64 alloc_profile; |
| 4111 | struct map_lookup *map; | ||
| 4112 | struct map_lookup *sys_map; | ||
| 4113 | struct btrfs_fs_info *fs_info = root->fs_info; | 4097 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 4114 | struct btrfs_root *extent_root = fs_info->extent_root; | 4098 | struct btrfs_root *extent_root = fs_info->extent_root; |
| 4115 | int ret; | 4099 | int ret; |
| 4116 | 4100 | ||
| 4117 | ret = find_next_chunk(fs_info->chunk_root, | 4101 | chunk_offset = find_next_chunk(fs_info); |
| 4118 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); | ||
| 4119 | if (ret) | ||
| 4120 | return ret; | ||
| 4121 | |||
| 4122 | alloc_profile = btrfs_get_alloc_profile(extent_root, 0); | 4102 | alloc_profile = btrfs_get_alloc_profile(extent_root, 0); |
| 4123 | ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, | 4103 | ret = __btrfs_alloc_chunk(trans, extent_root, chunk_offset, |
| 4124 | &stripe_size, chunk_offset, alloc_profile); | 4104 | alloc_profile); |
| 4125 | if (ret) | 4105 | if (ret) |
| 4126 | return ret; | 4106 | return ret; |
| 4127 | 4107 | ||
| 4128 | sys_chunk_offset = chunk_offset + chunk_size; | 4108 | sys_chunk_offset = find_next_chunk(root->fs_info); |
| 4129 | |||
| 4130 | alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); | 4109 | alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); |
| 4131 | ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, | 4110 | ret = __btrfs_alloc_chunk(trans, extent_root, sys_chunk_offset, |
| 4132 | &sys_chunk_size, &sys_stripe_size, | 4111 | alloc_profile); |
| 4133 | sys_chunk_offset, alloc_profile); | ||
| 4134 | if (ret) { | 4112 | if (ret) { |
| 4135 | btrfs_abort_transaction(trans, root, ret); | 4113 | btrfs_abort_transaction(trans, root, ret); |
| 4136 | goto out; | 4114 | goto out; |
| 4137 | } | 4115 | } |
| 4138 | 4116 | ||
| 4139 | ret = btrfs_add_device(trans, fs_info->chunk_root, device); | 4117 | ret = btrfs_add_device(trans, fs_info->chunk_root, device); |
| 4140 | if (ret) { | ||
| 4141 | btrfs_abort_transaction(trans, root, ret); | ||
| 4142 | goto out; | ||
| 4143 | } | ||
| 4144 | |||
| 4145 | /* | ||
| 4146 | * Modifying chunk tree needs allocating new blocks from both | ||
| 4147 | * system block group and metadata block group. So we only can | ||
| 4148 | * do operations require modifying the chunk tree after both | ||
| 4149 | * block groups were created. | ||
| 4150 | */ | ||
| 4151 | ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, | ||
| 4152 | chunk_size, stripe_size); | ||
| 4153 | if (ret) { | ||
| 4154 | btrfs_abort_transaction(trans, root, ret); | ||
| 4155 | goto out; | ||
| 4156 | } | ||
| 4157 | |||
| 4158 | ret = __finish_chunk_alloc(trans, extent_root, sys_map, | ||
| 4159 | sys_chunk_offset, sys_chunk_size, | ||
| 4160 | sys_stripe_size); | ||
| 4161 | if (ret) | 4118 | if (ret) |
| 4162 | btrfs_abort_transaction(trans, root, ret); | 4119 | btrfs_abort_transaction(trans, root, ret); |
| 4163 | |||
| 4164 | out: | 4120 | out: |
| 4165 | |||
| 4166 | return ret; | 4121 | return ret; |
| 4167 | } | 4122 | } |
| 4168 | 4123 | ||
| @@ -4435,9 +4390,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
| 4435 | map = (struct map_lookup *)em->bdev; | 4390 | map = (struct map_lookup *)em->bdev; |
| 4436 | offset = logical - em->start; | 4391 | offset = logical - em->start; |
| 4437 | 4392 | ||
| 4438 | if (mirror_num > map->num_stripes) | ||
| 4439 | mirror_num = 0; | ||
| 4440 | |||
| 4441 | stripe_len = map->stripe_len; | 4393 | stripe_len = map->stripe_len; |
| 4442 | stripe_nr = offset; | 4394 | stripe_nr = offset; |
| 4443 | /* | 4395 | /* |
| @@ -5367,7 +5319,6 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, | |||
| 5367 | return NULL; | 5319 | return NULL; |
| 5368 | list_add(&device->dev_list, | 5320 | list_add(&device->dev_list, |
| 5369 | &fs_devices->devices); | 5321 | &fs_devices->devices); |
| 5370 | device->dev_root = root->fs_info->dev_root; | ||
| 5371 | device->devid = devid; | 5322 | device->devid = devid; |
| 5372 | device->work.func = pending_bios_fn; | 5323 | device->work.func = pending_bios_fn; |
| 5373 | device->fs_devices = fs_devices; | 5324 | device->fs_devices = fs_devices; |
| @@ -5593,7 +5544,6 @@ static int read_one_dev(struct btrfs_root *root, | |||
| 5593 | } | 5544 | } |
| 5594 | 5545 | ||
| 5595 | fill_device_from_item(leaf, dev_item, device); | 5546 | fill_device_from_item(leaf, dev_item, device); |
| 5596 | device->dev_root = root->fs_info->dev_root; | ||
| 5597 | device->in_fs_metadata = 1; | 5547 | device->in_fs_metadata = 1; |
| 5598 | if (device->writeable && !device->is_tgtdev_for_dev_replace) { | 5548 | if (device->writeable && !device->is_tgtdev_for_dev_replace) { |
| 5599 | device->fs_devices->total_rw_bytes += device->total_bytes; | 5549 | device->fs_devices->total_rw_bytes += device->total_bytes; |
| @@ -5751,6 +5701,17 @@ error: | |||
| 5751 | return ret; | 5701 | return ret; |
| 5752 | } | 5702 | } |
| 5753 | 5703 | ||
| 5704 | void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) | ||
| 5705 | { | ||
| 5706 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
| 5707 | struct btrfs_device *device; | ||
| 5708 | |||
| 5709 | mutex_lock(&fs_devices->device_list_mutex); | ||
| 5710 | list_for_each_entry(device, &fs_devices->devices, dev_list) | ||
| 5711 | device->dev_root = fs_info->dev_root; | ||
| 5712 | mutex_unlock(&fs_devices->device_list_mutex); | ||
| 5713 | } | ||
| 5714 | |||
| 5754 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev) | 5715 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev) |
| 5755 | { | 5716 | { |
| 5756 | int i; | 5717 | int i; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index f6247e2a47f7..86705583480d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -316,11 +316,13 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info); | |||
| 316 | int btrfs_pause_balance(struct btrfs_fs_info *fs_info); | 316 | int btrfs_pause_balance(struct btrfs_fs_info *fs_info); |
| 317 | int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); | 317 | int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); |
| 318 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); | 318 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); |
| 319 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | 319 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
| 320 | struct btrfs_device *device, u64 num_bytes, | ||
| 320 | u64 *start, u64 *max_avail); | 321 | u64 *start, u64 *max_avail); |
| 321 | void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); | 322 | void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); |
| 322 | int btrfs_get_dev_stats(struct btrfs_root *root, | 323 | int btrfs_get_dev_stats(struct btrfs_root *root, |
| 323 | struct btrfs_ioctl_get_dev_stats *stats); | 324 | struct btrfs_ioctl_get_dev_stats *stats); |
| 325 | void btrfs_init_devices_late(struct btrfs_fs_info *fs_info); | ||
| 324 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); | 326 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); |
| 325 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | 327 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, |
| 326 | struct btrfs_fs_info *fs_info); | 328 | struct btrfs_fs_info *fs_info); |
| @@ -336,6 +338,9 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree, | |||
| 336 | unsigned long btrfs_full_stripe_len(struct btrfs_root *root, | 338 | unsigned long btrfs_full_stripe_len(struct btrfs_root *root, |
| 337 | struct btrfs_mapping_tree *map_tree, | 339 | struct btrfs_mapping_tree *map_tree, |
| 338 | u64 logical); | 340 | u64 logical); |
| 341 | int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, | ||
| 342 | struct btrfs_root *extent_root, | ||
| 343 | u64 chunk_offset, u64 chunk_size); | ||
| 339 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, | 344 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, |
| 340 | int index) | 345 | int index) |
| 341 | { | 346 | { |
diff --git a/fs/buffer.c b/fs/buffer.c index f93392e2df12..4d7433534f5c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -83,6 +83,40 @@ void unlock_buffer(struct buffer_head *bh) | |||
| 83 | EXPORT_SYMBOL(unlock_buffer); | 83 | EXPORT_SYMBOL(unlock_buffer); |
| 84 | 84 | ||
| 85 | /* | 85 | /* |
| 86 | * Returns if the page has dirty or writeback buffers. If all the buffers | ||
| 87 | * are unlocked and clean then the PageDirty information is stale. If | ||
| 88 | * any of the pages are locked, it is assumed they are locked for IO. | ||
| 89 | */ | ||
| 90 | void buffer_check_dirty_writeback(struct page *page, | ||
| 91 | bool *dirty, bool *writeback) | ||
| 92 | { | ||
| 93 | struct buffer_head *head, *bh; | ||
| 94 | *dirty = false; | ||
| 95 | *writeback = false; | ||
| 96 | |||
| 97 | BUG_ON(!PageLocked(page)); | ||
| 98 | |||
| 99 | if (!page_has_buffers(page)) | ||
| 100 | return; | ||
| 101 | |||
| 102 | if (PageWriteback(page)) | ||
| 103 | *writeback = true; | ||
| 104 | |||
| 105 | head = page_buffers(page); | ||
| 106 | bh = head; | ||
| 107 | do { | ||
| 108 | if (buffer_locked(bh)) | ||
| 109 | *writeback = true; | ||
| 110 | |||
| 111 | if (buffer_dirty(bh)) | ||
| 112 | *dirty = true; | ||
| 113 | |||
| 114 | bh = bh->b_this_page; | ||
| 115 | } while (bh != head); | ||
| 116 | } | ||
| 117 | EXPORT_SYMBOL(buffer_check_dirty_writeback); | ||
| 118 | |||
| 119 | /* | ||
| 86 | * Block until a buffer comes unlocked. This doesn't stop it | 120 | * Block until a buffer comes unlocked. This doesn't stop it |
| 87 | * from becoming locked again - you have to lock it yourself | 121 | * from becoming locked again - you have to lock it yourself |
| 88 | * if you want to preserve its state. | 122 | * if you want to preserve its state. |
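The buffer_check_dirty_writeback() helper added above reports, for a locked page, whether any attached buffer head is dirty or under I/O. A minimal sketch of how a buffer_head-based filesystem might expose it through the address_space_operations ->is_dirty_writeback hook follows; the examplefs_aops table and the omitted callbacks are illustrative assumptions, not code from this merge.

    #include <linux/buffer_head.h>
    #include <linux/fs.h>

    /*
     * Hypothetical aops table for a buffer_head-based filesystem.  Reclaim
     * can consult ->is_dirty_writeback() to learn whether a page it is
     * about to wait on still has dirty or in-flight buffers, without
     * having to walk the buffer list itself.
     */
    static const struct address_space_operations examplefs_aops = {
            /* readpage, writepage, ... omitted for brevity */
            .is_dirty_writeback     = buffer_check_dirty_writeback,
    };

Note that the helper insists on a locked page (BUG_ON(!PageLocked(page))), so any caller invoking it directly must hold the page lock.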
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 317f9ee9c991..ebaff368120d 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <linux/mount.h> | 12 | #include <linux/mount.h> |
| 13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
| 14 | #include <linux/file.h> | 14 | #include <linux/file.h> |
| 15 | #include <linux/swap.h> | ||
| 15 | #include "internal.h" | 16 | #include "internal.h" |
| 16 | 17 | ||
| 17 | /* | 18 | /* |
| @@ -227,8 +228,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op) | |||
| 227 | */ | 228 | */ |
| 228 | static int cachefiles_read_backing_file_one(struct cachefiles_object *object, | 229 | static int cachefiles_read_backing_file_one(struct cachefiles_object *object, |
| 229 | struct fscache_retrieval *op, | 230 | struct fscache_retrieval *op, |
| 230 | struct page *netpage, | 231 | struct page *netpage) |
| 231 | struct pagevec *pagevec) | ||
| 232 | { | 232 | { |
| 233 | struct cachefiles_one_read *monitor; | 233 | struct cachefiles_one_read *monitor; |
| 234 | struct address_space *bmapping; | 234 | struct address_space *bmapping; |
| @@ -237,8 +237,6 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, | |||
| 237 | 237 | ||
| 238 | _enter(""); | 238 | _enter(""); |
| 239 | 239 | ||
| 240 | pagevec_reinit(pagevec); | ||
| 241 | |||
| 242 | _debug("read back %p{%lu,%d}", | 240 | _debug("read back %p{%lu,%d}", |
| 243 | netpage, netpage->index, page_count(netpage)); | 241 | netpage, netpage->index, page_count(netpage)); |
| 244 | 242 | ||
| @@ -283,9 +281,7 @@ installed_new_backing_page: | |||
| 283 | backpage = newpage; | 281 | backpage = newpage; |
| 284 | newpage = NULL; | 282 | newpage = NULL; |
| 285 | 283 | ||
| 286 | page_cache_get(backpage); | 284 | lru_cache_add_file(backpage); |
| 287 | pagevec_add(pagevec, backpage); | ||
| 288 | __pagevec_lru_add_file(pagevec); | ||
| 289 | 285 | ||
| 290 | read_backing_page: | 286 | read_backing_page: |
| 291 | ret = bmapping->a_ops->readpage(NULL, backpage); | 287 | ret = bmapping->a_ops->readpage(NULL, backpage); |
| @@ -452,8 +448,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, | |||
| 452 | if (block) { | 448 | if (block) { |
| 453 | /* submit the apparently valid page to the backing fs to be | 449 | /* submit the apparently valid page to the backing fs to be |
| 454 | * read from disk */ | 450 | * read from disk */ |
| 455 | ret = cachefiles_read_backing_file_one(object, op, page, | 451 | ret = cachefiles_read_backing_file_one(object, op, page); |
| 456 | &pagevec); | ||
| 457 | } else if (cachefiles_has_space(cache, 0, 1) == 0) { | 452 | } else if (cachefiles_has_space(cache, 0, 1) == 0) { |
| 458 | /* there's space in the cache we can use */ | 453 | /* there's space in the cache we can use */ |
| 459 | fscache_mark_page_cached(op, page); | 454 | fscache_mark_page_cached(op, page); |
| @@ -482,14 +477,11 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
| 482 | { | 477 | { |
| 483 | struct cachefiles_one_read *monitor = NULL; | 478 | struct cachefiles_one_read *monitor = NULL; |
| 484 | struct address_space *bmapping = object->backer->d_inode->i_mapping; | 479 | struct address_space *bmapping = object->backer->d_inode->i_mapping; |
| 485 | struct pagevec lru_pvec; | ||
| 486 | struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; | 480 | struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; |
| 487 | int ret = 0; | 481 | int ret = 0; |
| 488 | 482 | ||
| 489 | _enter(""); | 483 | _enter(""); |
| 490 | 484 | ||
| 491 | pagevec_init(&lru_pvec, 0); | ||
| 492 | |||
| 493 | list_for_each_entry_safe(netpage, _n, list, lru) { | 485 | list_for_each_entry_safe(netpage, _n, list, lru) { |
| 494 | list_del(&netpage->lru); | 486 | list_del(&netpage->lru); |
| 495 | 487 | ||
| @@ -534,9 +526,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
| 534 | backpage = newpage; | 526 | backpage = newpage; |
| 535 | newpage = NULL; | 527 | newpage = NULL; |
| 536 | 528 | ||
| 537 | page_cache_get(backpage); | 529 | lru_cache_add_file(backpage); |
| 538 | if (!pagevec_add(&lru_pvec, backpage)) | ||
| 539 | __pagevec_lru_add_file(&lru_pvec); | ||
| 540 | 530 | ||
| 541 | reread_backing_page: | 531 | reread_backing_page: |
| 542 | ret = bmapping->a_ops->readpage(NULL, backpage); | 532 | ret = bmapping->a_ops->readpage(NULL, backpage); |
| @@ -559,9 +549,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
| 559 | goto nomem; | 549 | goto nomem; |
| 560 | } | 550 | } |
| 561 | 551 | ||
| 562 | page_cache_get(netpage); | 552 | lru_cache_add_file(netpage); |
| 563 | if (!pagevec_add(&lru_pvec, netpage)) | ||
| 564 | __pagevec_lru_add_file(&lru_pvec); | ||
| 565 | 553 | ||
| 566 | /* install a monitor */ | 554 | /* install a monitor */ |
| 567 | page_cache_get(netpage); | 555 | page_cache_get(netpage); |
| @@ -643,9 +631,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
| 643 | 631 | ||
| 644 | fscache_mark_page_cached(op, netpage); | 632 | fscache_mark_page_cached(op, netpage); |
| 645 | 633 | ||
| 646 | page_cache_get(netpage); | 634 | lru_cache_add_file(netpage); |
| 647 | if (!pagevec_add(&lru_pvec, netpage)) | ||
| 648 | __pagevec_lru_add_file(&lru_pvec); | ||
| 649 | 635 | ||
| 650 | /* the netpage is unlocked and marked up to date here */ | 636 | /* the netpage is unlocked and marked up to date here */ |
| 651 | fscache_end_io(op, netpage, 0); | 637 | fscache_end_io(op, netpage, 0); |
| @@ -661,8 +647,6 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
| 661 | 647 | ||
| 662 | out: | 648 | out: |
| 663 | /* tidy up */ | 649 | /* tidy up */ |
| 664 | pagevec_lru_add_file(&lru_pvec); | ||
| 665 | |||
| 666 | if (newpage) | 650 | if (newpage) |
| 667 | page_cache_release(newpage); | 651 | page_cache_release(newpage); |
| 668 | if (netpage) | 652 | if (netpage) |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 38b5c1bc6776..5318a3b704f6 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
| @@ -439,13 +439,12 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
| 439 | struct ceph_inode_info *ci; | 439 | struct ceph_inode_info *ci; |
| 440 | struct ceph_fs_client *fsc; | 440 | struct ceph_fs_client *fsc; |
| 441 | struct ceph_osd_client *osdc; | 441 | struct ceph_osd_client *osdc; |
| 442 | loff_t page_off = page_offset(page); | ||
| 443 | int len = PAGE_CACHE_SIZE; | ||
| 444 | loff_t i_size; | ||
| 445 | int err = 0; | ||
| 446 | struct ceph_snap_context *snapc, *oldest; | 442 | struct ceph_snap_context *snapc, *oldest; |
| 447 | u64 snap_size = 0; | 443 | loff_t page_off = page_offset(page); |
| 448 | long writeback_stat; | 444 | long writeback_stat; |
| 445 | u64 truncate_size, snap_size = 0; | ||
| 446 | u32 truncate_seq; | ||
| 447 | int err = 0, len = PAGE_CACHE_SIZE; | ||
| 449 | 448 | ||
| 450 | dout("writepage %p idx %lu\n", page, page->index); | 449 | dout("writepage %p idx %lu\n", page, page->index); |
| 451 | 450 | ||
| @@ -475,13 +474,20 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
| 475 | } | 474 | } |
| 476 | ceph_put_snap_context(oldest); | 475 | ceph_put_snap_context(oldest); |
| 477 | 476 | ||
| 477 | spin_lock(&ci->i_ceph_lock); | ||
| 478 | truncate_seq = ci->i_truncate_seq; | ||
| 479 | truncate_size = ci->i_truncate_size; | ||
| 480 | if (!snap_size) | ||
| 481 | snap_size = i_size_read(inode); | ||
| 482 | spin_unlock(&ci->i_ceph_lock); | ||
| 483 | |||
| 478 | /* is this a partial page at end of file? */ | 484 | /* is this a partial page at end of file? */ |
| 479 | if (snap_size) | 485 | if (page_off >= snap_size) { |
| 480 | i_size = snap_size; | 486 | dout("%p page eof %llu\n", page, snap_size); |
| 481 | else | 487 | goto out; |
| 482 | i_size = i_size_read(inode); | 488 | } |
| 483 | if (i_size < page_off + len) | 489 | if (snap_size < page_off + len) |
| 484 | len = i_size - page_off; | 490 | len = snap_size - page_off; |
| 485 | 491 | ||
| 486 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", | 492 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", |
| 487 | inode, page, page->index, page_off, len, snapc); | 493 | inode, page, page->index, page_off, len, snapc); |
| @@ -495,7 +501,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
| 495 | err = ceph_osdc_writepages(osdc, ceph_vino(inode), | 501 | err = ceph_osdc_writepages(osdc, ceph_vino(inode), |
| 496 | &ci->i_layout, snapc, | 502 | &ci->i_layout, snapc, |
| 497 | page_off, len, | 503 | page_off, len, |
| 498 | ci->i_truncate_seq, ci->i_truncate_size, | 504 | truncate_seq, truncate_size, |
| 499 | &inode->i_mtime, &page, 1); | 505 | &inode->i_mtime, &page, 1); |
| 500 | if (err < 0) { | 506 | if (err < 0) { |
| 501 | dout("writepage setting page/mapping error %d %p\n", err, page); | 507 | dout("writepage setting page/mapping error %d %p\n", err, page); |
| @@ -632,25 +638,6 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
| 632 | ceph_osdc_put_request(req); | 638 | ceph_osdc_put_request(req); |
| 633 | } | 639 | } |
| 634 | 640 | ||
| 635 | static struct ceph_osd_request * | ||
| 636 | ceph_writepages_osd_request(struct inode *inode, u64 offset, u64 *len, | ||
| 637 | struct ceph_snap_context *snapc, int num_ops) | ||
| 638 | { | ||
| 639 | struct ceph_fs_client *fsc; | ||
| 640 | struct ceph_inode_info *ci; | ||
| 641 | struct ceph_vino vino; | ||
| 642 | |||
| 643 | fsc = ceph_inode_to_client(inode); | ||
| 644 | ci = ceph_inode(inode); | ||
| 645 | vino = ceph_vino(inode); | ||
| 646 | /* BUG_ON(vino.snap != CEPH_NOSNAP); */ | ||
| 647 | |||
| 648 | return ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | ||
| 649 | vino, offset, len, num_ops, CEPH_OSD_OP_WRITE, | ||
| 650 | CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK, | ||
| 651 | snapc, ci->i_truncate_seq, ci->i_truncate_size, true); | ||
| 652 | } | ||
| 653 | |||
| 654 | /* | 641 | /* |
| 655 | * initiate async writeback | 642 | * initiate async writeback |
| 656 | */ | 643 | */ |
| @@ -659,7 +646,8 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
| 659 | { | 646 | { |
| 660 | struct inode *inode = mapping->host; | 647 | struct inode *inode = mapping->host; |
| 661 | struct ceph_inode_info *ci = ceph_inode(inode); | 648 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 662 | struct ceph_fs_client *fsc; | 649 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 650 | struct ceph_vino vino = ceph_vino(inode); | ||
| 663 | pgoff_t index, start, end; | 651 | pgoff_t index, start, end; |
| 664 | int range_whole = 0; | 652 | int range_whole = 0; |
| 665 | int should_loop = 1; | 653 | int should_loop = 1; |
| @@ -671,22 +659,22 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
| 671 | unsigned wsize = 1 << inode->i_blkbits; | 659 | unsigned wsize = 1 << inode->i_blkbits; |
| 672 | struct ceph_osd_request *req = NULL; | 660 | struct ceph_osd_request *req = NULL; |
| 673 | int do_sync; | 661 | int do_sync; |
| 674 | u64 snap_size; | 662 | u64 truncate_size, snap_size; |
| 663 | u32 truncate_seq; | ||
| 675 | 664 | ||
| 676 | /* | 665 | /* |
| 677 | * Include a 'sync' in the OSD request if this is a data | 666 | * Include a 'sync' in the OSD request if this is a data |
| 678 | * integrity write (e.g., O_SYNC write or fsync()), or if our | 667 | * integrity write (e.g., O_SYNC write or fsync()), or if our |
| 679 | * cap is being revoked. | 668 | * cap is being revoked. |
| 680 | */ | 669 | */ |
| 681 | do_sync = wbc->sync_mode == WB_SYNC_ALL; | 670 | if ((wbc->sync_mode == WB_SYNC_ALL) || |
| 682 | if (ceph_caps_revoking(ci, CEPH_CAP_FILE_BUFFER)) | 671 | ceph_caps_revoking(ci, CEPH_CAP_FILE_BUFFER)) |
| 683 | do_sync = 1; | 672 | do_sync = 1; |
| 684 | dout("writepages_start %p dosync=%d (mode=%s)\n", | 673 | dout("writepages_start %p dosync=%d (mode=%s)\n", |
| 685 | inode, do_sync, | 674 | inode, do_sync, |
| 686 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : | 675 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : |
| 687 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); | 676 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); |
| 688 | 677 | ||
| 689 | fsc = ceph_inode_to_client(inode); | ||
| 690 | if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { | 678 | if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { |
| 691 | pr_warning("writepage_start %p on forced umount\n", inode); | 679 | pr_warning("writepage_start %p on forced umount\n", inode); |
| 692 | return -EIO; /* we're in a forced umount, don't write! */ | 680 | return -EIO; /* we're in a forced umount, don't write! */ |
| @@ -729,6 +717,14 @@ retry: | |||
| 729 | snap_size = i_size_read(inode); | 717 | snap_size = i_size_read(inode); |
| 730 | dout(" oldest snapc is %p seq %lld (%d snaps)\n", | 718 | dout(" oldest snapc is %p seq %lld (%d snaps)\n", |
| 731 | snapc, snapc->seq, snapc->num_snaps); | 719 | snapc, snapc->seq, snapc->num_snaps); |
| 720 | |||
| 721 | spin_lock(&ci->i_ceph_lock); | ||
| 722 | truncate_seq = ci->i_truncate_seq; | ||
| 723 | truncate_size = ci->i_truncate_size; | ||
| 724 | if (!snap_size) | ||
| 725 | snap_size = i_size_read(inode); | ||
| 726 | spin_unlock(&ci->i_ceph_lock); | ||
| 727 | |||
| 732 | if (last_snapc && snapc != last_snapc) { | 728 | if (last_snapc && snapc != last_snapc) { |
| 733 | /* if we switched to a newer snapc, restart our scan at the | 729 | /* if we switched to a newer snapc, restart our scan at the |
| 734 | * start of the original file range. */ | 730 | * start of the original file range. */ |
| @@ -740,7 +736,6 @@ retry: | |||
| 740 | 736 | ||
| 741 | while (!done && index <= end) { | 737 | while (!done && index <= end) { |
| 742 | int num_ops = do_sync ? 2 : 1; | 738 | int num_ops = do_sync ? 2 : 1; |
| 743 | struct ceph_vino vino; | ||
| 744 | unsigned i; | 739 | unsigned i; |
| 745 | int first; | 740 | int first; |
| 746 | pgoff_t next; | 741 | pgoff_t next; |
| @@ -834,17 +829,18 @@ get_more_pages: | |||
| 834 | * that it will use. | 829 | * that it will use. |
| 835 | */ | 830 | */ |
| 836 | if (locked_pages == 0) { | 831 | if (locked_pages == 0) { |
| 837 | size_t size; | ||
| 838 | |||
| 839 | BUG_ON(pages); | 832 | BUG_ON(pages); |
| 840 | |||
| 841 | /* prepare async write request */ | 833 | /* prepare async write request */ |
| 842 | offset = (u64)page_offset(page); | 834 | offset = (u64)page_offset(page); |
| 843 | len = wsize; | 835 | len = wsize; |
| 844 | req = ceph_writepages_osd_request(inode, | 836 | req = ceph_osdc_new_request(&fsc->client->osdc, |
| 845 | offset, &len, snapc, | 837 | &ci->i_layout, vino, |
| 846 | num_ops); | 838 | offset, &len, num_ops, |
| 847 | 839 | CEPH_OSD_OP_WRITE, | |
| 840 | CEPH_OSD_FLAG_WRITE | | ||
| 841 | CEPH_OSD_FLAG_ONDISK, | ||
| 842 | snapc, truncate_seq, | ||
| 843 | truncate_size, true); | ||
| 848 | if (IS_ERR(req)) { | 844 | if (IS_ERR(req)) { |
| 849 | rc = PTR_ERR(req); | 845 | rc = PTR_ERR(req); |
| 850 | unlock_page(page); | 846 | unlock_page(page); |
| @@ -855,8 +851,8 @@ get_more_pages: | |||
| 855 | req->r_inode = inode; | 851 | req->r_inode = inode; |
| 856 | 852 | ||
| 857 | max_pages = calc_pages_for(0, (u64)len); | 853 | max_pages = calc_pages_for(0, (u64)len); |
| 858 | size = max_pages * sizeof (*pages); | 854 | pages = kmalloc(max_pages * sizeof (*pages), |
| 859 | pages = kmalloc(size, GFP_NOFS); | 855 | GFP_NOFS); |
| 860 | if (!pages) { | 856 | if (!pages) { |
| 861 | pool = fsc->wb_pagevec_pool; | 857 | pool = fsc->wb_pagevec_pool; |
| 862 | pages = mempool_alloc(pool, GFP_NOFS); | 858 | pages = mempool_alloc(pool, GFP_NOFS); |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index da0f9b8a3bcb..25442b40c25a 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -147,7 +147,7 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta) | |||
| 147 | spin_unlock(&mdsc->caps_list_lock); | 147 | spin_unlock(&mdsc->caps_list_lock); |
| 148 | } | 148 | } |
| 149 | 149 | ||
| 150 | int ceph_reserve_caps(struct ceph_mds_client *mdsc, | 150 | void ceph_reserve_caps(struct ceph_mds_client *mdsc, |
| 151 | struct ceph_cap_reservation *ctx, int need) | 151 | struct ceph_cap_reservation *ctx, int need) |
| 152 | { | 152 | { |
| 153 | int i; | 153 | int i; |
| @@ -155,7 +155,6 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, | |||
| 155 | int have; | 155 | int have; |
| 156 | int alloc = 0; | 156 | int alloc = 0; |
| 157 | LIST_HEAD(newcaps); | 157 | LIST_HEAD(newcaps); |
| 158 | int ret = 0; | ||
| 159 | 158 | ||
| 160 | dout("reserve caps ctx=%p need=%d\n", ctx, need); | 159 | dout("reserve caps ctx=%p need=%d\n", ctx, need); |
| 161 | 160 | ||
| @@ -174,14 +173,15 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, | |||
| 174 | 173 | ||
| 175 | for (i = have; i < need; i++) { | 174 | for (i = have; i < need; i++) { |
| 176 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); | 175 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); |
| 177 | if (!cap) { | 176 | if (!cap) |
| 178 | ret = -ENOMEM; | 177 | break; |
| 179 | goto out_alloc_count; | ||
| 180 | } | ||
| 181 | list_add(&cap->caps_item, &newcaps); | 178 | list_add(&cap->caps_item, &newcaps); |
| 182 | alloc++; | 179 | alloc++; |
| 183 | } | 180 | } |
| 184 | BUG_ON(have + alloc != need); | 181 | /* we didn't manage to reserve as much as we needed */ |
| 182 | if (have + alloc != need) | ||
| 183 | pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n", | ||
| 184 | ctx, need, have + alloc); | ||
| 185 | 185 | ||
| 186 | spin_lock(&mdsc->caps_list_lock); | 186 | spin_lock(&mdsc->caps_list_lock); |
| 187 | mdsc->caps_total_count += alloc; | 187 | mdsc->caps_total_count += alloc; |
| @@ -197,13 +197,6 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, | |||
| 197 | dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", | 197 | dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", |
| 198 | ctx, mdsc->caps_total_count, mdsc->caps_use_count, | 198 | ctx, mdsc->caps_total_count, mdsc->caps_use_count, |
| 199 | mdsc->caps_reserve_count, mdsc->caps_avail_count); | 199 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
| 200 | return 0; | ||
| 201 | |||
| 202 | out_alloc_count: | ||
| 203 | /* we didn't manage to reserve as much as we needed */ | ||
| 204 | pr_warning("reserve caps ctx=%p ENOMEM need=%d got=%d\n", | ||
| 205 | ctx, need, have); | ||
| 206 | return ret; | ||
| 207 | } | 200 | } |
| 208 | 201 | ||
| 209 | int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | 202 | int ceph_unreserve_caps(struct ceph_mds_client *mdsc, |
| @@ -612,9 +605,11 @@ retry: | |||
| 612 | __cap_delay_requeue(mdsc, ci); | 605 | __cap_delay_requeue(mdsc, ci); |
| 613 | } | 606 | } |
| 614 | 607 | ||
| 615 | if (flags & CEPH_CAP_FLAG_AUTH) | 608 | if (flags & CEPH_CAP_FLAG_AUTH) { |
| 616 | ci->i_auth_cap = cap; | 609 | if (ci->i_auth_cap == NULL || |
| 617 | else if (ci->i_auth_cap == cap) { | 610 | ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) |
| 611 | ci->i_auth_cap = cap; | ||
| 612 | } else if (ci->i_auth_cap == cap) { | ||
| 618 | ci->i_auth_cap = NULL; | 613 | ci->i_auth_cap = NULL; |
| 619 | spin_lock(&mdsc->cap_dirty_lock); | 614 | spin_lock(&mdsc->cap_dirty_lock); |
| 620 | if (!list_empty(&ci->i_dirty_item)) { | 615 | if (!list_empty(&ci->i_dirty_item)) { |
| @@ -695,6 +690,15 @@ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) | |||
| 695 | if (implemented) | 690 | if (implemented) |
| 696 | *implemented |= cap->implemented; | 691 | *implemented |= cap->implemented; |
| 697 | } | 692 | } |
| 693 | /* | ||
| 694 | * exclude caps issued by a non-auth MDS but being revoked | ||
| 695 | * by the auth MDS. The non-auth MDS should be revoking/exporting | ||
| 696 | * these caps, but the message is delayed. | ||
| 697 | */ | ||
| 698 | if (ci->i_auth_cap) { | ||
| 699 | cap = ci->i_auth_cap; | ||
| 700 | have &= ~cap->implemented | cap->issued; | ||
| 701 | } | ||
| 698 | return have; | 702 | return have; |
| 699 | } | 703 | } |
| 700 | 704 | ||
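The new exclusion in __ceph_caps_issued() hinges on the expression have &= ~cap->implemented | cap->issued, which by C precedence is (~implemented) | issued: it clears exactly the bits the auth MDS has implemented but no longer issues, i.e. the bits still being revoked. A short userspace sketch with made-up cap bit values shows the effect; the CAP_* constants below are stand-ins, not the real CEPH_CAP_* definitions.

    #include <stdio.h>

    /* Made-up cap bits purely for illustration. */
    #define CAP_FILE_RD     0x1
    #define CAP_FILE_WR     0x2
    #define CAP_FILE_BUFFER 0x4

    int main(void)
    {
            int have = CAP_FILE_RD | CAP_FILE_WR | CAP_FILE_BUFFER;

            /* auth MDS: WR is still implemented locally but no longer
             * issued, i.e. it is being revoked. */
            int auth_issued      = CAP_FILE_RD;
            int auth_implemented = CAP_FILE_RD | CAP_FILE_WR;

            /* same expression as the patch: drop bits the auth MDS
             * is in the middle of revoking */
            have &= ~auth_implemented | auth_issued;

            printf("have = %#x\n", have);  /* 0x5: WR gone, RD/BUFFER kept */
            return 0;
    }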
| @@ -802,22 +806,28 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) | |||
| 802 | /* | 806 | /* |
| 803 | * Return true if mask caps are currently being revoked by an MDS. | 807 | * Return true if mask caps are currently being revoked by an MDS. |
| 804 | */ | 808 | */ |
| 805 | int ceph_caps_revoking(struct ceph_inode_info *ci, int mask) | 809 | int __ceph_caps_revoking_other(struct ceph_inode_info *ci, |
| 810 | struct ceph_cap *ocap, int mask) | ||
| 806 | { | 811 | { |
| 807 | struct inode *inode = &ci->vfs_inode; | ||
| 808 | struct ceph_cap *cap; | 812 | struct ceph_cap *cap; |
| 809 | struct rb_node *p; | 813 | struct rb_node *p; |
| 810 | int ret = 0; | ||
| 811 | 814 | ||
| 812 | spin_lock(&ci->i_ceph_lock); | ||
| 813 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { | 815 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { |
| 814 | cap = rb_entry(p, struct ceph_cap, ci_node); | 816 | cap = rb_entry(p, struct ceph_cap, ci_node); |
| 815 | if (__cap_is_valid(cap) && | 817 | if (cap != ocap && __cap_is_valid(cap) && |
| 816 | (cap->implemented & ~cap->issued & mask)) { | 818 | (cap->implemented & ~cap->issued & mask)) |
| 817 | ret = 1; | 819 | return 1; |
| 818 | break; | ||
| 819 | } | ||
| 820 | } | 820 | } |
| 821 | return 0; | ||
| 822 | } | ||
| 823 | |||
| 824 | int ceph_caps_revoking(struct ceph_inode_info *ci, int mask) | ||
| 825 | { | ||
| 826 | struct inode *inode = &ci->vfs_inode; | ||
| 827 | int ret; | ||
| 828 | |||
| 829 | spin_lock(&ci->i_ceph_lock); | ||
| 830 | ret = __ceph_caps_revoking_other(ci, NULL, mask); | ||
| 821 | spin_unlock(&ci->i_ceph_lock); | 831 | spin_unlock(&ci->i_ceph_lock); |
| 822 | dout("ceph_caps_revoking %p %s = %d\n", inode, | 832 | dout("ceph_caps_revoking %p %s = %d\n", inode, |
| 823 | ceph_cap_string(mask), ret); | 833 | ceph_cap_string(mask), ret); |
| @@ -1980,8 +1990,15 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, | |||
| 1980 | cap = ci->i_auth_cap; | 1990 | cap = ci->i_auth_cap; |
| 1981 | dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, | 1991 | dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, |
| 1982 | ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); | 1992 | ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); |
| 1993 | |||
| 1983 | __ceph_flush_snaps(ci, &session, 1); | 1994 | __ceph_flush_snaps(ci, &session, 1); |
| 1995 | |||
| 1984 | if (ci->i_flushing_caps) { | 1996 | if (ci->i_flushing_caps) { |
| 1997 | spin_lock(&mdsc->cap_dirty_lock); | ||
| 1998 | list_move_tail(&ci->i_flushing_item, | ||
| 1999 | &cap->session->s_cap_flushing); | ||
| 2000 | spin_unlock(&mdsc->cap_dirty_lock); | ||
| 2001 | |||
| 1985 | delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, | 2002 | delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, |
| 1986 | __ceph_caps_used(ci), | 2003 | __ceph_caps_used(ci), |
| 1987 | __ceph_caps_wanted(ci), | 2004 | __ceph_caps_wanted(ci), |
| @@ -2055,7 +2072,11 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, | |||
| 2055 | /* finish pending truncate */ | 2072 | /* finish pending truncate */ |
| 2056 | while (ci->i_truncate_pending) { | 2073 | while (ci->i_truncate_pending) { |
| 2057 | spin_unlock(&ci->i_ceph_lock); | 2074 | spin_unlock(&ci->i_ceph_lock); |
| 2058 | __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR)); | 2075 | if (!(need & CEPH_CAP_FILE_WR)) |
| 2076 | mutex_lock(&inode->i_mutex); | ||
| 2077 | __ceph_do_pending_vmtruncate(inode); | ||
| 2078 | if (!(need & CEPH_CAP_FILE_WR)) | ||
| 2079 | mutex_unlock(&inode->i_mutex); | ||
| 2059 | spin_lock(&ci->i_ceph_lock); | 2080 | spin_lock(&ci->i_ceph_lock); |
| 2060 | } | 2081 | } |
| 2061 | 2082 | ||
| @@ -2473,6 +2494,11 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
| 2473 | } else { | 2494 | } else { |
| 2474 | dout("grant: %s -> %s\n", ceph_cap_string(cap->issued), | 2495 | dout("grant: %s -> %s\n", ceph_cap_string(cap->issued), |
| 2475 | ceph_cap_string(newcaps)); | 2496 | ceph_cap_string(newcaps)); |
| 2497 | /* non-auth MDS is revoking the newly granted caps? */ | ||
| 2498 | if (cap == ci->i_auth_cap && | ||
| 2499 | __ceph_caps_revoking_other(ci, cap, newcaps)) | ||
| 2500 | check_caps = 2; | ||
| 2501 | |||
| 2476 | cap->issued = newcaps; | 2502 | cap->issued = newcaps; |
| 2477 | cap->implemented |= newcaps; /* add bits only, to | 2503 | cap->implemented |= newcaps; /* add bits only, to |
| 2478 | * avoid stepping on a | 2504 | * avoid stepping on a |
| @@ -3042,21 +3068,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode, | |||
| 3042 | (cap->issued & unless) == 0)) { | 3068 | (cap->issued & unless) == 0)) { |
| 3043 | if ((cap->issued & drop) && | 3069 | if ((cap->issued & drop) && |
| 3044 | (cap->issued & unless) == 0) { | 3070 | (cap->issued & unless) == 0) { |
| 3045 | dout("encode_inode_release %p cap %p %s -> " | 3071 | int wanted = __ceph_caps_wanted(ci); |
| 3046 | "%s\n", inode, cap, | 3072 | if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0) |
| 3073 | wanted |= cap->mds_wanted; | ||
| 3074 | dout("encode_inode_release %p cap %p " | ||
| 3075 | "%s -> %s, wanted %s -> %s\n", inode, cap, | ||
| 3047 | ceph_cap_string(cap->issued), | 3076 | ceph_cap_string(cap->issued), |
| 3048 | ceph_cap_string(cap->issued & ~drop)); | 3077 | ceph_cap_string(cap->issued & ~drop), |
| 3078 | ceph_cap_string(cap->mds_wanted), | ||
| 3079 | ceph_cap_string(wanted)); | ||
| 3080 | |||
| 3049 | cap->issued &= ~drop; | 3081 | cap->issued &= ~drop; |
| 3050 | cap->implemented &= ~drop; | 3082 | cap->implemented &= ~drop; |
| 3051 | if (ci->i_ceph_flags & CEPH_I_NODELAY) { | 3083 | cap->mds_wanted = wanted; |
| 3052 | int wanted = __ceph_caps_wanted(ci); | ||
| 3053 | dout(" wanted %s -> %s (act %s)\n", | ||
| 3054 | ceph_cap_string(cap->mds_wanted), | ||
| 3055 | ceph_cap_string(cap->mds_wanted & | ||
| 3056 | ~wanted), | ||
| 3057 | ceph_cap_string(wanted)); | ||
| 3058 | cap->mds_wanted &= wanted; | ||
| 3059 | } | ||
| 3060 | } else { | 3084 | } else { |
| 3061 | dout("encode_inode_release %p cap %p %s" | 3085 | dout("encode_inode_release %p cap %p %s" |
| 3062 | " (force)\n", inode, cap, | 3086 | " (force)\n", inode, cap, |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 656e16907430..2ddf061c1c4a 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -716,7 +716,6 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 716 | if (ceph_snap(inode) != CEPH_NOSNAP) | 716 | if (ceph_snap(inode) != CEPH_NOSNAP) |
| 717 | return -EROFS; | 717 | return -EROFS; |
| 718 | 718 | ||
| 719 | sb_start_write(inode->i_sb); | ||
| 720 | mutex_lock(&inode->i_mutex); | 719 | mutex_lock(&inode->i_mutex); |
| 721 | hold_mutex = true; | 720 | hold_mutex = true; |
| 722 | 721 | ||
| @@ -809,7 +808,6 @@ retry_snap: | |||
| 809 | out: | 808 | out: |
| 810 | if (hold_mutex) | 809 | if (hold_mutex) |
| 811 | mutex_unlock(&inode->i_mutex); | 810 | mutex_unlock(&inode->i_mutex); |
| 812 | sb_end_write(inode->i_sb); | ||
| 813 | current->backing_dev_info = NULL; | 811 | current->backing_dev_info = NULL; |
| 814 | 812 | ||
| 815 | return written ? written : err; | 813 | return written ? written : err; |
| @@ -824,7 +822,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) | |||
| 824 | int ret; | 822 | int ret; |
| 825 | 823 | ||
| 826 | mutex_lock(&inode->i_mutex); | 824 | mutex_lock(&inode->i_mutex); |
| 827 | __ceph_do_pending_vmtruncate(inode, false); | 825 | __ceph_do_pending_vmtruncate(inode); |
| 828 | 826 | ||
| 829 | if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) { | 827 | if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) { |
| 830 | ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); | 828 | ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); |
| @@ -866,16 +864,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) | |||
| 866 | break; | 864 | break; |
| 867 | } | 865 | } |
| 868 | 866 | ||
| 869 | if (offset < 0 || offset > inode->i_sb->s_maxbytes) { | 867 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
| 870 | offset = -EINVAL; | ||
| 871 | goto out; | ||
| 872 | } | ||
| 873 | |||
| 874 | /* Special lock needed here? */ | ||
| 875 | if (offset != file->f_pos) { | ||
| 876 | file->f_pos = offset; | ||
| 877 | file->f_version = 0; | ||
| 878 | } | ||
| 879 | 868 | ||
| 880 | out: | 869 | out: |
| 881 | mutex_unlock(&inode->i_mutex); | 870 | mutex_unlock(&inode->i_mutex); |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index be0f7e20d62e..f3a2abf28a77 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -903,8 +903,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
| 903 | } else if (realdn) { | 903 | } else if (realdn) { |
| 904 | dout("dn %p (%d) spliced with %p (%d) " | 904 | dout("dn %p (%d) spliced with %p (%d) " |
| 905 | "inode %p ino %llx.%llx\n", | 905 | "inode %p ino %llx.%llx\n", |
| 906 | dn, dn->d_count, | 906 | dn, d_count(dn), |
| 907 | realdn, realdn->d_count, | 907 | realdn, d_count(realdn), |
| 908 | realdn->d_inode, ceph_vinop(realdn->d_inode)); | 908 | realdn->d_inode, ceph_vinop(realdn->d_inode)); |
| 909 | dput(dn); | 909 | dput(dn); |
| 910 | dn = realdn; | 910 | dn = realdn; |
| @@ -1465,7 +1465,9 @@ static void ceph_vmtruncate_work(struct work_struct *work) | |||
| 1465 | struct inode *inode = &ci->vfs_inode; | 1465 | struct inode *inode = &ci->vfs_inode; |
| 1466 | 1466 | ||
| 1467 | dout("vmtruncate_work %p\n", inode); | 1467 | dout("vmtruncate_work %p\n", inode); |
| 1468 | __ceph_do_pending_vmtruncate(inode, true); | 1468 | mutex_lock(&inode->i_mutex); |
| 1469 | __ceph_do_pending_vmtruncate(inode); | ||
| 1470 | mutex_unlock(&inode->i_mutex); | ||
| 1469 | iput(inode); | 1471 | iput(inode); |
| 1470 | } | 1472 | } |
| 1471 | 1473 | ||
| @@ -1492,7 +1494,7 @@ void ceph_queue_vmtruncate(struct inode *inode) | |||
| 1492 | * Make sure any pending truncation is applied before doing anything | 1494 | * Make sure any pending truncation is applied before doing anything |
| 1493 | * that may depend on it. | 1495 | * that may depend on it. |
| 1494 | */ | 1496 | */ |
| 1495 | void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock) | 1497 | void __ceph_do_pending_vmtruncate(struct inode *inode) |
| 1496 | { | 1498 | { |
| 1497 | struct ceph_inode_info *ci = ceph_inode(inode); | 1499 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 1498 | u64 to; | 1500 | u64 to; |
| @@ -1525,11 +1527,7 @@ retry: | |||
| 1525 | ci->i_truncate_pending, to); | 1527 | ci->i_truncate_pending, to); |
| 1526 | spin_unlock(&ci->i_ceph_lock); | 1528 | spin_unlock(&ci->i_ceph_lock); |
| 1527 | 1529 | ||
| 1528 | if (needlock) | ||
| 1529 | mutex_lock(&inode->i_mutex); | ||
| 1530 | truncate_inode_pages(inode->i_mapping, to); | 1530 | truncate_inode_pages(inode->i_mapping, to); |
| 1531 | if (needlock) | ||
| 1532 | mutex_unlock(&inode->i_mutex); | ||
| 1533 | 1531 | ||
| 1534 | spin_lock(&ci->i_ceph_lock); | 1532 | spin_lock(&ci->i_ceph_lock); |
| 1535 | if (to == ci->i_truncate_size) { | 1533 | if (to == ci->i_truncate_size) { |
| @@ -1588,7 +1586,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 1588 | if (ceph_snap(inode) != CEPH_NOSNAP) | 1586 | if (ceph_snap(inode) != CEPH_NOSNAP) |
| 1589 | return -EROFS; | 1587 | return -EROFS; |
| 1590 | 1588 | ||
| 1591 | __ceph_do_pending_vmtruncate(inode, false); | 1589 | __ceph_do_pending_vmtruncate(inode); |
| 1592 | 1590 | ||
| 1593 | err = inode_change_ok(inode, attr); | 1591 | err = inode_change_ok(inode, attr); |
| 1594 | if (err != 0) | 1592 | if (err != 0) |
| @@ -1770,7 +1768,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 1770 | ceph_cap_string(dirtied), mask); | 1768 | ceph_cap_string(dirtied), mask); |
| 1771 | 1769 | ||
| 1772 | ceph_mdsc_put_request(req); | 1770 | ceph_mdsc_put_request(req); |
| 1773 | __ceph_do_pending_vmtruncate(inode, false); | 1771 | __ceph_do_pending_vmtruncate(inode); |
| 1774 | return err; | 1772 | return err; |
| 1775 | out: | 1773 | out: |
| 1776 | spin_unlock(&ci->i_ceph_lock); | 1774 | spin_unlock(&ci->i_ceph_lock); |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ebbf680378e2..ae6d14e82b0f 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
| @@ -169,7 +169,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
| 169 | } | 169 | } |
| 170 | 170 | ||
| 171 | /** | 171 | /** |
| 172 | * Must be called with BKL already held. Fills in the passed | 172 | * Must be called with lock_flocks() already held. Fills in the passed |
| 173 | * counter variables, so you can prepare pagelist metadata before calling | 173 | * counter variables, so you can prepare pagelist metadata before calling |
| 174 | * ceph_encode_locks. | 174 | * ceph_encode_locks. |
| 175 | */ | 175 | */ |
| @@ -192,7 +192,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | |||
| 192 | 192 | ||
| 193 | /** | 193 | /** |
| 194 | * Encode the flock and fcntl locks for the given inode into the ceph_filelock | 194 | * Encode the flock and fcntl locks for the given inode into the ceph_filelock |
| 195 | * array. Must be called with lock_flocks() already held. | 195 | * array. Must be called with inode->i_lock already held. |
| 196 | * If we encounter more of a specific lock type than expected, return -ENOSPC. | 196 | * If we encounter more of a specific lock type than expected, return -ENOSPC. |
| 197 | */ | 197 | */ |
| 198 | int ceph_encode_locks_to_buffer(struct inode *inode, | 198 | int ceph_encode_locks_to_buffer(struct inode *inode, |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4d2920304be8..187bf214444d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -1391,6 +1391,7 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, | |||
| 1391 | num = le32_to_cpu(head->num); | 1391 | num = le32_to_cpu(head->num); |
| 1392 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); | 1392 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); |
| 1393 | head->num = cpu_to_le32(0); | 1393 | head->num = cpu_to_le32(0); |
| 1394 | msg->front.iov_len = sizeof(*head); | ||
| 1394 | session->s_num_cap_releases += num; | 1395 | session->s_num_cap_releases += num; |
| 1395 | 1396 | ||
| 1396 | /* requeue completed messages */ | 1397 | /* requeue completed messages */ |
| @@ -1553,7 +1554,7 @@ retry: | |||
| 1553 | *base = ceph_ino(temp->d_inode); | 1554 | *base = ceph_ino(temp->d_inode); |
| 1554 | *plen = len; | 1555 | *plen = len; |
| 1555 | dout("build_path on %p %d built %llx '%.*s'\n", | 1556 | dout("build_path on %p %d built %llx '%.*s'\n", |
| 1556 | dentry, dentry->d_count, *base, len, path); | 1557 | dentry, d_count(dentry), *base, len, path); |
| 1557 | return path; | 1558 | return path; |
| 1558 | } | 1559 | } |
| 1559 | 1560 | ||
| @@ -2454,6 +2455,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 2454 | spin_lock(&ci->i_ceph_lock); | 2455 | spin_lock(&ci->i_ceph_lock); |
| 2455 | cap->seq = 0; /* reset cap seq */ | 2456 | cap->seq = 0; /* reset cap seq */ |
| 2456 | cap->issue_seq = 0; /* and issue_seq */ | 2457 | cap->issue_seq = 0; /* and issue_seq */ |
| 2458 | cap->mseq = 0; /* and migrate_seq */ | ||
| 2457 | 2459 | ||
| 2458 | if (recon_state->flock) { | 2460 | if (recon_state->flock) { |
| 2459 | rec.v2.cap_id = cpu_to_le64(cap->cap_id); | 2461 | rec.v2.cap_id = cpu_to_le64(cap->cap_id); |
| @@ -2481,20 +2483,20 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 2481 | struct ceph_filelock *flocks; | 2483 | struct ceph_filelock *flocks; |
| 2482 | 2484 | ||
| 2483 | encode_again: | 2485 | encode_again: |
| 2484 | lock_flocks(); | 2486 | spin_lock(&inode->i_lock); |
| 2485 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); | 2487 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); |
| 2486 | unlock_flocks(); | 2488 | spin_unlock(&inode->i_lock); |
| 2487 | flocks = kmalloc((num_fcntl_locks+num_flock_locks) * | 2489 | flocks = kmalloc((num_fcntl_locks+num_flock_locks) * |
| 2488 | sizeof(struct ceph_filelock), GFP_NOFS); | 2490 | sizeof(struct ceph_filelock), GFP_NOFS); |
| 2489 | if (!flocks) { | 2491 | if (!flocks) { |
| 2490 | err = -ENOMEM; | 2492 | err = -ENOMEM; |
| 2491 | goto out_free; | 2493 | goto out_free; |
| 2492 | } | 2494 | } |
| 2493 | lock_flocks(); | 2495 | spin_lock(&inode->i_lock); |
| 2494 | err = ceph_encode_locks_to_buffer(inode, flocks, | 2496 | err = ceph_encode_locks_to_buffer(inode, flocks, |
| 2495 | num_fcntl_locks, | 2497 | num_fcntl_locks, |
| 2496 | num_flock_locks); | 2498 | num_flock_locks); |
| 2497 | unlock_flocks(); | 2499 | spin_unlock(&inode->i_lock); |
| 2498 | if (err) { | 2500 | if (err) { |
| 2499 | kfree(flocks); | 2501 | kfree(flocks); |
| 2500 | if (err == -ENOSPC) | 2502 | if (err == -ENOSPC) |
| @@ -3040,8 +3042,10 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) | |||
| 3040 | fsc->mdsc = mdsc; | 3042 | fsc->mdsc = mdsc; |
| 3041 | mutex_init(&mdsc->mutex); | 3043 | mutex_init(&mdsc->mutex); |
| 3042 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); | 3044 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); |
| 3043 | if (mdsc->mdsmap == NULL) | 3045 | if (mdsc->mdsmap == NULL) { |
| 3046 | kfree(mdsc); | ||
| 3044 | return -ENOMEM; | 3047 | return -ENOMEM; |
| 3048 | } | ||
| 3045 | 3049 | ||
| 3046 | init_completion(&mdsc->safe_umount_waiters); | 3050 | init_completion(&mdsc->safe_umount_waiters); |
| 3047 | init_waitqueue_head(&mdsc->session_close_wq); | 3051 | init_waitqueue_head(&mdsc->session_close_wq); |
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 9278dec9e940..132b64eeecd4 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c | |||
| @@ -92,6 +92,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
| 92 | u32 num_export_targets; | 92 | u32 num_export_targets; |
| 93 | void *pexport_targets = NULL; | 93 | void *pexport_targets = NULL; |
| 94 | struct ceph_timespec laggy_since; | 94 | struct ceph_timespec laggy_since; |
| 95 | struct ceph_mds_info *info; | ||
| 95 | 96 | ||
| 96 | ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); | 97 | ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); |
| 97 | global_id = ceph_decode_64(p); | 98 | global_id = ceph_decode_64(p); |
| @@ -126,24 +127,27 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
| 126 | i+1, n, global_id, mds, inc, | 127 | i+1, n, global_id, mds, inc, |
| 127 | ceph_pr_addr(&addr.in_addr), | 128 | ceph_pr_addr(&addr.in_addr), |
| 128 | ceph_mds_state_name(state)); | 129 | ceph_mds_state_name(state)); |
| 129 | if (mds >= 0 && mds < m->m_max_mds && state > 0) { | 130 | |
| 130 | m->m_info[mds].global_id = global_id; | 131 | if (mds < 0 || mds >= m->m_max_mds || state <= 0) |
| 131 | m->m_info[mds].state = state; | 132 | continue; |
| 132 | m->m_info[mds].addr = addr; | 133 | |
| 133 | m->m_info[mds].laggy = | 134 | info = &m->m_info[mds]; |
| 134 | (laggy_since.tv_sec != 0 || | 135 | info->global_id = global_id; |
| 135 | laggy_since.tv_nsec != 0); | 136 | info->state = state; |
| 136 | m->m_info[mds].num_export_targets = num_export_targets; | 137 | info->addr = addr; |
| 137 | if (num_export_targets) { | 138 | info->laggy = (laggy_since.tv_sec != 0 || |
| 138 | m->m_info[mds].export_targets = | 139 | laggy_since.tv_nsec != 0); |
| 139 | kcalloc(num_export_targets, sizeof(u32), | 140 | info->num_export_targets = num_export_targets; |
| 140 | GFP_NOFS); | 141 | if (num_export_targets) { |
| 141 | for (j = 0; j < num_export_targets; j++) | 142 | info->export_targets = kcalloc(num_export_targets, |
| 142 | m->m_info[mds].export_targets[j] = | 143 | sizeof(u32), GFP_NOFS); |
| 143 | ceph_decode_32(&pexport_targets); | 144 | if (info->export_targets == NULL) |
| 144 | } else { | 145 | goto badmem; |
| 145 | m->m_info[mds].export_targets = NULL; | 146 | for (j = 0; j < num_export_targets; j++) |
| 146 | } | 147 | info->export_targets[j] = |
| 148 | ceph_decode_32(&pexport_targets); | ||
| 149 | } else { | ||
| 150 | info->export_targets = NULL; | ||
| 147 | } | 151 | } |
| 148 | } | 152 | } |
| 149 | 153 | ||
| @@ -170,7 +174,7 @@ bad: | |||
| 170 | DUMP_PREFIX_OFFSET, 16, 1, | 174 | DUMP_PREFIX_OFFSET, 16, 1, |
| 171 | start, end - start, true); | 175 | start, end - start, true); |
| 172 | ceph_mdsmap_destroy(m); | 176 | ceph_mdsmap_destroy(m); |
| 173 | return ERR_PTR(-EINVAL); | 177 | return ERR_PTR(err); |
| 174 | } | 178 | } |
| 175 | 179 | ||
| 176 | void ceph_mdsmap_destroy(struct ceph_mdsmap *m) | 180 | void ceph_mdsmap_destroy(struct ceph_mdsmap *m) |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 7d377c9a5e35..6627b26a800c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
| @@ -357,7 +357,7 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, | |||
| 357 | } | 357 | } |
| 358 | err = -EINVAL; | 358 | err = -EINVAL; |
| 359 | dev_name_end--; /* back up to ':' separator */ | 359 | dev_name_end--; /* back up to ':' separator */ |
| 360 | if (*dev_name_end != ':') { | 360 | if (dev_name_end < dev_name || *dev_name_end != ':') { |
| 361 | pr_err("device name is missing path (no : separator in %s)\n", | 361 | pr_err("device name is missing path (no : separator in %s)\n", |
| 362 | dev_name); | 362 | dev_name); |
| 363 | goto out; | 363 | goto out; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 7ccfdb4aea2e..cbded572345e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -534,7 +534,7 @@ extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci); | |||
| 534 | extern void ceph_caps_init(struct ceph_mds_client *mdsc); | 534 | extern void ceph_caps_init(struct ceph_mds_client *mdsc); |
| 535 | extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); | 535 | extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); |
| 536 | extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); | 536 | extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); |
| 537 | extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, | 537 | extern void ceph_reserve_caps(struct ceph_mds_client *mdsc, |
| 538 | struct ceph_cap_reservation *ctx, int need); | 538 | struct ceph_cap_reservation *ctx, int need); |
| 539 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | 539 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, |
| 540 | struct ceph_cap_reservation *ctx); | 540 | struct ceph_cap_reservation *ctx); |
| @@ -692,7 +692,7 @@ extern int ceph_readdir_prepopulate(struct ceph_mds_request *req, | |||
| 692 | extern int ceph_inode_holds_cap(struct inode *inode, int mask); | 692 | extern int ceph_inode_holds_cap(struct inode *inode, int mask); |
| 693 | 693 | ||
| 694 | extern int ceph_inode_set_size(struct inode *inode, loff_t size); | 694 | extern int ceph_inode_set_size(struct inode *inode, loff_t size); |
| 695 | extern void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock); | 695 | extern void __ceph_do_pending_vmtruncate(struct inode *inode); |
| 696 | extern void ceph_queue_vmtruncate(struct inode *inode); | 696 | extern void ceph_queue_vmtruncate(struct inode *inode); |
| 697 | 697 | ||
| 698 | extern void ceph_queue_invalidate(struct inode *inode); | 698 | extern void ceph_queue_invalidate(struct inode *inode); |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 9b6b2b6dd164..be661d8f532a 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
| @@ -675,17 +675,18 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, | |||
| 675 | if (!ceph_is_valid_xattr(name)) | 675 | if (!ceph_is_valid_xattr(name)) |
| 676 | return -ENODATA; | 676 | return -ENODATA; |
| 677 | 677 | ||
| 678 | spin_lock(&ci->i_ceph_lock); | ||
| 679 | dout("getxattr %p ver=%lld index_ver=%lld\n", inode, | ||
| 680 | ci->i_xattrs.version, ci->i_xattrs.index_version); | ||
| 681 | 678 | ||
| 682 | /* let's see if a virtual xattr was requested */ | 679 | /* let's see if a virtual xattr was requested */ |
| 683 | vxattr = ceph_match_vxattr(inode, name); | 680 | vxattr = ceph_match_vxattr(inode, name); |
| 684 | if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) { | 681 | if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) { |
| 685 | err = vxattr->getxattr_cb(ci, value, size); | 682 | err = vxattr->getxattr_cb(ci, value, size); |
| 686 | goto out; | 683 | return err; |
| 687 | } | 684 | } |
| 688 | 685 | ||
| 686 | spin_lock(&ci->i_ceph_lock); | ||
| 687 | dout("getxattr %p ver=%lld index_ver=%lld\n", inode, | ||
| 688 | ci->i_xattrs.version, ci->i_xattrs.index_version); | ||
| 689 | |||
| 689 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && | 690 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && |
| 690 | (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { | 691 | (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { |
| 691 | goto get_xattr; | 692 | goto get_xattr; |
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 2906ee276408..603f18a65c12 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig | |||
| @@ -10,6 +10,7 @@ config CIFS | |||
| 10 | select CRYPTO_ECB | 10 | select CRYPTO_ECB |
| 11 | select CRYPTO_DES | 11 | select CRYPTO_DES |
| 12 | select CRYPTO_SHA256 | 12 | select CRYPTO_SHA256 |
| 13 | select CRYPTO_CMAC | ||
| 13 | help | 14 | help |
| 14 | This is the client VFS module for the Common Internet File System | 15 | This is the client VFS module for the Common Internet File System |
| 15 | (CIFS) protocol which is the successor to the Server Message Block | 16 | (CIFS) protocol which is the successor to the Server Message Block |
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index d59748346020..f3ac4154cbb6 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c | |||
| @@ -213,7 +213,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) | |||
| 213 | tcon->nativeFileSystem); | 213 | tcon->nativeFileSystem); |
| 214 | } | 214 | } |
| 215 | seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x" | 215 | seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x" |
| 216 | "\nPathComponentMax: %d Status: 0x%d", | 216 | "\n\tPathComponentMax: %d Status: 0x%d", |
| 217 | le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), | 217 | le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), |
| 218 | le32_to_cpu(tcon->fsAttrInfo.Attributes), | 218 | le32_to_cpu(tcon->fsAttrInfo.Attributes), |
| 219 | le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), | 219 | le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), |
| @@ -224,6 +224,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) | |||
| 224 | seq_puts(m, " type: CDROM "); | 224 | seq_puts(m, " type: CDROM "); |
| 225 | else | 225 | else |
| 226 | seq_printf(m, " type: %d ", dev_type); | 226 | seq_printf(m, " type: %d ", dev_type); |
| 227 | if (server->ops->dump_share_caps) | ||
| 228 | server->ops->dump_share_caps(m, tcon); | ||
| 227 | 229 | ||
| 228 | if (tcon->need_reconnect) | 230 | if (tcon->need_reconnect) |
| 229 | seq_puts(m, "\tDISCONNECTED "); | 231 | seq_puts(m, "\tDISCONNECTED "); |
| @@ -595,9 +597,36 @@ static int cifs_security_flags_proc_open(struct inode *inode, struct file *file) | |||
| 595 | return single_open(file, cifs_security_flags_proc_show, NULL); | 597 | return single_open(file, cifs_security_flags_proc_show, NULL); |
| 596 | } | 598 | } |
| 597 | 599 | ||
| 600 | /* | ||
| 601 | * Ensure that if someone sets a MUST flag, that we disable all other MAY | ||
| 602 | * flags except for the ones corresponding to the given MUST flag. If there are | ||
| 603 | * multiple MUST flags, then try to prefer more secure ones. | ||
| 604 | */ | ||
| 605 | static void | ||
| 606 | cifs_security_flags_handle_must_flags(unsigned int *flags) | ||
| 607 | { | ||
| 608 | unsigned int signflags = *flags & CIFSSEC_MUST_SIGN; | ||
| 609 | |||
| 610 | if ((*flags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) | ||
| 611 | *flags = CIFSSEC_MUST_KRB5; | ||
| 612 | else if ((*flags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP) | ||
| 613 | *flags = CIFSSEC_MUST_NTLMSSP; | ||
| 614 | else if ((*flags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2) | ||
| 615 | *flags = CIFSSEC_MUST_NTLMV2; | ||
| 616 | else if ((*flags & CIFSSEC_MUST_NTLM) == CIFSSEC_MUST_NTLM) | ||
| 617 | *flags = CIFSSEC_MUST_NTLM; | ||
| 618 | else if ((*flags & CIFSSEC_MUST_LANMAN) == CIFSSEC_MUST_LANMAN) | ||
| 619 | *flags = CIFSSEC_MUST_LANMAN; | ||
| 620 | else if ((*flags & CIFSSEC_MUST_PLNTXT) == CIFSSEC_MUST_PLNTXT) | ||
| 621 | *flags = CIFSSEC_MUST_PLNTXT; | ||
| 622 | |||
| 623 | *flags |= signflags; | ||
| 624 | } | ||
| 625 | |||
| 598 | static ssize_t cifs_security_flags_proc_write(struct file *file, | 626 | static ssize_t cifs_security_flags_proc_write(struct file *file, |
| 599 | const char __user *buffer, size_t count, loff_t *ppos) | 627 | const char __user *buffer, size_t count, loff_t *ppos) |
| 600 | { | 628 | { |
| 629 | int rc; | ||
| 601 | unsigned int flags; | 630 | unsigned int flags; |
| 602 | char flags_string[12]; | 631 | char flags_string[12]; |
| 603 | char c; | 632 | char c; |
| @@ -620,26 +649,35 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, | |||
| 620 | global_secflags = CIFSSEC_MAX; | 649 | global_secflags = CIFSSEC_MAX; |
| 621 | return count; | 650 | return count; |
| 622 | } else if (!isdigit(c)) { | 651 | } else if (!isdigit(c)) { |
| 623 | cifs_dbg(VFS, "invalid flag %c\n", c); | 652 | cifs_dbg(VFS, "Invalid SecurityFlags: %s\n", |
| 653 | flags_string); | ||
| 624 | return -EINVAL; | 654 | return -EINVAL; |
| 625 | } | 655 | } |
| 626 | } | 656 | } |
| 627 | /* else we have a number */ | ||
| 628 | 657 | ||
| 629 | flags = simple_strtoul(flags_string, NULL, 0); | 658 | /* else we have a number */ |
| 659 | rc = kstrtouint(flags_string, 0, &flags); | ||
| 660 | if (rc) { | ||
| 661 | cifs_dbg(VFS, "Invalid SecurityFlags: %s\n", | ||
| 662 | flags_string); | ||
| 663 | return rc; | ||
| 664 | } | ||
| 630 | 665 | ||
| 631 | cifs_dbg(FYI, "sec flags 0x%x\n", flags); | 666 | cifs_dbg(FYI, "sec flags 0x%x\n", flags); |
| 632 | 667 | ||
| 633 | if (flags <= 0) { | 668 | if (flags == 0) { |
| 634 | cifs_dbg(VFS, "invalid security flags %s\n", flags_string); | 669 | cifs_dbg(VFS, "Invalid SecurityFlags: %s\n", flags_string); |
| 635 | return -EINVAL; | 670 | return -EINVAL; |
| 636 | } | 671 | } |
| 637 | 672 | ||
| 638 | if (flags & ~CIFSSEC_MASK) { | 673 | if (flags & ~CIFSSEC_MASK) { |
| 639 | cifs_dbg(VFS, "attempt to set unsupported security flags 0x%x\n", | 674 | cifs_dbg(VFS, "Unsupported security flags: 0x%x\n", |
| 640 | flags & ~CIFSSEC_MASK); | 675 | flags & ~CIFSSEC_MASK); |
| 641 | return -EINVAL; | 676 | return -EINVAL; |
| 642 | } | 677 | } |
| 678 | |||
| 679 | cifs_security_flags_handle_must_flags(&flags); | ||
| 680 | |||
| 643 | /* flags look ok - update the global security flags for cifs module */ | 681 | /* flags look ok - update the global security flags for cifs module */ |
| 644 | global_secflags = flags; | 682 | global_secflags = flags; |
| 645 | if (global_secflags & CIFSSEC_MUST_SIGN) { | 683 | if (global_secflags & CIFSSEC_MUST_SIGN) { |
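The new cifs_security_flags_handle_must_flags() above reduces the SecurityFlags word to a single MUST mechanism, preferring stronger ones, while carrying any MUST_SIGN bits through unchanged. The reduction pattern can be sketched outside the kernel as below; the flag values are illustrative only and do not match the real CIFSSEC_* constants in cifsglob.h.

	#include <stdio.h>

	/* Illustrative values only; the real CIFSSEC_* masks live in cifsglob.h. */
	#define MAY_SIGN    0x00001
	#define MAY_NTLM    0x00002
	#define MAY_NTLMV2  0x00004
	#define MAY_KRB5    0x00008
	#define MUST_SIGN   (MAY_SIGN   | 0x10000)
	#define MUST_NTLM   (MAY_NTLM   | 0x20000)
	#define MUST_NTLMV2 (MAY_NTLMV2 | 0x40000)
	#define MUST_KRB5   (MAY_KRB5   | 0x80000)

	/* Keep only the strongest MUST mechanism, then re-apply any MUST_SIGN bits. */
	static void handle_must_flags(unsigned int *flags)
	{
		unsigned int signflags = *flags & MUST_SIGN;

		if ((*flags & MUST_KRB5) == MUST_KRB5)
			*flags = MUST_KRB5;
		else if ((*flags & MUST_NTLMV2) == MUST_NTLMV2)
			*flags = MUST_NTLMV2;
		else if ((*flags & MUST_NTLM) == MUST_NTLM)
			*flags = MUST_NTLM;

		*flags |= signflags;
	}

	int main(void)
	{
		unsigned int flags = MUST_KRB5 | MUST_SIGN | MAY_NTLM;

		handle_must_flags(&flags);
		printf("0x%x\n", flags);	/* only the krb5 and signing bits survive */
		return 0;
	}

Run against MUST_KRB5 | MUST_SIGN | MAY_NTLM, the stray NTLM bit is dropped, which mirrors how the proc handler now sanitizes writes before updating global_secflags.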
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 4fb097468e21..fe8d6276410a 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h | |||
| @@ -327,14 +327,14 @@ UniToupper(register wchar_t uc) | |||
| 327 | /* | 327 | /* |
| 328 | * UniStrupr: Upper case a unicode string | 328 | * UniStrupr: Upper case a unicode string |
| 329 | */ | 329 | */ |
| 330 | static inline wchar_t * | 330 | static inline __le16 * |
| 331 | UniStrupr(register wchar_t *upin) | 331 | UniStrupr(register __le16 *upin) |
| 332 | { | 332 | { |
| 333 | register wchar_t *up; | 333 | register __le16 *up; |
| 334 | 334 | ||
| 335 | up = upin; | 335 | up = upin; |
| 336 | while (*up) { /* For all characters */ | 336 | while (*up) { /* For all characters */ |
| 337 | *up = UniToupper(*up); | 337 | *up = cpu_to_le16(UniToupper(le16_to_cpu(*up))); |
| 338 | up++; | 338 | up++; |
| 339 | } | 339 | } |
| 340 | return upin; /* Return input pointer */ | 340 | return upin; /* Return input pointer */ |
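The UniStrupr() change above makes byte order explicit: each __le16 code unit is converted to host order, upper-cased, and converted back, so the helper stays correct on big-endian machines and matches the __le16 buffer that calc_ntlmv2_hash() now passes in. A rough stand-alone illustration of that round trip, with byte-level stand-ins for le16_to_cpu()/cpu_to_le16() and an ASCII-only uppercase in place of UniToupper():

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-ins for the kernel's le16_to_cpu()/cpu_to_le16() helpers. */
	static uint16_t le16_to_host(uint16_t v)
	{
		const uint8_t *p = (const uint8_t *)&v;
		return (uint16_t)(p[0] | (p[1] << 8));
	}

	static uint16_t host_to_le16(uint16_t v)
	{
		uint16_t out;
		uint8_t *p = (uint8_t *)&out;
		p[0] = v & 0xff;
		p[1] = v >> 8;
		return out;
	}

	/* Upper-case a little-endian UTF-16 string in place (ASCII range only here). */
	static void le16_strupr(uint16_t *s)
	{
		for (; *s; s++) {
			uint16_t c = le16_to_host(*s);
			if (c >= 'a' && c <= 'z')
				c -= 0x20;
			*s = host_to_le16(c);
		}
	}

	int main(void)
	{
		uint16_t name[] = { host_to_le16('a'), host_to_le16('b'), 0 };

		le16_strupr(name);
		printf("%c%c\n", le16_to_host(name[0]), le16_to_host(name[1]));	/* AB */
		return 0;
	}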
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 71436d1fca13..3d8bf941d126 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
| @@ -276,7 +276,6 @@ int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, | |||
| 276 | strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE); | 276 | strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE); |
| 277 | 277 | ||
| 278 | if (!encrypt && global_secflags & CIFSSEC_MAY_PLNTXT) { | 278 | if (!encrypt && global_secflags & CIFSSEC_MAY_PLNTXT) { |
| 279 | memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); | ||
| 280 | memcpy(lnm_session_key, password_with_pad, | 279 | memcpy(lnm_session_key, password_with_pad, |
| 281 | CIFS_ENCPWD_SIZE); | 280 | CIFS_ENCPWD_SIZE); |
| 282 | return 0; | 281 | return 0; |
| @@ -414,7 +413,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
| 414 | int rc = 0; | 413 | int rc = 0; |
| 415 | int len; | 414 | int len; |
| 416 | char nt_hash[CIFS_NTHASH_SIZE]; | 415 | char nt_hash[CIFS_NTHASH_SIZE]; |
| 417 | wchar_t *user; | 416 | __le16 *user; |
| 418 | wchar_t *domain; | 417 | wchar_t *domain; |
| 419 | wchar_t *server; | 418 | wchar_t *server; |
| 420 | 419 | ||
| @@ -439,7 +438,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
| 439 | return rc; | 438 | return rc; |
| 440 | } | 439 | } |
| 441 | 440 | ||
| 442 | /* convert ses->user_name to unicode and uppercase */ | 441 | /* convert ses->user_name to unicode */ |
| 443 | len = ses->user_name ? strlen(ses->user_name) : 0; | 442 | len = ses->user_name ? strlen(ses->user_name) : 0; |
| 444 | user = kmalloc(2 + (len * 2), GFP_KERNEL); | 443 | user = kmalloc(2 + (len * 2), GFP_KERNEL); |
| 445 | if (user == NULL) { | 444 | if (user == NULL) { |
| @@ -448,7 +447,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
| 448 | } | 447 | } |
| 449 | 448 | ||
| 450 | if (len) { | 449 | if (len) { |
| 451 | len = cifs_strtoUTF16((__le16 *)user, ses->user_name, len, nls_cp); | 450 | len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); |
| 452 | UniStrupr(user); | 451 | UniStrupr(user); |
| 453 | } else { | 452 | } else { |
| 454 | memset(user, '\0', 2); | 453 | memset(user, '\0', 2); |
| @@ -536,7 +535,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) | |||
| 536 | return rc; | 535 | return rc; |
| 537 | } | 536 | } |
| 538 | 537 | ||
| 539 | if (ses->server->secType == RawNTLMSSP) | 538 | if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) |
| 540 | memcpy(ses->auth_key.response + offset, | 539 | memcpy(ses->auth_key.response + offset, |
| 541 | ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); | 540 | ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); |
| 542 | else | 541 | else |
| @@ -568,7 +567,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) | |||
| 568 | char ntlmv2_hash[16]; | 567 | char ntlmv2_hash[16]; |
| 569 | unsigned char *tiblob = NULL; /* target info blob */ | 568 | unsigned char *tiblob = NULL; /* target info blob */ |
| 570 | 569 | ||
| 571 | if (ses->server->secType == RawNTLMSSP) { | 570 | if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) { |
| 572 | if (!ses->domainName) { | 571 | if (!ses->domainName) { |
| 573 | rc = find_domain_name(ses, nls_cp); | 572 | rc = find_domain_name(ses, nls_cp); |
| 574 | if (rc) { | 573 | if (rc) { |
| @@ -706,6 +705,9 @@ calc_seckey(struct cifs_ses *ses) | |||
| 706 | void | 705 | void |
| 707 | cifs_crypto_shash_release(struct TCP_Server_Info *server) | 706 | cifs_crypto_shash_release(struct TCP_Server_Info *server) |
| 708 | { | 707 | { |
| 708 | if (server->secmech.cmacaes) | ||
| 709 | crypto_free_shash(server->secmech.cmacaes); | ||
| 710 | |||
| 709 | if (server->secmech.hmacsha256) | 711 | if (server->secmech.hmacsha256) |
| 710 | crypto_free_shash(server->secmech.hmacsha256); | 712 | crypto_free_shash(server->secmech.hmacsha256); |
| 711 | 713 | ||
| @@ -715,6 +717,8 @@ cifs_crypto_shash_release(struct TCP_Server_Info *server) | |||
| 715 | if (server->secmech.hmacmd5) | 717 | if (server->secmech.hmacmd5) |
| 716 | crypto_free_shash(server->secmech.hmacmd5); | 718 | crypto_free_shash(server->secmech.hmacmd5); |
| 717 | 719 | ||
| 720 | kfree(server->secmech.sdesccmacaes); | ||
| 721 | |||
| 718 | kfree(server->secmech.sdeschmacsha256); | 722 | kfree(server->secmech.sdeschmacsha256); |
| 719 | 723 | ||
| 720 | kfree(server->secmech.sdeschmacmd5); | 724 | kfree(server->secmech.sdeschmacmd5); |
| @@ -748,6 +752,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) | |||
| 748 | goto crypto_allocate_hmacsha256_fail; | 752 | goto crypto_allocate_hmacsha256_fail; |
| 749 | } | 753 | } |
| 750 | 754 | ||
| 755 | server->secmech.cmacaes = crypto_alloc_shash("cmac(aes)", 0, 0); | ||
| 756 | if (IS_ERR(server->secmech.cmacaes)) { | ||
| 757 | cifs_dbg(VFS, "could not allocate crypto cmac-aes"); | ||
| 758 | rc = PTR_ERR(server->secmech.cmacaes); | ||
| 759 | goto crypto_allocate_cmacaes_fail; | ||
| 760 | } | ||
| 761 | |||
| 751 | size = sizeof(struct shash_desc) + | 762 | size = sizeof(struct shash_desc) + |
| 752 | crypto_shash_descsize(server->secmech.hmacmd5); | 763 | crypto_shash_descsize(server->secmech.hmacmd5); |
| 753 | server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); | 764 | server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); |
| @@ -778,8 +789,22 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) | |||
| 778 | server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256; | 789 | server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256; |
| 779 | server->secmech.sdeschmacsha256->shash.flags = 0x0; | 790 | server->secmech.sdeschmacsha256->shash.flags = 0x0; |
| 780 | 791 | ||
| 792 | size = sizeof(struct shash_desc) + | ||
| 793 | crypto_shash_descsize(server->secmech.cmacaes); | ||
| 794 | server->secmech.sdesccmacaes = kmalloc(size, GFP_KERNEL); | ||
| 795 | if (!server->secmech.sdesccmacaes) { | ||
| 796 | cifs_dbg(VFS, "%s: Can't alloc cmacaes\n", __func__); | ||
| 797 | rc = -ENOMEM; | ||
| 798 | goto crypto_allocate_cmacaes_sdesc_fail; | ||
| 799 | } | ||
| 800 | server->secmech.sdesccmacaes->shash.tfm = server->secmech.cmacaes; | ||
| 801 | server->secmech.sdesccmacaes->shash.flags = 0x0; | ||
| 802 | |||
| 781 | return 0; | 803 | return 0; |
| 782 | 804 | ||
| 805 | crypto_allocate_cmacaes_sdesc_fail: | ||
| 806 | kfree(server->secmech.sdeschmacsha256); | ||
| 807 | |||
| 783 | crypto_allocate_hmacsha256_sdesc_fail: | 808 | crypto_allocate_hmacsha256_sdesc_fail: |
| 784 | kfree(server->secmech.sdescmd5); | 809 | kfree(server->secmech.sdescmd5); |
| 785 | 810 | ||
| @@ -787,6 +812,9 @@ crypto_allocate_md5_sdesc_fail: | |||
| 787 | kfree(server->secmech.sdeschmacmd5); | 812 | kfree(server->secmech.sdeschmacmd5); |
| 788 | 813 | ||
| 789 | crypto_allocate_hmacmd5_sdesc_fail: | 814 | crypto_allocate_hmacmd5_sdesc_fail: |
| 815 | crypto_free_shash(server->secmech.cmacaes); | ||
| 816 | |||
| 817 | crypto_allocate_cmacaes_fail: | ||
| 790 | crypto_free_shash(server->secmech.hmacsha256); | 818 | crypto_free_shash(server->secmech.hmacsha256); |
| 791 | 819 | ||
| 792 | crypto_allocate_hmacsha256_fail: | 820 | crypto_allocate_hmacsha256_fail: |
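The cifsencrypt.c hunks add a cmac(aes) shash and its descriptor for SMB3 signing, extending the existing allocate/release pairs and keeping the goto unwind in reverse allocation order. The error-handling shape, sketched with plain malloc()/free() standing in for crypto_alloc_shash()/crypto_free_shash() (this illustrates the unwind pattern only, not the kernel crypto API):

	#include <stdlib.h>

	struct secmech {
		void *hmacmd5;
		void *hmacsha256;
		void *cmacaes;	/* new for SMB3 signing */
	};

	static int secmech_allocate(struct secmech *m)
	{
		m->hmacmd5 = malloc(64);
		if (!m->hmacmd5)
			goto fail_hmacmd5;

		m->hmacsha256 = malloc(64);
		if (!m->hmacsha256)
			goto fail_hmacsha256;

		m->cmacaes = malloc(64);	/* newest allocation comes last */
		if (!m->cmacaes)
			goto fail_cmacaes;

		return 0;

	fail_cmacaes:			/* unwind in reverse allocation order */
		free(m->hmacsha256);
	fail_hmacsha256:
		free(m->hmacmd5);
	fail_hmacmd5:
		return -1;
	}

	static void secmech_release(struct secmech *m)
	{
		free(m->cmacaes);
		free(m->hmacsha256);
		free(m->hmacmd5);
	}

	int main(void)
	{
		struct secmech m;

		if (secmech_allocate(&m) == 0)
			secmech_release(&m);
		return 0;
	}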
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 540c1ccfcdb2..4bdd547dbf6f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
| @@ -312,11 +312,14 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) | |||
| 312 | } | 312 | } |
| 313 | 313 | ||
| 314 | static void | 314 | static void |
| 315 | cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server) | 315 | cifs_show_security(struct seq_file *s, struct cifs_ses *ses) |
| 316 | { | 316 | { |
| 317 | if (ses->sectype == Unspecified) | ||
| 318 | return; | ||
| 319 | |||
| 317 | seq_printf(s, ",sec="); | 320 | seq_printf(s, ",sec="); |
| 318 | 321 | ||
| 319 | switch (server->secType) { | 322 | switch (ses->sectype) { |
| 320 | case LANMAN: | 323 | case LANMAN: |
| 321 | seq_printf(s, "lanman"); | 324 | seq_printf(s, "lanman"); |
| 322 | break; | 325 | break; |
| @@ -338,7 +341,7 @@ cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server) | |||
| 338 | break; | 341 | break; |
| 339 | } | 342 | } |
| 340 | 343 | ||
| 341 | if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | 344 | if (ses->sign) |
| 342 | seq_printf(s, "i"); | 345 | seq_printf(s, "i"); |
| 343 | } | 346 | } |
| 344 | 347 | ||
| @@ -369,7 +372,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root) | |||
| 369 | srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; | 372 | srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; |
| 370 | 373 | ||
| 371 | seq_printf(s, ",vers=%s", tcon->ses->server->vals->version_string); | 374 | seq_printf(s, ",vers=%s", tcon->ses->server->vals->version_string); |
| 372 | cifs_show_security(s, tcon->ses->server); | 375 | cifs_show_security(s, tcon->ses); |
| 373 | cifs_show_cache_flavor(s, cifs_sb); | 376 | cifs_show_cache_flavor(s, cifs_sb); |
| 374 | 377 | ||
| 375 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) | 378 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) |
| @@ -765,7 +768,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) | |||
| 765 | 768 | ||
| 766 | static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) | 769 | static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) |
| 767 | { | 770 | { |
| 768 | /* note that this is called by vfs setlease with lock_flocks held | 771 | /* note that this is called by vfs setlease with i_lock held |
| 769 | to protect *lease from going away */ | 772 | to protect *lease from going away */ |
| 770 | struct inode *inode = file_inode(file); | 773 | struct inode *inode = file_inode(file); |
| 771 | struct cifsFileInfo *cfile = file->private_data; | 774 | struct cifsFileInfo *cfile = file->private_data; |
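In cifsfs.c, cifs_show_security() now keys off the per-session sectype and sign fields rather than the server's secType and sec_mode, and prints nothing when no flavor was requested. A small user-space sketch of that enum-to-mount-option mapping, with a simplified enum and names chosen here only for illustration:

	#include <stdio.h>
	#include <stdbool.h>

	enum sectype { UNSPECIFIED, LANMAN_T, NTLM_T, NTLMV2_T, RAWNTLMSSP_T, KERBEROS_T };

	static void show_security(FILE *s, enum sectype t, bool sign)
	{
		static const char *name[] = {
			[LANMAN_T] = "lanman", [NTLM_T] = "ntlm", [NTLMV2_T] = "ntlmv2",
			[RAWNTLMSSP_T] = "ntlmssp", [KERBEROS_T] = "krb5",
		};

		if (t == UNSPECIFIED)	/* nothing was requested, show nothing */
			return;

		fprintf(s, ",sec=%s", name[t]);
		if (sign)		/* trailing "i" marks a signed session */
			fputc('i', s);
	}

	int main(void)
	{
		show_security(stdout, KERBEROS_T, true);	/* prints ",sec=krb5i" */
		putchar('\n');
		return 0;
	}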
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index d05b3028e3b9..ea723a5e8226 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
| @@ -132,5 +132,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); | |||
| 132 | extern const struct export_operations cifs_export_ops; | 132 | extern const struct export_operations cifs_export_ops; |
| 133 | #endif /* CONFIG_CIFS_NFSD_EXPORT */ | 133 | #endif /* CONFIG_CIFS_NFSD_EXPORT */ |
| 134 | 134 | ||
| 135 | #define CIFS_VERSION "2.0" | 135 | #define CIFS_VERSION "2.01" |
| 136 | #endif /* _CIFSFS_H */ | 136 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 4f07f6fbe494..e66b08882548 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
| @@ -101,20 +101,14 @@ enum statusEnum { | |||
| 101 | }; | 101 | }; |
| 102 | 102 | ||
| 103 | enum securityEnum { | 103 | enum securityEnum { |
| 104 | LANMAN = 0, /* Legacy LANMAN auth */ | 104 | Unspecified = 0, /* not specified */ |
| 105 | LANMAN, /* Legacy LANMAN auth */ | ||
| 105 | NTLM, /* Legacy NTLM012 auth with NTLM hash */ | 106 | NTLM, /* Legacy NTLM012 auth with NTLM hash */ |
| 106 | NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ | 107 | NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ |
| 107 | RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ | 108 | RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ |
| 108 | /* NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */ | ||
| 109 | Kerberos, /* Kerberos via SPNEGO */ | 109 | Kerberos, /* Kerberos via SPNEGO */ |
| 110 | }; | 110 | }; |
| 111 | 111 | ||
| 112 | enum protocolEnum { | ||
| 113 | TCP = 0, | ||
| 114 | SCTP | ||
| 115 | /* Netbios frames protocol not supported at this time */ | ||
| 116 | }; | ||
| 117 | |||
| 118 | struct session_key { | 112 | struct session_key { |
| 119 | unsigned int len; | 113 | unsigned int len; |
| 120 | char *response; | 114 | char *response; |
| @@ -131,9 +125,11 @@ struct cifs_secmech { | |||
| 131 | struct crypto_shash *hmacmd5; /* hmac-md5 hash function */ | 125 | struct crypto_shash *hmacmd5; /* hmac-md5 hash function */ |
| 132 | struct crypto_shash *md5; /* md5 hash function */ | 126 | struct crypto_shash *md5; /* md5 hash function */ |
| 133 | struct crypto_shash *hmacsha256; /* hmac-sha256 hash function */ | 127 | struct crypto_shash *hmacsha256; /* hmac-sha256 hash function */ |
| 128 | struct crypto_shash *cmacaes; /* block-cipher based MAC function */ | ||
| 134 | struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */ | 129 | struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */ |
| 135 | struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */ | 130 | struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */ |
| 136 | struct sdesc *sdeschmacsha256; /* ctxt to generate smb2 signature */ | 131 | struct sdesc *sdeschmacsha256; /* ctxt to generate smb2 signature */ |
| 132 | struct sdesc *sdesccmacaes; /* ctxt to generate smb3 signature */ | ||
| 137 | }; | 133 | }; |
| 138 | 134 | ||
| 139 | /* per smb session structure/fields */ | 135 | /* per smb session structure/fields */ |
| @@ -181,6 +177,7 @@ enum smb_version { | |||
| 181 | Smb_20, | 177 | Smb_20, |
| 182 | Smb_21, | 178 | Smb_21, |
| 183 | Smb_30, | 179 | Smb_30, |
| 180 | Smb_302, | ||
| 184 | }; | 181 | }; |
| 185 | 182 | ||
| 186 | struct mid_q_entry; | 183 | struct mid_q_entry; |
| @@ -228,6 +225,7 @@ struct smb_version_operations { | |||
| 228 | void (*dump_detail)(void *); | 225 | void (*dump_detail)(void *); |
| 229 | void (*clear_stats)(struct cifs_tcon *); | 226 | void (*clear_stats)(struct cifs_tcon *); |
| 230 | void (*print_stats)(struct seq_file *m, struct cifs_tcon *); | 227 | void (*print_stats)(struct seq_file *m, struct cifs_tcon *); |
| 228 | void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *); | ||
| 231 | /* verify the message */ | 229 | /* verify the message */ |
| 232 | int (*check_message)(char *, unsigned int); | 230 | int (*check_message)(char *, unsigned int); |
| 233 | bool (*is_oplock_break)(char *, struct TCP_Server_Info *); | 231 | bool (*is_oplock_break)(char *, struct TCP_Server_Info *); |
| @@ -367,6 +365,8 @@ struct smb_version_operations { | |||
| 367 | void (*set_lease_key)(struct inode *, struct cifs_fid *fid); | 365 | void (*set_lease_key)(struct inode *, struct cifs_fid *fid); |
| 368 | /* generate new lease key */ | 366 | /* generate new lease key */ |
| 369 | void (*new_lease_key)(struct cifs_fid *fid); | 367 | void (*new_lease_key)(struct cifs_fid *fid); |
| 368 | /* The next two functions will need to be changed to per smb session */ | ||
| 369 | void (*generate_signingkey)(struct TCP_Server_Info *server); | ||
| 370 | int (*calc_signature)(struct smb_rqst *rqst, | 370 | int (*calc_signature)(struct smb_rqst *rqst, |
| 371 | struct TCP_Server_Info *server); | 371 | struct TCP_Server_Info *server); |
| 372 | }; | 372 | }; |
| @@ -387,6 +387,8 @@ struct smb_version_values { | |||
| 387 | unsigned int cap_nt_find; | 387 | unsigned int cap_nt_find; |
| 388 | unsigned int cap_large_files; | 388 | unsigned int cap_large_files; |
| 389 | unsigned int oplock_read; | 389 | unsigned int oplock_read; |
| 390 | __u16 signing_enabled; | ||
| 391 | __u16 signing_required; | ||
| 390 | }; | 392 | }; |
| 391 | 393 | ||
| 392 | #define HEADER_SIZE(server) (server->vals->header_size) | 394 | #define HEADER_SIZE(server) (server->vals->header_size) |
| @@ -407,7 +409,8 @@ struct smb_vol { | |||
| 407 | kgid_t backupgid; | 409 | kgid_t backupgid; |
| 408 | umode_t file_mode; | 410 | umode_t file_mode; |
| 409 | umode_t dir_mode; | 411 | umode_t dir_mode; |
| 410 | unsigned secFlg; | 412 | enum securityEnum sectype; /* sectype requested via mnt opts */ |
| 413 | bool sign; /* was signing requested via mnt opts? */ | ||
| 411 | bool retry:1; | 414 | bool retry:1; |
| 412 | bool intr:1; | 415 | bool intr:1; |
| 413 | bool setuids:1; | 416 | bool setuids:1; |
| @@ -441,6 +444,7 @@ struct smb_vol { | |||
| 441 | bool mfsymlinks:1; /* use Minshall+French Symlinks */ | 444 | bool mfsymlinks:1; /* use Minshall+French Symlinks */ |
| 442 | bool multiuser:1; | 445 | bool multiuser:1; |
| 443 | bool rwpidforward:1; /* pid forward for read/write operations */ | 446 | bool rwpidforward:1; /* pid forward for read/write operations */ |
| 447 | bool nosharesock; | ||
| 444 | unsigned int rsize; | 448 | unsigned int rsize; |
| 445 | unsigned int wsize; | 449 | unsigned int wsize; |
| 446 | bool sockopt_tcp_nodelay:1; | 450 | bool sockopt_tcp_nodelay:1; |
| @@ -514,6 +518,7 @@ struct TCP_Server_Info { | |||
| 514 | struct task_struct *tsk; | 518 | struct task_struct *tsk; |
| 515 | char server_GUID[16]; | 519 | char server_GUID[16]; |
| 516 | __u16 sec_mode; | 520 | __u16 sec_mode; |
| 521 | bool sign; /* is signing enabled on this connection? */ | ||
| 517 | bool session_estab; /* mark when very first sess is established */ | 522 | bool session_estab; /* mark when very first sess is established */ |
| 518 | #ifdef CONFIG_CIFS_SMB2 | 523 | #ifdef CONFIG_CIFS_SMB2 |
| 519 | int echo_credits; /* echo reserved slots */ | 524 | int echo_credits; /* echo reserved slots */ |
| @@ -521,7 +526,6 @@ struct TCP_Server_Info { | |||
| 521 | bool echoes:1; /* enable echoes */ | 526 | bool echoes:1; /* enable echoes */ |
| 522 | #endif | 527 | #endif |
| 523 | u16 dialect; /* dialect index that server chose */ | 528 | u16 dialect; /* dialect index that server chose */ |
| 524 | enum securityEnum secType; | ||
| 525 | bool oplocks:1; /* enable oplocks */ | 529 | bool oplocks:1; /* enable oplocks */ |
| 526 | unsigned int maxReq; /* Clients should submit no more */ | 530 | unsigned int maxReq; /* Clients should submit no more */ |
| 527 | /* than maxReq distinct unanswered SMBs to the server when using */ | 531 | /* than maxReq distinct unanswered SMBs to the server when using */ |
| @@ -540,12 +544,17 @@ struct TCP_Server_Info { | |||
| 540 | int timeAdj; /* Adjust for difference in server time zone in sec */ | 544 | int timeAdj; /* Adjust for difference in server time zone in sec */ |
| 541 | __u64 CurrentMid; /* multiplex id - rotating counter */ | 545 | __u64 CurrentMid; /* multiplex id - rotating counter */ |
| 542 | char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ | 546 | char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ |
| 547 | char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */ | ||
| 543 | /* 16th byte of RFC1001 workstation name is always null */ | 548 | /* 16th byte of RFC1001 workstation name is always null */ |
| 544 | char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; | 549 | char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; |
| 545 | __u32 sequence_number; /* for signing, protected by srv_mutex */ | 550 | __u32 sequence_number; /* for signing, protected by srv_mutex */ |
| 546 | struct session_key session_key; | 551 | struct session_key session_key; |
| 547 | unsigned long lstrp; /* when we got last response from this server */ | 552 | unsigned long lstrp; /* when we got last response from this server */ |
| 548 | struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ | 553 | struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ |
| 554 | #define CIFS_NEGFLAVOR_LANMAN 0 /* wct == 13, LANMAN */ | ||
| 555 | #define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */ | ||
| 556 | #define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */ | ||
| 557 | char negflavor; /* NEGOTIATE response flavor */ | ||
| 549 | /* extended security flavors that server supports */ | 558 | /* extended security flavors that server supports */ |
| 550 | bool sec_ntlmssp; /* supports NTLMSSP */ | 559 | bool sec_ntlmssp; /* supports NTLMSSP */ |
| 551 | bool sec_kerberosu2u; /* supports U2U Kerberos */ | 560 | bool sec_kerberosu2u; /* supports U2U Kerberos */ |
| @@ -697,7 +706,6 @@ struct cifs_ses { | |||
| 697 | enum statusEnum status; | 706 | enum statusEnum status; |
| 698 | unsigned overrideSecFlg; /* if non-zero override global sec flags */ | 707 | unsigned overrideSecFlg; /* if non-zero override global sec flags */ |
| 699 | __u16 ipc_tid; /* special tid for connection to IPC share */ | 708 | __u16 ipc_tid; /* special tid for connection to IPC share */ |
| 700 | __u16 flags; | ||
| 701 | __u16 vcnum; | 709 | __u16 vcnum; |
| 702 | char *serverOS; /* name of operating system underlying server */ | 710 | char *serverOS; /* name of operating system underlying server */ |
| 703 | char *serverNOS; /* name of network operating system of server */ | 711 | char *serverNOS; /* name of network operating system of server */ |
| @@ -714,21 +722,14 @@ struct cifs_ses { | |||
| 714 | char *password; | 722 | char *password; |
| 715 | struct session_key auth_key; | 723 | struct session_key auth_key; |
| 716 | struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */ | 724 | struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */ |
| 725 | enum securityEnum sectype; /* what security flavor was specified? */ | ||
| 726 | bool sign; /* is signing required? */ | ||
| 717 | bool need_reconnect:1; /* connection reset, uid now invalid */ | 727 | bool need_reconnect:1; /* connection reset, uid now invalid */ |
| 718 | #ifdef CONFIG_CIFS_SMB2 | 728 | #ifdef CONFIG_CIFS_SMB2 |
| 719 | __u16 session_flags; | 729 | __u16 session_flags; |
| 720 | #endif /* CONFIG_CIFS_SMB2 */ | 730 | #endif /* CONFIG_CIFS_SMB2 */ |
| 721 | }; | 731 | }; |
| 722 | 732 | ||
| 723 | /* no more than one of the following three session flags may be set */ | ||
| 724 | #define CIFS_SES_NT4 1 | ||
| 725 | #define CIFS_SES_OS2 2 | ||
| 726 | #define CIFS_SES_W9X 4 | ||
| 727 | /* following flag is set for old servers such as OS2 (and Win95?) | ||
| 728 | which do not negotiate NTLM or POSIX dialects, but instead | ||
| 729 | negotiate one of the older LANMAN dialects */ | ||
| 730 | #define CIFS_SES_LANMAN 8 | ||
| 731 | |||
| 732 | static inline bool | 733 | static inline bool |
| 733 | cap_unix(struct cifs_ses *ses) | 734 | cap_unix(struct cifs_ses *ses) |
| 734 | { | 735 | { |
| @@ -816,7 +817,7 @@ struct cifs_tcon { | |||
| 816 | #ifdef CONFIG_CIFS_SMB2 | 817 | #ifdef CONFIG_CIFS_SMB2 |
| 817 | bool print:1; /* set if connection to printer share */ | 818 | bool print:1; /* set if connection to printer share */ |
| 818 | bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */ | 819 | bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */ |
| 819 | __u32 capabilities; | 820 | __le32 capabilities; |
| 820 | __u32 share_flags; | 821 | __u32 share_flags; |
| 821 | __u32 maximal_access; | 822 | __u32 maximal_access; |
| 822 | __u32 vol_serial_number; | 823 | __u32 vol_serial_number; |
| @@ -1348,7 +1349,7 @@ require use of the stronger protocol */ | |||
| 1348 | #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ | 1349 | #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ |
| 1349 | #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ | 1350 | #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ |
| 1350 | 1351 | ||
| 1351 | #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMSSP) | 1352 | #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP) |
| 1352 | #define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) | 1353 | #define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) |
| 1353 | #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) | 1354 | #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) |
| 1354 | /* | 1355 | /* |
| @@ -1494,4 +1495,7 @@ extern struct smb_version_values smb21_values; | |||
| 1494 | #define SMB30_VERSION_STRING "3.0" | 1495 | #define SMB30_VERSION_STRING "3.0" |
| 1495 | extern struct smb_version_operations smb30_operations; | 1496 | extern struct smb_version_operations smb30_operations; |
| 1496 | extern struct smb_version_values smb30_values; | 1497 | extern struct smb_version_values smb30_values; |
| 1498 | #define SMB302_VERSION_STRING "3.02" | ||
| 1499 | /*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */ | ||
| 1500 | extern struct smb_version_values smb302_values; | ||
| 1497 | #endif /* _CIFS_GLOB_H */ | 1501 | #endif /* _CIFS_GLOB_H */ |
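Among the cifsglob.h changes above, the per-server secType field is gone and the connection instead records which NEGOTIATE flavor was seen (LANMAN, unencapsulated, or extended security), plus a per-connection sign flag and the SMB3 signing key. A hedged sketch of that flavor classification, using local stand-ins for the response word count, the extended-security bit, and the inline key length:

	#include <stdbool.h>
	#include <stdio.h>

	enum negflavor { NEGFLAVOR_LANMAN, NEGFLAVOR_UNENCAP, NEGFLAVOR_EXTENDED };

	/*
	 * wct == 13 means a LANMAN-style reply; wct == 17 is NTLM or better, and
	 * the extended-security path applies when that bit is set and no inline
	 * encryption key was returned.
	 */
	static enum negflavor classify_negotiate(unsigned int wct, bool ext_sec,
						 unsigned int enc_key_len)
	{
		if (wct == 13)
			return NEGFLAVOR_LANMAN;
		if (ext_sec && enc_key_len == 0)
			return NEGFLAVOR_EXTENDED;
		return NEGFLAVOR_UNENCAP;
	}

	int main(void)
	{
		printf("%d\n", classify_negotiate(17, true, 0));	/* extended */
		return 0;
	}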
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index e996ff6b26d1..11ca24a8e054 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h | |||
| @@ -142,6 +142,11 @@ | |||
| 142 | */ | 142 | */ |
| 143 | #define CIFS_SESS_KEY_SIZE (16) | 143 | #define CIFS_SESS_KEY_SIZE (16) |
| 144 | 144 | ||
| 145 | /* | ||
| 146 | * Size of the smb3 signing key | ||
| 147 | */ | ||
| 148 | #define SMB3_SIGN_KEY_SIZE (16) | ||
| 149 | |||
| 145 | #define CIFS_CLIENT_CHALLENGE_SIZE (8) | 150 | #define CIFS_CLIENT_CHALLENGE_SIZE (8) |
| 146 | #define CIFS_SERVER_CHALLENGE_SIZE (8) | 151 | #define CIFS_SERVER_CHALLENGE_SIZE (8) |
| 147 | #define CIFS_HMAC_MD5_HASH_SIZE (16) | 152 | #define CIFS_HMAC_MD5_HASH_SIZE (16) |
| @@ -531,7 +536,7 @@ typedef struct lanman_neg_rsp { | |||
| 531 | #define READ_RAW_ENABLE 1 | 536 | #define READ_RAW_ENABLE 1 |
| 532 | #define WRITE_RAW_ENABLE 2 | 537 | #define WRITE_RAW_ENABLE 2 |
| 533 | #define RAW_ENABLE (READ_RAW_ENABLE | WRITE_RAW_ENABLE) | 538 | #define RAW_ENABLE (READ_RAW_ENABLE | WRITE_RAW_ENABLE) |
| 534 | | 539 | #define SMB1_CLIENT_GUID_SIZE (16) |
| 535 | typedef struct negotiate_rsp { | 540 | typedef struct negotiate_rsp { |
| 536 | struct smb_hdr hdr; /* wct = 17 */ | 541 | struct smb_hdr hdr; /* wct = 17 */ |
| 537 | __le16 DialectIndex; /* 0xFFFF = no dialect acceptable */ | 542 | __le16 DialectIndex; /* 0xFFFF = no dialect acceptable */ |
| @@ -553,7 +558,7 @@ typedef struct negotiate_rsp { | |||
| 553 | /* followed by 16 bytes of server GUID */ | 558 | /* followed by 16 bytes of server GUID */ |
| 554 | /* then security blob if cap_extended_security negotiated */ | 559 | /* then security blob if cap_extended_security negotiated */ |
| 555 | struct { | 560 | struct { |
| 556 | unsigned char GUID[16]; | 561 | unsigned char GUID[SMB1_CLIENT_GUID_SIZE]; |
| 557 | unsigned char SecurityBlob[1]; | 562 | unsigned char SecurityBlob[1]; |
| 558 | } __attribute__((packed)) extended_response; | 563 | } __attribute__((packed)) extended_response; |
| 559 | } __attribute__((packed)) u; | 564 | } __attribute__((packed)) u; |
| @@ -1315,6 +1320,14 @@ typedef struct smb_com_ntransact_rsp { | |||
| 1315 | /* parms and data follow */ | 1320 | /* parms and data follow */ |
| 1316 | } __attribute__((packed)) NTRANSACT_RSP; | 1321 | } __attribute__((packed)) NTRANSACT_RSP; |
| 1317 | 1322 | ||
| 1323 | /* See MS-SMB 2.2.7.2.1.1 */ | ||
| 1324 | struct srv_copychunk { | ||
| 1325 | __le64 SourceOffset; | ||
| 1326 | __le64 DestinationOffset; | ||
| 1327 | __le32 CopyLength; | ||
| 1328 | __u32 Reserved; | ||
| 1329 | } __packed; | ||
| 1330 | |||
| 1318 | typedef struct smb_com_transaction_ioctl_req { | 1331 | typedef struct smb_com_transaction_ioctl_req { |
| 1319 | struct smb_hdr hdr; /* wct = 23 */ | 1332 | struct smb_hdr hdr; /* wct = 23 */ |
| 1320 | __u8 MaxSetupCount; | 1333 | __u8 MaxSetupCount; |
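cifspdu.h also gains the srv_copychunk request entry from MS-SMB 2.2.7.2.1.1; its wire layout is fixed at 24 bytes, all little-endian. A quick host-side sanity check of that size might look like the following, with uintN_t types standing in for the kernel's __le64/__le32/__u32:

	#include <assert.h>
	#include <stdint.h>

	/* Same layout as the new srv_copychunk wire struct (all little-endian). */
	struct srv_copychunk {
		uint64_t SourceOffset;
		uint64_t DestinationOffset;
		uint32_t CopyLength;
		uint32_t Reserved;
	} __attribute__((packed));

	static_assert(sizeof(struct srv_copychunk) == 24,
		      "copychunk request entries are 24 bytes on the wire");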
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index dda188a94332..c8ff018fae68 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
| @@ -118,6 +118,8 @@ extern void header_assemble(struct smb_hdr *, char /* command */ , | |||
| 118 | extern int small_smb_init_no_tc(const int smb_cmd, const int wct, | 118 | extern int small_smb_init_no_tc(const int smb_cmd, const int wct, |
| 119 | struct cifs_ses *ses, | 119 | struct cifs_ses *ses, |
| 120 | void **request_buf); | 120 | void **request_buf); |
| 121 | extern enum securityEnum select_sectype(struct TCP_Server_Info *server, | ||
| 122 | enum securityEnum requested); | ||
| 121 | extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, | 123 | extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, |
| 122 | const struct nls_table *nls_cp); | 124 | const struct nls_table *nls_cp); |
| 123 | extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); | 125 | extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); |
| @@ -212,6 +214,7 @@ extern int cifs_negotiate_protocol(const unsigned int xid, | |||
| 212 | struct cifs_ses *ses); | 214 | struct cifs_ses *ses); |
| 213 | extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, | 215 | extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, |
| 214 | struct nls_table *nls_info); | 216 | struct nls_table *nls_info); |
| 217 | extern int cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required); | ||
| 215 | extern int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses); | 218 | extern int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses); |
| 216 | 219 | ||
| 217 | extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses, | 220 | extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses, |
| @@ -433,6 +436,7 @@ extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *); | |||
| 433 | extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); | 436 | extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); |
| 434 | extern void cifs_crypto_shash_release(struct TCP_Server_Info *); | 437 | extern void cifs_crypto_shash_release(struct TCP_Server_Info *); |
| 435 | extern int calc_seckey(struct cifs_ses *); | 438 | extern int calc_seckey(struct cifs_ses *); |
| 439 | extern void generate_smb3signingkey(struct TCP_Server_Info *); | ||
| 436 | 440 | ||
| 437 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 441 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
| 438 | extern int calc_lanman_hash(const char *password, const char *cryptkey, | 442 | extern int calc_lanman_hash(const char *password, const char *cryptkey, |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a58dc77cc443..a89c4cb4e6cf 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
| @@ -367,6 +367,185 @@ vt2_err: | |||
| 367 | return -EINVAL; | 367 | return -EINVAL; |
| 368 | } | 368 | } |
| 369 | 369 | ||
| 370 | static int | ||
| 371 | decode_ext_sec_blob(struct cifs_ses *ses, NEGOTIATE_RSP *pSMBr) | ||
| 372 | { | ||
| 373 | int rc = 0; | ||
| 374 | u16 count; | ||
| 375 | char *guid = pSMBr->u.extended_response.GUID; | ||
| 376 | struct TCP_Server_Info *server = ses->server; | ||
| 377 | |||
| 378 | count = get_bcc(&pSMBr->hdr); | ||
| 379 | if (count < SMB1_CLIENT_GUID_SIZE) | ||
| 380 | return -EIO; | ||
| 381 | |||
| 382 | spin_lock(&cifs_tcp_ses_lock); | ||
| 383 | if (server->srv_count > 1) { | ||
| 384 | spin_unlock(&cifs_tcp_ses_lock); | ||
| 385 | if (memcmp(server->server_GUID, guid, SMB1_CLIENT_GUID_SIZE) != 0) { | ||
| 386 | cifs_dbg(FYI, "server UID changed\n"); | ||
| 387 | memcpy(server->server_GUID, guid, SMB1_CLIENT_GUID_SIZE); | ||
| 388 | } | ||
| 389 | } else { | ||
| 390 | spin_unlock(&cifs_tcp_ses_lock); | ||
| 391 | memcpy(server->server_GUID, guid, SMB1_CLIENT_GUID_SIZE); | ||
| 392 | } | ||
| 393 | |||
| 394 | if (count == SMB1_CLIENT_GUID_SIZE) { | ||
| 395 | server->sec_ntlmssp = true; | ||
| 396 | } else { | ||
| 397 | count -= SMB1_CLIENT_GUID_SIZE; | ||
| 398 | rc = decode_negTokenInit( | ||
| 399 | pSMBr->u.extended_response.SecurityBlob, count, server); | ||
| 400 | if (rc != 1) | ||
| 401 | return -EINVAL; | ||
| 402 | } | ||
| 403 | |||
| 404 | return 0; | ||
| 405 | } | ||
| 406 | |||
| 407 | int | ||
| 408 | cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required) | ||
| 409 | { | ||
| 410 | bool srv_sign_required = server->sec_mode & server->vals->signing_required; | ||
| 411 | bool srv_sign_enabled = server->sec_mode & server->vals->signing_enabled; | ||
| 412 | bool mnt_sign_enabled = global_secflags & CIFSSEC_MAY_SIGN; | ||
| 413 | |||
| 414 | /* | ||
| 415 | * Is signing required by mnt options? If not then check | ||
| 416 | * global_secflags to see if it is there. | ||
| 417 | */ | ||
| 418 | if (!mnt_sign_required) | ||
| 419 | mnt_sign_required = ((global_secflags & CIFSSEC_MUST_SIGN) == | ||
| 420 | CIFSSEC_MUST_SIGN); | ||
| 421 | |||
| 422 | /* | ||
| 423 | * If signing is required then it's automatically enabled too, | ||
| 424 | * otherwise, check to see if the secflags allow it. | ||
| 425 | */ | ||
| 426 | mnt_sign_enabled = mnt_sign_required ? mnt_sign_required : | ||
| 427 | (global_secflags & CIFSSEC_MAY_SIGN); | ||
| 428 | |||
| 429 | /* If server requires signing, does client allow it? */ | ||
| 430 | if (srv_sign_required) { | ||
| 431 | if (!mnt_sign_enabled) { | ||
| 432 | cifs_dbg(VFS, "Server requires signing, but it's disabled in SecurityFlags!"); | ||
| 433 | return -ENOTSUPP; | ||
| 434 | } | ||
| 435 | server->sign = true; | ||
| 436 | } | ||
| 437 | |||
| 438 | /* If client requires signing, does server allow it? */ | ||
| 439 | if (mnt_sign_required) { | ||
| 440 | if (!srv_sign_enabled) { | ||
| 441 | cifs_dbg(VFS, "Server does not support signing!"); | ||
| 442 | return -ENOTSUPP; | ||
| 443 | } | ||
| 444 | server->sign = true; | ||
| 445 | } | ||
| 446 | |||
| 447 | return 0; | ||
| 448 | } | ||
| 449 | |||
| 450 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | ||
| 451 | static int | ||
| 452 | decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) | ||
| 453 | { | ||
| 454 | __s16 tmp; | ||
| 455 | struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr; | ||
| 456 | |||
| 457 | if (server->dialect != LANMAN_PROT && server->dialect != LANMAN2_PROT) | ||
| 458 | return -EOPNOTSUPP; | ||
| 459 | |||
| 460 | server->sec_mode = le16_to_cpu(rsp->SecurityMode); | ||
| 461 | server->maxReq = min_t(unsigned int, | ||
| 462 | le16_to_cpu(rsp->MaxMpxCount), | ||
| 463 | cifs_max_pending); | ||
| 464 | set_credits(server, server->maxReq); | ||
| 465 | server->maxBuf = le16_to_cpu(rsp->MaxBufSize); | ||
| 466 | server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); | ||
| 467 | /* even though we do not use raw we might as well set this | ||
| 468 | accurately, in case we ever find a need for it */ | ||
| 469 | if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { | ||
| 470 | server->max_rw = 0xFF00; | ||
| 471 | server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE; | ||
| 472 | } else { | ||
| 473 | server->max_rw = 0;/* do not need to use raw anyway */ | ||
| 474 | server->capabilities = CAP_MPX_MODE; | ||
| 475 | } | ||
| 476 | tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone); | ||
| 477 | if (tmp == -1) { | ||
| 478 | /* OS/2 often does not set timezone therefore | ||
| 479 | * we must use server time to calc time zone. | ||
| 480 | * Could deviate slightly from the right zone. | ||
| 481 | * Smallest defined timezone difference is 15 minutes | ||
| 482 | * (i.e. Nepal). Rounding up/down is done to match | ||
| 483 | * this requirement. | ||
| 484 | */ | ||
| 485 | int val, seconds, remain, result; | ||
| 486 | struct timespec ts, utc; | ||
| 487 | utc = CURRENT_TIME; | ||
| 488 | ts = cnvrtDosUnixTm(rsp->SrvTime.Date, | ||
| 489 | rsp->SrvTime.Time, 0); | ||
| 490 | cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n", | ||
| 491 | (int)ts.tv_sec, (int)utc.tv_sec, | ||
| 492 | (int)(utc.tv_sec - ts.tv_sec)); | ||
| 493 | val = (int)(utc.tv_sec - ts.tv_sec); | ||
| 494 | seconds = abs(val); | ||
| 495 | result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; | ||
| 496 | remain = seconds % MIN_TZ_ADJ; | ||
| 497 | if (remain >= (MIN_TZ_ADJ / 2)) | ||
| 498 | result += MIN_TZ_ADJ; | ||
| 499 | if (val < 0) | ||
| 500 | result = -result; | ||
| 501 | server->timeAdj = result; | ||
| 502 | } else { | ||
| 503 | server->timeAdj = (int)tmp; | ||
| 504 | server->timeAdj *= 60; /* also in seconds */ | ||
| 505 | } | ||
| 506 | cifs_dbg(FYI, "server->timeAdj: %d seconds\n", server->timeAdj); | ||
| 507 | |||
| 508 | |||
| 509 | /* BB get server time for time conversions and add | ||
| 510 | code to use it and timezone since this is not UTC */ | ||
| 511 | |||
| 512 | if (rsp->EncryptionKeyLength == | ||
| 513 | cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { | ||
| 514 | memcpy(server->cryptkey, rsp->EncryptionKey, | ||
| 515 | CIFS_CRYPTO_KEY_SIZE); | ||
| 516 | } else if (server->sec_mode & SECMODE_PW_ENCRYPT) { | ||
| 517 | return -EIO; /* need cryptkey unless plain text */ | ||
| 518 | } | ||
| 519 | |||
| 520 | cifs_dbg(FYI, "LANMAN negotiated\n"); | ||
| 521 | return 0; | ||
| 522 | } | ||
| 523 | #else | ||
| 524 | static inline int | ||
| 525 | decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) | ||
| 526 | { | ||
| 527 | cifs_dbg(VFS, "mount failed, cifs module not built with CIFS_WEAK_PW_HASH support\n"); | ||
| 528 | return -EOPNOTSUPP; | ||
| 529 | } | ||
| 530 | #endif | ||
| 531 | |||
| 532 | static bool | ||
| 533 | should_set_ext_sec_flag(enum securityEnum sectype) | ||
| 534 | { | ||
| 535 | switch (sectype) { | ||
| 536 | case RawNTLMSSP: | ||
| 537 | case Kerberos: | ||
| 538 | return true; | ||
| 539 | case Unspecified: | ||
| 540 | if (global_secflags & | ||
| 541 | (CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)) | ||
| 542 | return true; | ||
| 543 | /* Fallthrough */ | ||
| 544 | default: | ||
| 545 | return false; | ||
| 546 | } | ||
| 547 | } | ||
| 548 | |||
| 370 | int | 549 | int |
| 371 | CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) | 550 | CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) |
| 372 | { | 551 | { |
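The new cifs_enable_signing() in the hunk above centralizes the signing negotiation that used to be open-coded after the word-count checks: a MUST on either side turns signing on as long as the other side at least allows it, and a conflict fails the mount. Restated as a self-contained sketch with simplified names and a plain -1 error return:

	#include <stdbool.h>
	#include <stdio.h>

	/* Returns 0 and sets *sign, or -1 when client and server policies conflict. */
	static int enable_signing(bool srv_required, bool srv_enabled,
				  bool mnt_required, bool may_sign, bool *sign)
	{
		bool mnt_enabled = mnt_required || may_sign;

		*sign = false;

		if (srv_required) {		/* server insists on signing */
			if (!mnt_enabled)
				return -1;	/* client config forbids it */
			*sign = true;
		}
		if (mnt_required) {		/* client insists on signing */
			if (!srv_enabled)
				return -1;	/* server cannot sign */
			*sign = true;
		}
		return 0;
	}

	int main(void)
	{
		bool sign;

		if (enable_signing(true, true, false, true, &sign) == 0)
			printf("signing %s\n", sign ? "on" : "off");
		return 0;
	}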
| @@ -375,41 +554,24 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) | |||
| 375 | int rc = 0; | 554 | int rc = 0; |
| 376 | int bytes_returned; | 555 | int bytes_returned; |
| 377 | int i; | 556 | int i; |
| 378 | struct TCP_Server_Info *server; | 557 | struct TCP_Server_Info *server = ses->server; |
| 379 | u16 count; | 558 | u16 count; |
| 380 | unsigned int secFlags; | ||
| 381 | 559 | ||
| 382 | if (ses->server) | 560 | if (!server) { |
| 383 | server = ses->server; | 561 | WARN(1, "%s: server is NULL!\n", __func__); |
| 384 | else { | 562 | return -EIO; |
| 385 | rc = -EIO; | ||
| 386 | return rc; | ||
| 387 | } | 563 | } |
| 564 | |||
| 388 | rc = smb_init(SMB_COM_NEGOTIATE, 0, NULL /* no tcon yet */ , | 565 | rc = smb_init(SMB_COM_NEGOTIATE, 0, NULL /* no tcon yet */ , |
| 389 | (void **) &pSMB, (void **) &pSMBr); | 566 | (void **) &pSMB, (void **) &pSMBr); |
| 390 | if (rc) | 567 | if (rc) |
| 391 | return rc; | 568 | return rc; |
| 392 | 569 | ||
| 393 | /* if any of auth flags (ie not sign or seal) are overriden use them */ | ||
| 394 | if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) | ||
| 395 | secFlags = ses->overrideSecFlg; /* BB FIXME fix sign flags? */ | ||
| 396 | else /* if override flags set only sign/seal OR them with global auth */ | ||
| 397 | secFlags = global_secflags | ses->overrideSecFlg; | ||
| 398 | |||
| 399 | cifs_dbg(FYI, "secFlags 0x%x\n", secFlags); | ||
| 400 | |||
| 401 | pSMB->hdr.Mid = get_next_mid(server); | 570 | pSMB->hdr.Mid = get_next_mid(server); |
| 402 | pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); | 571 | pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); |
| 403 | 572 | ||
| 404 | if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) | 573 | if (should_set_ext_sec_flag(ses->sectype)) { |
| 405 | pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; | 574 | cifs_dbg(FYI, "Requesting extended security."); |
| 406 | else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) { | ||
| 407 | cifs_dbg(FYI, "Kerberos only mechanism, enable extended security\n"); | ||
| 408 | pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; | ||
| 409 | } else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP) | ||
| 410 | pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; | ||
| 411 | else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) { | ||
| 412 | cifs_dbg(FYI, "NTLMSSP only mechanism, enable extended security\n"); | ||
| 413 | pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; | 575 | pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; |
| 414 | } | 576 | } |
| 415 | 577 | ||
| @@ -436,127 +598,21 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) | |||
| 436 | could not negotiate a common dialect */ | 598 | could not negotiate a common dialect */ |
| 437 | rc = -EOPNOTSUPP; | 599 | rc = -EOPNOTSUPP; |
| 438 | goto neg_err_exit; | 600 | goto neg_err_exit; |
| 439 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | ||
| 440 | } else if ((pSMBr->hdr.WordCount == 13) | ||
| 441 | && ((server->dialect == LANMAN_PROT) | ||
| 442 | || (server->dialect == LANMAN2_PROT))) { | ||
| 443 | __s16 tmp; | ||
| 444 | struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr; | ||
| 445 | |||
| 446 | if ((secFlags & CIFSSEC_MAY_LANMAN) || | ||
| 447 | (secFlags & CIFSSEC_MAY_PLNTXT)) | ||
| 448 | server->secType = LANMAN; | ||
| 449 | else { | ||
| 450 | cifs_dbg(VFS, "mount failed weak security disabled in /proc/fs/cifs/SecurityFlags\n"); | ||
| 451 | rc = -EOPNOTSUPP; | ||
| 452 | goto neg_err_exit; | ||
| 453 | } | ||
| 454 | server->sec_mode = le16_to_cpu(rsp->SecurityMode); | ||
| 455 | server->maxReq = min_t(unsigned int, | ||
| 456 | le16_to_cpu(rsp->MaxMpxCount), | ||
| 457 | cifs_max_pending); | ||
| 458 | set_credits(server, server->maxReq); | ||
| 459 | server->maxBuf = le16_to_cpu(rsp->MaxBufSize); | ||
| 460 | server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); | ||
| 461 | /* even though we do not use raw we might as well set this | ||
| 462 | accurately, in case we ever find a need for it */ | ||
| 463 | if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { | ||
| 464 | server->max_rw = 0xFF00; | ||
| 465 | server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE; | ||
| 466 | } else { | ||
| 467 | server->max_rw = 0;/* do not need to use raw anyway */ | ||
| 468 | server->capabilities = CAP_MPX_MODE; | ||
| 469 | } | ||
| 470 | tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone); | ||
| 471 | if (tmp == -1) { | ||
| 472 | /* OS/2 often does not set timezone therefore | ||
| 473 | * we must use server time to calc time zone. | ||
| 474 | * Could deviate slightly from the right zone. | ||
| 475 | * Smallest defined timezone difference is 15 minutes | ||
| 476 | * (i.e. Nepal). Rounding up/down is done to match | ||
| 477 | * this requirement. | ||
| 478 | */ | ||
| 479 | int val, seconds, remain, result; | ||
| 480 | struct timespec ts, utc; | ||
| 481 | utc = CURRENT_TIME; | ||
| 482 | ts = cnvrtDosUnixTm(rsp->SrvTime.Date, | ||
| 483 | rsp->SrvTime.Time, 0); | ||
| 484 | cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n", | ||
| 485 | (int)ts.tv_sec, (int)utc.tv_sec, | ||
| 486 | (int)(utc.tv_sec - ts.tv_sec)); | ||
| 487 | val = (int)(utc.tv_sec - ts.tv_sec); | ||
| 488 | seconds = abs(val); | ||
| 489 | result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; | ||
| 490 | remain = seconds % MIN_TZ_ADJ; | ||
| 491 | if (remain >= (MIN_TZ_ADJ / 2)) | ||
| 492 | result += MIN_TZ_ADJ; | ||
| 493 | if (val < 0) | ||
| 494 | result = -result; | ||
| 495 | server->timeAdj = result; | ||
| 496 | } else { | ||
| 497 | server->timeAdj = (int)tmp; | ||
| 498 | server->timeAdj *= 60; /* also in seconds */ | ||
| 499 | } | ||
| 500 | cifs_dbg(FYI, "server->timeAdj: %d seconds\n", server->timeAdj); | ||
| 501 | |||
| 502 | |||
| 503 | /* BB get server time for time conversions and add | ||
| 504 | code to use it and timezone since this is not UTC */ | ||
| 505 | |||
| 506 | if (rsp->EncryptionKeyLength == | ||
| 507 | cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { | ||
| 508 | memcpy(ses->server->cryptkey, rsp->EncryptionKey, | ||
| 509 | CIFS_CRYPTO_KEY_SIZE); | ||
| 510 | } else if (server->sec_mode & SECMODE_PW_ENCRYPT) { | ||
| 511 | rc = -EIO; /* need cryptkey unless plain text */ | ||
| 512 | goto neg_err_exit; | ||
| 513 | } | ||
| 514 | |||
| 515 | cifs_dbg(FYI, "LANMAN negotiated\n"); | ||
| 516 | /* we will not end up setting signing flags - as no signing | ||
| 517 | was in LANMAN and server did not return the flags on */ | ||
| 518 | goto signing_check; | ||
| 519 | #else /* weak security disabled */ | ||
| 520 | } else if (pSMBr->hdr.WordCount == 13) { | 601 | } else if (pSMBr->hdr.WordCount == 13) { |
| 521 | cifs_dbg(VFS, "mount failed, cifs module not built with CIFS_WEAK_PW_HASH support\n"); | 602 | server->negflavor = CIFS_NEGFLAVOR_LANMAN; |
| 522 | rc = -EOPNOTSUPP; | 603 | rc = decode_lanman_negprot_rsp(server, pSMBr); |
| 523 | #endif /* WEAK_PW_HASH */ | 604 | goto signing_check; |
| 524 | goto neg_err_exit; | ||
| 525 | } else if (pSMBr->hdr.WordCount != 17) { | 605 | } else if (pSMBr->hdr.WordCount != 17) { |
| 526 | /* unknown wct */ | 606 | /* unknown wct */ |
| 527 | rc = -EOPNOTSUPP; | 607 | rc = -EOPNOTSUPP; |
| 528 | goto neg_err_exit; | 608 | goto neg_err_exit; |
| 529 | } | 609 | } |
| 530 | /* else wct == 17 NTLM */ | 610 | /* else wct == 17, NTLM or better */ |
| 611 | |||
| 531 | server->sec_mode = pSMBr->SecurityMode; | 612 | server->sec_mode = pSMBr->SecurityMode; |
| 532 | if ((server->sec_mode & SECMODE_USER) == 0) | 613 | if ((server->sec_mode & SECMODE_USER) == 0) |
| 533 | cifs_dbg(FYI, "share mode security\n"); | 614 | cifs_dbg(FYI, "share mode security\n"); |
| 534 | 615 | ||
| 535 | if ((server->sec_mode & SECMODE_PW_ENCRYPT) == 0) | ||
| 536 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | ||
| 537 | if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0) | ||
| 538 | #endif /* CIFS_WEAK_PW_HASH */ | ||
| 539 | cifs_dbg(VFS, "Server requests plain text password but client support disabled\n"); | ||
| 540 | |||
| 541 | if ((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2) | ||
| 542 | server->secType = NTLMv2; | ||
| 543 | else if (secFlags & CIFSSEC_MAY_NTLM) | ||
| 544 | server->secType = NTLM; | ||
| 545 | else if (secFlags & CIFSSEC_MAY_NTLMV2) | ||
| 546 | server->secType = NTLMv2; | ||
| 547 | else if (secFlags & CIFSSEC_MAY_KRB5) | ||
| 548 | server->secType = Kerberos; | ||
| 549 | else if (secFlags & CIFSSEC_MAY_NTLMSSP) | ||
| 550 | server->secType = RawNTLMSSP; | ||
| 551 | else if (secFlags & CIFSSEC_MAY_LANMAN) | ||
| 552 | server->secType = LANMAN; | ||
| 553 | else { | ||
| 554 | rc = -EOPNOTSUPP; | ||
| 555 | cifs_dbg(VFS, "Invalid security type\n"); | ||
| 556 | goto neg_err_exit; | ||
| 557 | } | ||
| 558 | /* else ... any others ...? */ | ||
| 559 | |||
| 560 | /* one byte, so no need to convert this or EncryptionKeyLen from | 616 | /* one byte, so no need to convert this or EncryptionKeyLen from |
| 561 | little endian */ | 617 | little endian */ |
| 562 | server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount), | 618 | server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount), |
| @@ -569,90 +625,26 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) | |||
| 569 | server->capabilities = le32_to_cpu(pSMBr->Capabilities); | 625 | server->capabilities = le32_to_cpu(pSMBr->Capabilities); |
| 570 | server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); | 626 | server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); |
| 571 | server->timeAdj *= 60; | 627 | server->timeAdj *= 60; |
| 628 | |||
| 572 | if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { | 629 | if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { |
| 630 | server->negflavor = CIFS_NEGFLAVOR_UNENCAP; | ||
| 573 | memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey, | 631 | memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey, |
| 574 | CIFS_CRYPTO_KEY_SIZE); | 632 | CIFS_CRYPTO_KEY_SIZE); |
| 575 | } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC || | 633 | } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC || |
| 576 | server->capabilities & CAP_EXTENDED_SECURITY) && | 634 | server->capabilities & CAP_EXTENDED_SECURITY) && |
| 577 | (pSMBr->EncryptionKeyLength == 0)) { | 635 | (pSMBr->EncryptionKeyLength == 0)) { |
| 578 | /* decode security blob */ | 636 | server->negflavor = CIFS_NEGFLAVOR_EXTENDED; |
| 579 | count = get_bcc(&pSMBr->hdr); | 637 | rc = decode_ext_sec_blob(ses, pSMBr); |
| 580 | if (count < 16) { | ||
| 581 | rc = -EIO; | ||
| 582 | goto neg_err_exit; | ||
| 583 | } | ||
| 584 | spin_lock(&cifs_tcp_ses_lock); | ||
| 585 | if (server->srv_count > 1) { | ||
| 586 | spin_unlock(&cifs_tcp_ses_lock); | ||
| 587 | if (memcmp(server->server_GUID, | ||
| 588 | pSMBr->u.extended_response. | ||
| 589 | GUID, 16) != 0) { | ||
| 590 | cifs_dbg(FYI, "server UID changed\n"); | ||
| 591 | memcpy(server->server_GUID, | ||
| 592 | pSMBr->u.extended_response.GUID, | ||
| 593 | 16); | ||
| 594 | } | ||
| 595 | } else { | ||
| 596 | spin_unlock(&cifs_tcp_ses_lock); | ||
| 597 | memcpy(server->server_GUID, | ||
| 598 | pSMBr->u.extended_response.GUID, 16); | ||
| 599 | } | ||
| 600 | |||
| 601 | if (count == 16) { | ||
| 602 | server->secType = RawNTLMSSP; | ||
| 603 | } else { | ||
| 604 | rc = decode_negTokenInit(pSMBr->u.extended_response. | ||
| 605 | SecurityBlob, count - 16, | ||
| 606 | server); | ||
| 607 | if (rc == 1) | ||
| 608 | rc = 0; | ||
| 609 | else | ||
| 610 | rc = -EINVAL; | ||
| 611 | if (server->secType == Kerberos) { | ||
| 612 | if (!server->sec_kerberos && | ||
| 613 | !server->sec_mskerberos) | ||
| 614 | rc = -EOPNOTSUPP; | ||
| 615 | } else if (server->secType == RawNTLMSSP) { | ||
| 616 | if (!server->sec_ntlmssp) | ||
| 617 | rc = -EOPNOTSUPP; | ||
| 618 | } else | ||
| 619 | rc = -EOPNOTSUPP; | ||
| 620 | } | ||
| 621 | } else if (server->sec_mode & SECMODE_PW_ENCRYPT) { | 638 | } else if (server->sec_mode & SECMODE_PW_ENCRYPT) { |
| 622 | rc = -EIO; /* no crypt key only if plain text pwd */ | 639 | rc = -EIO; /* no crypt key only if plain text pwd */ |
| 623 | goto neg_err_exit; | ||
| 624 | } else | ||
| 625 | server->capabilities &= ~CAP_EXTENDED_SECURITY; | ||
| 626 | |||
| 627 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | ||
| 628 | signing_check: | ||
| 629 | #endif | ||
| 630 | if ((secFlags & CIFSSEC_MAY_SIGN) == 0) { | ||
| 631 | /* MUST_SIGN already includes the MAY_SIGN FLAG | ||
| 632 | so if this is zero it means that signing is disabled */ | ||
| 633 | cifs_dbg(FYI, "Signing disabled\n"); | ||
| 634 | if (server->sec_mode & SECMODE_SIGN_REQUIRED) { | ||
| 635 | cifs_dbg(VFS, "Server requires packet signing to be enabled in /proc/fs/cifs/SecurityFlags\n"); | ||
| 636 | rc = -EOPNOTSUPP; | ||
| 637 | } | ||
| 638 | server->sec_mode &= | ||
| 639 | ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); | ||
| 640 | } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { | ||
| 641 | /* signing required */ | ||
| 642 | cifs_dbg(FYI, "Must sign - secFlags 0x%x\n", secFlags); | ||
| 643 | if ((server->sec_mode & | ||
| 644 | (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) { | ||
| 645 | cifs_dbg(VFS, "signing required but server lacks support\n"); | ||
| 646 | rc = -EOPNOTSUPP; | ||
| 647 | } else | ||
| 648 | server->sec_mode |= SECMODE_SIGN_REQUIRED; | ||
| 649 | } else { | 640 | } else { |
| 650 | /* signing optional ie CIFSSEC_MAY_SIGN */ | 641 | server->negflavor = CIFS_NEGFLAVOR_UNENCAP; |
| 651 | if ((server->sec_mode & SECMODE_SIGN_REQUIRED) == 0) | 642 | server->capabilities &= ~CAP_EXTENDED_SECURITY; |
| 652 | server->sec_mode &= | ||
| 653 | ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); | ||
| 654 | } | 643 | } |
| 655 | 644 | ||
| 645 | signing_check: | ||
| 646 | if (!rc) | ||
| 647 | rc = cifs_enable_signing(server, ses->sign); | ||
| 656 | neg_err_exit: | 648 | neg_err_exit: |
| 657 | cifs_buf_release(pSMB); | 649 | cifs_buf_release(pSMB); |
| 658 | 650 | ||
| @@ -777,9 +769,8 @@ CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses) | |||
| 777 | 769 | ||
| 778 | pSMB->hdr.Mid = get_next_mid(ses->server); | 770 | pSMB->hdr.Mid = get_next_mid(ses->server); |
| 779 | 771 | ||
| 780 | if (ses->server->sec_mode & | 772 | if (ses->server->sign) |
| 781 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | 773 | pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; |
| 782 | pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; | ||
| 783 | 774 | ||
| 784 | pSMB->hdr.Uid = ses->Suid; | 775 | pSMB->hdr.Uid = ses->Suid; |
| 785 | 776 | ||
| @@ -1540,8 +1531,7 @@ cifs_readv_callback(struct mid_q_entry *mid) | |||
| 1540 | switch (mid->mid_state) { | 1531 | switch (mid->mid_state) { |
| 1541 | case MID_RESPONSE_RECEIVED: | 1532 | case MID_RESPONSE_RECEIVED: |
| 1542 | /* result already set, check signature */ | 1533 | /* result already set, check signature */ |
| 1543 | if (server->sec_mode & | 1534 | if (server->sign) { |
| 1544 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | ||
| 1545 | int rc = 0; | 1535 | int rc = 0; |
| 1546 | 1536 | ||
| 1547 | rc = cifs_verify_signature(&rqst, server, | 1537 | rc = cifs_verify_signature(&rqst, server, |
| @@ -3940,6 +3930,7 @@ QFileInfoRetry: | |||
| 3940 | pSMB->Pad = 0; | 3930 | pSMB->Pad = 0; |
| 3941 | pSMB->Fid = netfid; | 3931 | pSMB->Fid = netfid; |
| 3942 | inc_rfc1001_len(pSMB, byte_count); | 3932 | inc_rfc1001_len(pSMB, byte_count); |
| 3933 | pSMB->t2.ByteCount = cpu_to_le16(byte_count); | ||
| 3943 | 3934 | ||
| 3944 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 3935 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, |
| 3945 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); | 3936 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); |
| @@ -4108,6 +4099,7 @@ UnixQFileInfoRetry: | |||
| 4108 | pSMB->Pad = 0; | 4099 | pSMB->Pad = 0; |
| 4109 | pSMB->Fid = netfid; | 4100 | pSMB->Fid = netfid; |
| 4110 | inc_rfc1001_len(pSMB, byte_count); | 4101 | inc_rfc1001_len(pSMB, byte_count); |
| 4102 | pSMB->t2.ByteCount = cpu_to_le16(byte_count); | ||
| 4111 | 4103 | ||
| 4112 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 4104 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, |
| 4113 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); | 4105 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); |
| @@ -4794,11 +4786,8 @@ getDFSRetry: | |||
| 4794 | strncpy(pSMB->RequestFileName, search_name, name_len); | 4786 | strncpy(pSMB->RequestFileName, search_name, name_len); |
| 4795 | } | 4787 | } |
| 4796 | 4788 | ||
| 4797 | if (ses->server) { | 4789 | if (ses->server && ses->server->sign) |
| 4798 | if (ses->server->sec_mode & | 4790 | pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; |
| 4799 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | ||
| 4800 | pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; | ||
| 4801 | } | ||
| 4802 | 4791 | ||
| 4803 | pSMB->hdr.Uid = ses->Suid; | 4792 | pSMB->hdr.Uid = ses->Suid; |
| 4804 | 4793 | ||
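
The SMB1 request-building hunks above (CIFSSMBLogoff, cifs_readv_callback, the DFS referral request, and the negotiate path) all share one theme: every open-coded test of `server->sec_mode` against `SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED` collapses into the new cached boolean `server->sign`, with the negotiate code deferring the decision to `cifs_enable_signing()`. A minimal before/after sketch of the per-request pattern, mirroring the hunks above (illustrative only):

```c
/* Before: each request re-derived the signing decision from the raw
 * security-mode bits the server returned at negotiate time. */
if (ses->server->sec_mode &
    (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
	pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;

/* After: negotiate/session setup decide once (via cifs_enable_signing)
 * and cache the result, so request construction only reads a boolean. */
if (ses->server->sign)
	pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
```
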
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e3bc39bb9d12..afcb8a1a33b7 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
| @@ -85,7 +85,7 @@ enum { | |||
| 85 | Opt_acl, Opt_noacl, Opt_locallease, | 85 | Opt_acl, Opt_noacl, Opt_locallease, |
| 86 | Opt_sign, Opt_seal, Opt_noac, | 86 | Opt_sign, Opt_seal, Opt_noac, |
| 87 | Opt_fsc, Opt_mfsymlinks, | 87 | Opt_fsc, Opt_mfsymlinks, |
| 88 | Opt_multiuser, Opt_sloppy, | 88 | Opt_multiuser, Opt_sloppy, Opt_nosharesock, |
| 89 | 89 | ||
| 90 | /* Mount options which take numeric value */ | 90 | /* Mount options which take numeric value */ |
| 91 | Opt_backupuid, Opt_backupgid, Opt_uid, | 91 | Opt_backupuid, Opt_backupgid, Opt_uid, |
| @@ -165,6 +165,7 @@ static const match_table_t cifs_mount_option_tokens = { | |||
| 165 | { Opt_mfsymlinks, "mfsymlinks" }, | 165 | { Opt_mfsymlinks, "mfsymlinks" }, |
| 166 | { Opt_multiuser, "multiuser" }, | 166 | { Opt_multiuser, "multiuser" }, |
| 167 | { Opt_sloppy, "sloppy" }, | 167 | { Opt_sloppy, "sloppy" }, |
| 168 | { Opt_nosharesock, "nosharesock" }, | ||
| 168 | 169 | ||
| 169 | { Opt_backupuid, "backupuid=%s" }, | 170 | { Opt_backupuid, "backupuid=%s" }, |
| 170 | { Opt_backupgid, "backupgid=%s" }, | 171 | { Opt_backupgid, "backupgid=%s" }, |
| @@ -275,6 +276,7 @@ static const match_table_t cifs_smb_version_tokens = { | |||
| 275 | { Smb_20, SMB20_VERSION_STRING}, | 276 | { Smb_20, SMB20_VERSION_STRING}, |
| 276 | { Smb_21, SMB21_VERSION_STRING }, | 277 | { Smb_21, SMB21_VERSION_STRING }, |
| 277 | { Smb_30, SMB30_VERSION_STRING }, | 278 | { Smb_30, SMB30_VERSION_STRING }, |
| 279 | { Smb_302, SMB302_VERSION_STRING }, | ||
| 278 | }; | 280 | }; |
| 279 | 281 | ||
| 280 | static int ip_connect(struct TCP_Server_Info *server); | 282 | static int ip_connect(struct TCP_Server_Info *server); |
| @@ -1024,44 +1026,48 @@ static int cifs_parse_security_flavors(char *value, | |||
| 1024 | 1026 | ||
| 1025 | substring_t args[MAX_OPT_ARGS]; | 1027 | substring_t args[MAX_OPT_ARGS]; |
| 1026 | 1028 | ||
| 1029 | /* | ||
| 1030 | * With mount options, the last one should win. Reset any existing | ||
| 1031 | * settings back to default. | ||
| 1032 | */ | ||
| 1033 | vol->sectype = Unspecified; | ||
| 1034 | vol->sign = false; | ||
| 1035 | |||
| 1027 | switch (match_token(value, cifs_secflavor_tokens, args)) { | 1036 | switch (match_token(value, cifs_secflavor_tokens, args)) { |
| 1028 | case Opt_sec_krb5: | ||
| 1029 | vol->secFlg |= CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_SIGN; | ||
| 1030 | break; | ||
| 1031 | case Opt_sec_krb5i: | ||
| 1032 | vol->secFlg |= CIFSSEC_MAY_KRB5 | CIFSSEC_MUST_SIGN; | ||
| 1033 | break; | ||
| 1034 | case Opt_sec_krb5p: | 1037 | case Opt_sec_krb5p: |
| 1035 | /* vol->secFlg |= CIFSSEC_MUST_SEAL | CIFSSEC_MAY_KRB5; */ | 1038 | cifs_dbg(VFS, "sec=krb5p is not supported!\n"); |
| 1036 | cifs_dbg(VFS, "Krb5 cifs privacy not supported\n"); | 1039 | return 1; |
| 1037 | break; | 1040 | case Opt_sec_krb5i: |
| 1038 | case Opt_sec_ntlmssp: | 1041 | vol->sign = true; |
| 1039 | vol->secFlg |= CIFSSEC_MAY_NTLMSSP; | 1042 | /* Fallthrough */ |
| 1043 | case Opt_sec_krb5: | ||
| 1044 | vol->sectype = Kerberos; | ||
| 1040 | break; | 1045 | break; |
| 1041 | case Opt_sec_ntlmsspi: | 1046 | case Opt_sec_ntlmsspi: |
| 1042 | vol->secFlg |= CIFSSEC_MAY_NTLMSSP | CIFSSEC_MUST_SIGN; | 1047 | vol->sign = true; |
| 1043 | break; | 1048 | /* Fallthrough */ |
| 1044 | case Opt_ntlm: | 1049 | case Opt_sec_ntlmssp: |
| 1045 | /* ntlm is default so can be turned off too */ | 1050 | vol->sectype = RawNTLMSSP; |
| 1046 | vol->secFlg |= CIFSSEC_MAY_NTLM; | ||
| 1047 | break; | 1051 | break; |
| 1048 | case Opt_sec_ntlmi: | 1052 | case Opt_sec_ntlmi: |
| 1049 | vol->secFlg |= CIFSSEC_MAY_NTLM | CIFSSEC_MUST_SIGN; | 1053 | vol->sign = true; |
| 1050 | break; | 1054 | /* Fallthrough */ |
| 1051 | case Opt_sec_ntlmv2: | 1055 | case Opt_ntlm: |
| 1052 | vol->secFlg |= CIFSSEC_MAY_NTLMV2; | 1056 | vol->sectype = NTLM; |
| 1053 | break; | 1057 | break; |
| 1054 | case Opt_sec_ntlmv2i: | 1058 | case Opt_sec_ntlmv2i: |
| 1055 | vol->secFlg |= CIFSSEC_MAY_NTLMV2 | CIFSSEC_MUST_SIGN; | 1059 | vol->sign = true; |
| 1060 | /* Fallthrough */ | ||
| 1061 | case Opt_sec_ntlmv2: | ||
| 1062 | vol->sectype = NTLMv2; | ||
| 1056 | break; | 1063 | break; |
| 1057 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 1064 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
| 1058 | case Opt_sec_lanman: | 1065 | case Opt_sec_lanman: |
| 1059 | vol->secFlg |= CIFSSEC_MAY_LANMAN; | 1066 | vol->sectype = LANMAN; |
| 1060 | break; | 1067 | break; |
| 1061 | #endif | 1068 | #endif |
| 1062 | case Opt_sec_none: | 1069 | case Opt_sec_none: |
| 1063 | vol->nullauth = 1; | 1070 | vol->nullauth = 1; |
| 1064 | vol->secFlg |= CIFSSEC_MAY_NTLM; | ||
| 1065 | break; | 1071 | break; |
| 1066 | default: | 1072 | default: |
| 1067 | cifs_dbg(VFS, "bad security option: %s\n", value); | 1073 | cifs_dbg(VFS, "bad security option: %s\n", value); |
| @@ -1119,6 +1125,10 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) | |||
| 1119 | vol->ops = &smb30_operations; | 1125 | vol->ops = &smb30_operations; |
| 1120 | vol->vals = &smb30_values; | 1126 | vol->vals = &smb30_values; |
| 1121 | break; | 1127 | break; |
| 1128 | case Smb_302: | ||
| 1129 | vol->ops = &smb30_operations; /* currently identical with 3.0 */ | ||
| 1130 | vol->vals = &smb302_values; | ||
| 1131 | break; | ||
| 1122 | #endif | 1132 | #endif |
| 1123 | default: | 1133 | default: |
| 1124 | cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value); | 1134 | cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value); |
| @@ -1424,7 +1434,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
| 1424 | vol->local_lease = 1; | 1434 | vol->local_lease = 1; |
| 1425 | break; | 1435 | break; |
| 1426 | case Opt_sign: | 1436 | case Opt_sign: |
| 1427 | vol->secFlg |= CIFSSEC_MUST_SIGN; | 1437 | vol->sign = true; |
| 1428 | break; | 1438 | break; |
| 1429 | case Opt_seal: | 1439 | case Opt_seal: |
| 1430 | /* we do not do the following in secFlags because seal | 1440 | /* we do not do the following in secFlags because seal |
| @@ -1455,6 +1465,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
| 1455 | case Opt_sloppy: | 1465 | case Opt_sloppy: |
| 1456 | sloppy = true; | 1466 | sloppy = true; |
| 1457 | break; | 1467 | break; |
| 1468 | case Opt_nosharesock: | ||
| 1469 | vol->nosharesock = true; | ||
| 1470 | break; | ||
| 1458 | 1471 | ||
| 1459 | /* Numeric Values */ | 1472 | /* Numeric Values */ |
| 1460 | case Opt_backupuid: | 1473 | case Opt_backupuid: |
| @@ -1978,47 +1991,21 @@ match_address(struct TCP_Server_Info *server, struct sockaddr *addr, | |||
| 1978 | static bool | 1991 | static bool |
| 1979 | match_security(struct TCP_Server_Info *server, struct smb_vol *vol) | 1992 | match_security(struct TCP_Server_Info *server, struct smb_vol *vol) |
| 1980 | { | 1993 | { |
| 1981 | unsigned int secFlags; | 1994 | /* |
| 1982 | 1995 | * The select_sectype function should either return the vol->sectype | |
| 1983 | if (vol->secFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) | 1996 | * that was specified, or "Unspecified" if that sectype was not |
| 1984 | secFlags = vol->secFlg; | 1997 | * compatible with the given NEGOTIATE request. |
| 1985 | else | 1998 | */ |
| 1986 | secFlags = global_secflags | vol->secFlg; | 1999 | if (select_sectype(server, vol->sectype) == Unspecified) |
| 1987 | |||
| 1988 | switch (server->secType) { | ||
| 1989 | case LANMAN: | ||
| 1990 | if (!(secFlags & (CIFSSEC_MAY_LANMAN|CIFSSEC_MAY_PLNTXT))) | ||
| 1991 | return false; | ||
| 1992 | break; | ||
| 1993 | case NTLMv2: | ||
| 1994 | if (!(secFlags & CIFSSEC_MAY_NTLMV2)) | ||
| 1995 | return false; | ||
| 1996 | break; | ||
| 1997 | case NTLM: | ||
| 1998 | if (!(secFlags & CIFSSEC_MAY_NTLM)) | ||
| 1999 | return false; | ||
| 2000 | break; | ||
| 2001 | case Kerberos: | ||
| 2002 | if (!(secFlags & CIFSSEC_MAY_KRB5)) | ||
| 2003 | return false; | ||
| 2004 | break; | ||
| 2005 | case RawNTLMSSP: | ||
| 2006 | if (!(secFlags & CIFSSEC_MAY_NTLMSSP)) | ||
| 2007 | return false; | ||
| 2008 | break; | ||
| 2009 | default: | ||
| 2010 | /* shouldn't happen */ | ||
| 2011 | return false; | 2000 | return false; |
| 2012 | } | ||
| 2013 | 2001 | ||
| 2014 | /* now check if signing mode is acceptable */ | 2002 | /* |
| 2015 | if ((secFlags & CIFSSEC_MAY_SIGN) == 0 && | 2003 | * Now check if signing mode is acceptable. No need to check |
| 2016 | (server->sec_mode & SECMODE_SIGN_REQUIRED)) | 2004 | * global_secflags at this point since if MUST_SIGN is set then |
| 2017 | return false; | 2005 | * the server->sign had better be too. |
| 2018 | else if (((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) && | 2006 | */ |
| 2019 | (server->sec_mode & | 2007 | if (vol->sign && !server->sign) |
| 2020 | (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)) == 0) | 2008 | return false; |
| 2021 | return false; | ||
| 2022 | 2009 | ||
| 2023 | return true; | 2010 | return true; |
| 2024 | } | 2011 | } |
| @@ -2027,6 +2014,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol) | |||
| 2027 | { | 2014 | { |
| 2028 | struct sockaddr *addr = (struct sockaddr *)&vol->dstaddr; | 2015 | struct sockaddr *addr = (struct sockaddr *)&vol->dstaddr; |
| 2029 | 2016 | ||
| 2017 | if (vol->nosharesock) | ||
| 2018 | return 0; | ||
| 2019 | |||
| 2030 | if ((server->vals != vol->vals) || (server->ops != vol->ops)) | 2020 | if ((server->vals != vol->vals) || (server->ops != vol->ops)) |
| 2031 | return 0; | 2021 | return 0; |
| 2032 | 2022 | ||
| @@ -2216,7 +2206,11 @@ out_err: | |||
| 2216 | 2206 | ||
| 2217 | static int match_session(struct cifs_ses *ses, struct smb_vol *vol) | 2207 | static int match_session(struct cifs_ses *ses, struct smb_vol *vol) |
| 2218 | { | 2208 | { |
| 2219 | switch (ses->server->secType) { | 2209 | if (vol->sectype != Unspecified && |
| 2210 | vol->sectype != ses->sectype) | ||
| 2211 | return 0; | ||
| 2212 | |||
| 2213 | switch (ses->sectype) { | ||
| 2220 | case Kerberos: | 2214 | case Kerberos: |
| 2221 | if (!uid_eq(vol->cred_uid, ses->cred_uid)) | 2215 | if (!uid_eq(vol->cred_uid, ses->cred_uid)) |
| 2222 | return 0; | 2216 | return 0; |
| @@ -2493,7 +2487,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) | |||
| 2493 | ses->cred_uid = volume_info->cred_uid; | 2487 | ses->cred_uid = volume_info->cred_uid; |
| 2494 | ses->linux_uid = volume_info->linux_uid; | 2488 | ses->linux_uid = volume_info->linux_uid; |
| 2495 | 2489 | ||
| 2496 | ses->overrideSecFlg = volume_info->secFlg; | 2490 | ses->sectype = volume_info->sectype; |
| 2491 | ses->sign = volume_info->sign; | ||
| 2497 | 2492 | ||
| 2498 | mutex_lock(&ses->session_mutex); | 2493 | mutex_lock(&ses->session_mutex); |
| 2499 | rc = cifs_negotiate_protocol(xid, ses); | 2494 | rc = cifs_negotiate_protocol(xid, ses); |
| @@ -3656,7 +3651,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, | |||
| 3656 | NTLMv2 password here) */ | 3651 | NTLMv2 password here) */ |
| 3657 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 3652 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
| 3658 | if ((global_secflags & CIFSSEC_MAY_LANMAN) && | 3653 | if ((global_secflags & CIFSSEC_MAY_LANMAN) && |
| 3659 | (ses->server->secType == LANMAN)) | 3654 | (ses->sectype == LANMAN)) |
| 3660 | calc_lanman_hash(tcon->password, ses->server->cryptkey, | 3655 | calc_lanman_hash(tcon->password, ses->server->cryptkey, |
| 3661 | ses->server->sec_mode & | 3656 | ses->server->sec_mode & |
| 3662 | SECMODE_PW_ENCRYPT ? true : false, | 3657 | SECMODE_PW_ENCRYPT ? true : false, |
| @@ -3674,8 +3669,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, | |||
| 3674 | } | 3669 | } |
| 3675 | } | 3670 | } |
| 3676 | 3671 | ||
| 3677 | if (ses->server->sec_mode & | 3672 | if (ses->server->sign) |
| 3678 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | ||
| 3679 | smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; | 3673 | smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; |
| 3680 | 3674 | ||
| 3681 | if (ses->capabilities & CAP_STATUS32) { | 3675 | if (ses->capabilities & CAP_STATUS32) { |
| @@ -3738,7 +3732,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, | |||
| 3738 | } | 3732 | } |
| 3739 | bcc_ptr += length + 1; | 3733 | bcc_ptr += length + 1; |
| 3740 | bytes_left -= (length + 1); | 3734 | bytes_left -= (length + 1); |
| 3741 | strncpy(tcon->treeName, tree, MAX_TREE_SIZE); | 3735 | strlcpy(tcon->treeName, tree, sizeof(tcon->treeName)); |
| 3742 | 3736 | ||
| 3743 | /* mostly informational -- no need to fail on error here */ | 3737 | /* mostly informational -- no need to fail on error here */ |
| 3744 | kfree(tcon->nativeFileSystem); | 3738 | kfree(tcon->nativeFileSystem); |
| @@ -3827,7 +3821,6 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, | |||
| 3827 | int rc = -ENOSYS; | 3821 | int rc = -ENOSYS; |
| 3828 | struct TCP_Server_Info *server = ses->server; | 3822 | struct TCP_Server_Info *server = ses->server; |
| 3829 | 3823 | ||
| 3830 | ses->flags = 0; | ||
| 3831 | ses->capabilities = server->capabilities; | 3824 | ses->capabilities = server->capabilities; |
| 3832 | if (linuxExtEnabled == 0) | 3825 | if (linuxExtEnabled == 0) |
| 3833 | ses->capabilities &= (~server->vals->cap_unix); | 3826 | ses->capabilities &= (~server->vals->cap_unix); |
| @@ -3848,6 +3841,8 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, | |||
| 3848 | server->sequence_number = 0x2; | 3841 | server->sequence_number = 0x2; |
| 3849 | server->session_estab = true; | 3842 | server->session_estab = true; |
| 3850 | ses->auth_key.response = NULL; | 3843 | ses->auth_key.response = NULL; |
| 3844 | if (server->ops->generate_signingkey) | ||
| 3845 | server->ops->generate_signingkey(server); | ||
| 3851 | } | 3846 | } |
| 3852 | mutex_unlock(&server->srv_mutex); | 3847 | mutex_unlock(&server->srv_mutex); |
| 3853 | 3848 | ||
| @@ -3870,23 +3865,11 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, | |||
| 3870 | static int | 3865 | static int |
| 3871 | cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses) | 3866 | cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses) |
| 3872 | { | 3867 | { |
| 3873 | switch (ses->server->secType) { | 3868 | vol->sectype = ses->sectype; |
| 3874 | case Kerberos: | 3869 | |
| 3875 | vol->secFlg = CIFSSEC_MUST_KRB5; | 3870 | /* krb5 is special, since we don't need username or pw */ |
| 3871 | if (vol->sectype == Kerberos) | ||
| 3876 | return 0; | 3872 | return 0; |
| 3877 | case NTLMv2: | ||
| 3878 | vol->secFlg = CIFSSEC_MUST_NTLMV2; | ||
| 3879 | break; | ||
| 3880 | case NTLM: | ||
| 3881 | vol->secFlg = CIFSSEC_MUST_NTLM; | ||
| 3882 | break; | ||
| 3883 | case RawNTLMSSP: | ||
| 3884 | vol->secFlg = CIFSSEC_MUST_NTLMSSP; | ||
| 3885 | break; | ||
| 3886 | case LANMAN: | ||
| 3887 | vol->secFlg = CIFSSEC_MUST_LANMAN; | ||
| 3888 | break; | ||
| 3889 | } | ||
| 3890 | 3873 | ||
| 3891 | return cifs_set_cifscreds(vol, ses); | 3874 | return cifs_set_cifscreds(vol, ses); |
| 3892 | } | 3875 | } |
| @@ -3912,6 +3895,8 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) | |||
| 3912 | vol_info->nocase = master_tcon->nocase; | 3895 | vol_info->nocase = master_tcon->nocase; |
| 3913 | vol_info->local_lease = master_tcon->local_lease; | 3896 | vol_info->local_lease = master_tcon->local_lease; |
| 3914 | vol_info->no_linux_ext = !master_tcon->unix_ext; | 3897 | vol_info->no_linux_ext = !master_tcon->unix_ext; |
| 3898 | vol_info->sectype = master_tcon->ses->sectype; | ||
| 3899 | vol_info->sign = master_tcon->ses->sign; | ||
| 3915 | 3900 | ||
| 3916 | rc = cifs_set_vol_auth(vol_info, master_tcon->ses); | 3901 | rc = cifs_set_vol_auth(vol_info, master_tcon->ses); |
| 3917 | if (rc) { | 3902 | if (rc) { |
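
The connect.c changes move authentication selection from the `secFlg` bitmask to an explicit `vol->sectype`/`vol->sign` pair, add the `nosharesock` and `vers=3.02` mount options, and simplify server matching accordingly, e.g. for a mount such as `mount -t cifs //server/share /mnt -o vers=3.02,sec=ntlmsspi,nosharesock` (server and share names are placeholders). A rough sketch of the resulting reuse decision, condensing `match_server()` and `match_security()` from the hunks above (not the exact kernel code):

```c
/*
 * Condensed, illustrative version of the reuse check: may this mount
 * piggyback on an already-connected TCP_Server_Info?
 */
static bool may_share_existing_socket(struct TCP_Server_Info *server,
				      struct smb_vol *vol)
{
	if (vol->nosharesock)			/* new mount option */
		return false;
	if (server->vals != vol->vals ||	/* same dialect, e.g. 3.02 */
	    server->ops != vol->ops)
		return false;
	/* requested auth type must be usable against this server */
	if (select_sectype(server, vol->sectype) == Unspecified)
		return false;
	/* if the mount demands signing, the server must already sign */
	if (vol->sign && !server->sign)
		return false;
	return true;	/* address and credential checks omitted here */
}
```
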
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 5699b5036ed8..5175aebf6737 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
| @@ -822,8 +822,7 @@ const struct dentry_operations cifs_dentry_ops = { | |||
| 822 | /* d_delete: cifs_d_delete, */ /* not needed except for debugging */ | 822 | /* d_delete: cifs_d_delete, */ /* not needed except for debugging */ |
| 823 | }; | 823 | }; |
| 824 | 824 | ||
| 825 | static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode, | 825 | static int cifs_ci_hash(const struct dentry *dentry, struct qstr *q) |
| 826 | struct qstr *q) | ||
| 827 | { | 826 | { |
| 828 | struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls; | 827 | struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls; |
| 829 | unsigned long hash; | 828 | unsigned long hash; |
| @@ -838,12 +837,10 @@ static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode, | |||
| 838 | return 0; | 837 | return 0; |
| 839 | } | 838 | } |
| 840 | 839 | ||
| 841 | static int cifs_ci_compare(const struct dentry *parent, | 840 | static int cifs_ci_compare(const struct dentry *parent, const struct dentry *dentry, |
| 842 | const struct inode *pinode, | ||
| 843 | const struct dentry *dentry, const struct inode *inode, | ||
| 844 | unsigned int len, const char *str, const struct qstr *name) | 841 | unsigned int len, const char *str, const struct qstr *name) |
| 845 | { | 842 | { |
| 846 | struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls; | 843 | struct nls_table *codepage = CIFS_SB(parent->d_sb)->local_nls; |
| 847 | 844 | ||
| 848 | if ((name->len == len) && | 845 | if ((name->len == len) && |
| 849 | (nls_strnicmp(codepage, name->name, str, len) == 0)) | 846 | (nls_strnicmp(codepage, name->name, str, len) == 0)) |
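
The dir.c hunk adapts to a VFS interface change: `d_hash` and `d_compare` no longer receive inode pointers, so the NLS codepage is now reached through the dentry's superblock. A sketch of how such case-insensitive helpers are registered, assuming the post-change prototypes shown above (the struct name here is hypothetical):

```c
/*
 * Illustrative only: wiring case-insensitive hash/compare callbacks
 * with the slimmed-down prototypes (no inode arguments).
 */
static const struct dentry_operations example_ci_dentry_ops = {
	.d_hash    = cifs_ci_hash,	/* (const struct dentry *, struct qstr *) */
	.d_compare = cifs_ci_compare,	/* (parent, dentry, len, str, name) */
};
```
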
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4d8ba8d491e5..91d8629e69a2 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
| @@ -999,7 +999,7 @@ try_again: | |||
| 999 | rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); | 999 | rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); |
| 1000 | if (!rc) | 1000 | if (!rc) |
| 1001 | goto try_again; | 1001 | goto try_again; |
| 1002 | locks_delete_block(flock); | 1002 | posix_unblock_lock(flock); |
| 1003 | } | 1003 | } |
| 1004 | return rc; | 1004 | return rc; |
| 1005 | } | 1005 | } |
| @@ -1092,6 +1092,7 @@ struct lock_to_push { | |||
| 1092 | static int | 1092 | static int |
| 1093 | cifs_push_posix_locks(struct cifsFileInfo *cfile) | 1093 | cifs_push_posix_locks(struct cifsFileInfo *cfile) |
| 1094 | { | 1094 | { |
| 1095 | struct inode *inode = cfile->dentry->d_inode; | ||
| 1095 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | 1096 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); |
| 1096 | struct file_lock *flock, **before; | 1097 | struct file_lock *flock, **before; |
| 1097 | unsigned int count = 0, i = 0; | 1098 | unsigned int count = 0, i = 0; |
| @@ -1102,12 +1103,12 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
| 1102 | 1103 | ||
| 1103 | xid = get_xid(); | 1104 | xid = get_xid(); |
| 1104 | 1105 | ||
| 1105 | lock_flocks(); | 1106 | spin_lock(&inode->i_lock); |
| 1106 | cifs_for_each_lock(cfile->dentry->d_inode, before) { | 1107 | cifs_for_each_lock(inode, before) { |
| 1107 | if ((*before)->fl_flags & FL_POSIX) | 1108 | if ((*before)->fl_flags & FL_POSIX) |
| 1108 | count++; | 1109 | count++; |
| 1109 | } | 1110 | } |
| 1110 | unlock_flocks(); | 1111 | spin_unlock(&inode->i_lock); |
| 1111 | 1112 | ||
| 1112 | INIT_LIST_HEAD(&locks_to_send); | 1113 | INIT_LIST_HEAD(&locks_to_send); |
| 1113 | 1114 | ||
| @@ -1126,8 +1127,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
| 1126 | } | 1127 | } |
| 1127 | 1128 | ||
| 1128 | el = locks_to_send.next; | 1129 | el = locks_to_send.next; |
| 1129 | lock_flocks(); | 1130 | spin_lock(&inode->i_lock); |
| 1130 | cifs_for_each_lock(cfile->dentry->d_inode, before) { | 1131 | cifs_for_each_lock(inode, before) { |
| 1131 | flock = *before; | 1132 | flock = *before; |
| 1132 | if ((flock->fl_flags & FL_POSIX) == 0) | 1133 | if ((flock->fl_flags & FL_POSIX) == 0) |
| 1133 | continue; | 1134 | continue; |
| @@ -1152,7 +1153,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
| 1152 | lck->offset = flock->fl_start; | 1153 | lck->offset = flock->fl_start; |
| 1153 | el = el->next; | 1154 | el = el->next; |
| 1154 | } | 1155 | } |
| 1155 | unlock_flocks(); | 1156 | spin_unlock(&inode->i_lock); |
| 1156 | 1157 | ||
| 1157 | list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { | 1158 | list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { |
| 1158 | int stored_rc; | 1159 | int stored_rc; |
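
In file.c the lock iteration now runs under the owning inode's `i_lock` spinlock instead of the global `lock_flocks()`/`unlock_flocks()` pair, and the blocked-lock cleanup switches from `locks_delete_block()` to `posix_unblock_lock()`. Because a spinlock is held, the push routine keeps its two-pass shape; a stripped-down sketch of that pattern (illustrative, not the exact kernel code):

```c
/*
 * Count under the spinlock, allocate while unlocked (allocation may
 * sleep), then take the lock again to copy the lock descriptions.
 */
spin_lock(&inode->i_lock);
cifs_for_each_lock(inode, before) {
	if ((*before)->fl_flags & FL_POSIX)
		count++;
}
spin_unlock(&inode->i_lock);

/* ... allocate 'count' entries with GFP_KERNEL while unlocked ... */

spin_lock(&inode->i_lock);
cifs_for_each_lock(inode, before) {
	flock = *before;
	if (flock->fl_flags & FL_POSIX) {
		/* record type, pid, offset and length for the server */
	}
}
spin_unlock(&inode->i_lock);
```
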
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1bec014779fd..f7d4b2285efe 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
| @@ -267,8 +267,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , | |||
| 267 | if (treeCon->nocase) | 267 | if (treeCon->nocase) |
| 268 | buffer->Flags |= SMBFLG_CASELESS; | 268 | buffer->Flags |= SMBFLG_CASELESS; |
| 269 | if ((treeCon->ses) && (treeCon->ses->server)) | 269 | if ((treeCon->ses) && (treeCon->ses->server)) |
| 270 | if (treeCon->ses->server->sec_mode & | 270 | if (treeCon->ses->server->sign) |
| 271 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | ||
| 272 | buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; | 271 | buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; |
| 273 | } | 272 | } |
| 274 | 273 | ||
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index f1213799de1a..ab8778469394 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
| @@ -126,6 +126,22 @@ out: | |||
| 126 | dput(dentry); | 126 | dput(dentry); |
| 127 | } | 127 | } |
| 128 | 128 | ||
| 129 | /* | ||
| 130 | * Is it possible that this directory might turn out to be a DFS referral | ||
| 131 | * once we go to try and use it? | ||
| 132 | */ | ||
| 133 | static bool | ||
| 134 | cifs_dfs_is_possible(struct cifs_sb_info *cifs_sb) | ||
| 135 | { | ||
| 136 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
| 137 | struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); | ||
| 138 | |||
| 139 | if (tcon->Flags & SMB_SHARE_IS_IN_DFS) | ||
| 140 | return true; | ||
| 141 | #endif | ||
| 142 | return false; | ||
| 143 | } | ||
| 144 | |||
| 129 | static void | 145 | static void |
| 130 | cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) | 146 | cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) |
| 131 | { | 147 | { |
| @@ -135,6 +151,19 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) | |||
| 135 | if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { | 151 | if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { |
| 136 | fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; | 152 | fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; |
| 137 | fattr->cf_dtype = DT_DIR; | 153 | fattr->cf_dtype = DT_DIR; |
| 154 | /* | ||
| 155 | * Windows CIFS servers generally make DFS referrals look | ||
| 156 | * like directories in FIND_* responses with the reparse | ||
| 157 | * attribute flag also set (since DFS junctions are | ||
| 158 | * reparse points). We must revalidate at least these | ||
| 159 | * directory inodes before trying to use them (if | ||
| 160 | * they are DFS we will get PATH_NOT_COVERED back | ||
| 161 | * when queried directly and can then try to connect | ||
| 162 | * to the DFS target) | ||
| 163 | */ | ||
| 164 | if (cifs_dfs_is_possible(cifs_sb) && | ||
| 165 | (fattr->cf_cifsattrs & ATTR_REPARSE)) | ||
| 166 | fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; | ||
| 138 | } else { | 167 | } else { |
| 139 | fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; | 168 | fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; |
| 140 | fattr->cf_dtype = DT_REG; | 169 | fattr->cf_dtype = DT_REG; |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index f230571a7ab3..79358e341fd2 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
| @@ -138,8 +138,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) | |||
| 138 | capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | | 138 | capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | |
| 139 | CAP_LARGE_WRITE_X | CAP_LARGE_READ_X; | 139 | CAP_LARGE_WRITE_X | CAP_LARGE_READ_X; |
| 140 | 140 | ||
| 141 | if (ses->server->sec_mode & | 141 | if (ses->server->sign) |
| 142 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | ||
| 143 | pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; | 142 | pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; |
| 144 | 143 | ||
| 145 | if (ses->capabilities & CAP_UNICODE) { | 144 | if (ses->capabilities & CAP_UNICODE) { |
| @@ -310,11 +309,10 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses, | |||
| 310 | return; | 309 | return; |
| 311 | } | 310 | } |
| 312 | 311 | ||
| 313 | static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, | 312 | static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft, |
| 314 | struct cifs_ses *ses, | 313 | struct cifs_ses *ses, |
| 315 | const struct nls_table *nls_cp) | 314 | const struct nls_table *nls_cp) |
| 316 | { | 315 | { |
| 317 | int rc = 0; | ||
| 318 | int len; | 316 | int len; |
| 319 | char *bcc_ptr = *pbcc_area; | 317 | char *bcc_ptr = *pbcc_area; |
| 320 | 318 | ||
| @@ -322,24 +320,22 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, | |||
| 322 | 320 | ||
| 323 | len = strnlen(bcc_ptr, bleft); | 321 | len = strnlen(bcc_ptr, bleft); |
| 324 | if (len >= bleft) | 322 | if (len >= bleft) |
| 325 | return rc; | 323 | return; |
| 326 | 324 | ||
| 327 | kfree(ses->serverOS); | 325 | kfree(ses->serverOS); |
| 328 | 326 | ||
| 329 | ses->serverOS = kzalloc(len + 1, GFP_KERNEL); | 327 | ses->serverOS = kzalloc(len + 1, GFP_KERNEL); |
| 330 | if (ses->serverOS) | 328 | if (ses->serverOS) |
| 331 | strncpy(ses->serverOS, bcc_ptr, len); | 329 | strncpy(ses->serverOS, bcc_ptr, len); |
| 332 | if (strncmp(ses->serverOS, "OS/2", 4) == 0) { | 330 | if (strncmp(ses->serverOS, "OS/2", 4) == 0) |
| 333 | cifs_dbg(FYI, "OS/2 server\n"); | 331 | cifs_dbg(FYI, "OS/2 server\n"); |
| 334 | ses->flags |= CIFS_SES_OS2; | ||
| 335 | } | ||
| 336 | 332 | ||
| 337 | bcc_ptr += len + 1; | 333 | bcc_ptr += len + 1; |
| 338 | bleft -= len + 1; | 334 | bleft -= len + 1; |
| 339 | 335 | ||
| 340 | len = strnlen(bcc_ptr, bleft); | 336 | len = strnlen(bcc_ptr, bleft); |
| 341 | if (len >= bleft) | 337 | if (len >= bleft) |
| 342 | return rc; | 338 | return; |
| 343 | 339 | ||
| 344 | kfree(ses->serverNOS); | 340 | kfree(ses->serverNOS); |
| 345 | 341 | ||
| @@ -352,7 +348,7 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, | |||
| 352 | 348 | ||
| 353 | len = strnlen(bcc_ptr, bleft); | 349 | len = strnlen(bcc_ptr, bleft); |
| 354 | if (len > bleft) | 350 | if (len > bleft) |
| 355 | return rc; | 351 | return; |
| 356 | 352 | ||
| 357 | /* No domain field in LANMAN case. Domain is | 353 | /* No domain field in LANMAN case. Domain is |
| 358 | returned by old servers in the SMB negprot response */ | 354 | returned by old servers in the SMB negprot response */ |
| @@ -360,8 +356,6 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, | |||
| 360 | but thus do return domain here we could add parsing | 356 | but thus do return domain here we could add parsing |
| 361 | for it later, but it is not very important */ | 357 | for it later, but it is not very important */ |
| 362 | cifs_dbg(FYI, "ascii: bytes left %d\n", bleft); | 358 | cifs_dbg(FYI, "ascii: bytes left %d\n", bleft); |
| 363 | |||
| 364 | return rc; | ||
| 365 | } | 359 | } |
| 366 | 360 | ||
| 367 | int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, | 361 | int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, |
| @@ -432,8 +426,7 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, | |||
| 432 | flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | | 426 | flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | |
| 433 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | | 427 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | |
| 434 | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; | 428 | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; |
| 435 | if (ses->server->sec_mode & | 429 | if (ses->server->sign) { |
| 436 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | ||
| 437 | flags |= NTLMSSP_NEGOTIATE_SIGN; | 430 | flags |= NTLMSSP_NEGOTIATE_SIGN; |
| 438 | if (!ses->server->session_estab) | 431 | if (!ses->server->session_estab) |
| 439 | flags |= NTLMSSP_NEGOTIATE_KEY_XCH; | 432 | flags |= NTLMSSP_NEGOTIATE_KEY_XCH; |
| @@ -471,8 +464,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
| 471 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | | 464 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | |
| 472 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | | 465 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | |
| 473 | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; | 466 | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; |
| 474 | if (ses->server->sec_mode & | 467 | if (ses->server->sign) { |
| 475 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | ||
| 476 | flags |= NTLMSSP_NEGOTIATE_SIGN; | 468 | flags |= NTLMSSP_NEGOTIATE_SIGN; |
| 477 | if (!ses->server->session_estab) | 469 | if (!ses->server->session_estab) |
| 478 | flags |= NTLMSSP_NEGOTIATE_KEY_XCH; | 470 | flags |= NTLMSSP_NEGOTIATE_KEY_XCH; |
| @@ -558,6 +550,56 @@ setup_ntlmv2_ret: | |||
| 558 | return rc; | 550 | return rc; |
| 559 | } | 551 | } |
| 560 | 552 | ||
| 553 | enum securityEnum | ||
| 554 | select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) | ||
| 555 | { | ||
| 556 | switch (server->negflavor) { | ||
| 557 | case CIFS_NEGFLAVOR_EXTENDED: | ||
| 558 | switch (requested) { | ||
| 559 | case Kerberos: | ||
| 560 | case RawNTLMSSP: | ||
| 561 | return requested; | ||
| 562 | case Unspecified: | ||
| 563 | if (server->sec_ntlmssp && | ||
| 564 | (global_secflags & CIFSSEC_MAY_NTLMSSP)) | ||
| 565 | return RawNTLMSSP; | ||
| 566 | if ((server->sec_kerberos || server->sec_mskerberos) && | ||
| 567 | (global_secflags & CIFSSEC_MAY_KRB5)) | ||
| 568 | return Kerberos; | ||
| 569 | /* Fallthrough */ | ||
| 570 | default: | ||
| 571 | return Unspecified; | ||
| 572 | } | ||
| 573 | case CIFS_NEGFLAVOR_UNENCAP: | ||
| 574 | switch (requested) { | ||
| 575 | case NTLM: | ||
| 576 | case NTLMv2: | ||
| 577 | return requested; | ||
| 578 | case Unspecified: | ||
| 579 | if (global_secflags & CIFSSEC_MAY_NTLMV2) | ||
| 580 | return NTLMv2; | ||
| 581 | if (global_secflags & CIFSSEC_MAY_NTLM) | ||
| 582 | return NTLM; | ||
| 583 | /* Fallthrough */ | ||
| 584 | default: | ||
| 585 | return Unspecified; | ||
| 586 | } | ||
| 587 | case CIFS_NEGFLAVOR_LANMAN: | ||
| 588 | switch (requested) { | ||
| 589 | case LANMAN: | ||
| 590 | return requested; | ||
| 591 | case Unspecified: | ||
| 592 | if (global_secflags & CIFSSEC_MAY_LANMAN) | ||
| 593 | return LANMAN; | ||
| 594 | /* Fallthrough */ | ||
| 595 | default: | ||
| 596 | return Unspecified; | ||
| 597 | } | ||
| 598 | default: | ||
| 599 | return Unspecified; | ||
| 600 | } | ||
| 601 | } | ||
| 602 | |||
| 561 | int | 603 | int |
| 562 | CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, | 604 | CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, |
| 563 | const struct nls_table *nls_cp) | 605 | const struct nls_table *nls_cp) |
| @@ -579,11 +621,18 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, | |||
| 579 | u16 blob_len; | 621 | u16 blob_len; |
| 580 | char *ntlmsspblob = NULL; | 622 | char *ntlmsspblob = NULL; |
| 581 | 623 | ||
| 582 | if (ses == NULL) | 624 | if (ses == NULL) { |
| 625 | WARN(1, "%s: ses == NULL!", __func__); | ||
| 583 | return -EINVAL; | 626 | return -EINVAL; |
| 627 | } | ||
| 584 | 628 | ||
| 585 | type = ses->server->secType; | 629 | type = select_sectype(ses->server, ses->sectype); |
| 586 | cifs_dbg(FYI, "sess setup type %d\n", type); | 630 | cifs_dbg(FYI, "sess setup type %d\n", type); |
| 631 | if (type == Unspecified) { | ||
| 632 | cifs_dbg(VFS, "Unable to select appropriate authentication method!"); | ||
| 633 | return -EINVAL; | ||
| 634 | } | ||
| 635 | |||
| 587 | if (type == RawNTLMSSP) { | 636 | if (type == RawNTLMSSP) { |
| 588 | /* if memory allocation is successful, caller of this function | 637 | /* if memory allocation is successful, caller of this function |
| 589 | * frees it. | 638 | * frees it. |
| @@ -643,8 +692,6 @@ ssetup_ntlmssp_authenticate: | |||
| 643 | } | 692 | } |
| 644 | bcc_ptr = str_area; | 693 | bcc_ptr = str_area; |
| 645 | 694 | ||
| 646 | ses->flags &= ~CIFS_SES_LANMAN; | ||
| 647 | |||
| 648 | iov[1].iov_base = NULL; | 695 | iov[1].iov_base = NULL; |
| 649 | iov[1].iov_len = 0; | 696 | iov[1].iov_len = 0; |
| 650 | 697 | ||
| @@ -668,7 +715,6 @@ ssetup_ntlmssp_authenticate: | |||
| 668 | ses->server->sec_mode & SECMODE_PW_ENCRYPT ? | 715 | ses->server->sec_mode & SECMODE_PW_ENCRYPT ? |
| 669 | true : false, lnm_session_key); | 716 | true : false, lnm_session_key); |
| 670 | 717 | ||
| 671 | ses->flags |= CIFS_SES_LANMAN; | ||
| 672 | memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); | 718 | memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); |
| 673 | bcc_ptr += CIFS_AUTH_RESP_SIZE; | 719 | bcc_ptr += CIFS_AUTH_RESP_SIZE; |
| 674 | 720 | ||
| @@ -938,8 +984,7 @@ ssetup_ntlmssp_authenticate: | |||
| 938 | } | 984 | } |
| 939 | decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); | 985 | decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); |
| 940 | } else { | 986 | } else { |
| 941 | rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, | 987 | decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); |
| 942 | ses, nls_cp); | ||
| 943 | } | 988 | } |
| 944 | 989 | ||
| 945 | ssetup_exit: | 990 | ssetup_exit: |
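
The new `select_sectype()` above centralizes the choice of authentication mechanism: an explicit `sec=` request is honored only if it fits the negotiated flavor, while `Unspecified` falls back through the defaults, gated by `global_secflags`. A short usage sketch mirroring `CIFS_SessSetup()` above:

```c
/*
 * Effective defaults when the mount did not force sec=
 * (requested == Unspecified), as implemented by select_sectype():
 *
 *   CIFS_NEGFLAVOR_EXTENDED : RawNTLMSSP, then Kerberos
 *   CIFS_NEGFLAVOR_UNENCAP  : NTLMv2, then NTLM
 *   CIFS_NEGFLAVOR_LANMAN   : LANMAN
 *
 * each choice additionally gated by the matching CIFSSEC_MAY_* bit.
 */
enum securityEnum type = select_sectype(ses->server, ses->sectype);
if (type == Unspecified)
	return -EINVAL;		/* no compatible authentication method */
```
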
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 3efdb9d5c0b8..e813f04511d8 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c | |||
| @@ -449,8 +449,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) | |||
| 449 | * WRITEX header, not including the 4 byte RFC1001 length. | 449 | * WRITEX header, not including the 4 byte RFC1001 length. |
| 450 | */ | 450 | */ |
| 451 | if (!(server->capabilities & CAP_LARGE_WRITE_X) || | 451 | if (!(server->capabilities & CAP_LARGE_WRITE_X) || |
| 452 | (!(server->capabilities & CAP_UNIX) && | 452 | (!(server->capabilities & CAP_UNIX) && server->sign)) |
| 453 | (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)))) | ||
| 454 | wsize = min_t(unsigned int, wsize, | 453 | wsize = min_t(unsigned int, wsize, |
| 455 | server->maxBuf - sizeof(WRITE_REQ) + 4); | 454 | server->maxBuf - sizeof(WRITE_REQ) + 4); |
| 456 | 455 | ||
| @@ -765,20 +764,14 @@ smb_set_file_info(struct inode *inode, const char *full_path, | |||
| 765 | } | 764 | } |
| 766 | tcon = tlink_tcon(tlink); | 765 | tcon = tlink_tcon(tlink); |
| 767 | 766 | ||
| 768 | /* | 767 | rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, cifs_sb->local_nls, |
| 769 | * NT4 apparently returns success on this call, but it doesn't really | ||
| 770 | * work. | ||
| 771 | */ | ||
| 772 | if (!(tcon->ses->flags & CIFS_SES_NT4)) { | ||
| 773 | rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, | ||
| 774 | cifs_sb->local_nls, | ||
| 775 | cifs_sb->mnt_cifs_flags & | 768 | cifs_sb->mnt_cifs_flags & |
| 776 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 769 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
| 777 | if (rc == 0) { | 770 | if (rc == 0) { |
| 778 | cinode->cifsAttrs = le32_to_cpu(buf->Attributes); | 771 | cinode->cifsAttrs = le32_to_cpu(buf->Attributes); |
| 779 | goto out; | 772 | goto out; |
| 780 | } else if (rc != -EOPNOTSUPP && rc != -EINVAL) | 773 | } else if (rc != -EOPNOTSUPP && rc != -EINVAL) { |
| 781 | goto out; | 774 | goto out; |
| 782 | } | 775 | } |
| 783 | 776 | ||
| 784 | cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n"); | 777 | cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n"); |
| @@ -964,4 +957,6 @@ struct smb_version_values smb1_values = { | |||
| 964 | .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND, | 957 | .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND, |
| 965 | .cap_large_files = CAP_LARGE_FILES, | 958 | .cap_large_files = CAP_LARGE_FILES, |
| 966 | .oplock_read = OPLOCK_READ, | 959 | .oplock_read = OPLOCK_READ, |
| 960 | .signing_enabled = SECMODE_SIGN_ENABLED, | ||
| 961 | .signing_required = SECMODE_SIGN_REQUIRED, | ||
| 967 | }; | 962 | }; |
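
smb1_values (and, later in this diff, the SMB2/SMB3 value tables) gains per-dialect `.signing_enabled`/`.signing_required` masks, presumably so that the signing decision can live in one dialect-independent helper: the `cifs_enable_signing()` called from the SMB1 negotiate path earlier in this diff and from the SMB2 negotiate path further down. Its body is not part of this diff, so the following is only an assumed sketch of the idea, under a hypothetical name; do not read it as the actual implementation:

```c
/*
 * ASSUMED sketch: reconcile the mount's signing request with what the
 * server advertised, using the per-dialect masks in smb_version_values.
 */
static int example_enable_signing(struct TCP_Server_Info *server,
				  bool mnt_sign_required)
{
	bool srv_enabled  = server->sec_mode & server->vals->signing_enabled;
	bool srv_required = server->sec_mode & server->vals->signing_required;

	/* client must sign but server cannot: refuse the mount */
	if (mnt_sign_required && !srv_enabled)
		return -EOPNOTSUPP;

	/* server insists on signing but the security flags forbid it */
	if (srv_required && !(global_secflags & CIFSSEC_MAY_SIGN))
		return -EOPNOTSUPP;

	server->sign = mnt_sign_required || srv_required;
	return 0;
}
```
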
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index 7c0e2143e775..c38350851b08 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h | |||
| @@ -54,5 +54,7 @@ | |||
| 54 | #define SMB2_SIGNATURE_SIZE (16) | 54 | #define SMB2_SIGNATURE_SIZE (16) |
| 55 | #define SMB2_NTLMV2_SESSKEY_SIZE (16) | 55 | #define SMB2_NTLMV2_SESSKEY_SIZE (16) |
| 56 | #define SMB2_HMACSHA256_SIZE (32) | 56 | #define SMB2_HMACSHA256_SIZE (32) |
| 57 | #define SMB2_CMACAES_SIZE (16) | ||
| 58 | #define SMB3_SIGNKEY_SIZE (16) | ||
| 57 | 59 | ||
| 58 | #endif /* _SMB2_GLOB_H */ | 60 | #endif /* _SMB2_GLOB_H */ |
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 10383d8c015b..b0c43345cd98 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c | |||
| @@ -266,6 +266,10 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) | |||
| 266 | ((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength); | 266 | ((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength); |
| 267 | break; | 267 | break; |
| 268 | case SMB2_IOCTL: | 268 | case SMB2_IOCTL: |
| 269 | *off = le32_to_cpu( | ||
| 270 | ((struct smb2_ioctl_rsp *)hdr)->OutputOffset); | ||
| 271 | *len = le32_to_cpu(((struct smb2_ioctl_rsp *)hdr)->OutputCount); | ||
| 272 | break; | ||
| 269 | case SMB2_CHANGE_NOTIFY: | 273 | case SMB2_CHANGE_NOTIFY: |
| 270 | default: | 274 | default: |
| 271 | /* BB FIXME for unimplemented cases above */ | 275 | /* BB FIXME for unimplemented cases above */ |
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f2e76f3b0c61..6d15cab95b99 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c | |||
| @@ -281,6 +281,25 @@ smb2_clear_stats(struct cifs_tcon *tcon) | |||
| 281 | } | 281 | } |
| 282 | 282 | ||
| 283 | static void | 283 | static void |
| 284 | smb2_dump_share_caps(struct seq_file *m, struct cifs_tcon *tcon) | ||
| 285 | { | ||
| 286 | seq_puts(m, "\n\tShare Capabilities:"); | ||
| 287 | if (tcon->capabilities & SMB2_SHARE_CAP_DFS) | ||
| 288 | seq_puts(m, " DFS,"); | ||
| 289 | if (tcon->capabilities & SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY) | ||
| 290 | seq_puts(m, " CONTINUOUS AVAILABILITY,"); | ||
| 291 | if (tcon->capabilities & SMB2_SHARE_CAP_SCALEOUT) | ||
| 292 | seq_puts(m, " SCALEOUT,"); | ||
| 293 | if (tcon->capabilities & SMB2_SHARE_CAP_CLUSTER) | ||
| 294 | seq_puts(m, " CLUSTER,"); | ||
| 295 | if (tcon->capabilities & SMB2_SHARE_CAP_ASYMMETRIC) | ||
| 296 | seq_puts(m, " ASYMMETRIC,"); | ||
| 297 | if (tcon->capabilities == 0) | ||
| 298 | seq_puts(m, " None"); | ||
| 299 | seq_printf(m, "\tShare Flags: 0x%x", tcon->share_flags); | ||
| 300 | } | ||
| 301 | |||
| 302 | static void | ||
| 284 | smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) | 303 | smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) |
| 285 | { | 304 | { |
| 286 | #ifdef CONFIG_CIFS_STATS | 305 | #ifdef CONFIG_CIFS_STATS |
| @@ -292,7 +311,6 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) | |||
| 292 | seq_printf(m, "\nSessionSetups: %d sent %d failed", | 311 | seq_printf(m, "\nSessionSetups: %d sent %d failed", |
| 293 | atomic_read(&sent[SMB2_SESSION_SETUP_HE]), | 312 | atomic_read(&sent[SMB2_SESSION_SETUP_HE]), |
| 294 | atomic_read(&failed[SMB2_SESSION_SETUP_HE])); | 313 | atomic_read(&failed[SMB2_SESSION_SETUP_HE])); |
| 295 | #define SMB2LOGOFF 0x0002 /* trivial request/resp */ | ||
| 296 | seq_printf(m, "\nLogoffs: %d sent %d failed", | 314 | seq_printf(m, "\nLogoffs: %d sent %d failed", |
| 297 | atomic_read(&sent[SMB2_LOGOFF_HE]), | 315 | atomic_read(&sent[SMB2_LOGOFF_HE]), |
| 298 | atomic_read(&failed[SMB2_LOGOFF_HE])); | 316 | atomic_read(&failed[SMB2_LOGOFF_HE])); |
| @@ -645,6 +663,7 @@ struct smb_version_operations smb30_operations = { | |||
| 645 | .dump_detail = smb2_dump_detail, | 663 | .dump_detail = smb2_dump_detail, |
| 646 | .clear_stats = smb2_clear_stats, | 664 | .clear_stats = smb2_clear_stats, |
| 647 | .print_stats = smb2_print_stats, | 665 | .print_stats = smb2_print_stats, |
| 666 | .dump_share_caps = smb2_dump_share_caps, | ||
| 648 | .is_oplock_break = smb2_is_valid_oplock_break, | 667 | .is_oplock_break = smb2_is_valid_oplock_break, |
| 649 | .need_neg = smb2_need_neg, | 668 | .need_neg = smb2_need_neg, |
| 650 | .negotiate = smb2_negotiate, | 669 | .negotiate = smb2_negotiate, |
| @@ -690,6 +709,7 @@ struct smb_version_operations smb30_operations = { | |||
| 690 | .get_lease_key = smb2_get_lease_key, | 709 | .get_lease_key = smb2_get_lease_key, |
| 691 | .set_lease_key = smb2_set_lease_key, | 710 | .set_lease_key = smb2_set_lease_key, |
| 692 | .new_lease_key = smb2_new_lease_key, | 711 | .new_lease_key = smb2_new_lease_key, |
| 712 | .generate_signingkey = generate_smb3signingkey, | ||
| 693 | .calc_signature = smb3_calc_signature, | 713 | .calc_signature = smb3_calc_signature, |
| 694 | }; | 714 | }; |
| 695 | 715 | ||
| @@ -709,6 +729,8 @@ struct smb_version_values smb20_values = { | |||
| 709 | .cap_nt_find = SMB2_NT_FIND, | 729 | .cap_nt_find = SMB2_NT_FIND, |
| 710 | .cap_large_files = SMB2_LARGE_FILES, | 730 | .cap_large_files = SMB2_LARGE_FILES, |
| 711 | .oplock_read = SMB2_OPLOCK_LEVEL_II, | 731 | .oplock_read = SMB2_OPLOCK_LEVEL_II, |
| 732 | .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
| 733 | .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
| 712 | }; | 734 | }; |
| 713 | 735 | ||
| 714 | struct smb_version_values smb21_values = { | 736 | struct smb_version_values smb21_values = { |
| @@ -727,6 +749,8 @@ struct smb_version_values smb21_values = { | |||
| 727 | .cap_nt_find = SMB2_NT_FIND, | 749 | .cap_nt_find = SMB2_NT_FIND, |
| 728 | .cap_large_files = SMB2_LARGE_FILES, | 750 | .cap_large_files = SMB2_LARGE_FILES, |
| 729 | .oplock_read = SMB2_OPLOCK_LEVEL_II, | 751 | .oplock_read = SMB2_OPLOCK_LEVEL_II, |
| 752 | .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
| 753 | .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
| 730 | }; | 754 | }; |
| 731 | 755 | ||
| 732 | struct smb_version_values smb30_values = { | 756 | struct smb_version_values smb30_values = { |
| @@ -745,4 +769,26 @@ struct smb_version_values smb30_values = { | |||
| 745 | .cap_nt_find = SMB2_NT_FIND, | 769 | .cap_nt_find = SMB2_NT_FIND, |
| 746 | .cap_large_files = SMB2_LARGE_FILES, | 770 | .cap_large_files = SMB2_LARGE_FILES, |
| 747 | .oplock_read = SMB2_OPLOCK_LEVEL_II, | 771 | .oplock_read = SMB2_OPLOCK_LEVEL_II, |
| 772 | .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
| 773 | .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
| 774 | }; | ||
| 775 | |||
| 776 | struct smb_version_values smb302_values = { | ||
| 777 | .version_string = SMB302_VERSION_STRING, | ||
| 778 | .protocol_id = SMB302_PROT_ID, | ||
| 779 | .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, | ||
| 780 | .large_lock_type = 0, | ||
| 781 | .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, | ||
| 782 | .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, | ||
| 783 | .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, | ||
| 784 | .header_size = sizeof(struct smb2_hdr), | ||
| 785 | .max_header_size = MAX_SMB2_HDR_SIZE, | ||
| 786 | .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, | ||
| 787 | .lock_cmd = SMB2_LOCK, | ||
| 788 | .cap_unix = 0, | ||
| 789 | .cap_nt_find = SMB2_NT_FIND, | ||
| 790 | .cap_large_files = SMB2_LARGE_FILES, | ||
| 791 | .oplock_read = SMB2_OPLOCK_LEVEL_II, | ||
| 792 | .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
| 793 | .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
| 748 | }; | 794 | }; |
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2b95ce2b54e8..2b312e4eeaa6 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * fs/cifs/smb2pdu.c | 2 | * fs/cifs/smb2pdu.c |
| 3 | * | 3 | * |
| 4 | * Copyright (C) International Business Machines Corp., 2009, 2012 | 4 | * Copyright (C) International Business Machines Corp., 2009, 2013 |
| 5 | * Etersoft, 2012 | 5 | * Etersoft, 2012 |
| 6 | * Author(s): Steve French (sfrench@us.ibm.com) | 6 | * Author(s): Steve French (sfrench@us.ibm.com) |
| 7 | * Pavel Shilovsky (pshilovsky@samba.org) 2012 | 7 | * Pavel Shilovsky (pshilovsky@samba.org) 2012 |
| @@ -108,19 +108,33 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ , | |||
| 108 | if (!tcon) | 108 | if (!tcon) |
| 109 | goto out; | 109 | goto out; |
| 110 | 110 | ||
| 111 | /* BB FIXME when we do write > 64K add +1 for every 64K in req or rsp */ | ||
| 112 | /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */ | ||
| 113 | /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */ | ||
| 114 | if ((tcon->ses) && | ||
| 115 | (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) | ||
| 116 | hdr->CreditCharge = cpu_to_le16(1); | ||
| 117 | /* else CreditCharge MBZ */ | ||
| 118 | |||
| 111 | hdr->TreeId = tcon->tid; | 119 | hdr->TreeId = tcon->tid; |
| 112 | /* Uid is not converted */ | 120 | /* Uid is not converted */ |
| 113 | if (tcon->ses) | 121 | if (tcon->ses) |
| 114 | hdr->SessionId = tcon->ses->Suid; | 122 | hdr->SessionId = tcon->ses->Suid; |
| 115 | /* BB check following DFS flags BB */ | 123 | |
| 116 | /* BB do we have to add check for SHI1005_FLAGS_DFS_ROOT too? */ | 124 | /* |
| 117 | if (tcon->share_flags & SHI1005_FLAGS_DFS) | 125 | * If we would set SMB2_FLAGS_DFS_OPERATIONS on open we also would have |
| 118 | hdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; | 126 | * to pass the path on the Open SMB prefixed by \\server\share. |
| 119 | /* BB how does SMB2 do case sensitive? */ | 127 | * Not sure when we would need to do the augmented path (if ever) and |
| 120 | /* if (tcon->nocase) | 128 | * setting this flag breaks the SMB2 open operation since it is |
| 121 | hdr->Flags |= SMBFLG_CASELESS; */ | 129 | * illegal to send an empty path name (without \\server\share prefix) |
| 122 | if (tcon->ses && tcon->ses->server && | 130 | * when the DFS flag is set in the SMB open header. We could |
| 123 | (tcon->ses->server->sec_mode & SECMODE_SIGN_REQUIRED)) | 131 | * consider setting the flag on all operations other than open |
| 132 | * but it is safer to not set it for now. | ||
| 133 | */ | ||
| 134 | /* if (tcon->share_flags & SHI1005_FLAGS_DFS) | ||
| 135 | hdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; */ | ||
| 136 | |||
| 137 | if (tcon->ses && tcon->ses->server && tcon->ses->server->sign) | ||
| 124 | hdr->Flags |= SMB2_FLAGS_SIGNED; | 138 | hdr->Flags |= SMB2_FLAGS_SIGNED; |
| 125 | out: | 139 | out: |
| 126 | pdu->StructureSize2 = cpu_to_le16(parmsize); | 140 | pdu->StructureSize2 = cpu_to_le16(parmsize); |
| @@ -328,34 +342,22 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
| 328 | struct kvec iov[1]; | 342 | struct kvec iov[1]; |
| 329 | int rc = 0; | 343 | int rc = 0; |
| 330 | int resp_buftype; | 344 | int resp_buftype; |
| 331 | struct TCP_Server_Info *server; | 345 | struct TCP_Server_Info *server = ses->server; |
| 332 | unsigned int sec_flags; | ||
| 333 | u16 temp = 0; | ||
| 334 | int blob_offset, blob_length; | 346 | int blob_offset, blob_length; |
| 335 | char *security_blob; | 347 | char *security_blob; |
| 336 | int flags = CIFS_NEG_OP; | 348 | int flags = CIFS_NEG_OP; |
| 337 | 349 | ||
| 338 | cifs_dbg(FYI, "Negotiate protocol\n"); | 350 | cifs_dbg(FYI, "Negotiate protocol\n"); |
| 339 | 351 | ||
| 340 | if (ses->server) | 352 | if (!server) { |
| 341 | server = ses->server; | 353 | WARN(1, "%s: server is NULL!\n", __func__); |
| 342 | else { | 354 | return -EIO; |
| 343 | rc = -EIO; | ||
| 344 | return rc; | ||
| 345 | } | 355 | } |
| 346 | 356 | ||
| 347 | rc = small_smb2_init(SMB2_NEGOTIATE, NULL, (void **) &req); | 357 | rc = small_smb2_init(SMB2_NEGOTIATE, NULL, (void **) &req); |
| 348 | if (rc) | 358 | if (rc) |
| 349 | return rc; | 359 | return rc; |
| 350 | 360 | ||
| 351 | /* if any of auth flags (ie not sign or seal) are overriden use them */ | ||
| 352 | if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) | ||
| 353 | sec_flags = ses->overrideSecFlg; /* BB FIXME fix sign flags?*/ | ||
| 354 | else /* if override flags set only sign/seal OR them with global auth */ | ||
| 355 | sec_flags = global_secflags | ses->overrideSecFlg; | ||
| 356 | |||
| 357 | cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags); | ||
| 358 | |||
| 359 | req->hdr.SessionId = 0; | 361 | req->hdr.SessionId = 0; |
| 360 | 362 | ||
| 361 | req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); | 363 | req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); |
| @@ -364,12 +366,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
| 364 | inc_rfc1001_len(req, 2); | 366 | inc_rfc1001_len(req, 2); |
| 365 | 367 | ||
| 366 | /* only one of SMB2 signing flags may be set in SMB2 request */ | 368 | /* only one of SMB2 signing flags may be set in SMB2 request */ |
| 367 | if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) | 369 | if (ses->sign) |
| 368 | temp = SMB2_NEGOTIATE_SIGNING_REQUIRED; | 370 | req->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED); |
| 369 | else if (sec_flags & CIFSSEC_MAY_SIGN) /* MAY_SIGN is a single flag */ | 371 | else if (global_secflags & CIFSSEC_MAY_SIGN) |
| 370 | temp = SMB2_NEGOTIATE_SIGNING_ENABLED; | 372 | req->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED); |
| 371 | 373 | else | |
| 372 | req->SecurityMode = cpu_to_le16(temp); | 374 | req->SecurityMode = 0; |
| 373 | 375 | ||
| 374 | req->Capabilities = cpu_to_le32(ses->server->vals->req_capabilities); | 376 | req->Capabilities = cpu_to_le32(ses->server->vals->req_capabilities); |
| 375 | 377 | ||
| @@ -399,6 +401,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
| 399 | cifs_dbg(FYI, "negotiated smb2.1 dialect\n"); | 401 | cifs_dbg(FYI, "negotiated smb2.1 dialect\n"); |
| 400 | else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID)) | 402 | else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID)) |
| 401 | cifs_dbg(FYI, "negotiated smb3.0 dialect\n"); | 403 | cifs_dbg(FYI, "negotiated smb3.0 dialect\n"); |
| 404 | else if (rsp->DialectRevision == cpu_to_le16(SMB302_PROT_ID)) | ||
| 405 | cifs_dbg(FYI, "negotiated smb3.02 dialect\n"); | ||
| 402 | else { | 406 | else { |
| 403 | cifs_dbg(VFS, "Illegal dialect returned by server %d\n", | 407 | cifs_dbg(VFS, "Illegal dialect returned by server %d\n", |
| 404 | le16_to_cpu(rsp->DialectRevision)); | 408 | le16_to_cpu(rsp->DialectRevision)); |
| @@ -407,6 +411,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
| 407 | } | 411 | } |
| 408 | server->dialect = le16_to_cpu(rsp->DialectRevision); | 412 | server->dialect = le16_to_cpu(rsp->DialectRevision); |
| 409 | 413 | ||
| 414 | /* SMB2 only has an extended negflavor */ | ||
| 415 | server->negflavor = CIFS_NEGFLAVOR_EXTENDED; | ||
| 410 | server->maxBuf = le32_to_cpu(rsp->MaxTransactSize); | 416 | server->maxBuf = le32_to_cpu(rsp->MaxTransactSize); |
| 411 | server->max_read = le32_to_cpu(rsp->MaxReadSize); | 417 | server->max_read = le32_to_cpu(rsp->MaxReadSize); |
| 412 | server->max_write = le32_to_cpu(rsp->MaxWriteSize); | 418 | server->max_write = le32_to_cpu(rsp->MaxWriteSize); |
| @@ -418,44 +424,22 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
| 418 | 424 | ||
| 419 | security_blob = smb2_get_data_area_len(&blob_offset, &blob_length, | 425 | security_blob = smb2_get_data_area_len(&blob_offset, &blob_length, |
| 420 | &rsp->hdr); | 426 | &rsp->hdr); |
| 421 | if (blob_length == 0) { | 427 | /* |
| 422 | cifs_dbg(VFS, "missing security blob on negprot\n"); | 428 | * See MS-SMB2 section 2.2.4: if no blob, client picks default which |
| 423 | rc = -EIO; | 429 | * for us will be |
| 424 | goto neg_exit; | 430 | * ses->sectype = RawNTLMSSP; |
| 425 | } | 431 | * but for time being this is our only auth choice so doesn't matter. |
| 426 | 432 | * We just found a server which sets blob length to zero expecting raw. | |
| 427 | cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags); | 433 | */ |
| 428 | if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { | 434 | if (blob_length == 0) |
| 429 | cifs_dbg(FYI, "Signing required\n"); | 435 | cifs_dbg(FYI, "missing security blob on negprot\n"); |
| 430 | if (!(server->sec_mode & (SMB2_NEGOTIATE_SIGNING_REQUIRED | | ||
| 431 | SMB2_NEGOTIATE_SIGNING_ENABLED))) { | ||
| 432 | cifs_dbg(VFS, "signing required but server lacks support\n"); | ||
| 433 | rc = -EOPNOTSUPP; | ||
| 434 | goto neg_exit; | ||
| 435 | } | ||
| 436 | server->sec_mode |= SECMODE_SIGN_REQUIRED; | ||
| 437 | } else if (sec_flags & CIFSSEC_MAY_SIGN) { | ||
| 438 | cifs_dbg(FYI, "Signing optional\n"); | ||
| 439 | if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { | ||
| 440 | cifs_dbg(FYI, "Server requires signing\n"); | ||
| 441 | server->sec_mode |= SECMODE_SIGN_REQUIRED; | ||
| 442 | } else { | ||
| 443 | server->sec_mode &= | ||
| 444 | ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); | ||
| 445 | } | ||
| 446 | } else { | ||
| 447 | cifs_dbg(FYI, "Signing disabled\n"); | ||
| 448 | if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { | ||
| 449 | cifs_dbg(VFS, "Server requires packet signing to be enabled in /proc/fs/cifs/SecurityFlags\n"); | ||
| 450 | rc = -EOPNOTSUPP; | ||
| 451 | goto neg_exit; | ||
| 452 | } | ||
| 453 | server->sec_mode &= | ||
| 454 | ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); | ||
| 455 | } | ||
| 456 | 436 | ||
| 437 | rc = cifs_enable_signing(server, ses->sign); | ||
| 457 | #ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */ | 438 | #ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */ |
| 458 | rc = decode_neg_token_init(security_blob, blob_length, | 439 | if (rc) |
| 440 | goto neg_exit; | ||
| 441 | if (blob_length) | ||
| 442 | rc = decode_neg_token_init(security_blob, blob_length, | ||
| 459 | &server->sec_type); | 443 | &server->sec_type); |
| 460 | if (rc == 1) | 444 | if (rc == 1) |
| 461 | rc = 0; | 445 | rc = 0; |
| @@ -480,9 +464,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, | |||
| 480 | int rc = 0; | 464 | int rc = 0; |
| 481 | int resp_buftype; | 465 | int resp_buftype; |
| 482 | __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ | 466 | __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ |
| 483 | struct TCP_Server_Info *server; | 467 | struct TCP_Server_Info *server = ses->server; |
| 484 | unsigned int sec_flags; | ||
| 485 | u8 temp = 0; | ||
| 486 | u16 blob_length = 0; | 468 | u16 blob_length = 0; |
| 487 | char *security_blob; | 469 | char *security_blob; |
| 488 | char *ntlmssp_blob = NULL; | 470 | char *ntlmssp_blob = NULL; |
| @@ -490,11 +472,9 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, | |||
| 490 | 472 | ||
| 491 | cifs_dbg(FYI, "Session Setup\n"); | 473 | cifs_dbg(FYI, "Session Setup\n"); |
| 492 | 474 | ||
| 493 | if (ses->server) | 475 | if (!server) { |
| 494 | server = ses->server; | 476 | WARN(1, "%s: server is NULL!\n", __func__); |
| 495 | else { | 477 | return -EIO; |
| 496 | rc = -EIO; | ||
| 497 | return rc; | ||
| 498 | } | 478 | } |
| 499 | 479 | ||
| 500 | /* | 480 | /* |
| @@ -505,7 +485,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, | |||
| 505 | if (!ses->ntlmssp) | 485 | if (!ses->ntlmssp) |
| 506 | return -ENOMEM; | 486 | return -ENOMEM; |
| 507 | 487 | ||
| 508 | ses->server->secType = RawNTLMSSP; | 488 | /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */ |
| 489 | ses->sectype = RawNTLMSSP; | ||
| 509 | 490 | ||
| 510 | ssetup_ntlmssp_authenticate: | 491 | ssetup_ntlmssp_authenticate: |
| 511 | if (phase == NtLmChallenge) | 492 | if (phase == NtLmChallenge) |
| @@ -515,28 +496,19 @@ ssetup_ntlmssp_authenticate: | |||
| 515 | if (rc) | 496 | if (rc) |
| 516 | return rc; | 497 | return rc; |
| 517 | 498 | ||
| 518 | /* if any of auth flags (ie not sign or seal) are overriden use them */ | ||
| 519 | if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) | ||
| 520 | sec_flags = ses->overrideSecFlg; /* BB FIXME fix sign flags?*/ | ||
| 521 | else /* if override flags set only sign/seal OR them with global auth */ | ||
| 522 | sec_flags = global_secflags | ses->overrideSecFlg; | ||
| 523 | |||
| 524 | cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags); | ||
| 525 | |||
| 526 | req->hdr.SessionId = 0; /* First session, not a reauthenticate */ | 499 | req->hdr.SessionId = 0; /* First session, not a reauthenticate */ |
| 527 | req->VcNumber = 0; /* MBZ */ | 500 | req->VcNumber = 0; /* MBZ */ |
| 528 | /* to enable echos and oplocks */ | 501 | /* to enable echos and oplocks */ |
| 529 | req->hdr.CreditRequest = cpu_to_le16(3); | 502 | req->hdr.CreditRequest = cpu_to_le16(3); |
| 530 | 503 | ||
| 531 | /* only one of SMB2 signing flags may be set in SMB2 request */ | 504 | /* only one of SMB2 signing flags may be set in SMB2 request */ |
| 532 | if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) | 505 | if (server->sign) |
| 533 | temp = SMB2_NEGOTIATE_SIGNING_REQUIRED; | 506 | req->SecurityMode = SMB2_NEGOTIATE_SIGNING_REQUIRED; |
| 534 | else if (ses->server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) | 507 | else if (global_secflags & CIFSSEC_MAY_SIGN) /* one flag unlike MUST_ */ |
| 535 | temp = SMB2_NEGOTIATE_SIGNING_REQUIRED; | 508 | req->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED; |
| 536 | else if (sec_flags & CIFSSEC_MAY_SIGN) /* MAY_SIGN is a single flag */ | 509 | else |
| 537 | temp = SMB2_NEGOTIATE_SIGNING_ENABLED; | 510 | req->SecurityMode = 0; |
| 538 | 511 | ||
| 539 | req->SecurityMode = temp; | ||
| 540 | req->Capabilities = 0; | 512 | req->Capabilities = 0; |
| 541 | req->Channel = 0; /* MBZ */ | 513 | req->Channel = 0; /* MBZ */ |
| 542 | 514 | ||
| @@ -679,7 +651,7 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) | |||
| 679 | 651 | ||
| 680 | /* since no tcon, smb2_init can not do this, so do here */ | 652 | /* since no tcon, smb2_init can not do this, so do here */ |
| 681 | req->hdr.SessionId = ses->Suid; | 653 | req->hdr.SessionId = ses->Suid; |
| 682 | if (server->sec_mode & SECMODE_SIGN_REQUIRED) | 654 | if (server->sign) |
| 683 | req->hdr.Flags |= SMB2_FLAGS_SIGNED; | 655 | req->hdr.Flags |= SMB2_FLAGS_SIGNED; |
| 684 | 656 | ||
| 685 | rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0); | 657 | rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0); |
| @@ -788,11 +760,12 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, | |||
| 788 | } | 760 | } |
| 789 | 761 | ||
| 790 | tcon->share_flags = le32_to_cpu(rsp->ShareFlags); | 762 | tcon->share_flags = le32_to_cpu(rsp->ShareFlags); |
| 763 | tcon->capabilities = rsp->Capabilities; /* we keep caps little endian */ | ||
| 791 | tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess); | 764 | tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess); |
| 792 | tcon->tidStatus = CifsGood; | 765 | tcon->tidStatus = CifsGood; |
| 793 | tcon->need_reconnect = false; | 766 | tcon->need_reconnect = false; |
| 794 | tcon->tid = rsp->hdr.TreeId; | 767 | tcon->tid = rsp->hdr.TreeId; |
| 795 | strncpy(tcon->treeName, tree, MAX_TREE_SIZE); | 768 | strlcpy(tcon->treeName, tree, sizeof(tcon->treeName)); |
| 796 | 769 | ||
| 797 | if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) && | 770 | if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) && |
| 798 | ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) | 771 | ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) |
| @@ -1036,6 +1009,122 @@ creat_exit: | |||
| 1036 | return rc; | 1009 | return rc; |
| 1037 | } | 1010 | } |
| 1038 | 1011 | ||
| 1012 | /* | ||
| 1013 | * SMB2 IOCTL is used for both IOCTLs and FSCTLs | ||
| 1014 | */ | ||
| 1015 | int | ||
| 1016 | SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, | ||
| 1017 | u64 volatile_fid, u32 opcode, bool is_fsctl, char *in_data, | ||
| 1018 | u32 indatalen, char **out_data, u32 *plen /* returned data len */) | ||
| 1019 | { | ||
| 1020 | struct smb2_ioctl_req *req; | ||
| 1021 | struct smb2_ioctl_rsp *rsp; | ||
| 1022 | struct TCP_Server_Info *server; | ||
| 1023 | struct cifs_ses *ses = tcon->ses; | ||
| 1024 | struct kvec iov[2]; | ||
| 1025 | int resp_buftype; | ||
| 1026 | int num_iovecs; | ||
| 1027 | int rc = 0; | ||
| 1028 | |||
| 1029 | cifs_dbg(FYI, "SMB2 IOCTL\n"); | ||
| 1030 | |||
| 1031 | /* zero out returned data len, in case of error */ | ||
| 1032 | if (plen) | ||
| 1033 | *plen = 0; | ||
| 1034 | |||
| 1035 | if (ses && (ses->server)) | ||
| 1036 | server = ses->server; | ||
| 1037 | else | ||
| 1038 | return -EIO; | ||
| 1039 | |||
| 1040 | rc = small_smb2_init(SMB2_IOCTL, tcon, (void **) &req); | ||
| 1041 | if (rc) | ||
| 1042 | return rc; | ||
| 1043 | |||
| 1044 | req->CtlCode = cpu_to_le32(opcode); | ||
| 1045 | req->PersistentFileId = persistent_fid; | ||
| 1046 | req->VolatileFileId = volatile_fid; | ||
| 1047 | |||
| 1048 | if (indatalen) { | ||
| 1049 | req->InputCount = cpu_to_le32(indatalen); | ||
| 1050 | /* do not set InputOffset if no input data */ | ||
| 1051 | req->InputOffset = | ||
| 1052 | cpu_to_le32(offsetof(struct smb2_ioctl_req, Buffer) - 4); | ||
| 1053 | iov[1].iov_base = in_data; | ||
| 1054 | iov[1].iov_len = indatalen; | ||
| 1055 | num_iovecs = 2; | ||
| 1056 | } else | ||
| 1057 | num_iovecs = 1; | ||
| 1058 | |||
| 1059 | req->OutputOffset = 0; | ||
| 1060 | req->OutputCount = 0; /* MBZ */ | ||
| 1061 | |||
| 1062 | /* | ||
| 1063 | * Could increase MaxOutputResponse, but that would require more | ||
| 1064 | * than one credit. Windows typically sets this smaller, but for some | ||
| 1065 | * ioctls it may be useful to allow server to send more. No point | ||
| 1066 | * limiting what the server can send as long as fits in one credit | ||
| 1067 | */ | ||
| 1068 | req->MaxOutputResponse = cpu_to_le32(0xFF00); /* < 64K uses 1 credit */ | ||
| 1069 | |||
| 1070 | if (is_fsctl) | ||
| 1071 | req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); | ||
| 1072 | else | ||
| 1073 | req->Flags = 0; | ||
| 1074 | |||
| 1075 | iov[0].iov_base = (char *)req; | ||
| 1076 | /* 4 for rfc1002 length field */ | ||
| 1077 | iov[0].iov_len = get_rfc1002_length(req) + 4; | ||
| 1078 | |||
| 1079 | if (indatalen) | ||
| 1080 | inc_rfc1001_len(req, indatalen); | ||
| 1081 | |||
| 1082 | rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); | ||
| 1083 | rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; | ||
| 1084 | |||
| 1085 | if (rc != 0) { | ||
| 1086 | if (tcon) | ||
| 1087 | cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); | ||
| 1088 | goto ioctl_exit; | ||
| 1089 | } | ||
| 1090 | |||
| 1091 | /* check if caller wants to look at return data or just return rc */ | ||
| 1092 | if ((plen == NULL) || (out_data == NULL)) | ||
| 1093 | goto ioctl_exit; | ||
| 1094 | |||
| 1095 | *plen = le32_to_cpu(rsp->OutputCount); | ||
| 1096 | |||
| 1097 | /* We check for obvious errors in the output buffer length and offset */ | ||
| 1098 | if (*plen == 0) | ||
| 1099 | goto ioctl_exit; /* server returned no data */ | ||
| 1100 | else if (*plen > 0xFF00) { | ||
| 1101 | cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", *plen); | ||
| 1102 | *plen = 0; | ||
| 1103 | rc = -EIO; | ||
| 1104 | goto ioctl_exit; | ||
| 1105 | } | ||
| 1106 | |||
| 1107 | if (get_rfc1002_length(rsp) < le32_to_cpu(rsp->OutputOffset) + *plen) { | ||
| 1108 | cifs_dbg(VFS, "Malformed ioctl resp: len %d offset %d\n", *plen, | ||
| 1109 | le32_to_cpu(rsp->OutputOffset)); | ||
| 1110 | *plen = 0; | ||
| 1111 | rc = -EIO; | ||
| 1112 | goto ioctl_exit; | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | *out_data = kmalloc(*plen, GFP_KERNEL); | ||
| 1116 | if (*out_data == NULL) { | ||
| 1117 | rc = -ENOMEM; | ||
| 1118 | goto ioctl_exit; | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | memcpy(*out_data, rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset), | ||
| 1122 | *plen); | ||
| 1123 | ioctl_exit: | ||
| 1124 | free_rsp_buf(resp_buftype, rsp); | ||
| 1125 | return rc; | ||
| 1126 | } | ||
| 1127 | |||
| 1039 | int | 1128 | int |
| 1040 | SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, | 1129 | SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, |
| 1041 | u64 persistent_fid, u64 volatile_fid) | 1130 | u64 persistent_fid, u64 volatile_fid) |
| @@ -1384,8 +1473,7 @@ smb2_readv_callback(struct mid_q_entry *mid) | |||
| 1384 | case MID_RESPONSE_RECEIVED: | 1473 | case MID_RESPONSE_RECEIVED: |
| 1385 | credits_received = le16_to_cpu(buf->CreditRequest); | 1474 | credits_received = le16_to_cpu(buf->CreditRequest); |
| 1386 | /* result already set, check signature */ | 1475 | /* result already set, check signature */ |
| 1387 | if (server->sec_mode & | 1476 | if (server->sign) { |
| 1388 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | ||
| 1389 | int rc; | 1477 | int rc; |
| 1390 | 1478 | ||
| 1391 | rc = smb2_verify_signature(&rqst, server); | 1479 | rc = smb2_verify_signature(&rqst, server); |
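The new SMB2_ioctl() helper above takes an opaque input buffer and returns a kmalloc'd output buffer plus its length through out_data/plen. As a hedged illustration of how a caller might drive it (this sketch is not part of the patch; the fid values are assumed to come from an earlier SMB2_open(), and the usual cifs headers are assumed to be in scope), the example below requests a server-side copy resume key using FSCTL_SRV_REQUEST_RESUME_KEY, which is defined later in smbfsctl.h:

/* Hypothetical caller of the SMB2_ioctl() helper added above. */
static int example_get_resume_key(const unsigned int xid,
				  struct cifs_tcon *tcon,
				  u64 persistent_fid, u64 volatile_fid)
{
	char *out_data = NULL;
	u32 out_len = 0;
	int rc;

	rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
			FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */,
			NULL /* no input data */, 0, &out_data, &out_len);
	if (rc)
		return rc;

	/* on success out_data holds the opaque resume key; caller frees it */
	kfree(out_data);
	return 0;
}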
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 4cb4ced258cb..f31043b26bd3 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * fs/cifs/smb2pdu.h | 2 | * fs/cifs/smb2pdu.h |
| 3 | * | 3 | * |
| 4 | * Copyright (c) International Business Machines Corp., 2009, 2010 | 4 | * Copyright (c) International Business Machines Corp., 2009, 2013 |
| 5 | * Etersoft, 2012 | 5 | * Etersoft, 2012 |
| 6 | * Author(s): Steve French (sfrench@us.ibm.com) | 6 | * Author(s): Steve French (sfrench@us.ibm.com) |
| 7 | * Pavel Shilovsky (pshilovsky@samba.org) 2012 | 7 | * Pavel Shilovsky (pshilovsky@samba.org) 2012 |
| @@ -170,6 +170,7 @@ struct smb2_negotiate_req { | |||
| 170 | #define SMB20_PROT_ID 0x0202 | 170 | #define SMB20_PROT_ID 0x0202 |
| 171 | #define SMB21_PROT_ID 0x0210 | 171 | #define SMB21_PROT_ID 0x0210 |
| 172 | #define SMB30_PROT_ID 0x0300 | 172 | #define SMB30_PROT_ID 0x0300 |
| 173 | #define SMB302_PROT_ID 0x0302 | ||
| 173 | #define BAD_PROT_ID 0xFFFF | 174 | #define BAD_PROT_ID 0xFFFF |
| 174 | 175 | ||
| 175 | /* SecurityMode flags */ | 176 | /* SecurityMode flags */ |
| @@ -283,10 +284,17 @@ struct smb2_tree_connect_rsp { | |||
| 283 | #define SHI1005_FLAGS_ALLOW_NAMESPACE_CACHING 0x00000400 | 284 | #define SHI1005_FLAGS_ALLOW_NAMESPACE_CACHING 0x00000400 |
| 284 | #define SHI1005_FLAGS_ACCESS_BASED_DIRECTORY_ENUM 0x00000800 | 285 | #define SHI1005_FLAGS_ACCESS_BASED_DIRECTORY_ENUM 0x00000800 |
| 285 | #define SHI1005_FLAGS_FORCE_LEVELII_OPLOCK 0x00001000 | 286 | #define SHI1005_FLAGS_FORCE_LEVELII_OPLOCK 0x00001000 |
| 286 | #define SHI1005_FLAGS_ENABLE_HASH 0x00002000 | 287 | #define SHI1005_FLAGS_ENABLE_HASH_V1 0x00002000 |
| 288 | #define SHI1005_FLAGS_ENABLE_HASH_V2 0x00004000 | ||
| 289 | #define SHI1005_FLAGS_ENCRYPT_DATA 0x00008000 | ||
| 290 | #define SHI1005_FLAGS_ALL 0x0000FF33 | ||
| 287 | 291 | ||
| 288 | /* Possible share capabilities */ | 292 | /* Possible share capabilities */ |
| 289 | #define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) | 293 | #define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) /* all dialects */ |
| 294 | #define SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY cpu_to_le32(0x00000010) /* 3.0 */ | ||
| 295 | #define SMB2_SHARE_CAP_SCALEOUT cpu_to_le32(0x00000020) /* 3.0 */ | ||
| 296 | #define SMB2_SHARE_CAP_CLUSTER cpu_to_le32(0x00000040) /* 3.0 */ | ||
| 297 | #define SMB2_SHARE_CAP_ASYMMETRIC cpu_to_le32(0x00000080) /* 3.02 */ | ||
| 290 | 298 | ||
| 291 | struct smb2_tree_disconnect_req { | 299 | struct smb2_tree_disconnect_req { |
| 292 | struct smb2_hdr hdr; | 300 | struct smb2_hdr hdr; |
| @@ -477,6 +485,75 @@ struct create_lease { | |||
| 477 | struct lease_context lcontext; | 485 | struct lease_context lcontext; |
| 478 | } __packed; | 486 | } __packed; |
| 479 | 487 | ||
| 488 | /* this goes in the ioctl buffer when doing a copychunk request */ | ||
| 489 | struct copychunk_ioctl { | ||
| 490 | char SourceKey[24]; | ||
| 491 | __le32 ChunkCount; /* we are only sending 1 */ | ||
| 492 | __le32 Reserved; | ||
| 493 | /* array will only be one chunk long for us */ | ||
| 494 | __le64 SourceOffset; | ||
| 495 | __le64 TargetOffset; | ||
| 496 | __le32 Length; /* how many bytes to copy */ | ||
| 497 | __u32 Reserved2; | ||
| 498 | } __packed; | ||
| 499 | |||
| 500 | /* Response and Request are the same format */ | ||
| 501 | struct validate_negotiate_info { | ||
| 502 | __le32 Capabilities; | ||
| 503 | __u8 Guid[SMB2_CLIENT_GUID_SIZE]; | ||
| 504 | __le16 SecurityMode; | ||
| 505 | __le16 DialectCount; | ||
| 506 | __le16 Dialect[1]; | ||
| 507 | } __packed; | ||
| 508 | |||
| 509 | #define RSS_CAPABLE 0x00000001 | ||
| 510 | #define RDMA_CAPABLE 0x00000002 | ||
| 511 | |||
| 512 | struct network_interface_info_ioctl_rsp { | ||
| 513 | __le32 Next; /* next interface. zero if this is last one */ | ||
| 514 | __le32 IfIndex; | ||
| 515 | __le32 Capability; /* RSS or RDMA Capable */ | ||
| 516 | __le32 Reserved; | ||
| 517 | __le64 LinkSpeed; | ||
| 518 | char SockAddr_Storage[128]; | ||
| 519 | } __packed; | ||
| 520 | |||
| 521 | #define NO_FILE_ID 0xFFFFFFFFFFFFFFFFULL /* general ioctls to srv not to file */ | ||
| 522 | |||
| 523 | struct smb2_ioctl_req { | ||
| 524 | struct smb2_hdr hdr; | ||
| 525 | __le16 StructureSize; /* Must be 57 */ | ||
| 526 | __u16 Reserved; | ||
| 527 | __le32 CtlCode; | ||
| 528 | __u64 PersistentFileId; /* opaque endianness */ | ||
| 529 | __u64 VolatileFileId; /* opaque endianness */ | ||
| 530 | __le32 InputOffset; | ||
| 531 | __le32 InputCount; | ||
| 532 | __le32 MaxInputResponse; | ||
| 533 | __le32 OutputOffset; | ||
| 534 | __le32 OutputCount; | ||
| 535 | __le32 MaxOutputResponse; | ||
| 536 | __le32 Flags; | ||
| 537 | __u32 Reserved2; | ||
| 538 | char Buffer[0]; | ||
| 539 | } __packed; | ||
| 540 | |||
| 541 | struct smb2_ioctl_rsp { | ||
| 542 | struct smb2_hdr hdr; | ||
| 543 | __le16 StructureSize; /* Must be 57 */ | ||
| 544 | __u16 Reserved; | ||
| 545 | __le32 CtlCode; | ||
| 546 | __u64 PersistentFileId; /* opaque endianness */ | ||
| 547 | __u64 VolatileFileId; /* opaque endianness */ | ||
| 548 | __le32 InputOffset; | ||
| 549 | __le32 InputCount; | ||
| 550 | __le32 OutputOffset; | ||
| 551 | __le32 OutputCount; | ||
| 552 | __le32 Flags; | ||
| 553 | __u32 Reserved2; | ||
| 554 | /* char * buffer[] */ | ||
| 555 | } __packed; | ||
| 556 | |||
| 480 | /* Currently defined values for close flags */ | 557 | /* Currently defined values for close flags */ |
| 481 | #define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) | 558 | #define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) |
| 482 | struct smb2_close_req { | 559 | struct smb2_close_req { |
| @@ -517,17 +594,25 @@ struct smb2_flush_rsp { | |||
| 517 | __le16 Reserved; | 594 | __le16 Reserved; |
| 518 | } __packed; | 595 | } __packed; |
| 519 | 596 | ||
| 597 | /* For read request Flags field below, following flag is defined for SMB3.02 */ | ||
| 598 | #define SMB2_READFLAG_READ_UNBUFFERED 0x01 | ||
| 599 | |||
| 600 | /* Channel field for read and write: exactly one of following flags can be set*/ | ||
| 601 | #define SMB2_CHANNEL_NONE 0x00000000 | ||
| 602 | #define SMB2_CHANNEL_RDMA_V1 0x00000001 /* SMB3 or later */ | ||
| 603 | #define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000001 /* SMB3.02 or later */ | ||
| 604 | |||
| 520 | struct smb2_read_req { | 605 | struct smb2_read_req { |
| 521 | struct smb2_hdr hdr; | 606 | struct smb2_hdr hdr; |
| 522 | __le16 StructureSize; /* Must be 49 */ | 607 | __le16 StructureSize; /* Must be 49 */ |
| 523 | __u8 Padding; /* offset from start of SMB2 header to place read */ | 608 | __u8 Padding; /* offset from start of SMB2 header to place read */ |
| 524 | __u8 Reserved; | 609 | __u8 Flags; /* MBZ unless SMB3.02 or later */ |
| 525 | __le32 Length; | 610 | __le32 Length; |
| 526 | __le64 Offset; | 611 | __le64 Offset; |
| 527 | __u64 PersistentFileId; /* opaque endianness */ | 612 | __u64 PersistentFileId; /* opaque endianness */ |
| 528 | __u64 VolatileFileId; /* opaque endianness */ | 613 | __u64 VolatileFileId; /* opaque endianness */ |
| 529 | __le32 MinimumCount; | 614 | __le32 MinimumCount; |
| 530 | __le32 Channel; /* Reserved MBZ */ | 615 | __le32 Channel; /* MBZ except for SMB3 or later */ |
| 531 | __le32 RemainingBytes; | 616 | __le32 RemainingBytes; |
| 532 | __le16 ReadChannelInfoOffset; /* Reserved MBZ */ | 617 | __le16 ReadChannelInfoOffset; /* Reserved MBZ */ |
| 533 | __le16 ReadChannelInfoLength; /* Reserved MBZ */ | 618 | __le16 ReadChannelInfoLength; /* Reserved MBZ */ |
| @@ -545,8 +630,9 @@ struct smb2_read_rsp { | |||
| 545 | __u8 Buffer[1]; | 630 | __u8 Buffer[1]; |
| 546 | } __packed; | 631 | } __packed; |
| 547 | 632 | ||
| 548 | /* For write request Flags field below the following flag is defined: */ | 633 | /* For write request Flags field below the following flags are defined: */ |
| 549 | #define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001 | 634 | #define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001 /* SMB2.1 or later */ |
| 635 | #define SMB2_WRITEFLAG_WRITE_UNBUFFERED 0x00000002 /* SMB3.02 or later */ | ||
| 550 | 636 | ||
| 551 | struct smb2_write_req { | 637 | struct smb2_write_req { |
| 552 | struct smb2_hdr hdr; | 638 | struct smb2_hdr hdr; |
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 2aa3535e38ce..d4e1eb807457 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h | |||
| @@ -111,6 +111,10 @@ extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, | |||
| 111 | __u32 desired_access, __u32 create_disposition, | 111 | __u32 desired_access, __u32 create_disposition, |
| 112 | __u32 file_attributes, __u32 create_options, | 112 | __u32 file_attributes, __u32 create_options, |
| 113 | __u8 *oplock, struct smb2_file_all_info *buf); | 113 | __u8 *oplock, struct smb2_file_all_info *buf); |
| 114 | extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, | ||
| 115 | u64 persistent_fid, u64 volatile_fid, u32 opcode, | ||
| 116 | bool is_fsctl, char *in_data, u32 indatalen, | ||
| 117 | char **out_data, u32 *plen /* returned data len */); | ||
| 114 | extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, | 118 | extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, |
| 115 | u64 persistent_file_id, u64 volatile_file_id); | 119 | u64 persistent_file_id, u64 volatile_file_id); |
| 116 | extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, | 120 | extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, |
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 01f0ac800780..09b4fbaadeb6 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c | |||
| @@ -116,11 +116,155 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) | |||
| 116 | return rc; | 116 | return rc; |
| 117 | } | 117 | } |
| 118 | 118 | ||
| 119 | void | ||
| 120 | generate_smb3signingkey(struct TCP_Server_Info *server) | ||
| 121 | { | ||
| 122 | unsigned char zero = 0x0; | ||
| 123 | __u8 i[4] = {0, 0, 0, 1}; | ||
| 124 | __u8 L[4] = {0, 0, 0, 128}; | ||
| 125 | int rc = 0; | ||
| 126 | unsigned char prfhash[SMB2_HMACSHA256_SIZE]; | ||
| 127 | unsigned char *hashptr = prfhash; | ||
| 128 | |||
| 129 | memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE); | ||
| 130 | memset(server->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE); | ||
| 131 | |||
| 132 | rc = crypto_shash_setkey(server->secmech.hmacsha256, | ||
| 133 | server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); | ||
| 134 | if (rc) { | ||
| 135 | cifs_dbg(VFS, "%s: Could not set with session key\n", __func__); | ||
| 136 | goto smb3signkey_ret; | ||
| 137 | } | ||
| 138 | |||
| 139 | rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); | ||
| 140 | if (rc) { | ||
| 141 | cifs_dbg(VFS, "%s: Could not init sign hmac\n", __func__); | ||
| 142 | goto smb3signkey_ret; | ||
| 143 | } | ||
| 144 | |||
| 145 | rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, | ||
| 146 | i, 4); | ||
| 147 | if (rc) { | ||
| 148 | cifs_dbg(VFS, "%s: Could not update with n\n", __func__); | ||
| 149 | goto smb3signkey_ret; | ||
| 150 | } | ||
| 151 | |||
| 152 | rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, | ||
| 153 | "SMB2AESCMAC", 12); | ||
| 154 | if (rc) { | ||
| 155 | cifs_dbg(VFS, "%s: Could not update with label\n", __func__); | ||
| 156 | goto smb3signkey_ret; | ||
| 157 | } | ||
| 158 | |||
| 159 | rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, | ||
| 160 | &zero, 1); | ||
| 161 | if (rc) { | ||
| 162 | cifs_dbg(VFS, "%s: Could not update with zero\n", __func__); | ||
| 163 | goto smb3signkey_ret; | ||
| 164 | } | ||
| 165 | |||
| 166 | rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, | ||
| 167 | "SmbSign", 8); | ||
| 168 | if (rc) { | ||
| 169 | cifs_dbg(VFS, "%s: Could not update with context\n", __func__); | ||
| 170 | goto smb3signkey_ret; | ||
| 171 | } | ||
| 172 | |||
| 173 | rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, | ||
| 174 | L, 4); | ||
| 175 | if (rc) { | ||
| 176 | cifs_dbg(VFS, "%s: Could not update with L\n", __func__); | ||
| 177 | goto smb3signkey_ret; | ||
| 178 | } | ||
| 179 | |||
| 180 | rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash, | ||
| 181 | hashptr); | ||
| 182 | if (rc) { | ||
| 183 | cifs_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__); | ||
| 184 | goto smb3signkey_ret; | ||
| 185 | } | ||
| 186 | |||
| 187 | memcpy(server->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE); | ||
| 188 | |||
| 189 | smb3signkey_ret: | ||
| 190 | return; | ||
| 191 | } | ||
| 192 | |||
| 119 | int | 193 | int |
| 120 | smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) | 194 | smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) |
| 121 | { | 195 | { |
| 122 | cifs_dbg(FYI, "smb3 signatures not supported yet\n"); | 196 | int i, rc; |
| 123 | return -EOPNOTSUPP; | 197 | unsigned char smb3_signature[SMB2_CMACAES_SIZE]; |
| 198 | unsigned char *sigptr = smb3_signature; | ||
| 199 | struct kvec *iov = rqst->rq_iov; | ||
| 200 | int n_vec = rqst->rq_nvec; | ||
| 201 | struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; | ||
| 202 | |||
| 203 | memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE); | ||
| 204 | memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); | ||
| 205 | |||
| 206 | rc = crypto_shash_setkey(server->secmech.cmacaes, | ||
| 207 | server->smb3signingkey, SMB2_CMACAES_SIZE); | ||
| 208 | if (rc) { | ||
| 209 | cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__); | ||
| 210 | return rc; | ||
| 211 | } | ||
| 212 | |||
| 213 | rc = crypto_shash_init(&server->secmech.sdesccmacaes->shash); | ||
| 214 | if (rc) { | ||
| 215 | cifs_dbg(VFS, "%s: Could not init cmac aes\n", __func__); | ||
| 216 | return rc; | ||
| 217 | } | ||
| 218 | |||
| 219 | for (i = 0; i < n_vec; i++) { | ||
| 220 | if (iov[i].iov_len == 0) | ||
| 221 | continue; | ||
| 222 | if (iov[i].iov_base == NULL) { | ||
| 223 | cifs_dbg(VFS, "null iovec entry"); | ||
| 224 | return -EIO; | ||
| 225 | } | ||
| 226 | /* | ||
| 227 | * The first entry includes the rfc1002 length field (the first 4 | ||
| 228 | * bytes before the header), which does not get signed. | ||
| 229 | */ | ||
| 230 | if (i == 0) { | ||
| 231 | if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ | ||
| 232 | break; /* nothing to sign or corrupt header */ | ||
| 233 | rc = | ||
| 234 | crypto_shash_update( | ||
| 235 | &server->secmech.sdesccmacaes->shash, | ||
| 236 | iov[i].iov_base + 4, iov[i].iov_len - 4); | ||
| 237 | } else { | ||
| 238 | rc = | ||
| 239 | crypto_shash_update( | ||
| 240 | &server->secmech.sdesccmacaes->shash, | ||
| 241 | iov[i].iov_base, iov[i].iov_len); | ||
| 242 | } | ||
| 243 | if (rc) { | ||
| 244 | cifs_dbg(VFS, "%s: Couldn't update cmac aes with payload\n", | ||
| 245 | __func__); | ||
| 246 | return rc; | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | /* now hash over the rq_pages array */ | ||
| 251 | for (i = 0; i < rqst->rq_npages; i++) { | ||
| 252 | struct kvec p_iov; | ||
| 253 | |||
| 254 | cifs_rqst_page_to_kvec(rqst, i, &p_iov); | ||
| 255 | crypto_shash_update(&server->secmech.sdesccmacaes->shash, | ||
| 256 | p_iov.iov_base, p_iov.iov_len); | ||
| 257 | kunmap(rqst->rq_pages[i]); | ||
| 258 | } | ||
| 259 | |||
| 260 | rc = crypto_shash_final(&server->secmech.sdesccmacaes->shash, | ||
| 261 | sigptr); | ||
| 262 | if (rc) | ||
| 263 | cifs_dbg(VFS, "%s: Could not generate cmac aes\n", __func__); | ||
| 264 | |||
| 265 | memcpy(smb2_pdu->Signature, sigptr, SMB2_SIGNATURE_SIZE); | ||
| 266 | |||
| 267 | return rc; | ||
| 124 | } | 268 | } |
| 125 | 269 | ||
| 126 | /* must be called with server->srv_mutex held */ | 270 | /* must be called with server->srv_mutex held */ |
| @@ -275,8 +419,7 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, | |||
| 275 | 419 | ||
| 276 | dump_smb(mid->resp_buf, min_t(u32, 80, len)); | 420 | dump_smb(mid->resp_buf, min_t(u32, 80, len)); |
| 277 | /* convert the length into a more usable form */ | 421 | /* convert the length into a more usable form */ |
| 278 | if ((len > 24) && | 422 | if (len > 24 && server->sign) { |
| 279 | (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) { | ||
| 280 | int rc; | 423 | int rc; |
| 281 | 424 | ||
| 282 | rc = smb2_verify_signature(&rqst, server); | 425 | rc = smb2_verify_signature(&rqst, server); |
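generate_smb3signingkey() above is an SP800-108 counter-mode KDF over HMAC-SHA256: the session key is the HMAC key and the message is counter || label || 0x00 || context || output-length-in-bits. The sketch below is illustrative only, not part of the patch; it lays the same bytes out in one flat buffer, which also makes clear why the label and context lengths are 12 and 8 — both strings are hashed including their trailing NUL.

/* Sketch of the 29-byte KDF input that generate_smb3signingkey() feeds to
 * HMAC-SHA256 piecewise above, shown here as one flat buffer for clarity. */
static void example_build_smb3_kdf_input(unsigned char buf[29])
{
	static const __u8 counter[4] = { 0, 0, 0, 1 };	/* i = 1, big endian */
	static const __u8 keylen[4] = { 0, 0, 0, 128 };	/* L = 128 bits */
	unsigned char *p = buf;

	memcpy(p, counter, 4);			p += 4;
	memcpy(p, "SMB2AESCMAC", 12);		p += 12;	/* label + NUL */
	*p++ = 0;						/* zero separator */
	memcpy(p, "SmbSign", 8);		p += 8;		/* context + NUL */
	memcpy(p, keylen, 4);
}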
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index 7056b891e087..d952ee48f4dc 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions | 2 | * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions |
| 3 | * | 3 | * |
| 4 | * Copyright (c) International Business Machines Corp., 2002,2009 | 4 | * Copyright (c) International Business Machines Corp., 2002,2013 |
| 5 | * Author(s): Steve French (sfrench@us.ibm.com) | 5 | * Author(s): Steve French (sfrench@us.ibm.com) |
| 6 | * | 6 | * |
| 7 | * This library is free software; you can redistribute it and/or modify | 7 | * This library is free software; you can redistribute it and/or modify |
| @@ -22,7 +22,7 @@ | |||
| 22 | /* IOCTL information */ | 22 | /* IOCTL information */ |
| 23 | /* | 23 | /* |
| 24 | * List of ioctl/fsctl function codes that are or could be useful in the | 24 | * List of ioctl/fsctl function codes that are or could be useful in the |
| 25 | * future to remote clients like cifs or SMB2 client. There is probably | 25 | * future to remote clients like cifs or SMB2/SMB3 client. This is probably |
| 26 | * a slightly larger set of fsctls that NTFS local filesystem could handle, | 26 | * a slightly larger set of fsctls that NTFS local filesystem could handle, |
| 27 | * including the seven below that we do not have struct definitions for. | 27 | * including the seven below that we do not have struct definitions for. |
| 28 | * Even with protocol definitions for most of these now available, we still | 28 | * Even with protocol definitions for most of these now available, we still |
| @@ -30,7 +30,13 @@ | |||
| 30 | * remotely. Some of the following, such as the encryption/compression ones | 30 | * remotely. Some of the following, such as the encryption/compression ones |
| 31 | * could be invoked from tools via a specialized hook into the VFS rather | 31 | * could be invoked from tools via a specialized hook into the VFS rather |
| 32 | * than via the standard vfs entry points | 32 | * than via the standard vfs entry points |
| 33 | * | ||
| 34 | * See MS-SMB2 Section 2.2.31 (last checked June 2013; all of that list is | ||
| 35 | * below). Additional detail on less common ones can be found in MS-FSCC | ||
| 36 | * section 2.3. | ||
| 33 | */ | 37 | */ |
| 38 | #define FSCTL_DFS_GET_REFERRALS 0x00060194 | ||
| 39 | #define FSCTL_DFS_GET_REFERRALS_EX 0x000601B0 | ||
| 34 | #define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000 | 40 | #define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000 |
| 35 | #define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004 | 41 | #define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004 |
| 36 | #define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008 | 42 | #define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008 |
| @@ -71,14 +77,31 @@ | |||
| 71 | #define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */ | 77 | #define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */ |
| 72 | #define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */ | 78 | #define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */ |
| 73 | #define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */ | 79 | #define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */ |
| 80 | #define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */ | ||
| 74 | #define FSCTL_SIS_LINK_FILES 0x0009C104 | 81 | #define FSCTL_SIS_LINK_FILES 0x0009C104 |
| 75 | #define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */ | 82 | #define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */ |
| 76 | #define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */ | 83 | #define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */ |
| 77 | /* strange that the number for this op is not sequential with previous op */ | 84 | /* strange that the number for this op is not sequential with previous op */ |
| 78 | #define FSCTL_PIPE_WAIT 0x00110018 /* BB add struct */ | 85 | #define FSCTL_PIPE_WAIT 0x00110018 /* BB add struct */ |
| 86 | /* Enumerate previous versions of a file */ | ||
| 87 | #define FSCTL_SRV_ENUMERATE_SNAPSHOTS 0x00144064 | ||
| 88 | /* Retrieve an opaque file reference for server-side data movement ie copy */ | ||
| 89 | #define FSCTL_SRV_REQUEST_RESUME_KEY 0x00140078 | ||
| 90 | #define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */ | ||
| 79 | #define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ | 91 | #define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ |
| 80 | #define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ | 92 | #define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ |
| 93 | #define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* BB add struct */ | ||
| 94 | /* Perform server-side data movement */ | ||
| 95 | #define FSCTL_SRV_COPYCHUNK 0x001440F2 | ||
| 96 | #define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2 | ||
| 97 | #define FSCTL_QUERY_NETWORK_INTERFACE_INFO 0x001401FC /* BB add struct */ | ||
| 98 | #define FSCTL_SRV_READ_HASH 0x001441BB /* BB add struct */ | ||
| 81 | 99 | ||
| 82 | #define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003 | 100 | #define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003 |
| 83 | #define IO_REPARSE_TAG_HSM 0xC0000004 | 101 | #define IO_REPARSE_TAG_HSM 0xC0000004 |
| 84 | #define IO_REPARSE_TAG_SIS 0x80000007 | 102 | #define IO_REPARSE_TAG_SIS 0x80000007 |
| 103 | |||
| 104 | /* fsctl flags */ | ||
| 105 | /* If Flags is set to this value, the request is an FSCTL not ioctl request */ | ||
| 106 | #define SMB2_0_IOCTL_IS_FSCTL 0x00000001 | ||
| 107 | |||
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bfbf4700d160..6fdcb1b4a106 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
| @@ -447,7 +447,7 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) | |||
| 447 | { | 447 | { |
| 448 | int error; | 448 | int error; |
| 449 | 449 | ||
| 450 | error = wait_event_freezekillable(server->response_q, | 450 | error = wait_event_freezekillable_unsafe(server->response_q, |
| 451 | midQ->mid_state != MID_REQUEST_SUBMITTED); | 451 | midQ->mid_state != MID_REQUEST_SUBMITTED); |
| 452 | if (error < 0) | 452 | if (error < 0) |
| 453 | return -ERESTARTSYS; | 453 | return -ERESTARTSYS; |
| @@ -463,7 +463,7 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) | |||
| 463 | struct mid_q_entry *mid; | 463 | struct mid_q_entry *mid; |
| 464 | 464 | ||
| 465 | /* enable signing if server requires it */ | 465 | /* enable signing if server requires it */ |
| 466 | if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | 466 | if (server->sign) |
| 467 | hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; | 467 | hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; |
| 468 | 468 | ||
| 469 | mid = AllocMidQEntry(hdr, server); | 469 | mid = AllocMidQEntry(hdr, server); |
| @@ -612,7 +612,7 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, | |||
| 612 | dump_smb(mid->resp_buf, min_t(u32, 92, len)); | 612 | dump_smb(mid->resp_buf, min_t(u32, 92, len)); |
| 613 | 613 | ||
| 614 | /* convert the length into a more usable form */ | 614 | /* convert the length into a more usable form */ |
| 615 | if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | 615 | if (server->sign) { |
| 616 | struct kvec iov; | 616 | struct kvec iov; |
| 617 | int rc = 0; | 617 | int rc = 0; |
| 618 | struct smb_rqst rqst = { .rq_iov = &iov, | 618 | struct smb_rqst rqst = { .rq_iov = &iov, |
diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 87e0ee9f4465..190effc6a6fa 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c | |||
| @@ -487,13 +487,7 @@ static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx) | |||
| 487 | 487 | ||
| 488 | /* skip null entries */ | 488 | /* skip null entries */ |
| 489 | if (vdir->d_fileno && name.len) { | 489 | if (vdir->d_fileno && name.len) { |
| 490 | /* try to look up this entry in the dcache, that way | 490 | ino = vdir->d_fileno; |
| 491 | * userspace doesn't have to worry about breaking | ||
| 492 | * getcwd by having mismatched inode numbers for | ||
| 493 | * internal volume mountpoints. */ | ||
| 494 | ino = find_inode_number(de, &name); | ||
| 495 | if (!ino) ino = vdir->d_fileno; | ||
| 496 | |||
| 497 | type = CDT2DT(vdir->d_type); | 491 | type = CDT2DT(vdir->d_type); |
| 498 | if (!dir_emit(ctx, name.name, name.len, ino, type)) | 492 | if (!dir_emit(ctx, name.name, name.len, ino, type)) |
| 499 | break; | 493 | break; |
| @@ -532,7 +526,7 @@ static int coda_dentry_revalidate(struct dentry *de, unsigned int flags) | |||
| 532 | if (cii->c_flags & C_FLUSH) | 526 | if (cii->c_flags & C_FLUSH) |
| 533 | coda_flag_inode_children(inode, C_FLUSH); | 527 | coda_flag_inode_children(inode, C_FLUSH); |
| 534 | 528 | ||
| 535 | if (de->d_count > 1) | 529 | if (d_count(de) > 1) |
| 536 | /* pretend it's valid, but don't change the flags */ | 530 | /* pretend it's valid, but don't change the flags */ |
| 537 | goto out; | 531 | goto out; |
| 538 | 532 | ||
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 64e5323cbbb0..5e7c60c1cb63 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
| @@ -387,7 +387,7 @@ static void remove_dir(struct dentry * d) | |||
| 387 | if (d->d_inode) | 387 | if (d->d_inode) |
| 388 | simple_rmdir(parent->d_inode,d); | 388 | simple_rmdir(parent->d_inode,d); |
| 389 | 389 | ||
| 390 | pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count); | 390 | pr_debug(" o %s removing done (%d)\n",d->d_name.name, d_count(d)); |
| 391 | 391 | ||
| 392 | dput(parent); | 392 | dput(parent); |
| 393 | } | 393 | } |
diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 2b6cb23dd14e..1d1c41f1014d 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c | |||
| @@ -203,7 +203,7 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof | |||
| 203 | mutex_lock(&buffer->mutex); | 203 | mutex_lock(&buffer->mutex); |
| 204 | len = fill_write_buffer(buffer, buf, count); | 204 | len = fill_write_buffer(buffer, buf, count); |
| 205 | if (len > 0) | 205 | if (len > 0) |
| 206 | len = flush_write_buffer(file->f_path.dentry, buffer, count); | 206 | len = flush_write_buffer(file->f_path.dentry, buffer, len); |
| 207 | if (len > 0) | 207 | if (len > 0) |
| 208 | *ppos += len; | 208 | *ppos += len; |
| 209 | mutex_unlock(&buffer->mutex); | 209 | mutex_unlock(&buffer->mutex); |
diff --git a/fs/coredump.c b/fs/coredump.c index dafafbafa731..72f816d6cad9 100644 --- a/fs/coredump.c +++ b/fs/coredump.c | |||
| @@ -45,69 +45,79 @@ | |||
| 45 | #include <trace/events/sched.h> | 45 | #include <trace/events/sched.h> |
| 46 | 46 | ||
| 47 | int core_uses_pid; | 47 | int core_uses_pid; |
| 48 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | ||
| 49 | unsigned int core_pipe_limit; | 48 | unsigned int core_pipe_limit; |
| 49 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | ||
| 50 | static int core_name_size = CORENAME_MAX_SIZE; | ||
| 50 | 51 | ||
| 51 | struct core_name { | 52 | struct core_name { |
| 52 | char *corename; | 53 | char *corename; |
| 53 | int used, size; | 54 | int used, size; |
| 54 | }; | 55 | }; |
| 55 | static atomic_t call_count = ATOMIC_INIT(1); | ||
| 56 | 56 | ||
| 57 | /* The maximal length of core_pattern is also specified in sysctl.c */ | 57 | /* The maximal length of core_pattern is also specified in sysctl.c */ |
| 58 | 58 | ||
| 59 | static int expand_corename(struct core_name *cn) | 59 | static int expand_corename(struct core_name *cn, int size) |
| 60 | { | 60 | { |
| 61 | char *old_corename = cn->corename; | 61 | char *corename = krealloc(cn->corename, size, GFP_KERNEL); |
| 62 | |||
| 63 | cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); | ||
| 64 | cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); | ||
| 65 | 62 | ||
| 66 | if (!cn->corename) { | 63 | if (!corename) |
| 67 | kfree(old_corename); | ||
| 68 | return -ENOMEM; | 64 | return -ENOMEM; |
| 69 | } | ||
| 70 | 65 | ||
| 66 | if (size > core_name_size) /* racy but harmless */ | ||
| 67 | core_name_size = size; | ||
| 68 | |||
| 69 | cn->size = ksize(corename); | ||
| 70 | cn->corename = corename; | ||
| 71 | return 0; | 71 | return 0; |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg) | ||
| 75 | { | ||
| 76 | int free, need; | ||
| 77 | |||
| 78 | again: | ||
| 79 | free = cn->size - cn->used; | ||
| 80 | need = vsnprintf(cn->corename + cn->used, free, fmt, arg); | ||
| 81 | if (need < free) { | ||
| 82 | cn->used += need; | ||
| 83 | return 0; | ||
| 84 | } | ||
| 85 | |||
| 86 | if (!expand_corename(cn, cn->size + need - free + 1)) | ||
| 87 | goto again; | ||
| 88 | |||
| 89 | return -ENOMEM; | ||
| 90 | } | ||
| 91 | |||
| 74 | static int cn_printf(struct core_name *cn, const char *fmt, ...) | 92 | static int cn_printf(struct core_name *cn, const char *fmt, ...) |
| 75 | { | 93 | { |
| 76 | char *cur; | ||
| 77 | int need; | ||
| 78 | int ret; | ||
| 79 | va_list arg; | 94 | va_list arg; |
| 95 | int ret; | ||
| 80 | 96 | ||
| 81 | va_start(arg, fmt); | 97 | va_start(arg, fmt); |
| 82 | need = vsnprintf(NULL, 0, fmt, arg); | 98 | ret = cn_vprintf(cn, fmt, arg); |
| 83 | va_end(arg); | 99 | va_end(arg); |
| 84 | 100 | ||
| 85 | if (likely(need < cn->size - cn->used - 1)) | 101 | return ret; |
| 86 | goto out_printf; | 102 | } |
| 87 | 103 | ||
| 88 | ret = expand_corename(cn); | 104 | static int cn_esc_printf(struct core_name *cn, const char *fmt, ...) |
| 89 | if (ret) | 105 | { |
| 90 | goto expand_fail; | 106 | int cur = cn->used; |
| 107 | va_list arg; | ||
| 108 | int ret; | ||
| 91 | 109 | ||
| 92 | out_printf: | ||
| 93 | cur = cn->corename + cn->used; | ||
| 94 | va_start(arg, fmt); | 110 | va_start(arg, fmt); |
| 95 | vsnprintf(cur, need + 1, fmt, arg); | 111 | ret = cn_vprintf(cn, fmt, arg); |
| 96 | va_end(arg); | 112 | va_end(arg); |
| 97 | cn->used += need; | ||
| 98 | return 0; | ||
| 99 | 113 | ||
| 100 | expand_fail: | 114 | for (; cur < cn->used; ++cur) { |
| 115 | if (cn->corename[cur] == '/') | ||
| 116 | cn->corename[cur] = '!'; | ||
| 117 | } | ||
| 101 | return ret; | 118 | return ret; |
| 102 | } | 119 | } |
| 103 | 120 | ||
| 104 | static void cn_escape(char *str) | ||
| 105 | { | ||
| 106 | for (; *str; str++) | ||
| 107 | if (*str == '/') | ||
| 108 | *str = '!'; | ||
| 109 | } | ||
| 110 | |||
| 111 | static int cn_print_exe_file(struct core_name *cn) | 121 | static int cn_print_exe_file(struct core_name *cn) |
| 112 | { | 122 | { |
| 113 | struct file *exe_file; | 123 | struct file *exe_file; |
| @@ -115,12 +125,8 @@ static int cn_print_exe_file(struct core_name *cn) | |||
| 115 | int ret; | 125 | int ret; |
| 116 | 126 | ||
| 117 | exe_file = get_mm_exe_file(current->mm); | 127 | exe_file = get_mm_exe_file(current->mm); |
| 118 | if (!exe_file) { | 128 | if (!exe_file) |
| 119 | char *commstart = cn->corename + cn->used; | 129 | return cn_esc_printf(cn, "%s (path unknown)", current->comm); |
| 120 | ret = cn_printf(cn, "%s (path unknown)", current->comm); | ||
| 121 | cn_escape(commstart); | ||
| 122 | return ret; | ||
| 123 | } | ||
| 124 | 130 | ||
| 125 | pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); | 131 | pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); |
| 126 | if (!pathbuf) { | 132 | if (!pathbuf) { |
| @@ -134,9 +140,7 @@ static int cn_print_exe_file(struct core_name *cn) | |||
| 134 | goto free_buf; | 140 | goto free_buf; |
| 135 | } | 141 | } |
| 136 | 142 | ||
| 137 | cn_escape(path); | 143 | ret = cn_esc_printf(cn, "%s", path); |
| 138 | |||
| 139 | ret = cn_printf(cn, "%s", path); | ||
| 140 | 144 | ||
| 141 | free_buf: | 145 | free_buf: |
| 142 | kfree(pathbuf); | 146 | kfree(pathbuf); |
| @@ -157,19 +161,19 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) | |||
| 157 | int pid_in_pattern = 0; | 161 | int pid_in_pattern = 0; |
| 158 | int err = 0; | 162 | int err = 0; |
| 159 | 163 | ||
| 160 | cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); | ||
| 161 | cn->corename = kmalloc(cn->size, GFP_KERNEL); | ||
| 162 | cn->used = 0; | 164 | cn->used = 0; |
| 163 | 165 | cn->corename = NULL; | |
| 164 | if (!cn->corename) | 166 | if (expand_corename(cn, core_name_size)) |
| 165 | return -ENOMEM; | 167 | return -ENOMEM; |
| 168 | cn->corename[0] = '\0'; | ||
| 169 | |||
| 170 | if (ispipe) | ||
| 171 | ++pat_ptr; | ||
| 166 | 172 | ||
| 167 | /* Repeat as long as we have more pattern to process and more output | 173 | /* Repeat as long as we have more pattern to process and more output |
| 168 | space */ | 174 | space */ |
| 169 | while (*pat_ptr) { | 175 | while (*pat_ptr) { |
| 170 | if (*pat_ptr != '%') { | 176 | if (*pat_ptr != '%') { |
| 171 | if (*pat_ptr == 0) | ||
| 172 | goto out; | ||
| 173 | err = cn_printf(cn, "%c", *pat_ptr++); | 177 | err = cn_printf(cn, "%c", *pat_ptr++); |
| 174 | } else { | 178 | } else { |
| 175 | switch (*++pat_ptr) { | 179 | switch (*++pat_ptr) { |
| @@ -210,22 +214,16 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) | |||
| 210 | break; | 214 | break; |
| 211 | } | 215 | } |
| 212 | /* hostname */ | 216 | /* hostname */ |
| 213 | case 'h': { | 217 | case 'h': |
| 214 | char *namestart = cn->corename + cn->used; | ||
| 215 | down_read(&uts_sem); | 218 | down_read(&uts_sem); |
| 216 | err = cn_printf(cn, "%s", | 219 | err = cn_esc_printf(cn, "%s", |
| 217 | utsname()->nodename); | 220 | utsname()->nodename); |
| 218 | up_read(&uts_sem); | 221 | up_read(&uts_sem); |
| 219 | cn_escape(namestart); | ||
| 220 | break; | 222 | break; |
| 221 | } | ||
| 222 | /* executable */ | 223 | /* executable */ |
| 223 | case 'e': { | 224 | case 'e': |
| 224 | char *commstart = cn->corename + cn->used; | 225 | err = cn_esc_printf(cn, "%s", current->comm); |
| 225 | err = cn_printf(cn, "%s", current->comm); | ||
| 226 | cn_escape(commstart); | ||
| 227 | break; | 226 | break; |
| 228 | } | ||
| 229 | case 'E': | 227 | case 'E': |
| 230 | err = cn_print_exe_file(cn); | 228 | err = cn_print_exe_file(cn); |
| 231 | break; | 229 | break; |
| @@ -244,6 +242,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) | |||
| 244 | return err; | 242 | return err; |
| 245 | } | 243 | } |
| 246 | 244 | ||
| 245 | out: | ||
| 247 | /* Backward compatibility with core_uses_pid: | 246 | /* Backward compatibility with core_uses_pid: |
| 248 | * | 247 | * |
| 249 | * If core_pattern does not include a %p (as is the default) | 248 | * If core_pattern does not include a %p (as is the default) |
| @@ -254,7 +253,6 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) | |||
| 254 | if (err) | 253 | if (err) |
| 255 | return err; | 254 | return err; |
| 256 | } | 255 | } |
| 257 | out: | ||
| 258 | return ispipe; | 256 | return ispipe; |
| 259 | } | 257 | } |
| 260 | 258 | ||
| @@ -549,7 +547,7 @@ void do_coredump(siginfo_t *siginfo) | |||
| 549 | if (ispipe < 0) { | 547 | if (ispipe < 0) { |
| 550 | printk(KERN_WARNING "format_corename failed\n"); | 548 | printk(KERN_WARNING "format_corename failed\n"); |
| 551 | printk(KERN_WARNING "Aborting core\n"); | 549 | printk(KERN_WARNING "Aborting core\n"); |
| 552 | goto fail_corename; | 550 | goto fail_unlock; |
| 553 | } | 551 | } |
| 554 | 552 | ||
| 555 | if (cprm.limit == 1) { | 553 | if (cprm.limit == 1) { |
| @@ -584,7 +582,7 @@ void do_coredump(siginfo_t *siginfo) | |||
| 584 | goto fail_dropcount; | 582 | goto fail_dropcount; |
| 585 | } | 583 | } |
| 586 | 584 | ||
| 587 | helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); | 585 | helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); |
| 588 | if (!helper_argv) { | 586 | if (!helper_argv) { |
| 589 | printk(KERN_WARNING "%s failed to allocate memory\n", | 587 | printk(KERN_WARNING "%s failed to allocate memory\n", |
| 590 | __func__); | 588 | __func__); |
| @@ -601,7 +599,7 @@ void do_coredump(siginfo_t *siginfo) | |||
| 601 | 599 | ||
| 602 | argv_free(helper_argv); | 600 | argv_free(helper_argv); |
| 603 | if (retval) { | 601 | if (retval) { |
| 604 | printk(KERN_INFO "Core dump to %s pipe failed\n", | 602 | printk(KERN_INFO "Core dump to |%s pipe failed\n", |
| 605 | cn.corename); | 603 | cn.corename); |
| 606 | goto close_fail; | 604 | goto close_fail; |
| 607 | } | 605 | } |
| @@ -669,7 +667,6 @@ fail_dropcount: | |||
| 669 | atomic_dec(&core_dump_count); | 667 | atomic_dec(&core_dump_count); |
| 670 | fail_unlock: | 668 | fail_unlock: |
| 671 | kfree(cn.corename); | 669 | kfree(cn.corename); |
| 672 | fail_corename: | ||
| 673 | coredump_finish(mm, core_dumped); | 670 | coredump_finish(mm, core_dumped); |
| 674 | revert_creds(old_cred); | 671 | revert_creds(old_cred); |
| 675 | fail_creds: | 672 | fail_creds: |
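The reworked cn_vprintf() above leans on vsnprintf() reporting how much space it would have needed, then reallocating and retrying. A self-contained userspace sketch of that same grow-and-retry pattern follows (illustrative, assuming nothing beyond the C library); unlike the kernel version it copies the va_list so the retry is well defined:

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

struct name_buf { char *buf; size_t used, size; };

/* Grow-and-retry formatting: same idea as cn_vprintf() above. */
static int buf_vprintf(struct name_buf *nb, const char *fmt, va_list ap)
{
	for (;;) {
		va_list cp;
		char *tmp;
		size_t room = nb->size - nb->used;
		int need;

		va_copy(cp, ap);
		need = vsnprintf(nb->buf + nb->used, room, fmt, cp);
		va_end(cp);
		if (need < 0)
			return -1;			/* format error */
		if ((size_t)need < room) {
			nb->used += need;
			return 0;			/* it fit */
		}
		tmp = realloc(nb->buf, nb->used + need + 1);
		if (!tmp)
			return -1;			/* out of memory */
		nb->buf = tmp;
		nb->size = nb->used + need + 1;		/* retry with room */
	}
}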
diff --git a/fs/dcache.c b/fs/dcache.c index 5a23073138df..87bdb5329c3c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -1730,7 +1730,7 @@ EXPORT_SYMBOL(d_add_ci); | |||
| 1730 | * Do the slow-case of the dentry name compare. | 1730 | * Do the slow-case of the dentry name compare. |
| 1731 | * | 1731 | * |
| 1732 | * Unlike the dentry_cmp() function, we need to atomically | 1732 | * Unlike the dentry_cmp() function, we need to atomically |
| 1733 | * load the name, length and inode information, so that the | 1733 | * load the name and length information, so that the |
| 1734 | * filesystem can rely on them, and can use the 'name' and | 1734 | * filesystem can rely on them, and can use the 'name' and |
| 1735 | * 'len' information without worrying about walking off the | 1735 | * 'len' information without worrying about walking off the |
| 1736 | * end of memory etc. | 1736 | * end of memory etc. |
| @@ -1748,22 +1748,18 @@ enum slow_d_compare { | |||
| 1748 | 1748 | ||
| 1749 | static noinline enum slow_d_compare slow_dentry_cmp( | 1749 | static noinline enum slow_d_compare slow_dentry_cmp( |
| 1750 | const struct dentry *parent, | 1750 | const struct dentry *parent, |
| 1751 | struct inode *inode, | ||
| 1752 | struct dentry *dentry, | 1751 | struct dentry *dentry, |
| 1753 | unsigned int seq, | 1752 | unsigned int seq, |
| 1754 | const struct qstr *name) | 1753 | const struct qstr *name) |
| 1755 | { | 1754 | { |
| 1756 | int tlen = dentry->d_name.len; | 1755 | int tlen = dentry->d_name.len; |
| 1757 | const char *tname = dentry->d_name.name; | 1756 | const char *tname = dentry->d_name.name; |
| 1758 | struct inode *i = dentry->d_inode; | ||
| 1759 | 1757 | ||
| 1760 | if (read_seqcount_retry(&dentry->d_seq, seq)) { | 1758 | if (read_seqcount_retry(&dentry->d_seq, seq)) { |
| 1761 | cpu_relax(); | 1759 | cpu_relax(); |
| 1762 | return D_COMP_SEQRETRY; | 1760 | return D_COMP_SEQRETRY; |
| 1763 | } | 1761 | } |
| 1764 | if (parent->d_op->d_compare(parent, inode, | 1762 | if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) |
| 1765 | dentry, i, | ||
| 1766 | tlen, tname, name)) | ||
| 1767 | return D_COMP_NOMATCH; | 1763 | return D_COMP_NOMATCH; |
| 1768 | return D_COMP_OK; | 1764 | return D_COMP_OK; |
| 1769 | } | 1765 | } |
| @@ -1773,7 +1769,6 @@ static noinline enum slow_d_compare slow_dentry_cmp( | |||
| 1773 | * @parent: parent dentry | 1769 | * @parent: parent dentry |
| 1774 | * @name: qstr of name we wish to find | 1770 | * @name: qstr of name we wish to find |
| 1775 | * @seqp: returns d_seq value at the point where the dentry was found | 1771 | * @seqp: returns d_seq value at the point where the dentry was found |
| 1776 | * @inode: returns dentry->d_inode when the inode was found valid. | ||
| 1777 | * Returns: dentry, or NULL | 1772 | * Returns: dentry, or NULL |
| 1778 | * | 1773 | * |
| 1779 | * __d_lookup_rcu is the dcache lookup function for rcu-walk name | 1774 | * __d_lookup_rcu is the dcache lookup function for rcu-walk name |
| @@ -1800,7 +1795,7 @@ static noinline enum slow_d_compare slow_dentry_cmp( | |||
| 1800 | */ | 1795 | */ |
| 1801 | struct dentry *__d_lookup_rcu(const struct dentry *parent, | 1796 | struct dentry *__d_lookup_rcu(const struct dentry *parent, |
| 1802 | const struct qstr *name, | 1797 | const struct qstr *name, |
| 1803 | unsigned *seqp, struct inode *inode) | 1798 | unsigned *seqp) |
| 1804 | { | 1799 | { |
| 1805 | u64 hashlen = name->hash_len; | 1800 | u64 hashlen = name->hash_len; |
| 1806 | const unsigned char *str = name->name; | 1801 | const unsigned char *str = name->name; |
| @@ -1834,11 +1829,10 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent, | |||
| 1834 | seqretry: | 1829 | seqretry: |
| 1835 | /* | 1830 | /* |
| 1836 | * The dentry sequence count protects us from concurrent | 1831 | * The dentry sequence count protects us from concurrent |
| 1837 | * renames, and thus protects inode, parent and name fields. | 1832 | * renames, and thus protects parent and name fields. |
| 1838 | * | 1833 | * |
| 1839 | * The caller must perform a seqcount check in order | 1834 | * The caller must perform a seqcount check in order |
| 1840 | * to do anything useful with the returned dentry, | 1835 | * to do anything useful with the returned dentry. |
| 1841 | * including using the 'd_inode' pointer. | ||
| 1842 | * | 1836 | * |
| 1843 | * NOTE! We do a "raw" seqcount_begin here. That means that | 1837 | * NOTE! We do a "raw" seqcount_begin here. That means that |
| 1844 | * we don't wait for the sequence count to stabilize if it | 1838 | * we don't wait for the sequence count to stabilize if it |
| @@ -1852,12 +1846,12 @@ seqretry: | |||
| 1852 | continue; | 1846 | continue; |
| 1853 | if (d_unhashed(dentry)) | 1847 | if (d_unhashed(dentry)) |
| 1854 | continue; | 1848 | continue; |
| 1855 | *seqp = seq; | ||
| 1856 | 1849 | ||
| 1857 | if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { | 1850 | if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { |
| 1858 | if (dentry->d_name.hash != hashlen_hash(hashlen)) | 1851 | if (dentry->d_name.hash != hashlen_hash(hashlen)) |
| 1859 | continue; | 1852 | continue; |
| 1860 | switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) { | 1853 | *seqp = seq; |
| 1854 | switch (slow_dentry_cmp(parent, dentry, seq, name)) { | ||
| 1861 | case D_COMP_OK: | 1855 | case D_COMP_OK: |
| 1862 | return dentry; | 1856 | return dentry; |
| 1863 | case D_COMP_NOMATCH: | 1857 | case D_COMP_NOMATCH: |
| @@ -1869,6 +1863,7 @@ seqretry: | |||
| 1869 | 1863 | ||
| 1870 | if (dentry->d_name.hash_len != hashlen) | 1864 | if (dentry->d_name.hash_len != hashlen) |
| 1871 | continue; | 1865 | continue; |
| 1866 | *seqp = seq; | ||
| 1872 | if (!dentry_cmp(dentry, str, hashlen_len(hashlen))) | 1867 | if (!dentry_cmp(dentry, str, hashlen_len(hashlen))) |
| 1873 | return dentry; | 1868 | return dentry; |
| 1874 | } | 1869 | } |
| @@ -1966,9 +1961,7 @@ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) | |||
| 1966 | if (parent->d_flags & DCACHE_OP_COMPARE) { | 1961 | if (parent->d_flags & DCACHE_OP_COMPARE) { |
| 1967 | int tlen = dentry->d_name.len; | 1962 | int tlen = dentry->d_name.len; |
| 1968 | const char *tname = dentry->d_name.name; | 1963 | const char *tname = dentry->d_name.name; |
| 1969 | if (parent->d_op->d_compare(parent, parent->d_inode, | 1964 | if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) |
| 1970 | dentry, dentry->d_inode, | ||
| 1971 | tlen, tname, name)) | ||
| 1972 | goto next; | 1965 | goto next; |
| 1973 | } else { | 1966 | } else { |
| 1974 | if (dentry->d_name.len != len) | 1967 | if (dentry->d_name.len != len) |
| @@ -2005,7 +1998,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) | |||
| 2005 | */ | 1998 | */ |
| 2006 | name->hash = full_name_hash(name->name, name->len); | 1999 | name->hash = full_name_hash(name->name, name->len); |
| 2007 | if (dir->d_flags & DCACHE_OP_HASH) { | 2000 | if (dir->d_flags & DCACHE_OP_HASH) { |
| 2008 | int err = dir->d_op->d_hash(dir, dir->d_inode, name); | 2001 | int err = dir->d_op->d_hash(dir, name); |
| 2009 | if (unlikely(err < 0)) | 2002 | if (unlikely(err < 0)) |
| 2010 | return ERR_PTR(err); | 2003 | return ERR_PTR(err); |
| 2011 | } | 2004 | } |
| @@ -2975,34 +2968,21 @@ rename_retry: | |||
| 2975 | goto again; | 2968 | goto again; |
| 2976 | } | 2969 | } |
| 2977 | 2970 | ||
| 2978 | /** | 2971 | void d_tmpfile(struct dentry *dentry, struct inode *inode) |
| 2979 | * find_inode_number - check for dentry with name | ||
| 2980 | * @dir: directory to check | ||
| 2981 | * @name: Name to find. | ||
| 2982 | * | ||
| 2983 | * Check whether a dentry already exists for the given name, | ||
| 2984 | * and return the inode number if it has an inode. Otherwise | ||
| 2985 | * 0 is returned. | ||
| 2986 | * | ||
| 2987 | * This routine is used to post-process directory listings for | ||
| 2988 | * filesystems using synthetic inode numbers, and is necessary | ||
| 2989 | * to keep getcwd() working. | ||
| 2990 | */ | ||
| 2991 | |||
| 2992 | ino_t find_inode_number(struct dentry *dir, struct qstr *name) | ||
| 2993 | { | 2972 | { |
| 2994 | struct dentry * dentry; | 2973 | inode_dec_link_count(inode); |
| 2995 | ino_t ino = 0; | 2974 | BUG_ON(dentry->d_name.name != dentry->d_iname || |
| 2996 | 2975 | !hlist_unhashed(&dentry->d_alias) || | |
| 2997 | dentry = d_hash_and_lookup(dir, name); | 2976 | !d_unlinked(dentry)); |
| 2998 | if (!IS_ERR_OR_NULL(dentry)) { | 2977 | spin_lock(&dentry->d_parent->d_lock); |
| 2999 | if (dentry->d_inode) | 2978 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); |
| 3000 | ino = dentry->d_inode->i_ino; | 2979 | dentry->d_name.len = sprintf(dentry->d_iname, "#%llu", |
| 3001 | dput(dentry); | 2980 | (unsigned long long)inode->i_ino); |
| 3002 | } | 2981 | spin_unlock(&dentry->d_lock); |
| 3003 | return ino; | 2982 | spin_unlock(&dentry->d_parent->d_lock); |
| 2983 | d_instantiate(dentry, inode); | ||
| 3004 | } | 2984 | } |
| 3005 | EXPORT_SYMBOL(find_inode_number); | 2985 | EXPORT_SYMBOL(d_tmpfile); |
| 3006 | 2986 | ||
| 3007 | static __initdata unsigned long dhash_entries; | 2987 | static __initdata unsigned long dhash_entries; |
| 3008 | static int __init set_dhash_entries(char *str) | 2988 | static int __init set_dhash_entries(char *str) |
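d_tmpfile() above lets a filesystem attach a freshly created, already-unlinked inode to the dentry that open(..., O_TMPFILE) passed in. A minimal sketch of a ->tmpfile() method built on it follows; myfs_new_inode() is a made-up helper standing in for whatever allocates an inode with a link count of one.

/* Hypothetical ->tmpfile() implementation using the new d_tmpfile() helper;
 * myfs_new_inode() is assumed to return an inode with i_nlink == 1. */
static int myfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct inode *inode = myfs_new_inode(dir, mode);	/* hypothetical */

	if (IS_ERR(inode))
		return PTR_ERR(inode);
	d_tmpfile(dentry, inode);	/* drops i_nlink to 0 and instantiates */
	return 0;
}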
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index f71ec125290d..d10757635b9c 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c | |||
| @@ -37,16 +37,8 @@ | |||
| 37 | #include <asm/unaligned.h> | 37 | #include <asm/unaligned.h> |
| 38 | #include "ecryptfs_kernel.h" | 38 | #include "ecryptfs_kernel.h" |
| 39 | 39 | ||
| 40 | static int | 40 | #define DECRYPT 0 |
| 41 | ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, | 41 | #define ENCRYPT 1 |
| 42 | struct page *dst_page, int dst_offset, | ||
| 43 | struct page *src_page, int src_offset, int size, | ||
| 44 | unsigned char *iv); | ||
| 45 | static int | ||
| 46 | ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, | ||
| 47 | struct page *dst_page, int dst_offset, | ||
| 48 | struct page *src_page, int src_offset, int size, | ||
| 49 | unsigned char *iv); | ||
| 50 | 42 | ||
| 51 | /** | 43 | /** |
| 52 | * ecryptfs_to_hex | 44 | * ecryptfs_to_hex |
| @@ -336,19 +328,20 @@ static void extent_crypt_complete(struct crypto_async_request *req, int rc) | |||
| 336 | } | 328 | } |
| 337 | 329 | ||
| 338 | /** | 330 | /** |
| 339 | * encrypt_scatterlist | 331 | * crypt_scatterlist |
| 340 | * @crypt_stat: Pointer to the crypt_stat struct to initialize. | 332 | * @crypt_stat: Pointer to the crypt_stat struct to initialize. |
| 341 | * @dest_sg: Destination of encrypted data | 333 | * @dst_sg: Destination of the data after performing the crypto operation |
| 342 | * @src_sg: Data to be encrypted | 334 | * @src_sg: Data to be encrypted or decrypted |
| 343 | * @size: Length of data to be encrypted | 335 | * @size: Length of data |
| 344 | * @iv: iv to use during encryption | 336 | * @iv: IV to use |
| 337 | * @op: ENCRYPT or DECRYPT to indicate the desired operation | ||
| 345 | * | 338 | * |
| 346 | * Returns the number of bytes encrypted; negative value on error | 339 | * Returns the number of bytes encrypted or decrypted; negative value on error |
| 347 | */ | 340 | */ |
| 348 | static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, | 341 | static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, |
| 349 | struct scatterlist *dest_sg, | 342 | struct scatterlist *dst_sg, |
| 350 | struct scatterlist *src_sg, int size, | 343 | struct scatterlist *src_sg, int size, |
| 351 | unsigned char *iv) | 344 | unsigned char *iv, int op) |
| 352 | { | 345 | { |
| 353 | struct ablkcipher_request *req = NULL; | 346 | struct ablkcipher_request *req = NULL; |
| 354 | struct extent_crypt_result ecr; | 347 | struct extent_crypt_result ecr; |
| @@ -391,9 +384,9 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, | |||
| 391 | crypt_stat->flags |= ECRYPTFS_KEY_SET; | 384 | crypt_stat->flags |= ECRYPTFS_KEY_SET; |
| 392 | } | 385 | } |
| 393 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | 386 | mutex_unlock(&crypt_stat->cs_tfm_mutex); |
| 394 | ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes.\n", size); | 387 | ablkcipher_request_set_crypt(req, src_sg, dst_sg, size, iv); |
| 395 | ablkcipher_request_set_crypt(req, src_sg, dest_sg, size, iv); | 388 | rc = op == ENCRYPT ? crypto_ablkcipher_encrypt(req) : |
| 396 | rc = crypto_ablkcipher_encrypt(req); | 389 | crypto_ablkcipher_decrypt(req); |
| 397 | if (rc == -EINPROGRESS || rc == -EBUSY) { | 390 | if (rc == -EINPROGRESS || rc == -EBUSY) { |
| 398 | struct extent_crypt_result *ecr = req->base.data; | 391 | struct extent_crypt_result *ecr = req->base.data; |
| 399 | 392 | ||
| @@ -407,41 +400,43 @@ out: | |||
| 407 | } | 400 | } |
| 408 | 401 | ||
| 409 | /** | 402 | /** |
| 410 | * ecryptfs_lower_offset_for_extent | 403 | * lower_offset_for_page |
| 411 | * | 404 | * |
| 412 | * Convert an eCryptfs page index into a lower byte offset | 405 | * Convert an eCryptfs page index into a lower byte offset |
| 413 | */ | 406 | */ |
| 414 | static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, | 407 | static loff_t lower_offset_for_page(struct ecryptfs_crypt_stat *crypt_stat, |
| 415 | struct ecryptfs_crypt_stat *crypt_stat) | 408 | struct page *page) |
| 416 | { | 409 | { |
| 417 | (*offset) = ecryptfs_lower_header_size(crypt_stat) | 410 | return ecryptfs_lower_header_size(crypt_stat) + |
| 418 | + (crypt_stat->extent_size * extent_num); | 411 | (page->index << PAGE_CACHE_SHIFT); |
| 419 | } | 412 | } |
| 420 | 413 | ||
| 421 | /** | 414 | /** |
| 422 | * ecryptfs_encrypt_extent | 415 | * crypt_extent |
| 423 | * @enc_extent_page: Allocated page into which to encrypt the data in | ||
| 424 | * @page | ||
| 425 | * @crypt_stat: crypt_stat containing cryptographic context for the | 416 | * @crypt_stat: crypt_stat containing cryptographic context for the |
| 426 | * encryption operation | 417 | * encryption operation |
| 427 | * @page: Page containing plaintext data extent to encrypt | 418 | * @dst_page: The page to write the result into |
| 419 | * @src_page: The page to read from | ||
| 428 | * @extent_offset: Page extent offset for use in generating IV | 420 | * @extent_offset: Page extent offset for use in generating IV |
| 421 | * @op: ENCRYPT or DECRYPT to indicate the desired operation | ||
| 429 | * | 422 | * |
| 430 | * Encrypts one extent of data. | 423 | * Encrypts or decrypts one extent of data. |
| 431 | * | 424 | * |
| 432 | * Return zero on success; non-zero otherwise | 425 | * Return zero on success; non-zero otherwise |
| 433 | */ | 426 | */ |
| 434 | static int ecryptfs_encrypt_extent(struct page *enc_extent_page, | 427 | static int crypt_extent(struct ecryptfs_crypt_stat *crypt_stat, |
| 435 | struct ecryptfs_crypt_stat *crypt_stat, | 428 | struct page *dst_page, |
| 436 | struct page *page, | 429 | struct page *src_page, |
| 437 | unsigned long extent_offset) | 430 | unsigned long extent_offset, int op) |
| 438 | { | 431 | { |
| 432 | pgoff_t page_index = op == ENCRYPT ? src_page->index : dst_page->index; | ||
| 439 | loff_t extent_base; | 433 | loff_t extent_base; |
| 440 | char extent_iv[ECRYPTFS_MAX_IV_BYTES]; | 434 | char extent_iv[ECRYPTFS_MAX_IV_BYTES]; |
| 435 | struct scatterlist src_sg, dst_sg; | ||
| 436 | size_t extent_size = crypt_stat->extent_size; | ||
| 441 | int rc; | 437 | int rc; |
| 442 | 438 | ||
| 443 | extent_base = (((loff_t)page->index) | 439 | extent_base = (((loff_t)page_index) * (PAGE_CACHE_SIZE / extent_size)); |
| 444 | * (PAGE_CACHE_SIZE / crypt_stat->extent_size)); | ||
| 445 | rc = ecryptfs_derive_iv(extent_iv, crypt_stat, | 440 | rc = ecryptfs_derive_iv(extent_iv, crypt_stat, |
| 446 | (extent_base + extent_offset)); | 441 | (extent_base + extent_offset)); |
| 447 | if (rc) { | 442 | if (rc) { |
| @@ -450,15 +445,21 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, | |||
| 450 | (unsigned long long)(extent_base + extent_offset), rc); | 445 | (unsigned long long)(extent_base + extent_offset), rc); |
| 451 | goto out; | 446 | goto out; |
| 452 | } | 447 | } |
| 453 | rc = ecryptfs_encrypt_page_offset(crypt_stat, enc_extent_page, 0, | 448 | |
| 454 | page, (extent_offset | 449 | sg_init_table(&src_sg, 1); |
| 455 | * crypt_stat->extent_size), | 450 | sg_init_table(&dst_sg, 1); |
| 456 | crypt_stat->extent_size, extent_iv); | 451 | |
| 452 | sg_set_page(&src_sg, src_page, extent_size, | ||
| 453 | extent_offset * extent_size); | ||
| 454 | sg_set_page(&dst_sg, dst_page, extent_size, | ||
| 455 | extent_offset * extent_size); | ||
| 456 | |||
| 457 | rc = crypt_scatterlist(crypt_stat, &dst_sg, &src_sg, extent_size, | ||
| 458 | extent_iv, op); | ||
| 457 | if (rc < 0) { | 459 | if (rc < 0) { |
| 458 | printk(KERN_ERR "%s: Error attempting to encrypt page with " | 460 | printk(KERN_ERR "%s: Error attempting to crypt page with " |
| 459 | "page->index = [%ld], extent_offset = [%ld]; " | 461 | "page_index = [%ld], extent_offset = [%ld]; " |
| 460 | "rc = [%d]\n", __func__, page->index, extent_offset, | 462 | "rc = [%d]\n", __func__, page_index, extent_offset, rc); |
| 461 | rc); | ||
| 462 | goto out; | 463 | goto out; |
| 463 | } | 464 | } |
| 464 | rc = 0; | 465 | rc = 0; |
| @@ -489,6 +490,7 @@ int ecryptfs_encrypt_page(struct page *page) | |||
| 489 | char *enc_extent_virt; | 490 | char *enc_extent_virt; |
| 490 | struct page *enc_extent_page = NULL; | 491 | struct page *enc_extent_page = NULL; |
| 491 | loff_t extent_offset; | 492 | loff_t extent_offset; |
| 493 | loff_t lower_offset; | ||
| 492 | int rc = 0; | 494 | int rc = 0; |
| 493 | 495 | ||
| 494 | ecryptfs_inode = page->mapping->host; | 496 | ecryptfs_inode = page->mapping->host; |
| @@ -502,75 +504,35 @@ int ecryptfs_encrypt_page(struct page *page) | |||
| 502 | "encrypted extent\n"); | 504 | "encrypted extent\n"); |
| 503 | goto out; | 505 | goto out; |
| 504 | } | 506 | } |
| 505 | enc_extent_virt = kmap(enc_extent_page); | 507 | |
| 506 | for (extent_offset = 0; | 508 | for (extent_offset = 0; |
| 507 | extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); | 509 | extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); |
| 508 | extent_offset++) { | 510 | extent_offset++) { |
| 509 | loff_t offset; | 511 | rc = crypt_extent(crypt_stat, enc_extent_page, page, |
| 510 | 512 | extent_offset, ENCRYPT); | |
| 511 | rc = ecryptfs_encrypt_extent(enc_extent_page, crypt_stat, page, | ||
| 512 | extent_offset); | ||
| 513 | if (rc) { | 513 | if (rc) { |
| 514 | printk(KERN_ERR "%s: Error encrypting extent; " | 514 | printk(KERN_ERR "%s: Error encrypting extent; " |
| 515 | "rc = [%d]\n", __func__, rc); | 515 | "rc = [%d]\n", __func__, rc); |
| 516 | goto out; | 516 | goto out; |
| 517 | } | 517 | } |
| 518 | ecryptfs_lower_offset_for_extent( | ||
| 519 | &offset, ((((loff_t)page->index) | ||
| 520 | * (PAGE_CACHE_SIZE | ||
| 521 | / crypt_stat->extent_size)) | ||
| 522 | + extent_offset), crypt_stat); | ||
| 523 | rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, | ||
| 524 | offset, crypt_stat->extent_size); | ||
| 525 | if (rc < 0) { | ||
| 526 | ecryptfs_printk(KERN_ERR, "Error attempting " | ||
| 527 | "to write lower page; rc = [%d]" | ||
| 528 | "\n", rc); | ||
| 529 | goto out; | ||
| 530 | } | ||
| 531 | } | ||
| 532 | rc = 0; | ||
| 533 | out: | ||
| 534 | if (enc_extent_page) { | ||
| 535 | kunmap(enc_extent_page); | ||
| 536 | __free_page(enc_extent_page); | ||
| 537 | } | 518 | } |
| 538 | return rc; | ||
| 539 | } | ||
| 540 | 519 | ||
| 541 | static int ecryptfs_decrypt_extent(struct page *page, | 520 | lower_offset = lower_offset_for_page(crypt_stat, page); |
| 542 | struct ecryptfs_crypt_stat *crypt_stat, | 521 | enc_extent_virt = kmap(enc_extent_page); |
| 543 | struct page *enc_extent_page, | 522 | rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset, |
| 544 | unsigned long extent_offset) | 523 | PAGE_CACHE_SIZE); |
| 545 | { | 524 | kunmap(enc_extent_page); |
| 546 | loff_t extent_base; | ||
| 547 | char extent_iv[ECRYPTFS_MAX_IV_BYTES]; | ||
| 548 | int rc; | ||
| 549 | |||
| 550 | extent_base = (((loff_t)page->index) | ||
| 551 | * (PAGE_CACHE_SIZE / crypt_stat->extent_size)); | ||
| 552 | rc = ecryptfs_derive_iv(extent_iv, crypt_stat, | ||
| 553 | (extent_base + extent_offset)); | ||
| 554 | if (rc) { | ||
| 555 | ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for " | ||
| 556 | "extent [0x%.16llx]; rc = [%d]\n", | ||
| 557 | (unsigned long long)(extent_base + extent_offset), rc); | ||
| 558 | goto out; | ||
| 559 | } | ||
| 560 | rc = ecryptfs_decrypt_page_offset(crypt_stat, page, | ||
| 561 | (extent_offset | ||
| 562 | * crypt_stat->extent_size), | ||
| 563 | enc_extent_page, 0, | ||
| 564 | crypt_stat->extent_size, extent_iv); | ||
| 565 | if (rc < 0) { | 525 | if (rc < 0) { |
| 566 | printk(KERN_ERR "%s: Error attempting to decrypt to page with " | 526 | ecryptfs_printk(KERN_ERR, |
| 567 | "page->index = [%ld], extent_offset = [%ld]; " | 527 | "Error attempting to write lower page; rc = [%d]\n", |
| 568 | "rc = [%d]\n", __func__, page->index, extent_offset, | 528 | rc); |
| 569 | rc); | ||
| 570 | goto out; | 529 | goto out; |
| 571 | } | 530 | } |
| 572 | rc = 0; | 531 | rc = 0; |
| 573 | out: | 532 | out: |
| 533 | if (enc_extent_page) { | ||
| 534 | __free_page(enc_extent_page); | ||
| 535 | } | ||
| 574 | return rc; | 536 | return rc; |
| 575 | } | 537 | } |
| 576 | 538 | ||
| @@ -594,43 +556,33 @@ int ecryptfs_decrypt_page(struct page *page) | |||
| 594 | { | 556 | { |
| 595 | struct inode *ecryptfs_inode; | 557 | struct inode *ecryptfs_inode; |
| 596 | struct ecryptfs_crypt_stat *crypt_stat; | 558 | struct ecryptfs_crypt_stat *crypt_stat; |
| 597 | char *enc_extent_virt; | 559 | char *page_virt; |
| 598 | struct page *enc_extent_page = NULL; | ||
| 599 | unsigned long extent_offset; | 560 | unsigned long extent_offset; |
| 561 | loff_t lower_offset; | ||
| 600 | int rc = 0; | 562 | int rc = 0; |
| 601 | 563 | ||
| 602 | ecryptfs_inode = page->mapping->host; | 564 | ecryptfs_inode = page->mapping->host; |
| 603 | crypt_stat = | 565 | crypt_stat = |
| 604 | &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); | 566 | &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); |
| 605 | BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); | 567 | BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); |
| 606 | enc_extent_page = alloc_page(GFP_USER); | 568 | |
| 607 | if (!enc_extent_page) { | 569 | lower_offset = lower_offset_for_page(crypt_stat, page); |
| 608 | rc = -ENOMEM; | 570 | page_virt = kmap(page); |
| 609 | ecryptfs_printk(KERN_ERR, "Error allocating memory for " | 571 | rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_CACHE_SIZE, |
| 610 | "encrypted extent\n"); | 572 | ecryptfs_inode); |
| 573 | kunmap(page); | ||
| 574 | if (rc < 0) { | ||
| 575 | ecryptfs_printk(KERN_ERR, | ||
| 576 | "Error attempting to read lower page; rc = [%d]\n", | ||
| 577 | rc); | ||
| 611 | goto out; | 578 | goto out; |
| 612 | } | 579 | } |
| 613 | enc_extent_virt = kmap(enc_extent_page); | 580 | |
| 614 | for (extent_offset = 0; | 581 | for (extent_offset = 0; |
| 615 | extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); | 582 | extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); |
| 616 | extent_offset++) { | 583 | extent_offset++) { |
| 617 | loff_t offset; | 584 | rc = crypt_extent(crypt_stat, page, page, |
| 618 | 585 | extent_offset, DECRYPT); | |
| 619 | ecryptfs_lower_offset_for_extent( | ||
| 620 | &offset, ((page->index * (PAGE_CACHE_SIZE | ||
| 621 | / crypt_stat->extent_size)) | ||
| 622 | + extent_offset), crypt_stat); | ||
| 623 | rc = ecryptfs_read_lower(enc_extent_virt, offset, | ||
| 624 | crypt_stat->extent_size, | ||
| 625 | ecryptfs_inode); | ||
| 626 | if (rc < 0) { | ||
| 627 | ecryptfs_printk(KERN_ERR, "Error attempting " | ||
| 628 | "to read lower page; rc = [%d]" | ||
| 629 | "\n", rc); | ||
| 630 | goto out; | ||
| 631 | } | ||
| 632 | rc = ecryptfs_decrypt_extent(page, crypt_stat, enc_extent_page, | ||
| 633 | extent_offset); | ||
| 634 | if (rc) { | 586 | if (rc) { |
| 635 | printk(KERN_ERR "%s: Error encrypting extent; " | 587 | printk(KERN_ERR "%s: Error encrypting extent; " |
| 636 | "rc = [%d]\n", __func__, rc); | 588 | "rc = [%d]\n", __func__, rc); |
| @@ -638,142 +590,9 @@ int ecryptfs_decrypt_page(struct page *page) | |||
| 638 | } | 590 | } |
| 639 | } | 591 | } |
| 640 | out: | 592 | out: |
| 641 | if (enc_extent_page) { | ||
| 642 | kunmap(enc_extent_page); | ||
| 643 | __free_page(enc_extent_page); | ||
| 644 | } | ||
| 645 | return rc; | 593 | return rc; |
| 646 | } | 594 | } |
| 647 | 595 | ||
| 648 | /** | ||
| 649 | * decrypt_scatterlist | ||
| 650 | * @crypt_stat: Cryptographic context | ||
| 651 | * @dest_sg: The destination scatterlist to decrypt into | ||
| 652 | * @src_sg: The source scatterlist to decrypt from | ||
| 653 | * @size: The number of bytes to decrypt | ||
| 654 | * @iv: The initialization vector to use for the decryption | ||
| 655 | * | ||
| 656 | * Returns the number of bytes decrypted; negative value on error | ||
| 657 | */ | ||
| 658 | static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, | ||
| 659 | struct scatterlist *dest_sg, | ||
| 660 | struct scatterlist *src_sg, int size, | ||
| 661 | unsigned char *iv) | ||
| 662 | { | ||
| 663 | struct ablkcipher_request *req = NULL; | ||
| 664 | struct extent_crypt_result ecr; | ||
| 665 | int rc = 0; | ||
| 666 | |||
| 667 | BUG_ON(!crypt_stat || !crypt_stat->tfm | ||
| 668 | || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); | ||
| 669 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
| 670 | ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", | ||
| 671 | crypt_stat->key_size); | ||
| 672 | ecryptfs_dump_hex(crypt_stat->key, | ||
| 673 | crypt_stat->key_size); | ||
| 674 | } | ||
| 675 | |||
| 676 | init_completion(&ecr.completion); | ||
| 677 | |||
| 678 | mutex_lock(&crypt_stat->cs_tfm_mutex); | ||
| 679 | req = ablkcipher_request_alloc(crypt_stat->tfm, GFP_NOFS); | ||
| 680 | if (!req) { | ||
| 681 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | ||
| 682 | rc = -ENOMEM; | ||
| 683 | goto out; | ||
| 684 | } | ||
| 685 | |||
| 686 | ablkcipher_request_set_callback(req, | ||
| 687 | CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, | ||
| 688 | extent_crypt_complete, &ecr); | ||
| 689 | /* Consider doing this once, when the file is opened */ | ||
| 690 | if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) { | ||
| 691 | rc = crypto_ablkcipher_setkey(crypt_stat->tfm, crypt_stat->key, | ||
| 692 | crypt_stat->key_size); | ||
| 693 | if (rc) { | ||
| 694 | ecryptfs_printk(KERN_ERR, | ||
| 695 | "Error setting key; rc = [%d]\n", | ||
| 696 | rc); | ||
| 697 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | ||
| 698 | rc = -EINVAL; | ||
| 699 | goto out; | ||
| 700 | } | ||
| 701 | crypt_stat->flags |= ECRYPTFS_KEY_SET; | ||
| 702 | } | ||
| 703 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | ||
| 704 | ecryptfs_printk(KERN_DEBUG, "Decrypting [%d] bytes.\n", size); | ||
| 705 | ablkcipher_request_set_crypt(req, src_sg, dest_sg, size, iv); | ||
| 706 | rc = crypto_ablkcipher_decrypt(req); | ||
| 707 | if (rc == -EINPROGRESS || rc == -EBUSY) { | ||
| 708 | struct extent_crypt_result *ecr = req->base.data; | ||
| 709 | |||
| 710 | wait_for_completion(&ecr->completion); | ||
| 711 | rc = ecr->rc; | ||
| 712 | INIT_COMPLETION(ecr->completion); | ||
| 713 | } | ||
| 714 | out: | ||
| 715 | ablkcipher_request_free(req); | ||
| 716 | return rc; | ||
| 717 | |||
| 718 | } | ||
| 719 | |||
| 720 | /** | ||
| 721 | * ecryptfs_encrypt_page_offset | ||
| 722 | * @crypt_stat: The cryptographic context | ||
| 723 | * @dst_page: The page to encrypt into | ||
| 724 | * @dst_offset: The offset in the page to encrypt into | ||
| 725 | * @src_page: The page to encrypt from | ||
| 726 | * @src_offset: The offset in the page to encrypt from | ||
| 727 | * @size: The number of bytes to encrypt | ||
| 728 | * @iv: The initialization vector to use for the encryption | ||
| 729 | * | ||
| 730 | * Returns the number of bytes encrypted | ||
| 731 | */ | ||
| 732 | static int | ||
| 733 | ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, | ||
| 734 | struct page *dst_page, int dst_offset, | ||
| 735 | struct page *src_page, int src_offset, int size, | ||
| 736 | unsigned char *iv) | ||
| 737 | { | ||
| 738 | struct scatterlist src_sg, dst_sg; | ||
| 739 | |||
| 740 | sg_init_table(&src_sg, 1); | ||
| 741 | sg_init_table(&dst_sg, 1); | ||
| 742 | |||
| 743 | sg_set_page(&src_sg, src_page, size, src_offset); | ||
| 744 | sg_set_page(&dst_sg, dst_page, size, dst_offset); | ||
| 745 | return encrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); | ||
| 746 | } | ||
| 747 | |||
| 748 | /** | ||
| 749 | * ecryptfs_decrypt_page_offset | ||
| 750 | * @crypt_stat: The cryptographic context | ||
| 751 | * @dst_page: The page to decrypt into | ||
| 752 | * @dst_offset: The offset in the page to decrypt into | ||
| 753 | * @src_page: The page to decrypt from | ||
| 754 | * @src_offset: The offset in the page to decrypt from | ||
| 755 | * @size: The number of bytes to decrypt | ||
| 756 | * @iv: The initialization vector to use for the decryption | ||
| 757 | * | ||
| 758 | * Returns the number of bytes decrypted | ||
| 759 | */ | ||
| 760 | static int | ||
| 761 | ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, | ||
| 762 | struct page *dst_page, int dst_offset, | ||
| 763 | struct page *src_page, int src_offset, int size, | ||
| 764 | unsigned char *iv) | ||
| 765 | { | ||
| 766 | struct scatterlist src_sg, dst_sg; | ||
| 767 | |||
| 768 | sg_init_table(&src_sg, 1); | ||
| 769 | sg_set_page(&src_sg, src_page, size, src_offset); | ||
| 770 | |||
| 771 | sg_init_table(&dst_sg, 1); | ||
| 772 | sg_set_page(&dst_sg, dst_page, size, dst_offset); | ||
| 773 | |||
| 774 | return decrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); | ||
| 775 | } | ||
| 776 | |||
| 777 | #define ECRYPTFS_MAX_SCATTERLIST_LEN 4 | 596 | #define ECRYPTFS_MAX_SCATTERLIST_LEN 4 |
| 778 | 597 | ||
| 779 | /** | 598 | /** |
| @@ -2243,12 +2062,11 @@ out: | |||
| 2243 | */ | 2062 | */ |
| 2244 | int ecryptfs_decode_and_decrypt_filename(char **plaintext_name, | 2063 | int ecryptfs_decode_and_decrypt_filename(char **plaintext_name, |
| 2245 | size_t *plaintext_name_size, | 2064 | size_t *plaintext_name_size, |
| 2246 | struct dentry *ecryptfs_dir_dentry, | 2065 | struct super_block *sb, |
| 2247 | const char *name, size_t name_size) | 2066 | const char *name, size_t name_size) |
| 2248 | { | 2067 | { |
| 2249 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = | 2068 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = |
| 2250 | &ecryptfs_superblock_to_private( | 2069 | &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; |
| 2251 | ecryptfs_dir_dentry->d_sb)->mount_crypt_stat; | ||
| 2252 | char *decoded_name; | 2070 | char *decoded_name; |
| 2253 | size_t decoded_name_size; | 2071 | size_t decoded_name_size; |
| 2254 | size_t packet_size; | 2072 | size_t packet_size; |
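The eCryptfs refactor above replaces per-extent lower reads and writes with one full-page transfer, so `lower_offset_for_page()` only has to add the metadata header size to the page's byte offset. A standalone sketch of that arithmetic, with the header and page sizes as assumed sample values (the kernel takes them from `crypt_stat` and `PAGE_CACHE_SHIFT`):

```c
#include <stdint.h>
#include <stdio.h>

/* Assumed sample geometry: 8 KiB of eCryptfs metadata at the front of the
 * lower file and 4 KiB pages; the kernel reads these from crypt_stat. */
#define LOWER_HEADER_SIZE 8192ULL
#define PAGE_SHIFT_BITS   12

/* Mirrors lower_offset_for_page(): header, then page->index full pages. */
static uint64_t lower_offset_for_page(uint64_t page_index)
{
	return LOWER_HEADER_SIZE + (page_index << PAGE_SHIFT_BITS);
}

int main(void)
{
	/* Page 3 of the upper file starts at 8192 + 3 * 4096 = 20480 below. */
	printf("page 3 -> lower offset %llu\n",
	       (unsigned long long)lower_offset_for_page(3));
	return 0;
}
```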
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index f622a733f7ad..df19d34a033b 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
| @@ -575,7 +575,7 @@ int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, | |||
| 575 | struct inode *ecryptfs_inode); | 575 | struct inode *ecryptfs_inode); |
| 576 | int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, | 576 | int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, |
| 577 | size_t *decrypted_name_size, | 577 | size_t *decrypted_name_size, |
| 578 | struct dentry *ecryptfs_dentry, | 578 | struct super_block *sb, |
| 579 | const char *name, size_t name_size); | 579 | const char *name, size_t name_size); |
| 580 | int ecryptfs_fill_zeros(struct file *file, loff_t new_length); | 580 | int ecryptfs_fill_zeros(struct file *file, loff_t new_length); |
| 581 | int ecryptfs_encrypt_and_encode_filename( | 581 | int ecryptfs_encrypt_and_encode_filename( |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 9aa05e08060b..992cf95830b5 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
| @@ -49,7 +49,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, | |||
| 49 | unsigned long nr_segs, loff_t pos) | 49 | unsigned long nr_segs, loff_t pos) |
| 50 | { | 50 | { |
| 51 | ssize_t rc; | 51 | ssize_t rc; |
| 52 | struct path lower; | 52 | struct path *path; |
| 53 | struct file *file = iocb->ki_filp; | 53 | struct file *file = iocb->ki_filp; |
| 54 | 54 | ||
| 55 | rc = generic_file_aio_read(iocb, iov, nr_segs, pos); | 55 | rc = generic_file_aio_read(iocb, iov, nr_segs, pos); |
| @@ -60,9 +60,8 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, | |||
| 60 | if (-EIOCBQUEUED == rc) | 60 | if (-EIOCBQUEUED == rc) |
| 61 | rc = wait_on_sync_kiocb(iocb); | 61 | rc = wait_on_sync_kiocb(iocb); |
| 62 | if (rc >= 0) { | 62 | if (rc >= 0) { |
| 63 | lower.dentry = ecryptfs_dentry_to_lower(file->f_path.dentry); | 63 | path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); |
| 64 | lower.mnt = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry); | 64 | touch_atime(path); |
| 65 | touch_atime(&lower); | ||
| 66 | } | 65 | } |
| 67 | return rc; | 66 | return rc; |
| 68 | } | 67 | } |
| @@ -70,7 +69,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, | |||
| 70 | struct ecryptfs_getdents_callback { | 69 | struct ecryptfs_getdents_callback { |
| 71 | struct dir_context ctx; | 70 | struct dir_context ctx; |
| 72 | struct dir_context *caller; | 71 | struct dir_context *caller; |
| 73 | struct dentry *dentry; | 72 | struct super_block *sb; |
| 74 | int filldir_called; | 73 | int filldir_called; |
| 75 | int entries_written; | 74 | int entries_written; |
| 76 | }; | 75 | }; |
| @@ -88,7 +87,7 @@ ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen, | |||
| 88 | 87 | ||
| 89 | buf->filldir_called++; | 88 | buf->filldir_called++; |
| 90 | rc = ecryptfs_decode_and_decrypt_filename(&name, &name_size, | 89 | rc = ecryptfs_decode_and_decrypt_filename(&name, &name_size, |
| 91 | buf->dentry, lower_name, | 90 | buf->sb, lower_name, |
| 92 | lower_namelen); | 91 | lower_namelen); |
| 93 | if (rc) { | 92 | if (rc) { |
| 94 | printk(KERN_ERR "%s: Error attempting to decode and decrypt " | 93 | printk(KERN_ERR "%s: Error attempting to decode and decrypt " |
| @@ -114,15 +113,14 @@ static int ecryptfs_readdir(struct file *file, struct dir_context *ctx) | |||
| 114 | { | 113 | { |
| 115 | int rc; | 114 | int rc; |
| 116 | struct file *lower_file; | 115 | struct file *lower_file; |
| 117 | struct inode *inode; | 116 | struct inode *inode = file_inode(file); |
| 118 | struct ecryptfs_getdents_callback buf = { | 117 | struct ecryptfs_getdents_callback buf = { |
| 119 | .ctx.actor = ecryptfs_filldir, | 118 | .ctx.actor = ecryptfs_filldir, |
| 120 | .caller = ctx, | 119 | .caller = ctx, |
| 121 | .dentry = file->f_path.dentry | 120 | .sb = inode->i_sb, |
| 122 | }; | 121 | }; |
| 123 | lower_file = ecryptfs_file_to_lower(file); | 122 | lower_file = ecryptfs_file_to_lower(file); |
| 124 | lower_file->f_pos = ctx->pos; | 123 | lower_file->f_pos = ctx->pos; |
| 125 | inode = file_inode(file); | ||
| 126 | rc = iterate_dir(lower_file, &buf.ctx); | 124 | rc = iterate_dir(lower_file, &buf.ctx); |
| 127 | ctx->pos = buf.ctx.pos; | 125 | ctx->pos = buf.ctx.pos; |
| 128 | if (rc < 0) | 126 | if (rc < 0) |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5eab400e2590..67e9b6339691 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
| @@ -358,7 +358,7 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry, | |||
| 358 | 358 | ||
| 359 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); | 359 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); |
| 360 | fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode); | 360 | fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode); |
| 361 | BUG_ON(!lower_dentry->d_count); | 361 | BUG_ON(!d_count(lower_dentry)); |
| 362 | 362 | ||
| 363 | ecryptfs_set_dentry_private(dentry, dentry_info); | 363 | ecryptfs_set_dentry_private(dentry, dentry_info); |
| 364 | ecryptfs_set_dentry_lower(dentry, lower_dentry); | 364 | ecryptfs_set_dentry_lower(dentry, lower_dentry); |
| @@ -679,7 +679,7 @@ static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, | |||
| 679 | set_fs(old_fs); | 679 | set_fs(old_fs); |
| 680 | if (rc < 0) | 680 | if (rc < 0) |
| 681 | goto out; | 681 | goto out; |
| 682 | rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry, | 682 | rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry->d_sb, |
| 683 | lower_buf, rc); | 683 | lower_buf, rc); |
| 684 | out: | 684 | out: |
| 685 | kfree(lower_buf); | 685 | kfree(lower_buf); |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index e924cf45aad9..eb1c5979ecaf 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
| @@ -120,16 +120,15 @@ static int ecryptfs_init_lower_file(struct dentry *dentry, | |||
| 120 | struct file **lower_file) | 120 | struct file **lower_file) |
| 121 | { | 121 | { |
| 122 | const struct cred *cred = current_cred(); | 122 | const struct cred *cred = current_cred(); |
| 123 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); | 123 | struct path *path = ecryptfs_dentry_to_lower_path(dentry); |
| 124 | struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); | ||
| 125 | int rc; | 124 | int rc; |
| 126 | 125 | ||
| 127 | rc = ecryptfs_privileged_open(lower_file, lower_dentry, lower_mnt, | 126 | rc = ecryptfs_privileged_open(lower_file, path->dentry, path->mnt, |
| 128 | cred); | 127 | cred); |
| 129 | if (rc) { | 128 | if (rc) { |
| 130 | printk(KERN_ERR "Error opening lower file " | 129 | printk(KERN_ERR "Error opening lower file " |
| 131 | "for lower_dentry [0x%p] and lower_mnt [0x%p]; " | 130 | "for lower_dentry [0x%p] and lower_mnt [0x%p]; " |
| 132 | "rc = [%d]\n", lower_dentry, lower_mnt, rc); | 131 | "rc = [%d]\n", path->dentry, path->mnt, rc); |
| 133 | (*lower_file) = NULL; | 132 | (*lower_file) = NULL; |
| 134 | } | 133 | } |
| 135 | return rc; | 134 | return rc; |
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 49ff8ea08f1c..e57380e5f6bd 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
| @@ -247,14 +247,13 @@ int ecryptfs_process_response(struct ecryptfs_daemon *daemon, | |||
| 247 | goto unlock; | 247 | goto unlock; |
| 248 | } | 248 | } |
| 249 | msg_size = (sizeof(*msg) + msg->data_len); | 249 | msg_size = (sizeof(*msg) + msg->data_len); |
| 250 | msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); | 250 | msg_ctx->msg = kmemdup(msg, msg_size, GFP_KERNEL); |
| 251 | if (!msg_ctx->msg) { | 251 | if (!msg_ctx->msg) { |
| 252 | rc = -ENOMEM; | 252 | rc = -ENOMEM; |
| 253 | printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of " | 253 | printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of " |
| 254 | "GFP_KERNEL memory\n", __func__, msg_size); | 254 | "GFP_KERNEL memory\n", __func__, msg_size); |
| 255 | goto unlock; | 255 | goto unlock; |
| 256 | } | 256 | } |
| 257 | memcpy(msg_ctx->msg, msg, msg_size); | ||
| 258 | msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE; | 257 | msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE; |
| 259 | wake_up_process(msg_ctx->task); | 258 | wake_up_process(msg_ctx->task); |
| 260 | rc = 0; | 259 | rc = 0; |
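The messaging hunk above folds a `kmalloc()` plus `memcpy()` into a single `kmemdup()` call, which allocates and copies in one step. A userspace analogue of that helper makes the equivalence explicit (`memdup` here is a local stand-in, not a libc function):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Local stand-in for kmemdup(): allocate len bytes and copy src into them. */
static void *memdup(const void *src, size_t len)
{
	void *p = malloc(len);

	if (p)
		memcpy(p, src, len);
	return p;
}

int main(void)
{
	const char msg[] = "reply payload";
	char *copy = memdup(msg, sizeof(msg));

	if (!copy)
		return 1;
	puts(copy);
	free(copy);
	return 0;
}
```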
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 141aee31884f..a8766b880c07 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c | |||
| @@ -45,8 +45,8 @@ static struct super_block *efivarfs_sb; | |||
| 45 | * So we need to perform a case-sensitive match on part 1 and a | 45 | * So we need to perform a case-sensitive match on part 1 and a |
| 46 | * case-insensitive match on part 2. | 46 | * case-insensitive match on part 2. |
| 47 | */ | 47 | */ |
| 48 | static int efivarfs_d_compare(const struct dentry *parent, const struct inode *pinode, | 48 | static int efivarfs_d_compare(const struct dentry *parent, |
| 49 | const struct dentry *dentry, const struct inode *inode, | 49 | const struct dentry *dentry, |
| 50 | unsigned int len, const char *str, | 50 | unsigned int len, const char *str, |
| 51 | const struct qstr *name) | 51 | const struct qstr *name) |
| 52 | { | 52 | { |
| @@ -63,8 +63,7 @@ static int efivarfs_d_compare(const struct dentry *parent, const struct inode *p | |||
| 63 | return strncasecmp(name->name + guid, str + guid, EFI_VARIABLE_GUID_LEN); | 63 | return strncasecmp(name->name + guid, str + guid, EFI_VARIABLE_GUID_LEN); |
| 64 | } | 64 | } |
| 65 | 65 | ||
| 66 | static int efivarfs_d_hash(const struct dentry *dentry, | 66 | static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr) |
| 67 | const struct inode *inode, struct qstr *qstr) | ||
| 68 | { | 67 | { |
| 69 | unsigned long hash = init_name_hash(); | 68 | unsigned long hash = init_name_hash(); |
| 70 | const unsigned char *s = qstr->name; | 69 | const unsigned char *s = qstr->name; |
| @@ -108,7 +107,7 @@ static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) | |||
| 108 | q.name = name; | 107 | q.name = name; |
| 109 | q.len = strlen(name); | 108 | q.len = strlen(name); |
| 110 | 109 | ||
| 111 | err = efivarfs_d_hash(NULL, NULL, &q); | 110 | err = efivarfs_d_hash(NULL, &q); |
| 112 | if (err) | 111 | if (err) |
| 113 | return ERR_PTR(err); | 112 | return ERR_PTR(err); |
| 114 | 113 | ||
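efivarfs names look like "BootOrder-8be4df61-93ca-11d2-aa0d-00e098032b8c": the variable part before the GUID is matched case-sensitively and the GUID case-insensitively, which the slimmed-down `d_compare`/`d_hash` prototypes above still implement. A userspace sketch of that split, with `GUID_LEN` standing in for `EFI_VARIABLE_GUID_LEN` (the 36 characters of a textual GUID) and the comparison simplified to its two phases:

```c
#include <stdio.h>
#include <string.h>
#include <strings.h>	/* strncasecmp() */

#define GUID_LEN 36	/* stands in for EFI_VARIABLE_GUID_LEN */

/* Compare two efivarfs names: case-sensitive variable part,
 * case-insensitive GUID part, mirroring efivarfs_d_compare() above. */
static int efivar_name_cmp(const char *a, const char *b)
{
	size_t len = strlen(a);

	if (strlen(b) != len || len <= GUID_LEN)
		return 1;		/* different */
	if (memcmp(a, b, len - GUID_LEN))
		return 1;		/* variable part differs */
	return strncasecmp(a + len - GUID_LEN, b + len - GUID_LEN, GUID_LEN);
}

int main(void)
{
	const char *x = "BootOrder-8be4df61-93ca-11d2-aa0d-00e098032b8c";
	const char *y = "BootOrder-8BE4DF61-93CA-11D2-AA0D-00E098032B8C";

	printf("%s\n", efivar_name_cmp(x, y) ? "different" : "same");
	return 0;
}
```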
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index deecc7294a67..9ad17b15b454 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
| @@ -34,6 +34,7 @@ | |||
| 34 | #include <linux/mutex.h> | 34 | #include <linux/mutex.h> |
| 35 | #include <linux/anon_inodes.h> | 35 | #include <linux/anon_inodes.h> |
| 36 | #include <linux/device.h> | 36 | #include <linux/device.h> |
| 37 | #include <linux/freezer.h> | ||
| 37 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
| 38 | #include <asm/io.h> | 39 | #include <asm/io.h> |
| 39 | #include <asm/mman.h> | 40 | #include <asm/mman.h> |
| @@ -1602,7 +1603,8 @@ fetch_events: | |||
| 1602 | } | 1603 | } |
| 1603 | 1604 | ||
| 1604 | spin_unlock_irqrestore(&ep->lock, flags); | 1605 | spin_unlock_irqrestore(&ep->lock, flags); |
| 1605 | if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) | 1606 | if (!freezable_schedule_hrtimeout_range(to, slack, |
| 1607 | HRTIMER_MODE_ABS)) | ||
| 1606 | timed_out = 1; | 1608 | timed_out = 1; |
| 1607 | 1609 | ||
| 1608 | spin_lock_irqsave(&ep->lock, flags); | 1610 | spin_lock_irqsave(&ep->lock, flags); |
| @@ -1975,8 +1977,8 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, | |||
| 1975 | return -EINVAL; | 1977 | return -EINVAL; |
| 1976 | if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) | 1978 | if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) |
| 1977 | return -EFAULT; | 1979 | return -EFAULT; |
| 1978 | sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); | 1980 | sigsaved = current->blocked; |
| 1979 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | 1981 | set_current_blocked(&ksigmask); |
| 1980 | } | 1982 | } |
| 1981 | 1983 | ||
| 1982 | error = sys_epoll_wait(epfd, events, maxevents, timeout); | 1984 | error = sys_epoll_wait(epfd, events, maxevents, timeout); |
| @@ -1993,7 +1995,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, | |||
| 1993 | sizeof(sigsaved)); | 1995 | sizeof(sigsaved)); |
| 1994 | set_restore_sigmask(); | 1996 | set_restore_sigmask(); |
| 1995 | } else | 1997 | } else |
| 1996 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 1998 | set_current_blocked(&sigsaved); |
| 1997 | } | 1999 | } |
| 1998 | 2000 | ||
| 1999 | return error; | 2001 | return error; |
| @@ -2020,8 +2022,8 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, | |||
| 2020 | if (copy_from_user(&csigmask, sigmask, sizeof(csigmask))) | 2022 | if (copy_from_user(&csigmask, sigmask, sizeof(csigmask))) |
| 2021 | return -EFAULT; | 2023 | return -EFAULT; |
| 2022 | sigset_from_compat(&ksigmask, &csigmask); | 2024 | sigset_from_compat(&ksigmask, &csigmask); |
| 2023 | sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); | 2025 | sigsaved = current->blocked; |
| 2024 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | 2026 | set_current_blocked(&ksigmask); |
| 2025 | } | 2027 | } |
| 2026 | 2028 | ||
| 2027 | err = sys_epoll_wait(epfd, events, maxevents, timeout); | 2029 | err = sys_epoll_wait(epfd, events, maxevents, timeout); |
| @@ -2038,7 +2040,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, | |||
| 2038 | sizeof(sigsaved)); | 2040 | sizeof(sigsaved)); |
| 2039 | set_restore_sigmask(); | 2041 | set_restore_sigmask(); |
| 2040 | } else | 2042 | } else |
| 2041 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 2043 | set_current_blocked(&sigsaved); |
| 2042 | } | 2044 | } |
| 2043 | 2045 | ||
| 2044 | return err; | 2046 | return err; |
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
| @@ -110,13 +110,14 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) | |||
| 110 | static const struct open_flags uselib_flags = { | 110 | static const struct open_flags uselib_flags = { |
| 111 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, | 111 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, |
| 112 | .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN, | 112 | .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN, |
| 113 | .intent = LOOKUP_OPEN | 113 | .intent = LOOKUP_OPEN, |
| 114 | .lookup_flags = LOOKUP_FOLLOW, | ||
| 114 | }; | 115 | }; |
| 115 | 116 | ||
| 116 | if (IS_ERR(tmp)) | 117 | if (IS_ERR(tmp)) |
| 117 | goto out; | 118 | goto out; |
| 118 | 119 | ||
| 119 | file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW); | 120 | file = do_filp_open(AT_FDCWD, tmp, &uselib_flags); |
| 120 | putname(tmp); | 121 | putname(tmp); |
| 121 | error = PTR_ERR(file); | 122 | error = PTR_ERR(file); |
| 122 | if (IS_ERR(file)) | 123 | if (IS_ERR(file)) |
| @@ -756,10 +757,11 @@ struct file *open_exec(const char *name) | |||
| 756 | static const struct open_flags open_exec_flags = { | 757 | static const struct open_flags open_exec_flags = { |
| 757 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, | 758 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, |
| 758 | .acc_mode = MAY_EXEC | MAY_OPEN, | 759 | .acc_mode = MAY_EXEC | MAY_OPEN, |
| 759 | .intent = LOOKUP_OPEN | 760 | .intent = LOOKUP_OPEN, |
| 761 | .lookup_flags = LOOKUP_FOLLOW, | ||
| 760 | }; | 762 | }; |
| 761 | 763 | ||
| 762 | file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags, LOOKUP_FOLLOW); | 764 | file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags); |
| 763 | if (IS_ERR(file)) | 765 | if (IS_ERR(file)) |
| 764 | goto out; | 766 | goto out; |
| 765 | 767 | ||
| @@ -930,6 +932,7 @@ static int de_thread(struct task_struct *tsk) | |||
| 930 | * also take its birthdate (always earlier than our own). | 932 | * also take its birthdate (always earlier than our own). |
| 931 | */ | 933 | */ |
| 932 | tsk->start_time = leader->start_time; | 934 | tsk->start_time = leader->start_time; |
| 935 | tsk->real_start_time = leader->real_start_time; | ||
| 933 | 936 | ||
| 934 | BUG_ON(!same_thread_group(leader, tsk)); | 937 | BUG_ON(!same_thread_group(leader, tsk)); |
| 935 | BUG_ON(has_group_leader_pid(tsk)); | 938 | BUG_ON(has_group_leader_pid(tsk)); |
| @@ -945,9 +948,8 @@ static int de_thread(struct task_struct *tsk) | |||
| 945 | * Note: The old leader also uses this pid until release_task | 948 | * Note: The old leader also uses this pid until release_task |
| 946 | * is called. Odd but simple and correct. | 949 | * is called. Odd but simple and correct. |
| 947 | */ | 950 | */ |
| 948 | detach_pid(tsk, PIDTYPE_PID); | ||
| 949 | tsk->pid = leader->pid; | 951 | tsk->pid = leader->pid; |
| 950 | attach_pid(tsk, PIDTYPE_PID, task_pid(leader)); | 952 | change_pid(tsk, PIDTYPE_PID, task_pid(leader)); |
| 951 | transfer_pid(leader, tsk, PIDTYPE_PGID); | 953 | transfer_pid(leader, tsk, PIDTYPE_PGID); |
| 952 | transfer_pid(leader, tsk, PIDTYPE_SID); | 954 | transfer_pid(leader, tsk, PIDTYPE_SID); |
| 953 | 955 | ||
| @@ -1463,7 +1465,6 @@ static int do_execve_common(const char *filename, | |||
| 1463 | struct files_struct *displaced; | 1465 | struct files_struct *displaced; |
| 1464 | bool clear_in_exec; | 1466 | bool clear_in_exec; |
| 1465 | int retval; | 1467 | int retval; |
| 1466 | const struct cred *cred = current_cred(); | ||
| 1467 | 1468 | ||
| 1468 | /* | 1469 | /* |
| 1469 | * We move the actual failure in case of RLIMIT_NPROC excess from | 1470 | * We move the actual failure in case of RLIMIT_NPROC excess from |
| @@ -1472,7 +1473,7 @@ static int do_execve_common(const char *filename, | |||
| 1472 | * whether NPROC limit is still exceeded. | 1473 | * whether NPROC limit is still exceeded. |
| 1473 | */ | 1474 | */ |
| 1474 | if ((current->flags & PF_NPROC_EXCEEDED) && | 1475 | if ((current->flags & PF_NPROC_EXCEEDED) && |
| 1475 | atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) { | 1476 | atomic_read(¤t_user()->processes) > rlimit(RLIMIT_NPROC)) { |
| 1476 | retval = -EAGAIN; | 1477 | retval = -EAGAIN; |
| 1477 | goto out_ret; | 1478 | goto out_ret; |
| 1478 | } | 1479 | } |
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 73b0d9519836..256dd5f4c1c4 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c | |||
| @@ -119,6 +119,29 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode | |||
| 119 | return ext2_add_nondir(dentry, inode); | 119 | return ext2_add_nondir(dentry, inode); |
| 120 | } | 120 | } |
| 121 | 121 | ||
| 122 | static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
| 123 | { | ||
| 124 | struct inode *inode = ext2_new_inode(dir, mode, NULL); | ||
| 125 | if (IS_ERR(inode)) | ||
| 126 | return PTR_ERR(inode); | ||
| 127 | |||
| 128 | inode->i_op = &ext2_file_inode_operations; | ||
| 129 | if (ext2_use_xip(inode->i_sb)) { | ||
| 130 | inode->i_mapping->a_ops = &ext2_aops_xip; | ||
| 131 | inode->i_fop = &ext2_xip_file_operations; | ||
| 132 | } else if (test_opt(inode->i_sb, NOBH)) { | ||
| 133 | inode->i_mapping->a_ops = &ext2_nobh_aops; | ||
| 134 | inode->i_fop = &ext2_file_operations; | ||
| 135 | } else { | ||
| 136 | inode->i_mapping->a_ops = &ext2_aops; | ||
| 137 | inode->i_fop = &ext2_file_operations; | ||
| 138 | } | ||
| 139 | mark_inode_dirty(inode); | ||
| 140 | d_tmpfile(dentry, inode); | ||
| 141 | unlock_new_inode(inode); | ||
| 142 | return 0; | ||
| 143 | } | ||
| 144 | |||
| 122 | static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev) | 145 | static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev) |
| 123 | { | 146 | { |
| 124 | struct inode * inode; | 147 | struct inode * inode; |
| @@ -398,6 +421,7 @@ const struct inode_operations ext2_dir_inode_operations = { | |||
| 398 | #endif | 421 | #endif |
| 399 | .setattr = ext2_setattr, | 422 | .setattr = ext2_setattr, |
| 400 | .get_acl = ext2_get_acl, | 423 | .get_acl = ext2_get_acl, |
| 424 | .tmpfile = ext2_tmpfile, | ||
| 401 | }; | 425 | }; |
| 402 | 426 | ||
| 403 | const struct inode_operations ext2_special_inode_operations = { | 427 | const struct inode_operations ext2_special_inode_operations = { |
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index b31dbd4c46ad..1cb9c7e10c6f 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c | |||
| @@ -48,9 +48,13 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 48 | 48 | ||
| 49 | trace_ext3_sync_file_enter(file, datasync); | 49 | trace_ext3_sync_file_enter(file, datasync); |
| 50 | 50 | ||
| 51 | if (inode->i_sb->s_flags & MS_RDONLY) | 51 | if (inode->i_sb->s_flags & MS_RDONLY) { |
| 52 | /* Make sure that we read updated state */ | ||
| 53 | smp_rmb(); | ||
| 54 | if (EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS) | ||
| 55 | return -EROFS; | ||
| 52 | return 0; | 56 | return 0; |
| 53 | 57 | } | |
| 54 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 58 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
| 55 | if (ret) | 59 | if (ret) |
| 56 | goto out; | 60 | goto out; |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index f67668f724ba..2bd85486b879 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
| @@ -1985,6 +1985,7 @@ static const struct address_space_operations ext3_ordered_aops = { | |||
| 1985 | .direct_IO = ext3_direct_IO, | 1985 | .direct_IO = ext3_direct_IO, |
| 1986 | .migratepage = buffer_migrate_page, | 1986 | .migratepage = buffer_migrate_page, |
| 1987 | .is_partially_uptodate = block_is_partially_uptodate, | 1987 | .is_partially_uptodate = block_is_partially_uptodate, |
| 1988 | .is_dirty_writeback = buffer_check_dirty_writeback, | ||
| 1988 | .error_remove_page = generic_error_remove_page, | 1989 | .error_remove_page = generic_error_remove_page, |
| 1989 | }; | 1990 | }; |
| 1990 | 1991 | ||
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index cea8ecf3e76e..998ea111e537 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
| @@ -1759,6 +1759,45 @@ retry: | |||
| 1759 | return err; | 1759 | return err; |
| 1760 | } | 1760 | } |
| 1761 | 1761 | ||
| 1762 | static int ext3_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
| 1763 | { | ||
| 1764 | handle_t *handle; | ||
| 1765 | struct inode *inode; | ||
| 1766 | int err, retries = 0; | ||
| 1767 | |||
| 1768 | dquot_initialize(dir); | ||
| 1769 | |||
| 1770 | retry: | ||
| 1771 | handle = ext3_journal_start(dir, EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + | ||
| 1772 | 4 + EXT3_XATTR_TRANS_BLOCKS); | ||
| 1773 | |||
| 1774 | if (IS_ERR(handle)) | ||
| 1775 | return PTR_ERR(handle); | ||
| 1776 | |||
| 1777 | inode = ext3_new_inode (handle, dir, NULL, mode); | ||
| 1778 | err = PTR_ERR(inode); | ||
| 1779 | if (!IS_ERR(inode)) { | ||
| 1780 | inode->i_op = &ext3_file_inode_operations; | ||
| 1781 | inode->i_fop = &ext3_file_operations; | ||
| 1782 | ext3_set_aops(inode); | ||
| 1783 | err = ext3_orphan_add(handle, inode); | ||
| 1784 | if (err) | ||
| 1785 | goto err_drop_inode; | ||
| 1786 | mark_inode_dirty(inode); | ||
| 1787 | d_tmpfile(dentry, inode); | ||
| 1788 | unlock_new_inode(inode); | ||
| 1789 | } | ||
| 1790 | ext3_journal_stop(handle); | ||
| 1791 | if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) | ||
| 1792 | goto retry; | ||
| 1793 | return err; | ||
| 1794 | err_drop_inode: | ||
| 1795 | ext3_journal_stop(handle); | ||
| 1796 | unlock_new_inode(inode); | ||
| 1797 | iput(inode); | ||
| 1798 | return err; | ||
| 1799 | } | ||
| 1800 | |||
| 1762 | static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) | 1801 | static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) |
| 1763 | { | 1802 | { |
| 1764 | handle_t *handle; | 1803 | handle_t *handle; |
| @@ -2300,7 +2339,7 @@ static int ext3_link (struct dentry * old_dentry, | |||
| 2300 | 2339 | ||
| 2301 | retry: | 2340 | retry: |
| 2302 | handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + | 2341 | handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + |
| 2303 | EXT3_INDEX_EXTRA_TRANS_BLOCKS); | 2342 | EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1); |
| 2304 | if (IS_ERR(handle)) | 2343 | if (IS_ERR(handle)) |
| 2305 | return PTR_ERR(handle); | 2344 | return PTR_ERR(handle); |
| 2306 | 2345 | ||
| @@ -2314,6 +2353,11 @@ retry: | |||
| 2314 | err = ext3_add_entry(handle, dentry, inode); | 2353 | err = ext3_add_entry(handle, dentry, inode); |
| 2315 | if (!err) { | 2354 | if (!err) { |
| 2316 | ext3_mark_inode_dirty(handle, inode); | 2355 | ext3_mark_inode_dirty(handle, inode); |
| 2356 | /* this can happen only for tmpfile being | ||
| 2357 | * linked the first time | ||
| 2358 | */ | ||
| 2359 | if (inode->i_nlink == 1) | ||
| 2360 | ext3_orphan_del(handle, inode); | ||
| 2317 | d_instantiate(dentry, inode); | 2361 | d_instantiate(dentry, inode); |
| 2318 | } else { | 2362 | } else { |
| 2319 | drop_nlink(inode); | 2363 | drop_nlink(inode); |
| @@ -2516,6 +2560,7 @@ const struct inode_operations ext3_dir_inode_operations = { | |||
| 2516 | .mkdir = ext3_mkdir, | 2560 | .mkdir = ext3_mkdir, |
| 2517 | .rmdir = ext3_rmdir, | 2561 | .rmdir = ext3_rmdir, |
| 2518 | .mknod = ext3_mknod, | 2562 | .mknod = ext3_mknod, |
| 2563 | .tmpfile = ext3_tmpfile, | ||
| 2519 | .rename = ext3_rename, | 2564 | .rename = ext3_rename, |
| 2520 | .setattr = ext3_setattr, | 2565 | .setattr = ext3_setattr, |
| 2521 | #ifdef CONFIG_EXT3_FS_XATTR | 2566 | #ifdef CONFIG_EXT3_FS_XATTR |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6356665a74bb..c47f14750722 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
| @@ -174,6 +174,11 @@ static void ext3_handle_error(struct super_block *sb) | |||
| 174 | if (test_opt (sb, ERRORS_RO)) { | 174 | if (test_opt (sb, ERRORS_RO)) { |
| 175 | ext3_msg(sb, KERN_CRIT, | 175 | ext3_msg(sb, KERN_CRIT, |
| 176 | "error: remounting filesystem read-only"); | 176 | "error: remounting filesystem read-only"); |
| 177 | /* | ||
| 178 | * Make sure updated value of ->s_mount_state will be visible | ||
| 179 | * before ->s_flags update. | ||
| 180 | */ | ||
| 181 | smp_wmb(); | ||
| 177 | sb->s_flags |= MS_RDONLY; | 182 | sb->s_flags |= MS_RDONLY; |
| 178 | } | 183 | } |
| 179 | ext3_commit_super(sb, es, 1); | 184 | ext3_commit_super(sb, es, 1); |
| @@ -291,8 +296,14 @@ void ext3_abort(struct super_block *sb, const char *function, | |||
| 291 | ext3_msg(sb, KERN_CRIT, | 296 | ext3_msg(sb, KERN_CRIT, |
| 292 | "error: remounting filesystem read-only"); | 297 | "error: remounting filesystem read-only"); |
| 293 | EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; | 298 | EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; |
| 294 | sb->s_flags |= MS_RDONLY; | ||
| 295 | set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); | 299 | set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); |
| 300 | /* | ||
| 301 | * Make sure updated value of ->s_mount_state will be visible | ||
| 302 | * before ->s_flags update. | ||
| 303 | */ | ||
| 304 | smp_wmb(); | ||
| 305 | sb->s_flags |= MS_RDONLY; | ||
| 306 | |||
| 296 | if (EXT3_SB(sb)->s_journal) | 307 | if (EXT3_SB(sb)->s_journal) |
| 297 | journal_abort(EXT3_SB(sb)->s_journal, -EIO); | 308 | journal_abort(EXT3_SB(sb)->s_journal, -EIO); |
| 298 | } | 309 | } |
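The `smp_wmb()` added above in `ext3_handle_error()`/`ext3_abort()` pairs with the `smp_rmb()` added to `ext3_sync_file()` earlier in this series: the writer publishes `EXT3_ERROR_FS` before setting `MS_RDONLY`, and the reader checks `MS_RDONLY` before `EXT3_ERROR_FS`, so any fsync that observes the read-only flag also observes the error state. A compile-and-run sketch of the same publish/observe pattern, using C11 fences in place of the kernel barriers and illustrative flag variables rather than the kernel's fields:

```c
#include <stdatomic.h>
#include <stdio.h>

/* Illustrative stand-ins for s_mount_state and s_flags. */
static atomic_int mount_state;	/* nonzero: "error" */
static atomic_int sb_flags;	/* nonzero: "read-only" */

static void handle_error(void)	/* writer side: ext3_handle_error()/ext3_abort() */
{
	atomic_store_explicit(&mount_state, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* plays the smp_wmb() role */
	atomic_store_explicit(&sb_flags, 1, memory_order_relaxed);
}

static int sync_file(void)	/* reader side: ext3_sync_file() */
{
	if (atomic_load_explicit(&sb_flags, memory_order_relaxed)) {
		atomic_thread_fence(memory_order_acquire);	/* plays the smp_rmb() role */
		if (atomic_load_explicit(&mount_state, memory_order_relaxed))
			return -30;	/* -EROFS */
		return 0;
	}
	return 1;	/* would do the real writeback */
}

int main(void)
{
	handle_error();
	printf("fsync on errored read-only fs -> %d\n", sync_file());
	return 0;
}
```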
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b19f0a457f32..6f4cc567c382 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
| @@ -494,17 +494,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
| 494 | if (dataoff > isize) | 494 | if (dataoff > isize) |
| 495 | return -ENXIO; | 495 | return -ENXIO; |
| 496 | 496 | ||
| 497 | if (dataoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | 497 | return vfs_setpos(file, dataoff, maxsize); |
| 498 | return -EINVAL; | ||
| 499 | if (dataoff > maxsize) | ||
| 500 | return -EINVAL; | ||
| 501 | |||
| 502 | if (dataoff != file->f_pos) { | ||
| 503 | file->f_pos = dataoff; | ||
| 504 | file->f_version = 0; | ||
| 505 | } | ||
| 506 | |||
| 507 | return dataoff; | ||
| 508 | } | 498 | } |
| 509 | 499 | ||
| 510 | /* | 500 | /* |
| @@ -580,17 +570,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
| 580 | if (holeoff > isize) | 570 | if (holeoff > isize) |
| 581 | holeoff = isize; | 571 | holeoff = isize; |
| 582 | 572 | ||
| 583 | if (holeoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | 573 | return vfs_setpos(file, holeoff, maxsize); |
| 584 | return -EINVAL; | ||
| 585 | if (holeoff > maxsize) | ||
| 586 | return -EINVAL; | ||
| 587 | |||
| 588 | if (holeoff != file->f_pos) { | ||
| 589 | file->f_pos = holeoff; | ||
| 590 | file->f_version = 0; | ||
| 591 | } | ||
| 592 | |||
| 593 | return holeoff; | ||
| 594 | } | 574 | } |
| 595 | 575 | ||
| 596 | /* | 576 | /* |
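`ext4_seek_data()`/`ext4_seek_hole()` above drop their open-coded `f_pos` bookkeeping in favor of `vfs_setpos()`. Judging from the removed lines, the helper centralizes exactly those checks; a userspace model of that behavior (struct and names simplified, not the kernel definitions, and the unsigned-offset special case omitted):

```c
#include <stdio.h>

/* Simplified stand-in for the f_pos / f_version fields of struct file. */
struct file_pos {
	long long f_pos;
	unsigned long long f_version;
};

/* Models vfs_setpos() as suggested by the checks removed above:
 * reject negative or too-large offsets, otherwise commit the new position. */
static long long setpos(struct file_pos *f, long long offset, long long maxsize)
{
	if (offset < 0 || offset > maxsize)
		return -22;		/* -EINVAL */
	if (offset != f->f_pos) {
		f->f_pos = offset;
		f->f_version = 0;	/* force readers to revalidate */
	}
	return offset;
}

int main(void)
{
	struct file_pos f = { .f_pos = 0, .f_version = 7 };
	long long rc = setpos(&f, 4096, 1LL << 40);

	printf("seek to 4096 -> %lld (f_pos=%lld)\n", rc, f.f_pos);
	printf("seek past maxsize -> %lld\n", setpos(&f, 2, 1));
	return 0;
}
```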
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index ab2f6dc44b3a..234b834d5a97 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
| @@ -2296,6 +2296,45 @@ retry: | |||
| 2296 | return err; | 2296 | return err; |
| 2297 | } | 2297 | } |
| 2298 | 2298 | ||
| 2299 | static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
| 2300 | { | ||
| 2301 | handle_t *handle; | ||
| 2302 | struct inode *inode; | ||
| 2303 | int err, retries = 0; | ||
| 2304 | |||
| 2305 | dquot_initialize(dir); | ||
| 2306 | |||
| 2307 | retry: | ||
| 2308 | inode = ext4_new_inode_start_handle(dir, mode, | ||
| 2309 | NULL, 0, NULL, | ||
| 2310 | EXT4_HT_DIR, | ||
| 2311 | EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + | ||
| 2312 | 4 + EXT4_XATTR_TRANS_BLOCKS); | ||
| 2313 | handle = ext4_journal_current_handle(); | ||
| 2314 | err = PTR_ERR(inode); | ||
| 2315 | if (!IS_ERR(inode)) { | ||
| 2316 | inode->i_op = &ext4_file_inode_operations; | ||
| 2317 | inode->i_fop = &ext4_file_operations; | ||
| 2318 | ext4_set_aops(inode); | ||
| 2319 | err = ext4_orphan_add(handle, inode); | ||
| 2320 | if (err) | ||
| 2321 | goto err_drop_inode; | ||
| 2322 | mark_inode_dirty(inode); | ||
| 2323 | d_tmpfile(dentry, inode); | ||
| 2324 | unlock_new_inode(inode); | ||
| 2325 | } | ||
| 2326 | if (handle) | ||
| 2327 | ext4_journal_stop(handle); | ||
| 2328 | if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) | ||
| 2329 | goto retry; | ||
| 2330 | return err; | ||
| 2331 | err_drop_inode: | ||
| 2332 | ext4_journal_stop(handle); | ||
| 2333 | unlock_new_inode(inode); | ||
| 2334 | iput(inode); | ||
| 2335 | return err; | ||
| 2336 | } | ||
| 2337 | |||
| 2299 | struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, | 2338 | struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, |
| 2300 | struct ext4_dir_entry_2 *de, | 2339 | struct ext4_dir_entry_2 *de, |
| 2301 | int blocksize, int csum_size, | 2340 | int blocksize, int csum_size, |
| @@ -2903,7 +2942,7 @@ static int ext4_link(struct dentry *old_dentry, | |||
| 2903 | retry: | 2942 | retry: |
| 2904 | handle = ext4_journal_start(dir, EXT4_HT_DIR, | 2943 | handle = ext4_journal_start(dir, EXT4_HT_DIR, |
| 2905 | (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | 2944 | (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + |
| 2906 | EXT4_INDEX_EXTRA_TRANS_BLOCKS)); | 2945 | EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1); |
| 2907 | if (IS_ERR(handle)) | 2946 | if (IS_ERR(handle)) |
| 2908 | return PTR_ERR(handle); | 2947 | return PTR_ERR(handle); |
| 2909 | 2948 | ||
| @@ -2917,6 +2956,11 @@ retry: | |||
| 2917 | err = ext4_add_entry(handle, dentry, inode); | 2956 | err = ext4_add_entry(handle, dentry, inode); |
| 2918 | if (!err) { | 2957 | if (!err) { |
| 2919 | ext4_mark_inode_dirty(handle, inode); | 2958 | ext4_mark_inode_dirty(handle, inode); |
| 2959 | /* this can happen only for tmpfile being | ||
| 2960 | * linked the first time | ||
| 2961 | */ | ||
| 2962 | if (inode->i_nlink == 1) | ||
| 2963 | ext4_orphan_del(handle, inode); | ||
| 2920 | d_instantiate(dentry, inode); | 2964 | d_instantiate(dentry, inode); |
| 2921 | } else { | 2965 | } else { |
| 2922 | drop_nlink(inode); | 2966 | drop_nlink(inode); |
| @@ -3169,6 +3213,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
| 3169 | .mkdir = ext4_mkdir, | 3213 | .mkdir = ext4_mkdir, |
| 3170 | .rmdir = ext4_rmdir, | 3214 | .rmdir = ext4_rmdir, |
| 3171 | .mknod = ext4_mknod, | 3215 | .mknod = ext4_mknod, |
| 3216 | .tmpfile = ext4_tmpfile, | ||
| 3172 | .rename = ext4_rename, | 3217 | .rename = ext4_rename, |
| 3173 | .setattr = ext4_setattr, | 3218 | .setattr = ext4_setattr, |
| 3174 | .setxattr = generic_setxattr, | 3219 | .setxattr = generic_setxattr, |
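Together with the `d_tmpfile()` helper shown earlier, the ext2/ext3/ext4 `->tmpfile` hooks above back the O_TMPFILE open flag: the inode starts on the orphan list and only leaves it when linked, which is why `ext3_link()`/`ext4_link()` now call the orphan-delete path for an inode whose link count is 1. From userspace the usual pattern is an unnamed open followed by `linkat()` through /proc/self/fd; a sketch, assuming a 3.11+ kernel, a libc that defines O_TMPFILE, and /tmp purely as an example target directory:

```c
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Unnamed file in /tmp: no directory entry exists yet. */
	int fd = open("/tmp", O_TMPFILE | O_RDWR, 0600);
	char path[64];

	if (fd < 0) {
		perror("open(O_TMPFILE)");
		return 1;
	}
	if (write(fd, "scratch data\n", 13) != 13)
		perror("write");

	/* Give it a name; this is the "tmpfile being linked for the first
	 * time" case handled in the ext3/ext4 link paths above. */
	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
	if (linkat(AT_FDCWD, path, AT_FDCWD, "/tmp/now-visible",
		   AT_SYMLINK_FOLLOW) < 0)
		perror("linkat");

	close(fd);
	return 0;
}
```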
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9d1cd423450d..62f0d5977c64 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c | |||
| @@ -610,13 +610,12 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) | |||
| 610 | { | 610 | { |
| 611 | struct inode *inode = file_inode(file); | 611 | struct inode *inode = file_inode(file); |
| 612 | unsigned long npages = dir_blocks(inode); | 612 | unsigned long npages = dir_blocks(inode); |
| 613 | unsigned int bit_pos = 0, start_bit_pos = 0; | 613 | unsigned int bit_pos = 0; |
| 614 | struct f2fs_dentry_block *dentry_blk = NULL; | 614 | struct f2fs_dentry_block *dentry_blk = NULL; |
| 615 | struct f2fs_dir_entry *de = NULL; | 615 | struct f2fs_dir_entry *de = NULL; |
| 616 | struct page *dentry_page = NULL; | 616 | struct page *dentry_page = NULL; |
| 617 | unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); | 617 | unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); |
| 618 | unsigned char d_type = DT_UNKNOWN; | 618 | unsigned char d_type = DT_UNKNOWN; |
| 619 | int slots; | ||
| 620 | 619 | ||
| 621 | bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); | 620 | bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); |
| 622 | 621 | ||
| @@ -625,7 +624,6 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) | |||
| 625 | if (IS_ERR(dentry_page)) | 624 | if (IS_ERR(dentry_page)) |
| 626 | continue; | 625 | continue; |
| 627 | 626 | ||
| 628 | start_bit_pos = bit_pos; | ||
| 629 | dentry_blk = kmap(dentry_page); | 627 | dentry_blk = kmap(dentry_page); |
| 630 | while (bit_pos < NR_DENTRY_IN_BLOCK) { | 628 | while (bit_pos < NR_DENTRY_IN_BLOCK) { |
| 631 | bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, | 629 | bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, |
| @@ -634,19 +632,19 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) | |||
| 634 | if (bit_pos >= NR_DENTRY_IN_BLOCK) | 632 | if (bit_pos >= NR_DENTRY_IN_BLOCK) |
| 635 | break; | 633 | break; |
| 636 | 634 | ||
| 637 | ctx->pos += bit_pos - start_bit_pos; | ||
| 638 | de = &dentry_blk->dentry[bit_pos]; | 635 | de = &dentry_blk->dentry[bit_pos]; |
| 639 | if (de->file_type < F2FS_FT_MAX) | 636 | if (de->file_type < F2FS_FT_MAX) |
| 640 | d_type = f2fs_filetype_table[de->file_type]; | 637 | d_type = f2fs_filetype_table[de->file_type]; |
| 641 | else | 638 | else |
| 642 | d_type = DT_UNKNOWN; | 639 | d_type = DT_UNKNOWN; |
| 643 | if (!dir_emit(ctx, | 640 | if (!dir_emit(ctx, |
| 644 | dentry_blk->filename[bit_pos], | 641 | dentry_blk->filename[bit_pos], |
| 645 | le16_to_cpu(de->name_len), | 642 | le16_to_cpu(de->name_len), |
| 646 | le32_to_cpu(de->ino), d_type)) | 643 | le32_to_cpu(de->ino), d_type)) |
| 647 | goto success; | 644 | goto stop; |
| 648 | slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); | 645 | |
| 649 | bit_pos += slots; | 646 | bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); |
| 647 | ctx->pos = n * NR_DENTRY_IN_BLOCK + bit_pos; | ||
| 650 | } | 648 | } |
| 651 | bit_pos = 0; | 649 | bit_pos = 0; |
| 652 | ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; | 650 | ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; |
| @@ -654,7 +652,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) | |||
| 654 | f2fs_put_page(dentry_page, 1); | 652 | f2fs_put_page(dentry_page, 1); |
| 655 | dentry_page = NULL; | 653 | dentry_page = NULL; |
| 656 | } | 654 | } |
| 657 | success: | 655 | stop: |
| 658 | if (dentry_page && !IS_ERR(dentry_page)) { | 656 | if (dentry_page && !IS_ERR(dentry_page)) { |
| 659 | kunmap(dentry_page); | 657 | kunmap(dentry_page); |
| 660 | f2fs_put_page(dentry_page, 1); | 658 | f2fs_put_page(dentry_page, 1); |
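The f2fs_readdir() change above drops the start_bit_pos bookkeeping and instead recomputes the directory position absolutely from the block index and the bitmap offset after each emitted entry, so a partially filled buffer resumes at the right slot. A hedged sketch of the position encoding (the NR_DENTRY_IN_BLOCK value is quoted from the f2fs on-disk format of this era and should be treated as an assumption):

    /* Illustration of how ctx->pos is packed in the loop above. */
    #define NR_DENTRY_IN_BLOCK 214          /* dentry slots per directory block */

    static unsigned long f2fs_dir_pos(unsigned long block, unsigned int bit_pos)
    {
            return block * NR_DENTRY_IN_BLOCK + bit_pos;    /* what ctx->pos holds */
    }

    static void f2fs_dir_pos_decode(unsigned long pos,
                                    unsigned long *block, unsigned int *bit_pos)
    {
            *block = pos / NR_DENTRY_IN_BLOCK;      /* 'n' at the top of f2fs_readdir() */
            *bit_pos = pos % NR_DENTRY_IN_BLOCK;    /* resume offset inside the bitmap */
    }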
diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 21664fcf3616..4241e6f39e86 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h | |||
| @@ -86,6 +86,7 @@ struct msdos_sb_info { | |||
| 86 | const void *dir_ops; /* Opaque; default directory operations */ | 86 | const void *dir_ops; /* Opaque; default directory operations */ |
| 87 | int dir_per_block; /* dir entries per block */ | 87 | int dir_per_block; /* dir entries per block */ |
| 88 | int dir_per_block_bits; /* log2(dir_per_block) */ | 88 | int dir_per_block_bits; /* log2(dir_per_block) */ |
| 89 | unsigned int vol_id; /*volume ID*/ | ||
| 89 | 90 | ||
| 90 | int fatent_shift; | 91 | int fatent_shift; |
| 91 | struct fatent_operations *fatent_ops; | 92 | struct fatent_operations *fatent_ops; |
diff --git a/fs/fat/file.c b/fs/fat/file.c index b0b632e50ddb..9b104f543056 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
| @@ -114,6 +114,12 @@ out: | |||
| 114 | return err; | 114 | return err; |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | static int fat_ioctl_get_volume_id(struct inode *inode, u32 __user *user_attr) | ||
| 118 | { | ||
| 119 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); | ||
| 120 | return put_user(sbi->vol_id, user_attr); | ||
| 121 | } | ||
| 122 | |||
| 117 | long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 123 | long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
| 118 | { | 124 | { |
| 119 | struct inode *inode = file_inode(filp); | 125 | struct inode *inode = file_inode(filp); |
| @@ -124,6 +130,8 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 124 | return fat_ioctl_get_attributes(inode, user_attr); | 130 | return fat_ioctl_get_attributes(inode, user_attr); |
| 125 | case FAT_IOCTL_SET_ATTRIBUTES: | 131 | case FAT_IOCTL_SET_ATTRIBUTES: |
| 126 | return fat_ioctl_set_attributes(filp, user_attr); | 132 | return fat_ioctl_set_attributes(filp, user_attr); |
| 133 | case FAT_IOCTL_GET_VOLUME_ID: | ||
| 134 | return fat_ioctl_get_volume_id(inode, user_attr); | ||
| 127 | default: | 135 | default: |
| 128 | return -ENOTTY; /* Inappropriate ioctl for device */ | 136 | return -ENOTTY; /* Inappropriate ioctl for device */ |
| 129 | } | 137 | } |
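fat_generic_ioctl() now answers FAT_IOCTL_GET_VOLUME_ID by copying the volume ID cached at mount time (see the fat_fill_super() hunk below) to userspace. A hedged usage sketch, assuming a FAT volume mounted at /mnt/fat and the ioctl number exported through linux/msdos_fs.h:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/msdos_fs.h>     /* FAT_IOCTL_GET_VOLUME_ID, __u32 */

    int main(void)
    {
            __u32 vol_id;
            int fd = open("/mnt/fat", O_RDONLY);    /* any file or dir on the volume */

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (ioctl(fd, FAT_IOCTL_GET_VOLUME_ID, &vol_id) < 0) {
                    perror("FAT_IOCTL_GET_VOLUME_ID");
                    close(fd);
                    return 1;
            }
            /* Conventionally displayed as XXXX-XXXX, as Windows does. */
            printf("volume id: %04x-%04x\n", vol_id >> 16, vol_id & 0xffff);
            close(fd);
            return 0;
    }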
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 5d4513cb1b3c..11b51bb55b42 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
| @@ -1415,6 +1415,18 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, | |||
| 1415 | brelse(fsinfo_bh); | 1415 | brelse(fsinfo_bh); |
| 1416 | } | 1416 | } |
| 1417 | 1417 | ||
| 1418 | /* interpret volume ID as a little endian 32 bit integer */ | ||
| 1419 | if (sbi->fat_bits == 32) | ||
| 1420 | sbi->vol_id = (((u32)b->fat32.vol_id[0]) | | ||
| 1421 | ((u32)b->fat32.vol_id[1] << 8) | | ||
| 1422 | ((u32)b->fat32.vol_id[2] << 16) | | ||
| 1423 | ((u32)b->fat32.vol_id[3] << 24)); | ||
| 1424 | else /* fat 16 or 12 */ | ||
| 1425 | sbi->vol_id = (((u32)b->fat16.vol_id[0]) | | ||
| 1426 | ((u32)b->fat16.vol_id[1] << 8) | | ||
| 1427 | ((u32)b->fat16.vol_id[2] << 16) | | ||
| 1428 | ((u32)b->fat16.vol_id[3] << 24)); | ||
| 1429 | |||
| 1418 | sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry); | 1430 | sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry); |
| 1419 | sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; | 1431 | sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; |
| 1420 | 1432 | ||
diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 359d307b5507..628e22a5a543 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c | |||
| @@ -30,7 +30,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) | |||
| 30 | va_start(args, fmt); | 30 | va_start(args, fmt); |
| 31 | vaf.fmt = fmt; | 31 | vaf.fmt = fmt; |
| 32 | vaf.va = &args; | 32 | vaf.va = &args; |
| 33 | printk(KERN_ERR "FAT-fs (%s): error, %pV\n", sb->s_id, &vaf); | 33 | fat_msg(sb, KERN_ERR, "error, %pV", &vaf); |
| 34 | va_end(args); | 34 | va_end(args); |
| 35 | } | 35 | } |
| 36 | 36 | ||
| @@ -38,8 +38,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) | |||
| 38 | panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id); | 38 | panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id); |
| 39 | else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) { | 39 | else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) { |
| 40 | sb->s_flags |= MS_RDONLY; | 40 | sb->s_flags |= MS_RDONLY; |
| 41 | printk(KERN_ERR "FAT-fs (%s): Filesystem has been " | 41 | fat_msg(sb, KERN_ERR, "Filesystem has been set read-only"); |
| 42 | "set read-only\n", sb->s_id); | ||
| 43 | } | 42 | } |
| 44 | } | 43 | } |
| 45 | EXPORT_SYMBOL_GPL(__fat_fs_error); | 44 | EXPORT_SYMBOL_GPL(__fat_fs_error); |
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 081b759cff83..a783b0e1272a 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c | |||
| @@ -148,8 +148,7 @@ static int msdos_find(struct inode *dir, const unsigned char *name, int len, | |||
| 148 | * that the existing dentry can be used. The msdos fs routines will | 148 | * that the existing dentry can be used. The msdos fs routines will |
| 149 | * return ENOENT or EINVAL as appropriate. | 149 | * return ENOENT or EINVAL as appropriate. |
| 150 | */ | 150 | */ |
| 151 | static int msdos_hash(const struct dentry *dentry, const struct inode *inode, | 151 | static int msdos_hash(const struct dentry *dentry, struct qstr *qstr) |
| 152 | struct qstr *qstr) | ||
| 153 | { | 152 | { |
| 154 | struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; | 153 | struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; |
| 155 | unsigned char msdos_name[MSDOS_NAME]; | 154 | unsigned char msdos_name[MSDOS_NAME]; |
| @@ -165,8 +164,7 @@ static int msdos_hash(const struct dentry *dentry, const struct inode *inode, | |||
| 165 | * Compare two msdos names. If either of the names are invalid, | 164 | * Compare two msdos names. If either of the names are invalid, |
| 166 | * we fall back to doing the standard name comparison. | 165 | * we fall back to doing the standard name comparison. |
| 167 | */ | 166 | */ |
| 168 | static int msdos_cmp(const struct dentry *parent, const struct inode *pinode, | 167 | static int msdos_cmp(const struct dentry *parent, const struct dentry *dentry, |
| 169 | const struct dentry *dentry, const struct inode *inode, | ||
| 170 | unsigned int len, const char *str, const struct qstr *name) | 168 | unsigned int len, const char *str, const struct qstr *name) |
| 171 | { | 169 | { |
| 172 | struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options; | 170 | struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options; |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 2da952036a3d..6df8d3d885e5 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
| @@ -107,8 +107,7 @@ static unsigned int vfat_striptail_len(const struct qstr *qstr) | |||
| 107 | * that the existing dentry can be used. The vfat fs routines will | 107 | * that the existing dentry can be used. The vfat fs routines will |
| 108 | * return ENOENT or EINVAL as appropriate. | 108 | * return ENOENT or EINVAL as appropriate. |
| 109 | */ | 109 | */ |
| 110 | static int vfat_hash(const struct dentry *dentry, const struct inode *inode, | 110 | static int vfat_hash(const struct dentry *dentry, struct qstr *qstr) |
| 111 | struct qstr *qstr) | ||
| 112 | { | 111 | { |
| 113 | qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr)); | 112 | qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr)); |
| 114 | return 0; | 113 | return 0; |
| @@ -120,8 +119,7 @@ static int vfat_hash(const struct dentry *dentry, const struct inode *inode, | |||
| 120 | * that the existing dentry can be used. The vfat fs routines will | 119 | * that the existing dentry can be used. The vfat fs routines will |
| 121 | * return ENOENT or EINVAL as appropriate. | 120 | * return ENOENT or EINVAL as appropriate. |
| 122 | */ | 121 | */ |
| 123 | static int vfat_hashi(const struct dentry *dentry, const struct inode *inode, | 122 | static int vfat_hashi(const struct dentry *dentry, struct qstr *qstr) |
| 124 | struct qstr *qstr) | ||
| 125 | { | 123 | { |
| 126 | struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io; | 124 | struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io; |
| 127 | const unsigned char *name; | 125 | const unsigned char *name; |
| @@ -142,8 +140,7 @@ static int vfat_hashi(const struct dentry *dentry, const struct inode *inode, | |||
| 142 | /* | 140 | /* |
| 143 | * Case insensitive compare of two vfat names. | 141 | * Case insensitive compare of two vfat names. |
| 144 | */ | 142 | */ |
| 145 | static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode, | 143 | static int vfat_cmpi(const struct dentry *parent, const struct dentry *dentry, |
| 146 | const struct dentry *dentry, const struct inode *inode, | ||
| 147 | unsigned int len, const char *str, const struct qstr *name) | 144 | unsigned int len, const char *str, const struct qstr *name) |
| 148 | { | 145 | { |
| 149 | struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io; | 146 | struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io; |
| @@ -162,8 +159,7 @@ static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode, | |||
| 162 | /* | 159 | /* |
| 163 | * Case sensitive compare of two vfat names. | 160 | * Case sensitive compare of two vfat names. |
| 164 | */ | 161 | */ |
| 165 | static int vfat_cmp(const struct dentry *parent, const struct inode *pinode, | 162 | static int vfat_cmp(const struct dentry *parent, const struct dentry *dentry, |
| 166 | const struct dentry *dentry, const struct inode *inode, | ||
| 167 | unsigned int len, const char *str, const struct qstr *name) | 163 | unsigned int len, const char *str, const struct qstr *name) |
| 168 | { | 164 | { |
| 169 | unsigned int alen, blen; | 165 | unsigned int alen, blen; |
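This vfat hunk, like the msdos one above and the gfs2, hfs, hfsplus, hpfs and isofs hunks further down, is mechanical fallout from the VFS dropping the inode arguments of the d_hash and d_compare dentry operations in this merge window. The resulting prototypes, as the converted functions here use them (sketched for reference; the remaining members of struct dentry_operations are omitted):

    struct dentry_operations {
            /* ... */
            int (*d_hash)(const struct dentry *, struct qstr *);
            int (*d_compare)(const struct dentry *, const struct dentry *,
                             unsigned int, const char *, const struct qstr *);
            /* ... */
    };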
diff --git a/fs/file_table.c b/fs/file_table.c index 485dc0eddd67..08e719b884ca 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
| @@ -227,7 +227,7 @@ static void __fput(struct file *file) | |||
| 227 | { | 227 | { |
| 228 | struct dentry *dentry = file->f_path.dentry; | 228 | struct dentry *dentry = file->f_path.dentry; |
| 229 | struct vfsmount *mnt = file->f_path.mnt; | 229 | struct vfsmount *mnt = file->f_path.mnt; |
| 230 | struct inode *inode = dentry->d_inode; | 230 | struct inode *inode = file->f_inode; |
| 231 | 231 | ||
| 232 | might_sleep(); | 232 | might_sleep(); |
| 233 | 233 | ||
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index a85ac4e33436..68851ff2fd41 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
| @@ -963,7 +963,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
| 963 | /* | 963 | /* |
| 964 | * Retrieve work items and do the writeback they describe | 964 | * Retrieve work items and do the writeback they describe |
| 965 | */ | 965 | */ |
| 966 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | 966 | static long wb_do_writeback(struct bdi_writeback *wb) |
| 967 | { | 967 | { |
| 968 | struct backing_dev_info *bdi = wb->bdi; | 968 | struct backing_dev_info *bdi = wb->bdi; |
| 969 | struct wb_writeback_work *work; | 969 | struct wb_writeback_work *work; |
| @@ -971,12 +971,6 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
| 971 | 971 | ||
| 972 | set_bit(BDI_writeback_running, &wb->bdi->state); | 972 | set_bit(BDI_writeback_running, &wb->bdi->state); |
| 973 | while ((work = get_next_work_item(bdi)) != NULL) { | 973 | while ((work = get_next_work_item(bdi)) != NULL) { |
| 974 | /* | ||
| 975 | * Override sync mode, in case we must wait for completion | ||
| 976 | * because this thread is exiting now. | ||
| 977 | */ | ||
| 978 | if (force_wait) | ||
| 979 | work->sync_mode = WB_SYNC_ALL; | ||
| 980 | 974 | ||
| 981 | trace_writeback_exec(bdi, work); | 975 | trace_writeback_exec(bdi, work); |
| 982 | 976 | ||
| @@ -1025,7 +1019,7 @@ void bdi_writeback_workfn(struct work_struct *work) | |||
| 1025 | * rescuer as work_list needs to be drained. | 1019 | * rescuer as work_list needs to be drained. |
| 1026 | */ | 1020 | */ |
| 1027 | do { | 1021 | do { |
| 1028 | pages_written = wb_do_writeback(wb, 0); | 1022 | pages_written = wb_do_writeback(wb); |
| 1029 | trace_writeback_pages_written(pages_written); | 1023 | trace_writeback_pages_written(pages_written); |
| 1030 | } while (!list_empty(&bdi->work_list)); | 1024 | } while (!list_empty(&bdi->work_list)); |
| 1031 | } else { | 1025 | } else { |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 35f281033142..5c121fe19c5f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
| @@ -548,8 +548,7 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) | |||
| 548 | res = io->bytes < 0 ? io->size : io->bytes; | 548 | res = io->bytes < 0 ? io->size : io->bytes; |
| 549 | 549 | ||
| 550 | if (!is_sync_kiocb(io->iocb)) { | 550 | if (!is_sync_kiocb(io->iocb)) { |
| 551 | struct path *path = &io->iocb->ki_filp->f_path; | 551 | struct inode *inode = file_inode(io->iocb->ki_filp); |
| 552 | struct inode *inode = path->dentry->d_inode; | ||
| 553 | struct fuse_conn *fc = get_fuse_conn(inode); | 552 | struct fuse_conn *fc = get_fuse_conn(inode); |
| 554 | struct fuse_inode *fi = get_fuse_inode(inode); | 553 | struct fuse_inode *fi = get_fuse_inode(inode); |
| 555 | 554 | ||
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9a0cdde14a08..0b578598c6ac 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
| @@ -785,7 +785,7 @@ static const struct super_operations fuse_super_operations = { | |||
| 785 | static void sanitize_global_limit(unsigned *limit) | 785 | static void sanitize_global_limit(unsigned *limit) |
| 786 | { | 786 | { |
| 787 | if (*limit == 0) | 787 | if (*limit == 0) |
| 788 | *limit = ((num_physpages << PAGE_SHIFT) >> 13) / | 788 | *limit = ((totalram_pages << PAGE_SHIFT) >> 13) / |
| 789 | sizeof(struct fuse_req); | 789 | sizeof(struct fuse_req); |
| 790 | 790 | ||
| 791 | if (*limit >= 1 << 16) | 791 | if (*limit >= 1 << 16) |
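sanitize_global_limit() above switches from the obsolete num_physpages to totalram_pages when deriving the default number of in-flight fuse requests (roughly 1/8192 of RAM). A hedged worked example of the formula, assuming 4 KiB pages, 4 GiB of RAM and a struct fuse_req of about 400 bytes (the real size is config-dependent):

    #include <stdio.h>

    int main(void)
    {
            unsigned long totalram_pages = 1UL << 20;   /* 4 GiB / 4 KiB pages (assumed) */
            unsigned long page_shift = 12;              /* 4 KiB pages */
            unsigned long req_size = 400;               /* ~sizeof(struct fuse_req), assumed */

            unsigned long limit = ((totalram_pages << page_shift) >> 13) / req_size;
            if (limit >= 1UL << 16)                     /* same cap the kernel applies */
                    limit = (1UL << 16) - 1;
            printf("default request limit: %lu\n", limit);   /* ~1310 for these numbers */
            return 0;
    }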
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 4fddb3c22d25..f2448ab2aac5 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c | |||
| @@ -109,8 +109,7 @@ fail: | |||
| 109 | return 0; | 109 | return 0; |
| 110 | } | 110 | } |
| 111 | 111 | ||
| 112 | static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode, | 112 | static int gfs2_dhash(const struct dentry *dentry, struct qstr *str) |
| 113 | struct qstr *str) | ||
| 114 | { | 113 | { |
| 115 | str->hash = gfs2_disk_hash(str->name, str->len); | 114 | str->hash = gfs2_disk_hash(str->name, str->len); |
| 116 | return 0; | 115 | return 0; |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index f99f9e8a325f..72c3866a7320 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
| @@ -912,7 +912,7 @@ out_uninit: | |||
| 912 | * cluster; until we do, disable leases (by just returning -EINVAL), | 912 | * cluster; until we do, disable leases (by just returning -EINVAL), |
| 913 | * unless the administrator has requested purely local locking. | 913 | * unless the administrator has requested purely local locking. |
| 914 | * | 914 | * |
| 915 | * Locking: called under lock_flocks | 915 | * Locking: called under i_lock |
| 916 | * | 916 | * |
| 917 | * Returns: errno | 917 | * Returns: errno |
| 918 | */ | 918 | */ |
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index a73b11839a41..0524cda47a6e 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h | |||
| @@ -229,13 +229,10 @@ extern int hfs_part_find(struct super_block *, sector_t *, sector_t *); | |||
| 229 | /* string.c */ | 229 | /* string.c */ |
| 230 | extern const struct dentry_operations hfs_dentry_operations; | 230 | extern const struct dentry_operations hfs_dentry_operations; |
| 231 | 231 | ||
| 232 | extern int hfs_hash_dentry(const struct dentry *, const struct inode *, | 232 | extern int hfs_hash_dentry(const struct dentry *, struct qstr *); |
| 233 | struct qstr *); | ||
| 234 | extern int hfs_strcmp(const unsigned char *, unsigned int, | 233 | extern int hfs_strcmp(const unsigned char *, unsigned int, |
| 235 | const unsigned char *, unsigned int); | 234 | const unsigned char *, unsigned int); |
| 236 | extern int hfs_compare_dentry(const struct dentry *parent, | 235 | extern int hfs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
| 237 | const struct inode *pinode, | ||
| 238 | const struct dentry *dentry, const struct inode *inode, | ||
| 239 | unsigned int len, const char *str, const struct qstr *name); | 236 | unsigned int len, const char *str, const struct qstr *name); |
| 240 | 237 | ||
| 241 | /* trans.c */ | 238 | /* trans.c */ |
diff --git a/fs/hfs/string.c b/fs/hfs/string.c index 495a976a3cc9..85b610c3909f 100644 --- a/fs/hfs/string.c +++ b/fs/hfs/string.c | |||
| @@ -51,8 +51,7 @@ static unsigned char caseorder[256] = { | |||
| 51 | /* | 51 | /* |
| 52 | * Hash a string to an integer in a case-independent way | 52 | * Hash a string to an integer in a case-independent way |
| 53 | */ | 53 | */ |
| 54 | int hfs_hash_dentry(const struct dentry *dentry, const struct inode *inode, | 54 | int hfs_hash_dentry(const struct dentry *dentry, struct qstr *this) |
| 55 | struct qstr *this) | ||
| 56 | { | 55 | { |
| 57 | const unsigned char *name = this->name; | 56 | const unsigned char *name = this->name; |
| 58 | unsigned int hash, len = this->len; | 57 | unsigned int hash, len = this->len; |
| @@ -93,8 +92,7 @@ int hfs_strcmp(const unsigned char *s1, unsigned int len1, | |||
| 93 | * Test for equality of two strings in the HFS filename character ordering. | 92 | * Test for equality of two strings in the HFS filename character ordering. |
| 94 | * return 1 on failure and 0 on success | 93 | * return 1 on failure and 0 on success |
| 95 | */ | 94 | */ |
| 96 | int hfs_compare_dentry(const struct dentry *parent, const struct inode *pinode, | 95 | int hfs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
| 97 | const struct dentry *dentry, const struct inode *inode, | ||
| 98 | unsigned int len, const char *str, const struct qstr *name) | 96 | unsigned int len, const char *str, const struct qstr *name) |
| 99 | { | 97 | { |
| 100 | const unsigned char *n1, *n2; | 98 | const unsigned char *n1, *n2; |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 60b0a3388b26..ede79317cfb8 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
| @@ -495,11 +495,8 @@ int hfsplus_uni2asc(struct super_block *, | |||
| 495 | const struct hfsplus_unistr *, char *, int *); | 495 | const struct hfsplus_unistr *, char *, int *); |
| 496 | int hfsplus_asc2uni(struct super_block *, | 496 | int hfsplus_asc2uni(struct super_block *, |
| 497 | struct hfsplus_unistr *, int, const char *, int); | 497 | struct hfsplus_unistr *, int, const char *, int); |
| 498 | int hfsplus_hash_dentry(const struct dentry *dentry, | 498 | int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str); |
| 499 | const struct inode *inode, struct qstr *str); | 499 | int hfsplus_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
| 500 | int hfsplus_compare_dentry(const struct dentry *parent, | ||
| 501 | const struct inode *pinode, | ||
| 502 | const struct dentry *dentry, const struct inode *inode, | ||
| 503 | unsigned int len, const char *str, const struct qstr *name); | 500 | unsigned int len, const char *str, const struct qstr *name); |
| 504 | 501 | ||
| 505 | /* wrapper.c */ | 502 | /* wrapper.c */ |
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index 2c2e47dcfdd8..e8ef121a4d8b 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c | |||
| @@ -334,8 +334,7 @@ int hfsplus_asc2uni(struct super_block *sb, | |||
| 334 | * Composed unicode characters are decomposed and case-folding is performed | 334 | * Composed unicode characters are decomposed and case-folding is performed |
| 335 | * if the appropriate bits are (un)set on the superblock. | 335 | * if the appropriate bits are (un)set on the superblock. |
| 336 | */ | 336 | */ |
| 337 | int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, | 337 | int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str) |
| 338 | struct qstr *str) | ||
| 339 | { | 338 | { |
| 340 | struct super_block *sb = dentry->d_sb; | 339 | struct super_block *sb = dentry->d_sb; |
| 341 | const char *astr; | 340 | const char *astr; |
| @@ -386,9 +385,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, | |||
| 386 | * Composed unicode characters are decomposed and case-folding is performed | 385 | * Composed unicode characters are decomposed and case-folding is performed |
| 387 | * if the appropriate bits are (un)set on the superblock. | 386 | * if the appropriate bits are (un)set on the superblock. |
| 388 | */ | 387 | */ |
| 389 | int hfsplus_compare_dentry(const struct dentry *parent, | 388 | int hfsplus_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
| 390 | const struct inode *pinode, | ||
| 391 | const struct dentry *dentry, const struct inode *inode, | ||
| 392 | unsigned int len, const char *str, const struct qstr *name) | 389 | unsigned int len, const char *str, const struct qstr *name) |
| 393 | { | 390 | { |
| 394 | struct super_block *sb = parent->d_sb; | 391 | struct super_block *sb = parent->d_sb; |
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c index f49d1498aa2e..4d0a1afa058c 100644 --- a/fs/hpfs/buffer.c +++ b/fs/hpfs/buffer.c | |||
| @@ -7,8 +7,37 @@ | |||
| 7 | */ | 7 | */ |
| 8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
| 9 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
| 10 | #include <linux/blkdev.h> | ||
| 10 | #include "hpfs_fn.h" | 11 | #include "hpfs_fn.h" |
| 11 | 12 | ||
| 13 | void hpfs_prefetch_sectors(struct super_block *s, unsigned secno, int n) | ||
| 14 | { | ||
| 15 | struct buffer_head *bh; | ||
| 16 | struct blk_plug plug; | ||
| 17 | |||
| 18 | if (n <= 0 || unlikely(secno >= hpfs_sb(s)->sb_fs_size)) | ||
| 19 | return; | ||
| 20 | |||
| 21 | bh = sb_find_get_block(s, secno); | ||
| 22 | if (bh) { | ||
| 23 | if (buffer_uptodate(bh)) { | ||
| 24 | brelse(bh); | ||
| 25 | return; | ||
| 26 | } | ||
| 27 | brelse(bh); | ||
| 28 | }; | ||
| 29 | |||
| 30 | blk_start_plug(&plug); | ||
| 31 | while (n > 0) { | ||
| 32 | if (unlikely(secno >= hpfs_sb(s)->sb_fs_size)) | ||
| 33 | break; | ||
| 34 | sb_breadahead(s, secno); | ||
| 35 | secno++; | ||
| 36 | n--; | ||
| 37 | } | ||
| 38 | blk_finish_plug(&plug); | ||
| 39 | } | ||
| 40 | |||
| 12 | /* Map a sector into a buffer and return pointers to it and to the buffer. */ | 41 | /* Map a sector into a buffer and return pointers to it and to the buffer. */ |
| 13 | 42 | ||
| 14 | void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp, | 43 | void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp, |
| @@ -18,6 +47,8 @@ void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head | |||
| 18 | 47 | ||
| 19 | hpfs_lock_assert(s); | 48 | hpfs_lock_assert(s); |
| 20 | 49 | ||
| 50 | hpfs_prefetch_sectors(s, secno, ahead); | ||
| 51 | |||
| 21 | cond_resched(); | 52 | cond_resched(); |
| 22 | 53 | ||
| 23 | *bhp = bh = sb_bread(s, secno); | 54 | *bhp = bh = sb_bread(s, secno); |
| @@ -67,6 +98,8 @@ void *hpfs_map_4sectors(struct super_block *s, unsigned secno, struct quad_buffe | |||
| 67 | return NULL; | 98 | return NULL; |
| 68 | } | 99 | } |
| 69 | 100 | ||
| 101 | hpfs_prefetch_sectors(s, secno, 4 + ahead); | ||
| 102 | |||
| 70 | qbh->data = data = kmalloc(2048, GFP_NOFS); | 103 | qbh->data = data = kmalloc(2048, GFP_NOFS); |
| 71 | if (!data) { | 104 | if (!data) { |
| 72 | printk("HPFS: hpfs_map_4sectors: out of memory\n"); | 105 | printk("HPFS: hpfs_map_4sectors: out of memory\n"); |
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c index 05d4816e4e77..fa27980f2229 100644 --- a/fs/hpfs/dentry.c +++ b/fs/hpfs/dentry.c | |||
| @@ -12,8 +12,7 @@ | |||
| 12 | * Note: the dentry argument is the parent dentry. | 12 | * Note: the dentry argument is the parent dentry. |
| 13 | */ | 13 | */ |
| 14 | 14 | ||
| 15 | static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *inode, | 15 | static int hpfs_hash_dentry(const struct dentry *dentry, struct qstr *qstr) |
| 16 | struct qstr *qstr) | ||
| 17 | { | 16 | { |
| 18 | unsigned long hash; | 17 | unsigned long hash; |
| 19 | int i; | 18 | int i; |
| @@ -35,9 +34,7 @@ static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *ino | |||
| 35 | return 0; | 34 | return 0; |
| 36 | } | 35 | } |
| 37 | 36 | ||
| 38 | static int hpfs_compare_dentry(const struct dentry *parent, | 37 | static int hpfs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
| 39 | const struct inode *pinode, | ||
| 40 | const struct dentry *dentry, const struct inode *inode, | ||
| 41 | unsigned int len, const char *str, const struct qstr *name) | 38 | unsigned int len, const char *str, const struct qstr *name) |
| 42 | { | 39 | { |
| 43 | unsigned al = len; | 40 | unsigned al = len; |
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index e4ba5fe4c3b5..4e9dabcf1f4c 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | #include "hpfs_fn.h" | 9 | #include "hpfs_fn.h" |
| 10 | #include <linux/mpage.h> | ||
| 10 | 11 | ||
| 11 | #define BLOCKS(size) (((size) + 511) >> 9) | 12 | #define BLOCKS(size) (((size) + 511) >> 9) |
| 12 | 13 | ||
| @@ -34,7 +35,7 @@ int hpfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 34 | * so we must ignore such errors. | 35 | * so we must ignore such errors. |
| 35 | */ | 36 | */ |
| 36 | 37 | ||
| 37 | static secno hpfs_bmap(struct inode *inode, unsigned file_secno) | 38 | static secno hpfs_bmap(struct inode *inode, unsigned file_secno, unsigned *n_secs) |
| 38 | { | 39 | { |
| 39 | struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); | 40 | struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); |
| 40 | unsigned n, disk_secno; | 41 | unsigned n, disk_secno; |
| @@ -42,11 +43,20 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno) | |||
| 42 | struct buffer_head *bh; | 43 | struct buffer_head *bh; |
| 43 | if (BLOCKS(hpfs_i(inode)->mmu_private) <= file_secno) return 0; | 44 | if (BLOCKS(hpfs_i(inode)->mmu_private) <= file_secno) return 0; |
| 44 | n = file_secno - hpfs_inode->i_file_sec; | 45 | n = file_secno - hpfs_inode->i_file_sec; |
| 45 | if (n < hpfs_inode->i_n_secs) return hpfs_inode->i_disk_sec + n; | 46 | if (n < hpfs_inode->i_n_secs) { |
| 47 | *n_secs = hpfs_inode->i_n_secs - n; | ||
| 48 | return hpfs_inode->i_disk_sec + n; | ||
| 49 | } | ||
| 46 | if (!(fnode = hpfs_map_fnode(inode->i_sb, inode->i_ino, &bh))) return 0; | 50 | if (!(fnode = hpfs_map_fnode(inode->i_sb, inode->i_ino, &bh))) return 0; |
| 47 | disk_secno = hpfs_bplus_lookup(inode->i_sb, inode, &fnode->btree, file_secno, bh); | 51 | disk_secno = hpfs_bplus_lookup(inode->i_sb, inode, &fnode->btree, file_secno, bh); |
| 48 | if (disk_secno == -1) return 0; | 52 | if (disk_secno == -1) return 0; |
| 49 | if (hpfs_chk_sectors(inode->i_sb, disk_secno, 1, "bmap")) return 0; | 53 | if (hpfs_chk_sectors(inode->i_sb, disk_secno, 1, "bmap")) return 0; |
| 54 | n = file_secno - hpfs_inode->i_file_sec; | ||
| 55 | if (n < hpfs_inode->i_n_secs) { | ||
| 56 | *n_secs = hpfs_inode->i_n_secs - n; | ||
| 57 | return hpfs_inode->i_disk_sec + n; | ||
| 58 | } | ||
| 59 | *n_secs = 1; | ||
| 50 | return disk_secno; | 60 | return disk_secno; |
| 51 | } | 61 | } |
| 52 | 62 | ||
| @@ -67,10 +77,14 @@ static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_he | |||
| 67 | { | 77 | { |
| 68 | int r; | 78 | int r; |
| 69 | secno s; | 79 | secno s; |
| 80 | unsigned n_secs; | ||
| 70 | hpfs_lock(inode->i_sb); | 81 | hpfs_lock(inode->i_sb); |
| 71 | s = hpfs_bmap(inode, iblock); | 82 | s = hpfs_bmap(inode, iblock, &n_secs); |
| 72 | if (s) { | 83 | if (s) { |
| 84 | if (bh_result->b_size >> 9 < n_secs) | ||
| 85 | n_secs = bh_result->b_size >> 9; | ||
| 73 | map_bh(bh_result, inode->i_sb, s); | 86 | map_bh(bh_result, inode->i_sb, s); |
| 87 | bh_result->b_size = n_secs << 9; | ||
| 74 | goto ret_0; | 88 | goto ret_0; |
| 75 | } | 89 | } |
| 76 | if (!create) goto ret_0; | 90 | if (!create) goto ret_0; |
| @@ -95,14 +109,26 @@ static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_he | |||
| 95 | return r; | 109 | return r; |
| 96 | } | 110 | } |
| 97 | 111 | ||
| 112 | static int hpfs_readpage(struct file *file, struct page *page) | ||
| 113 | { | ||
| 114 | return mpage_readpage(page, hpfs_get_block); | ||
| 115 | } | ||
| 116 | |||
| 98 | static int hpfs_writepage(struct page *page, struct writeback_control *wbc) | 117 | static int hpfs_writepage(struct page *page, struct writeback_control *wbc) |
| 99 | { | 118 | { |
| 100 | return block_write_full_page(page,hpfs_get_block, wbc); | 119 | return block_write_full_page(page, hpfs_get_block, wbc); |
| 101 | } | 120 | } |
| 102 | 121 | ||
| 103 | static int hpfs_readpage(struct file *file, struct page *page) | 122 | static int hpfs_readpages(struct file *file, struct address_space *mapping, |
| 123 | struct list_head *pages, unsigned nr_pages) | ||
| 124 | { | ||
| 125 | return mpage_readpages(mapping, pages, nr_pages, hpfs_get_block); | ||
| 126 | } | ||
| 127 | |||
| 128 | static int hpfs_writepages(struct address_space *mapping, | ||
| 129 | struct writeback_control *wbc) | ||
| 104 | { | 130 | { |
| 105 | return block_read_full_page(page,hpfs_get_block); | 131 | return mpage_writepages(mapping, wbc, hpfs_get_block); |
| 106 | } | 132 | } |
| 107 | 133 | ||
| 108 | static void hpfs_write_failed(struct address_space *mapping, loff_t to) | 134 | static void hpfs_write_failed(struct address_space *mapping, loff_t to) |
| @@ -161,6 +187,8 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) | |||
| 161 | const struct address_space_operations hpfs_aops = { | 187 | const struct address_space_operations hpfs_aops = { |
| 162 | .readpage = hpfs_readpage, | 188 | .readpage = hpfs_readpage, |
| 163 | .writepage = hpfs_writepage, | 189 | .writepage = hpfs_writepage, |
| 190 | .readpages = hpfs_readpages, | ||
| 191 | .writepages = hpfs_writepages, | ||
| 164 | .write_begin = hpfs_write_begin, | 192 | .write_begin = hpfs_write_begin, |
| 165 | .write_end = hpfs_write_end, | 193 | .write_end = hpfs_write_end, |
| 166 | .bmap = _hpfs_bmap | 194 | .bmap = _hpfs_bmap |
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index b7ae286646b5..1b398636e990 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h | |||
| @@ -27,8 +27,9 @@ | |||
| 27 | #define ALLOC_FWD_MAX 128 | 27 | #define ALLOC_FWD_MAX 128 |
| 28 | #define ALLOC_M 1 | 28 | #define ALLOC_M 1 |
| 29 | #define FNODE_RD_AHEAD 16 | 29 | #define FNODE_RD_AHEAD 16 |
| 30 | #define ANODE_RD_AHEAD 16 | 30 | #define ANODE_RD_AHEAD 0 |
| 31 | #define DNODE_RD_AHEAD 4 | 31 | #define DNODE_RD_AHEAD 72 |
| 32 | #define COUNT_RD_AHEAD 62 | ||
| 32 | 33 | ||
| 33 | #define FREE_DNODES_ADD 58 | 34 | #define FREE_DNODES_ADD 58 |
| 34 | #define FREE_DNODES_DEL 29 | 35 | #define FREE_DNODES_DEL 29 |
| @@ -207,6 +208,7 @@ void hpfs_remove_fnode(struct super_block *, fnode_secno fno); | |||
| 207 | 208 | ||
| 208 | /* buffer.c */ | 209 | /* buffer.c */ |
| 209 | 210 | ||
| 211 | void hpfs_prefetch_sectors(struct super_block *, unsigned, int); | ||
| 210 | void *hpfs_map_sector(struct super_block *, unsigned, struct buffer_head **, int); | 212 | void *hpfs_map_sector(struct super_block *, unsigned, struct buffer_head **, int); |
| 211 | void *hpfs_get_sector(struct super_block *, unsigned, struct buffer_head **); | 213 | void *hpfs_get_sector(struct super_block *, unsigned, struct buffer_head **); |
| 212 | void *hpfs_map_4sectors(struct super_block *, unsigned, struct quad_buffer_head *, int); | 214 | void *hpfs_map_4sectors(struct super_block *, unsigned, struct quad_buffer_head *, int); |
| @@ -271,6 +273,7 @@ void hpfs_evict_inode(struct inode *); | |||
| 271 | 273 | ||
| 272 | __le32 *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); | 274 | __le32 *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); |
| 273 | __le32 *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); | 275 | __le32 *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); |
| 276 | void hpfs_prefetch_bitmap(struct super_block *, unsigned); | ||
| 274 | unsigned char *hpfs_load_code_page(struct super_block *, secno); | 277 | unsigned char *hpfs_load_code_page(struct super_block *, secno); |
| 275 | __le32 *hpfs_load_bitmap_directory(struct super_block *, secno bmp); | 278 | __le32 *hpfs_load_bitmap_directory(struct super_block *, secno bmp); |
| 276 | struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **); | 279 | struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **); |
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index 4acb19d78359..3aa66ae1031e 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c | |||
| @@ -17,7 +17,9 @@ __le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, | |||
| 17 | struct quad_buffer_head *qbh, char *id) | 17 | struct quad_buffer_head *qbh, char *id) |
| 18 | { | 18 | { |
| 19 | secno sec; | 19 | secno sec; |
| 20 | if (hpfs_sb(s)->sb_chk) if (bmp_block * 16384 > hpfs_sb(s)->sb_fs_size) { | 20 | __le32 *ret; |
| 21 | unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; | ||
| 22 | if (hpfs_sb(s)->sb_chk) if (bmp_block >= n_bands) { | ||
| 21 | hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id); | 23 | hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id); |
| 22 | return NULL; | 24 | return NULL; |
| 23 | } | 25 | } |
| @@ -26,7 +28,23 @@ __le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, | |||
| 26 | hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id); | 28 | hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id); |
| 27 | return NULL; | 29 | return NULL; |
| 28 | } | 30 | } |
| 29 | return hpfs_map_4sectors(s, sec, qbh, 4); | 31 | ret = hpfs_map_4sectors(s, sec, qbh, 4); |
| 32 | if (ret) hpfs_prefetch_bitmap(s, bmp_block + 1); | ||
| 33 | return ret; | ||
| 34 | } | ||
| 35 | |||
| 36 | void hpfs_prefetch_bitmap(struct super_block *s, unsigned bmp_block) | ||
| 37 | { | ||
| 38 | unsigned to_prefetch, next_prefetch; | ||
| 39 | unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; | ||
| 40 | if (unlikely(bmp_block >= n_bands)) | ||
| 41 | return; | ||
| 42 | to_prefetch = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]); | ||
| 43 | if (unlikely(bmp_block + 1 >= n_bands)) | ||
| 44 | next_prefetch = 0; | ||
| 45 | else | ||
| 46 | next_prefetch = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block + 1]); | ||
| 47 | hpfs_prefetch_sectors(s, to_prefetch, 4 + 4 * (to_prefetch + 4 == next_prefetch)); | ||
| 30 | } | 48 | } |
| 31 | 49 | ||
| 32 | /* | 50 | /* |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index a0617e706957..4334cda8dba1 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
| @@ -121,7 +121,7 @@ unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) | |||
| 121 | unsigned long *bits; | 121 | unsigned long *bits; |
| 122 | unsigned count; | 122 | unsigned count; |
| 123 | 123 | ||
| 124 | bits = hpfs_map_4sectors(s, secno, &qbh, 4); | 124 | bits = hpfs_map_4sectors(s, secno, &qbh, 0); |
| 125 | if (!bits) | 125 | if (!bits) |
| 126 | return 0; | 126 | return 0; |
| 127 | count = bitmap_weight(bits, 2048 * BITS_PER_BYTE); | 127 | count = bitmap_weight(bits, 2048 * BITS_PER_BYTE); |
| @@ -134,8 +134,13 @@ static unsigned count_bitmaps(struct super_block *s) | |||
| 134 | unsigned n, count, n_bands; | 134 | unsigned n, count, n_bands; |
| 135 | n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; | 135 | n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; |
| 136 | count = 0; | 136 | count = 0; |
| 137 | for (n = 0; n < n_bands; n++) | 137 | for (n = 0; n < COUNT_RD_AHEAD; n++) { |
| 138 | hpfs_prefetch_bitmap(s, n); | ||
| 139 | } | ||
| 140 | for (n = 0; n < n_bands; n++) { | ||
| 141 | hpfs_prefetch_bitmap(s, n + COUNT_RD_AHEAD); | ||
| 138 | count += hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n])); | 142 | count += hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n])); |
| 143 | } | ||
| 139 | return count; | 144 | return count; |
| 140 | } | 145 | } |
| 141 | 146 | ||
| @@ -558,7 +563,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) | |||
| 558 | sbi->sb_cp_table = NULL; | 563 | sbi->sb_cp_table = NULL; |
| 559 | sbi->sb_c_bitmap = -1; | 564 | sbi->sb_c_bitmap = -1; |
| 560 | sbi->sb_max_fwd_alloc = 0xffffff; | 565 | sbi->sb_max_fwd_alloc = 0xffffff; |
| 561 | 566 | ||
| 567 | if (sbi->sb_fs_size >= 0x80000000) { | ||
| 568 | hpfs_error(s, "invalid size in superblock: %08x", | ||
| 569 | (unsigned)sbi->sb_fs_size); | ||
| 570 | goto bail4; | ||
| 571 | } | ||
| 572 | |||
| 562 | /* Load bitmap directory */ | 573 | /* Load bitmap directory */ |
| 563 | if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps)))) | 574 | if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps)))) |
| 564 | goto bail4; | 575 | goto bail4; |
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index fc90ab11c340..4338ff32959d 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c | |||
| @@ -69,7 +69,7 @@ static char *dentry_name(struct dentry *dentry, int extra) | |||
| 69 | struct dentry *parent; | 69 | struct dentry *parent; |
| 70 | char *root, *name; | 70 | char *root, *name; |
| 71 | const char *seg_name; | 71 | const char *seg_name; |
| 72 | int len, seg_len; | 72 | int len, seg_len, root_len; |
| 73 | 73 | ||
| 74 | len = 0; | 74 | len = 0; |
| 75 | parent = dentry; | 75 | parent = dentry; |
| @@ -81,7 +81,8 @@ static char *dentry_name(struct dentry *dentry, int extra) | |||
| 81 | } | 81 | } |
| 82 | 82 | ||
| 83 | root = "proc"; | 83 | root = "proc"; |
| 84 | len += strlen(root); | 84 | root_len = strlen(root); |
| 85 | len += root_len; | ||
| 85 | name = kmalloc(len + extra + 1, GFP_KERNEL); | 86 | name = kmalloc(len + extra + 1, GFP_KERNEL); |
| 86 | if (name == NULL) | 87 | if (name == NULL) |
| 87 | return NULL; | 88 | return NULL; |
| @@ -91,7 +92,7 @@ static char *dentry_name(struct dentry *dentry, int extra) | |||
| 91 | while (parent->d_parent != parent) { | 92 | while (parent->d_parent != parent) { |
| 92 | if (is_pid(parent)) { | 93 | if (is_pid(parent)) { |
| 93 | seg_name = "pid"; | 94 | seg_name = "pid"; |
| 94 | seg_len = strlen("pid"); | 95 | seg_len = strlen(seg_name); |
| 95 | } | 96 | } |
| 96 | else { | 97 | else { |
| 97 | seg_name = parent->d_name.name; | 98 | seg_name = parent->d_name.name; |
| @@ -100,10 +101,10 @@ static char *dentry_name(struct dentry *dentry, int extra) | |||
| 100 | 101 | ||
| 101 | len -= seg_len + 1; | 102 | len -= seg_len + 1; |
| 102 | name[len] = '/'; | 103 | name[len] = '/'; |
| 103 | strncpy(&name[len + 1], seg_name, seg_len); | 104 | memcpy(&name[len + 1], seg_name, seg_len); |
| 104 | parent = parent->d_parent; | 105 | parent = parent->d_parent; |
| 105 | } | 106 | } |
| 106 | strncpy(name, root, strlen(root)); | 107 | memcpy(name, root, root_len); |
| 107 | return name; | 108 | return name; |
| 108 | } | 109 | } |
| 109 | 110 | ||
diff --git a/fs/inode.c b/fs/inode.c index 00d5fc3b86e1..d6dfb09c8280 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
| @@ -333,8 +333,10 @@ EXPORT_SYMBOL(set_nlink); | |||
| 333 | */ | 333 | */ |
| 334 | void inc_nlink(struct inode *inode) | 334 | void inc_nlink(struct inode *inode) |
| 335 | { | 335 | { |
| 336 | if (WARN_ON(inode->i_nlink == 0)) | 336 | if (unlikely(inode->i_nlink == 0)) { |
| 337 | WARN_ON(!(inode->i_state & I_LINKABLE)); | ||
| 337 | atomic_long_dec(&inode->i_sb->s_remove_count); | 338 | atomic_long_dec(&inode->i_sb->s_remove_count); |
| 339 | } | ||
| 338 | 340 | ||
| 339 | inode->__i_nlink++; | 341 | inode->__i_nlink++; |
| 340 | } | 342 | } |
diff --git a/fs/internal.h b/fs/internal.h index 68121584ae37..7c5f01cf619d 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
| @@ -96,11 +96,12 @@ struct open_flags { | |||
| 96 | umode_t mode; | 96 | umode_t mode; |
| 97 | int acc_mode; | 97 | int acc_mode; |
| 98 | int intent; | 98 | int intent; |
| 99 | int lookup_flags; | ||
| 99 | }; | 100 | }; |
| 100 | extern struct file *do_filp_open(int dfd, struct filename *pathname, | 101 | extern struct file *do_filp_open(int dfd, struct filename *pathname, |
| 101 | const struct open_flags *op, int flags); | 102 | const struct open_flags *op); |
| 102 | extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, | 103 | extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, |
| 103 | const char *, const struct open_flags *, int lookup_flags); | 104 | const char *, const struct open_flags *); |
| 104 | 105 | ||
| 105 | extern long do_handle_open(int mountdirfd, | 106 | extern long do_handle_open(int mountdirfd, |
| 106 | struct file_handle __user *ufh, int open_flag); | 107 | struct file_handle __user *ufh, int open_flag); |
| @@ -130,6 +131,7 @@ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); | |||
| 130 | * read_write.c | 131 | * read_write.c |
| 131 | */ | 132 | */ |
| 132 | extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); | 133 | extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); |
| 134 | extern int rw_verify_area(int, struct file *, const loff_t *, size_t); | ||
| 133 | 135 | ||
| 134 | /* | 136 | /* |
| 135 | * splice.c | 137 | * splice.c |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d9b8aebdeb22..c348d6d88624 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
| @@ -28,31 +28,23 @@ | |||
| 28 | 28 | ||
| 29 | #define BEQUIET | 29 | #define BEQUIET |
| 30 | 30 | ||
| 31 | static int isofs_hashi(const struct dentry *parent, const struct inode *inode, | 31 | static int isofs_hashi(const struct dentry *parent, struct qstr *qstr); |
| 32 | struct qstr *qstr); | 32 | static int isofs_hash(const struct dentry *parent, struct qstr *qstr); |
| 33 | static int isofs_hash(const struct dentry *parent, const struct inode *inode, | ||
| 34 | struct qstr *qstr); | ||
| 35 | static int isofs_dentry_cmpi(const struct dentry *parent, | 33 | static int isofs_dentry_cmpi(const struct dentry *parent, |
| 36 | const struct inode *pinode, | 34 | const struct dentry *dentry, |
| 37 | const struct dentry *dentry, const struct inode *inode, | ||
| 38 | unsigned int len, const char *str, const struct qstr *name); | 35 | unsigned int len, const char *str, const struct qstr *name); |
| 39 | static int isofs_dentry_cmp(const struct dentry *parent, | 36 | static int isofs_dentry_cmp(const struct dentry *parent, |
| 40 | const struct inode *pinode, | 37 | const struct dentry *dentry, |
| 41 | const struct dentry *dentry, const struct inode *inode, | ||
| 42 | unsigned int len, const char *str, const struct qstr *name); | 38 | unsigned int len, const char *str, const struct qstr *name); |
| 43 | 39 | ||
| 44 | #ifdef CONFIG_JOLIET | 40 | #ifdef CONFIG_JOLIET |
| 45 | static int isofs_hashi_ms(const struct dentry *parent, const struct inode *inode, | 41 | static int isofs_hashi_ms(const struct dentry *parent, struct qstr *qstr); |
| 46 | struct qstr *qstr); | 42 | static int isofs_hash_ms(const struct dentry *parent, struct qstr *qstr); |
| 47 | static int isofs_hash_ms(const struct dentry *parent, const struct inode *inode, | ||
| 48 | struct qstr *qstr); | ||
| 49 | static int isofs_dentry_cmpi_ms(const struct dentry *parent, | 43 | static int isofs_dentry_cmpi_ms(const struct dentry *parent, |
| 50 | const struct inode *pinode, | 44 | const struct dentry *dentry, |
| 51 | const struct dentry *dentry, const struct inode *inode, | ||
| 52 | unsigned int len, const char *str, const struct qstr *name); | 45 | unsigned int len, const char *str, const struct qstr *name); |
| 53 | static int isofs_dentry_cmp_ms(const struct dentry *parent, | 46 | static int isofs_dentry_cmp_ms(const struct dentry *parent, |
| 54 | const struct inode *pinode, | 47 | const struct dentry *dentry, |
| 55 | const struct dentry *dentry, const struct inode *inode, | ||
| 56 | unsigned int len, const char *str, const struct qstr *name); | 48 | unsigned int len, const char *str, const struct qstr *name); |
| 57 | #endif | 49 | #endif |
| 58 | 50 | ||
| @@ -265,30 +257,26 @@ static int isofs_dentry_cmp_common( | |||
| 265 | } | 257 | } |
| 266 | 258 | ||
| 267 | static int | 259 | static int |
| 268 | isofs_hash(const struct dentry *dentry, const struct inode *inode, | 260 | isofs_hash(const struct dentry *dentry, struct qstr *qstr) |
| 269 | struct qstr *qstr) | ||
| 270 | { | 261 | { |
| 271 | return isofs_hash_common(dentry, qstr, 0); | 262 | return isofs_hash_common(dentry, qstr, 0); |
| 272 | } | 263 | } |
| 273 | 264 | ||
| 274 | static int | 265 | static int |
| 275 | isofs_hashi(const struct dentry *dentry, const struct inode *inode, | 266 | isofs_hashi(const struct dentry *dentry, struct qstr *qstr) |
| 276 | struct qstr *qstr) | ||
| 277 | { | 267 | { |
| 278 | return isofs_hashi_common(dentry, qstr, 0); | 268 | return isofs_hashi_common(dentry, qstr, 0); |
| 279 | } | 269 | } |
| 280 | 270 | ||
| 281 | static int | 271 | static int |
| 282 | isofs_dentry_cmp(const struct dentry *parent, const struct inode *pinode, | 272 | isofs_dentry_cmp(const struct dentry *parent, const struct dentry *dentry, |
| 283 | const struct dentry *dentry, const struct inode *inode, | ||
| 284 | unsigned int len, const char *str, const struct qstr *name) | 273 | unsigned int len, const char *str, const struct qstr *name) |
| 285 | { | 274 | { |
| 286 | return isofs_dentry_cmp_common(len, str, name, 0, 0); | 275 | return isofs_dentry_cmp_common(len, str, name, 0, 0); |
| 287 | } | 276 | } |
| 288 | 277 | ||
| 289 | static int | 278 | static int |
| 290 | isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode, | 279 | isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, |
| 291 | const struct dentry *dentry, const struct inode *inode, | ||
| 292 | unsigned int len, const char *str, const struct qstr *name) | 280 | unsigned int len, const char *str, const struct qstr *name) |
| 293 | { | 281 | { |
| 294 | return isofs_dentry_cmp_common(len, str, name, 0, 1); | 282 | return isofs_dentry_cmp_common(len, str, name, 0, 1); |
| @@ -296,30 +284,26 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode, | |||
| 296 | 284 | ||
| 297 | #ifdef CONFIG_JOLIET | 285 | #ifdef CONFIG_JOLIET |
| 298 | static int | 286 | static int |
| 299 | isofs_hash_ms(const struct dentry *dentry, const struct inode *inode, | 287 | isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) |
| 300 | struct qstr *qstr) | ||
| 301 | { | 288 | { |
| 302 | return isofs_hash_common(dentry, qstr, 1); | 289 | return isofs_hash_common(dentry, qstr, 1); |
| 303 | } | 290 | } |
| 304 | 291 | ||
| 305 | static int | 292 | static int |
| 306 | isofs_hashi_ms(const struct dentry *dentry, const struct inode *inode, | 293 | isofs_hashi_ms(const struct dentry *dentry, struct qstr *qstr) |
| 307 | struct qstr *qstr) | ||
| 308 | { | 294 | { |
| 309 | return isofs_hashi_common(dentry, qstr, 1); | 295 | return isofs_hashi_common(dentry, qstr, 1); |
| 310 | } | 296 | } |
| 311 | 297 | ||
| 312 | static int | 298 | static int |
| 313 | isofs_dentry_cmp_ms(const struct dentry *parent, const struct inode *pinode, | 299 | isofs_dentry_cmp_ms(const struct dentry *parent, const struct dentry *dentry, |
| 314 | const struct dentry *dentry, const struct inode *inode, | ||
| 315 | unsigned int len, const char *str, const struct qstr *name) | 300 | unsigned int len, const char *str, const struct qstr *name) |
| 316 | { | 301 | { |
| 317 | return isofs_dentry_cmp_common(len, str, name, 1, 0); | 302 | return isofs_dentry_cmp_common(len, str, name, 1, 0); |
| 318 | } | 303 | } |
| 319 | 304 | ||
| 320 | static int | 305 | static int |
| 321 | isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode, | 306 | isofs_dentry_cmpi_ms(const struct dentry *parent, const struct dentry *dentry, |
| 322 | const struct dentry *dentry, const struct inode *inode, | ||
| 323 | unsigned int len, const char *str, const struct qstr *name) | 307 | unsigned int len, const char *str, const struct qstr *name) |
| 324 | { | 308 | { |
| 325 | return isofs_dentry_cmp_common(len, str, name, 1, 1); | 309 | return isofs_dentry_cmp_common(len, str, name, 1, 1); |
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index c167028844ed..95295640d9c8 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c | |||
| @@ -37,8 +37,7 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen) | |||
| 37 | 37 | ||
| 38 | qstr.name = compare; | 38 | qstr.name = compare; |
| 39 | qstr.len = dlen; | 39 | qstr.len = dlen; |
| 40 | return dentry->d_op->d_compare(NULL, NULL, NULL, NULL, | 40 | return dentry->d_op->d_compare(NULL, NULL, dentry->d_name.len, dentry->d_name.name, &qstr); |
| 41 | dentry->d_name.len, dentry->d_name.name, &qstr); | ||
| 42 | } | 41 | } |
| 43 | 42 | ||
| 44 | /* | 43 | /* |
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 9a55f53be5ff..370d7b6c5942 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c | |||
| @@ -346,8 +346,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) | |||
| 346 | printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", | 346 | printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", |
| 347 | (unsigned long long) blkno, | 347 | (unsigned long long) blkno, |
| 348 | (unsigned long long) nblocks); | 348 | (unsigned long long) nblocks); |
| 349 | jfs_error(ip->i_sb, | 349 | jfs_error(ip->i_sb, "block to be freed is outside the map\n"); |
| 350 | "dbFree: block to be freed is outside the map"); | ||
| 351 | return -EIO; | 350 | return -EIO; |
| 352 | } | 351 | } |
| 353 | 352 | ||
| @@ -384,7 +383,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) | |||
| 384 | 383 | ||
| 385 | /* free the blocks. */ | 384 | /* free the blocks. */ |
| 386 | if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { | 385 | if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { |
| 387 | jfs_error(ip->i_sb, "dbFree: error in block map\n"); | 386 | jfs_error(ip->i_sb, "error in block map\n"); |
| 388 | release_metapage(mp); | 387 | release_metapage(mp); |
| 389 | IREAD_UNLOCK(ipbmap); | 388 | IREAD_UNLOCK(ipbmap); |
| 390 | return (rc); | 389 | return (rc); |
| @@ -441,8 +440,7 @@ dbUpdatePMap(struct inode *ipbmap, | |||
| 441 | printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", | 440 | printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", |
| 442 | (unsigned long long) blkno, | 441 | (unsigned long long) blkno, |
| 443 | (unsigned long long) nblocks); | 442 | (unsigned long long) nblocks); |
| 444 | jfs_error(ipbmap->i_sb, | 443 | jfs_error(ipbmap->i_sb, "blocks are outside the map\n"); |
| 445 | "dbUpdatePMap: blocks are outside the map"); | ||
| 446 | return -EIO; | 444 | return -EIO; |
| 447 | } | 445 | } |
| 448 | 446 | ||
| @@ -726,7 +724,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) | |||
| 726 | 724 | ||
| 727 | /* the hint should be within the map */ | 725 | /* the hint should be within the map */ |
| 728 | if (hint >= mapSize) { | 726 | if (hint >= mapSize) { |
| 729 | jfs_error(ip->i_sb, "dbAlloc: the hint is outside the map"); | 727 | jfs_error(ip->i_sb, "the hint is outside the map\n"); |
| 730 | return -EIO; | 728 | return -EIO; |
| 731 | } | 729 | } |
| 732 | 730 | ||
| @@ -1057,8 +1055,7 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) | |||
| 1057 | bmp = sbi->bmap; | 1055 | bmp = sbi->bmap; |
| 1058 | if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) { | 1056 | if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) { |
| 1059 | IREAD_UNLOCK(ipbmap); | 1057 | IREAD_UNLOCK(ipbmap); |
| 1060 | jfs_error(ip->i_sb, | 1058 | jfs_error(ip->i_sb, "the block is outside the filesystem\n"); |
| 1061 | "dbExtend: the block is outside the filesystem"); | ||
| 1062 | return -EIO; | 1059 | return -EIO; |
| 1063 | } | 1060 | } |
| 1064 | 1061 | ||
| @@ -1134,8 +1131,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 1134 | u32 mask; | 1131 | u32 mask; |
| 1135 | 1132 | ||
| 1136 | if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { | 1133 | if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { |
| 1137 | jfs_error(bmp->db_ipbmap->i_sb, | 1134 | jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n"); |
| 1138 | "dbAllocNext: Corrupt dmap page"); | ||
| 1139 | return -EIO; | 1135 | return -EIO; |
| 1140 | } | 1136 | } |
| 1141 | 1137 | ||
| @@ -1265,8 +1261,7 @@ dbAllocNear(struct bmap * bmp, | |||
| 1265 | s8 *leaf; | 1261 | s8 *leaf; |
| 1266 | 1262 | ||
| 1267 | if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { | 1263 | if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { |
| 1268 | jfs_error(bmp->db_ipbmap->i_sb, | 1264 | jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n"); |
| 1269 | "dbAllocNear: Corrupt dmap page"); | ||
| 1270 | return -EIO; | 1265 | return -EIO; |
| 1271 | } | 1266 | } |
| 1272 | 1267 | ||
| @@ -1381,8 +1376,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
| 1381 | */ | 1376 | */ |
| 1382 | if (l2nb > bmp->db_agl2size) { | 1377 | if (l2nb > bmp->db_agl2size) { |
| 1383 | jfs_error(bmp->db_ipbmap->i_sb, | 1378 | jfs_error(bmp->db_ipbmap->i_sb, |
| 1384 | "dbAllocAG: allocation request is larger than the " | 1379 | "allocation request is larger than the allocation group size\n"); |
| 1385 | "allocation group size"); | ||
| 1386 | return -EIO; | 1380 | return -EIO; |
| 1387 | } | 1381 | } |
| 1388 | 1382 | ||
| @@ -1417,7 +1411,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
| 1417 | (unsigned long long) blkno, | 1411 | (unsigned long long) blkno, |
| 1418 | (unsigned long long) nblocks); | 1412 | (unsigned long long) nblocks); |
| 1419 | jfs_error(bmp->db_ipbmap->i_sb, | 1413 | jfs_error(bmp->db_ipbmap->i_sb, |
| 1420 | "dbAllocAG: dbAllocCtl failed in free AG"); | 1414 | "dbAllocCtl failed in free AG\n"); |
| 1421 | } | 1415 | } |
| 1422 | return (rc); | 1416 | return (rc); |
| 1423 | } | 1417 | } |
| @@ -1433,8 +1427,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
| 1433 | budmin = dcp->budmin; | 1427 | budmin = dcp->budmin; |
| 1434 | 1428 | ||
| 1435 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { | 1429 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { |
| 1436 | jfs_error(bmp->db_ipbmap->i_sb, | 1430 | jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); |
| 1437 | "dbAllocAG: Corrupt dmapctl page"); | ||
| 1438 | release_metapage(mp); | 1431 | release_metapage(mp); |
| 1439 | return -EIO; | 1432 | return -EIO; |
| 1440 | } | 1433 | } |
| @@ -1475,7 +1468,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
| 1475 | } | 1468 | } |
| 1476 | if (n == 4) { | 1469 | if (n == 4) { |
| 1477 | jfs_error(bmp->db_ipbmap->i_sb, | 1470 | jfs_error(bmp->db_ipbmap->i_sb, |
| 1478 | "dbAllocAG: failed descending stree"); | 1471 | "failed descending stree\n"); |
| 1479 | release_metapage(mp); | 1472 | release_metapage(mp); |
| 1480 | return -EIO; | 1473 | return -EIO; |
| 1481 | } | 1474 | } |
| @@ -1515,8 +1508,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
| 1515 | &blkno))) { | 1508 | &blkno))) { |
| 1516 | if (rc == -ENOSPC) { | 1509 | if (rc == -ENOSPC) { |
| 1517 | jfs_error(bmp->db_ipbmap->i_sb, | 1510 | jfs_error(bmp->db_ipbmap->i_sb, |
| 1518 | "dbAllocAG: control page " | 1511 | "control page inconsistent\n"); |
| 1519 | "inconsistent"); | ||
| 1520 | return -EIO; | 1512 | return -EIO; |
| 1521 | } | 1513 | } |
| 1522 | return (rc); | 1514 | return (rc); |
| @@ -1528,7 +1520,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
| 1528 | rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); | 1520 | rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); |
| 1529 | if (rc == -ENOSPC) { | 1521 | if (rc == -ENOSPC) { |
| 1530 | jfs_error(bmp->db_ipbmap->i_sb, | 1522 | jfs_error(bmp->db_ipbmap->i_sb, |
| 1531 | "dbAllocAG: unable to allocate blocks"); | 1523 | "unable to allocate blocks\n"); |
| 1532 | rc = -EIO; | 1524 | rc = -EIO; |
| 1533 | } | 1525 | } |
| 1534 | return (rc); | 1526 | return (rc); |
| @@ -1587,8 +1579,7 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) | |||
| 1587 | */ | 1579 | */ |
| 1588 | rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); | 1580 | rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); |
| 1589 | if (rc == -ENOSPC) { | 1581 | if (rc == -ENOSPC) { |
| 1590 | jfs_error(bmp->db_ipbmap->i_sb, | 1582 | jfs_error(bmp->db_ipbmap->i_sb, "unable to allocate blocks\n"); |
| 1591 | "dbAllocAny: unable to allocate blocks"); | ||
| 1592 | return -EIO; | 1583 | return -EIO; |
| 1593 | } | 1584 | } |
| 1594 | return (rc); | 1585 | return (rc); |
| @@ -1652,8 +1643,7 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) | |||
| 1652 | range_cnt = min_t(u64, max_ranges + 1, 32 * 1024); | 1643 | range_cnt = min_t(u64, max_ranges + 1, 32 * 1024); |
| 1653 | totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS); | 1644 | totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS); |
| 1654 | if (totrim == NULL) { | 1645 | if (totrim == NULL) { |
| 1655 | jfs_error(bmp->db_ipbmap->i_sb, | 1646 | jfs_error(bmp->db_ipbmap->i_sb, "no memory for trim array\n"); |
| 1656 | "dbDiscardAG: no memory for trim array"); | ||
| 1657 | IWRITE_UNLOCK(ipbmap); | 1647 | IWRITE_UNLOCK(ipbmap); |
| 1658 | return 0; | 1648 | return 0; |
| 1659 | } | 1649 | } |
| @@ -1682,8 +1672,7 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) | |||
| 1682 | nblocks = 1 << l2nb; | 1672 | nblocks = 1 << l2nb; |
| 1683 | } else { | 1673 | } else { |
| 1684 | /* Trim any already allocated blocks */ | 1674 | /* Trim any already allocated blocks */ |
| 1685 | jfs_error(bmp->db_ipbmap->i_sb, | 1675 | jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n"); |
| 1686 | "dbDiscardAG: -EIO"); | ||
| 1687 | break; | 1676 | break; |
| 1688 | } | 1677 | } |
| 1689 | 1678 | ||
| @@ -1761,7 +1750,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) | |||
| 1761 | 1750 | ||
| 1762 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { | 1751 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { |
| 1763 | jfs_error(bmp->db_ipbmap->i_sb, | 1752 | jfs_error(bmp->db_ipbmap->i_sb, |
| 1764 | "dbFindCtl: Corrupt dmapctl page"); | 1753 | "Corrupt dmapctl page\n"); |
| 1765 | release_metapage(mp); | 1754 | release_metapage(mp); |
| 1766 | return -EIO; | 1755 | return -EIO; |
| 1767 | } | 1756 | } |
| @@ -1782,7 +1771,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) | |||
| 1782 | if (rc) { | 1771 | if (rc) { |
| 1783 | if (lev != level) { | 1772 | if (lev != level) { |
| 1784 | jfs_error(bmp->db_ipbmap->i_sb, | 1773 | jfs_error(bmp->db_ipbmap->i_sb, |
| 1785 | "dbFindCtl: dmap inconsistent"); | 1774 | "dmap inconsistent\n"); |
| 1786 | return -EIO; | 1775 | return -EIO; |
| 1787 | } | 1776 | } |
| 1788 | return -ENOSPC; | 1777 | return -ENOSPC; |
| @@ -1906,7 +1895,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) | |||
| 1906 | if (dp->tree.stree[ROOT] != L2BPERDMAP) { | 1895 | if (dp->tree.stree[ROOT] != L2BPERDMAP) { |
| 1907 | release_metapage(mp); | 1896 | release_metapage(mp); |
| 1908 | jfs_error(bmp->db_ipbmap->i_sb, | 1897 | jfs_error(bmp->db_ipbmap->i_sb, |
| 1909 | "dbAllocCtl: the dmap is not all free"); | 1898 | "the dmap is not all free\n"); |
| 1910 | rc = -EIO; | 1899 | rc = -EIO; |
| 1911 | goto backout; | 1900 | goto backout; |
| 1912 | } | 1901 | } |
| @@ -1953,7 +1942,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) | |||
| 1953 | * to indicate that we have leaked blocks. | 1942 | * to indicate that we have leaked blocks. |
| 1954 | */ | 1943 | */ |
| 1955 | jfs_error(bmp->db_ipbmap->i_sb, | 1944 | jfs_error(bmp->db_ipbmap->i_sb, |
| 1956 | "dbAllocCtl: I/O Error: Block Leakage."); | 1945 | "I/O Error: Block Leakage\n"); |
| 1957 | continue; | 1946 | continue; |
| 1958 | } | 1947 | } |
| 1959 | dp = (struct dmap *) mp->data; | 1948 | dp = (struct dmap *) mp->data; |
| @@ -1965,8 +1954,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) | |||
| 1965 | * to indicate that we have leaked blocks. | 1954 | * to indicate that we have leaked blocks. |
| 1966 | */ | 1955 | */ |
| 1967 | release_metapage(mp); | 1956 | release_metapage(mp); |
| 1968 | jfs_error(bmp->db_ipbmap->i_sb, | 1957 | jfs_error(bmp->db_ipbmap->i_sb, "Block Leakage\n"); |
| 1969 | "dbAllocCtl: Block Leakage."); | ||
| 1970 | continue; | 1958 | continue; |
| 1971 | } | 1959 | } |
| 1972 | 1960 | ||
| @@ -2263,8 +2251,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 2263 | for (; nwords > 0; nwords -= nw) { | 2251 | for (; nwords > 0; nwords -= nw) { |
| 2264 | if (leaf[word] < BUDMIN) { | 2252 | if (leaf[word] < BUDMIN) { |
| 2265 | jfs_error(bmp->db_ipbmap->i_sb, | 2253 | jfs_error(bmp->db_ipbmap->i_sb, |
| 2266 | "dbAllocBits: leaf page " | 2254 | "leaf page corrupt\n"); |
| 2267 | "corrupt"); | ||
| 2268 | break; | 2255 | break; |
| 2269 | } | 2256 | } |
| 2270 | 2257 | ||
| @@ -2536,8 +2523,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) | |||
| 2536 | dcp = (struct dmapctl *) mp->data; | 2523 | dcp = (struct dmapctl *) mp->data; |
| 2537 | 2524 | ||
| 2538 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { | 2525 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { |
| 2539 | jfs_error(bmp->db_ipbmap->i_sb, | 2526 | jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); |
| 2540 | "dbAdjCtl: Corrupt dmapctl page"); | ||
| 2541 | release_metapage(mp); | 2527 | release_metapage(mp); |
| 2542 | return -EIO; | 2528 | return -EIO; |
| 2543 | } | 2529 | } |
| @@ -2638,8 +2624,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) | |||
| 2638 | assert(level == bmp->db_maxlevel); | 2624 | assert(level == bmp->db_maxlevel); |
| 2639 | if (bmp->db_maxfreebud != oldroot) { | 2625 | if (bmp->db_maxfreebud != oldroot) { |
| 2640 | jfs_error(bmp->db_ipbmap->i_sb, | 2626 | jfs_error(bmp->db_ipbmap->i_sb, |
| 2641 | "dbAdjCtl: the maximum free buddy is " | 2627 | "the maximum free buddy is not the old root\n"); |
| 2642 | "not the old root"); | ||
| 2643 | } | 2628 | } |
| 2644 | bmp->db_maxfreebud = dcp->stree[ROOT]; | 2629 | bmp->db_maxfreebud = dcp->stree[ROOT]; |
| 2645 | } | 2630 | } |
| @@ -3481,7 +3466,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) | |||
| 3481 | p = BMAPBLKNO + nbperpage; /* L2 page */ | 3466 | p = BMAPBLKNO + nbperpage; /* L2 page */ |
| 3482 | l2mp = read_metapage(ipbmap, p, PSIZE, 0); | 3467 | l2mp = read_metapage(ipbmap, p, PSIZE, 0); |
| 3483 | if (!l2mp) { | 3468 | if (!l2mp) { |
| 3484 | jfs_error(ipbmap->i_sb, "dbExtendFS: L2 page could not be read"); | 3469 | jfs_error(ipbmap->i_sb, "L2 page could not be read\n"); |
| 3485 | return -EIO; | 3470 | return -EIO; |
| 3486 | } | 3471 | } |
| 3487 | l2dcp = (struct dmapctl *) l2mp->data; | 3472 | l2dcp = (struct dmapctl *) l2mp->data; |
| @@ -3646,8 +3631,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) | |||
| 3646 | } | 3631 | } |
| 3647 | } /* for each L1 in a L2 */ | 3632 | } /* for each L1 in a L2 */ |
| 3648 | 3633 | ||
| 3649 | jfs_error(ipbmap->i_sb, | 3634 | jfs_error(ipbmap->i_sb, "function has not returned as expected\n"); |
| 3650 | "dbExtendFS: function has not returned as expected"); | ||
| 3651 | errout: | 3635 | errout: |
| 3652 | if (l0mp) | 3636 | if (l0mp) |
| 3653 | release_metapage(l0mp); | 3637 | release_metapage(l0mp); |
| @@ -3717,7 +3701,7 @@ void dbFinalizeBmap(struct inode *ipbmap) | |||
| 3717 | } | 3701 | } |
| 3718 | if (bmp->db_agpref >= bmp->db_numag) { | 3702 | if (bmp->db_agpref >= bmp->db_numag) { |
| 3719 | jfs_error(ipbmap->i_sb, | 3703 | jfs_error(ipbmap->i_sb, |
| 3720 | "cannot find ag with average freespace"); | 3704 | "cannot find ag with average freespace\n"); |
| 3721 | } | 3705 | } |
| 3722 | } | 3706 | } |
| 3723 | 3707 | ||
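The jfs_dmap.c hunks above all apply the same pattern: each jfs_error() call drops its hand-written "functionName:" prefix and gains a trailing "\n", because jfs_error() itself is reworked later in this diff (fs/jfs/super.c) to print the calling function via __builtin_return_address(0). A minimal sketch of a converted call site and roughly what it now logs; the device name and caller shown in the comment are hypothetical, not taken from a real trace:

```c
/* Sketch only: the call site keeps just the message body. */
if (blkno >= bmp->db_mapsize) {
	jfs_error(ip->i_sb, "the hint is outside the map\n");
	return -EIO;
}

/*
 * With the reworked jfs_error() further down, this logs something like
 *   ERROR: (device sdb1): dbAlloc: the hint is outside the map
 * where "sdb1" is a hypothetical device and "dbAlloc" is filled in
 * automatically from __builtin_return_address(0), so callers no longer
 * embed their own name in the format string.
 */
```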
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 9f4ed13d9f15..8743ba9c6742 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c | |||
| @@ -124,21 +124,21 @@ struct dtsplit { | |||
| 124 | #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) | 124 | #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) |
| 125 | 125 | ||
| 126 | /* get page buffer for specified block address */ | 126 | /* get page buffer for specified block address */ |
| 127 | #define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ | 127 | #define DT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ |
| 128 | {\ | 128 | do { \ |
| 129 | BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\ | 129 | BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot); \ |
| 130 | if (!(RC))\ | 130 | if (!(RC)) { \ |
| 131 | {\ | 131 | if (((P)->header.nextindex > \ |
| 132 | if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\ | 132 | (((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \ |
| 133 | ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\ | 133 | ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) { \ |
| 134 | {\ | 134 | BT_PUTPAGE(MP); \ |
| 135 | BT_PUTPAGE(MP);\ | 135 | jfs_error((IP)->i_sb, \ |
| 136 | jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\ | 136 | "DT_GETPAGE: dtree page corrupt\n"); \ |
| 137 | MP = NULL;\ | 137 | MP = NULL; \ |
| 138 | RC = -EIO;\ | 138 | RC = -EIO; \ |
| 139 | }\ | 139 | } \ |
| 140 | }\ | 140 | } \ |
| 141 | } | 141 | } while (0) |
| 142 | 142 | ||
| 143 | /* for consistency */ | 143 | /* for consistency */ |
| 144 | #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) | 144 | #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) |
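Besides the shorter error string, DT_GETPAGE (like XT_GETPAGE in jfs_xtree.c below) is rewrapped in the standard do { ... } while (0) idiom so the multi-statement macro expands as a single statement. A self-contained sketch of why the idiom matters, using made-up helper names rather than the JFS macros:

```c
#include <stdio.h>

static int errors;

/* Brace-only body: the ';' the caller writes after the macro terminates the
 * 'if', so a following 'else' no longer parses. */
#define REPORT_BRACES(msg)	{ puts(msg); errors++; }

/* do { } while (0): the expansion is one statement, usable anywhere a plain
 * statement is, including between 'if' and 'else'. */
#define REPORT_SAFE(msg)	do { puts(msg); errors++; } while (0)

void check(int corrupt)
{
	if (corrupt)
		REPORT_SAFE("dtree page corrupt");	/* compiles and behaves */
	else
		errors = 0;
	/* Swapping in REPORT_BRACES above would not compile, because the
	 * extra ';' orphans the 'else'. */
}
```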
| @@ -776,7 +776,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, | |||
| 776 | /* Something's corrupted, mark filesystem dirty so | 776 | /* Something's corrupted, mark filesystem dirty so |
| 777 | * chkdsk will fix it. | 777 | * chkdsk will fix it. |
| 778 | */ | 778 | */ |
| 779 | jfs_error(sb, "stack overrun in dtSearch!"); | 779 | jfs_error(sb, "stack overrun!\n"); |
| 780 | BT_STACK_DUMP(btstack); | 780 | BT_STACK_DUMP(btstack); |
| 781 | rc = -EIO; | 781 | rc = -EIO; |
| 782 | goto out; | 782 | goto out; |
| @@ -3247,8 +3247,7 @@ int jfs_readdir(struct file *file, struct dir_context *ctx) | |||
| 3247 | /* Sanity Check */ | 3247 | /* Sanity Check */ |
| 3248 | if (d_namleft == 0) { | 3248 | if (d_namleft == 0) { |
| 3249 | jfs_error(ip->i_sb, | 3249 | jfs_error(ip->i_sb, |
| 3250 | "JFS:Dtree error: ino = " | 3250 | "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n", |
| 3251 | "%ld, bn=%Ld, index = %d", | ||
| 3252 | (long)ip->i_ino, | 3251 | (long)ip->i_ino, |
| 3253 | (long long)bn, | 3252 | (long long)bn, |
| 3254 | i); | 3253 | i); |
| @@ -3368,7 +3367,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack) | |||
| 3368 | */ | 3367 | */ |
| 3369 | if (BT_STACK_FULL(btstack)) { | 3368 | if (BT_STACK_FULL(btstack)) { |
| 3370 | DT_PUTPAGE(mp); | 3369 | DT_PUTPAGE(mp); |
| 3371 | jfs_error(ip->i_sb, "dtReadFirst: btstack overrun"); | 3370 | jfs_error(ip->i_sb, "btstack overrun\n"); |
| 3372 | BT_STACK_DUMP(btstack); | 3371 | BT_STACK_DUMP(btstack); |
| 3373 | return -EIO; | 3372 | return -EIO; |
| 3374 | } | 3373 | } |
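One detail in the jfs_readdir hunk above: the old message used the nonstandard "%Ld" length modifier, while the consolidated single-line message switches to the standard "%lld" and keeps the explicit (long long) cast. A minimal sketch of the convention, assuming a kernel context where pr_info() and s64 are in scope:

```c
/* Sketch: s64 may be 'long' or 'long long' depending on the architecture,
 * so the cast keeps the standard %lld specifier correct on both. */
static void show_entry(long ino, s64 bn, int index)
{
	pr_info("ino = %ld, bn = %lld, index = %d\n",
		ino, (long long)bn, index);
}
```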
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index e5fe8506ed16..2ae7d59ab10a 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c | |||
| @@ -388,7 +388,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) | |||
| 388 | 388 | ||
| 389 | if ((rc == 0) && xlen) { | 389 | if ((rc == 0) && xlen) { |
| 390 | if (xlen != nbperpage) { | 390 | if (xlen != nbperpage) { |
| 391 | jfs_error(ip->i_sb, "extHint: corrupt xtree"); | 391 | jfs_error(ip->i_sb, "corrupt xtree\n"); |
| 392 | rc = -EIO; | 392 | rc = -EIO; |
| 393 | } | 393 | } |
| 394 | XADaddress(xp, xaddr); | 394 | XADaddress(xp, xaddr); |
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index f7e042b63ddb..f321986e73d2 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
| @@ -386,7 +386,7 @@ int diRead(struct inode *ip) | |||
| 386 | dp += rel_inode; | 386 | dp += rel_inode; |
| 387 | 387 | ||
| 388 | if (ip->i_ino != le32_to_cpu(dp->di_number)) { | 388 | if (ip->i_ino != le32_to_cpu(dp->di_number)) { |
| 389 | jfs_error(ip->i_sb, "diRead: i_ino != di_number"); | 389 | jfs_error(ip->i_sb, "i_ino != di_number\n"); |
| 390 | rc = -EIO; | 390 | rc = -EIO; |
| 391 | } else if (le32_to_cpu(dp->di_nlink) == 0) | 391 | } else if (le32_to_cpu(dp->di_nlink) == 0) |
| 392 | rc = -ESTALE; | 392 | rc = -ESTALE; |
| @@ -625,7 +625,7 @@ int diWrite(tid_t tid, struct inode *ip) | |||
| 625 | if (!addressPXD(&(jfs_ip->ixpxd)) || | 625 | if (!addressPXD(&(jfs_ip->ixpxd)) || |
| 626 | (lengthPXD(&(jfs_ip->ixpxd)) != | 626 | (lengthPXD(&(jfs_ip->ixpxd)) != |
| 627 | JFS_IP(ipimap)->i_imap->im_nbperiext)) { | 627 | JFS_IP(ipimap)->i_imap->im_nbperiext)) { |
| 628 | jfs_error(ip->i_sb, "diWrite: ixpxd invalid"); | 628 | jfs_error(ip->i_sb, "ixpxd invalid\n"); |
| 629 | return -EIO; | 629 | return -EIO; |
| 630 | } | 630 | } |
| 631 | 631 | ||
| @@ -893,8 +893,7 @@ int diFree(struct inode *ip) | |||
| 893 | if (iagno >= imap->im_nextiag) { | 893 | if (iagno >= imap->im_nextiag) { |
| 894 | print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, | 894 | print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, |
| 895 | imap, 32, 0); | 895 | imap, 32, 0); |
| 896 | jfs_error(ip->i_sb, | 896 | jfs_error(ip->i_sb, "inum = %d, iagno = %d, nextiag = %d\n", |
| 897 | "diFree: inum = %d, iagno = %d, nextiag = %d", | ||
| 898 | (uint) inum, iagno, imap->im_nextiag); | 897 | (uint) inum, iagno, imap->im_nextiag); |
| 899 | return -EIO; | 898 | return -EIO; |
| 900 | } | 899 | } |
| @@ -930,15 +929,14 @@ int diFree(struct inode *ip) | |||
| 930 | mask = HIGHORDER >> bitno; | 929 | mask = HIGHORDER >> bitno; |
| 931 | 930 | ||
| 932 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { | 931 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { |
| 933 | jfs_error(ip->i_sb, | 932 | jfs_error(ip->i_sb, "wmap shows inode already free\n"); |
| 934 | "diFree: wmap shows inode already free"); | ||
| 935 | } | 933 | } |
| 936 | 934 | ||
| 937 | if (!addressPXD(&iagp->inoext[extno])) { | 935 | if (!addressPXD(&iagp->inoext[extno])) { |
| 938 | release_metapage(mp); | 936 | release_metapage(mp); |
| 939 | IREAD_UNLOCK(ipimap); | 937 | IREAD_UNLOCK(ipimap); |
| 940 | AG_UNLOCK(imap, agno); | 938 | AG_UNLOCK(imap, agno); |
| 941 | jfs_error(ip->i_sb, "diFree: invalid inoext"); | 939 | jfs_error(ip->i_sb, "invalid inoext\n"); |
| 942 | return -EIO; | 940 | return -EIO; |
| 943 | } | 941 | } |
| 944 | 942 | ||
| @@ -950,7 +948,7 @@ int diFree(struct inode *ip) | |||
| 950 | release_metapage(mp); | 948 | release_metapage(mp); |
| 951 | IREAD_UNLOCK(ipimap); | 949 | IREAD_UNLOCK(ipimap); |
| 952 | AG_UNLOCK(imap, agno); | 950 | AG_UNLOCK(imap, agno); |
| 953 | jfs_error(ip->i_sb, "diFree: numfree > numinos"); | 951 | jfs_error(ip->i_sb, "numfree > numinos\n"); |
| 954 | return -EIO; | 952 | return -EIO; |
| 955 | } | 953 | } |
| 956 | /* | 954 | /* |
| @@ -1199,7 +1197,7 @@ int diFree(struct inode *ip) | |||
| 1199 | * for the inode being freed. | 1197 | * for the inode being freed. |
| 1200 | */ | 1198 | */ |
| 1201 | if (iagp->pmap[extno] != 0) { | 1199 | if (iagp->pmap[extno] != 0) { |
| 1202 | jfs_error(ip->i_sb, "diFree: the pmap does not show inode free"); | 1200 | jfs_error(ip->i_sb, "the pmap does not show inode free\n"); |
| 1203 | } | 1201 | } |
| 1204 | iagp->wmap[extno] = 0; | 1202 | iagp->wmap[extno] = 0; |
| 1205 | PXDlength(&iagp->inoext[extno], 0); | 1203 | PXDlength(&iagp->inoext[extno], 0); |
| @@ -1518,8 +1516,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
| 1518 | release_metapage(mp); | 1516 | release_metapage(mp); |
| 1519 | AG_UNLOCK(imap, agno); | 1517 | AG_UNLOCK(imap, agno); |
| 1520 | jfs_error(ip->i_sb, | 1518 | jfs_error(ip->i_sb, |
| 1521 | "diAlloc: can't find free bit " | 1519 | "can't find free bit in wmap\n"); |
| 1522 | "in wmap"); | ||
| 1523 | return -EIO; | 1520 | return -EIO; |
| 1524 | } | 1521 | } |
| 1525 | 1522 | ||
| @@ -1660,7 +1657,7 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) | |||
| 1660 | numinos = imap->im_agctl[agno].numinos; | 1657 | numinos = imap->im_agctl[agno].numinos; |
| 1661 | 1658 | ||
| 1662 | if (numfree > numinos) { | 1659 | if (numfree > numinos) { |
| 1663 | jfs_error(ip->i_sb, "diAllocAG: numfree > numinos"); | 1660 | jfs_error(ip->i_sb, "numfree > numinos\n"); |
| 1664 | return -EIO; | 1661 | return -EIO; |
| 1665 | } | 1662 | } |
| 1666 | 1663 | ||
| @@ -1811,8 +1808,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
| 1811 | if (!iagp->nfreeinos) { | 1808 | if (!iagp->nfreeinos) { |
| 1812 | IREAD_UNLOCK(imap->im_ipimap); | 1809 | IREAD_UNLOCK(imap->im_ipimap); |
| 1813 | release_metapage(mp); | 1810 | release_metapage(mp); |
| 1814 | jfs_error(ip->i_sb, | 1811 | jfs_error(ip->i_sb, "nfreeinos = 0, but iag on freelist\n"); |
| 1815 | "diAllocIno: nfreeinos = 0, but iag on freelist"); | ||
| 1816 | return -EIO; | 1812 | return -EIO; |
| 1817 | } | 1813 | } |
| 1818 | 1814 | ||
| @@ -1824,7 +1820,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
| 1824 | IREAD_UNLOCK(imap->im_ipimap); | 1820 | IREAD_UNLOCK(imap->im_ipimap); |
| 1825 | release_metapage(mp); | 1821 | release_metapage(mp); |
| 1826 | jfs_error(ip->i_sb, | 1822 | jfs_error(ip->i_sb, |
| 1827 | "diAllocIno: free inode not found in summary map"); | 1823 | "free inode not found in summary map\n"); |
| 1828 | return -EIO; | 1824 | return -EIO; |
| 1829 | } | 1825 | } |
| 1830 | 1826 | ||
| @@ -1839,7 +1835,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
| 1839 | if (rem >= EXTSPERSUM) { | 1835 | if (rem >= EXTSPERSUM) { |
| 1840 | IREAD_UNLOCK(imap->im_ipimap); | 1836 | IREAD_UNLOCK(imap->im_ipimap); |
| 1841 | release_metapage(mp); | 1837 | release_metapage(mp); |
| 1842 | jfs_error(ip->i_sb, "diAllocIno: no free extent found"); | 1838 | jfs_error(ip->i_sb, "no free extent found\n"); |
| 1843 | return -EIO; | 1839 | return -EIO; |
| 1844 | } | 1840 | } |
| 1845 | extno = (sword << L2EXTSPERSUM) + rem; | 1841 | extno = (sword << L2EXTSPERSUM) + rem; |
| @@ -1850,7 +1846,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
| 1850 | if (rem >= INOSPEREXT) { | 1846 | if (rem >= INOSPEREXT) { |
| 1851 | IREAD_UNLOCK(imap->im_ipimap); | 1847 | IREAD_UNLOCK(imap->im_ipimap); |
| 1852 | release_metapage(mp); | 1848 | release_metapage(mp); |
| 1853 | jfs_error(ip->i_sb, "diAllocIno: free inode not found"); | 1849 | jfs_error(ip->i_sb, "free inode not found\n"); |
| 1854 | return -EIO; | 1850 | return -EIO; |
| 1855 | } | 1851 | } |
| 1856 | 1852 | ||
| @@ -1936,7 +1932,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | |||
| 1936 | IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); | 1932 | IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); |
| 1937 | if ((rc = diIAGRead(imap, iagno, &mp))) { | 1933 | if ((rc = diIAGRead(imap, iagno, &mp))) { |
| 1938 | IREAD_UNLOCK(imap->im_ipimap); | 1934 | IREAD_UNLOCK(imap->im_ipimap); |
| 1939 | jfs_error(ip->i_sb, "diAllocExt: error reading iag"); | 1935 | jfs_error(ip->i_sb, "error reading iag\n"); |
| 1940 | return rc; | 1936 | return rc; |
| 1941 | } | 1937 | } |
| 1942 | iagp = (struct iag *) mp->data; | 1938 | iagp = (struct iag *) mp->data; |
| @@ -1948,8 +1944,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | |||
| 1948 | if (sword >= SMAPSZ) { | 1944 | if (sword >= SMAPSZ) { |
| 1949 | release_metapage(mp); | 1945 | release_metapage(mp); |
| 1950 | IREAD_UNLOCK(imap->im_ipimap); | 1946 | IREAD_UNLOCK(imap->im_ipimap); |
| 1951 | jfs_error(ip->i_sb, | 1947 | jfs_error(ip->i_sb, "free ext summary map not found\n"); |
| 1952 | "diAllocExt: free ext summary map not found"); | ||
| 1953 | return -EIO; | 1948 | return -EIO; |
| 1954 | } | 1949 | } |
| 1955 | if (~iagp->extsmap[sword]) | 1950 | if (~iagp->extsmap[sword]) |
| @@ -1962,7 +1957,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | |||
| 1962 | if (rem >= EXTSPERSUM) { | 1957 | if (rem >= EXTSPERSUM) { |
| 1963 | release_metapage(mp); | 1958 | release_metapage(mp); |
| 1964 | IREAD_UNLOCK(imap->im_ipimap); | 1959 | IREAD_UNLOCK(imap->im_ipimap); |
| 1965 | jfs_error(ip->i_sb, "diAllocExt: free extent not found"); | 1960 | jfs_error(ip->i_sb, "free extent not found\n"); |
| 1966 | return -EIO; | 1961 | return -EIO; |
| 1967 | } | 1962 | } |
| 1968 | extno = (sword << L2EXTSPERSUM) + rem; | 1963 | extno = (sword << L2EXTSPERSUM) + rem; |
| @@ -2081,8 +2076,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) | |||
| 2081 | if (bmp) | 2076 | if (bmp) |
| 2082 | release_metapage(bmp); | 2077 | release_metapage(bmp); |
| 2083 | 2078 | ||
| 2084 | jfs_error(imap->im_ipimap->i_sb, | 2079 | jfs_error(imap->im_ipimap->i_sb, "iag inconsistent\n"); |
| 2085 | "diAllocBit: iag inconsistent"); | ||
| 2086 | return -EIO; | 2080 | return -EIO; |
| 2087 | } | 2081 | } |
| 2088 | 2082 | ||
| @@ -2189,7 +2183,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) | |||
| 2189 | /* better have free extents. | 2183 | /* better have free extents. |
| 2190 | */ | 2184 | */ |
| 2191 | if (!iagp->nfreeexts) { | 2185 | if (!iagp->nfreeexts) { |
| 2192 | jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents"); | 2186 | jfs_error(imap->im_ipimap->i_sb, "no free extents\n"); |
| 2193 | return -EIO; | 2187 | return -EIO; |
| 2194 | } | 2188 | } |
| 2195 | 2189 | ||
| @@ -2261,7 +2255,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) | |||
| 2261 | } | 2255 | } |
| 2262 | if (ciagp == NULL) { | 2256 | if (ciagp == NULL) { |
| 2263 | jfs_error(imap->im_ipimap->i_sb, | 2257 | jfs_error(imap->im_ipimap->i_sb, |
| 2264 | "diNewExt: ciagp == NULL"); | 2258 | "ciagp == NULL\n"); |
| 2265 | rc = -EIO; | 2259 | rc = -EIO; |
| 2266 | goto error_out; | 2260 | goto error_out; |
| 2267 | } | 2261 | } |
| @@ -2498,7 +2492,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | |||
| 2498 | IWRITE_UNLOCK(ipimap); | 2492 | IWRITE_UNLOCK(ipimap); |
| 2499 | IAGFREE_UNLOCK(imap); | 2493 | IAGFREE_UNLOCK(imap); |
| 2500 | jfs_error(imap->im_ipimap->i_sb, | 2494 | jfs_error(imap->im_ipimap->i_sb, |
| 2501 | "diNewIAG: ipimap->i_size is wrong"); | 2495 | "ipimap->i_size is wrong\n"); |
| 2502 | return -EIO; | 2496 | return -EIO; |
| 2503 | } | 2497 | } |
| 2504 | 2498 | ||
| @@ -2758,8 +2752,7 @@ diUpdatePMap(struct inode *ipimap, | |||
| 2758 | iagno = INOTOIAG(inum); | 2752 | iagno = INOTOIAG(inum); |
| 2759 | /* make sure that the iag is contained within the map */ | 2753 | /* make sure that the iag is contained within the map */ |
| 2760 | if (iagno >= imap->im_nextiag) { | 2754 | if (iagno >= imap->im_nextiag) { |
| 2761 | jfs_error(ipimap->i_sb, | 2755 | jfs_error(ipimap->i_sb, "the iag is outside the map\n"); |
| 2762 | "diUpdatePMap: the iag is outside the map"); | ||
| 2763 | return -EIO; | 2756 | return -EIO; |
| 2764 | } | 2757 | } |
| 2765 | /* read the iag */ | 2758 | /* read the iag */ |
| @@ -2788,13 +2781,13 @@ diUpdatePMap(struct inode *ipimap, | |||
| 2788 | */ | 2781 | */ |
| 2789 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { | 2782 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { |
| 2790 | jfs_error(ipimap->i_sb, | 2783 | jfs_error(ipimap->i_sb, |
| 2791 | "diUpdatePMap: inode %ld not marked as " | 2784 | "inode %ld not marked as allocated in wmap!\n", |
| 2792 | "allocated in wmap!", inum); | 2785 | inum); |
| 2793 | } | 2786 | } |
| 2794 | if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { | 2787 | if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { |
| 2795 | jfs_error(ipimap->i_sb, | 2788 | jfs_error(ipimap->i_sb, |
| 2796 | "diUpdatePMap: inode %ld not marked as " | 2789 | "inode %ld not marked as allocated in pmap!\n", |
| 2797 | "allocated in pmap!", inum); | 2790 | inum); |
| 2798 | } | 2791 | } |
| 2799 | /* update the bitmap for the extent of the freed inode */ | 2792 | /* update the bitmap for the extent of the freed inode */ |
| 2800 | iagp->pmap[extno] &= cpu_to_le32(~mask); | 2793 | iagp->pmap[extno] &= cpu_to_le32(~mask); |
| @@ -2809,15 +2802,13 @@ diUpdatePMap(struct inode *ipimap, | |||
| 2809 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { | 2802 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { |
| 2810 | release_metapage(mp); | 2803 | release_metapage(mp); |
| 2811 | jfs_error(ipimap->i_sb, | 2804 | jfs_error(ipimap->i_sb, |
| 2812 | "diUpdatePMap: the inode is not allocated in " | 2805 | "the inode is not allocated in the working map\n"); |
| 2813 | "the working map"); | ||
| 2814 | return -EIO; | 2806 | return -EIO; |
| 2815 | } | 2807 | } |
| 2816 | if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { | 2808 | if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { |
| 2817 | release_metapage(mp); | 2809 | release_metapage(mp); |
| 2818 | jfs_error(ipimap->i_sb, | 2810 | jfs_error(ipimap->i_sb, |
| 2819 | "diUpdatePMap: the inode is not free in the " | 2811 | "the inode is not free in the persistent map\n"); |
| 2820 | "persistent map"); | ||
| 2821 | return -EIO; | 2812 | return -EIO; |
| 2822 | } | 2813 | } |
| 2823 | /* update the bitmap for the extent of the allocated inode */ | 2814 | /* update the bitmap for the extent of the allocated inode */ |
| @@ -2909,8 +2900,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) | |||
| 2909 | iagp = (struct iag *) bp->data; | 2900 | iagp = (struct iag *) bp->data; |
| 2910 | if (le32_to_cpu(iagp->iagnum) != i) { | 2901 | if (le32_to_cpu(iagp->iagnum) != i) { |
| 2911 | release_metapage(bp); | 2902 | release_metapage(bp); |
| 2912 | jfs_error(ipimap->i_sb, | 2903 | jfs_error(ipimap->i_sb, "unexpected value of iagnum\n"); |
| 2913 | "diExtendFs: unexpected value of iagnum"); | ||
| 2914 | return -EIO; | 2904 | return -EIO; |
| 2915 | } | 2905 | } |
| 2916 | 2906 | ||
| @@ -2986,8 +2976,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) | |||
| 2986 | 2976 | ||
| 2987 | if (xnuminos != atomic_read(&imap->im_numinos) || | 2977 | if (xnuminos != atomic_read(&imap->im_numinos) || |
| 2988 | xnumfree != atomic_read(&imap->im_numfree)) { | 2978 | xnumfree != atomic_read(&imap->im_numfree)) { |
| 2989 | jfs_error(ipimap->i_sb, | 2979 | jfs_error(ipimap->i_sb, "numinos or numfree incorrect\n"); |
| 2990 | "diExtendFs: numinos or numfree incorrect"); | ||
| 2991 | return -EIO; | 2980 | return -EIO; |
| 2992 | } | 2981 | } |
| 2993 | 2982 | ||
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 9e3aaff11f89..d165cde0c68d 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c | |||
| @@ -647,7 +647,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, | |||
| 647 | if (mp) { | 647 | if (mp) { |
| 648 | if (mp->logical_size != size) { | 648 | if (mp->logical_size != size) { |
| 649 | jfs_error(inode->i_sb, | 649 | jfs_error(inode->i_sb, |
| 650 | "__get_metapage: mp->logical_size != size"); | 650 | "get_mp->logical_size != size\n"); |
| 651 | jfs_err("logical_size = %d, size = %d", | 651 | jfs_err("logical_size = %d, size = %d", |
| 652 | mp->logical_size, size); | 652 | mp->logical_size, size); |
| 653 | dump_stack(); | 653 | dump_stack(); |
| @@ -658,8 +658,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, | |||
| 658 | if (test_bit(META_discard, &mp->flag)) { | 658 | if (test_bit(META_discard, &mp->flag)) { |
| 659 | if (!new) { | 659 | if (!new) { |
| 660 | jfs_error(inode->i_sb, | 660 | jfs_error(inode->i_sb, |
| 661 | "__get_metapage: using a " | 661 | "using a discarded metapage\n"); |
| 662 | "discarded metapage"); | ||
| 663 | discard_metapage(mp); | 662 | discard_metapage(mp); |
| 664 | goto unlock; | 663 | goto unlock; |
| 665 | } | 664 | } |
diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h index 884fc21ab8ee..04847b8d3070 100644 --- a/fs/jfs/jfs_superblock.h +++ b/fs/jfs/jfs_superblock.h | |||
| @@ -108,6 +108,7 @@ struct jfs_superblock { | |||
| 108 | 108 | ||
| 109 | extern int readSuper(struct super_block *, struct buffer_head **); | 109 | extern int readSuper(struct super_block *, struct buffer_head **); |
| 110 | extern int updateSuper(struct super_block *, uint); | 110 | extern int updateSuper(struct super_block *, uint); |
| 111 | __printf(2, 3) | ||
| 111 | extern void jfs_error(struct super_block *, const char *, ...); | 112 | extern void jfs_error(struct super_block *, const char *, ...); |
| 112 | extern int jfs_mount(struct super_block *); | 113 | extern int jfs_mount(struct super_block *); |
| 113 | extern int jfs_mount_rw(struct super_block *, int); | 114 | extern int jfs_mount_rw(struct super_block *, int); |
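The only change to jfs_superblock.h is the new __printf(2, 3) annotation on the jfs_error() prototype: it tells gcc/clang to type-check the format string (argument 2) against the variadic arguments (from argument 3 on), which is what lets the mass message conversion in this diff be verified at compile time. A stand-alone sketch of the same pattern; the wrapper name is made up, and the macro is redefined locally only so the example builds outside the kernel tree (in-kernel it comes from the compiler headers):

```c
#include <stdarg.h>
#include <stdio.h>

#ifndef __printf
#define __printf(a, b) __attribute__((format(printf, a, b)))
#endif

__printf(2, 3)
static void report_error(const char *dev, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	fprintf(stderr, "ERROR: (device %s): ", dev);
	vfprintf(stderr, fmt, args);
	va_end(args);
}

int main(void)
{
	report_error("sdb1", "inum = %d, iagno = %d\n", 42, 7);
	/* report_error("sdb1", "inum = %d\n", "42");  <-- -Wformat would
	 * flag this mismatch at compile time thanks to __printf(2, 3). */
	return 0;
}
```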
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 5fcc02eaa64c..564c4f279ac6 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c | |||
| @@ -2684,7 +2684,7 @@ void txAbort(tid_t tid, int dirty) | |||
| 2684 | * mark filesystem dirty | 2684 | * mark filesystem dirty |
| 2685 | */ | 2685 | */ |
| 2686 | if (dirty) | 2686 | if (dirty) |
| 2687 | jfs_error(tblk->sb, "txAbort"); | 2687 | jfs_error(tblk->sb, "\n"); |
| 2688 | 2688 | ||
| 2689 | return; | 2689 | return; |
| 2690 | } | 2690 | } |
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 6c50871e6220..5ad7748860ce 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c | |||
| @@ -64,22 +64,23 @@ | |||
| 64 | 64 | ||
| 65 | /* get page buffer for specified block address */ | 65 | /* get page buffer for specified block address */ |
| 66 | /* ToDo: Replace this ugly macro with a function */ | 66 | /* ToDo: Replace this ugly macro with a function */ |
| 67 | #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ | 67 | #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ |
| 68 | {\ | 68 | do { \ |
| 69 | BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ | 69 | BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot); \ |
| 70 | if (!(RC))\ | 70 | if (!(RC)) { \ |
| 71 | {\ | 71 | if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \ |
| 72 | if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ | 72 | (le16_to_cpu((P)->header.nextindex) > \ |
| 73 | (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ | 73 | le16_to_cpu((P)->header.maxentry)) || \ |
| 74 | (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ | 74 | (le16_to_cpu((P)->header.maxentry) > \ |
| 75 | {\ | 75 | (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \ |
| 76 | jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\ | 76 | jfs_error((IP)->i_sb, \ |
| 77 | BT_PUTPAGE(MP);\ | 77 | "XT_GETPAGE: xtree page corrupt\n"); \ |
| 78 | MP = NULL;\ | 78 | BT_PUTPAGE(MP); \ |
| 79 | RC = -EIO;\ | 79 | MP = NULL; \ |
| 80 | }\ | 80 | RC = -EIO; \ |
| 81 | }\ | 81 | } \ |
| 82 | } | 82 | } \ |
| 83 | } while (0) | ||
| 83 | 84 | ||
| 84 | /* for consistency */ | 85 | /* for consistency */ |
| 85 | #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) | 86 | #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) |
| @@ -499,7 +500,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, | |||
| 499 | 500 | ||
| 500 | /* push (bn, index) of the parent page/entry */ | 501 | /* push (bn, index) of the parent page/entry */ |
| 501 | if (BT_STACK_FULL(btstack)) { | 502 | if (BT_STACK_FULL(btstack)) { |
| 502 | jfs_error(ip->i_sb, "stack overrun in xtSearch!"); | 503 | jfs_error(ip->i_sb, "stack overrun!\n"); |
| 503 | XT_PUTPAGE(mp); | 504 | XT_PUTPAGE(mp); |
| 504 | return -EIO; | 505 | return -EIO; |
| 505 | } | 506 | } |
| @@ -1385,7 +1386,7 @@ int xtExtend(tid_t tid, /* transaction id */ | |||
| 1385 | 1386 | ||
| 1386 | if (cmp != 0) { | 1387 | if (cmp != 0) { |
| 1387 | XT_PUTPAGE(mp); | 1388 | XT_PUTPAGE(mp); |
| 1388 | jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); | 1389 | jfs_error(ip->i_sb, "xtSearch did not find extent\n"); |
| 1389 | return -EIO; | 1390 | return -EIO; |
| 1390 | } | 1391 | } |
| 1391 | 1392 | ||
| @@ -1393,7 +1394,7 @@ int xtExtend(tid_t tid, /* transaction id */ | |||
| 1393 | xad = &p->xad[index]; | 1394 | xad = &p->xad[index]; |
| 1394 | if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { | 1395 | if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { |
| 1395 | XT_PUTPAGE(mp); | 1396 | XT_PUTPAGE(mp); |
| 1396 | jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); | 1397 | jfs_error(ip->i_sb, "extension is not contiguous\n"); |
| 1397 | return -EIO; | 1398 | return -EIO; |
| 1398 | } | 1399 | } |
| 1399 | 1400 | ||
| @@ -1552,7 +1553,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", | |||
| 1552 | 1553 | ||
| 1553 | if (cmp != 0) { | 1554 | if (cmp != 0) { |
| 1554 | XT_PUTPAGE(mp); | 1555 | XT_PUTPAGE(mp); |
| 1555 | jfs_error(ip->i_sb, "xtTailgate: couldn't find extent"); | 1556 | jfs_error(ip->i_sb, "couldn't find extent\n"); |
| 1556 | return -EIO; | 1557 | return -EIO; |
| 1557 | } | 1558 | } |
| 1558 | 1559 | ||
| @@ -1560,8 +1561,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", | |||
| 1560 | nextindex = le16_to_cpu(p->header.nextindex); | 1561 | nextindex = le16_to_cpu(p->header.nextindex); |
| 1561 | if (index != nextindex - 1) { | 1562 | if (index != nextindex - 1) { |
| 1562 | XT_PUTPAGE(mp); | 1563 | XT_PUTPAGE(mp); |
| 1563 | jfs_error(ip->i_sb, | 1564 | jfs_error(ip->i_sb, "the entry found is not the last entry\n"); |
| 1564 | "xtTailgate: the entry found is not the last entry"); | ||
| 1565 | return -EIO; | 1565 | return -EIO; |
| 1566 | } | 1566 | } |
| 1567 | 1567 | ||
| @@ -1734,7 +1734,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) | |||
| 1734 | 1734 | ||
| 1735 | if (cmp != 0) { | 1735 | if (cmp != 0) { |
| 1736 | XT_PUTPAGE(mp); | 1736 | XT_PUTPAGE(mp); |
| 1737 | jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); | 1737 | jfs_error(ip->i_sb, "Could not find extent\n"); |
| 1738 | return -EIO; | 1738 | return -EIO; |
| 1739 | } | 1739 | } |
| 1740 | 1740 | ||
| @@ -1758,7 +1758,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) | |||
| 1758 | (nxoff + nxlen > xoff + xlen)) { | 1758 | (nxoff + nxlen > xoff + xlen)) { |
| 1759 | XT_PUTPAGE(mp); | 1759 | XT_PUTPAGE(mp); |
| 1760 | jfs_error(ip->i_sb, | 1760 | jfs_error(ip->i_sb, |
| 1761 | "xtUpdate: nXAD in not completely contained within XAD"); | 1761 | "nXAD in not completely contained within XAD\n"); |
| 1762 | return -EIO; | 1762 | return -EIO; |
| 1763 | } | 1763 | } |
| 1764 | 1764 | ||
| @@ -1907,7 +1907,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) | |||
| 1907 | 1907 | ||
| 1908 | if (xoff >= nxoff) { | 1908 | if (xoff >= nxoff) { |
| 1909 | XT_PUTPAGE(mp); | 1909 | XT_PUTPAGE(mp); |
| 1910 | jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); | 1910 | jfs_error(ip->i_sb, "xoff >= nxoff\n"); |
| 1911 | return -EIO; | 1911 | return -EIO; |
| 1912 | } | 1912 | } |
| 1913 | /* #endif _JFS_WIP_COALESCE */ | 1913 | /* #endif _JFS_WIP_COALESCE */ |
| @@ -2048,14 +2048,13 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) | |||
| 2048 | 2048 | ||
| 2049 | if (cmp != 0) { | 2049 | if (cmp != 0) { |
| 2050 | XT_PUTPAGE(mp); | 2050 | XT_PUTPAGE(mp); |
| 2051 | jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); | 2051 | jfs_error(ip->i_sb, "xtSearch failed\n"); |
| 2052 | return -EIO; | 2052 | return -EIO; |
| 2053 | } | 2053 | } |
| 2054 | 2054 | ||
| 2055 | if (index0 != index) { | 2055 | if (index0 != index) { |
| 2056 | XT_PUTPAGE(mp); | 2056 | XT_PUTPAGE(mp); |
| 2057 | jfs_error(ip->i_sb, | 2057 | jfs_error(ip->i_sb, "unexpected value of index\n"); |
| 2058 | "xtUpdate: unexpected value of index"); | ||
| 2059 | return -EIO; | 2058 | return -EIO; |
| 2060 | } | 2059 | } |
| 2061 | } | 2060 | } |
| @@ -3650,7 +3649,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) | |||
| 3650 | getChild: | 3649 | getChild: |
| 3651 | /* save current parent entry for the child page */ | 3650 | /* save current parent entry for the child page */ |
| 3652 | if (BT_STACK_FULL(&btstack)) { | 3651 | if (BT_STACK_FULL(&btstack)) { |
| 3653 | jfs_error(ip->i_sb, "stack overrun in xtTruncate!"); | 3652 | jfs_error(ip->i_sb, "stack overrun!\n"); |
| 3654 | XT_PUTPAGE(mp); | 3653 | XT_PUTPAGE(mp); |
| 3655 | return -EIO; | 3654 | return -EIO; |
| 3656 | } | 3655 | } |
| @@ -3751,8 +3750,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) | |||
| 3751 | 3750 | ||
| 3752 | if (cmp != 0) { | 3751 | if (cmp != 0) { |
| 3753 | XT_PUTPAGE(mp); | 3752 | XT_PUTPAGE(mp); |
| 3754 | jfs_error(ip->i_sb, | 3753 | jfs_error(ip->i_sb, "did not find extent\n"); |
| 3755 | "xtTruncate_pmap: did not find extent"); | ||
| 3756 | return -EIO; | 3754 | return -EIO; |
| 3757 | } | 3755 | } |
| 3758 | } else { | 3756 | } else { |
| @@ -3851,7 +3849,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) | |||
| 3851 | getChild: | 3849 | getChild: |
| 3852 | /* save current parent entry for the child page */ | 3850 | /* save current parent entry for the child page */ |
| 3853 | if (BT_STACK_FULL(&btstack)) { | 3851 | if (BT_STACK_FULL(&btstack)) { |
| 3854 | jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); | 3852 | jfs_error(ip->i_sb, "stack overrun!\n"); |
| 3855 | XT_PUTPAGE(mp); | 3853 | XT_PUTPAGE(mp); |
| 3856 | return -EIO; | 3854 | return -EIO; |
| 3857 | } | 3855 | } |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 89186b7b9002..aa8a3370631b 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
| @@ -1176,7 +1176,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 1176 | if (!S_ISDIR(old_ip->i_mode) && new_ip) | 1176 | if (!S_ISDIR(old_ip->i_mode) && new_ip) |
| 1177 | IWRITE_UNLOCK(new_ip); | 1177 | IWRITE_UNLOCK(new_ip); |
| 1178 | jfs_error(new_ip->i_sb, | 1178 | jfs_error(new_ip->i_sb, |
| 1179 | "jfs_rename: new_ip->i_nlink != 0"); | 1179 | "new_ip->i_nlink != 0\n"); |
| 1180 | return -EIO; | 1180 | return -EIO; |
| 1181 | } | 1181 | } |
| 1182 | tblk = tid_to_tblock(tid); | 1182 | tblk = tid_to_tblock(tid); |
| @@ -1538,8 +1538,7 @@ const struct file_operations jfs_dir_operations = { | |||
| 1538 | .llseek = generic_file_llseek, | 1538 | .llseek = generic_file_llseek, |
| 1539 | }; | 1539 | }; |
| 1540 | 1540 | ||
| 1541 | static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode, | 1541 | static int jfs_ci_hash(const struct dentry *dir, struct qstr *this) |
| 1542 | struct qstr *this) | ||
| 1543 | { | 1542 | { |
| 1544 | unsigned long hash; | 1543 | unsigned long hash; |
| 1545 | int i; | 1544 | int i; |
| @@ -1552,9 +1551,7 @@ static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode, | |||
| 1552 | return 0; | 1551 | return 0; |
| 1553 | } | 1552 | } |
| 1554 | 1553 | ||
| 1555 | static int jfs_ci_compare(const struct dentry *parent, | 1554 | static int jfs_ci_compare(const struct dentry *parent, const struct dentry *dentry, |
| 1556 | const struct inode *pinode, | ||
| 1557 | const struct dentry *dentry, const struct inode *inode, | ||
| 1558 | unsigned int len, const char *str, const struct qstr *name) | 1555 | unsigned int len, const char *str, const struct qstr *name) |
| 1559 | { | 1556 | { |
| 1560 | int i, result = 1; | 1557 | int i, result = 1; |
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 8d0c1c7c0820..90b3bc21e9b0 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c | |||
| @@ -530,7 +530,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
| 530 | goto resume; | 530 | goto resume; |
| 531 | 531 | ||
| 532 | error_out: | 532 | error_out: |
| 533 | jfs_error(sb, "jfs_extendfs"); | 533 | jfs_error(sb, "\n"); |
| 534 | 534 | ||
| 535 | resume: | 535 | resume: |
| 536 | /* | 536 | /* |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 788e0a9c1fb0..6669aa2042c3 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
| @@ -92,16 +92,20 @@ static void jfs_handle_error(struct super_block *sb) | |||
| 92 | /* nothing is done for continue beyond marking the superblock dirty */ | 92 | /* nothing is done for continue beyond marking the superblock dirty */ |
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | void jfs_error(struct super_block *sb, const char * function, ...) | 95 | void jfs_error(struct super_block *sb, const char *fmt, ...) |
| 96 | { | 96 | { |
| 97 | static char error_buf[256]; | 97 | struct va_format vaf; |
| 98 | va_list args; | 98 | va_list args; |
| 99 | 99 | ||
| 100 | va_start(args, function); | 100 | va_start(args, fmt); |
| 101 | vsnprintf(error_buf, sizeof(error_buf), function, args); | 101 | |
| 102 | va_end(args); | 102 | vaf.fmt = fmt; |
| 103 | vaf.va = &args; | ||
| 103 | 104 | ||
| 104 | pr_err("ERROR: (device %s): %s\n", sb->s_id, error_buf); | 105 | pr_err("ERROR: (device %s): %pf: %pV\n", |
| 106 | sb->s_id, __builtin_return_address(0), &vaf); | ||
| 107 | |||
| 108 | va_end(args); | ||
| 105 | 109 | ||
| 106 | jfs_handle_error(sb); | 110 | jfs_handle_error(sb); |
| 107 | } | 111 | } |
| @@ -617,7 +621,7 @@ static int jfs_freeze(struct super_block *sb) | |||
| 617 | txQuiesce(sb); | 621 | txQuiesce(sb); |
| 618 | rc = lmLogShutdown(log); | 622 | rc = lmLogShutdown(log); |
| 619 | if (rc) { | 623 | if (rc) { |
| 620 | jfs_error(sb, "jfs_freeze: lmLogShutdown failed"); | 624 | jfs_error(sb, "lmLogShutdown failed\n"); |
| 621 | 625 | ||
| 622 | /* let operations fail rather than hang */ | 626 | /* let operations fail rather than hang */ |
| 623 | txResume(sb); | 627 | txResume(sb); |
| @@ -646,12 +650,12 @@ static int jfs_unfreeze(struct super_block *sb) | |||
| 646 | if (!(sb->s_flags & MS_RDONLY)) { | 650 | if (!(sb->s_flags & MS_RDONLY)) { |
| 647 | rc = updateSuper(sb, FM_MOUNT); | 651 | rc = updateSuper(sb, FM_MOUNT); |
| 648 | if (rc) { | 652 | if (rc) { |
| 649 | jfs_error(sb, "jfs_unfreeze: updateSuper failed"); | 653 | jfs_error(sb, "updateSuper failed\n"); |
| 650 | goto out; | 654 | goto out; |
| 651 | } | 655 | } |
| 652 | rc = lmLogInit(log); | 656 | rc = lmLogInit(log); |
| 653 | if (rc) | 657 | if (rc) |
| 654 | jfs_error(sb, "jfs_unfreeze: lmLogInit failed"); | 658 | jfs_error(sb, "lmLogInit failed\n"); |
| 655 | out: | 659 | out: |
| 656 | txResume(sb); | 660 | txResume(sb); |
| 657 | } | 661 | } |
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 42d67f9757bf..d3472f4cd530 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c | |||
| @@ -382,7 +382,7 @@ static int ea_read(struct inode *ip, struct jfs_ea_list *ealist) | |||
| 382 | 382 | ||
| 383 | nbytes = sizeDXD(&ji->ea); | 383 | nbytes = sizeDXD(&ji->ea); |
| 384 | if (!nbytes) { | 384 | if (!nbytes) { |
| 385 | jfs_error(sb, "ea_read: nbytes is 0"); | 385 | jfs_error(sb, "nbytes is 0\n"); |
| 386 | return -EIO; | 386 | return -EIO; |
| 387 | } | 387 | } |
| 388 | 388 | ||
| @@ -482,7 +482,7 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) | |||
| 482 | current_blocks = 0; | 482 | current_blocks = 0; |
| 483 | } else { | 483 | } else { |
| 484 | if (!(ji->ea.flag & DXD_EXTENT)) { | 484 | if (!(ji->ea.flag & DXD_EXTENT)) { |
| 485 | jfs_error(sb, "ea_get: invalid ea.flag)"); | 485 | jfs_error(sb, "invalid ea.flag\n"); |
| 486 | return -EIO; | 486 | return -EIO; |
| 487 | } | 487 | } |
| 488 | current_blocks = (ea_size + sb->s_blocksize - 1) >> | 488 | current_blocks = (ea_size + sb->s_blocksize - 1) >> |
| @@ -1089,8 +1089,8 @@ int jfs_removexattr(struct dentry *dentry, const char *name) | |||
| 1089 | } | 1089 | } |
| 1090 | 1090 | ||
| 1091 | #ifdef CONFIG_JFS_SECURITY | 1091 | #ifdef CONFIG_JFS_SECURITY |
| 1092 | int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, | 1092 | static int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, |
| 1093 | void *fs_info) | 1093 | void *fs_info) |
| 1094 | { | 1094 | { |
| 1095 | const struct xattr *xattr; | 1095 | const struct xattr *xattr; |
| 1096 | tid_t *tid = fs_info; | 1096 | tid_t *tid = fs_info; |
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index a2aa97d45670..10d6c41aecad 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
| @@ -305,7 +305,7 @@ static int lockd_start_svc(struct svc_serv *serv) | |||
| 305 | svc_sock_update_bufs(serv); | 305 | svc_sock_update_bufs(serv); |
| 306 | serv->sv_maxconn = nlm_max_connections; | 306 | serv->sv_maxconn = nlm_max_connections; |
| 307 | 307 | ||
| 308 | nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name); | 308 | nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, "%s", serv->sv_name); |
| 309 | if (IS_ERR(nlmsvc_task)) { | 309 | if (IS_ERR(nlmsvc_task)) { |
| 310 | error = PTR_ERR(nlmsvc_task); | 310 | error = PTR_ERR(nlmsvc_task); |
| 311 | printk(KERN_WARNING | 311 | printk(KERN_WARNING |
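The lockd change above is a format-string hardening fix: kthread_run()'s third parameter is a printf-style name format, so passing serv->sv_name directly would let a '%' in the service name be interpreted as a conversion specifier, while passing "%s" with the name as an argument treats it as plain data. The same rule, sketched outside the kernel with an ordinary snprintf():

```c
#include <stdio.h>

/* 'name' may come from outside and could contain '%'. */
void set_task_name(char *comm, size_t len, const char *name)
{
	/* Risky: 'name' becomes the format string.
	 *   snprintf(comm, len, name);
	 * Safe: 'name' is only data for a fixed "%s" format. */
	snprintf(comm, len, "%s", name);
}
```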
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e703318c41df..067778b0ccc9 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
| @@ -276,7 +276,7 @@ static int nlmsvc_unlink_block(struct nlm_block *block) | |||
| 276 | dprintk("lockd: unlinking block %p...\n", block); | 276 | dprintk("lockd: unlinking block %p...\n", block); |
| 277 | 277 | ||
| 278 | /* Remove block from list */ | 278 | /* Remove block from list */ |
| 279 | status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl); | 279 | status = posix_unblock_lock(&block->b_call->a_args.lock.fl); |
| 280 | nlmsvc_remove_block(block); | 280 | nlmsvc_remove_block(block); |
| 281 | return status; | 281 | return status; |
| 282 | } | 282 | } |
| @@ -744,8 +744,20 @@ static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2) | |||
| 744 | return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; | 744 | return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; |
| 745 | } | 745 | } |
| 746 | 746 | ||
| 747 | /* | ||
| 748 | * Since NLM uses two "keys" for tracking locks, we need to hash them down | ||
| 749 | * to one for the blocked_hash. Here, we're just xor'ing the host address | ||
| 750 | * with the pid in order to create a key value for picking a hash bucket. | ||
| 751 | */ | ||
| 752 | static unsigned long | ||
| 753 | nlmsvc_owner_key(struct file_lock *fl) | ||
| 754 | { | ||
| 755 | return (unsigned long)fl->fl_owner ^ (unsigned long)fl->fl_pid; | ||
| 756 | } | ||
| 757 | |||
| 747 | const struct lock_manager_operations nlmsvc_lock_operations = { | 758 | const struct lock_manager_operations nlmsvc_lock_operations = { |
| 748 | .lm_compare_owner = nlmsvc_same_owner, | 759 | .lm_compare_owner = nlmsvc_same_owner, |
| 760 | .lm_owner_key = nlmsvc_owner_key, | ||
| 749 | .lm_notify = nlmsvc_notify_blocked, | 761 | .lm_notify = nlmsvc_notify_blocked, |
| 750 | .lm_grant = nlmsvc_grant_deferred, | 762 | .lm_grant = nlmsvc_grant_deferred, |
| 751 | }; | 763 | }; |
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 97e87415b145..dc5c75930f0f 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c | |||
| @@ -169,7 +169,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, | |||
| 169 | 169 | ||
| 170 | again: | 170 | again: |
| 171 | file->f_locks = 0; | 171 | file->f_locks = 0; |
| 172 | lock_flocks(); /* protects i_flock list */ | 172 | spin_lock(&inode->i_lock); |
| 173 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { | 173 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { |
| 174 | if (fl->fl_lmops != &nlmsvc_lock_operations) | 174 | if (fl->fl_lmops != &nlmsvc_lock_operations) |
| 175 | continue; | 175 | continue; |
| @@ -181,7 +181,7 @@ again: | |||
| 181 | if (match(lockhost, host)) { | 181 | if (match(lockhost, host)) { |
| 182 | struct file_lock lock = *fl; | 182 | struct file_lock lock = *fl; |
| 183 | 183 | ||
| 184 | unlock_flocks(); | 184 | spin_unlock(&inode->i_lock); |
| 185 | lock.fl_type = F_UNLCK; | 185 | lock.fl_type = F_UNLCK; |
| 186 | lock.fl_start = 0; | 186 | lock.fl_start = 0; |
| 187 | lock.fl_end = OFFSET_MAX; | 187 | lock.fl_end = OFFSET_MAX; |
| @@ -193,7 +193,7 @@ again: | |||
| 193 | goto again; | 193 | goto again; |
| 194 | } | 194 | } |
| 195 | } | 195 | } |
| 196 | unlock_flocks(); | 196 | spin_unlock(&inode->i_lock); |
| 197 | 197 | ||
| 198 | return 0; | 198 | return 0; |
| 199 | } | 199 | } |
| @@ -228,14 +228,14 @@ nlm_file_inuse(struct nlm_file *file) | |||
| 228 | if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) | 228 | if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) |
| 229 | return 1; | 229 | return 1; |
| 230 | 230 | ||
| 231 | lock_flocks(); | 231 | spin_lock(&inode->i_lock); |
| 232 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { | 232 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { |
| 233 | if (fl->fl_lmops == &nlmsvc_lock_operations) { | 233 | if (fl->fl_lmops == &nlmsvc_lock_operations) { |
| 234 | unlock_flocks(); | 234 | spin_unlock(&inode->i_lock); |
| 235 | return 1; | 235 | return 1; |
| 236 | } | 236 | } |
| 237 | } | 237 | } |
| 238 | unlock_flocks(); | 238 | spin_unlock(&inode->i_lock); |
| 239 | file->f_locks = 0; | 239 | file->f_locks = 0; |
| 240 | return 0; | 240 | return 0; |
| 241 | } | 241 | } |
diff --git a/fs/locks.c b/fs/locks.c index cb424a4fed71..b27a3005d78d 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
| @@ -126,6 +126,9 @@ | |||
| 126 | #include <linux/time.h> | 126 | #include <linux/time.h> |
| 127 | #include <linux/rcupdate.h> | 127 | #include <linux/rcupdate.h> |
| 128 | #include <linux/pid_namespace.h> | 128 | #include <linux/pid_namespace.h> |
| 129 | #include <linux/hashtable.h> | ||
| 130 | #include <linux/percpu.h> | ||
| 131 | #include <linux/lglock.h> | ||
| 129 | 132 | ||
| 130 | #include <asm/uaccess.h> | 133 | #include <asm/uaccess.h> |
| 131 | 134 | ||
| @@ -153,30 +156,53 @@ int lease_break_time = 45; | |||
| 153 | #define for_each_lock(inode, lockp) \ | 156 | #define for_each_lock(inode, lockp) \ |
| 154 | for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) | 157 | for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) |
| 155 | 158 | ||
| 156 | static LIST_HEAD(file_lock_list); | 159 | /* |
| 157 | static LIST_HEAD(blocked_list); | 160 | * The global file_lock_list is only used for displaying /proc/locks, so we |
| 158 | static DEFINE_SPINLOCK(file_lock_lock); | 161 | * keep a list on each CPU, with each list protected by its own spinlock via |
| 162 | * the file_lock_lglock. Note that alterations to the list also require that | ||
| 163 | * the relevant i_lock is held. | ||
| 164 | */ | ||
| 165 | DEFINE_STATIC_LGLOCK(file_lock_lglock); | ||
| 166 | static DEFINE_PER_CPU(struct hlist_head, file_lock_list); | ||
| 159 | 167 | ||
| 160 | /* | 168 | /* |
| 161 | * Protects the two list heads above, plus the inode->i_flock list | 169 | * The blocked_hash is used to find POSIX lock loops for deadlock detection. |
| 170 | * It is protected by blocked_lock_lock. | ||
| 171 | * | ||
| 172 | * We hash locks by lockowner in order to optimize searching for the lock a | ||
| 173 | * particular lockowner is waiting on. | ||
| 174 | * | ||
| 175 | * FIXME: make this value scale via some heuristic? We generally will want more | ||
| 176 | * buckets when we have more lockowners holding locks, but that's a little | ||
| 177 | * difficult to determine without knowing what the workload will look like. | ||
| 162 | */ | 178 | */ |
| 163 | void lock_flocks(void) | 179 | #define BLOCKED_HASH_BITS 7 |
| 164 | { | 180 | static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); |
| 165 | spin_lock(&file_lock_lock); | ||
| 166 | } | ||
| 167 | EXPORT_SYMBOL_GPL(lock_flocks); | ||
| 168 | 181 | ||
| 169 | void unlock_flocks(void) | 182 | /* |
| 170 | { | 183 | * This lock protects the blocked_hash. Generally, if you're accessing it, you |
| 171 | spin_unlock(&file_lock_lock); | 184 | * want to be holding this lock. |
| 172 | } | 185 | * |
| 173 | EXPORT_SYMBOL_GPL(unlock_flocks); | 186 | * In addition, it also protects the fl->fl_block list, and the fl->fl_next |
| 187 | * pointer for file_lock structures that are acting as lock requests (in | ||
| 188 | * contrast to those that are acting as records of acquired locks). | ||
| 189 | * | ||
| 190 | * Note that when we acquire this lock in order to change the above fields, | ||
| 191 | * we often hold the i_lock as well. In certain cases, when reading the fields | ||
| 192 | * protected by this lock, we can skip acquiring it iff we already hold the | ||
| 193 | * i_lock. | ||
| 194 | * | ||
| 195 | * In particular, adding an entry to the fl_block list requires that you hold | ||
| 196 | * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting | ||
| 197 | * an entry from the list however only requires the file_lock_lock. | ||
| 198 | */ | ||
| 199 | static DEFINE_SPINLOCK(blocked_lock_lock); | ||
| 174 | 200 | ||
| 175 | static struct kmem_cache *filelock_cache __read_mostly; | 201 | static struct kmem_cache *filelock_cache __read_mostly; |
| 176 | 202 | ||
| 177 | static void locks_init_lock_heads(struct file_lock *fl) | 203 | static void locks_init_lock_heads(struct file_lock *fl) |
| 178 | { | 204 | { |
| 179 | INIT_LIST_HEAD(&fl->fl_link); | 205 | INIT_HLIST_NODE(&fl->fl_link); |
| 180 | INIT_LIST_HEAD(&fl->fl_block); | 206 | INIT_LIST_HEAD(&fl->fl_block); |
| 181 | init_waitqueue_head(&fl->fl_wait); | 207 | init_waitqueue_head(&fl->fl_wait); |
| 182 | } | 208 | } |
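The comments added in this hunk describe the new locking scheme in fs/locks.c: the global list of locks becomes a per-CPU hlist guarded by an lglock (cheap local locking on insert and delete, with the global side needed only by readers that want a consistent view such as /proc/locks), while deadlock detection moves to the blocked_hash keyed by lock owner. As a rough sketch of how a reader might walk the per-CPU lists, under the stated assumption that it takes the global side of file_lock_lglock; this is illustrative and is not the /proc/locks code from this patch set:

```c
/* Sketch: count every tracked lock by visiting each CPU's list while
 * holding the global side of the lglock, so no CPU can insert or delete
 * concurrently. Assumes the declarations from the hunk above are in scope. */
static unsigned long count_file_locks(void)
{
	struct file_lock *fl;
	unsigned long count = 0;
	int cpu;

	lg_global_lock(&file_lock_lglock);
	for_each_possible_cpu(cpu) {
		hlist_for_each_entry(fl, per_cpu_ptr(&file_lock_list, cpu), fl_link)
			count++;
	}
	lg_global_unlock(&file_lock_lglock);
	return count;
}
```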
| @@ -210,7 +236,7 @@ void locks_free_lock(struct file_lock *fl) | |||
| 210 | { | 236 | { |
| 211 | BUG_ON(waitqueue_active(&fl->fl_wait)); | 237 | BUG_ON(waitqueue_active(&fl->fl_wait)); |
| 212 | BUG_ON(!list_empty(&fl->fl_block)); | 238 | BUG_ON(!list_empty(&fl->fl_block)); |
| 213 | BUG_ON(!list_empty(&fl->fl_link)); | 239 | BUG_ON(!hlist_unhashed(&fl->fl_link)); |
| 214 | 240 | ||
| 215 | locks_release_private(fl); | 241 | locks_release_private(fl); |
| 216 | kmem_cache_free(filelock_cache, fl); | 242 | kmem_cache_free(filelock_cache, fl); |
| @@ -484,47 +510,118 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) | |||
| 484 | return fl1->fl_owner == fl2->fl_owner; | 510 | return fl1->fl_owner == fl2->fl_owner; |
| 485 | } | 511 | } |
| 486 | 512 | ||
| 513 | /* Must be called with the i_lock held! */ | ||
| 514 | static inline void | ||
| 515 | locks_insert_global_locks(struct file_lock *fl) | ||
| 516 | { | ||
| 517 | lg_local_lock(&file_lock_lglock); | ||
| 518 | fl->fl_link_cpu = smp_processor_id(); | ||
| 519 | hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list)); | ||
| 520 | lg_local_unlock(&file_lock_lglock); | ||
| 521 | } | ||
| 522 | |||
| 523 | /* Must be called with the i_lock held! */ | ||
| 524 | static inline void | ||
| 525 | locks_delete_global_locks(struct file_lock *fl) | ||
| 526 | { | ||
| 527 | /* | ||
| 528 | * Avoid taking lock if already unhashed. This is safe since this check | ||
| 529 | * is done while holding the i_lock, and new insertions into the list | ||
| 530 | * also require that it be held. | ||
| 531 | */ | ||
| 532 | if (hlist_unhashed(&fl->fl_link)) | ||
| 533 | return; | ||
| 534 | lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu); | ||
| 535 | hlist_del_init(&fl->fl_link); | ||
| 536 | lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu); | ||
| 537 | } | ||
| 538 | |||
| 539 | static unsigned long | ||
| 540 | posix_owner_key(struct file_lock *fl) | ||
| 541 | { | ||
| 542 | if (fl->fl_lmops && fl->fl_lmops->lm_owner_key) | ||
| 543 | return fl->fl_lmops->lm_owner_key(fl); | ||
| 544 | return (unsigned long)fl->fl_owner; | ||
| 545 | } | ||
| 546 | |||
| 547 | static inline void | ||
| 548 | locks_insert_global_blocked(struct file_lock *waiter) | ||
| 549 | { | ||
| 550 | hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); | ||
| 551 | } | ||
| 552 | |||
| 553 | static inline void | ||
| 554 | locks_delete_global_blocked(struct file_lock *waiter) | ||
| 555 | { | ||
| 556 | hash_del(&waiter->fl_link); | ||
| 557 | } | ||
| 558 | |||
| 487 | /* Remove waiter from blocker's block list. | 559 | /* Remove waiter from blocker's block list. |
| 488 | * When blocker ends up pointing to itself then the list is empty. | 560 | * When blocker ends up pointing to itself then the list is empty. |
| 561 | * | ||
| 562 | * Must be called with blocked_lock_lock held. | ||
| 489 | */ | 563 | */ |
| 490 | static void __locks_delete_block(struct file_lock *waiter) | 564 | static void __locks_delete_block(struct file_lock *waiter) |
| 491 | { | 565 | { |
| 566 | locks_delete_global_blocked(waiter); | ||
| 492 | list_del_init(&waiter->fl_block); | 567 | list_del_init(&waiter->fl_block); |
| 493 | list_del_init(&waiter->fl_link); | ||
| 494 | waiter->fl_next = NULL; | 568 | waiter->fl_next = NULL; |
| 495 | } | 569 | } |
| 496 | 570 | ||
| 497 | /* | 571 | static void locks_delete_block(struct file_lock *waiter) |
| 498 | */ | ||
| 499 | void locks_delete_block(struct file_lock *waiter) | ||
| 500 | { | 572 | { |
| 501 | lock_flocks(); | 573 | spin_lock(&blocked_lock_lock); |
| 502 | __locks_delete_block(waiter); | 574 | __locks_delete_block(waiter); |
| 503 | unlock_flocks(); | 575 | spin_unlock(&blocked_lock_lock); |
| 504 | } | 576 | } |
| 505 | EXPORT_SYMBOL(locks_delete_block); | ||
| 506 | 577 | ||
| 507 | /* Insert waiter into blocker's block list. | 578 | /* Insert waiter into blocker's block list. |
| 508 | * We use a circular list so that processes can be easily woken up in | 579 | * We use a circular list so that processes can be easily woken up in |
| 509 | * the order they blocked. The documentation doesn't require this but | 580 | * the order they blocked. The documentation doesn't require this but |
| 510 | * it seems like the reasonable thing to do. | 581 | * it seems like the reasonable thing to do. |
| 582 | * | ||
| 583 | * Must be called with both the i_lock and blocked_lock_lock held. The fl_block | ||
| 584 | * list itself is protected by the blocked_lock_lock, but by ensuring that the | ||
| 585 | * i_lock is also held on insertions we can avoid taking the blocked_lock_lock | ||
| 586 | * in some cases when we see that the fl_block list is empty. | ||
| 511 | */ | 587 | */ |
| 512 | static void locks_insert_block(struct file_lock *blocker, | 588 | static void __locks_insert_block(struct file_lock *blocker, |
| 513 | struct file_lock *waiter) | 589 | struct file_lock *waiter) |
| 514 | { | 590 | { |
| 515 | BUG_ON(!list_empty(&waiter->fl_block)); | 591 | BUG_ON(!list_empty(&waiter->fl_block)); |
| 516 | list_add_tail(&waiter->fl_block, &blocker->fl_block); | ||
| 517 | waiter->fl_next = blocker; | 592 | waiter->fl_next = blocker; |
| 593 | list_add_tail(&waiter->fl_block, &blocker->fl_block); | ||
| 518 | if (IS_POSIX(blocker)) | 594 | if (IS_POSIX(blocker)) |
| 519 | list_add(&waiter->fl_link, &blocked_list); | 595 | locks_insert_global_blocked(waiter); |
| 520 | } | 596 | } |
| 521 | 597 | ||
| 522 | /* Wake up processes blocked waiting for blocker. | 598 | /* Must be called with i_lock held. */ |
| 523 | * If told to wait then schedule the processes until the block list | 599 | static void locks_insert_block(struct file_lock *blocker, |
| 524 | * is empty, otherwise empty the block list ourselves. | 600 | struct file_lock *waiter) |
| 601 | { | ||
| 602 | spin_lock(&blocked_lock_lock); | ||
| 603 | __locks_insert_block(blocker, waiter); | ||
| 604 | spin_unlock(&blocked_lock_lock); | ||
| 605 | } | ||
| 606 | |||
| 607 | /* | ||
| 608 | * Wake up processes blocked waiting for blocker. | ||
| 609 | * | ||
| 610 | * Must be called with the inode->i_lock held! | ||
| 525 | */ | 611 | */ |
| 526 | static void locks_wake_up_blocks(struct file_lock *blocker) | 612 | static void locks_wake_up_blocks(struct file_lock *blocker) |
| 527 | { | 613 | { |
| 614 | /* | ||
| 615 | * Avoid taking global lock if list is empty. This is safe since new | ||
| 616 | * blocked requests are only added to the list under the i_lock, and | ||
| 617 | * the i_lock is always held here. Note that removal from the fl_block | ||
| 618 | * list does not require the i_lock, so we must recheck list_empty() | ||
| 619 | * after acquiring the blocked_lock_lock. | ||
| 620 | */ | ||
| 621 | if (list_empty(&blocker->fl_block)) | ||
| 622 | return; | ||
| 623 | |||
| 624 | spin_lock(&blocked_lock_lock); | ||
| 528 | while (!list_empty(&blocker->fl_block)) { | 625 | while (!list_empty(&blocker->fl_block)) { |
| 529 | struct file_lock *waiter; | 626 | struct file_lock *waiter; |
| 530 | 627 | ||
| @@ -536,20 +633,23 @@ static void locks_wake_up_blocks(struct file_lock *blocker) | |||
| 536 | else | 633 | else |
| 537 | wake_up(&waiter->fl_wait); | 634 | wake_up(&waiter->fl_wait); |
| 538 | } | 635 | } |
| 636 | spin_unlock(&blocked_lock_lock); | ||
| 539 | } | 637 | } |
| 540 | 638 | ||
| 541 | /* Insert file lock fl into an inode's lock list at the position indicated | 639 | /* Insert file lock fl into an inode's lock list at the position indicated |
| 542 | * by pos. At the same time add the lock to the global file lock list. | 640 | * by pos. At the same time add the lock to the global file lock list. |
| 641 | * | ||
| 642 | * Must be called with the i_lock held! | ||
| 543 | */ | 643 | */ |
| 544 | static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) | 644 | static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) |
| 545 | { | 645 | { |
| 546 | list_add(&fl->fl_link, &file_lock_list); | ||
| 547 | |||
| 548 | fl->fl_nspid = get_pid(task_tgid(current)); | 646 | fl->fl_nspid = get_pid(task_tgid(current)); |
| 549 | 647 | ||
| 550 | /* insert into file's list */ | 648 | /* insert into file's list */ |
| 551 | fl->fl_next = *pos; | 649 | fl->fl_next = *pos; |
| 552 | *pos = fl; | 650 | *pos = fl; |
| 651 | |||
| 652 | locks_insert_global_locks(fl); | ||
| 553 | } | 653 | } |
| 554 | 654 | ||
| 555 | /* | 655 | /* |
| @@ -557,14 +657,17 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) | |||
| 557 | * Wake up processes that are blocked waiting for this lock, | 657 | * Wake up processes that are blocked waiting for this lock, |
| 558 | * notify the FS that the lock has been cleared and | 658 | * notify the FS that the lock has been cleared and |
| 559 | * finally free the lock. | 659 | * finally free the lock. |
| 660 | * | ||
| 661 | * Must be called with the i_lock held! | ||
| 560 | */ | 662 | */ |
| 561 | static void locks_delete_lock(struct file_lock **thisfl_p) | 663 | static void locks_delete_lock(struct file_lock **thisfl_p) |
| 562 | { | 664 | { |
| 563 | struct file_lock *fl = *thisfl_p; | 665 | struct file_lock *fl = *thisfl_p; |
| 564 | 666 | ||
| 667 | locks_delete_global_locks(fl); | ||
| 668 | |||
| 565 | *thisfl_p = fl->fl_next; | 669 | *thisfl_p = fl->fl_next; |
| 566 | fl->fl_next = NULL; | 670 | fl->fl_next = NULL; |
| 567 | list_del_init(&fl->fl_link); | ||
| 568 | 671 | ||
| 569 | if (fl->fl_nspid) { | 672 | if (fl->fl_nspid) { |
| 570 | put_pid(fl->fl_nspid); | 673 | put_pid(fl->fl_nspid); |
| @@ -625,8 +728,9 @@ void | |||
| 625 | posix_test_lock(struct file *filp, struct file_lock *fl) | 728 | posix_test_lock(struct file *filp, struct file_lock *fl) |
| 626 | { | 729 | { |
| 627 | struct file_lock *cfl; | 730 | struct file_lock *cfl; |
| 731 | struct inode *inode = file_inode(filp); | ||
| 628 | 732 | ||
| 629 | lock_flocks(); | 733 | spin_lock(&inode->i_lock); |
| 630 | for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { | 734 | for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { |
| 631 | if (!IS_POSIX(cfl)) | 735 | if (!IS_POSIX(cfl)) |
| 632 | continue; | 736 | continue; |
| @@ -639,7 +743,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) | |||
| 639 | fl->fl_pid = pid_vnr(cfl->fl_nspid); | 743 | fl->fl_pid = pid_vnr(cfl->fl_nspid); |
| 640 | } else | 744 | } else |
| 641 | fl->fl_type = F_UNLCK; | 745 | fl->fl_type = F_UNLCK; |
| 642 | unlock_flocks(); | 746 | spin_unlock(&inode->i_lock); |
| 643 | return; | 747 | return; |
| 644 | } | 748 | } |
| 645 | EXPORT_SYMBOL(posix_test_lock); | 749 | EXPORT_SYMBOL(posix_test_lock); |
| @@ -676,13 +780,14 @@ static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) | |||
| 676 | { | 780 | { |
| 677 | struct file_lock *fl; | 781 | struct file_lock *fl; |
| 678 | 782 | ||
| 679 | list_for_each_entry(fl, &blocked_list, fl_link) { | 783 | hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) { |
| 680 | if (posix_same_owner(fl, block_fl)) | 784 | if (posix_same_owner(fl, block_fl)) |
| 681 | return fl->fl_next; | 785 | return fl->fl_next; |
| 682 | } | 786 | } |
| 683 | return NULL; | 787 | return NULL; |
| 684 | } | 788 | } |
| 685 | 789 | ||
| 790 | /* Must be called with the blocked_lock_lock held! */ | ||
| 686 | static int posix_locks_deadlock(struct file_lock *caller_fl, | 791 | static int posix_locks_deadlock(struct file_lock *caller_fl, |
| 687 | struct file_lock *block_fl) | 792 | struct file_lock *block_fl) |
| 688 | { | 793 | { |
| @@ -718,7 +823,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
| 718 | return -ENOMEM; | 823 | return -ENOMEM; |
| 719 | } | 824 | } |
| 720 | 825 | ||
| 721 | lock_flocks(); | 826 | spin_lock(&inode->i_lock); |
| 722 | if (request->fl_flags & FL_ACCESS) | 827 | if (request->fl_flags & FL_ACCESS) |
| 723 | goto find_conflict; | 828 | goto find_conflict; |
| 724 | 829 | ||
| @@ -748,9 +853,9 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
| 748 | * give it the opportunity to lock the file. | 853 | * give it the opportunity to lock the file. |
| 749 | */ | 854 | */ |
| 750 | if (found) { | 855 | if (found) { |
| 751 | unlock_flocks(); | 856 | spin_unlock(&inode->i_lock); |
| 752 | cond_resched(); | 857 | cond_resched(); |
| 753 | lock_flocks(); | 858 | spin_lock(&inode->i_lock); |
| 754 | } | 859 | } |
| 755 | 860 | ||
| 756 | find_conflict: | 861 | find_conflict: |
| @@ -777,7 +882,7 @@ find_conflict: | |||
| 777 | error = 0; | 882 | error = 0; |
| 778 | 883 | ||
| 779 | out: | 884 | out: |
| 780 | unlock_flocks(); | 885 | spin_unlock(&inode->i_lock); |
| 781 | if (new_fl) | 886 | if (new_fl) |
| 782 | locks_free_lock(new_fl); | 887 | locks_free_lock(new_fl); |
| 783 | return error; | 888 | return error; |
| @@ -791,7 +896,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
| 791 | struct file_lock *left = NULL; | 896 | struct file_lock *left = NULL; |
| 792 | struct file_lock *right = NULL; | 897 | struct file_lock *right = NULL; |
| 793 | struct file_lock **before; | 898 | struct file_lock **before; |
| 794 | int error, added = 0; | 899 | int error; |
| 900 | bool added = false; | ||
| 795 | 901 | ||
| 796 | /* | 902 | /* |
| 797 | * We may need two file_lock structures for this operation, | 903 | * We may need two file_lock structures for this operation, |
| @@ -806,7 +912,12 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
| 806 | new_fl2 = locks_alloc_lock(); | 912 | new_fl2 = locks_alloc_lock(); |
| 807 | } | 913 | } |
| 808 | 914 | ||
| 809 | lock_flocks(); | 915 | spin_lock(&inode->i_lock); |
| 916 | /* | ||
| 917 | * New lock request. Walk all POSIX locks and look for conflicts. If | ||
| 918 | * there are any, either return error or put the request on the | ||
| 919 | * blocker's list of waiters and the global blocked_hash. | ||
| 920 | */ | ||
| 810 | if (request->fl_type != F_UNLCK) { | 921 | if (request->fl_type != F_UNLCK) { |
| 811 | for_each_lock(inode, before) { | 922 | for_each_lock(inode, before) { |
| 812 | fl = *before; | 923 | fl = *before; |
| @@ -819,11 +930,17 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
| 819 | error = -EAGAIN; | 930 | error = -EAGAIN; |
| 820 | if (!(request->fl_flags & FL_SLEEP)) | 931 | if (!(request->fl_flags & FL_SLEEP)) |
| 821 | goto out; | 932 | goto out; |
| 933 | /* | ||
| 934 | * Deadlock detection and insertion into the blocked | ||
| 935 | * locks list must be done while holding the same lock! | ||
| 936 | */ | ||
| 822 | error = -EDEADLK; | 937 | error = -EDEADLK; |
| 823 | if (posix_locks_deadlock(request, fl)) | 938 | spin_lock(&blocked_lock_lock); |
| 824 | goto out; | 939 | if (likely(!posix_locks_deadlock(request, fl))) { |
| 825 | error = FILE_LOCK_DEFERRED; | 940 | error = FILE_LOCK_DEFERRED; |
| 826 | locks_insert_block(fl, request); | 941 | __locks_insert_block(fl, request); |
| 942 | } | ||
| 943 | spin_unlock(&blocked_lock_lock); | ||
| 827 | goto out; | 944 | goto out; |
| 828 | } | 945 | } |
| 829 | } | 946 | } |
| @@ -845,7 +962,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
| 845 | before = &fl->fl_next; | 962 | before = &fl->fl_next; |
| 846 | } | 963 | } |
| 847 | 964 | ||
| 848 | /* Process locks with this owner. */ | 965 | /* Process locks with this owner. */ |
| 849 | while ((fl = *before) && posix_same_owner(request, fl)) { | 966 | while ((fl = *before) && posix_same_owner(request, fl)) { |
| 850 | /* Detect adjacent or overlapping regions (if same lock type) | 967 | /* Detect adjacent or overlapping regions (if same lock type) |
| 851 | */ | 968 | */ |
| @@ -880,7 +997,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
| 880 | continue; | 997 | continue; |
| 881 | } | 998 | } |
| 882 | request = fl; | 999 | request = fl; |
| 883 | added = 1; | 1000 | added = true; |
| 884 | } | 1001 | } |
| 885 | else { | 1002 | else { |
| 886 | /* Processing for different lock types is a bit | 1003 | /* Processing for different lock types is a bit |
| @@ -891,7 +1008,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
| 891 | if (fl->fl_start > request->fl_end) | 1008 | if (fl->fl_start > request->fl_end) |
| 892 | break; | 1009 | break; |
| 893 | if (request->fl_type == F_UNLCK) | 1010 | if (request->fl_type == F_UNLCK) |
| 894 | added = 1; | 1011 | added = true; |
| 895 | if (fl->fl_start < request->fl_start) | 1012 | if (fl->fl_start < request->fl_start) |
| 896 | left = fl; | 1013 | left = fl; |
| 897 | /* If the next lock in the list has a higher end | 1014 | /* If the next lock in the list has a higher end |
| @@ -921,7 +1038,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
| 921 | locks_release_private(fl); | 1038 | locks_release_private(fl); |
| 922 | locks_copy_private(fl, request); | 1039 | locks_copy_private(fl, request); |
| 923 | request = fl; | 1040 | request = fl; |
| 924 | added = 1; | 1041 | added = true; |
| 925 | } | 1042 | } |
| 926 | } | 1043 | } |
| 927 | /* Go on to next lock. | 1044 | /* Go on to next lock. |
| @@ -931,10 +1048,9 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
| 931 | } | 1048 | } |
| 932 | 1049 | ||
| 933 | /* | 1050 | /* |
| 934 | * The above code only modifies existing locks in case of | 1051 | * The above code only modifies existing locks in case of merging or |
| 935 | * merging or replacing. If new lock(s) need to be inserted | 1052 | * replacing. If new lock(s) need to be inserted all modifications are |
| 936 | * all modifications are done bellow this, so it's safe yet to | 1053 | * done below this, so it's safe yet to bail out. |
| 937 | * bail out. | ||
| 938 | */ | 1054 | */ |
| 939 | error = -ENOLCK; /* "no luck" */ | 1055 | error = -ENOLCK; /* "no luck" */ |
| 940 | if (right && left == right && !new_fl2) | 1056 | if (right && left == right && !new_fl2) |
| @@ -974,7 +1090,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
| 974 | locks_wake_up_blocks(left); | 1090 | locks_wake_up_blocks(left); |
| 975 | } | 1091 | } |
| 976 | out: | 1092 | out: |
| 977 | unlock_flocks(); | 1093 | spin_unlock(&inode->i_lock); |
| 978 | /* | 1094 | /* |
| 979 | * Free any unused locks. | 1095 | * Free any unused locks. |
| 980 | */ | 1096 | */ |
| @@ -1049,14 +1165,14 @@ int locks_mandatory_locked(struct inode *inode) | |||
| 1049 | /* | 1165 | /* |
| 1050 | * Search the lock list for this inode for any POSIX locks. | 1166 | * Search the lock list for this inode for any POSIX locks. |
| 1051 | */ | 1167 | */ |
| 1052 | lock_flocks(); | 1168 | spin_lock(&inode->i_lock); |
| 1053 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 1169 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
| 1054 | if (!IS_POSIX(fl)) | 1170 | if (!IS_POSIX(fl)) |
| 1055 | continue; | 1171 | continue; |
| 1056 | if (fl->fl_owner != owner) | 1172 | if (fl->fl_owner != owner) |
| 1057 | break; | 1173 | break; |
| 1058 | } | 1174 | } |
| 1059 | unlock_flocks(); | 1175 | spin_unlock(&inode->i_lock); |
| 1060 | return fl ? -EAGAIN : 0; | 1176 | return fl ? -EAGAIN : 0; |
| 1061 | } | 1177 | } |
| 1062 | 1178 | ||
| @@ -1199,7 +1315,7 @@ int __break_lease(struct inode *inode, unsigned int mode) | |||
| 1199 | if (IS_ERR(new_fl)) | 1315 | if (IS_ERR(new_fl)) |
| 1200 | return PTR_ERR(new_fl); | 1316 | return PTR_ERR(new_fl); |
| 1201 | 1317 | ||
| 1202 | lock_flocks(); | 1318 | spin_lock(&inode->i_lock); |
| 1203 | 1319 | ||
| 1204 | time_out_leases(inode); | 1320 | time_out_leases(inode); |
| 1205 | 1321 | ||
| @@ -1249,11 +1365,11 @@ restart: | |||
| 1249 | break_time++; | 1365 | break_time++; |
| 1250 | } | 1366 | } |
| 1251 | locks_insert_block(flock, new_fl); | 1367 | locks_insert_block(flock, new_fl); |
| 1252 | unlock_flocks(); | 1368 | spin_unlock(&inode->i_lock); |
| 1253 | error = wait_event_interruptible_timeout(new_fl->fl_wait, | 1369 | error = wait_event_interruptible_timeout(new_fl->fl_wait, |
| 1254 | !new_fl->fl_next, break_time); | 1370 | !new_fl->fl_next, break_time); |
| 1255 | lock_flocks(); | 1371 | spin_lock(&inode->i_lock); |
| 1256 | __locks_delete_block(new_fl); | 1372 | locks_delete_block(new_fl); |
| 1257 | if (error >= 0) { | 1373 | if (error >= 0) { |
| 1258 | if (error == 0) | 1374 | if (error == 0) |
| 1259 | time_out_leases(inode); | 1375 | time_out_leases(inode); |
| @@ -1270,7 +1386,7 @@ restart: | |||
| 1270 | } | 1386 | } |
| 1271 | 1387 | ||
| 1272 | out: | 1388 | out: |
| 1273 | unlock_flocks(); | 1389 | spin_unlock(&inode->i_lock); |
| 1274 | locks_free_lock(new_fl); | 1390 | locks_free_lock(new_fl); |
| 1275 | return error; | 1391 | return error; |
| 1276 | } | 1392 | } |
| @@ -1323,9 +1439,10 @@ EXPORT_SYMBOL(lease_get_mtime); | |||
| 1323 | int fcntl_getlease(struct file *filp) | 1439 | int fcntl_getlease(struct file *filp) |
| 1324 | { | 1440 | { |
| 1325 | struct file_lock *fl; | 1441 | struct file_lock *fl; |
| 1442 | struct inode *inode = file_inode(filp); | ||
| 1326 | int type = F_UNLCK; | 1443 | int type = F_UNLCK; |
| 1327 | 1444 | ||
| 1328 | lock_flocks(); | 1445 | spin_lock(&inode->i_lock); |
| 1329 | time_out_leases(file_inode(filp)); | 1446 | time_out_leases(file_inode(filp)); |
| 1330 | for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); | 1447 | for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); |
| 1331 | fl = fl->fl_next) { | 1448 | fl = fl->fl_next) { |
| @@ -1334,11 +1451,11 @@ int fcntl_getlease(struct file *filp) | |||
| 1334 | break; | 1451 | break; |
| 1335 | } | 1452 | } |
| 1336 | } | 1453 | } |
| 1337 | unlock_flocks(); | 1454 | spin_unlock(&inode->i_lock); |
| 1338 | return type; | 1455 | return type; |
| 1339 | } | 1456 | } |
| 1340 | 1457 | ||
| 1341 | int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) | 1458 | static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) |
| 1342 | { | 1459 | { |
| 1343 | struct file_lock *fl, **before, **my_before = NULL, *lease; | 1460 | struct file_lock *fl, **before, **my_before = NULL, *lease; |
| 1344 | struct dentry *dentry = filp->f_path.dentry; | 1461 | struct dentry *dentry = filp->f_path.dentry; |
| @@ -1351,7 +1468,7 @@ int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) | |||
| 1351 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) | 1468 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) |
| 1352 | goto out; | 1469 | goto out; |
| 1353 | if ((arg == F_WRLCK) | 1470 | if ((arg == F_WRLCK) |
| 1354 | && ((dentry->d_count > 1) | 1471 | && ((d_count(dentry) > 1) |
| 1355 | || (atomic_read(&inode->i_count) > 1))) | 1472 | || (atomic_read(&inode->i_count) > 1))) |
| 1356 | goto out; | 1473 | goto out; |
| 1357 | 1474 | ||
| @@ -1403,7 +1520,7 @@ out: | |||
| 1403 | return error; | 1520 | return error; |
| 1404 | } | 1521 | } |
| 1405 | 1522 | ||
| 1406 | int generic_delete_lease(struct file *filp, struct file_lock **flp) | 1523 | static int generic_delete_lease(struct file *filp, struct file_lock **flp) |
| 1407 | { | 1524 | { |
| 1408 | struct file_lock *fl, **before; | 1525 | struct file_lock *fl, **before; |
| 1409 | struct dentry *dentry = filp->f_path.dentry; | 1526 | struct dentry *dentry = filp->f_path.dentry; |
| @@ -1428,7 +1545,7 @@ int generic_delete_lease(struct file *filp, struct file_lock **flp) | |||
| 1428 | * The (input) flp->fl_lmops->lm_break function is required | 1545 | * The (input) flp->fl_lmops->lm_break function is required |
| 1429 | * by break_lease(). | 1546 | * by break_lease(). |
| 1430 | * | 1547 | * |
| 1431 | * Called with file_lock_lock held. | 1548 | * Called with inode->i_lock held. |
| 1432 | */ | 1549 | */ |
| 1433 | int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | 1550 | int generic_setlease(struct file *filp, long arg, struct file_lock **flp) |
| 1434 | { | 1551 | { |
| @@ -1497,11 +1614,12 @@ static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) | |||
| 1497 | 1614 | ||
| 1498 | int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) | 1615 | int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) |
| 1499 | { | 1616 | { |
| 1617 | struct inode *inode = file_inode(filp); | ||
| 1500 | int error; | 1618 | int error; |
| 1501 | 1619 | ||
| 1502 | lock_flocks(); | 1620 | spin_lock(&inode->i_lock); |
| 1503 | error = __vfs_setlease(filp, arg, lease); | 1621 | error = __vfs_setlease(filp, arg, lease); |
| 1504 | unlock_flocks(); | 1622 | spin_unlock(&inode->i_lock); |
| 1505 | 1623 | ||
| 1506 | return error; | 1624 | return error; |
| 1507 | } | 1625 | } |
| @@ -1519,6 +1637,7 @@ static int do_fcntl_delete_lease(struct file *filp) | |||
| 1519 | static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) | 1637 | static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) |
| 1520 | { | 1638 | { |
| 1521 | struct file_lock *fl, *ret; | 1639 | struct file_lock *fl, *ret; |
| 1640 | struct inode *inode = file_inode(filp); | ||
| 1522 | struct fasync_struct *new; | 1641 | struct fasync_struct *new; |
| 1523 | int error; | 1642 | int error; |
| 1524 | 1643 | ||
| @@ -1532,10 +1651,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) | |||
| 1532 | return -ENOMEM; | 1651 | return -ENOMEM; |
| 1533 | } | 1652 | } |
| 1534 | ret = fl; | 1653 | ret = fl; |
| 1535 | lock_flocks(); | 1654 | spin_lock(&inode->i_lock); |
| 1536 | error = __vfs_setlease(filp, arg, &ret); | 1655 | error = __vfs_setlease(filp, arg, &ret); |
| 1537 | if (error) { | 1656 | if (error) { |
| 1538 | unlock_flocks(); | 1657 | spin_unlock(&inode->i_lock); |
| 1539 | locks_free_lock(fl); | 1658 | locks_free_lock(fl); |
| 1540 | goto out_free_fasync; | 1659 | goto out_free_fasync; |
| 1541 | } | 1660 | } |
| @@ -1552,7 +1671,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) | |||
| 1552 | new = NULL; | 1671 | new = NULL; |
| 1553 | 1672 | ||
| 1554 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); | 1673 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); |
| 1555 | unlock_flocks(); | 1674 | spin_unlock(&inode->i_lock); |
| 1556 | 1675 | ||
| 1557 | out_free_fasync: | 1676 | out_free_fasync: |
| 1558 | if (new) | 1677 | if (new) |
| @@ -2076,7 +2195,7 @@ void locks_remove_flock(struct file *filp) | |||
| 2076 | fl.fl_ops->fl_release_private(&fl); | 2195 | fl.fl_ops->fl_release_private(&fl); |
| 2077 | } | 2196 | } |
| 2078 | 2197 | ||
| 2079 | lock_flocks(); | 2198 | spin_lock(&inode->i_lock); |
| 2080 | before = &inode->i_flock; | 2199 | before = &inode->i_flock; |
| 2081 | 2200 | ||
| 2082 | while ((fl = *before) != NULL) { | 2201 | while ((fl = *before) != NULL) { |
| @@ -2094,30 +2213,28 @@ void locks_remove_flock(struct file *filp) | |||
| 2094 | } | 2213 | } |
| 2095 | before = &fl->fl_next; | 2214 | before = &fl->fl_next; |
| 2096 | } | 2215 | } |
| 2097 | unlock_flocks(); | 2216 | spin_unlock(&inode->i_lock); |
| 2098 | } | 2217 | } |
| 2099 | 2218 | ||
| 2100 | /** | 2219 | /** |
| 2101 | * posix_unblock_lock - stop waiting for a file lock | 2220 | * posix_unblock_lock - stop waiting for a file lock |
| 2102 | * @filp: how the file was opened | ||
| 2103 | * @waiter: the lock which was waiting | 2221 | * @waiter: the lock which was waiting |
| 2104 | * | 2222 | * |
| 2105 | * lockd needs to block waiting for locks. | 2223 | * lockd needs to block waiting for locks. |
| 2106 | */ | 2224 | */ |
| 2107 | int | 2225 | int |
| 2108 | posix_unblock_lock(struct file *filp, struct file_lock *waiter) | 2226 | posix_unblock_lock(struct file_lock *waiter) |
| 2109 | { | 2227 | { |
| 2110 | int status = 0; | 2228 | int status = 0; |
| 2111 | 2229 | ||
| 2112 | lock_flocks(); | 2230 | spin_lock(&blocked_lock_lock); |
| 2113 | if (waiter->fl_next) | 2231 | if (waiter->fl_next) |
| 2114 | __locks_delete_block(waiter); | 2232 | __locks_delete_block(waiter); |
| 2115 | else | 2233 | else |
| 2116 | status = -ENOENT; | 2234 | status = -ENOENT; |
| 2117 | unlock_flocks(); | 2235 | spin_unlock(&blocked_lock_lock); |
| 2118 | return status; | 2236 | return status; |
| 2119 | } | 2237 | } |
| 2120 | |||
| 2121 | EXPORT_SYMBOL(posix_unblock_lock); | 2238 | EXPORT_SYMBOL(posix_unblock_lock); |
| 2122 | 2239 | ||
| 2123 | /** | 2240 | /** |
| @@ -2140,6 +2257,11 @@ EXPORT_SYMBOL_GPL(vfs_cancel_lock); | |||
| 2140 | #include <linux/proc_fs.h> | 2257 | #include <linux/proc_fs.h> |
| 2141 | #include <linux/seq_file.h> | 2258 | #include <linux/seq_file.h> |
| 2142 | 2259 | ||
| 2260 | struct locks_iterator { | ||
| 2261 | int li_cpu; | ||
| 2262 | loff_t li_pos; | ||
| 2263 | }; | ||
| 2264 | |||
| 2143 | static void lock_get_status(struct seq_file *f, struct file_lock *fl, | 2265 | static void lock_get_status(struct seq_file *f, struct file_lock *fl, |
| 2144 | loff_t id, char *pfx) | 2266 | loff_t id, char *pfx) |
| 2145 | { | 2267 | { |
| @@ -2213,37 +2335,41 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, | |||
| 2213 | 2335 | ||
| 2214 | static int locks_show(struct seq_file *f, void *v) | 2336 | static int locks_show(struct seq_file *f, void *v) |
| 2215 | { | 2337 | { |
| 2338 | struct locks_iterator *iter = f->private; | ||
| 2216 | struct file_lock *fl, *bfl; | 2339 | struct file_lock *fl, *bfl; |
| 2217 | 2340 | ||
| 2218 | fl = list_entry(v, struct file_lock, fl_link); | 2341 | fl = hlist_entry(v, struct file_lock, fl_link); |
| 2219 | 2342 | ||
| 2220 | lock_get_status(f, fl, *((loff_t *)f->private), ""); | 2343 | lock_get_status(f, fl, iter->li_pos, ""); |
| 2221 | 2344 | ||
| 2222 | list_for_each_entry(bfl, &fl->fl_block, fl_block) | 2345 | list_for_each_entry(bfl, &fl->fl_block, fl_block) |
| 2223 | lock_get_status(f, bfl, *((loff_t *)f->private), " ->"); | 2346 | lock_get_status(f, bfl, iter->li_pos, " ->"); |
| 2224 | 2347 | ||
| 2225 | return 0; | 2348 | return 0; |
| 2226 | } | 2349 | } |
| 2227 | 2350 | ||
| 2228 | static void *locks_start(struct seq_file *f, loff_t *pos) | 2351 | static void *locks_start(struct seq_file *f, loff_t *pos) |
| 2229 | { | 2352 | { |
| 2230 | loff_t *p = f->private; | 2353 | struct locks_iterator *iter = f->private; |
| 2231 | 2354 | ||
| 2232 | lock_flocks(); | 2355 | iter->li_pos = *pos + 1; |
| 2233 | *p = (*pos + 1); | 2356 | lg_global_lock(&file_lock_lglock); |
| 2234 | return seq_list_start(&file_lock_list, *pos); | 2357 | spin_lock(&blocked_lock_lock); |
| 2358 | return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos); | ||
| 2235 | } | 2359 | } |
| 2236 | 2360 | ||
| 2237 | static void *locks_next(struct seq_file *f, void *v, loff_t *pos) | 2361 | static void *locks_next(struct seq_file *f, void *v, loff_t *pos) |
| 2238 | { | 2362 | { |
| 2239 | loff_t *p = f->private; | 2363 | struct locks_iterator *iter = f->private; |
| 2240 | ++*p; | 2364 | |
| 2241 | return seq_list_next(v, &file_lock_list, pos); | 2365 | ++iter->li_pos; |
| 2366 | return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos); | ||
| 2242 | } | 2367 | } |
| 2243 | 2368 | ||
| 2244 | static void locks_stop(struct seq_file *f, void *v) | 2369 | static void locks_stop(struct seq_file *f, void *v) |
| 2245 | { | 2370 | { |
| 2246 | unlock_flocks(); | 2371 | spin_unlock(&blocked_lock_lock); |
| 2372 | lg_global_unlock(&file_lock_lglock); | ||
| 2247 | } | 2373 | } |
| 2248 | 2374 | ||
| 2249 | static const struct seq_operations locks_seq_operations = { | 2375 | static const struct seq_operations locks_seq_operations = { |
| @@ -2255,7 +2381,8 @@ static const struct seq_operations locks_seq_operations = { | |||
| 2255 | 2381 | ||
| 2256 | static int locks_open(struct inode *inode, struct file *filp) | 2382 | static int locks_open(struct inode *inode, struct file *filp) |
| 2257 | { | 2383 | { |
| 2258 | return seq_open_private(filp, &locks_seq_operations, sizeof(loff_t)); | 2384 | return seq_open_private(filp, &locks_seq_operations, |
| 2385 | sizeof(struct locks_iterator)); | ||
| 2259 | } | 2386 | } |
| 2260 | 2387 | ||
| 2261 | static const struct file_operations proc_locks_operations = { | 2388 | static const struct file_operations proc_locks_operations = { |
| @@ -2290,7 +2417,8 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len) | |||
| 2290 | { | 2417 | { |
| 2291 | struct file_lock *fl; | 2418 | struct file_lock *fl; |
| 2292 | int result = 1; | 2419 | int result = 1; |
| 2293 | lock_flocks(); | 2420 | |
| 2421 | spin_lock(&inode->i_lock); | ||
| 2294 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 2422 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
| 2295 | if (IS_POSIX(fl)) { | 2423 | if (IS_POSIX(fl)) { |
| 2296 | if (fl->fl_type == F_RDLCK) | 2424 | if (fl->fl_type == F_RDLCK) |
| @@ -2307,7 +2435,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len) | |||
| 2307 | result = 0; | 2435 | result = 0; |
| 2308 | break; | 2436 | break; |
| 2309 | } | 2437 | } |
| 2310 | unlock_flocks(); | 2438 | spin_unlock(&inode->i_lock); |
| 2311 | return result; | 2439 | return result; |
| 2312 | } | 2440 | } |
| 2313 | 2441 | ||
| @@ -2330,7 +2458,8 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len) | |||
| 2330 | { | 2458 | { |
| 2331 | struct file_lock *fl; | 2459 | struct file_lock *fl; |
| 2332 | int result = 1; | 2460 | int result = 1; |
| 2333 | lock_flocks(); | 2461 | |
| 2462 | spin_lock(&inode->i_lock); | ||
| 2334 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 2463 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
| 2335 | if (IS_POSIX(fl)) { | 2464 | if (IS_POSIX(fl)) { |
| 2336 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) | 2465 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) |
| @@ -2345,7 +2474,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len) | |||
| 2345 | result = 0; | 2474 | result = 0; |
| 2346 | break; | 2475 | break; |
| 2347 | } | 2476 | } |
| 2348 | unlock_flocks(); | 2477 | spin_unlock(&inode->i_lock); |
| 2349 | return result; | 2478 | return result; |
| 2350 | } | 2479 | } |
| 2351 | 2480 | ||
| @@ -2353,9 +2482,16 @@ EXPORT_SYMBOL(lock_may_write); | |||
| 2353 | 2482 | ||
| 2354 | static int __init filelock_init(void) | 2483 | static int __init filelock_init(void) |
| 2355 | { | 2484 | { |
| 2485 | int i; | ||
| 2486 | |||
| 2356 | filelock_cache = kmem_cache_create("file_lock_cache", | 2487 | filelock_cache = kmem_cache_create("file_lock_cache", |
| 2357 | sizeof(struct file_lock), 0, SLAB_PANIC, NULL); | 2488 | sizeof(struct file_lock), 0, SLAB_PANIC, NULL); |
| 2358 | 2489 | ||
| 2490 | lg_lock_init(&file_lock_lglock, "file_lock_lglock"); | ||
| 2491 | |||
| 2492 | for_each_possible_cpu(i) | ||
| 2493 | INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i)); | ||
| 2494 | |||
| 2359 | return 0; | 2495 | return 0; |
| 2360 | } | 2496 | } |
| 2361 | 2497 | ||
diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 08c442902fcd..dfaf6fa9b7b5 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c | |||
| @@ -93,7 +93,7 @@ static int minix_readdir(struct file *file, struct dir_context *ctx) | |||
| 93 | unsigned offset; | 93 | unsigned offset; |
| 94 | unsigned long n; | 94 | unsigned long n; |
| 95 | 95 | ||
| 96 | ctx->pos = pos = (pos + chunk_size-1) & ~(chunk_size-1); | 96 | ctx->pos = pos = ALIGN(pos, chunk_size); |
| 97 | if (pos >= inode->i_size) | 97 | if (pos >= inode->i_size) |
| 98 | return 0; | 98 | return 0; |
| 99 | 99 | ||
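
The minix_readdir() hunk above only swaps the open-coded round-up for ALIGN(); for a power-of-two chunk size the two forms are identical. A quick userspace check follows, with made-up values and a simplified stand-in for the kernel's ALIGN() macro:

```c
/* Check that ALIGN(pos, chunk) matches the removed open-coded round-up. */
#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long chunk = 32;	/* e.g. a minix dirent chunk size */
	unsigned long samples[] = { 0, 1, 31, 32, 33, 100 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		unsigned long pos = samples[i];
		unsigned long old = (pos + chunk - 1) & ~(chunk - 1);

		printf("pos=%3lu  old=%3lu  ALIGN=%3lu\n", pos, old,
		       (unsigned long)ALIGN(pos, chunk));
	}
	return 0;
}
```
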
diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 0db73d9dd668..cd950e2331b6 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c | |||
| @@ -54,6 +54,18 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, | |||
| 54 | return error; | 54 | return error; |
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | static int minix_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
| 58 | { | ||
| 59 | int error; | ||
| 60 | struct inode *inode = minix_new_inode(dir, mode, &error); | ||
| 61 | if (inode) { | ||
| 62 | minix_set_inode(inode, 0); | ||
| 63 | mark_inode_dirty(inode); | ||
| 64 | d_tmpfile(dentry, inode); | ||
| 65 | } | ||
| 66 | return error; | ||
| 67 | } | ||
| 68 | |||
| 57 | static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode, | 69 | static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode, |
| 58 | bool excl) | 70 | bool excl) |
| 59 | { | 71 | { |
| @@ -254,4 +266,5 @@ const struct inode_operations minix_dir_inode_operations = { | |||
| 254 | .mknod = minix_mknod, | 266 | .mknod = minix_mknod, |
| 255 | .rename = minix_rename, | 267 | .rename = minix_rename, |
| 256 | .getattr = minix_getattr, | 268 | .getattr = minix_getattr, |
| 269 | .tmpfile = minix_tmpfile, | ||
| 257 | }; | 270 | }; |
diff --git a/fs/namei.c b/fs/namei.c index 9ed9361223c0..b2beee7a733f 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -1352,7 +1352,7 @@ static int lookup_fast(struct nameidata *nd, | |||
| 1352 | */ | 1352 | */ |
| 1353 | if (nd->flags & LOOKUP_RCU) { | 1353 | if (nd->flags & LOOKUP_RCU) { |
| 1354 | unsigned seq; | 1354 | unsigned seq; |
| 1355 | dentry = __d_lookup_rcu(parent, &nd->last, &seq, nd->inode); | 1355 | dentry = __d_lookup_rcu(parent, &nd->last, &seq); |
| 1356 | if (!dentry) | 1356 | if (!dentry) |
| 1357 | goto unlazy; | 1357 | goto unlazy; |
| 1358 | 1358 | ||
| @@ -1787,8 +1787,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
| 1787 | struct dentry *parent = nd->path.dentry; | 1787 | struct dentry *parent = nd->path.dentry; |
| 1788 | nd->flags &= ~LOOKUP_JUMPED; | 1788 | nd->flags &= ~LOOKUP_JUMPED; |
| 1789 | if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { | 1789 | if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { |
| 1790 | err = parent->d_op->d_hash(parent, nd->inode, | 1790 | err = parent->d_op->d_hash(parent, &this); |
| 1791 | &this); | ||
| 1792 | if (err < 0) | 1791 | if (err < 0) |
| 1793 | break; | 1792 | break; |
| 1794 | } | 1793 | } |
| @@ -2121,7 +2120,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) | |||
| 2121 | * to use its own hash.. | 2120 | * to use its own hash.. |
| 2122 | */ | 2121 | */ |
| 2123 | if (base->d_flags & DCACHE_OP_HASH) { | 2122 | if (base->d_flags & DCACHE_OP_HASH) { |
| 2124 | int err = base->d_op->d_hash(base, base->d_inode, &this); | 2123 | int err = base->d_op->d_hash(base, &this); |
| 2125 | if (err < 0) | 2124 | if (err < 0) |
| 2126 | return ERR_PTR(err); | 2125 | return ERR_PTR(err); |
| 2127 | } | 2126 | } |
| @@ -2690,28 +2689,10 @@ static int do_last(struct nameidata *nd, struct path *path, | |||
| 2690 | nd->flags &= ~LOOKUP_PARENT; | 2689 | nd->flags &= ~LOOKUP_PARENT; |
| 2691 | nd->flags |= op->intent; | 2690 | nd->flags |= op->intent; |
| 2692 | 2691 | ||
| 2693 | switch (nd->last_type) { | 2692 | if (nd->last_type != LAST_NORM) { |
| 2694 | case LAST_DOTDOT: | ||
| 2695 | case LAST_DOT: | ||
| 2696 | error = handle_dots(nd, nd->last_type); | 2693 | error = handle_dots(nd, nd->last_type); |
| 2697 | if (error) | 2694 | if (error) |
| 2698 | return error; | 2695 | return error; |
| 2699 | /* fallthrough */ | ||
| 2700 | case LAST_ROOT: | ||
| 2701 | error = complete_walk(nd); | ||
| 2702 | if (error) | ||
| 2703 | return error; | ||
| 2704 | audit_inode(name, nd->path.dentry, 0); | ||
| 2705 | if (open_flag & O_CREAT) { | ||
| 2706 | error = -EISDIR; | ||
| 2707 | goto out; | ||
| 2708 | } | ||
| 2709 | goto finish_open; | ||
| 2710 | case LAST_BIND: | ||
| 2711 | error = complete_walk(nd); | ||
| 2712 | if (error) | ||
| 2713 | return error; | ||
| 2714 | audit_inode(name, dir, 0); | ||
| 2715 | goto finish_open; | 2696 | goto finish_open; |
| 2716 | } | 2697 | } |
| 2717 | 2698 | ||
| @@ -2841,19 +2822,19 @@ finish_lookup: | |||
| 2841 | } | 2822 | } |
| 2842 | nd->inode = inode; | 2823 | nd->inode = inode; |
| 2843 | /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ | 2824 | /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ |
| 2825 | finish_open: | ||
| 2844 | error = complete_walk(nd); | 2826 | error = complete_walk(nd); |
| 2845 | if (error) { | 2827 | if (error) { |
| 2846 | path_put(&save_parent); | 2828 | path_put(&save_parent); |
| 2847 | return error; | 2829 | return error; |
| 2848 | } | 2830 | } |
| 2831 | audit_inode(name, nd->path.dentry, 0); | ||
| 2849 | error = -EISDIR; | 2832 | error = -EISDIR; |
| 2850 | if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) | 2833 | if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) |
| 2851 | goto out; | 2834 | goto out; |
| 2852 | error = -ENOTDIR; | 2835 | error = -ENOTDIR; |
| 2853 | if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode)) | 2836 | if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode)) |
| 2854 | goto out; | 2837 | goto out; |
| 2855 | audit_inode(name, nd->path.dentry, 0); | ||
| 2856 | finish_open: | ||
| 2857 | if (!S_ISREG(nd->inode->i_mode)) | 2838 | if (!S_ISREG(nd->inode->i_mode)) |
| 2858 | will_truncate = false; | 2839 | will_truncate = false; |
| 2859 | 2840 | ||
| @@ -2920,6 +2901,67 @@ stale_open: | |||
| 2920 | goto retry_lookup; | 2901 | goto retry_lookup; |
| 2921 | } | 2902 | } |
| 2922 | 2903 | ||
| 2904 | static int do_tmpfile(int dfd, struct filename *pathname, | ||
| 2905 | struct nameidata *nd, int flags, | ||
| 2906 | const struct open_flags *op, | ||
| 2907 | struct file *file, int *opened) | ||
| 2908 | { | ||
| 2909 | static const struct qstr name = QSTR_INIT("/", 1); | ||
| 2910 | struct dentry *dentry, *child; | ||
| 2911 | struct inode *dir; | ||
| 2912 | int error = path_lookupat(dfd, pathname->name, | ||
| 2913 | flags | LOOKUP_DIRECTORY, nd); | ||
| 2914 | if (unlikely(error)) | ||
| 2915 | return error; | ||
| 2916 | error = mnt_want_write(nd->path.mnt); | ||
| 2917 | if (unlikely(error)) | ||
| 2918 | goto out; | ||
| 2919 | /* we want directory to be writable */ | ||
| 2920 | error = inode_permission(nd->inode, MAY_WRITE | MAY_EXEC); | ||
| 2921 | if (error) | ||
| 2922 | goto out2; | ||
| 2923 | dentry = nd->path.dentry; | ||
| 2924 | dir = dentry->d_inode; | ||
| 2925 | if (!dir->i_op->tmpfile) { | ||
| 2926 | error = -EOPNOTSUPP; | ||
| 2927 | goto out2; | ||
| 2928 | } | ||
| 2929 | child = d_alloc(dentry, &name); | ||
| 2930 | if (unlikely(!child)) { | ||
| 2931 | error = -ENOMEM; | ||
| 2932 | goto out2; | ||
| 2933 | } | ||
| 2934 | nd->flags &= ~LOOKUP_DIRECTORY; | ||
| 2935 | nd->flags |= op->intent; | ||
| 2936 | dput(nd->path.dentry); | ||
| 2937 | nd->path.dentry = child; | ||
| 2938 | error = dir->i_op->tmpfile(dir, nd->path.dentry, op->mode); | ||
| 2939 | if (error) | ||
| 2940 | goto out2; | ||
| 2941 | audit_inode(pathname, nd->path.dentry, 0); | ||
| 2942 | error = may_open(&nd->path, op->acc_mode, op->open_flag); | ||
| 2943 | if (error) | ||
| 2944 | goto out2; | ||
| 2945 | file->f_path.mnt = nd->path.mnt; | ||
| 2946 | error = finish_open(file, nd->path.dentry, NULL, opened); | ||
| 2947 | if (error) | ||
| 2948 | goto out2; | ||
| 2949 | error = open_check_o_direct(file); | ||
| 2950 | if (error) { | ||
| 2951 | fput(file); | ||
| 2952 | } else if (!(op->open_flag & O_EXCL)) { | ||
| 2953 | struct inode *inode = file_inode(file); | ||
| 2954 | spin_lock(&inode->i_lock); | ||
| 2955 | inode->i_state |= I_LINKABLE; | ||
| 2956 | spin_unlock(&inode->i_lock); | ||
| 2957 | } | ||
| 2958 | out2: | ||
| 2959 | mnt_drop_write(nd->path.mnt); | ||
| 2960 | out: | ||
| 2961 | path_put(&nd->path); | ||
| 2962 | return error; | ||
| 2963 | } | ||
| 2964 | |||
| 2923 | static struct file *path_openat(int dfd, struct filename *pathname, | 2965 | static struct file *path_openat(int dfd, struct filename *pathname, |
| 2924 | struct nameidata *nd, const struct open_flags *op, int flags) | 2966 | struct nameidata *nd, const struct open_flags *op, int flags) |
| 2925 | { | 2967 | { |
| @@ -2935,6 +2977,11 @@ static struct file *path_openat(int dfd, struct filename *pathname, | |||
| 2935 | 2977 | ||
| 2936 | file->f_flags = op->open_flag; | 2978 | file->f_flags = op->open_flag; |
| 2937 | 2979 | ||
| 2980 | if (unlikely(file->f_flags & O_TMPFILE)) { | ||
| 2981 | error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened); | ||
| 2982 | goto out; | ||
| 2983 | } | ||
| 2984 | |||
| 2938 | error = path_init(dfd, pathname->name, flags | LOOKUP_PARENT, nd, &base); | 2985 | error = path_init(dfd, pathname->name, flags | LOOKUP_PARENT, nd, &base); |
| 2939 | if (unlikely(error)) | 2986 | if (unlikely(error)) |
| 2940 | goto out; | 2987 | goto out; |
| @@ -2987,9 +3034,10 @@ out: | |||
| 2987 | } | 3034 | } |
| 2988 | 3035 | ||
| 2989 | struct file *do_filp_open(int dfd, struct filename *pathname, | 3036 | struct file *do_filp_open(int dfd, struct filename *pathname, |
| 2990 | const struct open_flags *op, int flags) | 3037 | const struct open_flags *op) |
| 2991 | { | 3038 | { |
| 2992 | struct nameidata nd; | 3039 | struct nameidata nd; |
| 3040 | int flags = op->lookup_flags; | ||
| 2993 | struct file *filp; | 3041 | struct file *filp; |
| 2994 | 3042 | ||
| 2995 | filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); | 3043 | filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); |
| @@ -3001,17 +3049,16 @@ struct file *do_filp_open(int dfd, struct filename *pathname, | |||
| 3001 | } | 3049 | } |
| 3002 | 3050 | ||
| 3003 | struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, | 3051 | struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, |
| 3004 | const char *name, const struct open_flags *op, int flags) | 3052 | const char *name, const struct open_flags *op) |
| 3005 | { | 3053 | { |
| 3006 | struct nameidata nd; | 3054 | struct nameidata nd; |
| 3007 | struct file *file; | 3055 | struct file *file; |
| 3008 | struct filename filename = { .name = name }; | 3056 | struct filename filename = { .name = name }; |
| 3057 | int flags = op->lookup_flags | LOOKUP_ROOT; | ||
| 3009 | 3058 | ||
| 3010 | nd.root.mnt = mnt; | 3059 | nd.root.mnt = mnt; |
| 3011 | nd.root.dentry = dentry; | 3060 | nd.root.dentry = dentry; |
| 3012 | 3061 | ||
| 3013 | flags |= LOOKUP_ROOT; | ||
| 3014 | |||
| 3015 | if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) | 3062 | if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) |
| 3016 | return ERR_PTR(-ELOOP); | 3063 | return ERR_PTR(-ELOOP); |
| 3017 | 3064 | ||
| @@ -3586,12 +3633,18 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de | |||
| 3586 | 3633 | ||
| 3587 | mutex_lock(&inode->i_mutex); | 3634 | mutex_lock(&inode->i_mutex); |
| 3588 | /* Make sure we don't allow creating hardlink to an unlinked file */ | 3635 | /* Make sure we don't allow creating hardlink to an unlinked file */ |
| 3589 | if (inode->i_nlink == 0) | 3636 | if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE)) |
| 3590 | error = -ENOENT; | 3637 | error = -ENOENT; |
| 3591 | else if (max_links && inode->i_nlink >= max_links) | 3638 | else if (max_links && inode->i_nlink >= max_links) |
| 3592 | error = -EMLINK; | 3639 | error = -EMLINK; |
| 3593 | else | 3640 | else |
| 3594 | error = dir->i_op->link(old_dentry, dir, new_dentry); | 3641 | error = dir->i_op->link(old_dentry, dir, new_dentry); |
| 3642 | |||
| 3643 | if (!error && (inode->i_state & I_LINKABLE)) { | ||
| 3644 | spin_lock(&inode->i_lock); | ||
| 3645 | inode->i_state &= ~I_LINKABLE; | ||
| 3646 | spin_unlock(&inode->i_lock); | ||
| 3647 | } | ||
| 3595 | mutex_unlock(&inode->i_mutex); | 3648 | mutex_unlock(&inode->i_mutex); |
| 3596 | if (!error) | 3649 | if (!error) |
| 3597 | fsnotify_link(dir, inode, new_dentry); | 3650 | fsnotify_link(dir, inode, new_dentry); |
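
The do_tmpfile() and I_LINKABLE plumbing above is what enables the following userspace pattern: open an unnamed file with O_TMPFILE, fill it, then give it a name atomically with linkat() via /proc/self/fd. The paths are examples only, and O_TMPFILE needs a filesystem whose inode_operations provide ->tmpfile().

```c
/* Userspace sketch of the O_TMPFILE create-then-link flow. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char procpath[64];
	int fd;

	fd = open("/tmp", O_TMPFILE | O_RDWR, 0600);
	if (fd < 0) {
		perror("open(O_TMPFILE)");
		return 1;
	}

	/* the file has no name yet; it is reachable only through fd */
	if (write(fd, "hello\n", 6) != 6)
		perror("write");

	/* publish it under a real name in one step */
	snprintf(procpath, sizeof(procpath), "/proc/self/fd/%d", fd);
	if (linkat(AT_FDCWD, procpath, AT_FDCWD, "/tmp/published-file",
		   AT_SYMLINK_FOLLOW) < 0)
		perror("linkat");

	close(fd);
	return 0;
}
```

Opening with O_TMPFILE | O_EXCL instead leaves I_LINKABLE clear, so the temporary file can never be linked in; the vfs_link() hunk above is the piece that enforces both behaviours.
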
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 0e7f00298213..3be047474bfc 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c | |||
| @@ -73,10 +73,8 @@ const struct inode_operations ncp_dir_inode_operations = | |||
| 73 | * Dentry operations routines | 73 | * Dentry operations routines |
| 74 | */ | 74 | */ |
| 75 | static int ncp_lookup_validate(struct dentry *, unsigned int); | 75 | static int ncp_lookup_validate(struct dentry *, unsigned int); |
| 76 | static int ncp_hash_dentry(const struct dentry *, const struct inode *, | 76 | static int ncp_hash_dentry(const struct dentry *, struct qstr *); |
| 77 | struct qstr *); | 77 | static int ncp_compare_dentry(const struct dentry *, const struct dentry *, |
| 78 | static int ncp_compare_dentry(const struct dentry *, const struct inode *, | ||
| 79 | const struct dentry *, const struct inode *, | ||
| 80 | unsigned int, const char *, const struct qstr *); | 78 | unsigned int, const char *, const struct qstr *); |
| 81 | static int ncp_delete_dentry(const struct dentry *); | 79 | static int ncp_delete_dentry(const struct dentry *); |
| 82 | 80 | ||
| @@ -119,11 +117,19 @@ static inline int ncp_case_sensitive(const struct inode *i) | |||
| 119 | /* | 117 | /* |
| 120 | * Note: leave the hash unchanged if the directory | 118 | * Note: leave the hash unchanged if the directory |
| 121 | * is case-sensitive. | 119 | * is case-sensitive. |
| 120 | * | ||
| 121 | * Accessing the parent inode can be racy under RCU pathwalking. | ||
| 122 | * Use ACCESS_ONCE() to make sure we use _one_ particular inode, | ||
| 123 | * the callers will handle races. | ||
| 122 | */ | 124 | */ |
| 123 | static int | 125 | static int |
| 124 | ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode, | 126 | ncp_hash_dentry(const struct dentry *dentry, struct qstr *this) |
| 125 | struct qstr *this) | ||
| 126 | { | 127 | { |
| 128 | struct inode *inode = ACCESS_ONCE(dentry->d_inode); | ||
| 129 | |||
| 130 | if (!inode) | ||
| 131 | return 0; | ||
| 132 | |||
| 127 | if (!ncp_case_sensitive(inode)) { | 133 | if (!ncp_case_sensitive(inode)) { |
| 128 | struct super_block *sb = dentry->d_sb; | 134 | struct super_block *sb = dentry->d_sb; |
| 129 | struct nls_table *t; | 135 | struct nls_table *t; |
| @@ -140,14 +146,24 @@ ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode, | |||
| 140 | return 0; | 146 | return 0; |
| 141 | } | 147 | } |
| 142 | 148 | ||
| 149 | /* | ||
| 150 | * Accessing the parent inode can be racy under RCU pathwalking. | ||
| 151 | * Use ACCESS_ONCE() to make sure we use _one_ particular inode, | ||
| 152 | * the callers will handle races. | ||
| 153 | */ | ||
| 143 | static int | 154 | static int |
| 144 | ncp_compare_dentry(const struct dentry *parent, const struct inode *pinode, | 155 | ncp_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
| 145 | const struct dentry *dentry, const struct inode *inode, | ||
| 146 | unsigned int len, const char *str, const struct qstr *name) | 156 | unsigned int len, const char *str, const struct qstr *name) |
| 147 | { | 157 | { |
| 158 | struct inode *pinode; | ||
| 159 | |||
| 148 | if (len != name->len) | 160 | if (len != name->len) |
| 149 | return 1; | 161 | return 1; |
| 150 | 162 | ||
| 163 | pinode = ACCESS_ONCE(parent->d_inode); | ||
| 164 | if (!pinode) | ||
| 165 | return 1; | ||
| 166 | |||
| 151 | if (ncp_case_sensitive(pinode)) | 167 | if (ncp_case_sensitive(pinode)) |
| 152 | return strncmp(str, name->name, len); | 168 | return strncmp(str, name->name, len); |
| 153 | 169 | ||
| @@ -660,8 +676,6 @@ end_advance: | |||
| 660 | ctl.valid = 0; | 676 | ctl.valid = 0; |
| 661 | if (!ctl.filled && (ctl.fpos == ctx->pos)) { | 677 | if (!ctl.filled && (ctl.fpos == ctx->pos)) { |
| 662 | if (!ino) | 678 | if (!ino) |
| 663 | ino = find_inode_number(dentry, &qname); | ||
| 664 | if (!ino) | ||
| 665 | ino = iunique(dir->i_sb, 2); | 679 | ino = iunique(dir->i_sb, 2); |
| 666 | ctl.filled = !dir_emit(ctx, qname.name, qname.len, | 680 | ctl.filled = !dir_emit(ctx, qname.name, qname.len, |
| 667 | ino, DT_UNKNOWN); | 681 | ino, DT_UNKNOWN); |
| @@ -1123,17 +1137,6 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 1123 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, | 1137 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, |
| 1124 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name); | 1138 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name); |
| 1125 | 1139 | ||
| 1126 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) { | ||
| 1127 | /* | ||
| 1128 | * fail with EBUSY if there are still references to this | ||
| 1129 | * directory. | ||
| 1130 | */ | ||
| 1131 | dentry_unhash(new_dentry); | ||
| 1132 | error = -EBUSY; | ||
| 1133 | if (!d_unhashed(new_dentry)) | ||
| 1134 | goto out; | ||
| 1135 | } | ||
| 1136 | |||
| 1137 | ncp_age_dentry(server, old_dentry); | 1140 | ncp_age_dentry(server, old_dentry); |
| 1138 | ncp_age_dentry(server, new_dentry); | 1141 | ncp_age_dentry(server, new_dentry); |
| 1139 | 1142 | ||
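
The ncpfs d_hash/d_compare changes read dentry->d_inode exactly once with ACCESS_ONCE() because, under RCU pathwalk, the pointer can concurrently become NULL. A userspace analogue of that pattern is sketched below, using a C11 atomic load in place of ACCESS_ONCE(); inode_stub and hash_name are invented names, not ncpfs code.

```c
/* Read a racy pointer once into a local, test the local, keep using it. */
#include <stdatomic.h>
#include <stdio.h>

struct inode_stub {
	int case_sensitive;
};

/* may be set to NULL by another thread (the d_delete analogue) */
static _Atomic(struct inode_stub *) shared_inode;

static int hash_name(const char *name)
{
	/* single read: the rest of the function must not re-read shared_inode */
	struct inode_stub *inode = atomic_load(&shared_inode);
	int h = 0;

	if (!inode)
		return 0;	/* dentry is going away; leave the hash alone */

	for (; *name; name++)
		h = h * 31 + (inode->case_sensitive ? *name : (*name | 0x20));
	return h;
}

int main(void)
{
	struct inode_stub ino = { .case_sensitive = 0 };

	atomic_store(&shared_inode, &ino);
	printf("with inode:    %d\n", hash_name("FooBar"));

	atomic_store(&shared_inode, (struct inode_stub *)NULL);
	printf("without inode: %d\n", hash_name("FooBar"));
	return 0;
}
```
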
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 26910c8154da..4659da67e7f6 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
| @@ -403,18 +403,24 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options) | |||
| 403 | switch (optval) { | 403 | switch (optval) { |
| 404 | case 'u': | 404 | case 'u': |
| 405 | data->uid = make_kuid(current_user_ns(), optint); | 405 | data->uid = make_kuid(current_user_ns(), optint); |
| 406 | if (!uid_valid(data->uid)) | 406 | if (!uid_valid(data->uid)) { |
| 407 | ret = -EINVAL; | ||
| 407 | goto err; | 408 | goto err; |
| 409 | } | ||
| 408 | break; | 410 | break; |
| 409 | case 'g': | 411 | case 'g': |
| 410 | data->gid = make_kgid(current_user_ns(), optint); | 412 | data->gid = make_kgid(current_user_ns(), optint); |
| 411 | if (!gid_valid(data->gid)) | 413 | if (!gid_valid(data->gid)) { |
| 414 | ret = -EINVAL; | ||
| 412 | goto err; | 415 | goto err; |
| 416 | } | ||
| 413 | break; | 417 | break; |
| 414 | case 'o': | 418 | case 'o': |
| 415 | data->mounted_uid = make_kuid(current_user_ns(), optint); | 419 | data->mounted_uid = make_kuid(current_user_ns(), optint); |
| 416 | if (!uid_valid(data->mounted_uid)) | 420 | if (!uid_valid(data->mounted_uid)) { |
| 421 | ret = -EINVAL; | ||
| 417 | goto err; | 422 | goto err; |
| 423 | } | ||
| 418 | break; | 424 | break; |
| 419 | case 'm': | 425 | case 'm': |
| 420 | data->file_mode = optint; | 426 | data->file_mode = optint; |
| @@ -891,6 +897,10 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) | |||
| 891 | if (!server) /* How this could happen? */ | 897 | if (!server) /* How this could happen? */ |
| 892 | goto out; | 898 | goto out; |
| 893 | 899 | ||
| 900 | result = -EPERM; | ||
| 901 | if (IS_DEADDIR(dentry->d_inode)) | ||
| 902 | goto out; | ||
| 903 | |||
| 894 | /* ageing the dentry to force validation */ | 904 | /* ageing the dentry to force validation */ |
| 895 | ncp_age_dentry(server, dentry); | 905 | ncp_age_dentry(server, dentry); |
| 896 | 906 | ||
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index ee24df5af1f9..3c5dd55d284c 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c | |||
| @@ -117,7 +117,7 @@ int ncp_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 117 | return -EINVAL; | 117 | return -EINVAL; |
| 118 | /* we do not support files bigger than 4GB... We eventually | 118 | /* we do not support files bigger than 4GB... We eventually |
| 119 | supports just 4GB... */ | 119 | supports just 4GB... */ |
| 120 | if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff | 120 | if (vma_pages(vma) + vma->vm_pgoff |
| 121 | > (1U << (32 - PAGE_SHIFT))) | 121 | > (1U << (32 - PAGE_SHIFT))) |
| 122 | return -EFBIG; | 122 | return -EFBIG; |
| 123 | 123 | ||
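
The ncp_mmap() hunk substitutes the vma_pages() helper for the open-coded page count; behaviour is unchanged. A small userspace check with made-up numbers (PAGE_SHIFT and vma_stub are stand-ins, not the kernel definitions):

```c
/* vma_pages() is just (vm_end - vm_start) >> PAGE_SHIFT. */
#include <stdio.h>

#define PAGE_SHIFT 12

struct vma_stub {
	unsigned long vm_start, vm_end, vm_pgoff;
};

static unsigned long vma_pages(const struct vma_stub *vma)
{
	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}

int main(void)
{
	struct vma_stub vma = {
		.vm_start = 0x100000,
		.vm_end   = 0x100000 + (1UL << 20),	/* a 1 MiB mapping */
		.vm_pgoff = 0x1000,			/* file offset, in pages */
	};
	unsigned long open_coded = (vma.vm_end - vma.vm_start) >> PAGE_SHIFT;

	printf("pages: helper=%lu open-coded=%lu\n", vma_pages(&vma), open_coded);
	printf("within 4GB limit: %d\n",
	       vma_pages(&vma) + vma.vm_pgoff <= (1UL << (32 - PAGE_SHIFT)));
	return 0;
}
```
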
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 13ca196385f5..b5e80b0af315 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
| @@ -104,6 +104,15 @@ config NFS_V4_1 | |||
| 104 | 104 | ||
| 105 | If unsure, say N. | 105 | If unsure, say N. |
| 106 | 106 | ||
| 107 | config NFS_V4_2 | ||
| 108 | bool "NFS client support for NFSv4.2" | ||
| 109 | depends on NFS_V4_1 | ||
| 110 | help | ||
| 111 | This option enables support for minor version 2 of the NFSv4 protocol | ||
| 112 | in the kernel's NFS client. | ||
| 113 | |||
| 114 | If unsure, say N. | ||
| 115 | |||
| 107 | config PNFS_FILE_LAYOUT | 116 | config PNFS_FILE_LAYOUT |
| 108 | tristate | 117 | tristate |
| 109 | depends on NFS_V4_1 | 118 | depends on NFS_V4_1 |
| @@ -131,6 +140,11 @@ config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN | |||
| 131 | If the NFS client is unchanged from the upstream kernel, this | 140 | If the NFS client is unchanged from the upstream kernel, this |
| 132 | option should be set to the default "kernel.org". | 141 | option should be set to the default "kernel.org". |
| 133 | 142 | ||
| 143 | config NFS_V4_SECURITY_LABEL | ||
| 144 | bool | ||
| 145 | depends on NFS_V4_2 && SECURITY | ||
| 146 | default y | ||
| 147 | |||
| 134 | config ROOT_NFS | 148 | config ROOT_NFS |
| 135 | bool "Root file system on NFS" | 149 | bool "Root file system on NFS" |
| 136 | depends on NFS_FS=y && IP_PNP | 150 | depends on NFS_FS=y && IP_PNP |
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index cce2c057bd2d..e0bb048e9576 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
| @@ -6,8 +6,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o | |||
| 6 | 6 | ||
| 7 | nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ | 7 | nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ |
| 8 | direct.o pagelist.o read.o symlink.o unlink.o \ | 8 | direct.o pagelist.o read.o symlink.o unlink.o \ |
| 9 | write.o namespace.o mount_clnt.o \ | 9 | write.o namespace.o mount_clnt.o |
| 10 | dns_resolve.o cache_lib.o | ||
| 11 | nfs-$(CONFIG_ROOT_NFS) += nfsroot.o | 10 | nfs-$(CONFIG_ROOT_NFS) += nfsroot.o |
| 12 | nfs-$(CONFIG_SYSCTL) += sysctl.o | 11 | nfs-$(CONFIG_SYSCTL) += sysctl.o |
| 13 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o | 12 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o |
| @@ -22,7 +21,8 @@ nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o | |||
| 22 | obj-$(CONFIG_NFS_V4) += nfsv4.o | 21 | obj-$(CONFIG_NFS_V4) += nfsv4.o |
| 23 | nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ | 22 | nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ |
| 24 | delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ | 23 | delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ |
| 25 | nfs4namespace.o nfs4getroot.o nfs4client.o | 24 | nfs4namespace.o nfs4getroot.o nfs4client.o dns_resolve.o |
| 25 | nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o | ||
| 26 | nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o | 26 | nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o |
| 27 | nfsv4-$(CONFIG_NFS_V4_1) += nfs4session.o pnfs.o pnfs_dev.o | 27 | nfsv4-$(CONFIG_NFS_V4_1) += nfs4session.o pnfs.o pnfs_dev.o |
| 28 | 28 | ||
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 434b93ec0970..e242bbf72972 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
| @@ -1089,9 +1089,10 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, | |||
| 1089 | dev->pgbase = 0; | 1089 | dev->pgbase = 0; |
| 1090 | dev->pglen = PAGE_SIZE * max_pages; | 1090 | dev->pglen = PAGE_SIZE * max_pages; |
| 1091 | dev->mincount = 0; | 1091 | dev->mincount = 0; |
| 1092 | dev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | ||
| 1092 | 1093 | ||
| 1093 | dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); | 1094 | dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); |
| 1094 | rc = nfs4_proc_getdeviceinfo(server, dev); | 1095 | rc = nfs4_proc_getdeviceinfo(server, dev, NULL); |
| 1095 | dprintk("%s getdevice info returns %d\n", __func__, rc); | 1096 | dprintk("%s getdevice info returns %d\n", __func__, rc); |
| 1096 | if (rc) { | 1097 | if (rc) { |
| 1097 | rv = ERR_PTR(rc); | 1098 | rv = ERR_PTR(rc); |
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index cff089a412c7..67cd73213168 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
| @@ -211,7 +211,6 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, | |||
| 211 | struct svc_rqst *rqstp; | 211 | struct svc_rqst *rqstp; |
| 212 | int (*callback_svc)(void *vrqstp); | 212 | int (*callback_svc)(void *vrqstp); |
| 213 | struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; | 213 | struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; |
| 214 | char svc_name[12]; | ||
| 215 | int ret; | 214 | int ret; |
| 216 | 215 | ||
| 217 | nfs_callback_bc_serv(minorversion, xprt, serv); | 216 | nfs_callback_bc_serv(minorversion, xprt, serv); |
| @@ -235,10 +234,10 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, | |||
| 235 | 234 | ||
| 236 | svc_sock_update_bufs(serv); | 235 | svc_sock_update_bufs(serv); |
| 237 | 236 | ||
| 238 | sprintf(svc_name, "nfsv4.%u-svc", minorversion); | ||
| 239 | cb_info->serv = serv; | 237 | cb_info->serv = serv; |
| 240 | cb_info->rqst = rqstp; | 238 | cb_info->rqst = rqstp; |
| 241 | cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name); | 239 | cb_info->task = kthread_run(callback_svc, cb_info->rqst, |
| 240 | "nfsv4.%u-svc", minorversion); | ||
| 242 | if (IS_ERR(cb_info->task)) { | 241 | if (IS_ERR(cb_info->task)) { |
| 243 | ret = PTR_ERR(cb_info->task); | 242 | ret = PTR_ERR(cb_info->task); |
| 244 | svc_exit_thread(cb_info->rqst); | 243 | svc_exit_thread(cb_info->rqst); |
| @@ -282,6 +281,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct n | |||
| 282 | ret = nfs4_callback_up_net(serv, net); | 281 | ret = nfs4_callback_up_net(serv, net); |
| 283 | break; | 282 | break; |
| 284 | case 1: | 283 | case 1: |
| 284 | case 2: | ||
| 285 | ret = nfs41_callback_up_net(serv, net); | 285 | ret = nfs41_callback_up_net(serv, net); |
| 286 | break; | 286 | break; |
| 287 | default: | 287 | default: |
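The callback.c hunk above leans on kthread_run() accepting a printf-style task name, which is what lets the fixed 12-byte svc_name buffer (and any risk of truncating it) go away. A minimal sketch of the pattern, with a hypothetical worker function my_callback_svc():

```c
/* Sketch only: kthread_run(fn, data, namefmt, ...) formats the thread
 * name itself, so no intermediate name buffer is needed.
 */
struct task_struct *task;

task = kthread_run(my_callback_svc, rqstp, "nfsv4.%u-svc", minorversion);
if (IS_ERR(task))
	return PTR_ERR(task);
```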
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index efd54f0a4c46..84326e9fb47a 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h | |||
| @@ -32,6 +32,8 @@ enum nfs4_callback_opnum { | |||
| 32 | OP_CB_WANTS_CANCELLED = 12, | 32 | OP_CB_WANTS_CANCELLED = 12, |
| 33 | OP_CB_NOTIFY_LOCK = 13, | 33 | OP_CB_NOTIFY_LOCK = 13, |
| 34 | OP_CB_NOTIFY_DEVICEID = 14, | 34 | OP_CB_NOTIFY_DEVICEID = 14, |
| 35 | /* Callback operations new to NFSv4.2 */ | ||
| 36 | OP_CB_OFFLOAD = 15, | ||
| 35 | OP_CB_ILLEGAL = 10044, | 37 | OP_CB_ILLEGAL = 10044, |
| 36 | }; | 38 | }; |
| 37 | 39 | ||
| @@ -39,6 +41,7 @@ struct cb_process_state { | |||
| 39 | __be32 drc_status; | 41 | __be32 drc_status; |
| 40 | struct nfs_client *clp; | 42 | struct nfs_client *clp; |
| 41 | u32 slotid; | 43 | u32 slotid; |
| 44 | u32 minorversion; | ||
| 42 | struct net *net; | 45 | struct net *net; |
| 43 | }; | 46 | }; |
| 44 | 47 | ||
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 0bc27684ebfa..e6ebc4c38c81 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
| @@ -406,7 +406,8 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
| 406 | int i; | 406 | int i; |
| 407 | __be32 status = htonl(NFS4ERR_BADSESSION); | 407 | __be32 status = htonl(NFS4ERR_BADSESSION); |
| 408 | 408 | ||
| 409 | clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, &args->csa_sessionid); | 409 | clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, |
| 410 | &args->csa_sessionid, cps->minorversion); | ||
| 410 | if (clp == NULL) | 411 | if (clp == NULL) |
| 411 | goto out; | 412 | goto out; |
| 412 | 413 | ||
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index a35582c9d444..f4ccfe6521ec 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c | |||
| @@ -166,9 +166,9 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound | |||
| 166 | if (unlikely(p == NULL)) | 166 | if (unlikely(p == NULL)) |
| 167 | return htonl(NFS4ERR_RESOURCE); | 167 | return htonl(NFS4ERR_RESOURCE); |
| 168 | hdr->minorversion = ntohl(*p++); | 168 | hdr->minorversion = ntohl(*p++); |
| 169 | /* Check minor version is zero or one. */ | 169 | /* Check for minor version support */ |
| 170 | if (hdr->minorversion <= 1) { | 170 | if (hdr->minorversion <= NFS4_MAX_MINOR_VERSION) { |
| 171 | hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */ | 171 | hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 and v4.2 */ |
| 172 | } else { | 172 | } else { |
| 173 | pr_warn_ratelimited("NFS: %s: NFSv4 server callback with " | 173 | pr_warn_ratelimited("NFS: %s: NFSv4 server callback with " |
| 174 | "illegal minor version %u!\n", | 174 | "illegal minor version %u!\n", |
| @@ -786,6 +786,26 @@ static void nfs4_cb_free_slot(struct cb_process_state *cps) | |||
| 786 | } | 786 | } |
| 787 | #endif /* CONFIG_NFS_V4_1 */ | 787 | #endif /* CONFIG_NFS_V4_1 */ |
| 788 | 788 | ||
| 789 | #ifdef CONFIG_NFS_V4_2 | ||
| 790 | static __be32 | ||
| 791 | preprocess_nfs42_op(int nop, unsigned int op_nr, struct callback_op **op) | ||
| 792 | { | ||
| 793 | __be32 status = preprocess_nfs41_op(nop, op_nr, op); | ||
| 794 | if (status != htonl(NFS4ERR_OP_ILLEGAL)) | ||
| 795 | return status; | ||
| 796 | |||
| 797 | if (op_nr == OP_CB_OFFLOAD) | ||
| 798 | return htonl(NFS4ERR_NOTSUPP); | ||
| 799 | return htonl(NFS4ERR_OP_ILLEGAL); | ||
| 800 | } | ||
| 801 | #else /* CONFIG_NFS_V4_2 */ | ||
| 802 | static __be32 | ||
| 803 | preprocess_nfs42_op(int nop, unsigned int op_nr, struct callback_op **op) | ||
| 804 | { | ||
| 805 | return htonl(NFS4ERR_MINOR_VERS_MISMATCH); | ||
| 806 | } | ||
| 807 | #endif /* CONFIG_NFS_V4_2 */ | ||
| 808 | |||
| 789 | static __be32 | 809 | static __be32 |
| 790 | preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) | 810 | preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) |
| 791 | { | 811 | { |
| @@ -801,8 +821,7 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) | |||
| 801 | return htonl(NFS_OK); | 821 | return htonl(NFS_OK); |
| 802 | } | 822 | } |
| 803 | 823 | ||
| 804 | static __be32 process_op(uint32_t minorversion, int nop, | 824 | static __be32 process_op(int nop, struct svc_rqst *rqstp, |
| 805 | struct svc_rqst *rqstp, | ||
| 806 | struct xdr_stream *xdr_in, void *argp, | 825 | struct xdr_stream *xdr_in, void *argp, |
| 807 | struct xdr_stream *xdr_out, void *resp, | 826 | struct xdr_stream *xdr_out, void *resp, |
| 808 | struct cb_process_state *cps) | 827 | struct cb_process_state *cps) |
| @@ -819,10 +838,22 @@ static __be32 process_op(uint32_t minorversion, int nop, | |||
| 819 | return status; | 838 | return status; |
| 820 | 839 | ||
| 821 | dprintk("%s: minorversion=%d nop=%d op_nr=%u\n", | 840 | dprintk("%s: minorversion=%d nop=%d op_nr=%u\n", |
| 822 | __func__, minorversion, nop, op_nr); | 841 | __func__, cps->minorversion, nop, op_nr); |
| 842 | |||
| 843 | switch (cps->minorversion) { | ||
| 844 | case 0: | ||
| 845 | status = preprocess_nfs4_op(op_nr, &op); | ||
| 846 | break; | ||
| 847 | case 1: | ||
| 848 | status = preprocess_nfs41_op(nop, op_nr, &op); | ||
| 849 | break; | ||
| 850 | case 2: | ||
| 851 | status = preprocess_nfs42_op(nop, op_nr, &op); | ||
| 852 | break; | ||
| 853 | default: | ||
| 854 | status = htonl(NFS4ERR_MINOR_VERS_MISMATCH); | ||
| 855 | } | ||
| 823 | 856 | ||
| 824 | status = minorversion ? preprocess_nfs41_op(nop, op_nr, &op) : | ||
| 825 | preprocess_nfs4_op(op_nr, &op); | ||
| 826 | if (status == htonl(NFS4ERR_OP_ILLEGAL)) | 857 | if (status == htonl(NFS4ERR_OP_ILLEGAL)) |
| 827 | op_nr = OP_CB_ILLEGAL; | 858 | op_nr = OP_CB_ILLEGAL; |
| 828 | if (status) | 859 | if (status) |
| @@ -885,14 +916,15 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r | |||
| 885 | return rpc_drop_reply; | 916 | return rpc_drop_reply; |
| 886 | } | 917 | } |
| 887 | 918 | ||
| 919 | cps.minorversion = hdr_arg.minorversion; | ||
| 888 | hdr_res.taglen = hdr_arg.taglen; | 920 | hdr_res.taglen = hdr_arg.taglen; |
| 889 | hdr_res.tag = hdr_arg.tag; | 921 | hdr_res.tag = hdr_arg.tag; |
| 890 | if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) | 922 | if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) |
| 891 | return rpc_system_err; | 923 | return rpc_system_err; |
| 892 | 924 | ||
| 893 | while (status == 0 && nops != hdr_arg.nops) { | 925 | while (status == 0 && nops != hdr_arg.nops) { |
| 894 | status = process_op(hdr_arg.minorversion, nops, rqstp, | 926 | status = process_op(nops, rqstp, &xdr_in, |
| 895 | &xdr_in, argp, &xdr_out, resp, &cps); | 927 | argp, &xdr_out, resp, &cps); |
| 896 | nops++; | 928 | nops++; |
| 897 | } | 929 | } |
| 898 | 930 | ||
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index c513b0cc835f..340b1eff0267 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
| @@ -753,8 +753,6 @@ static int nfs_init_server(struct nfs_server *server, | |||
| 753 | data->timeo, data->retrans); | 753 | data->timeo, data->retrans); |
| 754 | if (data->flags & NFS_MOUNT_NORESVPORT) | 754 | if (data->flags & NFS_MOUNT_NORESVPORT) |
| 755 | set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); | 755 | set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); |
| 756 | if (server->options & NFS_OPTION_MIGRATION) | ||
| 757 | set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); | ||
| 758 | 756 | ||
| 759 | /* Allocate or find a client reference we can use */ | 757 | /* Allocate or find a client reference we can use */ |
| 760 | clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX); | 758 | clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX); |
| @@ -1076,7 +1074,7 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, | |||
| 1076 | } | 1074 | } |
| 1077 | 1075 | ||
| 1078 | if (!(fattr->valid & NFS_ATTR_FATTR)) { | 1076 | if (!(fattr->valid & NFS_ATTR_FATTR)) { |
| 1079 | error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr); | 1077 | error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr, NULL); |
| 1080 | if (error < 0) { | 1078 | if (error < 0) { |
| 1081 | dprintk("nfs_create_server: getattr error = %d\n", -error); | 1079 | dprintk("nfs_create_server: getattr error = %d\n", -error); |
| 1082 | goto error; | 1080 | goto error; |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 57db3244f4d9..7ec4814e298d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
| @@ -73,20 +73,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ | |||
| 73 | if (inode->i_flock == NULL) | 73 | if (inode->i_flock == NULL) |
| 74 | goto out; | 74 | goto out; |
| 75 | 75 | ||
| 76 | /* Protect inode->i_flock using the file locks lock */ | 76 | /* Protect inode->i_flock using the i_lock */ |
| 77 | lock_flocks(); | 77 | spin_lock(&inode->i_lock); |
| 78 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 78 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
| 79 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) | 79 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) |
| 80 | continue; | 80 | continue; |
| 81 | if (nfs_file_open_context(fl->fl_file) != ctx) | 81 | if (nfs_file_open_context(fl->fl_file) != ctx) |
| 82 | continue; | 82 | continue; |
| 83 | unlock_flocks(); | 83 | spin_unlock(&inode->i_lock); |
| 84 | status = nfs4_lock_delegation_recall(fl, state, stateid); | 84 | status = nfs4_lock_delegation_recall(fl, state, stateid); |
| 85 | if (status < 0) | 85 | if (status < 0) |
| 86 | goto out; | 86 | goto out; |
| 87 | lock_flocks(); | 87 | spin_lock(&inode->i_lock); |
| 88 | } | 88 | } |
| 89 | unlock_flocks(); | 89 | spin_unlock(&inode->i_lock); |
| 90 | out: | 90 | out: |
| 91 | return status; | 91 | return status; |
| 92 | } | 92 | } |
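In the delegation.c hunk, the per-inode spinlock i_lock takes over from the global lock_flocks()/unlock_flocks() pair for walking inode->i_flock. Because nfs4_lock_delegation_recall() can sleep, the lock still has to be dropped around that call and re-taken before the walk continues, just as before. A generic sketch of the idiom, with hypothetical helpers interesting() and do_blocking_work():

```c
/* Sketch of the drop-around-sleep idiom used above (not NFS-specific). */
spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
	if (!interesting(fl))
		continue;
	spin_unlock(&inode->i_lock);	/* must not sleep under a spinlock */
	status = do_blocking_work(fl);
	if (status < 0)
		goto out;
	spin_lock(&inode->i_lock);	/* re-take before touching the list again */
}
spin_unlock(&inode->i_lock);
out:
	return status;
```

Continuing the walk from fl->fl_next after the lock was dropped assumes the lock list cannot disappear underneath the caller, which is the same assumption the original lock_flocks() code made.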
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5d051419527b..e474ca2b2bfe 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include <linux/pagevec.h> | 33 | #include <linux/pagevec.h> |
| 34 | #include <linux/namei.h> | 34 | #include <linux/namei.h> |
| 35 | #include <linux/mount.h> | 35 | #include <linux/mount.h> |
| 36 | #include <linux/swap.h> | ||
| 36 | #include <linux/sched.h> | 37 | #include <linux/sched.h> |
| 37 | #include <linux/kmemleak.h> | 38 | #include <linux/kmemleak.h> |
| 38 | #include <linux/xattr.h> | 39 | #include <linux/xattr.h> |
| @@ -436,6 +437,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | |||
| 436 | struct dentry *alias; | 437 | struct dentry *alias; |
| 437 | struct inode *dir = parent->d_inode; | 438 | struct inode *dir = parent->d_inode; |
| 438 | struct inode *inode; | 439 | struct inode *inode; |
| 440 | int status; | ||
| 439 | 441 | ||
| 440 | if (filename.name[0] == '.') { | 442 | if (filename.name[0] == '.') { |
| 441 | if (filename.len == 1) | 443 | if (filename.len == 1) |
| @@ -448,7 +450,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | |||
| 448 | dentry = d_lookup(parent, &filename); | 450 | dentry = d_lookup(parent, &filename); |
| 449 | if (dentry != NULL) { | 451 | if (dentry != NULL) { |
| 450 | if (nfs_same_file(dentry, entry)) { | 452 | if (nfs_same_file(dentry, entry)) { |
| 451 | nfs_refresh_inode(dentry->d_inode, entry->fattr); | 453 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
| 454 | status = nfs_refresh_inode(dentry->d_inode, entry->fattr); | ||
| 455 | if (!status) | ||
| 456 | nfs_setsecurity(dentry->d_inode, entry->fattr, entry->label); | ||
| 452 | goto out; | 457 | goto out; |
| 453 | } else { | 458 | } else { |
| 454 | if (d_invalidate(dentry) != 0) | 459 | if (d_invalidate(dentry) != 0) |
| @@ -461,7 +466,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | |||
| 461 | if (dentry == NULL) | 466 | if (dentry == NULL) |
| 462 | return; | 467 | return; |
| 463 | 468 | ||
| 464 | inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); | 469 | inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->label); |
| 465 | if (IS_ERR(inode)) | 470 | if (IS_ERR(inode)) |
| 466 | goto out; | 471 | goto out; |
| 467 | 472 | ||
| @@ -586,10 +591,16 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, | |||
| 586 | if (entry.fh == NULL || entry.fattr == NULL) | 591 | if (entry.fh == NULL || entry.fattr == NULL) |
| 587 | goto out; | 592 | goto out; |
| 588 | 593 | ||
| 594 | entry.label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); | ||
| 595 | if (IS_ERR(entry.label)) { | ||
| 596 | status = PTR_ERR(entry.label); | ||
| 597 | goto out; | ||
| 598 | } | ||
| 599 | |||
| 589 | array = nfs_readdir_get_array(page); | 600 | array = nfs_readdir_get_array(page); |
| 590 | if (IS_ERR(array)) { | 601 | if (IS_ERR(array)) { |
| 591 | status = PTR_ERR(array); | 602 | status = PTR_ERR(array); |
| 592 | goto out; | 603 | goto out_label_free; |
| 593 | } | 604 | } |
| 594 | memset(array, 0, sizeof(struct nfs_cache_array)); | 605 | memset(array, 0, sizeof(struct nfs_cache_array)); |
| 595 | array->eof_index = -1; | 606 | array->eof_index = -1; |
| @@ -615,6 +626,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, | |||
| 615 | nfs_readdir_free_large_page(pages_ptr, pages, array_size); | 626 | nfs_readdir_free_large_page(pages_ptr, pages, array_size); |
| 616 | out_release_array: | 627 | out_release_array: |
| 617 | nfs_readdir_release_array(page); | 628 | nfs_readdir_release_array(page); |
| 629 | out_label_free: | ||
| 630 | nfs4_label_free(entry.label); | ||
| 618 | out: | 631 | out: |
| 619 | nfs_free_fattr(entry.fattr); | 632 | nfs_free_fattr(entry.fattr); |
| 620 | nfs_free_fhandle(entry.fh); | 633 | nfs_free_fhandle(entry.fh); |
| @@ -805,7 +818,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) | |||
| 805 | nfs_readdir_descriptor_t my_desc, | 818 | nfs_readdir_descriptor_t my_desc, |
| 806 | *desc = &my_desc; | 819 | *desc = &my_desc; |
| 807 | struct nfs_open_dir_context *dir_ctx = file->private_data; | 820 | struct nfs_open_dir_context *dir_ctx = file->private_data; |
| 808 | int res; | 821 | int res = 0; |
| 809 | 822 | ||
| 810 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", | 823 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", |
| 811 | dentry->d_parent->d_name.name, dentry->d_name.name, | 824 | dentry->d_parent->d_name.name, dentry->d_name.name, |
| @@ -827,7 +840,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) | |||
| 827 | desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; | 840 | desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; |
| 828 | 841 | ||
| 829 | nfs_block_sillyrename(dentry); | 842 | nfs_block_sillyrename(dentry); |
| 830 | res = nfs_revalidate_mapping(inode, file->f_mapping); | 843 | if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) |
| 844 | res = nfs_revalidate_mapping(inode, file->f_mapping); | ||
| 831 | if (res < 0) | 845 | if (res < 0) |
| 832 | goto out; | 846 | goto out; |
| 833 | 847 | ||
| @@ -1039,6 +1053,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) | |||
| 1039 | struct dentry *parent; | 1053 | struct dentry *parent; |
| 1040 | struct nfs_fh *fhandle = NULL; | 1054 | struct nfs_fh *fhandle = NULL; |
| 1041 | struct nfs_fattr *fattr = NULL; | 1055 | struct nfs_fattr *fattr = NULL; |
| 1056 | struct nfs4_label *label = NULL; | ||
| 1042 | int error; | 1057 | int error; |
| 1043 | 1058 | ||
| 1044 | if (flags & LOOKUP_RCU) | 1059 | if (flags & LOOKUP_RCU) |
| @@ -1081,7 +1096,11 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) | |||
| 1081 | if (fhandle == NULL || fattr == NULL) | 1096 | if (fhandle == NULL || fattr == NULL) |
| 1082 | goto out_error; | 1097 | goto out_error; |
| 1083 | 1098 | ||
| 1084 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); | 1099 | label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); |
| 1100 | if (IS_ERR(label)) | ||
| 1101 | goto out_error; | ||
| 1102 | |||
| 1103 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); | ||
| 1085 | if (error) | 1104 | if (error) |
| 1086 | goto out_bad; | 1105 | goto out_bad; |
| 1087 | if (nfs_compare_fh(NFS_FH(inode), fhandle)) | 1106 | if (nfs_compare_fh(NFS_FH(inode), fhandle)) |
| @@ -1089,8 +1108,12 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) | |||
| 1089 | if ((error = nfs_refresh_inode(inode, fattr)) != 0) | 1108 | if ((error = nfs_refresh_inode(inode, fattr)) != 0) |
| 1090 | goto out_bad; | 1109 | goto out_bad; |
| 1091 | 1110 | ||
| 1111 | nfs_setsecurity(inode, fattr, label); | ||
| 1112 | |||
| 1092 | nfs_free_fattr(fattr); | 1113 | nfs_free_fattr(fattr); |
| 1093 | nfs_free_fhandle(fhandle); | 1114 | nfs_free_fhandle(fhandle); |
| 1115 | nfs4_label_free(label); | ||
| 1116 | |||
| 1094 | out_set_verifier: | 1117 | out_set_verifier: |
| 1095 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 1118 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
| 1096 | out_valid: | 1119 | out_valid: |
| @@ -1107,6 +1130,7 @@ out_zap_parent: | |||
| 1107 | out_bad: | 1130 | out_bad: |
| 1108 | nfs_free_fattr(fattr); | 1131 | nfs_free_fattr(fattr); |
| 1109 | nfs_free_fhandle(fhandle); | 1132 | nfs_free_fhandle(fhandle); |
| 1133 | nfs4_label_free(label); | ||
| 1110 | nfs_mark_for_revalidate(dir); | 1134 | nfs_mark_for_revalidate(dir); |
| 1111 | if (inode && S_ISDIR(inode->i_mode)) { | 1135 | if (inode && S_ISDIR(inode->i_mode)) { |
| 1112 | /* Purge readdir caches. */ | 1136 | /* Purge readdir caches. */ |
| @@ -1127,6 +1151,7 @@ out_zap_parent: | |||
| 1127 | out_error: | 1151 | out_error: |
| 1128 | nfs_free_fattr(fattr); | 1152 | nfs_free_fattr(fattr); |
| 1129 | nfs_free_fhandle(fhandle); | 1153 | nfs_free_fhandle(fhandle); |
| 1154 | nfs4_label_free(label); | ||
| 1130 | dput(parent); | 1155 | dput(parent); |
| 1131 | dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n", | 1156 | dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n", |
| 1132 | __func__, dentry->d_parent->d_name.name, | 1157 | __func__, dentry->d_parent->d_name.name, |
| @@ -1255,6 +1280,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in | |||
| 1255 | struct inode *inode = NULL; | 1280 | struct inode *inode = NULL; |
| 1256 | struct nfs_fh *fhandle = NULL; | 1281 | struct nfs_fh *fhandle = NULL; |
| 1257 | struct nfs_fattr *fattr = NULL; | 1282 | struct nfs_fattr *fattr = NULL; |
| 1283 | struct nfs4_label *label = NULL; | ||
| 1258 | int error; | 1284 | int error; |
| 1259 | 1285 | ||
| 1260 | dfprintk(VFS, "NFS: lookup(%s/%s)\n", | 1286 | dfprintk(VFS, "NFS: lookup(%s/%s)\n", |
| @@ -1281,17 +1307,21 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in | |||
| 1281 | if (fhandle == NULL || fattr == NULL) | 1307 | if (fhandle == NULL || fattr == NULL) |
| 1282 | goto out; | 1308 | goto out; |
| 1283 | 1309 | ||
| 1310 | label = nfs4_label_alloc(NFS_SERVER(dir), GFP_NOWAIT); | ||
| 1311 | if (IS_ERR(label)) | ||
| 1312 | goto out; | ||
| 1313 | |||
| 1284 | parent = dentry->d_parent; | 1314 | parent = dentry->d_parent; |
| 1285 | /* Protect against concurrent sillydeletes */ | 1315 | /* Protect against concurrent sillydeletes */ |
| 1286 | nfs_block_sillyrename(parent); | 1316 | nfs_block_sillyrename(parent); |
| 1287 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); | 1317 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); |
| 1288 | if (error == -ENOENT) | 1318 | if (error == -ENOENT) |
| 1289 | goto no_entry; | 1319 | goto no_entry; |
| 1290 | if (error < 0) { | 1320 | if (error < 0) { |
| 1291 | res = ERR_PTR(error); | 1321 | res = ERR_PTR(error); |
| 1292 | goto out_unblock_sillyrename; | 1322 | goto out_unblock_sillyrename; |
| 1293 | } | 1323 | } |
| 1294 | inode = nfs_fhget(dentry->d_sb, fhandle, fattr); | 1324 | inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); |
| 1295 | res = ERR_CAST(inode); | 1325 | res = ERR_CAST(inode); |
| 1296 | if (IS_ERR(res)) | 1326 | if (IS_ERR(res)) |
| 1297 | goto out_unblock_sillyrename; | 1327 | goto out_unblock_sillyrename; |
| @@ -1309,6 +1339,7 @@ no_entry: | |||
| 1309 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 1339 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
| 1310 | out_unblock_sillyrename: | 1340 | out_unblock_sillyrename: |
| 1311 | nfs_unblock_sillyrename(parent); | 1341 | nfs_unblock_sillyrename(parent); |
| 1342 | nfs4_label_free(label); | ||
| 1312 | out: | 1343 | out: |
| 1313 | nfs_free_fattr(fattr); | 1344 | nfs_free_fattr(fattr); |
| 1314 | nfs_free_fhandle(fhandle); | 1345 | nfs_free_fhandle(fhandle); |
| @@ -1356,18 +1387,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx, | |||
| 1356 | { | 1387 | { |
| 1357 | int err; | 1388 | int err; |
| 1358 | 1389 | ||
| 1359 | if (ctx->dentry != dentry) { | ||
| 1360 | dput(ctx->dentry); | ||
| 1361 | ctx->dentry = dget(dentry); | ||
| 1362 | } | ||
| 1363 | |||
| 1364 | /* If the open_intent is for execute, we have an extra check to make */ | ||
| 1365 | if (ctx->mode & FMODE_EXEC) { | ||
| 1366 | err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags); | ||
| 1367 | if (err < 0) | ||
| 1368 | goto out; | ||
| 1369 | } | ||
| 1370 | |||
| 1371 | err = finish_open(file, dentry, do_open, opened); | 1390 | err = finish_open(file, dentry, do_open, opened); |
| 1372 | if (err) | 1391 | if (err) |
| 1373 | goto out; | 1392 | goto out; |
| @@ -1426,13 +1445,13 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, | |||
| 1426 | 1445 | ||
| 1427 | nfs_block_sillyrename(dentry->d_parent); | 1446 | nfs_block_sillyrename(dentry->d_parent); |
| 1428 | inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); | 1447 | inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); |
| 1429 | d_drop(dentry); | 1448 | nfs_unblock_sillyrename(dentry->d_parent); |
| 1430 | if (IS_ERR(inode)) { | 1449 | if (IS_ERR(inode)) { |
| 1431 | nfs_unblock_sillyrename(dentry->d_parent); | ||
| 1432 | put_nfs_open_context(ctx); | 1450 | put_nfs_open_context(ctx); |
| 1433 | err = PTR_ERR(inode); | 1451 | err = PTR_ERR(inode); |
| 1434 | switch (err) { | 1452 | switch (err) { |
| 1435 | case -ENOENT: | 1453 | case -ENOENT: |
| 1454 | d_drop(dentry); | ||
| 1436 | d_add(dentry, NULL); | 1455 | d_add(dentry, NULL); |
| 1437 | break; | 1456 | break; |
| 1438 | case -EISDIR: | 1457 | case -EISDIR: |
| @@ -1448,16 +1467,8 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, | |||
| 1448 | } | 1467 | } |
| 1449 | goto out; | 1468 | goto out; |
| 1450 | } | 1469 | } |
| 1451 | res = d_add_unique(dentry, inode); | ||
| 1452 | if (res != NULL) | ||
| 1453 | dentry = res; | ||
| 1454 | |||
| 1455 | nfs_unblock_sillyrename(dentry->d_parent); | ||
| 1456 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
| 1457 | |||
| 1458 | err = nfs_finish_open(ctx, dentry, file, open_flags, opened); | ||
| 1459 | 1470 | ||
| 1460 | dput(res); | 1471 | err = nfs_finish_open(ctx, ctx->dentry, file, open_flags, opened); |
| 1461 | out: | 1472 | out: |
| 1462 | return err; | 1473 | return err; |
| 1463 | 1474 | ||
| @@ -1527,7 +1538,8 @@ no_open: | |||
| 1527 | * Code common to create, mkdir, and mknod. | 1538 | * Code common to create, mkdir, and mknod. |
| 1528 | */ | 1539 | */ |
| 1529 | int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, | 1540 | int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, |
| 1530 | struct nfs_fattr *fattr) | 1541 | struct nfs_fattr *fattr, |
| 1542 | struct nfs4_label *label) | ||
| 1531 | { | 1543 | { |
| 1532 | struct dentry *parent = dget_parent(dentry); | 1544 | struct dentry *parent = dget_parent(dentry); |
| 1533 | struct inode *dir = parent->d_inode; | 1545 | struct inode *dir = parent->d_inode; |
| @@ -1540,18 +1552,18 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, | |||
| 1540 | if (dentry->d_inode) | 1552 | if (dentry->d_inode) |
| 1541 | goto out; | 1553 | goto out; |
| 1542 | if (fhandle->size == 0) { | 1554 | if (fhandle->size == 0) { |
| 1543 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); | 1555 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL); |
| 1544 | if (error) | 1556 | if (error) |
| 1545 | goto out_error; | 1557 | goto out_error; |
| 1546 | } | 1558 | } |
| 1547 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 1559 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
| 1548 | if (!(fattr->valid & NFS_ATTR_FATTR)) { | 1560 | if (!(fattr->valid & NFS_ATTR_FATTR)) { |
| 1549 | struct nfs_server *server = NFS_SB(dentry->d_sb); | 1561 | struct nfs_server *server = NFS_SB(dentry->d_sb); |
| 1550 | error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); | 1562 | error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr, NULL); |
| 1551 | if (error < 0) | 1563 | if (error < 0) |
| 1552 | goto out_error; | 1564 | goto out_error; |
| 1553 | } | 1565 | } |
| 1554 | inode = nfs_fhget(dentry->d_sb, fhandle, fattr); | 1566 | inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); |
| 1555 | error = PTR_ERR(inode); | 1567 | error = PTR_ERR(inode); |
| 1556 | if (IS_ERR(inode)) | 1568 | if (IS_ERR(inode)) |
| 1557 | goto out_error; | 1569 | goto out_error; |
| @@ -1720,7 +1732,7 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 1720 | dir->i_ino, dentry->d_name.name); | 1732 | dir->i_ino, dentry->d_name.name); |
| 1721 | 1733 | ||
| 1722 | spin_lock(&dentry->d_lock); | 1734 | spin_lock(&dentry->d_lock); |
| 1723 | if (dentry->d_count > 1) { | 1735 | if (d_count(dentry) > 1) { |
| 1724 | spin_unlock(&dentry->d_lock); | 1736 | spin_unlock(&dentry->d_lock); |
| 1725 | /* Start asynchronous writeout of the inode */ | 1737 | /* Start asynchronous writeout of the inode */ |
| 1726 | write_inode_now(dentry->d_inode, 0); | 1738 | write_inode_now(dentry->d_inode, 0); |
| @@ -1758,7 +1770,6 @@ EXPORT_SYMBOL_GPL(nfs_unlink); | |||
| 1758 | */ | 1770 | */ |
| 1759 | int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) | 1771 | int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) |
| 1760 | { | 1772 | { |
| 1761 | struct pagevec lru_pvec; | ||
| 1762 | struct page *page; | 1773 | struct page *page; |
| 1763 | char *kaddr; | 1774 | char *kaddr; |
| 1764 | struct iattr attr; | 1775 | struct iattr attr; |
| @@ -1798,11 +1809,8 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) | |||
| 1798 | * No big deal if we can't add this page to the page cache here. | 1809 | * No big deal if we can't add this page to the page cache here. |
| 1799 | * READLINK will get the missing page from the server if needed. | 1810 | * READLINK will get the missing page from the server if needed. |
| 1800 | */ | 1811 | */ |
| 1801 | pagevec_init(&lru_pvec, 0); | 1812 | if (!add_to_page_cache_lru(page, dentry->d_inode->i_mapping, 0, |
| 1802 | if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, | ||
| 1803 | GFP_KERNEL)) { | 1813 | GFP_KERNEL)) { |
| 1804 | pagevec_add(&lru_pvec, page); | ||
| 1805 | pagevec_lru_add_file(&lru_pvec); | ||
| 1806 | SetPageUptodate(page); | 1814 | SetPageUptodate(page); |
| 1807 | unlock_page(page); | 1815 | unlock_page(page); |
| 1808 | } else | 1816 | } else |
| @@ -1869,7 +1877,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 1869 | dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", | 1877 | dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", |
| 1870 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, | 1878 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, |
| 1871 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name, | 1879 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name, |
| 1872 | new_dentry->d_count); | 1880 | d_count(new_dentry)); |
| 1873 | 1881 | ||
| 1874 | /* | 1882 | /* |
| 1875 | * For non-directories, check whether the target is busy and if so, | 1883 | * For non-directories, check whether the target is busy and if so, |
| @@ -1887,7 +1895,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 1887 | rehash = new_dentry; | 1895 | rehash = new_dentry; |
| 1888 | } | 1896 | } |
| 1889 | 1897 | ||
| 1890 | if (new_dentry->d_count > 2) { | 1898 | if (d_count(new_dentry) > 2) { |
| 1891 | int err; | 1899 | int err; |
| 1892 | 1900 | ||
| 1893 | /* copy the target dentry's name */ | 1901 | /* copy the target dentry's name */ |
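Two small API shifts run through the dir.c changes above: direct reads of dentry->d_count are replaced by the d_count() accessor, and nfs_symlink() now uses add_to_page_cache_lru(), which adds the page to the page cache and the file LRU in one call instead of the add_to_page_cache() plus pagevec sequence. For reference, the accessor is essentially the following (paraphrased from include/linux/dcache.h):

```c
/* Paraphrased: read-only accessor for the dentry reference count. */
static inline unsigned d_count(const struct dentry *dentry)
{
	return dentry->d_count;
}
```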
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 945527092295..fc0f95ec7358 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c | |||
| @@ -29,7 +29,6 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, | |||
| 29 | kfree(ip_addr); | 29 | kfree(ip_addr); |
| 30 | return ret; | 30 | return ret; |
| 31 | } | 31 | } |
| 32 | EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); | ||
| 33 | 32 | ||
| 34 | #else | 33 | #else |
| 35 | 34 | ||
| @@ -351,7 +350,6 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, | |||
| 351 | ret = -ESRCH; | 350 | ret = -ESRCH; |
| 352 | return ret; | 351 | return ret; |
| 353 | } | 352 | } |
| 354 | EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); | ||
| 355 | 353 | ||
| 356 | static struct cache_detail nfs_dns_resolve_template = { | 354 | static struct cache_detail nfs_dns_resolve_template = { |
| 357 | .owner = THIS_MODULE, | 355 | .owner = THIS_MODULE, |
| @@ -396,6 +394,21 @@ void nfs_dns_resolver_cache_destroy(struct net *net) | |||
| 396 | cache_destroy_net(nn->nfs_dns_resolve, net); | 394 | cache_destroy_net(nn->nfs_dns_resolve, net); |
| 397 | } | 395 | } |
| 398 | 396 | ||
| 397 | static int nfs4_dns_net_init(struct net *net) | ||
| 398 | { | ||
| 399 | return nfs_dns_resolver_cache_init(net); | ||
| 400 | } | ||
| 401 | |||
| 402 | static void nfs4_dns_net_exit(struct net *net) | ||
| 403 | { | ||
| 404 | nfs_dns_resolver_cache_destroy(net); | ||
| 405 | } | ||
| 406 | |||
| 407 | static struct pernet_operations nfs4_dns_resolver_ops = { | ||
| 408 | .init = nfs4_dns_net_init, | ||
| 409 | .exit = nfs4_dns_net_exit, | ||
| 410 | }; | ||
| 411 | |||
| 399 | static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, | 412 | static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, |
| 400 | void *ptr) | 413 | void *ptr) |
| 401 | { | 414 | { |
| @@ -432,11 +445,24 @@ static struct notifier_block nfs_dns_resolver_block = { | |||
| 432 | 445 | ||
| 433 | int nfs_dns_resolver_init(void) | 446 | int nfs_dns_resolver_init(void) |
| 434 | { | 447 | { |
| 435 | return rpc_pipefs_notifier_register(&nfs_dns_resolver_block); | 448 | int err; |
| 449 | |||
| 450 | err = register_pernet_subsys(&nfs4_dns_resolver_ops); | ||
| 451 | if (err < 0) | ||
| 452 | goto out; | ||
| 453 | err = rpc_pipefs_notifier_register(&nfs_dns_resolver_block); | ||
| 454 | if (err < 0) | ||
| 455 | goto out1; | ||
| 456 | return 0; | ||
| 457 | out1: | ||
| 458 | unregister_pernet_subsys(&nfs4_dns_resolver_ops); | ||
| 459 | out: | ||
| 460 | return err; | ||
| 436 | } | 461 | } |
| 437 | 462 | ||
| 438 | void nfs_dns_resolver_destroy(void) | 463 | void nfs_dns_resolver_destroy(void) |
| 439 | { | 464 | { |
| 440 | rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block); | 465 | rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block); |
| 466 | unregister_pernet_subsys(&nfs4_dns_resolver_ops); | ||
| 441 | } | 467 | } |
| 442 | #endif | 468 | #endif |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6b4a79f4ad1d..94e94bd11aae 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
| @@ -495,6 +495,35 @@ static int nfs_release_page(struct page *page, gfp_t gfp) | |||
| 495 | return nfs_fscache_release_page(page, gfp); | 495 | return nfs_fscache_release_page(page, gfp); |
| 496 | } | 496 | } |
| 497 | 497 | ||
| 498 | static void nfs_check_dirty_writeback(struct page *page, | ||
| 499 | bool *dirty, bool *writeback) | ||
| 500 | { | ||
| 501 | struct nfs_inode *nfsi; | ||
| 502 | struct address_space *mapping = page_file_mapping(page); | ||
| 503 | |||
| 504 | if (!mapping || PageSwapCache(page)) | ||
| 505 | return; | ||
| 506 | |||
| 507 | /* | ||
| 508 | * Check if an unstable page is currently being committed and | ||
| 509 | * if so, have the VM treat it as if the page is under writeback | ||
| 510 | * so it will not block due to pages that will shortly be freeable. | ||
| 511 | */ | ||
| 512 | nfsi = NFS_I(mapping->host); | ||
| 513 | if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) { | ||
| 514 | *writeback = true; | ||
| 515 | return; | ||
| 516 | } | ||
| 517 | |||
| 518 | /* | ||
| 519 | * If PagePrivate() is set, then the page is not freeable and as the | ||
| 520 | * inode is not being committed, it's not going to be cleaned in the | ||
| 521 | * near future so treat it as dirty | ||
| 522 | */ | ||
| 523 | if (PagePrivate(page)) | ||
| 524 | *dirty = true; | ||
| 525 | } | ||
| 526 | |||
| 498 | /* | 527 | /* |
| 499 | * Attempt to clear the private state associated with a page when an error | 528 | * Attempt to clear the private state associated with a page when an error |
| 500 | * occurs that requires the cached contents of an inode to be written back or | 529 | * occurs that requires the cached contents of an inode to be written back or |
| @@ -542,6 +571,7 @@ const struct address_space_operations nfs_file_aops = { | |||
| 542 | .direct_IO = nfs_direct_IO, | 571 | .direct_IO = nfs_direct_IO, |
| 543 | .migratepage = nfs_migrate_page, | 572 | .migratepage = nfs_migrate_page, |
| 544 | .launder_page = nfs_launder_page, | 573 | .launder_page = nfs_launder_page, |
| 574 | .is_dirty_writeback = nfs_check_dirty_writeback, | ||
| 545 | .error_remove_page = generic_error_remove_page, | 575 | .error_remove_page = generic_error_remove_page, |
| 546 | #ifdef CONFIG_NFS_SWAP | 576 | #ifdef CONFIG_NFS_SWAP |
| 547 | .swap_activate = nfs_swap_activate, | 577 | .swap_activate = nfs_swap_activate, |
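The new .is_dirty_writeback hook above lets NFS tell page reclaim that an unstable page currently being committed should be treated as under writeback rather than stalled on. The sketch below is illustrative only, not the actual mm code: it shows roughly how a reclaim path could consult the hook, falling back to the page flags when a filesystem does not provide one.

```c
/* Illustrative consumer sketch (not the real mm implementation). */
static void page_dirty_writeback_state(struct page *page,
				       bool *dirty, bool *writeback)
{
	struct address_space *mapping = page_mapping(page);

	*dirty = PageDirty(page);
	*writeback = PageWriteback(page);
	if (mapping && mapping->a_ops->is_dirty_writeback)
		mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
}
```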
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 44efaa8c5f78..66984a9aafaa 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c | |||
| @@ -95,7 +95,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, | |||
| 95 | goto out; | 95 | goto out; |
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | inode = nfs_fhget(sb, mntfh, fsinfo.fattr); | 98 | inode = nfs_fhget(sb, mntfh, fsinfo.fattr, NULL); |
| 99 | if (IS_ERR(inode)) { | 99 | if (IS_ERR(inode)) { |
| 100 | dprintk("nfs_get_root: get root inode failed\n"); | 100 | dprintk("nfs_get_root: get root inode failed\n"); |
| 101 | ret = ERR_CAST(inode); | 101 | ret = ERR_CAST(inode); |
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index c516da5873fd..c2c4163d5683 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
| @@ -262,29 +262,42 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen, | |||
| 262 | return desclen; | 262 | return desclen; |
| 263 | } | 263 | } |
| 264 | 264 | ||
| 265 | static ssize_t nfs_idmap_request_key(struct key_type *key_type, | 265 | static struct key *nfs_idmap_request_key(const char *name, size_t namelen, |
| 266 | const char *name, size_t namelen, | 266 | const char *type, struct idmap *idmap) |
| 267 | const char *type, void *data, | ||
| 268 | size_t data_size, struct idmap *idmap) | ||
| 269 | { | 267 | { |
| 270 | const struct cred *saved_cred; | ||
| 271 | struct key *rkey; | ||
| 272 | char *desc; | 268 | char *desc; |
| 273 | struct user_key_payload *payload; | 269 | struct key *rkey; |
| 274 | ssize_t ret; | 270 | ssize_t ret; |
| 275 | 271 | ||
| 276 | ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc); | 272 | ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc); |
| 277 | if (ret <= 0) | 273 | if (ret <= 0) |
| 278 | goto out; | 274 | return ERR_PTR(ret); |
| 275 | |||
| 276 | rkey = request_key(&key_type_id_resolver, desc, ""); | ||
| 277 | if (IS_ERR(rkey)) { | ||
| 278 | mutex_lock(&idmap->idmap_mutex); | ||
| 279 | rkey = request_key_with_auxdata(&key_type_id_resolver_legacy, | ||
| 280 | desc, "", 0, idmap); | ||
| 281 | mutex_unlock(&idmap->idmap_mutex); | ||
| 282 | } | ||
| 283 | |||
| 284 | kfree(desc); | ||
| 285 | return rkey; | ||
| 286 | } | ||
| 287 | |||
| 288 | static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, | ||
| 289 | const char *type, void *data, | ||
| 290 | size_t data_size, struct idmap *idmap) | ||
| 291 | { | ||
| 292 | const struct cred *saved_cred; | ||
| 293 | struct key *rkey; | ||
| 294 | struct user_key_payload *payload; | ||
| 295 | ssize_t ret; | ||
| 279 | 296 | ||
| 280 | saved_cred = override_creds(id_resolver_cache); | 297 | saved_cred = override_creds(id_resolver_cache); |
| 281 | if (idmap) | 298 | rkey = nfs_idmap_request_key(name, namelen, type, idmap); |
| 282 | rkey = request_key_with_auxdata(key_type, desc, "", 0, idmap); | ||
| 283 | else | ||
| 284 | rkey = request_key(&key_type_id_resolver, desc, ""); | ||
| 285 | revert_creds(saved_cred); | 299 | revert_creds(saved_cred); |
| 286 | 300 | ||
| 287 | kfree(desc); | ||
| 288 | if (IS_ERR(rkey)) { | 301 | if (IS_ERR(rkey)) { |
| 289 | ret = PTR_ERR(rkey); | 302 | ret = PTR_ERR(rkey); |
| 290 | goto out; | 303 | goto out; |
| @@ -316,23 +329,6 @@ out: | |||
| 316 | return ret; | 329 | return ret; |
| 317 | } | 330 | } |
| 318 | 331 | ||
| 319 | static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, | ||
| 320 | const char *type, void *data, | ||
| 321 | size_t data_size, struct idmap *idmap) | ||
| 322 | { | ||
| 323 | ssize_t ret = nfs_idmap_request_key(&key_type_id_resolver, | ||
| 324 | name, namelen, type, data, | ||
| 325 | data_size, NULL); | ||
| 326 | if (ret < 0) { | ||
| 327 | mutex_lock(&idmap->idmap_mutex); | ||
| 328 | ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, | ||
| 329 | name, namelen, type, data, | ||
| 330 | data_size, idmap); | ||
| 331 | mutex_unlock(&idmap->idmap_mutex); | ||
| 332 | } | ||
| 333 | return ret; | ||
| 334 | } | ||
| 335 | |||
| 336 | /* ID -> Name */ | 332 | /* ID -> Name */ |
| 337 | static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, | 333 | static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, |
| 338 | size_t buflen, struct idmap *idmap) | 334 | size_t buflen, struct idmap *idmap) |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c1c7a9d78722..af6e806044d7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
| @@ -48,7 +48,6 @@ | |||
| 48 | #include "iostat.h" | 48 | #include "iostat.h" |
| 49 | #include "internal.h" | 49 | #include "internal.h" |
| 50 | #include "fscache.h" | 50 | #include "fscache.h" |
| 51 | #include "dns_resolve.h" | ||
| 52 | #include "pnfs.h" | 51 | #include "pnfs.h" |
| 53 | #include "nfs.h" | 52 | #include "nfs.h" |
| 54 | #include "netns.h" | 53 | #include "netns.h" |
| @@ -79,7 +78,7 @@ int nfs_wait_bit_killable(void *word) | |||
| 79 | { | 78 | { |
| 80 | if (fatal_signal_pending(current)) | 79 | if (fatal_signal_pending(current)) |
| 81 | return -ERESTARTSYS; | 80 | return -ERESTARTSYS; |
| 82 | freezable_schedule(); | 81 | freezable_schedule_unsafe(); |
| 83 | return 0; | 82 | return 0; |
| 84 | } | 83 | } |
| 85 | EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); | 84 | EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); |
| @@ -162,11 +161,19 @@ static void nfs_zap_caches_locked(struct inode *inode) | |||
| 162 | 161 | ||
| 163 | memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); | 162 | memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); |
| 164 | if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { | 163 | if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { |
| 165 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; | ||
| 166 | nfs_fscache_invalidate(inode); | 164 | nfs_fscache_invalidate(inode); |
| 167 | } else { | 165 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR |
| 168 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; | 166 | | NFS_INO_INVALID_LABEL |
| 169 | } | 167 | | NFS_INO_INVALID_DATA |
| 168 | | NFS_INO_INVALID_ACCESS | ||
| 169 | | NFS_INO_INVALID_ACL | ||
| 170 | | NFS_INO_REVAL_PAGECACHE; | ||
| 171 | } else | ||
| 172 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | ||
| 173 | | NFS_INO_INVALID_LABEL | ||
| 174 | | NFS_INO_INVALID_ACCESS | ||
| 175 | | NFS_INO_INVALID_ACL | ||
| 176 | | NFS_INO_REVAL_PAGECACHE; | ||
| 170 | } | 177 | } |
| 171 | 178 | ||
| 172 | void nfs_zap_caches(struct inode *inode) | 179 | void nfs_zap_caches(struct inode *inode) |
| @@ -257,12 +264,72 @@ nfs_init_locked(struct inode *inode, void *opaque) | |||
| 257 | return 0; | 264 | return 0; |
| 258 | } | 265 | } |
| 259 | 266 | ||
| 267 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
| 268 | void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, | ||
| 269 | struct nfs4_label *label) | ||
| 270 | { | ||
| 271 | int error; | ||
| 272 | |||
| 273 | if (label == NULL) | ||
| 274 | return; | ||
| 275 | |||
| 276 | if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL) == 0) | ||
| 277 | return; | ||
| 278 | |||
| 279 | if (NFS_SERVER(inode)->nfs_client->cl_minorversion < 2) | ||
| 280 | return; | ||
| 281 | |||
| 282 | if ((fattr->valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) && inode->i_security) { | ||
| 283 | error = security_inode_notifysecctx(inode, label->label, | ||
| 284 | label->len); | ||
| 285 | if (error) | ||
| 286 | printk(KERN_ERR "%s() %s %d " | ||
| 287 | "security_inode_notifysecctx() %d\n", | ||
| 288 | __func__, | ||
| 289 | (char *)label->label, | ||
| 290 | label->len, error); | ||
| 291 | } | ||
| 292 | } | ||
| 293 | |||
| 294 | struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) | ||
| 295 | { | ||
| 296 | struct nfs4_label *label = NULL; | ||
| 297 | int minor_version = server->nfs_client->cl_minorversion; | ||
| 298 | |||
| 299 | if (minor_version < 2) | ||
| 300 | return label; | ||
| 301 | |||
| 302 | if (!(server->caps & NFS_CAP_SECURITY_LABEL)) | ||
| 303 | return label; | ||
| 304 | |||
| 305 | label = kzalloc(sizeof(struct nfs4_label), flags); | ||
| 306 | if (label == NULL) | ||
| 307 | return ERR_PTR(-ENOMEM); | ||
| 308 | |||
| 309 | label->label = kzalloc(NFS4_MAXLABELLEN, flags); | ||
| 310 | if (label->label == NULL) { | ||
| 311 | kfree(label); | ||
| 312 | return ERR_PTR(-ENOMEM); | ||
| 313 | } | ||
| 314 | label->len = NFS4_MAXLABELLEN; | ||
| 315 | |||
| 316 | return label; | ||
| 317 | } | ||
| 318 | EXPORT_SYMBOL_GPL(nfs4_label_alloc); | ||
| 319 | #else | ||
| 320 | void inline nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, | ||
| 321 | struct nfs4_label *label) | ||
| 322 | { | ||
| 323 | } | ||
| 324 | #endif | ||
| 325 | EXPORT_SYMBOL_GPL(nfs_setsecurity); | ||
| 326 | |||
| 260 | /* | 327 | /* |
| 261 | * This is our front-end to iget that looks up inodes by file handle | 328 | * This is our front-end to iget that looks up inodes by file handle |
| 262 | * instead of inode number. | 329 | * instead of inode number. |
| 263 | */ | 330 | */ |
| 264 | struct inode * | 331 | struct inode * |
| 265 | nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | 332 | nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label) |
| 266 | { | 333 | { |
| 267 | struct nfs_find_desc desc = { | 334 | struct nfs_find_desc desc = { |
| 268 | .fh = fh, | 335 | .fh = fh, |
| @@ -384,6 +451,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
| 384 | */ | 451 | */ |
| 385 | inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); | 452 | inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); |
| 386 | } | 453 | } |
| 454 | |||
| 455 | nfs_setsecurity(inode, fattr, label); | ||
| 456 | |||
| 387 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); | 457 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); |
| 388 | nfsi->attrtimeo_timestamp = now; | 458 | nfsi->attrtimeo_timestamp = now; |
| 389 | nfsi->access_cache = RB_ROOT; | 459 | nfsi->access_cache = RB_ROOT; |
| @@ -393,6 +463,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
| 393 | unlock_new_inode(inode); | 463 | unlock_new_inode(inode); |
| 394 | } else | 464 | } else |
| 395 | nfs_refresh_inode(inode, fattr); | 465 | nfs_refresh_inode(inode, fattr); |
| 466 | nfs_setsecurity(inode, fattr, label); | ||
| 396 | dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n", | 467 | dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n", |
| 397 | inode->i_sb->s_id, | 468 | inode->i_sb->s_id, |
| 398 | (long long)NFS_FILEID(inode), | 469 | (long long)NFS_FILEID(inode), |
| @@ -449,7 +520,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 449 | NFS_PROTO(inode)->return_delegation(inode); | 520 | NFS_PROTO(inode)->return_delegation(inode); |
| 450 | error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); | 521 | error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); |
| 451 | if (error == 0) | 522 | if (error == 0) |
| 452 | nfs_refresh_inode(inode, fattr); | 523 | error = nfs_refresh_inode(inode, fattr); |
| 453 | nfs_free_fattr(fattr); | 524 | nfs_free_fattr(fattr); |
| 454 | out: | 525 | out: |
| 455 | return error; | 526 | return error; |
| @@ -713,16 +784,23 @@ EXPORT_SYMBOL_GPL(put_nfs_open_context); | |||
| 713 | * Ensure that mmap has a recent RPC credential for use when writing out | 784 | * Ensure that mmap has a recent RPC credential for use when writing out |
| 714 | * shared pages | 785 | * shared pages |
| 715 | */ | 786 | */ |
| 716 | void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) | 787 | void nfs_inode_attach_open_context(struct nfs_open_context *ctx) |
| 717 | { | 788 | { |
| 718 | struct inode *inode = file_inode(filp); | 789 | struct inode *inode = ctx->dentry->d_inode; |
| 719 | struct nfs_inode *nfsi = NFS_I(inode); | 790 | struct nfs_inode *nfsi = NFS_I(inode); |
| 720 | 791 | ||
| 721 | filp->private_data = get_nfs_open_context(ctx); | ||
| 722 | spin_lock(&inode->i_lock); | 792 | spin_lock(&inode->i_lock); |
| 723 | list_add(&ctx->list, &nfsi->open_files); | 793 | list_add(&ctx->list, &nfsi->open_files); |
| 724 | spin_unlock(&inode->i_lock); | 794 | spin_unlock(&inode->i_lock); |
| 725 | } | 795 | } |
| 796 | EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context); | ||
| 797 | |||
| 798 | void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) | ||
| 799 | { | ||
| 800 | filp->private_data = get_nfs_open_context(ctx); | ||
| 801 | if (list_empty(&ctx->list)) | ||
| 802 | nfs_inode_attach_open_context(ctx); | ||
| 803 | } | ||
| 726 | EXPORT_SYMBOL_GPL(nfs_file_set_open_context); | 804 | EXPORT_SYMBOL_GPL(nfs_file_set_open_context); |
| 727 | 805 | ||
| 728 | /* | 806 | /* |
| @@ -748,10 +826,11 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c | |||
| 748 | 826 | ||
| 749 | static void nfs_file_clear_open_context(struct file *filp) | 827 | static void nfs_file_clear_open_context(struct file *filp) |
| 750 | { | 828 | { |
| 751 | struct inode *inode = file_inode(filp); | ||
| 752 | struct nfs_open_context *ctx = nfs_file_open_context(filp); | 829 | struct nfs_open_context *ctx = nfs_file_open_context(filp); |
| 753 | 830 | ||
| 754 | if (ctx) { | 831 | if (ctx) { |
| 832 | struct inode *inode = ctx->dentry->d_inode; | ||
| 833 | |||
| 755 | filp->private_data = NULL; | 834 | filp->private_data = NULL; |
| 756 | spin_lock(&inode->i_lock); | 835 | spin_lock(&inode->i_lock); |
| 757 | list_move_tail(&ctx->list, &NFS_I(inode)->open_files); | 836 | list_move_tail(&ctx->list, &NFS_I(inode)->open_files); |
| @@ -790,6 +869,7 @@ int | |||
| 790 | __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | 869 | __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) |
| 791 | { | 870 | { |
| 792 | int status = -ESTALE; | 871 | int status = -ESTALE; |
| 872 | struct nfs4_label *label = NULL; | ||
| 793 | struct nfs_fattr *fattr = NULL; | 873 | struct nfs_fattr *fattr = NULL; |
| 794 | struct nfs_inode *nfsi = NFS_I(inode); | 874 | struct nfs_inode *nfsi = NFS_I(inode); |
| 795 | 875 | ||
| @@ -807,7 +887,14 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
| 807 | goto out; | 887 | goto out; |
| 808 | 888 | ||
| 809 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); | 889 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); |
| 810 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr); | 890 | |
| 891 | label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); | ||
| 892 | if (IS_ERR(label)) { | ||
| 893 | status = PTR_ERR(label); | ||
| 894 | goto out; | ||
| 895 | } | ||
| 896 | |||
| 897 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, label); | ||
| 811 | if (status != 0) { | 898 | if (status != 0) { |
| 812 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", | 899 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", |
| 813 | inode->i_sb->s_id, | 900 | inode->i_sb->s_id, |
| @@ -817,7 +904,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
| 817 | if (!S_ISDIR(inode->i_mode)) | 904 | if (!S_ISDIR(inode->i_mode)) |
| 818 | set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); | 905 | set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); |
| 819 | } | 906 | } |
| 820 | goto out; | 907 | goto err_out; |
| 821 | } | 908 | } |
| 822 | 909 | ||
| 823 | status = nfs_refresh_inode(inode, fattr); | 910 | status = nfs_refresh_inode(inode, fattr); |
| @@ -825,7 +912,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
| 825 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", | 912 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", |
| 826 | inode->i_sb->s_id, | 913 | inode->i_sb->s_id, |
| 827 | (long long)NFS_FILEID(inode), status); | 914 | (long long)NFS_FILEID(inode), status); |
| 828 | goto out; | 915 | goto err_out; |
| 829 | } | 916 | } |
| 830 | 917 | ||
| 831 | if (nfsi->cache_validity & NFS_INO_INVALID_ACL) | 918 | if (nfsi->cache_validity & NFS_INO_INVALID_ACL) |
| @@ -835,7 +922,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
| 835 | inode->i_sb->s_id, | 922 | inode->i_sb->s_id, |
| 836 | (long long)NFS_FILEID(inode)); | 923 | (long long)NFS_FILEID(inode)); |
| 837 | 924 | ||
| 838 | out: | 925 | err_out: |
| 926 | nfs4_label_free(label); | ||
| 927 | out: | ||
| 839 | nfs_free_fattr(fattr); | 928 | nfs_free_fattr(fattr); |
| 840 | return status; | 929 | return status; |
| 841 | } | 930 | } |
| @@ -847,7 +936,7 @@ int nfs_attribute_timeout(struct inode *inode) | |||
| 847 | return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); | 936 | return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); |
| 848 | } | 937 | } |
| 849 | 938 | ||
| 850 | static int nfs_attribute_cache_expired(struct inode *inode) | 939 | int nfs_attribute_cache_expired(struct inode *inode) |
| 851 | { | 940 | { |
| 852 | if (nfs_have_delegated_attributes(inode)) | 941 | if (nfs_have_delegated_attributes(inode)) |
| 853 | return 0; | 942 | return 0; |
| @@ -863,7 +952,8 @@ static int nfs_attribute_cache_expired(struct inode *inode) | |||
| 863 | */ | 952 | */ |
| 864 | int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | 953 | int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) |
| 865 | { | 954 | { |
| 866 | if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR) | 955 | if (!(NFS_I(inode)->cache_validity & |
| 956 | (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) | ||
| 867 | && !nfs_attribute_cache_expired(inode)) | 957 | && !nfs_attribute_cache_expired(inode)) |
| 868 | return NFS_STALE(inode) ? -ESTALE : 0; | 958 | return NFS_STALE(inode) ? -ESTALE : 0; |
| 869 | return __nfs_revalidate_inode(server, inode); | 959 | return __nfs_revalidate_inode(server, inode); |
| @@ -1243,6 +1333,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1243 | spin_lock(&inode->i_lock); | 1333 | spin_lock(&inode->i_lock); |
| 1244 | status = nfs_post_op_update_inode_locked(inode, fattr); | 1334 | status = nfs_post_op_update_inode_locked(inode, fattr); |
| 1245 | spin_unlock(&inode->i_lock); | 1335 | spin_unlock(&inode->i_lock); |
| 1336 | |||
| 1246 | return status; | 1337 | return status; |
| 1247 | } | 1338 | } |
| 1248 | EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); | 1339 | EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); |
| @@ -1483,7 +1574,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1483 | inode->i_blocks = fattr->du.nfs2.blocks; | 1574 | inode->i_blocks = fattr->du.nfs2.blocks; |
| 1484 | 1575 | ||
| 1485 | /* Update attrtimeo value if we're out of the unstable period */ | 1576 | /* Update attrtimeo value if we're out of the unstable period */ |
| 1486 | if (invalid & NFS_INO_INVALID_ATTR) { | 1577 | if (invalid & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) { |
| 1487 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); | 1578 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); |
| 1488 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); | 1579 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); |
| 1489 | nfsi->attrtimeo_timestamp = now; | 1580 | nfsi->attrtimeo_timestamp = now; |
| @@ -1496,6 +1587,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1496 | } | 1587 | } |
| 1497 | } | 1588 | } |
| 1498 | invalid &= ~NFS_INO_INVALID_ATTR; | 1589 | invalid &= ~NFS_INO_INVALID_ATTR; |
| 1590 | invalid &= ~NFS_INO_INVALID_LABEL; | ||
| 1499 | /* Don't invalidate the data if we were to blame */ | 1591 | /* Don't invalidate the data if we were to blame */ |
| 1500 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) | 1592 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) |
| 1501 | || S_ISLNK(inode->i_mode))) | 1593 | || S_ISLNK(inode->i_mode))) |
| @@ -1638,12 +1730,11 @@ EXPORT_SYMBOL_GPL(nfs_net_id); | |||
| 1638 | static int nfs_net_init(struct net *net) | 1730 | static int nfs_net_init(struct net *net) |
| 1639 | { | 1731 | { |
| 1640 | nfs_clients_init(net); | 1732 | nfs_clients_init(net); |
| 1641 | return nfs_dns_resolver_cache_init(net); | 1733 | return 0; |
| 1642 | } | 1734 | } |
| 1643 | 1735 | ||
| 1644 | static void nfs_net_exit(struct net *net) | 1736 | static void nfs_net_exit(struct net *net) |
| 1645 | { | 1737 | { |
| 1646 | nfs_dns_resolver_cache_destroy(net); | ||
| 1647 | nfs_cleanup_cb_ident_idr(net); | 1738 | nfs_cleanup_cb_ident_idr(net); |
| 1648 | } | 1739 | } |
| 1649 | 1740 | ||
| @@ -1661,10 +1752,6 @@ static int __init init_nfs_fs(void) | |||
| 1661 | { | 1752 | { |
| 1662 | int err; | 1753 | int err; |
| 1663 | 1754 | ||
| 1664 | err = nfs_dns_resolver_init(); | ||
| 1665 | if (err < 0) | ||
| 1666 | goto out10;; | ||
| 1667 | |||
| 1668 | err = register_pernet_subsys(&nfs_net_ops); | 1755 | err = register_pernet_subsys(&nfs_net_ops); |
| 1669 | if (err < 0) | 1756 | if (err < 0) |
| 1670 | goto out9; | 1757 | goto out9; |
| @@ -1730,8 +1817,6 @@ out7: | |||
| 1730 | out8: | 1817 | out8: |
| 1731 | unregister_pernet_subsys(&nfs_net_ops); | 1818 | unregister_pernet_subsys(&nfs_net_ops); |
| 1732 | out9: | 1819 | out9: |
| 1733 | nfs_dns_resolver_destroy(); | ||
| 1734 | out10: | ||
| 1735 | return err; | 1820 | return err; |
| 1736 | } | 1821 | } |
| 1737 | 1822 | ||
| @@ -1744,7 +1829,6 @@ static void __exit exit_nfs_fs(void) | |||
| 1744 | nfs_destroy_nfspagecache(); | 1829 | nfs_destroy_nfspagecache(); |
| 1745 | nfs_fscache_unregister(); | 1830 | nfs_fscache_unregister(); |
| 1746 | unregister_pernet_subsys(&nfs_net_ops); | 1831 | unregister_pernet_subsys(&nfs_net_ops); |
| 1747 | nfs_dns_resolver_destroy(); | ||
| 1748 | #ifdef CONFIG_PROC_FS | 1832 | #ifdef CONFIG_PROC_FS |
| 1749 | rpc_proc_unregister(&init_net, "nfs"); | 1833 | rpc_proc_unregister(&init_net, "nfs"); |
| 1750 | #endif | 1834 | #endif |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 91e59a39fc08..3c8373f90ab3 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
| @@ -165,7 +165,7 @@ extern void nfs_free_client(struct nfs_client *); | |||
| 165 | extern struct nfs_client *nfs4_find_client_ident(struct net *, int); | 165 | extern struct nfs_client *nfs4_find_client_ident(struct net *, int); |
| 166 | extern struct nfs_client * | 166 | extern struct nfs_client * |
| 167 | nfs4_find_client_sessionid(struct net *, const struct sockaddr *, | 167 | nfs4_find_client_sessionid(struct net *, const struct sockaddr *, |
| 168 | struct nfs4_sessionid *); | 168 | struct nfs4_sessionid *, u32); |
| 169 | extern struct nfs_server *nfs_create_server(struct nfs_mount_info *, | 169 | extern struct nfs_server *nfs_create_server(struct nfs_mount_info *, |
| 170 | struct nfs_subversion *); | 170 | struct nfs_subversion *); |
| 171 | extern struct nfs_server *nfs4_create_server( | 171 | extern struct nfs_server *nfs4_create_server( |
| @@ -255,6 +255,7 @@ extern int nfs4_decode_dirent(struct xdr_stream *, | |||
| 255 | #ifdef CONFIG_NFS_V4_1 | 255 | #ifdef CONFIG_NFS_V4_1 |
| 256 | extern const u32 nfs41_maxread_overhead; | 256 | extern const u32 nfs41_maxread_overhead; |
| 257 | extern const u32 nfs41_maxwrite_overhead; | 257 | extern const u32 nfs41_maxwrite_overhead; |
| 258 | extern const u32 nfs41_maxgetdevinfo_overhead; | ||
| 258 | #endif | 259 | #endif |
| 259 | 260 | ||
| 260 | /* nfs4proc.c */ | 261 | /* nfs4proc.c */ |
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 91a6faf811ac..99a45283b9ee 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c | |||
| @@ -139,7 +139,10 @@ struct mnt_fhstatus { | |||
| 139 | * nfs_mount - Obtain an NFS file handle for the given host and path | 139 | * nfs_mount - Obtain an NFS file handle for the given host and path |
| 140 | * @info: pointer to mount request arguments | 140 | * @info: pointer to mount request arguments |
| 141 | * | 141 | * |
| 142 | * Uses default timeout parameters specified by underlying transport. | 142 | * Uses default timeout parameters specified by underlying transport. On |
| 143 | * successful return, the auth_flavs list and auth_flav_len will be populated | ||
| 144 | * with the list from the server or a faked-up list if the server didn't | ||
| 145 | * provide one. | ||
| 143 | */ | 146 | */ |
| 144 | int nfs_mount(struct nfs_mount_request *info) | 147 | int nfs_mount(struct nfs_mount_request *info) |
| 145 | { | 148 | { |
| @@ -195,6 +198,15 @@ int nfs_mount(struct nfs_mount_request *info) | |||
| 195 | dprintk("NFS: MNT request succeeded\n"); | 198 | dprintk("NFS: MNT request succeeded\n"); |
| 196 | status = 0; | 199 | status = 0; |
| 197 | 200 | ||
| 201 | /* | ||
| 202 | * If the server didn't provide a flavor list, allow the | ||
| 203 | * client to try any flavor. | ||
| 204 | */ | ||
| 205 | if (info->version != NFS_MNT3_VERSION || *info->auth_flav_len == 0) { | ||
| 206 | dprintk("NFS: Faking up auth_flavs list\n"); | ||
| 207 | info->auth_flavs[0] = RPC_AUTH_NULL; | ||
| 208 | *info->auth_flav_len = 1; | ||
| 209 | } | ||
| 198 | out: | 210 | out: |
| 199 | return status; | 211 | return status; |
| 200 | 212 | ||
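The added block gives nfs_mount() a fallback when the MNT reply carries no flavor list (or the server speaks a pre-v3 MOUNT protocol): a one-entry list containing AUTH_NULL, which effectively lets the client try any flavor. A user-space model of that fallback (the constant values are assumptions for the sketch):

```c
#include <stdio.h>

#define NFS_MNT3_VERSION	3
#define RPC_AUTH_NULL		0	/* assumed value, illustration only */

/* If the server did not supply an auth flavor list, fake one up so the
 * client is free to try any flavor (mirrors the hunk above). */
static void fake_up_auth_flavs(int version, unsigned int *auth_flavs,
			       unsigned int *auth_flav_len)
{
	if (version != NFS_MNT3_VERSION || *auth_flav_len == 0) {
		auth_flavs[0] = RPC_AUTH_NULL;
		*auth_flav_len = 1;
	}
}

int main(void)
{
	unsigned int flavs[8];
	unsigned int len = 0;

	fake_up_auth_flavs(NFS_MNT3_VERSION, flavs, &len);
	printf("flavors: %u entries, first = %u\n", len, flavs[0]);
	return 0;
}
```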
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index fc8dc20fdeb9..348b535cd786 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
| @@ -280,7 +280,7 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, | |||
| 280 | struct dentry *parent = dget_parent(dentry); | 280 | struct dentry *parent = dget_parent(dentry); |
| 281 | 281 | ||
| 282 | /* Look it up again to get its attributes */ | 282 | /* Look it up again to get its attributes */ |
| 283 | err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr); | 283 | err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr, NULL); |
| 284 | dput(parent); | 284 | dput(parent); |
| 285 | if (err != 0) | 285 | if (err != 0) |
| 286 | return ERR_PTR(err); | 286 | return ERR_PTR(err); |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 43ea96ced28c..f5c84c3efbca 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
| @@ -33,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) | |||
| 33 | res = rpc_call_sync(clnt, msg, flags); | 33 | res = rpc_call_sync(clnt, msg, flags); |
| 34 | if (res != -EJUKEBOX) | 34 | if (res != -EJUKEBOX) |
| 35 | break; | 35 | break; |
| 36 | freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); | 36 | freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME); |
| 37 | res = -ERESTARTSYS; | 37 | res = -ERESTARTSYS; |
| 38 | } while (!fatal_signal_pending(current)); | 38 | } while (!fatal_signal_pending(current)); |
| 39 | return res; | 39 | return res; |
| @@ -98,7 +98,7 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 98 | */ | 98 | */ |
| 99 | static int | 99 | static int |
| 100 | nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, | 100 | nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, |
| 101 | struct nfs_fattr *fattr) | 101 | struct nfs_fattr *fattr, struct nfs4_label *label) |
| 102 | { | 102 | { |
| 103 | struct rpc_message msg = { | 103 | struct rpc_message msg = { |
| 104 | .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], | 104 | .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], |
| @@ -143,7 +143,8 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
| 143 | 143 | ||
| 144 | static int | 144 | static int |
| 145 | nfs3_proc_lookup(struct inode *dir, struct qstr *name, | 145 | nfs3_proc_lookup(struct inode *dir, struct qstr *name, |
| 146 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 146 | struct nfs_fh *fhandle, struct nfs_fattr *fattr, |
| 147 | struct nfs4_label *label) | ||
| 147 | { | 148 | { |
| 148 | struct nfs3_diropargs arg = { | 149 | struct nfs3_diropargs arg = { |
| 149 | .fh = NFS_FH(dir), | 150 | .fh = NFS_FH(dir), |
| @@ -300,7 +301,7 @@ static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_ | |||
| 300 | status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); | 301 | status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); |
| 301 | nfs_post_op_update_inode(dir, data->res.dir_attr); | 302 | nfs_post_op_update_inode(dir, data->res.dir_attr); |
| 302 | if (status == 0) | 303 | if (status == 0) |
| 303 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); | 304 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); |
| 304 | return status; | 305 | return status; |
| 305 | } | 306 | } |
| 306 | 307 | ||
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a1dd768d0a35..ee81e354bce7 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
| @@ -194,7 +194,7 @@ struct nfs4_state_recovery_ops { | |||
| 194 | int (*recover_lock)(struct nfs4_state *, struct file_lock *); | 194 | int (*recover_lock)(struct nfs4_state *, struct file_lock *); |
| 195 | int (*establish_clid)(struct nfs_client *, struct rpc_cred *); | 195 | int (*establish_clid)(struct nfs_client *, struct rpc_cred *); |
| 196 | struct rpc_cred * (*get_clid_cred)(struct nfs_client *); | 196 | struct rpc_cred * (*get_clid_cred)(struct nfs_client *); |
| 197 | int (*reclaim_complete)(struct nfs_client *); | 197 | int (*reclaim_complete)(struct nfs_client *, struct rpc_cred *); |
| 198 | int (*detect_trunking)(struct nfs_client *, struct nfs_client **, | 198 | int (*detect_trunking)(struct nfs_client *, struct nfs_client **, |
| 199 | struct rpc_cred *); | 199 | struct rpc_cred *); |
| 200 | }; | 200 | }; |
| @@ -303,10 +303,10 @@ is_ds_client(struct nfs_client *clp) | |||
| 303 | extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; | 303 | extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; |
| 304 | 304 | ||
| 305 | extern const u32 nfs4_fattr_bitmap[3]; | 305 | extern const u32 nfs4_fattr_bitmap[3]; |
| 306 | extern const u32 nfs4_statfs_bitmap[2]; | 306 | extern const u32 nfs4_statfs_bitmap[3]; |
| 307 | extern const u32 nfs4_pathconf_bitmap[2]; | 307 | extern const u32 nfs4_pathconf_bitmap[3]; |
| 308 | extern const u32 nfs4_fsinfo_bitmap[3]; | 308 | extern const u32 nfs4_fsinfo_bitmap[3]; |
| 309 | extern const u32 nfs4_fs_locations_bitmap[2]; | 309 | extern const u32 nfs4_fs_locations_bitmap[3]; |
| 310 | 310 | ||
| 311 | void nfs4_free_client(struct nfs_client *); | 311 | void nfs4_free_client(struct nfs_client *); |
| 312 | 312 | ||
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 4cbad5d6b276..90dce91dd5b5 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c | |||
| @@ -66,6 +66,11 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) | |||
| 66 | if (err) | 66 | if (err) |
| 67 | goto error; | 67 | goto error; |
| 68 | 68 | ||
| 69 | if (cl_init->minorversion > NFS4_MAX_MINOR_VERSION) { | ||
| 70 | err = -EINVAL; | ||
| 71 | goto error; | ||
| 72 | } | ||
| 73 | |||
| 69 | spin_lock_init(&clp->cl_lock); | 74 | spin_lock_init(&clp->cl_lock); |
| 70 | INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); | 75 | INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); |
| 71 | rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); | 76 | rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); |
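nfs4_alloc_client() now rejects minor versions the client cannot speak up front, before any per-client state is initialised. A trivial model of the bound check (the maximum value used here is an assumption, not taken from the tree):

```c
#include <errno.h>
#include <stdio.h>

#define NFS4_MAX_MINOR_VERSION	2	/* placeholder; the real limit lives in nfs4_fs.h */

static int check_minorversion(unsigned int minorversion)
{
	if (minorversion > NFS4_MAX_MINOR_VERSION)
		return -EINVAL;
	return 0;
}

int main(void)
{
	printf("v4.1 -> %d, v4.9 -> %d\n",
	       check_minorversion(1), check_minorversion(9));
	return 0;
}
```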
| @@ -562,14 +567,14 @@ static bool nfs4_cb_match_client(const struct sockaddr *addr, | |||
| 562 | */ | 567 | */ |
| 563 | struct nfs_client * | 568 | struct nfs_client * |
| 564 | nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, | 569 | nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, |
| 565 | struct nfs4_sessionid *sid) | 570 | struct nfs4_sessionid *sid, u32 minorversion) |
| 566 | { | 571 | { |
| 567 | struct nfs_client *clp; | 572 | struct nfs_client *clp; |
| 568 | struct nfs_net *nn = net_generic(net, nfs_net_id); | 573 | struct nfs_net *nn = net_generic(net, nfs_net_id); |
| 569 | 574 | ||
| 570 | spin_lock(&nn->nfs_client_lock); | 575 | spin_lock(&nn->nfs_client_lock); |
| 571 | list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { | 576 | list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { |
| 572 | if (nfs4_cb_match_client(addr, clp, 1) == false) | 577 | if (nfs4_cb_match_client(addr, clp, minorversion) == false) |
| 573 | continue; | 578 | continue; |
| 574 | 579 | ||
| 575 | if (!nfs4_has_session(clp)) | 580 | if (!nfs4_has_session(clp)) |
| @@ -592,7 +597,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, | |||
| 592 | 597 | ||
| 593 | struct nfs_client * | 598 | struct nfs_client * |
| 594 | nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, | 599 | nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, |
| 595 | struct nfs4_sessionid *sid) | 600 | struct nfs4_sessionid *sid, u32 minorversion) |
| 596 | { | 601 | { |
| 597 | return NULL; | 602 | return NULL; |
| 598 | } | 603 | } |
| @@ -626,6 +631,8 @@ static int nfs4_set_client(struct nfs_server *server, | |||
| 626 | 631 | ||
| 627 | if (server->flags & NFS_MOUNT_NORESVPORT) | 632 | if (server->flags & NFS_MOUNT_NORESVPORT) |
| 628 | set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); | 633 | set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); |
| 634 | if (server->options & NFS_OPTION_MIGRATION) | ||
| 635 | set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); | ||
| 629 | 636 | ||
| 630 | /* Allocate or find a client reference we can use */ | 637 | /* Allocate or find a client reference we can use */ |
| 631 | clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour); | 638 | clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour); |
| @@ -730,7 +737,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, | |||
| 730 | return -ENOMEM; | 737 | return -ENOMEM; |
| 731 | 738 | ||
| 732 | /* We must ensure the session is initialised first */ | 739 | /* We must ensure the session is initialised first */ |
| 733 | error = nfs4_init_session(server); | 740 | error = nfs4_init_session(server->nfs_client); |
| 734 | if (error < 0) | 741 | if (error < 0) |
| 735 | goto out; | 742 | goto out; |
| 736 | 743 | ||
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 13e6bb3e3fe5..e5b804dd944c 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c | |||
| @@ -69,7 +69,6 @@ nfs4_file_open(struct inode *inode, struct file *filp) | |||
| 69 | goto out_drop; | 69 | goto out_drop; |
| 70 | } | 70 | } |
| 71 | } | 71 | } |
| 72 | iput(inode); | ||
| 73 | if (inode != dentry->d_inode) | 72 | if (inode != dentry->d_inode) |
| 74 | goto out_drop; | 73 | goto out_drop; |
| 75 | 74 | ||
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 22d10623f5ee..17ed87ef9de8 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
| @@ -643,7 +643,8 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
| 643 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, | 643 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, |
| 644 | NFS_SERVER(lo->plh_inode)->nfs_client, id); | 644 | NFS_SERVER(lo->plh_inode)->nfs_client, id); |
| 645 | if (d == NULL) { | 645 | if (d == NULL) { |
| 646 | dsaddr = filelayout_get_device_info(lo->plh_inode, id, gfp_flags); | 646 | dsaddr = filelayout_get_device_info(lo->plh_inode, id, |
| 647 | lo->plh_lc_cred, gfp_flags); | ||
| 647 | if (dsaddr == NULL) | 648 | if (dsaddr == NULL) |
| 648 | goto out; | 649 | goto out; |
| 649 | } else | 650 | } else |
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 235ff952d3c8..cebd20e7e923 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h | |||
| @@ -150,6 +150,7 @@ struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, | |||
| 150 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 150 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
| 151 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 151 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
| 152 | struct nfs4_file_layout_dsaddr * | 152 | struct nfs4_file_layout_dsaddr * |
| 153 | filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); | 153 | filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, |
| 154 | struct rpc_cred *cred, gfp_t gfp_flags); | ||
| 154 | 155 | ||
| 155 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ | 156 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ |
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 661a0f611215..95604f64cab8 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c | |||
| @@ -668,7 +668,10 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl | |||
| 668 | * of available devices, and return it. | 668 | * of available devices, and return it. |
| 669 | */ | 669 | */ |
| 670 | struct nfs4_file_layout_dsaddr * | 670 | struct nfs4_file_layout_dsaddr * |
| 671 | filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) | 671 | filelayout_get_device_info(struct inode *inode, |
| 672 | struct nfs4_deviceid *dev_id, | ||
| 673 | struct rpc_cred *cred, | ||
| 674 | gfp_t gfp_flags) | ||
| 672 | { | 675 | { |
| 673 | struct pnfs_device *pdev = NULL; | 676 | struct pnfs_device *pdev = NULL; |
| 674 | u32 max_resp_sz; | 677 | u32 max_resp_sz; |
| @@ -708,8 +711,9 @@ filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gf | |||
| 708 | pdev->pgbase = 0; | 711 | pdev->pgbase = 0; |
| 709 | pdev->pglen = max_resp_sz; | 712 | pdev->pglen = max_resp_sz; |
| 710 | pdev->mincount = 0; | 713 | pdev->mincount = 0; |
| 714 | pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | ||
| 711 | 715 | ||
| 712 | rc = nfs4_proc_getdeviceinfo(server, pdev); | 716 | rc = nfs4_proc_getdeviceinfo(server, pdev, cred); |
| 713 | dprintk("%s getdevice info returns %d\n", __func__, rc); | 717 | dprintk("%s getdevice info returns %d\n", __func__, rc); |
| 714 | if (rc) | 718 | if (rc) |
| 715 | goto out_free; | 719 | goto out_free; |
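With the maxcount field added above, the client tells the server how much GETDEVICEINFO payload it can accept: the session's maximum response size minus the fixed per-operation XDR overhead. A small model of that sizing (the numbers are illustrative, not the real nfs41_maxgetdevinfo_overhead):

```c
#include <stdio.h>

/* maxcount = largest reply the session allows, minus per-op XDR overhead
 * (nfs41_maxgetdevinfo_overhead in the kernel; 20 here is a stand-in). */
static unsigned int getdevinfo_maxcount(unsigned int max_resp_sz,
					unsigned int overhead)
{
	return max_resp_sz > overhead ? max_resp_sz - overhead : 0;
}

int main(void)
{
	printf("maxcount = %u\n", getdevinfo_maxcount(65536, 20));
	return 0;
}
```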
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d7ba5616989c..cf11799297c4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
| @@ -77,15 +77,68 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data); | |||
| 77 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); | 77 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); |
| 78 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); | 78 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); |
| 79 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); | 79 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); |
| 80 | static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); | 80 | static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); |
| 81 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); | 81 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); |
| 82 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | 82 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
| 83 | struct nfs_fattr *fattr, struct iattr *sattr, | 83 | struct nfs_fattr *fattr, struct iattr *sattr, |
| 84 | struct nfs4_state *state); | 84 | struct nfs4_state *state, struct nfs4_label *ilabel, |
| 85 | struct nfs4_label *olabel); | ||
| 85 | #ifdef CONFIG_NFS_V4_1 | 86 | #ifdef CONFIG_NFS_V4_1 |
| 86 | static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *); | 87 | static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, |
| 87 | static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *); | 88 | struct rpc_cred *); |
| 89 | static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *, | ||
| 90 | struct rpc_cred *); | ||
| 88 | #endif | 91 | #endif |
| 92 | |||
| 93 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
| 94 | static inline struct nfs4_label * | ||
| 95 | nfs4_label_init_security(struct inode *dir, struct dentry *dentry, | ||
| 96 | struct iattr *sattr, struct nfs4_label *label) | ||
| 97 | { | ||
| 98 | int err; | ||
| 99 | |||
| 100 | if (label == NULL) | ||
| 101 | return NULL; | ||
| 102 | |||
| 103 | if (nfs_server_capable(dir, NFS_CAP_SECURITY_LABEL) == 0) | ||
| 104 | return NULL; | ||
| 105 | |||
| 106 | if (NFS_SERVER(dir)->nfs_client->cl_minorversion < 2) | ||
| 107 | return NULL; | ||
| 108 | |||
| 109 | err = security_dentry_init_security(dentry, sattr->ia_mode, | ||
| 110 | &dentry->d_name, (void **)&label->label, &label->len); | ||
| 111 | if (err == 0) | ||
| 112 | return label; | ||
| 113 | |||
| 114 | return NULL; | ||
| 115 | } | ||
| 116 | static inline void | ||
| 117 | nfs4_label_release_security(struct nfs4_label *label) | ||
| 118 | { | ||
| 119 | if (label) | ||
| 120 | security_release_secctx(label->label, label->len); | ||
| 121 | } | ||
| 122 | static inline u32 *nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label) | ||
| 123 | { | ||
| 124 | if (label) | ||
| 125 | return server->attr_bitmask; | ||
| 126 | |||
| 127 | return server->attr_bitmask_nl; | ||
| 128 | } | ||
| 129 | #else | ||
| 130 | static inline struct nfs4_label * | ||
| 131 | nfs4_label_init_security(struct inode *dir, struct dentry *dentry, | ||
| 132 | struct iattr *sattr, struct nfs4_label *l) | ||
| 133 | { return NULL; } | ||
| 134 | static inline void | ||
| 135 | nfs4_label_release_security(struct nfs4_label *label) | ||
| 136 | { return; } | ||
| 137 | static inline u32 * | ||
| 138 | nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label) | ||
| 139 | { return server->attr_bitmask; } | ||
| 140 | #endif | ||
| 141 | |||
| 89 | /* Prevent leaks of NFSv4 errors into userland */ | 142 | /* Prevent leaks of NFSv4 errors into userland */ |
| 90 | static int nfs4_map_errors(int err) | 143 | static int nfs4_map_errors(int err) |
| 91 | { | 144 | { |
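The helper trio above is only compiled in when CONFIG_NFS_V4_SECURITY_LABEL is set; the stubs in the #else branch make every caller effectively pass NULL labels and the full attribute bitmask. A rough user-space model of the intended caller pattern (types and the security context string are stand-ins, not the LSM API):

```c
#include <stdio.h>
#include <string.h>

struct model_label {
	char		*label;
	unsigned int	len;
};

/* Stand-in for nfs4_label_init_security(): returns NULL when labels are
 * unsupported so later calls can simply pass NULL down the chain. */
static struct model_label *label_init(struct model_label *l, int server_has_labels)
{
	static char ctx[] = "system_u:object_r:etc_t:s0";	/* fake context */

	if (!server_has_labels)
		return NULL;
	l->label = ctx;
	l->len = strlen(ctx);
	return l;
}

/* Stand-in for nfs4_label_release_security(). */
static void label_release(struct model_label *l)
{
	if (l)
		l->label = NULL;
}

int main(void)
{
	struct model_label l, *label;

	label = label_init(&l, 1);
	printf("label: %s\n", label ? label->label : "(none)");
	label_release(label);
	return 0;
}
```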
| @@ -134,7 +187,10 @@ const u32 nfs4_fattr_bitmap[3] = { | |||
| 134 | | FATTR4_WORD1_SPACE_USED | 187 | | FATTR4_WORD1_SPACE_USED |
| 135 | | FATTR4_WORD1_TIME_ACCESS | 188 | | FATTR4_WORD1_TIME_ACCESS |
| 136 | | FATTR4_WORD1_TIME_METADATA | 189 | | FATTR4_WORD1_TIME_METADATA |
| 137 | | FATTR4_WORD1_TIME_MODIFY | 190 | | FATTR4_WORD1_TIME_MODIFY, |
| 191 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
| 192 | FATTR4_WORD2_SECURITY_LABEL | ||
| 193 | #endif | ||
| 138 | }; | 194 | }; |
| 139 | 195 | ||
| 140 | static const u32 nfs4_pnfs_open_bitmap[3] = { | 196 | static const u32 nfs4_pnfs_open_bitmap[3] = { |
| @@ -161,7 +217,7 @@ static const u32 nfs4_open_noattr_bitmap[3] = { | |||
| 161 | | FATTR4_WORD0_FILEID, | 217 | | FATTR4_WORD0_FILEID, |
| 162 | }; | 218 | }; |
| 163 | 219 | ||
| 164 | const u32 nfs4_statfs_bitmap[2] = { | 220 | const u32 nfs4_statfs_bitmap[3] = { |
| 165 | FATTR4_WORD0_FILES_AVAIL | 221 | FATTR4_WORD0_FILES_AVAIL |
| 166 | | FATTR4_WORD0_FILES_FREE | 222 | | FATTR4_WORD0_FILES_FREE |
| 167 | | FATTR4_WORD0_FILES_TOTAL, | 223 | | FATTR4_WORD0_FILES_TOTAL, |
| @@ -170,7 +226,7 @@ const u32 nfs4_statfs_bitmap[2] = { | |||
| 170 | | FATTR4_WORD1_SPACE_TOTAL | 226 | | FATTR4_WORD1_SPACE_TOTAL |
| 171 | }; | 227 | }; |
| 172 | 228 | ||
| 173 | const u32 nfs4_pathconf_bitmap[2] = { | 229 | const u32 nfs4_pathconf_bitmap[3] = { |
| 174 | FATTR4_WORD0_MAXLINK | 230 | FATTR4_WORD0_MAXLINK |
| 175 | | FATTR4_WORD0_MAXNAME, | 231 | | FATTR4_WORD0_MAXNAME, |
| 176 | 0 | 232 | 0 |
| @@ -185,7 +241,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE | |||
| 185 | FATTR4_WORD2_LAYOUT_BLKSIZE | 241 | FATTR4_WORD2_LAYOUT_BLKSIZE |
| 186 | }; | 242 | }; |
| 187 | 243 | ||
| 188 | const u32 nfs4_fs_locations_bitmap[2] = { | 244 | const u32 nfs4_fs_locations_bitmap[3] = { |
| 189 | FATTR4_WORD0_TYPE | 245 | FATTR4_WORD0_TYPE |
| 190 | | FATTR4_WORD0_CHANGE | 246 | | FATTR4_WORD0_CHANGE |
| 191 | | FATTR4_WORD0_SIZE | 247 | | FATTR4_WORD0_SIZE |
| @@ -201,7 +257,7 @@ const u32 nfs4_fs_locations_bitmap[2] = { | |||
| 201 | | FATTR4_WORD1_TIME_ACCESS | 257 | | FATTR4_WORD1_TIME_ACCESS |
| 202 | | FATTR4_WORD1_TIME_METADATA | 258 | | FATTR4_WORD1_TIME_METADATA |
| 203 | | FATTR4_WORD1_TIME_MODIFY | 259 | | FATTR4_WORD1_TIME_MODIFY |
| 204 | | FATTR4_WORD1_MOUNTED_ON_FILEID | 260 | | FATTR4_WORD1_MOUNTED_ON_FILEID, |
| 205 | }; | 261 | }; |
| 206 | 262 | ||
| 207 | static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, | 263 | static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, |
| @@ -268,7 +324,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) | |||
| 268 | *timeout = NFS4_POLL_RETRY_MIN; | 324 | *timeout = NFS4_POLL_RETRY_MIN; |
| 269 | if (*timeout > NFS4_POLL_RETRY_MAX) | 325 | if (*timeout > NFS4_POLL_RETRY_MAX) |
| 270 | *timeout = NFS4_POLL_RETRY_MAX; | 326 | *timeout = NFS4_POLL_RETRY_MAX; |
| 271 | freezable_schedule_timeout_killable(*timeout); | 327 | freezable_schedule_timeout_killable_unsafe(*timeout); |
| 272 | if (fatal_signal_pending(current)) | 328 | if (fatal_signal_pending(current)) |
| 273 | res = -ERESTARTSYS; | 329 | res = -ERESTARTSYS; |
| 274 | *timeout <<= 1; | 330 | *timeout <<= 1; |
| @@ -762,6 +818,7 @@ struct nfs4_opendata { | |||
| 762 | struct nfs4_string owner_name; | 818 | struct nfs4_string owner_name; |
| 763 | struct nfs4_string group_name; | 819 | struct nfs4_string group_name; |
| 764 | struct nfs_fattr f_attr; | 820 | struct nfs_fattr f_attr; |
| 821 | struct nfs4_label *f_label; | ||
| 765 | struct dentry *dir; | 822 | struct dentry *dir; |
| 766 | struct dentry *dentry; | 823 | struct dentry *dentry; |
| 767 | struct nfs4_state_owner *owner; | 824 | struct nfs4_state_owner *owner; |
| @@ -807,6 +864,7 @@ nfs4_map_atomic_open_claim(struct nfs_server *server, | |||
| 807 | static void nfs4_init_opendata_res(struct nfs4_opendata *p) | 864 | static void nfs4_init_opendata_res(struct nfs4_opendata *p) |
| 808 | { | 865 | { |
| 809 | p->o_res.f_attr = &p->f_attr; | 866 | p->o_res.f_attr = &p->f_attr; |
| 867 | p->o_res.f_label = p->f_label; | ||
| 810 | p->o_res.seqid = p->o_arg.seqid; | 868 | p->o_res.seqid = p->o_arg.seqid; |
| 811 | p->c_res.seqid = p->c_arg.seqid; | 869 | p->c_res.seqid = p->c_arg.seqid; |
| 812 | p->o_res.server = p->o_arg.server; | 870 | p->o_res.server = p->o_arg.server; |
| @@ -818,6 +876,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) | |||
| 818 | static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | 876 | static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, |
| 819 | struct nfs4_state_owner *sp, fmode_t fmode, int flags, | 877 | struct nfs4_state_owner *sp, fmode_t fmode, int flags, |
| 820 | const struct iattr *attrs, | 878 | const struct iattr *attrs, |
| 879 | struct nfs4_label *label, | ||
| 821 | enum open_claim_type4 claim, | 880 | enum open_claim_type4 claim, |
| 822 | gfp_t gfp_mask) | 881 | gfp_t gfp_mask) |
| 823 | { | 882 | { |
| @@ -829,9 +888,14 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | |||
| 829 | p = kzalloc(sizeof(*p), gfp_mask); | 888 | p = kzalloc(sizeof(*p), gfp_mask); |
| 830 | if (p == NULL) | 889 | if (p == NULL) |
| 831 | goto err; | 890 | goto err; |
| 891 | |||
| 892 | p->f_label = nfs4_label_alloc(server, gfp_mask); | ||
| 893 | if (IS_ERR(p->f_label)) | ||
| 894 | goto err_free_p; | ||
| 895 | |||
| 832 | p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); | 896 | p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); |
| 833 | if (p->o_arg.seqid == NULL) | 897 | if (p->o_arg.seqid == NULL) |
| 834 | goto err_free; | 898 | goto err_free_label; |
| 835 | nfs_sb_active(dentry->d_sb); | 899 | nfs_sb_active(dentry->d_sb); |
| 836 | p->dentry = dget(dentry); | 900 | p->dentry = dget(dentry); |
| 837 | p->dir = parent; | 901 | p->dir = parent; |
| @@ -852,8 +916,9 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | |||
| 852 | p->o_arg.id.uniquifier = sp->so_seqid.owner_id; | 916 | p->o_arg.id.uniquifier = sp->so_seqid.owner_id; |
| 853 | p->o_arg.name = &dentry->d_name; | 917 | p->o_arg.name = &dentry->d_name; |
| 854 | p->o_arg.server = server; | 918 | p->o_arg.server = server; |
| 855 | p->o_arg.bitmask = server->attr_bitmask; | 919 | p->o_arg.bitmask = nfs4_bitmask(server, label); |
| 856 | p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; | 920 | p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; |
| 921 | p->o_arg.label = label; | ||
| 857 | p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); | 922 | p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); |
| 858 | switch (p->o_arg.claim) { | 923 | switch (p->o_arg.claim) { |
| 859 | case NFS4_OPEN_CLAIM_NULL: | 924 | case NFS4_OPEN_CLAIM_NULL: |
| @@ -884,7 +949,10 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | |||
| 884 | nfs4_init_opendata_res(p); | 949 | nfs4_init_opendata_res(p); |
| 885 | kref_init(&p->kref); | 950 | kref_init(&p->kref); |
| 886 | return p; | 951 | return p; |
| 887 | err_free: | 952 | |
| 953 | err_free_label: | ||
| 954 | nfs4_label_free(p->f_label); | ||
| 955 | err_free_p: | ||
| 888 | kfree(p); | 956 | kfree(p); |
| 889 | err: | 957 | err: |
| 890 | dput(parent); | 958 | dput(parent); |
| @@ -901,6 +969,9 @@ static void nfs4_opendata_free(struct kref *kref) | |||
| 901 | if (p->state != NULL) | 969 | if (p->state != NULL) |
| 902 | nfs4_put_open_state(p->state); | 970 | nfs4_put_open_state(p->state); |
| 903 | nfs4_put_state_owner(p->owner); | 971 | nfs4_put_state_owner(p->owner); |
| 972 | |||
| 973 | nfs4_label_free(p->f_label); | ||
| 974 | |||
| 904 | dput(p->dir); | 975 | dput(p->dir); |
| 905 | dput(p->dentry); | 976 | dput(p->dentry); |
| 906 | nfs_sb_deactive(sb); | 977 | nfs_sb_deactive(sb); |
| @@ -1179,6 +1250,8 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) | |||
| 1179 | if (ret) | 1250 | if (ret) |
| 1180 | goto err; | 1251 | goto err; |
| 1181 | 1252 | ||
| 1253 | nfs_setsecurity(inode, &data->f_attr, data->f_label); | ||
| 1254 | |||
| 1182 | if (data->o_res.delegation_type != 0) | 1255 | if (data->o_res.delegation_type != 0) |
| 1183 | nfs4_opendata_check_deleg(data, state); | 1256 | nfs4_opendata_check_deleg(data, state); |
| 1184 | update_open_stateid(state, &data->o_res.stateid, NULL, | 1257 | update_open_stateid(state, &data->o_res.stateid, NULL, |
| @@ -1205,7 +1278,7 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) | |||
| 1205 | ret = -EAGAIN; | 1278 | ret = -EAGAIN; |
| 1206 | if (!(data->f_attr.valid & NFS_ATTR_FATTR)) | 1279 | if (!(data->f_attr.valid & NFS_ATTR_FATTR)) |
| 1207 | goto err; | 1280 | goto err; |
| 1208 | inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); | 1281 | inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr, data->f_label); |
| 1209 | ret = PTR_ERR(inode); | 1282 | ret = PTR_ERR(inode); |
| 1210 | if (IS_ERR(inode)) | 1283 | if (IS_ERR(inode)) |
| 1211 | goto err; | 1284 | goto err; |
| @@ -1258,7 +1331,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context | |||
| 1258 | struct nfs4_opendata *opendata; | 1331 | struct nfs4_opendata *opendata; |
| 1259 | 1332 | ||
| 1260 | opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, | 1333 | opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, |
| 1261 | NULL, claim, GFP_NOFS); | 1334 | NULL, NULL, claim, GFP_NOFS); |
| 1262 | if (opendata == NULL) | 1335 | if (opendata == NULL) |
| 1263 | return ERR_PTR(-ENOMEM); | 1336 | return ERR_PTR(-ENOMEM); |
| 1264 | opendata->state = state; | 1337 | opendata->state = state; |
| @@ -1784,7 +1857,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) | |||
| 1784 | return status; | 1857 | return status; |
| 1785 | } | 1858 | } |
| 1786 | if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) | 1859 | if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) |
| 1787 | _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr); | 1860 | _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); |
| 1788 | return 0; | 1861 | return 0; |
| 1789 | } | 1862 | } |
| 1790 | 1863 | ||
| @@ -1855,18 +1928,30 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state) | |||
| 1855 | { | 1928 | { |
| 1856 | struct nfs_server *server = NFS_SERVER(state->inode); | 1929 | struct nfs_server *server = NFS_SERVER(state->inode); |
| 1857 | nfs4_stateid *stateid = &state->stateid; | 1930 | nfs4_stateid *stateid = &state->stateid; |
| 1858 | int status; | 1931 | struct nfs_delegation *delegation; |
| 1932 | struct rpc_cred *cred = NULL; | ||
| 1933 | int status = -NFS4ERR_BAD_STATEID; | ||
| 1859 | 1934 | ||
| 1860 | /* If a state reset has been done, test_stateid is unneeded */ | 1935 | /* If a state reset has been done, test_stateid is unneeded */ |
| 1861 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) | 1936 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) |
| 1862 | return; | 1937 | return; |
| 1863 | 1938 | ||
| 1864 | status = nfs41_test_stateid(server, stateid); | 1939 | /* Get the delegation credential for use by test/free_stateid */ |
| 1940 | rcu_read_lock(); | ||
| 1941 | delegation = rcu_dereference(NFS_I(state->inode)->delegation); | ||
| 1942 | if (delegation != NULL && | ||
| 1943 | nfs4_stateid_match(&delegation->stateid, stateid)) { | ||
| 1944 | cred = get_rpccred(delegation->cred); | ||
| 1945 | rcu_read_unlock(); | ||
| 1946 | status = nfs41_test_stateid(server, stateid, cred); | ||
| 1947 | } else | ||
| 1948 | rcu_read_unlock(); | ||
| 1949 | |||
| 1865 | if (status != NFS_OK) { | 1950 | if (status != NFS_OK) { |
| 1866 | /* Free the stateid unless the server explicitly | 1951 | /* Free the stateid unless the server explicitly |
| 1867 | * informs us the stateid is unrecognized. */ | 1952 | * informs us the stateid is unrecognized. */ |
| 1868 | if (status != -NFS4ERR_BAD_STATEID) | 1953 | if (status != -NFS4ERR_BAD_STATEID) |
| 1869 | nfs41_free_stateid(server, stateid); | 1954 | nfs41_free_stateid(server, stateid, cred); |
| 1870 | nfs_remove_bad_delegation(state->inode); | 1955 | nfs_remove_bad_delegation(state->inode); |
| 1871 | 1956 | ||
| 1872 | write_seqlock(&state->seqlock); | 1957 | write_seqlock(&state->seqlock); |
| @@ -1874,6 +1959,9 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state) | |||
| 1874 | write_sequnlock(&state->seqlock); | 1959 | write_sequnlock(&state->seqlock); |
| 1875 | clear_bit(NFS_DELEGATED_STATE, &state->flags); | 1960 | clear_bit(NFS_DELEGATED_STATE, &state->flags); |
| 1876 | } | 1961 | } |
| 1962 | |||
| 1963 | if (cred != NULL) | ||
| 1964 | put_rpccred(cred); | ||
| 1877 | } | 1965 | } |
| 1878 | 1966 | ||
| 1879 | /** | 1967 | /** |
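The reworked delegation check looks the delegation up under the RCU read lock, takes a reference on its credential, drops the lock, and only then issues the potentially sleeping TEST_STATEID/FREE_STATEID calls with that credential. A user-space model of that "reference under the lock, use outside it" pattern (a mutex stands in for rcu_read_lock(), and all types are stand-ins):

```c
#include <pthread.h>
#include <stdio.h>

struct cred { int refs; };
struct delegation { struct cred cred; int stateid; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct delegation *current_delegation;

/* Take a reference on the delegation credential while the lookup is
 * protected, so it stays valid after the lock is dropped. */
static struct cred *get_delegation_cred(int stateid)
{
	struct cred *cred = NULL;

	pthread_mutex_lock(&lock);
	if (current_delegation && current_delegation->stateid == stateid) {
		current_delegation->cred.refs++;	/* get_rpccred() */
		cred = &current_delegation->cred;
	}
	pthread_mutex_unlock(&lock);
	return cred;
}

static void put_delegation_cred(struct cred *cred)
{
	if (cred)
		cred->refs--;				/* put_rpccred() */
}

int main(void)
{
	struct delegation d = { .cred = { .refs = 1 }, .stateid = 42 };
	struct cred *cred;

	current_delegation = &d;
	cred = get_delegation_cred(42);
	printf("cred %sfound, refs now %d\n", cred ? "" : "not ", d.cred.refs);
	put_delegation_cred(cred);
	return 0;
}
```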
| @@ -1888,6 +1976,7 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) | |||
| 1888 | { | 1976 | { |
| 1889 | struct nfs_server *server = NFS_SERVER(state->inode); | 1977 | struct nfs_server *server = NFS_SERVER(state->inode); |
| 1890 | nfs4_stateid *stateid = &state->open_stateid; | 1978 | nfs4_stateid *stateid = &state->open_stateid; |
| 1979 | struct rpc_cred *cred = state->owner->so_cred; | ||
| 1891 | int status; | 1980 | int status; |
| 1892 | 1981 | ||
| 1893 | /* If a state reset has been done, test_stateid is unneeded */ | 1982 | /* If a state reset has been done, test_stateid is unneeded */ |
| @@ -1896,12 +1985,12 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) | |||
| 1896 | (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0)) | 1985 | (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0)) |
| 1897 | return -NFS4ERR_BAD_STATEID; | 1986 | return -NFS4ERR_BAD_STATEID; |
| 1898 | 1987 | ||
| 1899 | status = nfs41_test_stateid(server, stateid); | 1988 | status = nfs41_test_stateid(server, stateid, cred); |
| 1900 | if (status != NFS_OK) { | 1989 | if (status != NFS_OK) { |
| 1901 | /* Free the stateid unless the server explicitly | 1990 | /* Free the stateid unless the server explicitly |
| 1902 | * informs us the stateid is unrecognized. */ | 1991 | * informs us the stateid is unrecognized. */ |
| 1903 | if (status != -NFS4ERR_BAD_STATEID) | 1992 | if (status != -NFS4ERR_BAD_STATEID) |
| 1904 | nfs41_free_stateid(server, stateid); | 1993 | nfs41_free_stateid(server, stateid, cred); |
| 1905 | 1994 | ||
| 1906 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | 1995 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); |
| 1907 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | 1996 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); |
| @@ -1942,10 +2031,11 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct | |||
| 1942 | static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, | 2031 | static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, |
| 1943 | fmode_t fmode, | 2032 | fmode_t fmode, |
| 1944 | int flags, | 2033 | int flags, |
| 1945 | struct nfs4_state **res) | 2034 | struct nfs_open_context *ctx) |
| 1946 | { | 2035 | { |
| 1947 | struct nfs4_state_owner *sp = opendata->owner; | 2036 | struct nfs4_state_owner *sp = opendata->owner; |
| 1948 | struct nfs_server *server = sp->so_server; | 2037 | struct nfs_server *server = sp->so_server; |
| 2038 | struct dentry *dentry; | ||
| 1949 | struct nfs4_state *state; | 2039 | struct nfs4_state *state; |
| 1950 | unsigned int seq; | 2040 | unsigned int seq; |
| 1951 | int ret; | 2041 | int ret; |
| @@ -1963,13 +2053,31 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, | |||
| 1963 | if (server->caps & NFS_CAP_POSIX_LOCK) | 2053 | if (server->caps & NFS_CAP_POSIX_LOCK) |
| 1964 | set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); | 2054 | set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); |
| 1965 | 2055 | ||
| 2056 | dentry = opendata->dentry; | ||
| 2057 | if (dentry->d_inode == NULL) { | ||
| 2058 | /* FIXME: Is this d_drop() ever needed? */ | ||
| 2059 | d_drop(dentry); | ||
| 2060 | dentry = d_add_unique(dentry, igrab(state->inode)); | ||
| 2061 | if (dentry == NULL) { | ||
| 2062 | dentry = opendata->dentry; | ||
| 2063 | } else if (dentry != ctx->dentry) { | ||
| 2064 | dput(ctx->dentry); | ||
| 2065 | ctx->dentry = dget(dentry); | ||
| 2066 | } | ||
| 2067 | nfs_set_verifier(dentry, | ||
| 2068 | nfs_save_change_attribute(opendata->dir->d_inode)); | ||
| 2069 | } | ||
| 2070 | |||
| 1966 | ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags); | 2071 | ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags); |
| 1967 | if (ret != 0) | 2072 | if (ret != 0) |
| 1968 | goto out; | 2073 | goto out; |
| 1969 | 2074 | ||
| 1970 | if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | 2075 | ctx->state = state; |
| 1971 | nfs4_schedule_stateid_recovery(server, state); | 2076 | if (dentry->d_inode == state->inode) { |
| 1972 | *res = state; | 2077 | nfs_inode_attach_open_context(ctx); |
| 2078 | if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | ||
| 2079 | nfs4_schedule_stateid_recovery(server, state); | ||
| 2080 | } | ||
| 1973 | out: | 2081 | out: |
| 1974 | return ret; | 2082 | return ret; |
| 1975 | } | 2083 | } |
| @@ -1978,19 +2086,21 @@ out: | |||
| 1978 | * Returns a referenced nfs4_state | 2086 | * Returns a referenced nfs4_state |
| 1979 | */ | 2087 | */ |
| 1980 | static int _nfs4_do_open(struct inode *dir, | 2088 | static int _nfs4_do_open(struct inode *dir, |
| 1981 | struct dentry *dentry, | 2089 | struct nfs_open_context *ctx, |
| 1982 | fmode_t fmode, | ||
| 1983 | int flags, | 2090 | int flags, |
| 1984 | struct iattr *sattr, | 2091 | struct iattr *sattr, |
| 1985 | struct rpc_cred *cred, | 2092 | struct nfs4_label *label) |
| 1986 | struct nfs4_state **res, | ||
| 1987 | struct nfs4_threshold **ctx_th) | ||
| 1988 | { | 2093 | { |
| 1989 | struct nfs4_state_owner *sp; | 2094 | struct nfs4_state_owner *sp; |
| 1990 | struct nfs4_state *state = NULL; | 2095 | struct nfs4_state *state = NULL; |
| 1991 | struct nfs_server *server = NFS_SERVER(dir); | 2096 | struct nfs_server *server = NFS_SERVER(dir); |
| 1992 | struct nfs4_opendata *opendata; | 2097 | struct nfs4_opendata *opendata; |
| 2098 | struct dentry *dentry = ctx->dentry; | ||
| 2099 | struct rpc_cred *cred = ctx->cred; | ||
| 2100 | struct nfs4_threshold **ctx_th = &ctx->mdsthreshold; | ||
| 2101 | fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC); | ||
| 1993 | enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; | 2102 | enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; |
| 2103 | struct nfs4_label *olabel = NULL; | ||
| 1994 | int status; | 2104 | int status; |
| 1995 | 2105 | ||
| 1996 | /* Protect against reboot recovery conflicts */ | 2106 | /* Protect against reboot recovery conflicts */ |
| @@ -2009,22 +2119,31 @@ static int _nfs4_do_open(struct inode *dir, | |||
| 2009 | if (dentry->d_inode) | 2119 | if (dentry->d_inode) |
| 2010 | claim = NFS4_OPEN_CLAIM_FH; | 2120 | claim = NFS4_OPEN_CLAIM_FH; |
| 2011 | opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, | 2121 | opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, |
| 2012 | claim, GFP_KERNEL); | 2122 | label, claim, GFP_KERNEL); |
| 2013 | if (opendata == NULL) | 2123 | if (opendata == NULL) |
| 2014 | goto err_put_state_owner; | 2124 | goto err_put_state_owner; |
| 2015 | 2125 | ||
| 2126 | if (label) { | ||
| 2127 | olabel = nfs4_label_alloc(server, GFP_KERNEL); | ||
| 2128 | if (IS_ERR(olabel)) { | ||
| 2129 | status = PTR_ERR(olabel); | ||
| 2130 | goto err_opendata_put; | ||
| 2131 | } | ||
| 2132 | } | ||
| 2133 | |||
| 2016 | if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { | 2134 | if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { |
| 2017 | opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); | 2135 | opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); |
| 2018 | if (!opendata->f_attr.mdsthreshold) | 2136 | if (!opendata->f_attr.mdsthreshold) |
| 2019 | goto err_opendata_put; | 2137 | goto err_free_label; |
| 2020 | opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; | 2138 | opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; |
| 2021 | } | 2139 | } |
| 2022 | if (dentry->d_inode != NULL) | 2140 | if (dentry->d_inode != NULL) |
| 2023 | opendata->state = nfs4_get_open_state(dentry->d_inode, sp); | 2141 | opendata->state = nfs4_get_open_state(dentry->d_inode, sp); |
| 2024 | 2142 | ||
| 2025 | status = _nfs4_open_and_get_state(opendata, fmode, flags, &state); | 2143 | status = _nfs4_open_and_get_state(opendata, fmode, flags, ctx); |
| 2026 | if (status != 0) | 2144 | if (status != 0) |
| 2027 | goto err_opendata_put; | 2145 | goto err_free_label; |
| 2146 | state = ctx->state; | ||
| 2028 | 2147 | ||
| 2029 | if ((opendata->o_arg.open_flags & O_EXCL) && | 2148 | if ((opendata->o_arg.open_flags & O_EXCL) && |
| 2030 | (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { | 2149 | (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { |
| @@ -2033,10 +2152,12 @@ static int _nfs4_do_open(struct inode *dir, | |||
| 2033 | nfs_fattr_init(opendata->o_res.f_attr); | 2152 | nfs_fattr_init(opendata->o_res.f_attr); |
| 2034 | status = nfs4_do_setattr(state->inode, cred, | 2153 | status = nfs4_do_setattr(state->inode, cred, |
| 2035 | opendata->o_res.f_attr, sattr, | 2154 | opendata->o_res.f_attr, sattr, |
| 2036 | state); | 2155 | state, label, olabel); |
| 2037 | if (status == 0) | 2156 | if (status == 0) { |
| 2038 | nfs_setattr_update_inode(state->inode, sattr); | 2157 | nfs_setattr_update_inode(state->inode, sattr); |
| 2039 | nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); | 2158 | nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); |
| 2159 | nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); | ||
| 2160 | } | ||
| 2040 | } | 2161 | } |
| 2041 | 2162 | ||
| 2042 | if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) | 2163 | if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) |
| @@ -2045,38 +2166,37 @@ static int _nfs4_do_open(struct inode *dir, | |||
| 2045 | kfree(opendata->f_attr.mdsthreshold); | 2166 | kfree(opendata->f_attr.mdsthreshold); |
| 2046 | opendata->f_attr.mdsthreshold = NULL; | 2167 | opendata->f_attr.mdsthreshold = NULL; |
| 2047 | 2168 | ||
| 2169 | nfs4_label_free(olabel); | ||
| 2170 | |||
| 2048 | nfs4_opendata_put(opendata); | 2171 | nfs4_opendata_put(opendata); |
| 2049 | nfs4_put_state_owner(sp); | 2172 | nfs4_put_state_owner(sp); |
| 2050 | *res = state; | ||
| 2051 | return 0; | 2173 | return 0; |
| 2174 | err_free_label: | ||
| 2175 | nfs4_label_free(olabel); | ||
| 2052 | err_opendata_put: | 2176 | err_opendata_put: |
| 2053 | kfree(opendata->f_attr.mdsthreshold); | 2177 | kfree(opendata->f_attr.mdsthreshold); |
| 2054 | nfs4_opendata_put(opendata); | 2178 | nfs4_opendata_put(opendata); |
| 2055 | err_put_state_owner: | 2179 | err_put_state_owner: |
| 2056 | nfs4_put_state_owner(sp); | 2180 | nfs4_put_state_owner(sp); |
| 2057 | out_err: | 2181 | out_err: |
| 2058 | *res = NULL; | ||
| 2059 | return status; | 2182 | return status; |
| 2060 | } | 2183 | } |
| 2061 | 2184 | ||
| 2062 | 2185 | ||
| 2063 | static struct nfs4_state *nfs4_do_open(struct inode *dir, | 2186 | static struct nfs4_state *nfs4_do_open(struct inode *dir, |
| 2064 | struct dentry *dentry, | 2187 | struct nfs_open_context *ctx, |
| 2065 | fmode_t fmode, | ||
| 2066 | int flags, | 2188 | int flags, |
| 2067 | struct iattr *sattr, | 2189 | struct iattr *sattr, |
| 2068 | struct rpc_cred *cred, | 2190 | struct nfs4_label *label) |
| 2069 | struct nfs4_threshold **ctx_th) | ||
| 2070 | { | 2191 | { |
| 2071 | struct nfs_server *server = NFS_SERVER(dir); | 2192 | struct nfs_server *server = NFS_SERVER(dir); |
| 2072 | struct nfs4_exception exception = { }; | 2193 | struct nfs4_exception exception = { }; |
| 2073 | struct nfs4_state *res; | 2194 | struct nfs4_state *res; |
| 2074 | int status; | 2195 | int status; |
| 2075 | 2196 | ||
| 2076 | fmode &= FMODE_READ|FMODE_WRITE|FMODE_EXEC; | ||
| 2077 | do { | 2197 | do { |
| 2078 | status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, | 2198 | status = _nfs4_do_open(dir, ctx, flags, sattr, label); |
| 2079 | &res, ctx_th); | 2199 | res = ctx->state; |
| 2080 | if (status == 0) | 2200 | if (status == 0) |
| 2081 | break; | 2201 | break; |
| 2082 | /* NOTE: BAD_SEQID means the server and client disagree about the | 2202 | /* NOTE: BAD_SEQID means the server and client disagree about the |
| @@ -2122,7 +2242,8 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, | |||
| 2122 | 2242 | ||
| 2123 | static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | 2243 | static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
| 2124 | struct nfs_fattr *fattr, struct iattr *sattr, | 2244 | struct nfs_fattr *fattr, struct iattr *sattr, |
| 2125 | struct nfs4_state *state) | 2245 | struct nfs4_state *state, struct nfs4_label *ilabel, |
| 2246 | struct nfs4_label *olabel) | ||
| 2126 | { | 2247 | { |
| 2127 | struct nfs_server *server = NFS_SERVER(inode); | 2248 | struct nfs_server *server = NFS_SERVER(inode); |
| 2128 | struct nfs_setattrargs arg = { | 2249 | struct nfs_setattrargs arg = { |
| @@ -2130,9 +2251,11 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
| 2130 | .iap = sattr, | 2251 | .iap = sattr, |
| 2131 | .server = server, | 2252 | .server = server, |
| 2132 | .bitmask = server->attr_bitmask, | 2253 | .bitmask = server->attr_bitmask, |
| 2254 | .label = ilabel, | ||
| 2133 | }; | 2255 | }; |
| 2134 | struct nfs_setattrres res = { | 2256 | struct nfs_setattrres res = { |
| 2135 | .fattr = fattr, | 2257 | .fattr = fattr, |
| 2258 | .label = olabel, | ||
| 2136 | .server = server, | 2259 | .server = server, |
| 2137 | }; | 2260 | }; |
| 2138 | struct rpc_message msg = { | 2261 | struct rpc_message msg = { |
| @@ -2146,6 +2269,10 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
| 2146 | bool truncate; | 2269 | bool truncate; |
| 2147 | int status; | 2270 | int status; |
| 2148 | 2271 | ||
| 2272 | arg.bitmask = nfs4_bitmask(server, ilabel); | ||
| 2273 | if (ilabel) | ||
| 2274 | arg.bitmask = nfs4_bitmask(server, olabel); | ||
| 2275 | |||
| 2149 | nfs_fattr_init(fattr); | 2276 | nfs_fattr_init(fattr); |
| 2150 | 2277 | ||
| 2151 | /* Servers should only apply open mode checks for file size changes */ | 2278 | /* Servers should only apply open mode checks for file size changes */ |
| @@ -2172,7 +2299,8 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
| 2172 | 2299 | ||
| 2173 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | 2300 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
| 2174 | struct nfs_fattr *fattr, struct iattr *sattr, | 2301 | struct nfs_fattr *fattr, struct iattr *sattr, |
| 2175 | struct nfs4_state *state) | 2302 | struct nfs4_state *state, struct nfs4_label *ilabel, |
| 2303 | struct nfs4_label *olabel) | ||
| 2176 | { | 2304 | { |
| 2177 | struct nfs_server *server = NFS_SERVER(inode); | 2305 | struct nfs_server *server = NFS_SERVER(inode); |
| 2178 | struct nfs4_exception exception = { | 2306 | struct nfs4_exception exception = { |
| @@ -2181,7 +2309,7 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
| 2181 | }; | 2309 | }; |
| 2182 | int err; | 2310 | int err; |
| 2183 | do { | 2311 | do { |
| 2184 | err = _nfs4_do_setattr(inode, cred, fattr, sattr, state); | 2312 | err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel); |
| 2185 | switch (err) { | 2313 | switch (err) { |
| 2186 | case -NFS4ERR_OPENMODE: | 2314 | case -NFS4ERR_OPENMODE: |
| 2187 | if (!(sattr->ia_valid & ATTR_SIZE)) { | 2315 | if (!(sattr->ia_valid & ATTR_SIZE)) { |
| @@ -2426,14 +2554,18 @@ static struct inode * | |||
| 2426 | nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) | 2554 | nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) |
| 2427 | { | 2555 | { |
| 2428 | struct nfs4_state *state; | 2556 | struct nfs4_state *state; |
| 2557 | struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL; | ||
| 2558 | |||
| 2559 | label = nfs4_label_init_security(dir, ctx->dentry, attr, &l); | ||
| 2429 | 2560 | ||
| 2430 | /* Protect against concurrent sillydeletes */ | 2561 | /* Protect against concurrent sillydeletes */ |
| 2431 | state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, | 2562 | state = nfs4_do_open(dir, ctx, open_flags, attr, label); |
| 2432 | ctx->cred, &ctx->mdsthreshold); | 2563 | |
| 2564 | nfs4_label_release_security(label); | ||
| 2565 | |||
| 2433 | if (IS_ERR(state)) | 2566 | if (IS_ERR(state)) |
| 2434 | return ERR_CAST(state); | 2567 | return ERR_CAST(state); |
| 2435 | ctx->state = state; | 2568 | return state->inode; |
| 2436 | return igrab(state->inode); | ||
| 2437 | } | 2569 | } |
| 2438 | 2570 | ||
| 2439 | static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) | 2571 | static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) |
| @@ -2489,7 +2621,17 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f | |||
| 2489 | server->caps |= NFS_CAP_CTIME; | 2621 | server->caps |= NFS_CAP_CTIME; |
| 2490 | if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY) | 2622 | if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY) |
| 2491 | server->caps |= NFS_CAP_MTIME; | 2623 | server->caps |= NFS_CAP_MTIME; |
| 2624 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
| 2625 | if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL) | ||
| 2626 | server->caps |= NFS_CAP_SECURITY_LABEL; | ||
| 2627 | #endif | ||
| 2628 | memcpy(server->attr_bitmask_nl, res.attr_bitmask, | ||
| 2629 | sizeof(server->attr_bitmask)); | ||
| 2492 | 2630 | ||
| 2631 | if (server->caps & NFS_CAP_SECURITY_LABEL) { | ||
| 2632 | server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; | ||
| 2633 | res.attr_bitmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL; | ||
| 2634 | } | ||
| 2493 | memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); | 2635 | memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); |
| 2494 | server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; | 2636 | server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; |
| 2495 | server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; | 2637 | server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; |
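Together with nfs4_bitmask() defined earlier in the file, the capabilities code now keeps two request bitmaps: the full one, and a copy (attr_bitmask_nl) with the security-label bit cleared for callers that pass no label buffer. A compact model of building and choosing between them (the bit value and word layout are simplified placeholders):

```c
#include <stdio.h>
#include <string.h>

#define FATTR4_WORD2_SECURITY_LABEL	(1u << 16)	/* placeholder bit value */

struct model_server {
	unsigned int attr_bitmask[3];
	unsigned int attr_bitmask_nl[3];	/* "no label" variant */
};

/* Copy the full bitmap and strip the label bit from word 2. */
static void setup_bitmasks(struct model_server *s)
{
	memcpy(s->attr_bitmask_nl, s->attr_bitmask, sizeof(s->attr_bitmask));
	s->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
}

/* Mirrors nfs4_bitmask(): request the label only when the caller
 * supplied somewhere to put it. */
static const unsigned int *choose_bitmask(const struct model_server *s,
					  const void *label)
{
	return label ? s->attr_bitmask : s->attr_bitmask_nl;
}

int main(void)
{
	struct model_server s = {
		.attr_bitmask = { 0x1, 0x2, FATTR4_WORD2_SECURITY_LABEL },
	};
	int dummy_label;

	setup_bitmasks(&s);
	printf("with label: word2=%#x, without: word2=%#x\n",
	       choose_bitmask(&s, &dummy_label)[2],
	       choose_bitmask(&s, NULL)[2]);
	return 0;
}
```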
| @@ -2515,8 +2657,9 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) | |||
| 2515 | static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, | 2657 | static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, |
| 2516 | struct nfs_fsinfo *info) | 2658 | struct nfs_fsinfo *info) |
| 2517 | { | 2659 | { |
| 2660 | u32 bitmask[3]; | ||
| 2518 | struct nfs4_lookup_root_arg args = { | 2661 | struct nfs4_lookup_root_arg args = { |
| 2519 | .bitmask = nfs4_fattr_bitmap, | 2662 | .bitmask = bitmask, |
| 2520 | }; | 2663 | }; |
| 2521 | struct nfs4_lookup_res res = { | 2664 | struct nfs4_lookup_res res = { |
| 2522 | .server = server, | 2665 | .server = server, |
| @@ -2529,6 +2672,13 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 2529 | .rpc_resp = &res, | 2672 | .rpc_resp = &res, |
| 2530 | }; | 2673 | }; |
| 2531 | 2674 | ||
| 2675 | bitmask[0] = nfs4_fattr_bitmap[0]; | ||
| 2676 | bitmask[1] = nfs4_fattr_bitmap[1]; | ||
| 2677 | /* | ||
| 2678 | * Process the label in the upcoming getfattr | ||
| 2679 | */ | ||
| 2680 | bitmask[2] = nfs4_fattr_bitmap[2] & ~FATTR4_WORD2_SECURITY_LABEL; | ||
| 2681 | |||
| 2532 | nfs_fattr_init(info->fattr); | 2682 | nfs_fattr_init(info->fattr); |
| 2533 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); | 2683 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
| 2534 | } | 2684 | } |
| @@ -2648,6 +2798,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, | |||
| 2648 | { | 2798 | { |
| 2649 | int error; | 2799 | int error; |
| 2650 | struct nfs_fattr *fattr = info->fattr; | 2800 | struct nfs_fattr *fattr = info->fattr; |
| 2801 | struct nfs4_label *label = NULL; | ||
| 2651 | 2802 | ||
| 2652 | error = nfs4_server_capabilities(server, mntfh); | 2803 | error = nfs4_server_capabilities(server, mntfh); |
| 2653 | if (error < 0) { | 2804 | if (error < 0) { |
| @@ -2655,16 +2806,23 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, | |||
| 2655 | return error; | 2806 | return error; |
| 2656 | } | 2807 | } |
| 2657 | 2808 | ||
| 2658 | error = nfs4_proc_getattr(server, mntfh, fattr); | 2809 | label = nfs4_label_alloc(server, GFP_KERNEL); |
| 2810 | if (IS_ERR(label)) | ||
| 2811 | return PTR_ERR(label); | ||
| 2812 | |||
| 2813 | error = nfs4_proc_getattr(server, mntfh, fattr, label); | ||
| 2659 | if (error < 0) { | 2814 | if (error < 0) { |
| 2660 | dprintk("nfs4_get_root: getattr error = %d\n", -error); | 2815 | dprintk("nfs4_get_root: getattr error = %d\n", -error); |
| 2661 | return error; | 2816 | goto err_free_label; |
| 2662 | } | 2817 | } |
| 2663 | 2818 | ||
| 2664 | if (fattr->valid & NFS_ATTR_FATTR_FSID && | 2819 | if (fattr->valid & NFS_ATTR_FATTR_FSID && |
| 2665 | !nfs_fsid_equal(&server->fsid, &fattr->fsid)) | 2820 | !nfs_fsid_equal(&server->fsid, &fattr->fsid)) |
| 2666 | memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); | 2821 | memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); |
| 2667 | 2822 | ||
| 2823 | err_free_label: | ||
| 2824 | nfs4_label_free(label); | ||
| 2825 | |||
| 2668 | return error; | 2826 | return error; |
| 2669 | } | 2827 | } |
| 2670 | 2828 | ||
| @@ -2711,7 +2869,8 @@ out: | |||
| 2711 | return status; | 2869 | return status; |
| 2712 | } | 2870 | } |
| 2713 | 2871 | ||
| 2714 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 2872 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, |
| 2873 | struct nfs_fattr *fattr, struct nfs4_label *label) | ||
| 2715 | { | 2874 | { |
| 2716 | struct nfs4_getattr_arg args = { | 2875 | struct nfs4_getattr_arg args = { |
| 2717 | .fh = fhandle, | 2876 | .fh = fhandle, |
| @@ -2719,6 +2878,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 2719 | }; | 2878 | }; |
| 2720 | struct nfs4_getattr_res res = { | 2879 | struct nfs4_getattr_res res = { |
| 2721 | .fattr = fattr, | 2880 | .fattr = fattr, |
| 2881 | .label = label, | ||
| 2722 | .server = server, | 2882 | .server = server, |
| 2723 | }; | 2883 | }; |
| 2724 | struct rpc_message msg = { | 2884 | struct rpc_message msg = { |
| @@ -2726,18 +2886,21 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 2726 | .rpc_argp = &args, | 2886 | .rpc_argp = &args, |
| 2727 | .rpc_resp = &res, | 2887 | .rpc_resp = &res, |
| 2728 | }; | 2888 | }; |
| 2729 | 2889 | ||
| 2890 | args.bitmask = nfs4_bitmask(server, label); | ||
| 2891 | |||
| 2730 | nfs_fattr_init(fattr); | 2892 | nfs_fattr_init(fattr); |
| 2731 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); | 2893 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
| 2732 | } | 2894 | } |
| 2733 | 2895 | ||
| 2734 | static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 2896 | static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, |
| 2897 | struct nfs_fattr *fattr, struct nfs4_label *label) | ||
| 2735 | { | 2898 | { |
| 2736 | struct nfs4_exception exception = { }; | 2899 | struct nfs4_exception exception = { }; |
| 2737 | int err; | 2900 | int err; |
| 2738 | do { | 2901 | do { |
| 2739 | err = nfs4_handle_exception(server, | 2902 | err = nfs4_handle_exception(server, |
| 2740 | _nfs4_proc_getattr(server, fhandle, fattr), | 2903 | _nfs4_proc_getattr(server, fhandle, fattr, label), |
| 2741 | &exception); | 2904 | &exception); |
| 2742 | } while (exception.retry); | 2905 | } while (exception.retry); |
| 2743 | return err; | 2906 | return err; |
| @@ -2767,6 +2930,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
| 2767 | struct inode *inode = dentry->d_inode; | 2930 | struct inode *inode = dentry->d_inode; |
| 2768 | struct rpc_cred *cred = NULL; | 2931 | struct rpc_cred *cred = NULL; |
| 2769 | struct nfs4_state *state = NULL; | 2932 | struct nfs4_state *state = NULL; |
| 2933 | struct nfs4_label *label = NULL; | ||
| 2770 | int status; | 2934 | int status; |
| 2771 | 2935 | ||
| 2772 | if (pnfs_ld_layoutret_on_setattr(inode)) | 2936 | if (pnfs_ld_layoutret_on_setattr(inode)) |
| @@ -2793,15 +2957,22 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
| 2793 | } | 2957 | } |
| 2794 | } | 2958 | } |
| 2795 | 2959 | ||
| 2796 | status = nfs4_do_setattr(inode, cred, fattr, sattr, state); | 2960 | label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); |
| 2797 | if (status == 0) | 2961 | if (IS_ERR(label)) |
| 2962 | return PTR_ERR(label); | ||
| 2963 | |||
| 2964 | status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label); | ||
| 2965 | if (status == 0) { | ||
| 2798 | nfs_setattr_update_inode(inode, sattr); | 2966 | nfs_setattr_update_inode(inode, sattr); |
| 2967 | nfs_setsecurity(inode, fattr, label); | ||
| 2968 | } | ||
| 2969 | nfs4_label_free(label); | ||
| 2799 | return status; | 2970 | return status; |
| 2800 | } | 2971 | } |
| 2801 | 2972 | ||
| 2802 | static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, | 2973 | static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, |
| 2803 | const struct qstr *name, struct nfs_fh *fhandle, | 2974 | const struct qstr *name, struct nfs_fh *fhandle, |
| 2804 | struct nfs_fattr *fattr) | 2975 | struct nfs_fattr *fattr, struct nfs4_label *label) |
| 2805 | { | 2976 | { |
| 2806 | struct nfs_server *server = NFS_SERVER(dir); | 2977 | struct nfs_server *server = NFS_SERVER(dir); |
| 2807 | int status; | 2978 | int status; |
| @@ -2813,6 +2984,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, | |||
| 2813 | struct nfs4_lookup_res res = { | 2984 | struct nfs4_lookup_res res = { |
| 2814 | .server = server, | 2985 | .server = server, |
| 2815 | .fattr = fattr, | 2986 | .fattr = fattr, |
| 2987 | .label = label, | ||
| 2816 | .fh = fhandle, | 2988 | .fh = fhandle, |
| 2817 | }; | 2989 | }; |
| 2818 | struct rpc_message msg = { | 2990 | struct rpc_message msg = { |
| @@ -2821,6 +2993,8 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, | |||
| 2821 | .rpc_resp = &res, | 2993 | .rpc_resp = &res, |
| 2822 | }; | 2994 | }; |
| 2823 | 2995 | ||
| 2996 | args.bitmask = nfs4_bitmask(server, label); | ||
| 2997 | |||
| 2824 | nfs_fattr_init(fattr); | 2998 | nfs_fattr_init(fattr); |
| 2825 | 2999 | ||
| 2826 | dprintk("NFS call lookup %s\n", name->name); | 3000 | dprintk("NFS call lookup %s\n", name->name); |
| @@ -2839,13 +3013,13 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) | |||
| 2839 | 3013 | ||
| 2840 | static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, | 3014 | static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, |
| 2841 | struct qstr *name, struct nfs_fh *fhandle, | 3015 | struct qstr *name, struct nfs_fh *fhandle, |
| 2842 | struct nfs_fattr *fattr) | 3016 | struct nfs_fattr *fattr, struct nfs4_label *label) |
| 2843 | { | 3017 | { |
| 2844 | struct nfs4_exception exception = { }; | 3018 | struct nfs4_exception exception = { }; |
| 2845 | struct rpc_clnt *client = *clnt; | 3019 | struct rpc_clnt *client = *clnt; |
| 2846 | int err; | 3020 | int err; |
| 2847 | do { | 3021 | do { |
| 2848 | err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr); | 3022 | err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr, label); |
| 2849 | switch (err) { | 3023 | switch (err) { |
| 2850 | case -NFS4ERR_BADNAME: | 3024 | case -NFS4ERR_BADNAME: |
| 2851 | err = -ENOENT; | 3025 | err = -ENOENT; |
| @@ -2879,12 +3053,13 @@ out: | |||
| 2879 | } | 3053 | } |
| 2880 | 3054 | ||
| 2881 | static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, | 3055 | static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, |
| 2882 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 3056 | struct nfs_fh *fhandle, struct nfs_fattr *fattr, |
| 3057 | struct nfs4_label *label) | ||
| 2883 | { | 3058 | { |
| 2884 | int status; | 3059 | int status; |
| 2885 | struct rpc_clnt *client = NFS_CLIENT(dir); | 3060 | struct rpc_clnt *client = NFS_CLIENT(dir); |
| 2886 | 3061 | ||
| 2887 | status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr); | 3062 | status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, label); |
| 2888 | if (client != NFS_CLIENT(dir)) { | 3063 | if (client != NFS_CLIENT(dir)) { |
| 2889 | rpc_shutdown_client(client); | 3064 | rpc_shutdown_client(client); |
| 2890 | nfs_fixup_secinfo_attributes(fattr); | 3065 | nfs_fixup_secinfo_attributes(fattr); |
| @@ -2899,7 +3074,7 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name, | |||
| 2899 | int status; | 3074 | int status; |
| 2900 | struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir)); | 3075 | struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir)); |
| 2901 | 3076 | ||
| 2902 | status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr); | 3077 | status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL); |
| 2903 | if (status < 0) { | 3078 | if (status < 0) { |
| 2904 | rpc_shutdown_client(client); | 3079 | rpc_shutdown_client(client); |
| 2905 | return ERR_PTR(status); | 3080 | return ERR_PTR(status); |
| @@ -2924,7 +3099,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry | |||
| 2924 | .rpc_cred = entry->cred, | 3099 | .rpc_cred = entry->cred, |
| 2925 | }; | 3100 | }; |
| 2926 | int mode = entry->mask; | 3101 | int mode = entry->mask; |
| 2927 | int status; | 3102 | int status = 0; |
| 2928 | 3103 | ||
| 2929 | /* | 3104 | /* |
| 2930 | * Determine which access bits we want to ask for... | 3105 | * Determine which access bits we want to ask for... |
| @@ -3029,6 +3204,7 @@ static int | |||
| 3029 | nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | 3204 | nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, |
| 3030 | int flags) | 3205 | int flags) |
| 3031 | { | 3206 | { |
| 3207 | struct nfs4_label l, *ilabel = NULL; | ||
| 3032 | struct nfs_open_context *ctx; | 3208 | struct nfs_open_context *ctx; |
| 3033 | struct nfs4_state *state; | 3209 | struct nfs4_state *state; |
| 3034 | int status = 0; | 3210 | int status = 0; |
| @@ -3037,19 +3213,16 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
| 3037 | if (IS_ERR(ctx)) | 3213 | if (IS_ERR(ctx)) |
| 3038 | return PTR_ERR(ctx); | 3214 | return PTR_ERR(ctx); |
| 3039 | 3215 | ||
| 3216 | ilabel = nfs4_label_init_security(dir, dentry, sattr, &l); | ||
| 3217 | |||
| 3040 | sattr->ia_mode &= ~current_umask(); | 3218 | sattr->ia_mode &= ~current_umask(); |
| 3041 | state = nfs4_do_open(dir, dentry, ctx->mode, | 3219 | state = nfs4_do_open(dir, ctx, flags, sattr, ilabel); |
| 3042 | flags, sattr, ctx->cred, | ||
| 3043 | &ctx->mdsthreshold); | ||
| 3044 | d_drop(dentry); | ||
| 3045 | if (IS_ERR(state)) { | 3220 | if (IS_ERR(state)) { |
| 3046 | status = PTR_ERR(state); | 3221 | status = PTR_ERR(state); |
| 3047 | goto out; | 3222 | goto out; |
| 3048 | } | 3223 | } |
| 3049 | d_add(dentry, igrab(state->inode)); | ||
| 3050 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
| 3051 | ctx->state = state; | ||
| 3052 | out: | 3224 | out: |
| 3225 | nfs4_label_release_security(ilabel); | ||
| 3053 | put_nfs_open_context(ctx); | 3226 | put_nfs_open_context(ctx); |
| 3054 | return status; | 3227 | return status; |
| 3055 | } | 3228 | } |
| @@ -3098,6 +3271,8 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) | |||
| 3098 | res->server = server; | 3271 | res->server = server; |
| 3099 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; | 3272 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; |
| 3100 | nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); | 3273 | nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); |
| 3274 | |||
| 3275 | nfs_fattr_init(res->dir_attr); | ||
| 3101 | } | 3276 | } |
| 3102 | 3277 | ||
| 3103 | static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) | 3278 | static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) |
| @@ -3173,7 +3348,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, | |||
| 3173 | .rpc_resp = &res, | 3348 | .rpc_resp = &res, |
| 3174 | }; | 3349 | }; |
| 3175 | int status = -ENOMEM; | 3350 | int status = -ENOMEM; |
| 3176 | 3351 | ||
| 3177 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); | 3352 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
| 3178 | if (!status) { | 3353 | if (!status) { |
| 3179 | update_changeattr(old_dir, &res.old_cinfo); | 3354 | update_changeattr(old_dir, &res.old_cinfo); |
| @@ -3207,6 +3382,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * | |||
| 3207 | }; | 3382 | }; |
| 3208 | struct nfs4_link_res res = { | 3383 | struct nfs4_link_res res = { |
| 3209 | .server = server, | 3384 | .server = server, |
| 3385 | .label = NULL, | ||
| 3210 | }; | 3386 | }; |
| 3211 | struct rpc_message msg = { | 3387 | struct rpc_message msg = { |
| 3212 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], | 3388 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], |
| @@ -3219,11 +3395,24 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * | |||
| 3219 | if (res.fattr == NULL) | 3395 | if (res.fattr == NULL) |
| 3220 | goto out; | 3396 | goto out; |
| 3221 | 3397 | ||
| 3398 | res.label = nfs4_label_alloc(server, GFP_KERNEL); | ||
| 3399 | if (IS_ERR(res.label)) { | ||
| 3400 | status = PTR_ERR(res.label); | ||
| 3401 | goto out; | ||
| 3402 | } | ||
| 3403 | arg.bitmask = nfs4_bitmask(server, res.label); | ||
| 3404 | |||
| 3222 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); | 3405 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
| 3223 | if (!status) { | 3406 | if (!status) { |
| 3224 | update_changeattr(dir, &res.cinfo); | 3407 | update_changeattr(dir, &res.cinfo); |
| 3225 | nfs_post_op_update_inode(inode, res.fattr); | 3408 | status = nfs_post_op_update_inode(inode, res.fattr); |
| 3409 | if (!status) | ||
| 3410 | nfs_setsecurity(inode, res.fattr, res.label); | ||
| 3226 | } | 3411 | } |
| 3412 | |||
| 3413 | |||
| 3414 | nfs4_label_free(res.label); | ||
| 3415 | |||
| 3227 | out: | 3416 | out: |
| 3228 | nfs_free_fattr(res.fattr); | 3417 | nfs_free_fattr(res.fattr); |
| 3229 | return status; | 3418 | return status; |
| @@ -3247,6 +3436,7 @@ struct nfs4_createdata { | |||
| 3247 | struct nfs4_create_res res; | 3436 | struct nfs4_create_res res; |
| 3248 | struct nfs_fh fh; | 3437 | struct nfs_fh fh; |
| 3249 | struct nfs_fattr fattr; | 3438 | struct nfs_fattr fattr; |
| 3439 | struct nfs4_label *label; | ||
| 3250 | }; | 3440 | }; |
| 3251 | 3441 | ||
| 3252 | static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, | 3442 | static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, |
| @@ -3258,6 +3448,10 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, | |||
| 3258 | if (data != NULL) { | 3448 | if (data != NULL) { |
| 3259 | struct nfs_server *server = NFS_SERVER(dir); | 3449 | struct nfs_server *server = NFS_SERVER(dir); |
| 3260 | 3450 | ||
| 3451 | data->label = nfs4_label_alloc(server, GFP_KERNEL); | ||
| 3452 | if (IS_ERR(data->label)) | ||
| 3453 | goto out_free; | ||
| 3454 | |||
| 3261 | data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE]; | 3455 | data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE]; |
| 3262 | data->msg.rpc_argp = &data->arg; | 3456 | data->msg.rpc_argp = &data->arg; |
| 3263 | data->msg.rpc_resp = &data->res; | 3457 | data->msg.rpc_resp = &data->res; |
| @@ -3266,13 +3460,17 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, | |||
| 3266 | data->arg.name = name; | 3460 | data->arg.name = name; |
| 3267 | data->arg.attrs = sattr; | 3461 | data->arg.attrs = sattr; |
| 3268 | data->arg.ftype = ftype; | 3462 | data->arg.ftype = ftype; |
| 3269 | data->arg.bitmask = server->attr_bitmask; | 3463 | data->arg.bitmask = nfs4_bitmask(server, data->label); |
| 3270 | data->res.server = server; | 3464 | data->res.server = server; |
| 3271 | data->res.fh = &data->fh; | 3465 | data->res.fh = &data->fh; |
| 3272 | data->res.fattr = &data->fattr; | 3466 | data->res.fattr = &data->fattr; |
| 3467 | data->res.label = data->label; | ||
| 3273 | nfs_fattr_init(data->res.fattr); | 3468 | nfs_fattr_init(data->res.fattr); |
| 3274 | } | 3469 | } |
| 3275 | return data; | 3470 | return data; |
| 3471 | out_free: | ||
| 3472 | kfree(data); | ||
| 3473 | return NULL; | ||
| 3276 | } | 3474 | } |
| 3277 | 3475 | ||
| 3278 | static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) | 3476 | static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) |
| @@ -3281,18 +3479,20 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ | |||
| 3281 | &data->arg.seq_args, &data->res.seq_res, 1); | 3479 | &data->arg.seq_args, &data->res.seq_res, 1); |
| 3282 | if (status == 0) { | 3480 | if (status == 0) { |
| 3283 | update_changeattr(dir, &data->res.dir_cinfo); | 3481 | update_changeattr(dir, &data->res.dir_cinfo); |
| 3284 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); | 3482 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label); |
| 3285 | } | 3483 | } |
| 3286 | return status; | 3484 | return status; |
| 3287 | } | 3485 | } |
| 3288 | 3486 | ||
| 3289 | static void nfs4_free_createdata(struct nfs4_createdata *data) | 3487 | static void nfs4_free_createdata(struct nfs4_createdata *data) |
| 3290 | { | 3488 | { |
| 3489 | nfs4_label_free(data->label); | ||
| 3291 | kfree(data); | 3490 | kfree(data); |
| 3292 | } | 3491 | } |
| 3293 | 3492 | ||
| 3294 | static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, | 3493 | static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, |
| 3295 | struct page *page, unsigned int len, struct iattr *sattr) | 3494 | struct page *page, unsigned int len, struct iattr *sattr, |
| 3495 | struct nfs4_label *label) | ||
| 3296 | { | 3496 | { |
| 3297 | struct nfs4_createdata *data; | 3497 | struct nfs4_createdata *data; |
| 3298 | int status = -ENAMETOOLONG; | 3498 | int status = -ENAMETOOLONG; |
| @@ -3308,6 +3508,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, | |||
| 3308 | data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK]; | 3508 | data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK]; |
| 3309 | data->arg.u.symlink.pages = &page; | 3509 | data->arg.u.symlink.pages = &page; |
| 3310 | data->arg.u.symlink.len = len; | 3510 | data->arg.u.symlink.len = len; |
| 3511 | data->arg.label = label; | ||
| 3311 | 3512 | ||
| 3312 | status = nfs4_do_create(dir, dentry, data); | 3513 | status = nfs4_do_create(dir, dentry, data); |
| 3313 | 3514 | ||
| @@ -3320,18 +3521,24 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, | |||
| 3320 | struct page *page, unsigned int len, struct iattr *sattr) | 3521 | struct page *page, unsigned int len, struct iattr *sattr) |
| 3321 | { | 3522 | { |
| 3322 | struct nfs4_exception exception = { }; | 3523 | struct nfs4_exception exception = { }; |
| 3524 | struct nfs4_label l, *label = NULL; | ||
| 3323 | int err; | 3525 | int err; |
| 3526 | |||
| 3527 | label = nfs4_label_init_security(dir, dentry, sattr, &l); | ||
| 3528 | |||
| 3324 | do { | 3529 | do { |
| 3325 | err = nfs4_handle_exception(NFS_SERVER(dir), | 3530 | err = nfs4_handle_exception(NFS_SERVER(dir), |
| 3326 | _nfs4_proc_symlink(dir, dentry, page, | 3531 | _nfs4_proc_symlink(dir, dentry, page, |
| 3327 | len, sattr), | 3532 | len, sattr, label), |
| 3328 | &exception); | 3533 | &exception); |
| 3329 | } while (exception.retry); | 3534 | } while (exception.retry); |
| 3535 | |||
| 3536 | nfs4_label_release_security(label); | ||
| 3330 | return err; | 3537 | return err; |
| 3331 | } | 3538 | } |
| 3332 | 3539 | ||
| 3333 | static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, | 3540 | static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, |
| 3334 | struct iattr *sattr) | 3541 | struct iattr *sattr, struct nfs4_label *label) |
| 3335 | { | 3542 | { |
| 3336 | struct nfs4_createdata *data; | 3543 | struct nfs4_createdata *data; |
| 3337 | int status = -ENOMEM; | 3544 | int status = -ENOMEM; |
| @@ -3340,6 +3547,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, | |||
| 3340 | if (data == NULL) | 3547 | if (data == NULL) |
| 3341 | goto out; | 3548 | goto out; |
| 3342 | 3549 | ||
| 3550 | data->arg.label = label; | ||
| 3343 | status = nfs4_do_create(dir, dentry, data); | 3551 | status = nfs4_do_create(dir, dentry, data); |
| 3344 | 3552 | ||
| 3345 | nfs4_free_createdata(data); | 3553 | nfs4_free_createdata(data); |
| @@ -3351,14 +3559,19 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, | |||
| 3351 | struct iattr *sattr) | 3559 | struct iattr *sattr) |
| 3352 | { | 3560 | { |
| 3353 | struct nfs4_exception exception = { }; | 3561 | struct nfs4_exception exception = { }; |
| 3562 | struct nfs4_label l, *label = NULL; | ||
| 3354 | int err; | 3563 | int err; |
| 3355 | 3564 | ||
| 3565 | label = nfs4_label_init_security(dir, dentry, sattr, &l); | ||
| 3566 | |||
| 3356 | sattr->ia_mode &= ~current_umask(); | 3567 | sattr->ia_mode &= ~current_umask(); |
| 3357 | do { | 3568 | do { |
| 3358 | err = nfs4_handle_exception(NFS_SERVER(dir), | 3569 | err = nfs4_handle_exception(NFS_SERVER(dir), |
| 3359 | _nfs4_proc_mkdir(dir, dentry, sattr), | 3570 | _nfs4_proc_mkdir(dir, dentry, sattr, label), |
| 3360 | &exception); | 3571 | &exception); |
| 3361 | } while (exception.retry); | 3572 | } while (exception.retry); |
| 3573 | nfs4_label_release_security(label); | ||
| 3574 | |||
| 3362 | return err; | 3575 | return err; |
| 3363 | } | 3576 | } |
| 3364 | 3577 | ||
| @@ -3416,7 +3629,7 @@ static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
| 3416 | } | 3629 | } |
| 3417 | 3630 | ||
| 3418 | static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, | 3631 | static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, |
| 3419 | struct iattr *sattr, dev_t rdev) | 3632 | struct iattr *sattr, struct nfs4_label *label, dev_t rdev) |
| 3420 | { | 3633 | { |
| 3421 | struct nfs4_createdata *data; | 3634 | struct nfs4_createdata *data; |
| 3422 | int mode = sattr->ia_mode; | 3635 | int mode = sattr->ia_mode; |
| @@ -3441,7 +3654,8 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, | |||
| 3441 | status = -EINVAL; | 3654 | status = -EINVAL; |
| 3442 | goto out_free; | 3655 | goto out_free; |
| 3443 | } | 3656 | } |
| 3444 | 3657 | ||
| 3658 | data->arg.label = label; | ||
| 3445 | status = nfs4_do_create(dir, dentry, data); | 3659 | status = nfs4_do_create(dir, dentry, data); |
| 3446 | out_free: | 3660 | out_free: |
| 3447 | nfs4_free_createdata(data); | 3661 | nfs4_free_createdata(data); |
| @@ -3453,14 +3667,20 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, | |||
| 3453 | struct iattr *sattr, dev_t rdev) | 3667 | struct iattr *sattr, dev_t rdev) |
| 3454 | { | 3668 | { |
| 3455 | struct nfs4_exception exception = { }; | 3669 | struct nfs4_exception exception = { }; |
| 3670 | struct nfs4_label l, *label = NULL; | ||
| 3456 | int err; | 3671 | int err; |
| 3457 | 3672 | ||
| 3673 | label = nfs4_label_init_security(dir, dentry, sattr, &l); | ||
| 3674 | |||
| 3458 | sattr->ia_mode &= ~current_umask(); | 3675 | sattr->ia_mode &= ~current_umask(); |
| 3459 | do { | 3676 | do { |
| 3460 | err = nfs4_handle_exception(NFS_SERVER(dir), | 3677 | err = nfs4_handle_exception(NFS_SERVER(dir), |
| 3461 | _nfs4_proc_mknod(dir, dentry, sattr, rdev), | 3678 | _nfs4_proc_mknod(dir, dentry, sattr, label, rdev), |
| 3462 | &exception); | 3679 | &exception); |
| 3463 | } while (exception.retry); | 3680 | } while (exception.retry); |
| 3681 | |||
| 3682 | nfs4_label_release_security(label); | ||
| 3683 | |||
| 3464 | return err; | 3684 | return err; |
| 3465 | } | 3685 | } |
| 3466 | 3686 | ||
| @@ -4187,6 +4407,155 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen | |||
| 4187 | return err; | 4407 | return err; |
| 4188 | } | 4408 | } |
| 4189 | 4409 | ||
| 4410 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
| 4411 | static int _nfs4_get_security_label(struct inode *inode, void *buf, | ||
| 4412 | size_t buflen) | ||
| 4413 | { | ||
| 4414 | struct nfs_server *server = NFS_SERVER(inode); | ||
| 4415 | struct nfs_fattr fattr; | ||
| 4416 | struct nfs4_label label = {0, 0, buflen, buf}; | ||
| 4417 | |||
| 4418 | u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; | ||
| 4419 | struct nfs4_getattr_arg args = { | ||
| 4420 | .fh = NFS_FH(inode), | ||
| 4421 | .bitmask = bitmask, | ||
| 4422 | }; | ||
| 4423 | struct nfs4_getattr_res res = { | ||
| 4424 | .fattr = &fattr, | ||
| 4425 | .label = &label, | ||
| 4426 | .server = server, | ||
| 4427 | }; | ||
| 4428 | struct rpc_message msg = { | ||
| 4429 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR], | ||
| 4430 | .rpc_argp = &args, | ||
| 4431 | .rpc_resp = &res, | ||
| 4432 | }; | ||
| 4433 | int ret; | ||
| 4434 | |||
| 4435 | nfs_fattr_init(&fattr); | ||
| 4436 | |||
| 4437 | ret = rpc_call_sync(server->client, &msg, 0); | ||
| 4438 | if (ret) | ||
| 4439 | return ret; | ||
| 4440 | if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL)) | ||
| 4441 | return -ENOENT; | ||
| 4442 | if (buflen < label.len) | ||
| 4443 | return -ERANGE; | ||
| 4444 | return 0; | ||
| 4445 | } | ||
| 4446 | |||
| 4447 | static int nfs4_get_security_label(struct inode *inode, void *buf, | ||
| 4448 | size_t buflen) | ||
| 4449 | { | ||
| 4450 | struct nfs4_exception exception = { }; | ||
| 4451 | int err; | ||
| 4452 | |||
| 4453 | if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) | ||
| 4454 | return -EOPNOTSUPP; | ||
| 4455 | |||
| 4456 | do { | ||
| 4457 | err = nfs4_handle_exception(NFS_SERVER(inode), | ||
| 4458 | _nfs4_get_security_label(inode, buf, buflen), | ||
| 4459 | &exception); | ||
| 4460 | } while (exception.retry); | ||
| 4461 | return err; | ||
| 4462 | } | ||
| 4463 | |||
| 4464 | static int _nfs4_do_set_security_label(struct inode *inode, | ||
| 4465 | struct nfs4_label *ilabel, | ||
| 4466 | struct nfs_fattr *fattr, | ||
| 4467 | struct nfs4_label *olabel) | ||
| 4468 | { | ||
| 4469 | |||
| 4470 | struct iattr sattr = {0}; | ||
| 4471 | struct nfs_server *server = NFS_SERVER(inode); | ||
| 4472 | const u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; | ||
| 4473 | struct nfs_setattrargs args = { | ||
| 4474 | .fh = NFS_FH(inode), | ||
| 4475 | .iap = &sattr, | ||
| 4476 | .server = server, | ||
| 4477 | .bitmask = bitmask, | ||
| 4478 | .label = ilabel, | ||
| 4479 | }; | ||
| 4480 | struct nfs_setattrres res = { | ||
| 4481 | .fattr = fattr, | ||
| 4482 | .label = olabel, | ||
| 4483 | .server = server, | ||
| 4484 | }; | ||
| 4485 | struct rpc_message msg = { | ||
| 4486 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], | ||
| 4487 | .rpc_argp = &args, | ||
| 4488 | .rpc_resp = &res, | ||
| 4489 | }; | ||
| 4490 | int status; | ||
| 4491 | |||
| 4492 | nfs4_stateid_copy(&args.stateid, &zero_stateid); | ||
| 4493 | |||
| 4494 | status = rpc_call_sync(server->client, &msg, 0); | ||
| 4495 | if (status) | ||
| 4496 | dprintk("%s failed: %d\n", __func__, status); | ||
| 4497 | |||
| 4498 | return status; | ||
| 4499 | } | ||
| 4500 | |||
| 4501 | static int nfs4_do_set_security_label(struct inode *inode, | ||
| 4502 | struct nfs4_label *ilabel, | ||
| 4503 | struct nfs_fattr *fattr, | ||
| 4504 | struct nfs4_label *olabel) | ||
| 4505 | { | ||
| 4506 | struct nfs4_exception exception = { }; | ||
| 4507 | int err; | ||
| 4508 | |||
| 4509 | do { | ||
| 4510 | err = nfs4_handle_exception(NFS_SERVER(inode), | ||
| 4511 | _nfs4_do_set_security_label(inode, ilabel, | ||
| 4512 | fattr, olabel), | ||
| 4513 | &exception); | ||
| 4514 | } while (exception.retry); | ||
| 4515 | return err; | ||
| 4516 | } | ||
| 4517 | |||
| 4518 | static int | ||
| 4519 | nfs4_set_security_label(struct dentry *dentry, const void *buf, size_t buflen) | ||
| 4520 | { | ||
| 4521 | struct nfs4_label ilabel, *olabel = NULL; | ||
| 4522 | struct nfs_fattr fattr; | ||
| 4523 | struct rpc_cred *cred; | ||
| 4524 | struct inode *inode = dentry->d_inode; | ||
| 4525 | int status; | ||
| 4526 | |||
| 4527 | if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) | ||
| 4528 | return -EOPNOTSUPP; | ||
| 4529 | |||
| 4530 | nfs_fattr_init(&fattr); | ||
| 4531 | |||
| 4532 | ilabel.pi = 0; | ||
| 4533 | ilabel.lfs = 0; | ||
| 4534 | ilabel.label = (char *)buf; | ||
| 4535 | ilabel.len = buflen; | ||
| 4536 | |||
| 4537 | cred = rpc_lookup_cred(); | ||
| 4538 | if (IS_ERR(cred)) | ||
| 4539 | return PTR_ERR(cred); | ||
| 4540 | |||
| 4541 | olabel = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); | ||
| 4542 | if (IS_ERR(olabel)) { | ||
| 4543 | status = -PTR_ERR(olabel); | ||
| 4544 | goto out; | ||
| 4545 | } | ||
| 4546 | |||
| 4547 | status = nfs4_do_set_security_label(inode, &ilabel, &fattr, olabel); | ||
| 4548 | if (status == 0) | ||
| 4549 | nfs_setsecurity(inode, &fattr, olabel); | ||
| 4550 | |||
| 4551 | nfs4_label_free(olabel); | ||
| 4552 | out: | ||
| 4553 | put_rpccred(cred); | ||
| 4554 | return status; | ||
| 4555 | } | ||
| 4556 | #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ | ||
| 4557 | |||
| 4558 | |||
| 4190 | static int | 4559 | static int |
| 4191 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) | 4560 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) |
| 4192 | { | 4561 | { |
| @@ -4345,7 +4714,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, | |||
| 4345 | /* cb_client4 */ | 4714 | /* cb_client4 */ |
| 4346 | rcu_read_lock(); | 4715 | rcu_read_lock(); |
| 4347 | setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, | 4716 | setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, |
| 4348 | sizeof(setclientid.sc_netid), | 4717 | sizeof(setclientid.sc_netid), "%s", |
| 4349 | rpc_peeraddr2str(clp->cl_rpcclient, | 4718 | rpc_peeraddr2str(clp->cl_rpcclient, |
| 4350 | RPC_DISPLAY_NETID)); | 4719 | RPC_DISPLAY_NETID)); |
| 4351 | rcu_read_unlock(); | 4720 | rcu_read_unlock(); |
| @@ -4528,7 +4897,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 | |||
| 4528 | static unsigned long | 4897 | static unsigned long |
| 4529 | nfs4_set_lock_task_retry(unsigned long timeout) | 4898 | nfs4_set_lock_task_retry(unsigned long timeout) |
| 4530 | { | 4899 | { |
| 4531 | freezable_schedule_timeout_killable(timeout); | 4900 | freezable_schedule_timeout_killable_unsafe(timeout); |
| 4532 | timeout <<= 1; | 4901 | timeout <<= 1; |
| 4533 | if (timeout > NFS4_LOCK_MAXTIMEOUT) | 4902 | if (timeout > NFS4_LOCK_MAXTIMEOUT) |
| 4534 | return NFS4_LOCK_MAXTIMEOUT; | 4903 | return NFS4_LOCK_MAXTIMEOUT; |
| @@ -5056,13 +5425,18 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) | |||
| 5056 | 5425 | ||
| 5057 | list_for_each_entry(lsp, &state->lock_states, ls_locks) { | 5426 | list_for_each_entry(lsp, &state->lock_states, ls_locks) { |
| 5058 | if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { | 5427 | if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { |
| 5059 | status = nfs41_test_stateid(server, &lsp->ls_stateid); | 5428 | struct rpc_cred *cred = lsp->ls_state->owner->so_cred; |
| 5429 | |||
| 5430 | status = nfs41_test_stateid(server, | ||
| 5431 | &lsp->ls_stateid, | ||
| 5432 | cred); | ||
| 5060 | if (status != NFS_OK) { | 5433 | if (status != NFS_OK) { |
| 5061 | /* Free the stateid unless the server | 5434 | /* Free the stateid unless the server |
| 5062 | * informs us the stateid is unrecognized. */ | 5435 | * informs us the stateid is unrecognized. */ |
| 5063 | if (status != -NFS4ERR_BAD_STATEID) | 5436 | if (status != -NFS4ERR_BAD_STATEID) |
| 5064 | nfs41_free_stateid(server, | 5437 | nfs41_free_stateid(server, |
| 5065 | &lsp->ls_stateid); | 5438 | &lsp->ls_stateid, |
| 5439 | cred); | ||
| 5066 | clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); | 5440 | clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); |
| 5067 | ret = status; | 5441 | ret = status; |
| 5068 | } | 5442 | } |
| @@ -5295,6 +5669,53 @@ static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list, | |||
| 5295 | return len; | 5669 | return len; |
| 5296 | } | 5670 | } |
| 5297 | 5671 | ||
| 5672 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
| 5673 | static inline int nfs4_server_supports_labels(struct nfs_server *server) | ||
| 5674 | { | ||
| 5675 | return server->caps & NFS_CAP_SECURITY_LABEL; | ||
| 5676 | } | ||
| 5677 | |||
| 5678 | static int nfs4_xattr_set_nfs4_label(struct dentry *dentry, const char *key, | ||
| 5679 | const void *buf, size_t buflen, | ||
| 5680 | int flags, int type) | ||
| 5681 | { | ||
| 5682 | if (security_ismaclabel(key)) | ||
| 5683 | return nfs4_set_security_label(dentry, buf, buflen); | ||
| 5684 | |||
| 5685 | return -EOPNOTSUPP; | ||
| 5686 | } | ||
| 5687 | |||
| 5688 | static int nfs4_xattr_get_nfs4_label(struct dentry *dentry, const char *key, | ||
| 5689 | void *buf, size_t buflen, int type) | ||
| 5690 | { | ||
| 5691 | if (security_ismaclabel(key)) | ||
| 5692 | return nfs4_get_security_label(dentry->d_inode, buf, buflen); | ||
| 5693 | return -EOPNOTSUPP; | ||
| 5694 | } | ||
| 5695 | |||
| 5696 | static size_t nfs4_xattr_list_nfs4_label(struct dentry *dentry, char *list, | ||
| 5697 | size_t list_len, const char *name, | ||
| 5698 | size_t name_len, int type) | ||
| 5699 | { | ||
| 5700 | size_t len = 0; | ||
| 5701 | |||
| 5702 | if (nfs_server_capable(dentry->d_inode, NFS_CAP_SECURITY_LABEL)) { | ||
| 5703 | len = security_inode_listsecurity(dentry->d_inode, NULL, 0); | ||
| 5704 | if (list && len <= list_len) | ||
| 5705 | security_inode_listsecurity(dentry->d_inode, list, len); | ||
| 5706 | } | ||
| 5707 | return len; | ||
| 5708 | } | ||
| 5709 | |||
| 5710 | static const struct xattr_handler nfs4_xattr_nfs4_label_handler = { | ||
| 5711 | .prefix = XATTR_SECURITY_PREFIX, | ||
| 5712 | .list = nfs4_xattr_list_nfs4_label, | ||
| 5713 | .get = nfs4_xattr_get_nfs4_label, | ||
| 5714 | .set = nfs4_xattr_set_nfs4_label, | ||
| 5715 | }; | ||
| 5716 | #endif | ||
| 5717 | |||
| 5718 | |||
| 5298 | /* | 5719 | /* |
| 5299 | * nfs_fhget will use either the mounted_on_fileid or the fileid | 5720 | * nfs_fhget will use either the mounted_on_fileid or the fileid |
| 5300 | */ | 5721 | */ |
| @@ -5318,7 +5739,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, | |||
| 5318 | struct page *page) | 5739 | struct page *page) |
| 5319 | { | 5740 | { |
| 5320 | struct nfs_server *server = NFS_SERVER(dir); | 5741 | struct nfs_server *server = NFS_SERVER(dir); |
| 5321 | u32 bitmask[2] = { | 5742 | u32 bitmask[3] = { |
| 5322 | [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, | 5743 | [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, |
| 5323 | }; | 5744 | }; |
| 5324 | struct nfs4_fs_locations_arg args = { | 5745 | struct nfs4_fs_locations_arg args = { |
| @@ -5505,7 +5926,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) | |||
| 5505 | struct nfs41_exchange_id_args args = { | 5926 | struct nfs41_exchange_id_args args = { |
| 5506 | .verifier = &verifier, | 5927 | .verifier = &verifier, |
| 5507 | .client = clp, | 5928 | .client = clp, |
| 5508 | .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, | 5929 | .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | |
| 5930 | EXCHGID4_FLAG_BIND_PRINC_STATEID, | ||
| 5509 | }; | 5931 | }; |
| 5510 | struct nfs41_exchange_id_res res = { | 5932 | struct nfs41_exchange_id_res res = { |
| 5511 | 0 | 5933 | 0 |
| @@ -5762,17 +6184,14 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) | |||
| 5762 | */ | 6184 | */ |
| 5763 | static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) | 6185 | static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) |
| 5764 | { | 6186 | { |
| 5765 | struct nfs4_session *session = args->client->cl_session; | 6187 | unsigned int max_rqst_sz, max_resp_sz; |
| 5766 | unsigned int mxrqst_sz = session->fc_target_max_rqst_sz, | 6188 | |
| 5767 | mxresp_sz = session->fc_target_max_resp_sz; | 6189 | max_rqst_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxwrite_overhead; |
| 6190 | max_resp_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxread_overhead; | ||
| 5768 | 6191 | ||
| 5769 | if (mxrqst_sz == 0) | ||
| 5770 | mxrqst_sz = NFS_MAX_FILE_IO_SIZE; | ||
| 5771 | if (mxresp_sz == 0) | ||
| 5772 | mxresp_sz = NFS_MAX_FILE_IO_SIZE; | ||
| 5773 | /* Fore channel attributes */ | 6192 | /* Fore channel attributes */ |
| 5774 | args->fc_attrs.max_rqst_sz = mxrqst_sz; | 6193 | args->fc_attrs.max_rqst_sz = max_rqst_sz; |
| 5775 | args->fc_attrs.max_resp_sz = mxresp_sz; | 6194 | args->fc_attrs.max_resp_sz = max_resp_sz; |
| 5776 | args->fc_attrs.max_ops = NFS4_MAX_OPS; | 6195 | args->fc_attrs.max_ops = NFS4_MAX_OPS; |
| 5777 | args->fc_attrs.max_reqs = max_session_slots; | 6196 | args->fc_attrs.max_reqs = max_session_slots; |
| 5778 | 6197 | ||
| @@ -6159,12 +6578,14 @@ static const struct rpc_call_ops nfs4_reclaim_complete_call_ops = { | |||
| 6159 | /* | 6578 | /* |
| 6160 | * Issue a global reclaim complete. | 6579 | * Issue a global reclaim complete. |
| 6161 | */ | 6580 | */ |
| 6162 | static int nfs41_proc_reclaim_complete(struct nfs_client *clp) | 6581 | static int nfs41_proc_reclaim_complete(struct nfs_client *clp, |
| 6582 | struct rpc_cred *cred) | ||
| 6163 | { | 6583 | { |
| 6164 | struct nfs4_reclaim_complete_data *calldata; | 6584 | struct nfs4_reclaim_complete_data *calldata; |
| 6165 | struct rpc_task *task; | 6585 | struct rpc_task *task; |
| 6166 | struct rpc_message msg = { | 6586 | struct rpc_message msg = { |
| 6167 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RECLAIM_COMPLETE], | 6587 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RECLAIM_COMPLETE], |
| 6588 | .rpc_cred = cred, | ||
| 6168 | }; | 6589 | }; |
| 6169 | struct rpc_task_setup task_setup_data = { | 6590 | struct rpc_task_setup task_setup_data = { |
| 6170 | .rpc_client = clp->cl_rpcclient, | 6591 | .rpc_client = clp->cl_rpcclient, |
| @@ -6348,6 +6769,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) | |||
| 6348 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], | 6769 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], |
| 6349 | .rpc_argp = &lgp->args, | 6770 | .rpc_argp = &lgp->args, |
| 6350 | .rpc_resp = &lgp->res, | 6771 | .rpc_resp = &lgp->res, |
| 6772 | .rpc_cred = lgp->cred, | ||
| 6351 | }; | 6773 | }; |
| 6352 | struct rpc_task_setup task_setup_data = { | 6774 | struct rpc_task_setup task_setup_data = { |
| 6353 | .rpc_client = server->client, | 6775 | .rpc_client = server->client, |
| @@ -6451,6 +6873,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | |||
| 6451 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN], | 6873 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN], |
| 6452 | .rpc_argp = &lrp->args, | 6874 | .rpc_argp = &lrp->args, |
| 6453 | .rpc_resp = &lrp->res, | 6875 | .rpc_resp = &lrp->res, |
| 6876 | .rpc_cred = lrp->cred, | ||
| 6454 | }; | 6877 | }; |
| 6455 | struct rpc_task_setup task_setup_data = { | 6878 | struct rpc_task_setup task_setup_data = { |
| 6456 | .rpc_client = lrp->clp->cl_rpcclient, | 6879 | .rpc_client = lrp->clp->cl_rpcclient, |
| @@ -6520,7 +6943,9 @@ int nfs4_proc_getdevicelist(struct nfs_server *server, | |||
| 6520 | EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); | 6943 | EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); |
| 6521 | 6944 | ||
| 6522 | static int | 6945 | static int |
| 6523 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | 6946 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, |
| 6947 | struct pnfs_device *pdev, | ||
| 6948 | struct rpc_cred *cred) | ||
| 6524 | { | 6949 | { |
| 6525 | struct nfs4_getdeviceinfo_args args = { | 6950 | struct nfs4_getdeviceinfo_args args = { |
| 6526 | .pdev = pdev, | 6951 | .pdev = pdev, |
| @@ -6532,6 +6957,7 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | |||
| 6532 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO], | 6957 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO], |
| 6533 | .rpc_argp = &args, | 6958 | .rpc_argp = &args, |
| 6534 | .rpc_resp = &res, | 6959 | .rpc_resp = &res, |
| 6960 | .rpc_cred = cred, | ||
| 6535 | }; | 6961 | }; |
| 6536 | int status; | 6962 | int status; |
| 6537 | 6963 | ||
| @@ -6542,14 +6968,16 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | |||
| 6542 | return status; | 6968 | return status; |
| 6543 | } | 6969 | } |
| 6544 | 6970 | ||
| 6545 | int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | 6971 | int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
| 6972 | struct pnfs_device *pdev, | ||
| 6973 | struct rpc_cred *cred) | ||
| 6546 | { | 6974 | { |
| 6547 | struct nfs4_exception exception = { }; | 6975 | struct nfs4_exception exception = { }; |
| 6548 | int err; | 6976 | int err; |
| 6549 | 6977 | ||
| 6550 | do { | 6978 | do { |
| 6551 | err = nfs4_handle_exception(server, | 6979 | err = nfs4_handle_exception(server, |
| 6552 | _nfs4_proc_getdeviceinfo(server, pdev), | 6980 | _nfs4_proc_getdeviceinfo(server, pdev, cred), |
| 6553 | &exception); | 6981 | &exception); |
| 6554 | } while (exception.retry); | 6982 | } while (exception.retry); |
| 6555 | return err; | 6983 | return err; |
| @@ -6733,7 +7161,9 @@ out: | |||
| 6733 | return err; | 7161 | return err; |
| 6734 | } | 7162 | } |
| 6735 | 7163 | ||
| 6736 | static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) | 7164 | static int _nfs41_test_stateid(struct nfs_server *server, |
| 7165 | nfs4_stateid *stateid, | ||
| 7166 | struct rpc_cred *cred) | ||
| 6737 | { | 7167 | { |
| 6738 | int status; | 7168 | int status; |
| 6739 | struct nfs41_test_stateid_args args = { | 7169 | struct nfs41_test_stateid_args args = { |
| @@ -6744,6 +7174,7 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) | |||
| 6744 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID], | 7174 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID], |
| 6745 | .rpc_argp = &args, | 7175 | .rpc_argp = &args, |
| 6746 | .rpc_resp = &res, | 7176 | .rpc_resp = &res, |
| 7177 | .rpc_cred = cred, | ||
| 6747 | }; | 7178 | }; |
| 6748 | 7179 | ||
| 6749 | dprintk("NFS call test_stateid %p\n", stateid); | 7180 | dprintk("NFS call test_stateid %p\n", stateid); |
| @@ -6764,17 +7195,20 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) | |||
| 6764 | * | 7195 | * |
| 6765 | * @server: server / transport on which to perform the operation | 7196 | * @server: server / transport on which to perform the operation |
| 6766 | * @stateid: state ID to test | 7197 | * @stateid: state ID to test |
| 7198 | * @cred: credential | ||
| 6767 | * | 7199 | * |
| 6768 | * Returns NFS_OK if the server recognizes that "stateid" is valid. | 7200 | * Returns NFS_OK if the server recognizes that "stateid" is valid. |
| 6769 | * Otherwise a negative NFS4ERR value is returned if the operation | 7201 | * Otherwise a negative NFS4ERR value is returned if the operation |
| 6770 | * failed or the state ID is not currently valid. | 7202 | * failed or the state ID is not currently valid. |
| 6771 | */ | 7203 | */ |
| 6772 | static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) | 7204 | static int nfs41_test_stateid(struct nfs_server *server, |
| 7205 | nfs4_stateid *stateid, | ||
| 7206 | struct rpc_cred *cred) | ||
| 6773 | { | 7207 | { |
| 6774 | struct nfs4_exception exception = { }; | 7208 | struct nfs4_exception exception = { }; |
| 6775 | int err; | 7209 | int err; |
| 6776 | do { | 7210 | do { |
| 6777 | err = _nfs41_test_stateid(server, stateid); | 7211 | err = _nfs41_test_stateid(server, stateid, cred); |
| 6778 | if (err != -NFS4ERR_DELAY) | 7212 | if (err != -NFS4ERR_DELAY) |
| 6779 | break; | 7213 | break; |
| 6780 | nfs4_handle_exception(server, err, &exception); | 7214 | nfs4_handle_exception(server, err, &exception); |
| @@ -6823,10 +7257,12 @@ const struct rpc_call_ops nfs41_free_stateid_ops = { | |||
| 6823 | 7257 | ||
| 6824 | static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, | 7258 | static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, |
| 6825 | nfs4_stateid *stateid, | 7259 | nfs4_stateid *stateid, |
| 7260 | struct rpc_cred *cred, | ||
| 6826 | bool privileged) | 7261 | bool privileged) |
| 6827 | { | 7262 | { |
| 6828 | struct rpc_message msg = { | 7263 | struct rpc_message msg = { |
| 6829 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID], | 7264 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID], |
| 7265 | .rpc_cred = cred, | ||
| 6830 | }; | 7266 | }; |
| 6831 | struct rpc_task_setup task_setup = { | 7267 | struct rpc_task_setup task_setup = { |
| 6832 | .rpc_client = server->client, | 7268 | .rpc_client = server->client, |
| @@ -6859,16 +7295,19 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, | |||
| 6859 | * | 7295 | * |
| 6860 | * @server: server / transport on which to perform the operation | 7296 | * @server: server / transport on which to perform the operation |
| 6861 | * @stateid: state ID to release | 7297 | * @stateid: state ID to release |
| 7298 | * @cred: credential | ||
| 6862 | * | 7299 | * |
| 6863 | * Returns NFS_OK if the server freed "stateid". Otherwise a | 7300 | * Returns NFS_OK if the server freed "stateid". Otherwise a |
| 6864 | * negative NFS4ERR value is returned. | 7301 | * negative NFS4ERR value is returned. |
| 6865 | */ | 7302 | */ |
| 6866 | static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) | 7303 | static int nfs41_free_stateid(struct nfs_server *server, |
| 7304 | nfs4_stateid *stateid, | ||
| 7305 | struct rpc_cred *cred) | ||
| 6867 | { | 7306 | { |
| 6868 | struct rpc_task *task; | 7307 | struct rpc_task *task; |
| 6869 | int ret; | 7308 | int ret; |
| 6870 | 7309 | ||
| 6871 | task = _nfs41_free_stateid(server, stateid, true); | 7310 | task = _nfs41_free_stateid(server, stateid, cred, true); |
| 6872 | if (IS_ERR(task)) | 7311 | if (IS_ERR(task)) |
| 6873 | return PTR_ERR(task); | 7312 | return PTR_ERR(task); |
| 6874 | ret = rpc_wait_for_completion_task(task); | 7313 | ret = rpc_wait_for_completion_task(task); |
| @@ -6881,8 +7320,9 @@ static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) | |||
| 6881 | static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) | 7320 | static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) |
| 6882 | { | 7321 | { |
| 6883 | struct rpc_task *task; | 7322 | struct rpc_task *task; |
| 7323 | struct rpc_cred *cred = lsp->ls_state->owner->so_cred; | ||
| 6884 | 7324 | ||
| 6885 | task = _nfs41_free_stateid(server, &lsp->ls_stateid, false); | 7325 | task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); |
| 6886 | nfs4_free_lock_state(server, lsp); | 7326 | nfs4_free_lock_state(server, lsp); |
| 6887 | if (IS_ERR(task)) | 7327 | if (IS_ERR(task)) |
| 6888 | return PTR_ERR(task); | 7328 | return PTR_ERR(task); |
| @@ -7004,11 +7444,33 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | |||
| 7004 | }; | 7444 | }; |
| 7005 | #endif | 7445 | #endif |
| 7006 | 7446 | ||
| 7447 | #if defined(CONFIG_NFS_V4_2) | ||
| 7448 | static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | ||
| 7449 | .minor_version = 2, | ||
| 7450 | .init_caps = NFS_CAP_READDIRPLUS | ||
| 7451 | | NFS_CAP_ATOMIC_OPEN | ||
| 7452 | | NFS_CAP_CHANGE_ATTR | ||
| 7453 | | NFS_CAP_POSIX_LOCK | ||
| 7454 | | NFS_CAP_STATEID_NFSV41 | ||
| 7455 | | NFS_CAP_ATOMIC_OPEN_V1, | ||
| 7456 | .call_sync = nfs4_call_sync_sequence, | ||
| 7457 | .match_stateid = nfs41_match_stateid, | ||
| 7458 | .find_root_sec = nfs41_find_root_sec, | ||
| 7459 | .free_lock_state = nfs41_free_lock_state, | ||
| 7460 | .reboot_recovery_ops = &nfs41_reboot_recovery_ops, | ||
| 7461 | .nograce_recovery_ops = &nfs41_nograce_recovery_ops, | ||
| 7462 | .state_renewal_ops = &nfs41_state_renewal_ops, | ||
| 7463 | }; | ||
| 7464 | #endif | ||
| 7465 | |||
| 7007 | const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { | 7466 | const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { |
| 7008 | [0] = &nfs_v4_0_minor_ops, | 7467 | [0] = &nfs_v4_0_minor_ops, |
| 7009 | #if defined(CONFIG_NFS_V4_1) | 7468 | #if defined(CONFIG_NFS_V4_1) |
| 7010 | [1] = &nfs_v4_1_minor_ops, | 7469 | [1] = &nfs_v4_1_minor_ops, |
| 7011 | #endif | 7470 | #endif |
| 7471 | #if defined(CONFIG_NFS_V4_2) | ||
| 7472 | [2] = &nfs_v4_2_minor_ops, | ||
| 7473 | #endif | ||
| 7012 | }; | 7474 | }; |
| 7013 | 7475 | ||
| 7014 | const struct inode_operations nfs4_dir_inode_operations = { | 7476 | const struct inode_operations nfs4_dir_inode_operations = { |
| @@ -7108,6 +7570,9 @@ static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { | |||
| 7108 | 7570 | ||
| 7109 | const struct xattr_handler *nfs4_xattr_handlers[] = { | 7571 | const struct xattr_handler *nfs4_xattr_handlers[] = { |
| 7110 | &nfs4_xattr_nfs4_acl_handler, | 7572 | &nfs4_xattr_nfs4_acl_handler, |
| 7573 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
| 7574 | &nfs4_xattr_nfs4_label_handler, | ||
| 7575 | #endif | ||
| 7111 | NULL | 7576 | NULL |
| 7112 | }; | 7577 | }; |
| 7113 | 7578 | ||
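The nfs4proc.c hunks above thread an optional `struct nfs4_label` through getattr, lookup, create and setattr so a MAC security label (for example an SELinux context) can be fetched or set alongside the regular NFSv4 attributes. A minimal sketch of the allocate/use/free pattern those call sites follow is shown below; it assumes the fs/nfs/nfs4proc.c context for the helpers named in the hunks, and the wrapper function itself is hypothetical.

```c
/*
 * Illustrative sketch only, not part of the patch. It mirrors the label
 * lifecycle used by the new call sites above; the wrapper name is made up.
 */
static int example_getattr_with_label(struct nfs_server *server,
				      struct nfs_fh *fh,
				      struct nfs_fattr *fattr)
{
	struct nfs4_label *label;
	int err;

	/* returns an ERR_PTR on allocation failure (NULL when labels are compiled out) */
	label = nfs4_label_alloc(server, GFP_KERNEL);
	if (IS_ERR(label))
		return PTR_ERR(label);

	/* getattr now carries the label; nfs4_bitmask() adds the security
	 * label bit to the request when the server advertises support */
	err = nfs4_proc_getattr(server, fh, fattr, label);

	nfs4_label_free(label);
	return err;
}
```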
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index c4e225e4a9af..36e21cb29d65 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c | |||
| @@ -478,48 +478,12 @@ static int nfs41_check_session_ready(struct nfs_client *clp) | |||
| 478 | return 0; | 478 | return 0; |
| 479 | } | 479 | } |
| 480 | 480 | ||
| 481 | int nfs4_init_session(struct nfs_server *server) | 481 | int nfs4_init_session(struct nfs_client *clp) |
| 482 | { | 482 | { |
| 483 | struct nfs_client *clp = server->nfs_client; | ||
| 484 | struct nfs4_session *session; | ||
| 485 | unsigned int target_max_rqst_sz = NFS_MAX_FILE_IO_SIZE; | ||
| 486 | unsigned int target_max_resp_sz = NFS_MAX_FILE_IO_SIZE; | ||
| 487 | |||
| 488 | if (!nfs4_has_session(clp)) | 483 | if (!nfs4_has_session(clp)) |
| 489 | return 0; | 484 | return 0; |
| 490 | 485 | ||
| 491 | if (server->rsize != 0) | 486 | clear_bit(NFS4_SESSION_INITING, &clp->cl_session->session_state); |
| 492 | target_max_resp_sz = server->rsize; | ||
| 493 | target_max_resp_sz += nfs41_maxread_overhead; | ||
| 494 | |||
| 495 | if (server->wsize != 0) | ||
| 496 | target_max_rqst_sz = server->wsize; | ||
| 497 | target_max_rqst_sz += nfs41_maxwrite_overhead; | ||
| 498 | |||
| 499 | session = clp->cl_session; | ||
| 500 | spin_lock(&clp->cl_lock); | ||
| 501 | if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { | ||
| 502 | /* Initialise targets and channel attributes */ | ||
| 503 | session->fc_target_max_rqst_sz = target_max_rqst_sz; | ||
| 504 | session->fc_attrs.max_rqst_sz = target_max_rqst_sz; | ||
| 505 | session->fc_target_max_resp_sz = target_max_resp_sz; | ||
| 506 | session->fc_attrs.max_resp_sz = target_max_resp_sz; | ||
| 507 | } else { | ||
| 508 | /* Just adjust the targets */ | ||
| 509 | if (target_max_rqst_sz > session->fc_target_max_rqst_sz) { | ||
| 510 | session->fc_target_max_rqst_sz = target_max_rqst_sz; | ||
| 511 | set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); | ||
| 512 | } | ||
| 513 | if (target_max_resp_sz > session->fc_target_max_resp_sz) { | ||
| 514 | session->fc_target_max_resp_sz = target_max_resp_sz; | ||
| 515 | set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); | ||
| 516 | } | ||
| 517 | } | ||
| 518 | spin_unlock(&clp->cl_lock); | ||
| 519 | |||
| 520 | if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) | ||
| 521 | nfs4_schedule_lease_recovery(clp); | ||
| 522 | |||
| 523 | return nfs41_check_session_ready(clp); | 487 | return nfs41_check_session_ready(clp); |
| 524 | } | 488 | } |
| 525 | 489 | ||
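With the per-session fore-channel targets gone, `nfs4_init_session()` above shrinks to clearing the NFS4_SESSION_INITING flag and now takes the `nfs_client` rather than an `nfs_server`; the request/response limits are computed once in `nfs4_init_channel_attrs()` in the nfs4proc.c hunk earlier in this diff. A hedged sketch of how a caller adapts follows; the surrounding helper is hypothetical.

```c
/*
 * Hedged sketch: adapting a caller to the new nfs4_init_session()
 * signature, which takes the nfs_client. The helper below is hypothetical.
 */
static int example_finish_mount(struct nfs_server *server)
{
	/*
	 * rsize/wsize no longer feed per-session size targets; the fore
	 * channel is sized from NFS_MAX_FILE_IO_SIZE plus the rpc overheads
	 * at CREATE_SESSION time.
	 */
	return nfs4_init_session(server->nfs_client);
}
```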
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index ff7d9f0f8a65..3a153d82b90c 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h | |||
| @@ -66,9 +66,6 @@ struct nfs4_session { | |||
| 66 | struct nfs4_channel_attrs bc_attrs; | 66 | struct nfs4_channel_attrs bc_attrs; |
| 67 | struct nfs4_slot_table bc_slot_table; | 67 | struct nfs4_slot_table bc_slot_table; |
| 68 | struct nfs_client *clp; | 68 | struct nfs_client *clp; |
| 69 | /* Create session arguments */ | ||
| 70 | unsigned int fc_target_max_rqst_sz; | ||
| 71 | unsigned int fc_target_max_resp_sz; | ||
| 72 | }; | 69 | }; |
| 73 | 70 | ||
| 74 | enum nfs4_session_state { | 71 | enum nfs4_session_state { |
| @@ -89,7 +86,7 @@ extern int nfs4_setup_session_slot_tables(struct nfs4_session *ses); | |||
| 89 | 86 | ||
| 90 | extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); | 87 | extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); |
| 91 | extern void nfs4_destroy_session(struct nfs4_session *session); | 88 | extern void nfs4_destroy_session(struct nfs4_session *session); |
| 92 | extern int nfs4_init_session(struct nfs_server *server); | 89 | extern int nfs4_init_session(struct nfs_client *clp); |
| 93 | extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); | 90 | extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); |
| 94 | 91 | ||
| 95 | extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); | 92 | extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); |
| @@ -122,7 +119,7 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp) | |||
| 122 | 119 | ||
| 123 | #else /* defined(CONFIG_NFS_V4_1) */ | 120 | #else /* defined(CONFIG_NFS_V4_1) */ |
| 124 | 121 | ||
| 125 | static inline int nfs4_init_session(struct nfs_server *server) | 122 | static inline int nfs4_init_session(struct nfs_client *clp) |
| 126 | { | 123 | { |
| 127 | return 0; | 124 | return 0; |
| 128 | } | 125 | } |
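For context, the label machinery added earlier in this diff is exposed to user space through the standard `security.*` extended attribute interface registered in `nfs4_xattr_handlers`. A hedged user-space sketch of reading such a label over an NFS mount is shown below; the mount path and attribute name are examples only, not taken from the patch.

```c
/* User-space sketch: read a file's MAC label via the security.* xattr
 * handler registered in the nfs4proc.c hunks above. Path and attribute
 * name are illustrative. */
#include <stdio.h>
#include <sys/xattr.h>

int main(void)
{
	char label[256];
	ssize_t len;

	len = getxattr("/mnt/nfs/file", "security.selinux", label, sizeof(label));
	if (len < 0) {
		perror("getxattr");
		return 1;
	}
	printf("%.*s\n", (int)len, label);
	return 0;
}
```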
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1fab140764c4..e22862f13564 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
| @@ -228,19 +228,8 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp) | |||
| 228 | return status; | 228 | return status; |
| 229 | } | 229 | } |
| 230 | 230 | ||
| 231 | /* | 231 | static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl) |
| 232 | * Back channel returns NFS4ERR_DELAY for new requests when | ||
| 233 | * NFS4_SESSION_DRAINING is set so there is no work to be done when draining | ||
| 234 | * is ended. | ||
| 235 | */ | ||
| 236 | static void nfs4_end_drain_session(struct nfs_client *clp) | ||
| 237 | { | 232 | { |
| 238 | struct nfs4_session *ses = clp->cl_session; | ||
| 239 | struct nfs4_slot_table *tbl; | ||
| 240 | |||
| 241 | if (ses == NULL) | ||
| 242 | return; | ||
| 243 | tbl = &ses->fc_slot_table; | ||
| 244 | if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { | 233 | if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { |
| 245 | spin_lock(&tbl->slot_tbl_lock); | 234 | spin_lock(&tbl->slot_tbl_lock); |
| 246 | nfs41_wake_slot_table(tbl); | 235 | nfs41_wake_slot_table(tbl); |
| @@ -248,6 +237,16 @@ static void nfs4_end_drain_session(struct nfs_client *clp) | |||
| 248 | } | 237 | } |
| 249 | } | 238 | } |
| 250 | 239 | ||
| 240 | static void nfs4_end_drain_session(struct nfs_client *clp) | ||
| 241 | { | ||
| 242 | struct nfs4_session *ses = clp->cl_session; | ||
| 243 | |||
| 244 | if (ses != NULL) { | ||
| 245 | nfs4_end_drain_slot_table(&ses->bc_slot_table); | ||
| 246 | nfs4_end_drain_slot_table(&ses->fc_slot_table); | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 251 | /* | 250 | /* |
| 252 | * Signal state manager thread if session fore channel is drained | 251 | * Signal state manager thread if session fore channel is drained |
| 253 | */ | 252 | */ |
| @@ -1194,7 +1193,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) | |||
| 1194 | snprintf(buf, sizeof(buf), "%s-manager", | 1193 | snprintf(buf, sizeof(buf), "%s-manager", |
| 1195 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); | 1194 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); |
| 1196 | rcu_read_unlock(); | 1195 | rcu_read_unlock(); |
| 1197 | task = kthread_run(nfs4_run_state_manager, clp, buf); | 1196 | task = kthread_run(nfs4_run_state_manager, clp, "%s", buf); |
| 1198 | if (IS_ERR(task)) { | 1197 | if (IS_ERR(task)) { |
| 1199 | printk(KERN_ERR "%s: kthread_run: %ld\n", | 1198 | printk(KERN_ERR "%s: kthread_run: %ld\n", |
| 1200 | __func__, PTR_ERR(task)); | 1199 | __func__, PTR_ERR(task)); |
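The one-line kthread_run() change above closes a format-string hazard: the name argument is forwarded to a printf-style formatter, so a buffer derived from the RPC peer address must be passed through "%s" rather than used as the format itself. A minimal user-space illustration of the same pitfall, with printf standing in for the kernel's task-name formatting:

#include <stdio.h>

int main(void)
{
    /* Imagine this string came from a remote address or other external data. */
    const char name[] = "10.0.0.1%n-manager";

    /* Unsafe pattern: the data would be interpreted as a format string.
     *     printf(name);    // undefined behaviour if it contains %n, %s, ...
     */

    /* Safe pattern, equivalent to kthread_run(..., "%s", buf): */
    printf("%s\n", name);
    return 0;
}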
| @@ -1373,13 +1372,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ | |||
| 1373 | /* Guard against delegation returns and new lock/unlock calls */ | 1372 | /* Guard against delegation returns and new lock/unlock calls */ |
| 1374 | down_write(&nfsi->rwsem); | 1373 | down_write(&nfsi->rwsem); |
| 1375 | /* Protect inode->i_flock using the BKL */ | 1374 | /* Protect inode->i_flock using the BKL */ |
| 1376 | lock_flocks(); | 1375 | spin_lock(&inode->i_lock); |
| 1377 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 1376 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
| 1378 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) | 1377 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) |
| 1379 | continue; | 1378 | continue; |
| 1380 | if (nfs_file_open_context(fl->fl_file)->state != state) | 1379 | if (nfs_file_open_context(fl->fl_file)->state != state) |
| 1381 | continue; | 1380 | continue; |
| 1382 | unlock_flocks(); | 1381 | spin_unlock(&inode->i_lock); |
| 1383 | status = ops->recover_lock(state, fl); | 1382 | status = ops->recover_lock(state, fl); |
| 1384 | switch (status) { | 1383 | switch (status) { |
| 1385 | case 0: | 1384 | case 0: |
| @@ -1406,9 +1405,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ | |||
| 1406 | /* kill_proc(fl->fl_pid, SIGLOST, 1); */ | 1405 | /* kill_proc(fl->fl_pid, SIGLOST, 1); */ |
| 1407 | status = 0; | 1406 | status = 0; |
| 1408 | } | 1407 | } |
| 1409 | lock_flocks(); | 1408 | spin_lock(&inode->i_lock); |
| 1410 | } | 1409 | } |
| 1411 | unlock_flocks(); | 1410 | spin_unlock(&inode->i_lock); |
| 1412 | out: | 1411 | out: |
| 1413 | up_write(&nfsi->rwsem); | 1412 | up_write(&nfsi->rwsem); |
| 1414 | return status; | 1413 | return status; |
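These hunks swap the old BKL-backed lock_flocks()/unlock_flocks() pair for the per-inode i_lock spinlock while keeping the same shape: take the lock to walk inode->i_flock, drop it around the blocking recovery call, and re-take it before touching the next list entry. A hedged user-space sketch of that walk-drop-retake pattern, with a pthread mutex standing in for the spinlock and invented list/recovery types:

#include <pthread.h>
#include <stdio.h>

struct file_lock {
    int id;
    struct file_lock *next;
};

static pthread_mutex_t i_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for ops->recover_lock(): may sleep, so it must be called
 * without the list lock held. */
static int recover_lock(struct file_lock *fl)
{
    printf("recovering lock %d\n", fl->id);
    return 0;
}

static int reclaim_locks(struct file_lock *head)
{
    int status = 0;

    pthread_mutex_lock(&i_lock);
    for (struct file_lock *fl = head; fl != NULL; fl = fl->next) {
        /* Drop the lock around the blocking call... */
        pthread_mutex_unlock(&i_lock);
        status = recover_lock(fl);
        /* ...and re-take it before following fl->next again. */
        pthread_mutex_lock(&i_lock);
        if (status < 0)
            break;
    }
    pthread_mutex_unlock(&i_lock);
    return status;
}

int main(void)
{
    struct file_lock b = { .id = 2, .next = NULL };
    struct file_lock a = { .id = 1, .next = &b };
    return reclaim_locks(&a);
}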
| @@ -1563,11 +1562,12 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) | |||
| 1563 | } | 1562 | } |
| 1564 | 1563 | ||
| 1565 | static void nfs4_reclaim_complete(struct nfs_client *clp, | 1564 | static void nfs4_reclaim_complete(struct nfs_client *clp, |
| 1566 | const struct nfs4_state_recovery_ops *ops) | 1565 | const struct nfs4_state_recovery_ops *ops, |
| 1566 | struct rpc_cred *cred) | ||
| 1567 | { | 1567 | { |
| 1568 | /* Notify the server we're done reclaiming our state */ | 1568 | /* Notify the server we're done reclaiming our state */ |
| 1569 | if (ops->reclaim_complete) | 1569 | if (ops->reclaim_complete) |
| 1570 | (void)ops->reclaim_complete(clp); | 1570 | (void)ops->reclaim_complete(clp, cred); |
| 1571 | } | 1571 | } |
| 1572 | 1572 | ||
| 1573 | static void nfs4_clear_reclaim_server(struct nfs_server *server) | 1573 | static void nfs4_clear_reclaim_server(struct nfs_server *server) |
| @@ -1612,9 +1612,15 @@ static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp) | |||
| 1612 | 1612 | ||
| 1613 | static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) | 1613 | static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) |
| 1614 | { | 1614 | { |
| 1615 | const struct nfs4_state_recovery_ops *ops; | ||
| 1616 | struct rpc_cred *cred; | ||
| 1617 | |||
| 1615 | if (!nfs4_state_clear_reclaim_reboot(clp)) | 1618 | if (!nfs4_state_clear_reclaim_reboot(clp)) |
| 1616 | return; | 1619 | return; |
| 1617 | nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops); | 1620 | ops = clp->cl_mvops->reboot_recovery_ops; |
| 1621 | cred = ops->get_clid_cred(clp); | ||
| 1622 | nfs4_reclaim_complete(clp, ops, cred); | ||
| 1623 | put_rpccred(cred); | ||
| 1618 | } | 1624 | } |
| 1619 | 1625 | ||
| 1620 | static void nfs_delegation_clear_all(struct nfs_client *clp) | 1626 | static void nfs_delegation_clear_all(struct nfs_client *clp) |
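RECLAIM_COMPLETE is now issued with an explicit credential: the caller looks up the client-ID credential through the recovery ops, hands it to nfs4_reclaim_complete(), and drops its reference afterwards. A small sketch of that get/use/put ownership pattern; the refcount type and helper names here are illustrative only, not the kernel's struct rpc_cred API.

#include <stdio.h>

/* Illustrative refcounted credential. */
struct cred {
    int refcount;
};

static struct cred *get_clid_cred(struct cred *c) { c->refcount++; return c; }
static void put_cred(struct cred *c)              { c->refcount--; }

static void reclaim_complete(struct cred *cred)
{
    printf("sending RECLAIM_COMPLETE with cred (refs=%d)\n", cred->refcount);
}

int main(void)
{
    struct cred clid = { .refcount = 1 };
    struct cred *cred = get_clid_cred(&clid);  /* caller owns a reference */

    reclaim_complete(cred);                    /* callee only borrows it  */
    put_cred(cred);                            /* caller drops it again   */

    printf("refs after put: %d\n", clid.refcount);
    return 0;
}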
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index a5e1a3026d48..5dbe2d269210 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "delegation.h" | 9 | #include "delegation.h" |
| 10 | #include "internal.h" | 10 | #include "internal.h" |
| 11 | #include "nfs4_fs.h" | 11 | #include "nfs4_fs.h" |
| 12 | #include "dns_resolve.h" | ||
| 12 | #include "pnfs.h" | 13 | #include "pnfs.h" |
| 13 | #include "nfs.h" | 14 | #include "nfs.h" |
| 14 | 15 | ||
| @@ -331,18 +332,24 @@ static int __init init_nfs_v4(void) | |||
| 331 | { | 332 | { |
| 332 | int err; | 333 | int err; |
| 333 | 334 | ||
| 334 | err = nfs_idmap_init(); | 335 | err = nfs_dns_resolver_init(); |
| 335 | if (err) | 336 | if (err) |
| 336 | goto out; | 337 | goto out; |
| 337 | 338 | ||
| 338 | err = nfs4_register_sysctl(); | 339 | err = nfs_idmap_init(); |
| 339 | if (err) | 340 | if (err) |
| 340 | goto out1; | 341 | goto out1; |
| 341 | 342 | ||
| 343 | err = nfs4_register_sysctl(); | ||
| 344 | if (err) | ||
| 345 | goto out2; | ||
| 346 | |||
| 342 | register_nfs_version(&nfs_v4); | 347 | register_nfs_version(&nfs_v4); |
| 343 | return 0; | 348 | return 0; |
| 344 | out1: | 349 | out2: |
| 345 | nfs_idmap_quit(); | 350 | nfs_idmap_quit(); |
| 351 | out1: | ||
| 352 | nfs_dns_resolver_destroy(); | ||
| 346 | out: | 353 | out: |
| 347 | return err; | 354 | return err; |
| 348 | } | 355 | } |
| @@ -352,6 +359,7 @@ static void __exit exit_nfs_v4(void) | |||
| 352 | unregister_nfs_version(&nfs_v4); | 359 | unregister_nfs_version(&nfs_v4); |
| 353 | nfs4_unregister_sysctl(); | 360 | nfs4_unregister_sysctl(); |
| 354 | nfs_idmap_quit(); | 361 | nfs_idmap_quit(); |
| 362 | nfs_dns_resolver_destroy(); | ||
| 355 | } | 363 | } |
| 356 | 364 | ||
| 357 | MODULE_LICENSE("GPL"); | 365 | MODULE_LICENSE("GPL"); |
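The nfs4super.c change adds a DNS-resolver init step ahead of idmap and sysctl registration, and renumbers the error labels so that each failure path unwinds exactly the steps that succeeded, with module exit tearing everything down in reverse order. A standalone sketch of that init/unwind ladder; the step names are placeholders for nfs_dns_resolver_init/destroy, nfs_idmap_init/quit and nfs4_(un)register_sysctl.

#include <stdio.h>

static int  step_a_init(void) { puts("A up");   return 0; }
static void step_a_exit(void) { puts("A down"); }
static int  step_b_init(void) { puts("B up");   return 0; }
static void step_b_exit(void) { puts("B down"); }
static int  step_c_init(void) { puts("C up");   return 0; }

static int module_init_sketch(void)
{
    int err;

    err = step_a_init();
    if (err)
        goto out;
    err = step_b_init();
    if (err)
        goto out1;
    err = step_c_init();
    if (err)
        goto out2;
    return 0;

out2:                  /* C failed: undo B, then A */
    step_b_exit();
out1:                  /* B failed: undo A only    */
    step_a_exit();
out:
    return err;
}

int main(void)
{
    return module_init_sketch();
}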
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4be8d135ed61..0abfb8466e79 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
| @@ -102,12 +102,23 @@ static int nfs4_stat_to_errno(int); | |||
| 102 | #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) | 102 | #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) |
| 103 | #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) | 103 | #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) |
| 104 | #define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) | 104 | #define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) |
| 105 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
| 106 | /* PI(4 bytes) + LFS(4 bytes) + 1(for null terminator?) + MAXLABELLEN */ | ||
| 107 | #define nfs4_label_maxsz (4 + 4 + 1 + XDR_QUADLEN(NFS4_MAXLABELLEN)) | ||
| 108 | #define encode_readdir_space 24 | ||
| 109 | #define encode_readdir_bitmask_sz 3 | ||
| 110 | #else | ||
| 111 | #define nfs4_label_maxsz 0 | ||
| 112 | #define encode_readdir_space 20 | ||
| 113 | #define encode_readdir_bitmask_sz 2 | ||
| 114 | #endif | ||
| 105 | /* We support only one layout type per file system */ | 115 | /* We support only one layout type per file system */ |
| 106 | #define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8) | 116 | #define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8) |
| 107 | /* This is based on getfattr, which uses the most attributes: */ | 117 | /* This is based on getfattr, which uses the most attributes: */ |
| 108 | #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ | 118 | #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ |
| 109 | 3 + 3 + 3 + nfs4_owner_maxsz + \ | 119 | 3 + 3 + 3 + nfs4_owner_maxsz + \ |
| 110 | nfs4_group_maxsz + decode_mdsthreshold_maxsz)) | 120 | nfs4_group_maxsz + nfs4_label_maxsz + \ |
| 121 | decode_mdsthreshold_maxsz)) | ||
| 111 | #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ | 122 | #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ |
| 112 | nfs4_fattr_value_maxsz) | 123 | nfs4_fattr_value_maxsz) |
| 113 | #define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) | 124 | #define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) |
| @@ -115,6 +126,7 @@ static int nfs4_stat_to_errno(int); | |||
| 115 | 1 + 2 + 1 + \ | 126 | 1 + 2 + 1 + \ |
| 116 | nfs4_owner_maxsz + \ | 127 | nfs4_owner_maxsz + \ |
| 117 | nfs4_group_maxsz + \ | 128 | nfs4_group_maxsz + \ |
| 129 | nfs4_label_maxsz + \ | ||
| 118 | 4 + 4) | 130 | 4 + 4) |
| 119 | #define encode_savefh_maxsz (op_encode_hdr_maxsz) | 131 | #define encode_savefh_maxsz (op_encode_hdr_maxsz) |
| 120 | #define decode_savefh_maxsz (op_decode_hdr_maxsz) | 132 | #define decode_savefh_maxsz (op_decode_hdr_maxsz) |
| @@ -192,9 +204,11 @@ static int nfs4_stat_to_errno(int); | |||
| 192 | encode_stateid_maxsz + 3) | 204 | encode_stateid_maxsz + 3) |
| 193 | #define decode_read_maxsz (op_decode_hdr_maxsz + 2) | 205 | #define decode_read_maxsz (op_decode_hdr_maxsz + 2) |
| 194 | #define encode_readdir_maxsz (op_encode_hdr_maxsz + \ | 206 | #define encode_readdir_maxsz (op_encode_hdr_maxsz + \ |
| 195 | 2 + encode_verifier_maxsz + 5) | 207 | 2 + encode_verifier_maxsz + 5 + \ |
| 208 | nfs4_label_maxsz) | ||
| 196 | #define decode_readdir_maxsz (op_decode_hdr_maxsz + \ | 209 | #define decode_readdir_maxsz (op_decode_hdr_maxsz + \ |
| 197 | decode_verifier_maxsz) | 210 | decode_verifier_maxsz + \ |
| 211 | nfs4_label_maxsz + nfs4_fattr_maxsz) | ||
| 198 | #define encode_readlink_maxsz (op_encode_hdr_maxsz) | 212 | #define encode_readlink_maxsz (op_encode_hdr_maxsz) |
| 199 | #define decode_readlink_maxsz (op_decode_hdr_maxsz + 1) | 213 | #define decode_readlink_maxsz (op_decode_hdr_maxsz + 1) |
| 200 | #define encode_write_maxsz (op_encode_hdr_maxsz + \ | 214 | #define encode_write_maxsz (op_encode_hdr_maxsz + \ |
| @@ -853,6 +867,12 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH + | |||
| 853 | decode_sequence_maxsz + | 867 | decode_sequence_maxsz + |
| 854 | decode_putfh_maxsz) * | 868 | decode_putfh_maxsz) * |
| 855 | XDR_UNIT); | 869 | XDR_UNIT); |
| 870 | |||
| 871 | const u32 nfs41_maxgetdevinfo_overhead = ((RPC_MAX_REPHEADER_WITH_AUTH + | ||
| 872 | compound_decode_hdr_maxsz + | ||
| 873 | decode_sequence_maxsz) * | ||
| 874 | XDR_UNIT); | ||
| 875 | EXPORT_SYMBOL_GPL(nfs41_maxgetdevinfo_overhead); | ||
| 856 | #endif /* CONFIG_NFS_V4_1 */ | 876 | #endif /* CONFIG_NFS_V4_1 */ |
| 857 | 877 | ||
| 858 | static const umode_t nfs_type2fmt[] = { | 878 | static const umode_t nfs_type2fmt[] = { |
| @@ -968,7 +988,9 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve | |||
| 968 | encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE); | 988 | encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE); |
| 969 | } | 989 | } |
| 970 | 990 | ||
| 971 | static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) | 991 | static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, |
| 992 | const struct nfs4_label *label, | ||
| 993 | const struct nfs_server *server) | ||
| 972 | { | 994 | { |
| 973 | char owner_name[IDMAP_NAMESZ]; | 995 | char owner_name[IDMAP_NAMESZ]; |
| 974 | char owner_group[IDMAP_NAMESZ]; | 996 | char owner_group[IDMAP_NAMESZ]; |
| @@ -979,15 +1001,16 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
| 979 | int len; | 1001 | int len; |
| 980 | uint32_t bmval0 = 0; | 1002 | uint32_t bmval0 = 0; |
| 981 | uint32_t bmval1 = 0; | 1003 | uint32_t bmval1 = 0; |
| 1004 | uint32_t bmval2 = 0; | ||
| 982 | 1005 | ||
| 983 | /* | 1006 | /* |
| 984 | * We reserve enough space to write the entire attribute buffer at once. | 1007 | * We reserve enough space to write the entire attribute buffer at once. |
| 985 | * In the worst-case, this would be | 1008 | * In the worst-case, this would be |
| 986 | * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) | 1009 | * 16(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) |
| 987 | * = 36 bytes, plus any contribution from variable-length fields | 1010 | * = 40 bytes, plus any contribution from variable-length fields |
| 988 | * such as owner/group. | 1011 | * such as owner/group. |
| 989 | */ | 1012 | */ |
| 990 | len = 16; | 1013 | len = 20; |
| 991 | 1014 | ||
| 992 | /* Sigh */ | 1015 | /* Sigh */ |
| 993 | if (iap->ia_valid & ATTR_SIZE) | 1016 | if (iap->ia_valid & ATTR_SIZE) |
| @@ -1017,6 +1040,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
| 1017 | } | 1040 | } |
| 1018 | len += 4 + (XDR_QUADLEN(owner_grouplen) << 2); | 1041 | len += 4 + (XDR_QUADLEN(owner_grouplen) << 2); |
| 1019 | } | 1042 | } |
| 1043 | if (label) | ||
| 1044 | len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2); | ||
| 1020 | if (iap->ia_valid & ATTR_ATIME_SET) | 1045 | if (iap->ia_valid & ATTR_ATIME_SET) |
| 1021 | len += 16; | 1046 | len += 16; |
| 1022 | else if (iap->ia_valid & ATTR_ATIME) | 1047 | else if (iap->ia_valid & ATTR_ATIME) |
| @@ -1031,9 +1056,9 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
| 1031 | * We write the bitmap length now, but leave the bitmap and the attribute | 1056 | * We write the bitmap length now, but leave the bitmap and the attribute |
| 1032 | * buffer length to be backfilled at the end of this routine. | 1057 | * buffer length to be backfilled at the end of this routine. |
| 1033 | */ | 1058 | */ |
| 1034 | *p++ = cpu_to_be32(2); | 1059 | *p++ = cpu_to_be32(3); |
| 1035 | q = p; | 1060 | q = p; |
| 1036 | p += 3; | 1061 | p += 4; |
| 1037 | 1062 | ||
| 1038 | if (iap->ia_valid & ATTR_SIZE) { | 1063 | if (iap->ia_valid & ATTR_SIZE) { |
| 1039 | bmval0 |= FATTR4_WORD0_SIZE; | 1064 | bmval0 |= FATTR4_WORD0_SIZE; |
| @@ -1071,6 +1096,13 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
| 1071 | bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; | 1096 | bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; |
| 1072 | *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); | 1097 | *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); |
| 1073 | } | 1098 | } |
| 1099 | if (label) { | ||
| 1100 | bmval2 |= FATTR4_WORD2_SECURITY_LABEL; | ||
| 1101 | *p++ = cpu_to_be32(label->lfs); | ||
| 1102 | *p++ = cpu_to_be32(label->pi); | ||
| 1103 | *p++ = cpu_to_be32(label->len); | ||
| 1104 | p = xdr_encode_opaque_fixed(p, label->label, label->len); | ||
| 1105 | } | ||
| 1074 | 1106 | ||
| 1075 | /* | 1107 | /* |
| 1076 | * Now we backfill the bitmap and the attribute buffer length. | 1108 | * Now we backfill the bitmap and the attribute buffer length. |
| @@ -1080,9 +1112,10 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
| 1080 | len, ((char *)p - (char *)q) + 4); | 1112 | len, ((char *)p - (char *)q) + 4); |
| 1081 | BUG(); | 1113 | BUG(); |
| 1082 | } | 1114 | } |
| 1083 | len = (char *)p - (char *)q - 12; | 1115 | len = (char *)p - (char *)q - 16; |
| 1084 | *q++ = htonl(bmval0); | 1116 | *q++ = htonl(bmval0); |
| 1085 | *q++ = htonl(bmval1); | 1117 | *q++ = htonl(bmval1); |
| 1118 | *q++ = htonl(bmval2); | ||
| 1086 | *q = htonl(len); | 1119 | *q = htonl(len); |
| 1087 | 1120 | ||
| 1088 | /* out: */ | 1121 | /* out: */ |
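With a third bitmap word for FATTR4_WORD2_SECURITY_LABEL, the fixed header written by encode_attrs() grows from four to five 32-bit words (bitmap count, three bitmap words, attribute length), which is why the base reservation moves from 16 to 20 bytes and the backfilled attribute length now subtracts 16 bytes (bitmap words plus attrlen) instead of 12. A small user-space sketch of that reserve/encode/backfill arithmetic, with illustrative names and an invented attribute payload:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t buf[64];
    uint32_t *p = buf;

    *p++ = htonl(3);          /* bitmap length: now 3 words, was 2      */
    uint32_t *q = p;          /* remember where the bitmap starts       */
    p += 4;                   /* skip bitmap[3] + attrlen, filled later */

    /* Encode one fake attribute (a mode word) and note it in word 1. */
    uint32_t bmval[3] = { 0, 0x2 /* pretend FATTR4_WORD1_MODE */, 0 };
    *p++ = htonl(0644);

    /* Backfill: payload length excludes the 4 reserved words (16 bytes). */
    uint32_t len = (uint32_t)((char *)p - (char *)q) - 16;
    q[0] = htonl(bmval[0]);
    q[1] = htonl(bmval[1]);
    q[2] = htonl(bmval[2]);
    q[3] = htonl(len);

    printf("attr payload length = %u bytes\n", len);
    return 0;
}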
| @@ -1136,7 +1169,7 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * | |||
| 1136 | } | 1169 | } |
| 1137 | 1170 | ||
| 1138 | encode_string(xdr, create->name->len, create->name->name); | 1171 | encode_string(xdr, create->name->len, create->name->name); |
| 1139 | encode_attrs(xdr, create->attrs, create->server); | 1172 | encode_attrs(xdr, create->attrs, create->label, create->server); |
| 1140 | } | 1173 | } |
| 1141 | 1174 | ||
| 1142 | static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) | 1175 | static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) |
| @@ -1188,8 +1221,10 @@ encode_getattr_three(struct xdr_stream *xdr, | |||
| 1188 | 1221 | ||
| 1189 | static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) | 1222 | static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) |
| 1190 | { | 1223 | { |
| 1191 | encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], | 1224 | encode_getattr_three(xdr, bitmask[0] & nfs4_fattr_bitmap[0], |
| 1192 | bitmask[1] & nfs4_fattr_bitmap[1], hdr); | 1225 | bitmask[1] & nfs4_fattr_bitmap[1], |
| 1226 | bitmask[2] & nfs4_fattr_bitmap[2], | ||
| 1227 | hdr); | ||
| 1193 | } | 1228 | } |
| 1194 | 1229 | ||
| 1195 | static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, | 1230 | static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, |
| @@ -1367,11 +1402,11 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op | |||
| 1367 | switch(arg->createmode) { | 1402 | switch(arg->createmode) { |
| 1368 | case NFS4_CREATE_UNCHECKED: | 1403 | case NFS4_CREATE_UNCHECKED: |
| 1369 | *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); | 1404 | *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); |
| 1370 | encode_attrs(xdr, arg->u.attrs, arg->server); | 1405 | encode_attrs(xdr, arg->u.attrs, arg->label, arg->server); |
| 1371 | break; | 1406 | break; |
| 1372 | case NFS4_CREATE_GUARDED: | 1407 | case NFS4_CREATE_GUARDED: |
| 1373 | *p = cpu_to_be32(NFS4_CREATE_GUARDED); | 1408 | *p = cpu_to_be32(NFS4_CREATE_GUARDED); |
| 1374 | encode_attrs(xdr, arg->u.attrs, arg->server); | 1409 | encode_attrs(xdr, arg->u.attrs, arg->label, arg->server); |
| 1375 | break; | 1410 | break; |
| 1376 | case NFS4_CREATE_EXCLUSIVE: | 1411 | case NFS4_CREATE_EXCLUSIVE: |
| 1377 | *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); | 1412 | *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); |
| @@ -1381,7 +1416,7 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op | |||
| 1381 | *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); | 1416 | *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); |
| 1382 | encode_nfs4_verifier(xdr, &arg->u.verifier); | 1417 | encode_nfs4_verifier(xdr, &arg->u.verifier); |
| 1383 | dummy.ia_valid = 0; | 1418 | dummy.ia_valid = 0; |
| 1384 | encode_attrs(xdr, &dummy, arg->server); | 1419 | encode_attrs(xdr, &dummy, arg->label, arg->server); |
| 1385 | } | 1420 | } |
| 1386 | } | 1421 | } |
| 1387 | 1422 | ||
| @@ -1532,7 +1567,7 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, | |||
| 1532 | 1567 | ||
| 1533 | static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) | 1568 | static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) |
| 1534 | { | 1569 | { |
| 1535 | uint32_t attrs[2] = { | 1570 | uint32_t attrs[3] = { |
| 1536 | FATTR4_WORD0_RDATTR_ERROR, | 1571 | FATTR4_WORD0_RDATTR_ERROR, |
| 1537 | FATTR4_WORD1_MOUNTED_ON_FILEID, | 1572 | FATTR4_WORD1_MOUNTED_ON_FILEID, |
| 1538 | }; | 1573 | }; |
| @@ -1555,20 +1590,26 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg | |||
| 1555 | encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr); | 1590 | encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr); |
| 1556 | encode_uint64(xdr, readdir->cookie); | 1591 | encode_uint64(xdr, readdir->cookie); |
| 1557 | encode_nfs4_verifier(xdr, &readdir->verifier); | 1592 | encode_nfs4_verifier(xdr, &readdir->verifier); |
| 1558 | p = reserve_space(xdr, 20); | 1593 | p = reserve_space(xdr, encode_readdir_space); |
| 1559 | *p++ = cpu_to_be32(dircount); | 1594 | *p++ = cpu_to_be32(dircount); |
| 1560 | *p++ = cpu_to_be32(readdir->count); | 1595 | *p++ = cpu_to_be32(readdir->count); |
| 1561 | *p++ = cpu_to_be32(2); | 1596 | *p++ = cpu_to_be32(encode_readdir_bitmask_sz); |
| 1562 | |||
| 1563 | *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); | 1597 | *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); |
| 1564 | *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); | 1598 | *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); |
| 1599 | if (encode_readdir_bitmask_sz > 2) { | ||
| 1600 | if (hdr->minorversion > 1) | ||
| 1601 | attrs[2] |= FATTR4_WORD2_SECURITY_LABEL; | ||
| 1602 | p++, *p++ = cpu_to_be32(attrs[2] & readdir->bitmask[2]); | ||
| 1603 | } | ||
| 1565 | memcpy(verf, readdir->verifier.data, sizeof(verf)); | 1604 | memcpy(verf, readdir->verifier.data, sizeof(verf)); |
| 1566 | dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", | 1605 | |
| 1606 | dprintk("%s: cookie = %llu, verifier = %08x:%08x, bitmap = %08x:%08x:%08x\n", | ||
| 1567 | __func__, | 1607 | __func__, |
| 1568 | (unsigned long long)readdir->cookie, | 1608 | (unsigned long long)readdir->cookie, |
| 1569 | verf[0], verf[1], | 1609 | verf[0], verf[1], |
| 1570 | attrs[0] & readdir->bitmask[0], | 1610 | attrs[0] & readdir->bitmask[0], |
| 1571 | attrs[1] & readdir->bitmask[1]); | 1611 | attrs[1] & readdir->bitmask[1], |
| 1612 | attrs[2] & readdir->bitmask[2]); | ||
| 1572 | } | 1613 | } |
| 1573 | 1614 | ||
| 1574 | static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) | 1615 | static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) |
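When labels are compiled in, the READDIR request carries a third attribute-bitmask word, so the reserved space grows from 20 to 24 bytes (dircount, maxcount, bitmask length, plus one word per bitmask word), and the security-label bit is only requested on minor versions above 1, since labels are a v4.2 feature. A compact sketch of that sizing decision; HAVE_SECURITY_LABEL is a stand-in for CONFIG_NFS_V4_SECURITY_LABEL.

#include <stdio.h>

#define HAVE_SECURITY_LABEL 1   /* stand-in for CONFIG_NFS_V4_SECURITY_LABEL */

static unsigned int readdir_reserve_bytes(unsigned int bitmask_words)
{
    /* dircount (4) + maxcount (4) + bitmask length word (4)
     * + one 4-byte word per bitmask word requested. */
    return 4 + 4 + 4 + 4 * bitmask_words;
}

int main(void)
{
    unsigned int words = HAVE_SECURITY_LABEL ? 3 : 2;
    unsigned int minorversion = 2;

    int request_label = HAVE_SECURITY_LABEL && minorversion > 1;

    printf("bitmask words: %u, reserved bytes: %u, request label: %s\n",
           words, readdir_reserve_bytes(words),
           request_label ? "yes" : "no");
    return 0;
}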
| @@ -1627,7 +1668,7 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs | |||
| 1627 | { | 1668 | { |
| 1628 | encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr); | 1669 | encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr); |
| 1629 | encode_nfs4_stateid(xdr, &arg->stateid); | 1670 | encode_nfs4_stateid(xdr, &arg->stateid); |
| 1630 | encode_attrs(xdr, arg->iap, server); | 1671 | encode_attrs(xdr, arg->iap, arg->label, server); |
| 1631 | } | 1672 | } |
| 1632 | 1673 | ||
| 1633 | static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) | 1674 | static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) |
| @@ -1889,7 +1930,7 @@ encode_getdeviceinfo(struct xdr_stream *xdr, | |||
| 1889 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, | 1930 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, |
| 1890 | NFS4_DEVICEID4_SIZE); | 1931 | NFS4_DEVICEID4_SIZE); |
| 1891 | *p++ = cpu_to_be32(args->pdev->layout_type); | 1932 | *p++ = cpu_to_be32(args->pdev->layout_type); |
| 1892 | *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ | 1933 | *p++ = cpu_to_be32(args->pdev->maxcount); /* gdia_maxcount */ |
| 1893 | *p++ = cpu_to_be32(0); /* bitmap length 0 */ | 1934 | *p++ = cpu_to_be32(0); /* bitmap length 0 */ |
| 1894 | } | 1935 | } |
| 1895 | 1936 | ||
| @@ -4038,6 +4079,56 @@ static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap, | |||
| 4038 | return status; | 4079 | return status; |
| 4039 | } | 4080 | } |
| 4040 | 4081 | ||
| 4082 | static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, | ||
| 4083 | struct nfs4_label *label) | ||
| 4084 | { | ||
| 4085 | uint32_t pi = 0; | ||
| 4086 | uint32_t lfs = 0; | ||
| 4087 | __u32 len; | ||
| 4088 | __be32 *p; | ||
| 4089 | int status = 0; | ||
| 4090 | |||
| 4091 | if (unlikely(bitmap[2] & (FATTR4_WORD2_SECURITY_LABEL - 1U))) | ||
| 4092 | return -EIO; | ||
| 4093 | if (likely(bitmap[2] & FATTR4_WORD2_SECURITY_LABEL)) { | ||
| 4094 | p = xdr_inline_decode(xdr, 4); | ||
| 4095 | if (unlikely(!p)) | ||
| 4096 | goto out_overflow; | ||
| 4097 | lfs = be32_to_cpup(p++); | ||
| 4098 | p = xdr_inline_decode(xdr, 4); | ||
| 4099 | if (unlikely(!p)) | ||
| 4100 | goto out_overflow; | ||
| 4101 | pi = be32_to_cpup(p++); | ||
| 4102 | p = xdr_inline_decode(xdr, 4); | ||
| 4103 | if (unlikely(!p)) | ||
| 4104 | goto out_overflow; | ||
| 4105 | len = be32_to_cpup(p++); | ||
| 4106 | p = xdr_inline_decode(xdr, len); | ||
| 4107 | if (unlikely(!p)) | ||
| 4108 | goto out_overflow; | ||
| 4109 | if (len < NFS4_MAXLABELLEN) { | ||
| 4110 | if (label) { | ||
| 4111 | memcpy(label->label, p, len); | ||
| 4112 | label->len = len; | ||
| 4113 | label->pi = pi; | ||
| 4114 | label->lfs = lfs; | ||
| 4115 | status = NFS_ATTR_FATTR_V4_SECURITY_LABEL; | ||
| 4116 | } | ||
| 4117 | bitmap[2] &= ~FATTR4_WORD2_SECURITY_LABEL; | ||
| 4118 | } else | ||
| 4119 | printk(KERN_WARNING "%s: label too long (%u)!\n", | ||
| 4120 | __func__, len); | ||
| 4121 | } | ||
| 4122 | if (label && label->label) | ||
| 4123 | dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__, | ||
| 4124 | (char *)label->label, label->len, label->pi, label->lfs); | ||
| 4125 | return status; | ||
| 4126 | |||
| 4127 | out_overflow: | ||
| 4128 | print_overflow_msg(__func__, xdr); | ||
| 4129 | return -EIO; | ||
| 4130 | } | ||
| 4131 | |||
| 4041 | static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) | 4132 | static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) |
| 4042 | { | 4133 | { |
| 4043 | int status = 0; | 4134 | int status = 0; |
| @@ -4380,7 +4471,7 @@ out_overflow: | |||
| 4380 | 4471 | ||
| 4381 | static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, | 4472 | static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, |
| 4382 | struct nfs_fattr *fattr, struct nfs_fh *fh, | 4473 | struct nfs_fattr *fattr, struct nfs_fh *fh, |
| 4383 | struct nfs4_fs_locations *fs_loc, | 4474 | struct nfs4_fs_locations *fs_loc, struct nfs4_label *label, |
| 4384 | const struct nfs_server *server) | 4475 | const struct nfs_server *server) |
| 4385 | { | 4476 | { |
| 4386 | int status; | 4477 | int status; |
| @@ -4488,6 +4579,13 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, | |||
| 4488 | if (status < 0) | 4579 | if (status < 0) |
| 4489 | goto xdr_error; | 4580 | goto xdr_error; |
| 4490 | 4581 | ||
| 4582 | if (label) { | ||
| 4583 | status = decode_attr_security_label(xdr, bitmap, label); | ||
| 4584 | if (status < 0) | ||
| 4585 | goto xdr_error; | ||
| 4586 | fattr->valid |= status; | ||
| 4587 | } | ||
| 4588 | |||
| 4491 | xdr_error: | 4589 | xdr_error: |
| 4492 | dprintk("%s: xdr returned %d\n", __func__, -status); | 4590 | dprintk("%s: xdr returned %d\n", __func__, -status); |
| 4493 | return status; | 4591 | return status; |
| @@ -4495,7 +4593,7 @@ xdr_error: | |||
| 4495 | 4593 | ||
| 4496 | static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, | 4594 | static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, |
| 4497 | struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, | 4595 | struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, |
| 4498 | const struct nfs_server *server) | 4596 | struct nfs4_label *label, const struct nfs_server *server) |
| 4499 | { | 4597 | { |
| 4500 | unsigned int savep; | 4598 | unsigned int savep; |
| 4501 | uint32_t attrlen, | 4599 | uint32_t attrlen, |
| @@ -4514,7 +4612,8 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat | |||
| 4514 | if (status < 0) | 4612 | if (status < 0) |
| 4515 | goto xdr_error; | 4613 | goto xdr_error; |
| 4516 | 4614 | ||
| 4517 | status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, server); | 4615 | status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, |
| 4616 | label, server); | ||
| 4518 | if (status < 0) | 4617 | if (status < 0) |
| 4519 | goto xdr_error; | 4618 | goto xdr_error; |
| 4520 | 4619 | ||
| @@ -4524,10 +4623,16 @@ xdr_error: | |||
| 4524 | return status; | 4623 | return status; |
| 4525 | } | 4624 | } |
| 4526 | 4625 | ||
| 4626 | static int decode_getfattr_label(struct xdr_stream *xdr, struct nfs_fattr *fattr, | ||
| 4627 | struct nfs4_label *label, const struct nfs_server *server) | ||
| 4628 | { | ||
| 4629 | return decode_getfattr_generic(xdr, fattr, NULL, NULL, label, server); | ||
| 4630 | } | ||
| 4631 | |||
| 4527 | static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, | 4632 | static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, |
| 4528 | const struct nfs_server *server) | 4633 | const struct nfs_server *server) |
| 4529 | { | 4634 | { |
| 4530 | return decode_getfattr_generic(xdr, fattr, NULL, NULL, server); | 4635 | return decode_getfattr_generic(xdr, fattr, NULL, NULL, NULL, server); |
| 4531 | } | 4636 | } |
| 4532 | 4637 | ||
| 4533 | /* | 4638 | /* |
| @@ -5919,7 +6024,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
| 5919 | status = decode_getfh(xdr, res->fh); | 6024 | status = decode_getfh(xdr, res->fh); |
| 5920 | if (status) | 6025 | if (status) |
| 5921 | goto out; | 6026 | goto out; |
| 5922 | status = decode_getfattr(xdr, res->fattr, res->server); | 6027 | status = decode_getfattr_label(xdr, res->fattr, res->label, res->server); |
| 5923 | out: | 6028 | out: |
| 5924 | return status; | 6029 | return status; |
| 5925 | } | 6030 | } |
| @@ -5945,7 +6050,8 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, | |||
| 5945 | goto out; | 6050 | goto out; |
| 5946 | status = decode_getfh(xdr, res->fh); | 6051 | status = decode_getfh(xdr, res->fh); |
| 5947 | if (status == 0) | 6052 | if (status == 0) |
| 5948 | status = decode_getfattr(xdr, res->fattr, res->server); | 6053 | status = decode_getfattr_label(xdr, res->fattr, |
| 6054 | res->label, res->server); | ||
| 5949 | out: | 6055 | out: |
| 5950 | return status; | 6056 | return status; |
| 5951 | } | 6057 | } |
| @@ -6036,7 +6142,7 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
| 6036 | status = decode_restorefh(xdr); | 6142 | status = decode_restorefh(xdr); |
| 6037 | if (status) | 6143 | if (status) |
| 6038 | goto out; | 6144 | goto out; |
| 6039 | decode_getfattr(xdr, res->fattr, res->server); | 6145 | decode_getfattr_label(xdr, res->fattr, res->label, res->server); |
| 6040 | out: | 6146 | out: |
| 6041 | return status; | 6147 | return status; |
| 6042 | } | 6148 | } |
| @@ -6065,7 +6171,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
| 6065 | status = decode_getfh(xdr, res->fh); | 6171 | status = decode_getfh(xdr, res->fh); |
| 6066 | if (status) | 6172 | if (status) |
| 6067 | goto out; | 6173 | goto out; |
| 6068 | decode_getfattr(xdr, res->fattr, res->server); | 6174 | decode_getfattr_label(xdr, res->fattr, res->label, res->server); |
| 6069 | out: | 6175 | out: |
| 6070 | return status; | 6176 | return status; |
| 6071 | } | 6177 | } |
| @@ -6097,7 +6203,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
| 6097 | status = decode_putfh(xdr); | 6203 | status = decode_putfh(xdr); |
| 6098 | if (status) | 6204 | if (status) |
| 6099 | goto out; | 6205 | goto out; |
| 6100 | status = decode_getfattr(xdr, res->fattr, res->server); | 6206 | status = decode_getfattr_label(xdr, res->fattr, res->label, res->server); |
| 6101 | out: | 6207 | out: |
| 6102 | return status; | 6208 | return status; |
| 6103 | } | 6209 | } |
| @@ -6230,7 +6336,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
| 6230 | goto out; | 6336 | goto out; |
| 6231 | if (res->access_request) | 6337 | if (res->access_request) |
| 6232 | decode_access(xdr, &res->access_supported, &res->access_result); | 6338 | decode_access(xdr, &res->access_supported, &res->access_result); |
| 6233 | decode_getfattr(xdr, res->f_attr, res->server); | 6339 | decode_getfattr_label(xdr, res->f_attr, res->f_label, res->server); |
| 6234 | out: | 6340 | out: |
| 6235 | return status; | 6341 | return status; |
| 6236 | } | 6342 | } |
| @@ -6307,7 +6413,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, | |||
| 6307 | status = decode_setattr(xdr); | 6413 | status = decode_setattr(xdr); |
| 6308 | if (status) | 6414 | if (status) |
| 6309 | goto out; | 6415 | goto out; |
| 6310 | decode_getfattr(xdr, res->fattr, res->server); | 6416 | decode_getfattr_label(xdr, res->fattr, res->label, res->server); |
| 6311 | out: | 6417 | out: |
| 6312 | return status; | 6418 | return status; |
| 6313 | } | 6419 | } |
| @@ -6696,7 +6802,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, | |||
| 6696 | xdr_enter_page(xdr, PAGE_SIZE); | 6802 | xdr_enter_page(xdr, PAGE_SIZE); |
| 6697 | status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, | 6803 | status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, |
| 6698 | NULL, res->fs_locations, | 6804 | NULL, res->fs_locations, |
| 6699 | res->fs_locations->server); | 6805 | NULL, res->fs_locations->server); |
| 6700 | out: | 6806 | out: |
| 6701 | return status; | 6807 | return status; |
| 6702 | } | 6808 | } |
| @@ -7109,7 +7215,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, | |||
| 7109 | goto out_overflow; | 7215 | goto out_overflow; |
| 7110 | 7216 | ||
| 7111 | if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, | 7217 | if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, |
| 7112 | NULL, entry->server) < 0) | 7218 | NULL, entry->label, entry->server) < 0) |
| 7113 | goto out_overflow; | 7219 | goto out_overflow; |
| 7114 | if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) | 7220 | if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) |
| 7115 | entry->ino = entry->fattr->mounted_on_fileid; | 7221 | entry->ino = entry->fattr->mounted_on_fileid; |
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index a9ebd817278b..e4f9cbfec67b 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
| @@ -613,8 +613,10 @@ int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | |||
| 613 | pd.pgbase = 0; | 613 | pd.pgbase = 0; |
| 614 | pd.pglen = PAGE_SIZE; | 614 | pd.pglen = PAGE_SIZE; |
| 615 | pd.mincount = 0; | 615 | pd.mincount = 0; |
| 616 | pd.maxcount = PAGE_SIZE; | ||
| 616 | 617 | ||
| 617 | err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd); | 618 | err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd, |
| 619 | pnfslay->plh_lc_cred); | ||
| 618 | dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); | 620 | dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); |
| 619 | if (err) | 621 | if (err) |
| 620 | goto err_out; | 622 | goto err_out; |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c5bd758e5637..3a3a79d6bf15 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
| @@ -360,7 +360,7 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) | |||
| 360 | } | 360 | } |
| 361 | EXPORT_SYMBOL_GPL(pnfs_put_lseg); | 361 | EXPORT_SYMBOL_GPL(pnfs_put_lseg); |
| 362 | 362 | ||
| 363 | static inline u64 | 363 | static u64 |
| 364 | end_offset(u64 start, u64 len) | 364 | end_offset(u64 start, u64 len) |
| 365 | { | 365 | { |
| 366 | u64 end; | 366 | u64 end; |
| @@ -376,9 +376,9 @@ end_offset(u64 start, u64 len) | |||
| 376 | * start2 end2 | 376 | * start2 end2 |
| 377 | * [----------------) | 377 | * [----------------) |
| 378 | */ | 378 | */ |
| 379 | static inline int | 379 | static bool |
| 380 | lo_seg_contained(struct pnfs_layout_range *l1, | 380 | pnfs_lseg_range_contained(const struct pnfs_layout_range *l1, |
| 381 | struct pnfs_layout_range *l2) | 381 | const struct pnfs_layout_range *l2) |
| 382 | { | 382 | { |
| 383 | u64 start1 = l1->offset; | 383 | u64 start1 = l1->offset; |
| 384 | u64 end1 = end_offset(start1, l1->length); | 384 | u64 end1 = end_offset(start1, l1->length); |
| @@ -395,9 +395,9 @@ lo_seg_contained(struct pnfs_layout_range *l1, | |||
| 395 | * start2 end2 | 395 | * start2 end2 |
| 396 | * [----------------) | 396 | * [----------------) |
| 397 | */ | 397 | */ |
| 398 | static inline int | 398 | static bool |
| 399 | lo_seg_intersecting(struct pnfs_layout_range *l1, | 399 | pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, |
| 400 | struct pnfs_layout_range *l2) | 400 | const struct pnfs_layout_range *l2) |
| 401 | { | 401 | { |
| 402 | u64 start1 = l1->offset; | 402 | u64 start1 = l1->offset; |
| 403 | u64 end1 = end_offset(start1, l1->length); | 403 | u64 end1 = end_offset(start1, l1->length); |
| @@ -409,12 +409,12 @@ lo_seg_intersecting(struct pnfs_layout_range *l1, | |||
| 409 | } | 409 | } |
| 410 | 410 | ||
| 411 | static bool | 411 | static bool |
| 412 | should_free_lseg(struct pnfs_layout_range *lseg_range, | 412 | should_free_lseg(const struct pnfs_layout_range *lseg_range, |
| 413 | struct pnfs_layout_range *recall_range) | 413 | const struct pnfs_layout_range *recall_range) |
| 414 | { | 414 | { |
| 415 | return (recall_range->iomode == IOMODE_ANY || | 415 | return (recall_range->iomode == IOMODE_ANY || |
| 416 | lseg_range->iomode == recall_range->iomode) && | 416 | lseg_range->iomode == recall_range->iomode) && |
| 417 | lo_seg_intersecting(lseg_range, recall_range); | 417 | pnfs_lseg_range_intersecting(lseg_range, recall_range); |
| 418 | } | 418 | } |
| 419 | 419 | ||
| 420 | static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, | 420 | static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, |
| @@ -766,6 +766,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
| 766 | lgp->args.inode = ino; | 766 | lgp->args.inode = ino; |
| 767 | lgp->args.ctx = get_nfs_open_context(ctx); | 767 | lgp->args.ctx = get_nfs_open_context(ctx); |
| 768 | lgp->gfp_flags = gfp_flags; | 768 | lgp->gfp_flags = gfp_flags; |
| 769 | lgp->cred = lo->plh_lc_cred; | ||
| 769 | 770 | ||
| 770 | /* Synchronously retrieve layout information from server and | 771 | /* Synchronously retrieve layout information from server and |
| 771 | * store in lseg. | 772 | * store in lseg. |
| @@ -860,6 +861,7 @@ _pnfs_return_layout(struct inode *ino) | |||
| 860 | lrp->args.inode = ino; | 861 | lrp->args.inode = ino; |
| 861 | lrp->args.layout = lo; | 862 | lrp->args.layout = lo; |
| 862 | lrp->clp = NFS_SERVER(ino)->nfs_client; | 863 | lrp->clp = NFS_SERVER(ino)->nfs_client; |
| 864 | lrp->cred = lo->plh_lc_cred; | ||
| 863 | 865 | ||
| 864 | status = nfs4_proc_layoutreturn(lrp); | 866 | status = nfs4_proc_layoutreturn(lrp); |
| 865 | out: | 867 | out: |
| @@ -984,8 +986,8 @@ out: | |||
| 984 | * are seen first. | 986 | * are seen first. |
| 985 | */ | 987 | */ |
| 986 | static s64 | 988 | static s64 |
| 987 | cmp_layout(struct pnfs_layout_range *l1, | 989 | pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1, |
| 988 | struct pnfs_layout_range *l2) | 990 | const struct pnfs_layout_range *l2) |
| 989 | { | 991 | { |
| 990 | s64 d; | 992 | s64 d; |
| 991 | 993 | ||
| @@ -1012,7 +1014,7 @@ pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo, | |||
| 1012 | dprintk("%s:Begin\n", __func__); | 1014 | dprintk("%s:Begin\n", __func__); |
| 1013 | 1015 | ||
| 1014 | list_for_each_entry(lp, &lo->plh_segs, pls_list) { | 1016 | list_for_each_entry(lp, &lo->plh_segs, pls_list) { |
| 1015 | if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) | 1017 | if (pnfs_lseg_range_cmp(&lseg->pls_range, &lp->pls_range) > 0) |
| 1016 | continue; | 1018 | continue; |
| 1017 | list_add_tail(&lseg->pls_list, &lp->pls_list); | 1019 | list_add_tail(&lseg->pls_list, &lp->pls_list); |
| 1018 | dprintk("%s: inserted lseg %p " | 1020 | dprintk("%s: inserted lseg %p " |
| @@ -1050,7 +1052,7 @@ alloc_init_layout_hdr(struct inode *ino, | |||
| 1050 | INIT_LIST_HEAD(&lo->plh_segs); | 1052 | INIT_LIST_HEAD(&lo->plh_segs); |
| 1051 | INIT_LIST_HEAD(&lo->plh_bulk_destroy); | 1053 | INIT_LIST_HEAD(&lo->plh_bulk_destroy); |
| 1052 | lo->plh_inode = ino; | 1054 | lo->plh_inode = ino; |
| 1053 | lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); | 1055 | lo->plh_lc_cred = get_rpccred(ctx->cred); |
| 1054 | return lo; | 1056 | return lo; |
| 1055 | } | 1057 | } |
| 1056 | 1058 | ||
| @@ -1091,21 +1093,21 @@ out_existing: | |||
| 1091 | * READ READ true | 1093 | * READ READ true |
| 1092 | * READ RW true | 1094 | * READ RW true |
| 1093 | */ | 1095 | */ |
| 1094 | static int | 1096 | static bool |
| 1095 | is_matching_lseg(struct pnfs_layout_range *ls_range, | 1097 | pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, |
| 1096 | struct pnfs_layout_range *range) | 1098 | const struct pnfs_layout_range *range) |
| 1097 | { | 1099 | { |
| 1098 | struct pnfs_layout_range range1; | 1100 | struct pnfs_layout_range range1; |
| 1099 | 1101 | ||
| 1100 | if ((range->iomode == IOMODE_RW && | 1102 | if ((range->iomode == IOMODE_RW && |
| 1101 | ls_range->iomode != IOMODE_RW) || | 1103 | ls_range->iomode != IOMODE_RW) || |
| 1102 | !lo_seg_intersecting(ls_range, range)) | 1104 | !pnfs_lseg_range_intersecting(ls_range, range)) |
| 1103 | return 0; | 1105 | return 0; |
| 1104 | 1106 | ||
| 1105 | /* range1 covers only the first byte in the range */ | 1107 | /* range1 covers only the first byte in the range */ |
| 1106 | range1 = *range; | 1108 | range1 = *range; |
| 1107 | range1.length = 1; | 1109 | range1.length = 1; |
| 1108 | return lo_seg_contained(ls_range, &range1); | 1110 | return pnfs_lseg_range_contained(ls_range, &range1); |
| 1109 | } | 1111 | } |
| 1110 | 1112 | ||
| 1111 | /* | 1113 | /* |
| @@ -1121,7 +1123,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, | |||
| 1121 | 1123 | ||
| 1122 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { | 1124 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { |
| 1123 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && | 1125 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && |
| 1124 | is_matching_lseg(&lseg->pls_range, range)) { | 1126 | pnfs_lseg_range_match(&lseg->pls_range, range)) { |
| 1125 | ret = pnfs_get_lseg(lseg); | 1127 | ret = pnfs_get_lseg(lseg); |
| 1126 | break; | 1128 | break; |
| 1127 | } | 1129 | } |
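The pnfs.c hunks rename the layout-range helpers to pnfs_lseg_range_*, make their arguments const and their return type bool, and thread the layout header's credential into LAYOUTGET and LAYOUTRETURN; the range arithmetic itself is unchanged. A user-space sketch with the same shape as the containment/intersection predicates, using plain structs and a saturating end_offset() so open-ended layouts still compare correctly:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_U64 UINT64_MAX   /* stand-in for NFS4_MAX_UINT64 */

struct range { uint64_t offset, length; };

/* End of a range, saturating at MAX_U64 for "to end of file" layouts. */
static uint64_t end_offset(uint64_t start, uint64_t len)
{
    uint64_t end = start + len;
    return end >= start ? end : MAX_U64;
}

static bool range_contained(const struct range *l1, const struct range *l2)
{
    uint64_t s1 = l1->offset, e1 = end_offset(s1, l1->length);
    uint64_t s2 = l2->offset, e2 = end_offset(s2, l2->length);
    return s1 <= s2 && e2 <= e1;
}

static bool range_intersecting(const struct range *l1, const struct range *l2)
{
    uint64_t s1 = l1->offset, e1 = end_offset(s1, l1->length);
    uint64_t s2 = l2->offset, e2 = end_offset(s2, l2->length);
    return (e1 == MAX_U64 || s2 < e1) && (e2 == MAX_U64 || s1 < e2);
}

int main(void)
{
    struct range whole = { 0, MAX_U64 }, part = { 4096, 8192 };

    printf("contained: %d, intersecting: %d\n",
           range_contained(&whole, &part),
           range_intersecting(&whole, &part));
    return 0;
}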
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f5f8a470a647..a4f41810a7f4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
| @@ -149,9 +149,10 @@ struct pnfs_device { | |||
| 149 | struct nfs4_deviceid dev_id; | 149 | struct nfs4_deviceid dev_id; |
| 150 | unsigned int layout_type; | 150 | unsigned int layout_type; |
| 151 | unsigned int mincount; | 151 | unsigned int mincount; |
| 152 | unsigned int maxcount; /* gdia_maxcount */ | ||
| 152 | struct page **pages; | 153 | struct page **pages; |
| 153 | unsigned int pgbase; | 154 | unsigned int pgbase; |
| 154 | unsigned int pglen; | 155 | unsigned int pglen; /* reply buffer length */ |
| 155 | }; | 156 | }; |
| 156 | 157 | ||
| 157 | #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 | 158 | #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 |
| @@ -170,7 +171,8 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, | |||
| 170 | const struct nfs_fh *fh, | 171 | const struct nfs_fh *fh, |
| 171 | struct pnfs_devicelist *devlist); | 172 | struct pnfs_devicelist *devlist); |
| 172 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | 173 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
| 173 | struct pnfs_device *dev); | 174 | struct pnfs_device *dev, |
| 175 | struct rpc_cred *cred); | ||
| 174 | extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); | 176 | extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); |
| 175 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); | 177 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); |
| 176 | 178 | ||
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index fc8de9016acf..c041c41f7a52 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
| @@ -98,7 +98,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 98 | */ | 98 | */ |
| 99 | static int | 99 | static int |
| 100 | nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, | 100 | nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, |
| 101 | struct nfs_fattr *fattr) | 101 | struct nfs_fattr *fattr, struct nfs4_label *label) |
| 102 | { | 102 | { |
| 103 | struct rpc_message msg = { | 103 | struct rpc_message msg = { |
| 104 | .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], | 104 | .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], |
| @@ -146,7 +146,8 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
| 146 | 146 | ||
| 147 | static int | 147 | static int |
| 148 | nfs_proc_lookup(struct inode *dir, struct qstr *name, | 148 | nfs_proc_lookup(struct inode *dir, struct qstr *name, |
| 149 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 149 | struct nfs_fh *fhandle, struct nfs_fattr *fattr, |
| 150 | struct nfs4_label *label) | ||
| 150 | { | 151 | { |
| 151 | struct nfs_diropargs arg = { | 152 | struct nfs_diropargs arg = { |
| 152 | .fh = NFS_FH(dir), | 153 | .fh = NFS_FH(dir), |
| @@ -243,7 +244,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
| 243 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 244 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); |
| 244 | nfs_mark_for_revalidate(dir); | 245 | nfs_mark_for_revalidate(dir); |
| 245 | if (status == 0) | 246 | if (status == 0) |
| 246 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); | 247 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); |
| 247 | nfs_free_createdata(data); | 248 | nfs_free_createdata(data); |
| 248 | out: | 249 | out: |
| 249 | dprintk("NFS reply create: %d\n", status); | 250 | dprintk("NFS reply create: %d\n", status); |
| @@ -290,7 +291,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
| 290 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 291 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); |
| 291 | } | 292 | } |
| 292 | if (status == 0) | 293 | if (status == 0) |
| 293 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); | 294 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); |
| 294 | nfs_free_createdata(data); | 295 | nfs_free_createdata(data); |
| 295 | out: | 296 | out: |
| 296 | dprintk("NFS reply mknod: %d\n", status); | 297 | dprintk("NFS reply mknod: %d\n", status); |
| @@ -442,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, | |||
| 442 | * should fill in the data with a LOOKUP call on the wire. | 443 | * should fill in the data with a LOOKUP call on the wire. |
| 443 | */ | 444 | */ |
| 444 | if (status == 0) | 445 | if (status == 0) |
| 445 | status = nfs_instantiate(dentry, fh, fattr); | 446 | status = nfs_instantiate(dentry, fh, fattr, NULL); |
| 446 | 447 | ||
| 447 | out_free: | 448 | out_free: |
| 448 | nfs_free_fattr(fattr); | 449 | nfs_free_fattr(fattr); |
| @@ -471,7 +472,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) | |||
| 471 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 472 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); |
| 472 | nfs_mark_for_revalidate(dir); | 473 | nfs_mark_for_revalidate(dir); |
| 473 | if (status == 0) | 474 | if (status == 0) |
| 474 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); | 475 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); |
| 475 | nfs_free_createdata(data); | 476 | nfs_free_createdata(data); |
| 476 | out: | 477 | out: |
| 477 | dprintk("NFS reply mkdir: %d\n", status); | 478 | dprintk("NFS reply mkdir: %d\n", status); |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2d7525fbcf25..71fdc0dfa0d2 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
| @@ -269,7 +269,7 @@ static match_table_t nfs_local_lock_tokens = { | |||
| 269 | 269 | ||
| 270 | enum { | 270 | enum { |
| 271 | Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, | 271 | Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, |
| 272 | Opt_vers_4_1, | 272 | Opt_vers_4_1, Opt_vers_4_2, |
| 273 | 273 | ||
| 274 | Opt_vers_err | 274 | Opt_vers_err |
| 275 | }; | 275 | }; |
| @@ -280,6 +280,7 @@ static match_table_t nfs_vers_tokens = { | |||
| 280 | { Opt_vers_4, "4" }, | 280 | { Opt_vers_4, "4" }, |
| 281 | { Opt_vers_4_0, "4.0" }, | 281 | { Opt_vers_4_0, "4.0" }, |
| 282 | { Opt_vers_4_1, "4.1" }, | 282 | { Opt_vers_4_1, "4.1" }, |
| 283 | { Opt_vers_4_2, "4.2" }, | ||
| 283 | 284 | ||
| 284 | { Opt_vers_err, NULL } | 285 | { Opt_vers_err, NULL } |
| 285 | }; | 286 | }; |
| @@ -832,6 +833,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) | |||
| 832 | seq_printf(m, "\n\tnfsv4:\t"); | 833 | seq_printf(m, "\n\tnfsv4:\t"); |
| 833 | seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); | 834 | seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); |
| 834 | seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); | 835 | seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); |
| 836 | seq_printf(m, ",bm2=0x%x", nfss->attr_bitmask[2]); | ||
| 835 | seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); | 837 | seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); |
| 836 | show_sessions(m, nfss); | 838 | show_sessions(m, nfss); |
| 837 | show_pnfs(m, nfss); | 839 | show_pnfs(m, nfss); |
| @@ -1097,6 +1099,10 @@ static int nfs_parse_version_string(char *string, | |||
| 1097 | mnt->version = 4; | 1099 | mnt->version = 4; |
| 1098 | mnt->minorversion = 1; | 1100 | mnt->minorversion = 1; |
| 1099 | break; | 1101 | break; |
| 1102 | case Opt_vers_4_2: | ||
| 1103 | mnt->version = 4; | ||
| 1104 | mnt->minorversion = 2; | ||
| 1105 | break; | ||
| 1100 | default: | 1106 | default: |
| 1101 | return 0; | 1107 | return 0; |
| 1102 | } | 1108 | } |
| @@ -1608,29 +1614,13 @@ out_security_failure: | |||
| 1608 | } | 1614 | } |
| 1609 | 1615 | ||
| 1610 | /* | 1616 | /* |
| 1611 | * Select a security flavor for this mount. The selected flavor | 1617 | * Ensure that the specified authtype in args->auth_flavors[0] is supported by |
| 1612 | * is planted in args->auth_flavors[0]. | 1618 | * the server. Returns 0 if it's ok, and -EACCES if not. |
| 1613 | * | ||
| 1614 | * Returns 0 on success, -EACCES on failure. | ||
| 1615 | */ | 1619 | */ |
| 1616 | static int nfs_select_flavor(struct nfs_parsed_mount_data *args, | 1620 | static int nfs_verify_authflavor(struct nfs_parsed_mount_data *args, |
| 1617 | struct nfs_mount_request *request) | 1621 | rpc_authflavor_t *server_authlist, unsigned int count) |
| 1618 | { | 1622 | { |
| 1619 | unsigned int i, count = *(request->auth_flav_len); | 1623 | unsigned int i; |
| 1620 | rpc_authflavor_t flavor; | ||
| 1621 | |||
| 1622 | /* | ||
| 1623 | * The NFSv2 MNT operation does not return a flavor list. | ||
| 1624 | */ | ||
| 1625 | if (args->mount_server.version != NFS_MNT3_VERSION) | ||
| 1626 | goto out_default; | ||
| 1627 | |||
| 1628 | /* | ||
| 1629 | * Certain releases of Linux's mountd return an empty | ||
| 1630 | * flavor list in some cases. | ||
| 1631 | */ | ||
| 1632 | if (count == 0) | ||
| 1633 | goto out_default; | ||
| 1634 | 1624 | ||
| 1635 | /* | 1625 | /* |
| 1636 | * If the sec= mount option is used, the specified flavor or AUTH_NULL | 1626 | * If the sec= mount option is used, the specified flavor or AUTH_NULL |
| @@ -1640,60 +1630,19 @@ static int nfs_select_flavor(struct nfs_parsed_mount_data *args, | |||
| 1640 | * means that the server will ignore the rpc creds, so any flavor | 1630 | * means that the server will ignore the rpc creds, so any flavor |
| 1641 | * can be used. | 1631 | * can be used. |
| 1642 | */ | 1632 | */ |
| 1643 | if (args->auth_flavors[0] != RPC_AUTH_MAXFLAVOR) { | ||
| 1644 | for (i = 0; i < count; i++) { | ||
| 1645 | if (args->auth_flavors[0] == request->auth_flavs[i] || | ||
| 1646 | request->auth_flavs[i] == RPC_AUTH_NULL) | ||
| 1647 | goto out; | ||
| 1648 | } | ||
| 1649 | dfprintk(MOUNT, "NFS: auth flavor %d not supported by server\n", | ||
| 1650 | args->auth_flavors[0]); | ||
| 1651 | goto out_err; | ||
| 1652 | } | ||
| 1653 | |||
| 1654 | /* | ||
| 1655 | * RFC 2623, section 2.7 suggests we SHOULD prefer the | ||
| 1656 | * flavor listed first. However, some servers list | ||
| 1657 | * AUTH_NULL first. Avoid ever choosing AUTH_NULL. | ||
| 1658 | */ | ||
| 1659 | for (i = 0; i < count; i++) { | 1633 | for (i = 0; i < count; i++) { |
| 1660 | struct rpcsec_gss_info info; | 1634 | if (args->auth_flavors[0] == server_authlist[i] || |
| 1661 | 1635 | server_authlist[i] == RPC_AUTH_NULL) | |
| 1662 | flavor = request->auth_flavs[i]; | 1636 | goto out; |
| 1663 | switch (flavor) { | ||
| 1664 | case RPC_AUTH_UNIX: | ||
| 1665 | goto out_set; | ||
| 1666 | case RPC_AUTH_NULL: | ||
| 1667 | continue; | ||
| 1668 | default: | ||
| 1669 | if (rpcauth_get_gssinfo(flavor, &info) == 0) | ||
| 1670 | goto out_set; | ||
| 1671 | } | ||
| 1672 | } | 1637 | } |
| 1673 | 1638 | ||
| 1674 | /* | 1639 | dfprintk(MOUNT, "NFS: auth flavor %u not supported by server\n", |
| 1675 | * As a last chance, see if the server list contains AUTH_NULL - | 1640 | args->auth_flavors[0]); |
| 1676 | * if it does, use the default flavor. | 1641 | return -EACCES; |
| 1677 | */ | ||
| 1678 | for (i = 0; i < count; i++) { | ||
| 1679 | if (request->auth_flavs[i] == RPC_AUTH_NULL) | ||
| 1680 | goto out_default; | ||
| 1681 | } | ||
| 1682 | |||
| 1683 | dfprintk(MOUNT, "NFS: no auth flavors in common with server\n"); | ||
| 1684 | goto out_err; | ||
| 1685 | 1642 | ||
| 1686 | out_default: | ||
| 1687 | /* use default if flavor not already set */ | ||
| 1688 | flavor = (args->auth_flavors[0] == RPC_AUTH_MAXFLAVOR) ? | ||
| 1689 | RPC_AUTH_UNIX : args->auth_flavors[0]; | ||
| 1690 | out_set: | ||
| 1691 | args->auth_flavors[0] = flavor; | ||
| 1692 | out: | 1643 | out: |
| 1693 | dfprintk(MOUNT, "NFS: using auth flavor %d\n", args->auth_flavors[0]); | 1644 | dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]); |
| 1694 | return 0; | 1645 | return 0; |
| 1695 | out_err: | ||
| 1696 | return -EACCES; | ||
| 1697 | } | 1646 | } |
| 1698 | 1647 | ||
| 1699 | /* | 1648 | /* |
| @@ -1701,10 +1650,10 @@ out_err: | |||
| 1701 | * corresponding to the provided path. | 1650 | * corresponding to the provided path. |
| 1702 | */ | 1651 | */ |
| 1703 | static int nfs_request_mount(struct nfs_parsed_mount_data *args, | 1652 | static int nfs_request_mount(struct nfs_parsed_mount_data *args, |
| 1704 | struct nfs_fh *root_fh) | 1653 | struct nfs_fh *root_fh, |
| 1654 | rpc_authflavor_t *server_authlist, | ||
| 1655 | unsigned int *server_authlist_len) | ||
| 1705 | { | 1656 | { |
| 1706 | rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS]; | ||
| 1707 | unsigned int server_authlist_len = ARRAY_SIZE(server_authlist); | ||
| 1708 | struct nfs_mount_request request = { | 1657 | struct nfs_mount_request request = { |
| 1709 | .sap = (struct sockaddr *) | 1658 | .sap = (struct sockaddr *) |
| 1710 | &args->mount_server.address, | 1659 | &args->mount_server.address, |
| @@ -1712,7 +1661,7 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, | |||
| 1712 | .protocol = args->mount_server.protocol, | 1661 | .protocol = args->mount_server.protocol, |
| 1713 | .fh = root_fh, | 1662 | .fh = root_fh, |
| 1714 | .noresvport = args->flags & NFS_MOUNT_NORESVPORT, | 1663 | .noresvport = args->flags & NFS_MOUNT_NORESVPORT, |
| 1715 | .auth_flav_len = &server_authlist_len, | 1664 | .auth_flav_len = server_authlist_len, |
| 1716 | .auth_flavs = server_authlist, | 1665 | .auth_flavs = server_authlist, |
| 1717 | .net = args->net, | 1666 | .net = args->net, |
| 1718 | }; | 1667 | }; |
| @@ -1756,24 +1705,92 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, | |||
| 1756 | return status; | 1705 | return status; |
| 1757 | } | 1706 | } |
| 1758 | 1707 | ||
| 1759 | return nfs_select_flavor(args, &request); | 1708 | return 0; |
| 1760 | } | 1709 | } |
| 1761 | 1710 | ||
| 1762 | struct dentry *nfs_try_mount(int flags, const char *dev_name, | 1711 | static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_info, |
| 1763 | struct nfs_mount_info *mount_info, | 1712 | struct nfs_subversion *nfs_mod) |
| 1764 | struct nfs_subversion *nfs_mod) | ||
| 1765 | { | 1713 | { |
| 1766 | int status; | 1714 | int status; |
| 1767 | struct nfs_server *server; | 1715 | unsigned int i; |
| 1716 | bool tried_auth_unix = false; | ||
| 1717 | bool auth_null_in_list = false; | ||
| 1718 | struct nfs_server *server = ERR_PTR(-EACCES); | ||
| 1719 | struct nfs_parsed_mount_data *args = mount_info->parsed; | ||
| 1720 | rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS]; | ||
| 1721 | unsigned int authlist_len = ARRAY_SIZE(authlist); | ||
| 1722 | |||
| 1723 | status = nfs_request_mount(args, mount_info->mntfh, authlist, | ||
| 1724 | &authlist_len); | ||
| 1725 | if (status) | ||
| 1726 | return ERR_PTR(status); | ||
| 1768 | 1727 | ||
| 1769 | if (mount_info->parsed->need_mount) { | 1728 | /* |
| 1770 | status = nfs_request_mount(mount_info->parsed, mount_info->mntfh); | 1729 | * Was a sec= authflavor specified in the options? First, verify |
| 1730 | * whether the server supports it, and then just try to use it if so. | ||
| 1731 | */ | ||
| 1732 | if (args->auth_flavors[0] != RPC_AUTH_MAXFLAVOR) { | ||
| 1733 | status = nfs_verify_authflavor(args, authlist, authlist_len); | ||
| 1734 | dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]); | ||
| 1771 | if (status) | 1735 | if (status) |
| 1772 | return ERR_PTR(status); | 1736 | return ERR_PTR(status); |
| 1737 | return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); | ||
| 1738 | } | ||
| 1739 | |||
| 1740 | /* | ||
| 1741 | * No sec= option was provided. RFC 2623, section 2.7 suggests we | ||
| 1742 | * SHOULD prefer the flavor listed first. However, some servers list | ||
| 1743 | * AUTH_NULL first. Avoid ever choosing AUTH_NULL. | ||
| 1744 | */ | ||
| 1745 | for (i = 0; i < authlist_len; ++i) { | ||
| 1746 | rpc_authflavor_t flavor; | ||
| 1747 | struct rpcsec_gss_info info; | ||
| 1748 | |||
| 1749 | flavor = authlist[i]; | ||
| 1750 | switch (flavor) { | ||
| 1751 | case RPC_AUTH_UNIX: | ||
| 1752 | tried_auth_unix = true; | ||
| 1753 | break; | ||
| 1754 | case RPC_AUTH_NULL: | ||
| 1755 | auth_null_in_list = true; | ||
| 1756 | continue; | ||
| 1757 | default: | ||
| 1758 | if (rpcauth_get_gssinfo(flavor, &info) != 0) | ||
| 1759 | continue; | ||
| 1760 | /* Fallthrough */ | ||
| 1761 | } | ||
| 1762 | dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); | ||
| 1763 | args->auth_flavors[0] = flavor; | ||
| 1764 | server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); | ||
| 1765 | if (!IS_ERR(server)) | ||
| 1766 | return server; | ||
| 1773 | } | 1767 | } |
| 1774 | 1768 | ||
| 1775 | /* Get a volume representation */ | 1769 | /* |
| 1776 | server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); | 1770 | * Nothing we tried so far worked. At this point, give up if we've |
| 1771 | * already tried AUTH_UNIX or if the server's list doesn't contain | ||
| 1772 | * AUTH_NULL | ||
| 1773 | */ | ||
| 1774 | if (tried_auth_unix || !auth_null_in_list) | ||
| 1775 | return server; | ||
| 1776 | |||
| 1777 | /* Last chance! Try AUTH_UNIX */ | ||
| 1778 | dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX); | ||
| 1779 | args->auth_flavors[0] = RPC_AUTH_UNIX; | ||
| 1780 | return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); | ||
| 1781 | } | ||
| 1782 | |||
| 1783 | struct dentry *nfs_try_mount(int flags, const char *dev_name, | ||
| 1784 | struct nfs_mount_info *mount_info, | ||
| 1785 | struct nfs_subversion *nfs_mod) | ||
| 1786 | { | ||
| 1787 | struct nfs_server *server; | ||
| 1788 | |||
| 1789 | if (mount_info->parsed->need_mount) | ||
| 1790 | server = nfs_try_mount_request(mount_info, nfs_mod); | ||
| 1791 | else | ||
| 1792 | server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); | ||
| 1793 | |||
| 1777 | if (IS_ERR(server)) | 1794 | if (IS_ERR(server)) |
| 1778 | return ERR_CAST(server); | 1795 | return ERR_CAST(server); |
| 1779 | 1796 | ||
| @@ -2412,7 +2429,21 @@ static int nfs_bdi_register(struct nfs_server *server) | |||
| 2412 | int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, | 2429 | int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, |
| 2413 | struct nfs_mount_info *mount_info) | 2430 | struct nfs_mount_info *mount_info) |
| 2414 | { | 2431 | { |
| 2415 | return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts); | 2432 | int error; |
| 2433 | unsigned long kflags = 0, kflags_out = 0; | ||
| 2434 | if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) | ||
| 2435 | kflags |= SECURITY_LSM_NATIVE_LABELS; | ||
| 2436 | |||
| 2437 | error = security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts, | ||
| 2438 | kflags, &kflags_out); | ||
| 2439 | if (error) | ||
| 2440 | goto err; | ||
| 2441 | |||
| 2442 | if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && | ||
| 2443 | !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) | ||
| 2444 | NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL; | ||
| 2445 | err: | ||
| 2446 | return error; | ||
| 2416 | } | 2447 | } |
| 2417 | EXPORT_SYMBOL_GPL(nfs_set_sb_security); | 2448 | EXPORT_SYMBOL_GPL(nfs_set_sb_security); |
| 2418 | 2449 | ||
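nfs_set_sb_security() now negotiates label support with the LSM: it requests SECURITY_LSM_NATIVE_LABELS via kflags and clears NFS_CAP_SECURITY_LABEL when the LSM does not echo the flag back in kflags_out. Below is a toy restatement of that request/acknowledge handshake, assuming made-up flag values and an lsm_set_mnt_opts() stub in place of security_sb_set_mnt_opts().

```c
#include <stdio.h>

#define CAP_SECURITY_LABEL	(1u << 0)	/* stand-in for NFS_CAP_SECURITY_LABEL */
#define LSM_NATIVE_LABELS	(1u << 1)	/* stand-in for SECURITY_LSM_NATIVE_LABELS */

/* Pretend the LSM refuses native labels: it never echoes the flag back. */
static int lsm_set_mnt_opts(unsigned long kflags, unsigned long *kflags_out)
{
	*kflags_out = kflags & ~LSM_NATIVE_LABELS;
	return 0;
}

static int set_sb_security(unsigned int *caps)
{
	unsigned long kflags = 0, kflags_out = 0;
	int error;

	if (*caps & CAP_SECURITY_LABEL)
		kflags |= LSM_NATIVE_LABELS;	/* request native labelling */

	error = lsm_set_mnt_opts(kflags, &kflags_out);
	if (error)
		return error;

	/* LSM did not acknowledge the request: drop the capability. */
	if ((*caps & CAP_SECURITY_LABEL) && !(kflags_out & LSM_NATIVE_LABELS))
		*caps &= ~CAP_SECURITY_LABEL;
	return 0;
}

int main(void)
{
	unsigned int caps = CAP_SECURITY_LABEL;

	set_sb_security(&caps);
	printf("security label cap after negotiation: %s\n",
	       (caps & CAP_SECURITY_LABEL) ? "on" : "off");
	return 0;
}
```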
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 1f1f38f0c5d5..60395ad3a2e4 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
| @@ -479,7 +479,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) | |||
| 479 | 479 | ||
| 480 | dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", | 480 | dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", |
| 481 | dentry->d_parent->d_name.name, dentry->d_name.name, | 481 | dentry->d_parent->d_name.name, dentry->d_name.name, |
| 482 | dentry->d_count); | 482 | d_count(dentry)); |
| 483 | nfs_inc_stats(dir, NFSIOS_SILLYRENAME); | 483 | nfs_inc_stats(dir, NFSIOS_SILLYRENAME); |
| 484 | 484 | ||
| 485 | /* | 485 | /* |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a2c7c28049d5..f1bdb7254776 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
| @@ -888,6 +888,28 @@ out: | |||
| 888 | return PageUptodate(page) != 0; | 888 | return PageUptodate(page) != 0; |
| 889 | } | 889 | } |
| 890 | 890 | ||
| 891 | /* If we know the page is up to date, and we're not using byte range locks (or | ||
| 892 | * if we have the whole file locked for writing), it may be more efficient to | ||
| 893 | * extend the write to cover the entire page in order to avoid fragmentation | ||
| 894 | * inefficiencies. | ||
| 895 | * | ||
| 896 | * If the file is opened for synchronous writes or if we have a write delegation | ||
| 897 | * from the server then we can just skip the rest of the checks. | ||
| 898 | */ | ||
| 899 | static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) | ||
| 900 | { | ||
| 901 | if (file->f_flags & O_DSYNC) | ||
| 902 | return 0; | ||
| 903 | if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) | ||
| 904 | return 1; | ||
| 905 | if (nfs_write_pageuptodate(page, inode) && (inode->i_flock == NULL || | ||
| 906 | (inode->i_flock->fl_start == 0 && | ||
| 907 | inode->i_flock->fl_end == OFFSET_MAX && | ||
| 908 | inode->i_flock->fl_type != F_RDLCK))) | ||
| 909 | return 1; | ||
| 910 | return 0; | ||
| 911 | } | ||
| 912 | |||
| 891 | /* | 913 | /* |
| 892 | * Update and possibly write a cached page of an NFS file. | 914 | * Update and possibly write a cached page of an NFS file. |
| 893 | * | 915 | * |
| @@ -908,14 +930,7 @@ int nfs_updatepage(struct file *file, struct page *page, | |||
| 908 | file->f_path.dentry->d_name.name, count, | 930 | file->f_path.dentry->d_name.name, count, |
| 909 | (long long)(page_file_offset(page) + offset)); | 931 | (long long)(page_file_offset(page) + offset)); |
| 910 | 932 | ||
| 911 | /* If we're not using byte range locks, and we know the page | 933 | if (nfs_can_extend_write(file, page, inode)) { |
| 912 | * is up to date, it may be more efficient to extend the write | ||
| 913 | * to cover the entire page in order to avoid fragmentation | ||
| 914 | * inefficiencies. | ||
| 915 | */ | ||
| 916 | if (nfs_write_pageuptodate(page, inode) && | ||
| 917 | inode->i_flock == NULL && | ||
| 918 | !(file->f_flags & O_DSYNC)) { | ||
| 919 | count = max(count + offset, nfs_page_length(page)); | 934 | count = max(count + offset, nfs_page_length(page)); |
| 920 | offset = 0; | 935 | offset = 0; |
| 921 | } | 936 | } |
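The new nfs_can_extend_write() helper collects the conditions under which a cached write may be rounded up to a whole page: never for O_DSYNC opens, always with a write delegation, and otherwise only when the page is up to date and any lock held spans the entire file for writing. A simplified restatement of that decision as a freestanding predicate is below; the struct fields are stand-ins for the kernel's file, page and file_lock state.

```c
#include <stdbool.h>
#include <stdio.h>
#include <limits.h>

#define OFFSET_MAX LLONG_MAX

struct flock_state {		/* stand-in for the inode's first file_lock */
	bool		held;
	long long	start, end;
	bool		read_only;	/* F_RDLCK */
};

struct write_ctx {
	bool o_dsync;		/* file opened with O_DSYNC */
	bool write_delegation;	/* server granted a write delegation */
	bool page_uptodate;	/* cached page matches the server */
	struct flock_state lock;
};

static bool can_extend_write(const struct write_ctx *c)
{
	if (c->o_dsync)
		return false;
	if (c->write_delegation)
		return true;
	if (!c->page_uptodate)
		return false;
	if (!c->lock.held)
		return true;
	/* A whole-file write lock is as good as no byte-range locking. */
	return c->lock.start == 0 && c->lock.end == OFFSET_MAX && !c->lock.read_only;
}

int main(void)
{
	struct write_ctx c = { .page_uptodate = true,
			       .lock = { .held = true, .start = 0,
					 .end = OFFSET_MAX, .read_only = false } };

	printf("extend write: %s\n", can_extend_write(&c) ? "yes" : "no");
	return 0;
}
```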
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 430b6872806f..dc8f1ef665ce 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
| @@ -81,6 +81,22 @@ config NFSD_V4 | |||
| 81 | 81 | ||
| 82 | If unsure, say N. | 82 | If unsure, say N. |
| 83 | 83 | ||
| 84 | config NFSD_V4_SECURITY_LABEL | ||
| 85 | bool "Provide Security Label support for NFSv4 server" | ||
| 86 | depends on NFSD_V4 && SECURITY | ||
| 87 | help | ||
| 88 | |||
| 89 | Say Y here if you want to enable fine-grained security label attribute | ||
| 90 | support for NFS version 4. Security labels allow security modules like | ||
| 91 | SELinux and Smack to label files to facilitate enforcement of their policies. | ||
| 92 | Without this an NFSv4 mount will have the same label on each file. | ||
| 93 | |||
| 94 | If you do not wish to enable fine-grained security labels for SELinux or | ||
| 95 | Smack policies on NFSv4 files, say N. | ||
| 96 | |||
| 97 | WARNING: there is still a chance of backwards-incompatible protocol changes. | ||
| 98 | For now we recommend "Y" only for developers and testers. | ||
| 99 | |||
| 84 | config NFSD_FAULT_INJECTION | 100 | config NFSD_FAULT_INJECTION |
| 85 | bool "NFS server manual fault injection" | 101 | bool "NFS server manual fault injection" |
| 86 | depends on NFSD_V4 && DEBUG_KERNEL | 102 | depends on NFSD_V4 && DEBUG_KERNEL |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 27d74a294515..a7cee864e7b2 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
| @@ -42,6 +42,36 @@ | |||
| 42 | #include "current_stateid.h" | 42 | #include "current_stateid.h" |
| 43 | #include "netns.h" | 43 | #include "netns.h" |
| 44 | 44 | ||
| 45 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
| 46 | #include <linux/security.h> | ||
| 47 | |||
| 48 | static inline void | ||
| 49 | nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) | ||
| 50 | { | ||
| 51 | struct inode *inode = resfh->fh_dentry->d_inode; | ||
| 52 | int status; | ||
| 53 | |||
| 54 | mutex_lock(&inode->i_mutex); | ||
| 55 | status = security_inode_setsecctx(resfh->fh_dentry, | ||
| 56 | label->data, label->len); | ||
| 57 | mutex_unlock(&inode->i_mutex); | ||
| 58 | |||
| 59 | if (status) | ||
| 60 | /* | ||
| 61 | * XXX: We should really fail the whole open, but we may | ||
| 62 | * already have created a new file, so it may be too | ||
| 63 | * late. For now this seems the least of evils: | ||
| 64 | */ | ||
| 65 | bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; | ||
| 66 | |||
| 67 | return; | ||
| 68 | } | ||
| 69 | #else | ||
| 70 | static inline void | ||
| 71 | nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) | ||
| 72 | { } | ||
| 73 | #endif | ||
| 74 | |||
| 45 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 75 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
| 46 | 76 | ||
| 47 | static u32 nfsd_attrmask[] = { | 77 | static u32 nfsd_attrmask[] = { |
| @@ -239,6 +269,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru | |||
| 239 | (u32 *)open->op_verf.data, | 269 | (u32 *)open->op_verf.data, |
| 240 | &open->op_truncate, &open->op_created); | 270 | &open->op_truncate, &open->op_created); |
| 241 | 271 | ||
| 272 | if (!status && open->op_label.len) | ||
| 273 | nfsd4_security_inode_setsecctx(resfh, &open->op_label, open->op_bmval); | ||
| 274 | |||
| 242 | /* | 275 | /* |
| 243 | * Following rfc 3530 14.2.16, use the returned bitmask | 276 | * Following rfc 3530 14.2.16, use the returned bitmask |
| 244 | * to indicate which attributes we used to store the | 277 | * to indicate which attributes we used to store the |
| @@ -263,7 +296,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru | |||
| 263 | 296 | ||
| 264 | nfsd4_set_open_owner_reply_cache(cstate, open, resfh); | 297 | nfsd4_set_open_owner_reply_cache(cstate, open, resfh); |
| 265 | accmode = NFSD_MAY_NOP; | 298 | accmode = NFSD_MAY_NOP; |
| 266 | if (open->op_created) | 299 | if (open->op_created || |
| 300 | open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) | ||
| 267 | accmode |= NFSD_MAY_OWNER_OVERRIDE; | 301 | accmode |= NFSD_MAY_OWNER_OVERRIDE; |
| 268 | status = do_open_permission(rqstp, resfh, open, accmode); | 302 | status = do_open_permission(rqstp, resfh, open, accmode); |
| 269 | set_change_info(&open->op_cinfo, current_fh); | 303 | set_change_info(&open->op_cinfo, current_fh); |
| @@ -637,6 +671,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 637 | if (status) | 671 | if (status) |
| 638 | goto out; | 672 | goto out; |
| 639 | 673 | ||
| 674 | if (create->cr_label.len) | ||
| 675 | nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); | ||
| 676 | |||
| 640 | if (create->cr_acl != NULL) | 677 | if (create->cr_acl != NULL) |
| 641 | do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, | 678 | do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, |
| 642 | create->cr_bmval); | 679 | create->cr_bmval); |
| @@ -916,6 +953,11 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 916 | setattr->sa_acl); | 953 | setattr->sa_acl); |
| 917 | if (status) | 954 | if (status) |
| 918 | goto out; | 955 | goto out; |
| 956 | if (setattr->sa_label.len) | ||
| 957 | status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, | ||
| 958 | &setattr->sa_label); | ||
| 959 | if (status) | ||
| 960 | goto out; | ||
| 919 | status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, | 961 | status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, |
| 920 | 0, (time_t)0); | 962 | 0, (time_t)0); |
| 921 | out: | 963 | out: |
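The new nfsd4_security_inode_setsecctx() applies a client-supplied label with security_inode_setsecctx() under i_mutex and, on failure, clears FATTR4_WORD2_SECURITY_LABEL from the reply bitmask rather than failing the open. For a feel of what such a label write amounts to on the exporting host, here is a hedged userspace sketch that writes the security.* extended attribute directly; the "security.selinux" attribute name and the label string assume an SELinux system and are examples only.

```c
/* Sketch only: sets a file's security label the way an administrator might,
 * which is roughly the effect of the server-side setsecctx call. */
#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/tmp/labelled-file";
	const char *label = "system_u:object_r:tmp_t:s0";	/* example value */

	if (setxattr(path, "security.selinux", label, strlen(label), 0) != 0) {
		perror("setxattr");
		return 1;
	}
	printf("labelled %s\n", path);
	return 0;
}
```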
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 316ec843dec2..280acef6f0dc 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
| @@ -97,19 +97,20 @@ nfs4_lock_state(void) | |||
| 97 | 97 | ||
| 98 | static void free_session(struct nfsd4_session *); | 98 | static void free_session(struct nfsd4_session *); |
| 99 | 99 | ||
| 100 | void nfsd4_put_session(struct nfsd4_session *ses) | 100 | static bool is_session_dead(struct nfsd4_session *ses) |
| 101 | { | 101 | { |
| 102 | atomic_dec(&ses->se_ref); | 102 | return ses->se_flags & NFS4_SESSION_DEAD; |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | static bool is_session_dead(struct nfsd4_session *ses) | 105 | void nfsd4_put_session(struct nfsd4_session *ses) |
| 106 | { | 106 | { |
| 107 | return ses->se_flags & NFS4_SESSION_DEAD; | 107 | if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) |
| 108 | free_session(ses); | ||
| 108 | } | 109 | } |
| 109 | 110 | ||
| 110 | static __be32 mark_session_dead_locked(struct nfsd4_session *ses) | 111 | static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) |
| 111 | { | 112 | { |
| 112 | if (atomic_read(&ses->se_ref)) | 113 | if (atomic_read(&ses->se_ref) > ref_held_by_me) |
| 113 | return nfserr_jukebox; | 114 | return nfserr_jukebox; |
| 114 | ses->se_flags |= NFS4_SESSION_DEAD; | 115 | ses->se_flags |= NFS4_SESSION_DEAD; |
| 115 | return nfs_ok; | 116 | return nfs_ok; |
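The reordering above changes the lifetime rule: nfsd4_put_session() frees the session only when the final reference drops and the session is already marked dead, while mark_session_dead_locked() refuses while anyone beyond the caller still holds a reference. A miniature of that pattern using C11 atomics in place of the kernel's atomic_t (all names here are illustrative):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct session {
	atomic_int ref;
	bool dead;
};

static void put_session(struct session *s)
{
	/* Free only on the final put, and only once the session is dead. */
	if (atomic_fetch_sub(&s->ref, 1) == 1 && s->dead) {
		printf("freeing session\n");
		free(s);
	}
}

static int mark_session_dead(struct session *s, int refs_held_by_me)
{
	/* Refuse while other holders might still be using the session. */
	if (atomic_load(&s->ref) > refs_held_by_me)
		return -1;	/* the kernel returns nfserr_jukebox here */
	s->dead = true;
	return 0;
}

int main(void)
{
	struct session *s = malloc(sizeof(*s));

	atomic_init(&s->ref, 1);
	s->dead = false;
	if (mark_session_dead(s, 1) == 0)
		put_session(s);	/* last put frees it */
	return 0;
}
```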
| @@ -364,19 +365,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) | |||
| 364 | } | 365 | } |
| 365 | 366 | ||
| 366 | static struct nfs4_delegation * | 367 | static struct nfs4_delegation * |
| 367 | alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type) | 368 | alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) |
| 368 | { | 369 | { |
| 369 | struct nfs4_delegation *dp; | 370 | struct nfs4_delegation *dp; |
| 370 | struct nfs4_file *fp = stp->st_file; | 371 | struct nfs4_file *fp = stp->st_file; |
| 371 | 372 | ||
| 372 | dprintk("NFSD alloc_init_deleg\n"); | 373 | dprintk("NFSD alloc_init_deleg\n"); |
| 373 | /* | ||
| 374 | * Major work on the lease subsystem (for example, to support | ||
| 375 | * calbacks on stat) will be required before we can support | ||
| 376 | * write delegations properly. | ||
| 377 | */ | ||
| 378 | if (type != NFS4_OPEN_DELEGATE_READ) | ||
| 379 | return NULL; | ||
| 380 | if (fp->fi_had_conflict) | 374 | if (fp->fi_had_conflict) |
| 381 | return NULL; | 375 | return NULL; |
| 382 | if (num_delegations > max_delegations) | 376 | if (num_delegations > max_delegations) |
| @@ -397,7 +391,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv | |||
| 397 | INIT_LIST_HEAD(&dp->dl_recall_lru); | 391 | INIT_LIST_HEAD(&dp->dl_recall_lru); |
| 398 | get_nfs4_file(fp); | 392 | get_nfs4_file(fp); |
| 399 | dp->dl_file = fp; | 393 | dp->dl_file = fp; |
| 400 | dp->dl_type = type; | 394 | dp->dl_type = NFS4_OPEN_DELEGATE_READ; |
| 401 | fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); | 395 | fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); |
| 402 | dp->dl_time = 0; | 396 | dp->dl_time = 0; |
| 403 | atomic_set(&dp->dl_count, 1); | 397 | atomic_set(&dp->dl_count, 1); |
| @@ -1188,6 +1182,9 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source) | |||
| 1188 | target->cr_gid = source->cr_gid; | 1182 | target->cr_gid = source->cr_gid; |
| 1189 | target->cr_group_info = source->cr_group_info; | 1183 | target->cr_group_info = source->cr_group_info; |
| 1190 | get_group_info(target->cr_group_info); | 1184 | get_group_info(target->cr_group_info); |
| 1185 | target->cr_gss_mech = source->cr_gss_mech; | ||
| 1186 | if (source->cr_gss_mech) | ||
| 1187 | gss_mech_get(source->cr_gss_mech); | ||
| 1191 | return 0; | 1188 | return 0; |
| 1192 | } | 1189 | } |
| 1193 | 1190 | ||
| @@ -1262,6 +1259,31 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2) | |||
| 1262 | return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); | 1259 | return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); |
| 1263 | } | 1260 | } |
| 1264 | 1261 | ||
| 1262 | static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp) | ||
| 1263 | { | ||
| 1264 | struct svc_cred *cr = &rqstp->rq_cred; | ||
| 1265 | u32 service; | ||
| 1266 | |||
| 1267 | service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor); | ||
| 1268 | return service == RPC_GSS_SVC_INTEGRITY || | ||
| 1269 | service == RPC_GSS_SVC_PRIVACY; | ||
| 1270 | } | ||
| 1271 | |||
| 1272 | static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) | ||
| 1273 | { | ||
| 1274 | struct svc_cred *cr = &rqstp->rq_cred; | ||
| 1275 | |||
| 1276 | if (!cl->cl_mach_cred) | ||
| 1277 | return true; | ||
| 1278 | if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech) | ||
| 1279 | return false; | ||
| 1280 | if (!svc_rqst_integrity_protected(rqstp)) | ||
| 1281 | return false; | ||
| 1282 | if (!cr->cr_principal) | ||
| 1283 | return false; | ||
| 1284 | return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); | ||
| 1285 | } | ||
| 1286 | |||
| 1265 | static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) | 1287 | static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) |
| 1266 | { | 1288 | { |
| 1267 | static u32 current_clientid = 1; | 1289 | static u32 current_clientid = 1; |
| @@ -1639,16 +1661,16 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, | |||
| 1639 | if (exid->flags & ~EXCHGID4_FLAG_MASK_A) | 1661 | if (exid->flags & ~EXCHGID4_FLAG_MASK_A) |
| 1640 | return nfserr_inval; | 1662 | return nfserr_inval; |
| 1641 | 1663 | ||
| 1642 | /* Currently only support SP4_NONE */ | ||
| 1643 | switch (exid->spa_how) { | 1664 | switch (exid->spa_how) { |
| 1665 | case SP4_MACH_CRED: | ||
| 1666 | if (!svc_rqst_integrity_protected(rqstp)) | ||
| 1667 | return nfserr_inval; | ||
| 1644 | case SP4_NONE: | 1668 | case SP4_NONE: |
| 1645 | break; | 1669 | break; |
| 1646 | default: /* checked by xdr code */ | 1670 | default: /* checked by xdr code */ |
| 1647 | WARN_ON_ONCE(1); | 1671 | WARN_ON_ONCE(1); |
| 1648 | case SP4_SSV: | 1672 | case SP4_SSV: |
| 1649 | return nfserr_encr_alg_unsupp; | 1673 | return nfserr_encr_alg_unsupp; |
| 1650 | case SP4_MACH_CRED: | ||
| 1651 | return nfserr_serverfault; /* no excuse :-/ */ | ||
| 1652 | } | 1674 | } |
| 1653 | 1675 | ||
| 1654 | /* Cases below refer to rfc 5661 section 18.35.4: */ | 1676 | /* Cases below refer to rfc 5661 section 18.35.4: */ |
| @@ -1663,6 +1685,10 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, | |||
| 1663 | status = nfserr_inval; | 1685 | status = nfserr_inval; |
| 1664 | goto out; | 1686 | goto out; |
| 1665 | } | 1687 | } |
| 1688 | if (!mach_creds_match(conf, rqstp)) { | ||
| 1689 | status = nfserr_wrong_cred; | ||
| 1690 | goto out; | ||
| 1691 | } | ||
| 1666 | if (!creds_match) { /* case 9 */ | 1692 | if (!creds_match) { /* case 9 */ |
| 1667 | status = nfserr_perm; | 1693 | status = nfserr_perm; |
| 1668 | goto out; | 1694 | goto out; |
| @@ -1709,7 +1735,8 @@ out_new: | |||
| 1709 | status = nfserr_jukebox; | 1735 | status = nfserr_jukebox; |
| 1710 | goto out; | 1736 | goto out; |
| 1711 | } | 1737 | } |
| 1712 | new->cl_minorversion = 1; | 1738 | new->cl_minorversion = cstate->minorversion; |
| 1739 | new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); | ||
| 1713 | 1740 | ||
| 1714 | gen_clid(new, nn); | 1741 | gen_clid(new, nn); |
| 1715 | add_to_unconfirmed(new); | 1742 | add_to_unconfirmed(new); |
| @@ -1839,6 +1866,24 @@ static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) | |||
| 1839 | return nfs_ok; | 1866 | return nfs_ok; |
| 1840 | } | 1867 | } |
| 1841 | 1868 | ||
| 1869 | static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs) | ||
| 1870 | { | ||
| 1871 | switch (cbs->flavor) { | ||
| 1872 | case RPC_AUTH_NULL: | ||
| 1873 | case RPC_AUTH_UNIX: | ||
| 1874 | return nfs_ok; | ||
| 1875 | default: | ||
| 1876 | /* | ||
| 1877 | * GSS case: the spec doesn't allow us to return this | ||
| 1878 | * error. But it also doesn't allow us not to support | ||
| 1879 | * GSS. | ||
| 1880 | * I'd rather this fail hard than return some error the | ||
| 1881 | * client might think it can already handle: | ||
| 1882 | */ | ||
| 1883 | return nfserr_encr_alg_unsupp; | ||
| 1884 | } | ||
| 1885 | } | ||
| 1886 | |||
| 1842 | __be32 | 1887 | __be32 |
| 1843 | nfsd4_create_session(struct svc_rqst *rqstp, | 1888 | nfsd4_create_session(struct svc_rqst *rqstp, |
| 1844 | struct nfsd4_compound_state *cstate, | 1889 | struct nfsd4_compound_state *cstate, |
| @@ -1854,6 +1899,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
| 1854 | 1899 | ||
| 1855 | if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) | 1900 | if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) |
| 1856 | return nfserr_inval; | 1901 | return nfserr_inval; |
| 1902 | status = nfsd4_check_cb_sec(&cr_ses->cb_sec); | ||
| 1903 | if (status) | ||
| 1904 | return status; | ||
| 1857 | status = check_forechannel_attrs(&cr_ses->fore_channel, nn); | 1905 | status = check_forechannel_attrs(&cr_ses->fore_channel, nn); |
| 1858 | if (status) | 1906 | if (status) |
| 1859 | return status; | 1907 | return status; |
| @@ -1874,6 +1922,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
| 1874 | WARN_ON_ONCE(conf && unconf); | 1922 | WARN_ON_ONCE(conf && unconf); |
| 1875 | 1923 | ||
| 1876 | if (conf) { | 1924 | if (conf) { |
| 1925 | status = nfserr_wrong_cred; | ||
| 1926 | if (!mach_creds_match(conf, rqstp)) | ||
| 1927 | goto out_free_conn; | ||
| 1877 | cs_slot = &conf->cl_cs_slot; | 1928 | cs_slot = &conf->cl_cs_slot; |
| 1878 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); | 1929 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); |
| 1879 | if (status == nfserr_replay_cache) { | 1930 | if (status == nfserr_replay_cache) { |
| @@ -1890,6 +1941,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
| 1890 | status = nfserr_clid_inuse; | 1941 | status = nfserr_clid_inuse; |
| 1891 | goto out_free_conn; | 1942 | goto out_free_conn; |
| 1892 | } | 1943 | } |
| 1944 | status = nfserr_wrong_cred; | ||
| 1945 | if (!mach_creds_match(unconf, rqstp)) | ||
| 1946 | goto out_free_conn; | ||
| 1893 | cs_slot = &unconf->cl_cs_slot; | 1947 | cs_slot = &unconf->cl_cs_slot; |
| 1894 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); | 1948 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); |
| 1895 | if (status) { | 1949 | if (status) { |
| @@ -1957,7 +2011,11 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state | |||
| 1957 | { | 2011 | { |
| 1958 | struct nfsd4_session *session = cstate->session; | 2012 | struct nfsd4_session *session = cstate->session; |
| 1959 | struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); | 2013 | struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); |
| 2014 | __be32 status; | ||
| 1960 | 2015 | ||
| 2016 | status = nfsd4_check_cb_sec(&bc->bc_cb_sec); | ||
| 2017 | if (status) | ||
| 2018 | return status; | ||
| 1961 | spin_lock(&nn->client_lock); | 2019 | spin_lock(&nn->client_lock); |
| 1962 | session->se_cb_prog = bc->bc_cb_program; | 2020 | session->se_cb_prog = bc->bc_cb_program; |
| 1963 | session->se_cb_sec = bc->bc_cb_sec; | 2021 | session->se_cb_sec = bc->bc_cb_sec; |
| @@ -1986,6 +2044,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, | |||
| 1986 | status = nfserr_badsession; | 2044 | status = nfserr_badsession; |
| 1987 | if (!session) | 2045 | if (!session) |
| 1988 | goto out; | 2046 | goto out; |
| 2047 | status = nfserr_wrong_cred; | ||
| 2048 | if (!mach_creds_match(session->se_client, rqstp)) | ||
| 2049 | goto out; | ||
| 1989 | status = nfsd4_map_bcts_dir(&bcts->dir); | 2050 | status = nfsd4_map_bcts_dir(&bcts->dir); |
| 1990 | if (status) | 2051 | if (status) |
| 1991 | goto out; | 2052 | goto out; |
| @@ -2014,6 +2075,7 @@ nfsd4_destroy_session(struct svc_rqst *r, | |||
| 2014 | { | 2075 | { |
| 2015 | struct nfsd4_session *ses; | 2076 | struct nfsd4_session *ses; |
| 2016 | __be32 status; | 2077 | __be32 status; |
| 2078 | int ref_held_by_me = 0; | ||
| 2017 | struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); | 2079 | struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); |
| 2018 | 2080 | ||
| 2019 | nfs4_lock_state(); | 2081 | nfs4_lock_state(); |
| @@ -2021,6 +2083,7 @@ nfsd4_destroy_session(struct svc_rqst *r, | |||
| 2021 | if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { | 2083 | if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { |
| 2022 | if (!nfsd4_last_compound_op(r)) | 2084 | if (!nfsd4_last_compound_op(r)) |
| 2023 | goto out; | 2085 | goto out; |
| 2086 | ref_held_by_me++; | ||
| 2024 | } | 2087 | } |
| 2025 | dump_sessionid(__func__, &sessionid->sessionid); | 2088 | dump_sessionid(__func__, &sessionid->sessionid); |
| 2026 | spin_lock(&nn->client_lock); | 2089 | spin_lock(&nn->client_lock); |
| @@ -2028,17 +2091,22 @@ nfsd4_destroy_session(struct svc_rqst *r, | |||
| 2028 | status = nfserr_badsession; | 2091 | status = nfserr_badsession; |
| 2029 | if (!ses) | 2092 | if (!ses) |
| 2030 | goto out_client_lock; | 2093 | goto out_client_lock; |
| 2031 | status = mark_session_dead_locked(ses); | 2094 | status = nfserr_wrong_cred; |
| 2032 | if (status) | 2095 | if (!mach_creds_match(ses->se_client, r)) |
| 2033 | goto out_client_lock; | 2096 | goto out_client_lock; |
| 2097 | nfsd4_get_session_locked(ses); | ||
| 2098 | status = mark_session_dead_locked(ses, 1 + ref_held_by_me); | ||
| 2099 | if (status) | ||
| 2100 | goto out_put_session; | ||
| 2034 | unhash_session(ses); | 2101 | unhash_session(ses); |
| 2035 | spin_unlock(&nn->client_lock); | 2102 | spin_unlock(&nn->client_lock); |
| 2036 | 2103 | ||
| 2037 | nfsd4_probe_callback_sync(ses->se_client); | 2104 | nfsd4_probe_callback_sync(ses->se_client); |
| 2038 | 2105 | ||
| 2039 | spin_lock(&nn->client_lock); | 2106 | spin_lock(&nn->client_lock); |
| 2040 | free_session(ses); | ||
| 2041 | status = nfs_ok; | 2107 | status = nfs_ok; |
| 2108 | out_put_session: | ||
| 2109 | nfsd4_put_session(ses); | ||
| 2042 | out_client_lock: | 2110 | out_client_lock: |
| 2043 | spin_unlock(&nn->client_lock); | 2111 | spin_unlock(&nn->client_lock); |
| 2044 | out: | 2112 | out: |
| @@ -2058,26 +2126,31 @@ static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_s | |||
| 2058 | return NULL; | 2126 | return NULL; |
| 2059 | } | 2127 | } |
| 2060 | 2128 | ||
| 2061 | static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) | 2129 | static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) |
| 2062 | { | 2130 | { |
| 2063 | struct nfs4_client *clp = ses->se_client; | 2131 | struct nfs4_client *clp = ses->se_client; |
| 2064 | struct nfsd4_conn *c; | 2132 | struct nfsd4_conn *c; |
| 2133 | __be32 status = nfs_ok; | ||
| 2065 | int ret; | 2134 | int ret; |
| 2066 | 2135 | ||
| 2067 | spin_lock(&clp->cl_lock); | 2136 | spin_lock(&clp->cl_lock); |
| 2068 | c = __nfsd4_find_conn(new->cn_xprt, ses); | 2137 | c = __nfsd4_find_conn(new->cn_xprt, ses); |
| 2069 | if (c) { | 2138 | if (c) |
| 2070 | spin_unlock(&clp->cl_lock); | 2139 | goto out_free; |
| 2071 | free_conn(new); | 2140 | status = nfserr_conn_not_bound_to_session; |
| 2072 | return; | 2141 | if (clp->cl_mach_cred) |
| 2073 | } | 2142 | goto out_free; |
| 2074 | __nfsd4_hash_conn(new, ses); | 2143 | __nfsd4_hash_conn(new, ses); |
| 2075 | spin_unlock(&clp->cl_lock); | 2144 | spin_unlock(&clp->cl_lock); |
| 2076 | ret = nfsd4_register_conn(new); | 2145 | ret = nfsd4_register_conn(new); |
| 2077 | if (ret) | 2146 | if (ret) |
| 2078 | /* oops; xprt is already down: */ | 2147 | /* oops; xprt is already down: */ |
| 2079 | nfsd4_conn_lost(&new->cn_xpt_user); | 2148 | nfsd4_conn_lost(&new->cn_xpt_user); |
| 2080 | return; | 2149 | return nfs_ok; |
| 2150 | out_free: | ||
| 2151 | spin_unlock(&clp->cl_lock); | ||
| 2152 | free_conn(new); | ||
| 2153 | return status; | ||
| 2081 | } | 2154 | } |
| 2082 | 2155 | ||
| 2083 | static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) | 2156 | static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) |
| @@ -2169,8 +2242,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, | |||
| 2169 | if (status) | 2242 | if (status) |
| 2170 | goto out_put_session; | 2243 | goto out_put_session; |
| 2171 | 2244 | ||
| 2172 | nfsd4_sequence_check_conn(conn, session); | 2245 | status = nfsd4_sequence_check_conn(conn, session); |
| 2173 | conn = NULL; | 2246 | conn = NULL; |
| 2247 | if (status) | ||
| 2248 | goto out_put_session; | ||
| 2174 | 2249 | ||
| 2175 | /* Success! bump slot seqid */ | 2250 | /* Success! bump slot seqid */ |
| 2176 | slot->sl_seqid = seq->seqid; | 2251 | slot->sl_seqid = seq->seqid; |
| @@ -2232,7 +2307,10 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta | |||
| 2232 | status = nfserr_stale_clientid; | 2307 | status = nfserr_stale_clientid; |
| 2233 | goto out; | 2308 | goto out; |
| 2234 | } | 2309 | } |
| 2235 | 2310 | if (!mach_creds_match(clp, rqstp)) { | |
| 2311 | status = nfserr_wrong_cred; | ||
| 2312 | goto out; | ||
| 2313 | } | ||
| 2236 | expire_client(clp); | 2314 | expire_client(clp); |
| 2237 | out: | 2315 | out: |
| 2238 | nfs4_unlock_state(); | 2316 | nfs4_unlock_state(); |
| @@ -2645,13 +2723,13 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) | |||
| 2645 | 2723 | ||
| 2646 | list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); | 2724 | list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); |
| 2647 | 2725 | ||
| 2648 | /* only place dl_time is set. protected by lock_flocks*/ | 2726 | /* Only place dl_time is set; protected by i_lock: */ |
| 2649 | dp->dl_time = get_seconds(); | 2727 | dp->dl_time = get_seconds(); |
| 2650 | 2728 | ||
| 2651 | nfsd4_cb_recall(dp); | 2729 | nfsd4_cb_recall(dp); |
| 2652 | } | 2730 | } |
| 2653 | 2731 | ||
| 2654 | /* Called from break_lease() with lock_flocks() held. */ | 2732 | /* Called from break_lease() with i_lock held. */ |
| 2655 | static void nfsd_break_deleg_cb(struct file_lock *fl) | 2733 | static void nfsd_break_deleg_cb(struct file_lock *fl) |
| 2656 | { | 2734 | { |
| 2657 | struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; | 2735 | struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; |
| @@ -2940,13 +3018,13 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f | |||
| 2940 | return fl; | 3018 | return fl; |
| 2941 | } | 3019 | } |
| 2942 | 3020 | ||
| 2943 | static int nfs4_setlease(struct nfs4_delegation *dp, int flag) | 3021 | static int nfs4_setlease(struct nfs4_delegation *dp) |
| 2944 | { | 3022 | { |
| 2945 | struct nfs4_file *fp = dp->dl_file; | 3023 | struct nfs4_file *fp = dp->dl_file; |
| 2946 | struct file_lock *fl; | 3024 | struct file_lock *fl; |
| 2947 | int status; | 3025 | int status; |
| 2948 | 3026 | ||
| 2949 | fl = nfs4_alloc_init_lease(dp, flag); | 3027 | fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); |
| 2950 | if (!fl) | 3028 | if (!fl) |
| 2951 | return -ENOMEM; | 3029 | return -ENOMEM; |
| 2952 | fl->fl_file = find_readable_file(fp); | 3030 | fl->fl_file = find_readable_file(fp); |
| @@ -2964,12 +3042,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag) | |||
| 2964 | return 0; | 3042 | return 0; |
| 2965 | } | 3043 | } |
| 2966 | 3044 | ||
| 2967 | static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) | 3045 | static int nfs4_set_delegation(struct nfs4_delegation *dp) |
| 2968 | { | 3046 | { |
| 2969 | struct nfs4_file *fp = dp->dl_file; | 3047 | struct nfs4_file *fp = dp->dl_file; |
| 2970 | 3048 | ||
| 2971 | if (!fp->fi_lease) | 3049 | if (!fp->fi_lease) |
| 2972 | return nfs4_setlease(dp, flag); | 3050 | return nfs4_setlease(dp); |
| 2973 | spin_lock(&recall_lock); | 3051 | spin_lock(&recall_lock); |
| 2974 | if (fp->fi_had_conflict) { | 3052 | if (fp->fi_had_conflict) { |
| 2975 | spin_unlock(&recall_lock); | 3053 | spin_unlock(&recall_lock); |
| @@ -3005,6 +3083,9 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) | |||
| 3005 | 3083 | ||
| 3006 | /* | 3084 | /* |
| 3007 | * Attempt to hand out a delegation. | 3085 | * Attempt to hand out a delegation. |
| 3086 | * | ||
| 3087 | * Note we don't support write delegations, and won't until the vfs has | ||
| 3088 | * proper support for them. | ||
| 3008 | */ | 3089 | */ |
| 3009 | static void | 3090 | static void |
| 3010 | nfs4_open_delegation(struct net *net, struct svc_fh *fh, | 3091 | nfs4_open_delegation(struct net *net, struct svc_fh *fh, |
| @@ -3013,39 +3094,45 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, | |||
| 3013 | struct nfs4_delegation *dp; | 3094 | struct nfs4_delegation *dp; |
| 3014 | struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); | 3095 | struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); |
| 3015 | int cb_up; | 3096 | int cb_up; |
| 3016 | int status = 0, flag = 0; | 3097 | int status = 0; |
| 3017 | 3098 | ||
| 3018 | cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); | 3099 | cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); |
| 3019 | flag = NFS4_OPEN_DELEGATE_NONE; | ||
| 3020 | open->op_recall = 0; | 3100 | open->op_recall = 0; |
| 3021 | switch (open->op_claim_type) { | 3101 | switch (open->op_claim_type) { |
| 3022 | case NFS4_OPEN_CLAIM_PREVIOUS: | 3102 | case NFS4_OPEN_CLAIM_PREVIOUS: |
| 3023 | if (!cb_up) | 3103 | if (!cb_up) |
| 3024 | open->op_recall = 1; | 3104 | open->op_recall = 1; |
| 3025 | flag = open->op_delegate_type; | 3105 | if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ) |
| 3026 | if (flag == NFS4_OPEN_DELEGATE_NONE) | 3106 | goto out_no_deleg; |
| 3027 | goto out; | ||
| 3028 | break; | 3107 | break; |
| 3029 | case NFS4_OPEN_CLAIM_NULL: | 3108 | case NFS4_OPEN_CLAIM_NULL: |
| 3030 | /* Let's not give out any delegations till everyone's | 3109 | /* |
| 3031 | * had the chance to reclaim theirs.... */ | 3110 | * Let's not give out any delegations till everyone's |
| 3111 | * had the chance to reclaim theirs.... | ||
| 3112 | */ | ||
| 3032 | if (locks_in_grace(net)) | 3113 | if (locks_in_grace(net)) |
| 3033 | goto out; | 3114 | goto out_no_deleg; |
| 3034 | if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) | 3115 | if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) |
| 3035 | goto out; | 3116 | goto out_no_deleg; |
| 3117 | /* | ||
| 3118 | * Also, if the file was opened for write or | ||
| 3119 | * create, there's a good chance the client's | ||
| 3120 | * about to write to it, resulting in an | ||
| 3121 | * immediate recall (since we don't support | ||
| 3122 | * write delegations): | ||
| 3123 | */ | ||
| 3036 | if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) | 3124 | if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) |
| 3037 | flag = NFS4_OPEN_DELEGATE_WRITE; | 3125 | goto out_no_deleg; |
| 3038 | else | 3126 | if (open->op_create == NFS4_OPEN_CREATE) |
| 3039 | flag = NFS4_OPEN_DELEGATE_READ; | 3127 | goto out_no_deleg; |
| 3040 | break; | 3128 | break; |
| 3041 | default: | 3129 | default: |
| 3042 | goto out; | 3130 | goto out_no_deleg; |
| 3043 | } | 3131 | } |
| 3044 | 3132 | dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); | |
| 3045 | dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag); | ||
| 3046 | if (dp == NULL) | 3133 | if (dp == NULL) |
| 3047 | goto out_no_deleg; | 3134 | goto out_no_deleg; |
| 3048 | status = nfs4_set_delegation(dp, flag); | 3135 | status = nfs4_set_delegation(dp); |
| 3049 | if (status) | 3136 | if (status) |
| 3050 | goto out_free; | 3137 | goto out_free; |
| 3051 | 3138 | ||
| @@ -3053,24 +3140,23 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, | |||
| 3053 | 3140 | ||
| 3054 | dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", | 3141 | dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", |
| 3055 | STATEID_VAL(&dp->dl_stid.sc_stateid)); | 3142 | STATEID_VAL(&dp->dl_stid.sc_stateid)); |
| 3056 | out: | 3143 | open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; |
| 3057 | open->op_delegate_type = flag; | ||
| 3058 | if (flag == NFS4_OPEN_DELEGATE_NONE) { | ||
| 3059 | if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && | ||
| 3060 | open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) | ||
| 3061 | dprintk("NFSD: WARNING: refusing delegation reclaim\n"); | ||
| 3062 | |||
| 3063 | /* 4.1 client asking for a delegation? */ | ||
| 3064 | if (open->op_deleg_want) | ||
| 3065 | nfsd4_open_deleg_none_ext(open, status); | ||
| 3066 | } | ||
| 3067 | return; | 3144 | return; |
| 3068 | out_free: | 3145 | out_free: |
| 3069 | unhash_stid(&dp->dl_stid); | 3146 | unhash_stid(&dp->dl_stid); |
| 3070 | nfs4_put_delegation(dp); | 3147 | nfs4_put_delegation(dp); |
| 3071 | out_no_deleg: | 3148 | out_no_deleg: |
| 3072 | flag = NFS4_OPEN_DELEGATE_NONE; | 3149 | open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; |
| 3073 | goto out; | 3150 | if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && |
| 3151 | open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) { | ||
| 3152 | dprintk("NFSD: WARNING: refusing delegation reclaim\n"); | ||
| 3153 | open->op_recall = 1; | ||
| 3154 | } | ||
| 3155 | |||
| 3156 | /* 4.1 client asking for a delegation? */ | ||
| 3157 | if (open->op_deleg_want) | ||
| 3158 | nfsd4_open_deleg_none_ext(open, status); | ||
| 3159 | return; | ||
| 3074 | } | 3160 | } |
| 3075 | 3161 | ||
| 3076 | static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, | 3162 | static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, |
| @@ -3427,7 +3513,7 @@ grace_disallows_io(struct net *net, struct inode *inode) | |||
| 3427 | /* Returns true iff a is later than b: */ | 3513 | /* Returns true iff a is later than b: */ |
| 3428 | static bool stateid_generation_after(stateid_t *a, stateid_t *b) | 3514 | static bool stateid_generation_after(stateid_t *a, stateid_t *b) |
| 3429 | { | 3515 | { |
| 3430 | return (s32)a->si_generation - (s32)b->si_generation > 0; | 3516 | return (s32)(a->si_generation - b->si_generation) > 0; |
| 3431 | } | 3517 | } |
| 3432 | 3518 | ||
| 3433 | static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) | 3519 | static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) |
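The added parentheses in stateid_generation_after() make the comparison wraparound-safe: subtracting the unsigned generations first is well defined, and interpreting the 32-bit difference as signed then says whether a is "ahead of" b even across a counter wrap. The original form cast each operand separately, so the subtraction could overflow the signed range once the counters straddled 2^31. A small standalone demonstration with fixed-width types:

```c
#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe "a is later than b" for 32-bit generation counters:
 * subtract in unsigned arithmetic (well defined), then interpret the
 * difference as signed. */
static int generation_after(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

int main(void)
{
	/* Counter wrapped: 3 is "after" 0xfffffffe even though 3 < 0xfffffffe. */
	printf("%d\n", generation_after(3, 0xfffffffeu));	/* 1 */
	printf("%d\n", generation_after(0xfffffffeu, 3));	/* 0 */
	printf("%d\n", generation_after(7, 7));			/* 0 */
	return 0;
}
```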
| @@ -4435,7 +4521,6 @@ __be32 | |||
| 4435 | nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 4521 | nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
| 4436 | struct nfsd4_locku *locku) | 4522 | struct nfsd4_locku *locku) |
| 4437 | { | 4523 | { |
| 4438 | struct nfs4_lockowner *lo; | ||
| 4439 | struct nfs4_ol_stateid *stp; | 4524 | struct nfs4_ol_stateid *stp; |
| 4440 | struct file *filp = NULL; | 4525 | struct file *filp = NULL; |
| 4441 | struct file_lock *file_lock = NULL; | 4526 | struct file_lock *file_lock = NULL; |
| @@ -4468,10 +4553,9 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 4468 | status = nfserr_jukebox; | 4553 | status = nfserr_jukebox; |
| 4469 | goto out; | 4554 | goto out; |
| 4470 | } | 4555 | } |
| 4471 | lo = lockowner(stp->st_stateowner); | ||
| 4472 | locks_init_lock(file_lock); | 4556 | locks_init_lock(file_lock); |
| 4473 | file_lock->fl_type = F_UNLCK; | 4557 | file_lock->fl_type = F_UNLCK; |
| 4474 | file_lock->fl_owner = (fl_owner_t)lo; | 4558 | file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); |
| 4475 | file_lock->fl_pid = current->tgid; | 4559 | file_lock->fl_pid = current->tgid; |
| 4476 | file_lock->fl_file = filp; | 4560 | file_lock->fl_file = filp; |
| 4477 | file_lock->fl_flags = FL_POSIX; | 4561 | file_lock->fl_flags = FL_POSIX; |
| @@ -4490,11 +4574,6 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 4490 | update_stateid(&stp->st_stid.sc_stateid); | 4574 | update_stateid(&stp->st_stid.sc_stateid); |
| 4491 | memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); | 4575 | memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); |
| 4492 | 4576 | ||
| 4493 | if (nfsd4_has_session(cstate) && !check_for_locks(stp->st_file, lo)) { | ||
| 4494 | WARN_ON_ONCE(cstate->replay_owner); | ||
| 4495 | release_lockowner(lo); | ||
| 4496 | } | ||
| 4497 | |||
| 4498 | out: | 4577 | out: |
| 4499 | nfsd4_bump_seqid(cstate, status); | 4578 | nfsd4_bump_seqid(cstate, status); |
| 4500 | if (!cstate->replay_owner) | 4579 | if (!cstate->replay_owner) |
| @@ -4520,7 +4599,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) | |||
| 4520 | struct inode *inode = filp->fi_inode; | 4599 | struct inode *inode = filp->fi_inode; |
| 4521 | int status = 0; | 4600 | int status = 0; |
| 4522 | 4601 | ||
| 4523 | lock_flocks(); | 4602 | spin_lock(&inode->i_lock); |
| 4524 | for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { | 4603 | for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { |
| 4525 | if ((*flpp)->fl_owner == (fl_owner_t)lowner) { | 4604 | if ((*flpp)->fl_owner == (fl_owner_t)lowner) { |
| 4526 | status = 1; | 4605 | status = 1; |
| @@ -4528,7 +4607,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) | |||
| 4528 | } | 4607 | } |
| 4529 | } | 4608 | } |
| 4530 | out: | 4609 | out: |
| 4531 | unlock_flocks(); | 4610 | spin_unlock(&inode->i_lock); |
| 4532 | return status; | 4611 | return status; |
| 4533 | } | 4612 | } |
| 4534 | 4613 | ||
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6cd86e0fe450..0c0f3ea90de5 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
| @@ -55,6 +55,11 @@ | |||
| 55 | #include "cache.h" | 55 | #include "cache.h" |
| 56 | #include "netns.h" | 56 | #include "netns.h" |
| 57 | 57 | ||
| 58 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
| 59 | #include <linux/security.h> | ||
| 60 | #endif | ||
| 61 | |||
| 62 | |||
| 58 | #define NFSDDBG_FACILITY NFSDDBG_XDR | 63 | #define NFSDDBG_FACILITY NFSDDBG_XDR |
| 59 | 64 | ||
| 60 | /* | 65 | /* |
| @@ -134,6 +139,19 @@ xdr_error: \ | |||
| 134 | } \ | 139 | } \ |
| 135 | } while (0) | 140 | } while (0) |
| 136 | 141 | ||
| 142 | static void next_decode_page(struct nfsd4_compoundargs *argp) | ||
| 143 | { | ||
| 144 | argp->pagelist++; | ||
| 145 | argp->p = page_address(argp->pagelist[0]); | ||
| 146 | if (argp->pagelen < PAGE_SIZE) { | ||
| 147 | argp->end = argp->p + (argp->pagelen>>2); | ||
| 148 | argp->pagelen = 0; | ||
| 149 | } else { | ||
| 150 | argp->end = argp->p + (PAGE_SIZE>>2); | ||
| 151 | argp->pagelen -= PAGE_SIZE; | ||
| 152 | } | ||
| 153 | } | ||
| 154 | |||
| 137 | static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) | 155 | static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) |
| 138 | { | 156 | { |
| 139 | /* We want more bytes than seem to be available. | 157 | /* We want more bytes than seem to be available. |
| @@ -161,16 +179,7 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) | |||
| 161 | * guarantee p points to at least nbytes bytes. | 179 | * guarantee p points to at least nbytes bytes. |
| 162 | */ | 180 | */ |
| 163 | memcpy(p, argp->p, avail); | 181 | memcpy(p, argp->p, avail); |
| 164 | /* step to next page */ | 182 | next_decode_page(argp); |
| 165 | argp->p = page_address(argp->pagelist[0]); | ||
| 166 | argp->pagelist++; | ||
| 167 | if (argp->pagelen < PAGE_SIZE) { | ||
| 168 | argp->end = argp->p + (argp->pagelen>>2); | ||
| 169 | argp->pagelen = 0; | ||
| 170 | } else { | ||
| 171 | argp->end = argp->p + (PAGE_SIZE>>2); | ||
| 172 | argp->pagelen -= PAGE_SIZE; | ||
| 173 | } | ||
| 174 | memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); | 183 | memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); |
| 175 | argp->p += XDR_QUADLEN(nbytes - avail); | 184 | argp->p += XDR_QUADLEN(nbytes - avail); |
| 176 | return p; | 185 | return p; |
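read_buf() handles an XDR item that straddles a page boundary by copying what is left of the current page, stepping to the next page via the new next_decode_page() helper, and then copying the remainder. The same idea over two plain byte buffers, with no kernel types involved (SEG_SIZE and stream_read() are invented for the sketch):

```c
#include <stdio.h>
#include <string.h>

#define SEG_SIZE 8

struct two_seg_stream {
	const unsigned char *seg[2];	/* two back-to-back data segments */
	int seg_idx;			/* which segment we are reading */
	int offset;			/* offset inside the current segment */
};

/* Read nbytes into dst even if the item straddles the segment boundary. */
static int stream_read(struct two_seg_stream *s, unsigned char *dst, int nbytes)
{
	int avail = SEG_SIZE - s->offset;

	if (nbytes <= avail) {
		memcpy(dst, s->seg[s->seg_idx] + s->offset, nbytes);
		s->offset += nbytes;
		return 0;
	}
	if (s->seg_idx == 1)
		return -1;	/* would run past the last segment */

	/* Copy the tail of this segment, then the head of the next one. */
	memcpy(dst, s->seg[s->seg_idx] + s->offset, avail);
	s->seg_idx++;
	s->offset = 0;
	memcpy(dst + avail, s->seg[s->seg_idx], nbytes - avail);
	s->offset = nbytes - avail;
	return 0;
}

int main(void)
{
	const unsigned char a[] = "abcdefgh", b[] = "ijklmnop";
	struct two_seg_stream s = { .seg = { a, b } };
	unsigned char out[12] = { 0 };

	s.offset = 5;			/* pretend 5 bytes were already consumed */
	stream_read(&s, out, 6);	/* spans both segments: "fghijk" */
	printf("%.*s\n", 6, (const char *)out);
	return 0;
}
```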
| @@ -242,7 +251,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) | |||
| 242 | 251 | ||
| 243 | static __be32 | 252 | static __be32 |
| 244 | nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, | 253 | nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, |
| 245 | struct iattr *iattr, struct nfs4_acl **acl) | 254 | struct iattr *iattr, struct nfs4_acl **acl, |
| 255 | struct xdr_netobj *label) | ||
| 246 | { | 256 | { |
| 247 | int expected_len, len = 0; | 257 | int expected_len, len = 0; |
| 248 | u32 dummy32; | 258 | u32 dummy32; |
| @@ -380,6 +390,32 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, | |||
| 380 | goto xdr_error; | 390 | goto xdr_error; |
| 381 | } | 391 | } |
| 382 | } | 392 | } |
| 393 | |||
| 394 | label->len = 0; | ||
| 395 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
| 396 | if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { | ||
| 397 | READ_BUF(4); | ||
| 398 | len += 4; | ||
| 399 | READ32(dummy32); /* lfs: we don't use it */ | ||
| 400 | READ_BUF(4); | ||
| 401 | len += 4; | ||
| 402 | READ32(dummy32); /* pi: we don't use it either */ | ||
| 403 | READ_BUF(4); | ||
| 404 | len += 4; | ||
| 405 | READ32(dummy32); | ||
| 406 | READ_BUF(dummy32); | ||
| 407 | if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN) | ||
| 408 | return nfserr_badlabel; | ||
| 409 | len += (XDR_QUADLEN(dummy32) << 2); | ||
| 410 | READMEM(buf, dummy32); | ||
| 411 | label->data = kzalloc(dummy32 + 1, GFP_KERNEL); | ||
| 412 | if (!label->data) | ||
| 413 | return nfserr_jukebox; | ||
| 414 | defer_free(argp, kfree, label->data); | ||
| 415 | memcpy(label->data, buf, dummy32); | ||
| 416 | } | ||
| 417 | #endif | ||
| 418 | |||
| 383 | if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 | 419 | if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 |
| 384 | || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 | 420 | || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 |
| 385 | || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) | 421 | || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) |
| @@ -428,7 +464,11 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ | |||
| 428 | /* callback_sec_params4 */ | 464 | /* callback_sec_params4 */ |
| 429 | READ_BUF(4); | 465 | READ_BUF(4); |
| 430 | READ32(nr_secflavs); | 466 | READ32(nr_secflavs); |
| 431 | cbs->flavor = (u32)(-1); | 467 | if (nr_secflavs) |
| 468 | cbs->flavor = (u32)(-1); | ||
| 469 | else | ||
| 470 | /* Is this legal? Be generous, take it to mean AUTH_NONE: */ | ||
| 471 | cbs->flavor = 0; | ||
| 432 | for (i = 0; i < nr_secflavs; ++i) { | 472 | for (i = 0; i < nr_secflavs; ++i) { |
| 433 | READ_BUF(4); | 473 | READ_BUF(4); |
| 434 | READ32(dummy); | 474 | READ32(dummy); |
| @@ -576,7 +616,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create | |||
| 576 | return status; | 616 | return status; |
| 577 | 617 | ||
| 578 | status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, | 618 | status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, |
| 579 | &create->cr_acl); | 619 | &create->cr_acl, &create->cr_label); |
| 580 | if (status) | 620 | if (status) |
| 581 | goto out; | 621 | goto out; |
| 582 | 622 | ||
| @@ -827,7 +867,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) | |||
| 827 | case NFS4_CREATE_UNCHECKED: | 867 | case NFS4_CREATE_UNCHECKED: |
| 828 | case NFS4_CREATE_GUARDED: | 868 | case NFS4_CREATE_GUARDED: |
| 829 | status = nfsd4_decode_fattr(argp, open->op_bmval, | 869 | status = nfsd4_decode_fattr(argp, open->op_bmval, |
| 830 | &open->op_iattr, &open->op_acl); | 870 | &open->op_iattr, &open->op_acl, &open->op_label); |
| 831 | if (status) | 871 | if (status) |
| 832 | goto out; | 872 | goto out; |
| 833 | break; | 873 | break; |
| @@ -841,7 +881,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) | |||
| 841 | READ_BUF(NFS4_VERIFIER_SIZE); | 881 | READ_BUF(NFS4_VERIFIER_SIZE); |
| 842 | COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); | 882 | COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); |
| 843 | status = nfsd4_decode_fattr(argp, open->op_bmval, | 883 | status = nfsd4_decode_fattr(argp, open->op_bmval, |
| 844 | &open->op_iattr, &open->op_acl); | 884 | &open->op_iattr, &open->op_acl, &open->op_label); |
| 845 | if (status) | 885 | if (status) |
| 846 | goto out; | 886 | goto out; |
| 847 | break; | 887 | break; |
| @@ -1063,7 +1103,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta | |||
| 1063 | if (status) | 1103 | if (status) |
| 1064 | return status; | 1104 | return status; |
| 1065 | return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, | 1105 | return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, |
| 1066 | &setattr->sa_acl); | 1106 | &setattr->sa_acl, &setattr->sa_label); |
| 1067 | } | 1107 | } |
| 1068 | 1108 | ||
| 1069 | static __be32 | 1109 | static __be32 |
| @@ -1567,6 +1607,7 @@ struct nfsd4_minorversion_ops { | |||
| 1567 | static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { | 1607 | static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { |
| 1568 | [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, | 1608 | [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, |
| 1569 | [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, | 1609 | [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, |
| 1610 | [2] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, | ||
| 1570 | }; | 1611 | }; |
| 1571 | 1612 | ||
| 1572 | static __be32 | 1613 | static __be32 |
| @@ -1953,6 +1994,36 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace, | |||
| 1953 | FATTR4_WORD0_RDATTR_ERROR) | 1994 | FATTR4_WORD0_RDATTR_ERROR) |
| 1954 | #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID | 1995 | #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID |
| 1955 | 1996 | ||
| 1997 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
| 1998 | static inline __be32 | ||
| 1999 | nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) | ||
| 2000 | { | ||
| 2001 | __be32 *p = *pp; | ||
| 2002 | |||
| 2003 | if (*buflen < ((XDR_QUADLEN(len) << 2) + 4 + 4 + 4)) | ||
| 2004 | return nfserr_resource; | ||
| 2005 | |||
| 2006 | /* | ||
| 2007 | * For now we use a 0 here to indicate the null translation; in | ||
| 2008 | * the future we may place a call to translation code here. | ||
| 2009 | */ | ||
| 2010 | if ((*buflen -= 8) < 0) | ||
| 2011 | return nfserr_resource; | ||
| 2012 | |||
| 2013 | WRITE32(0); /* lfs */ | ||
| 2014 | WRITE32(0); /* pi */ | ||
| 2015 | p = xdr_encode_opaque(p, context, len); | ||
| 2016 | *buflen -= (XDR_QUADLEN(len) << 2) + 4; | ||
| 2017 | |||
| 2018 | *pp = p; | ||
| 2019 | return 0; | ||
| 2020 | } | ||
| 2021 | #else | ||
| 2022 | static inline __be32 | ||
| 2023 | nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) | ||
| 2024 | { return 0; } | ||
| 2025 | #endif | ||
| 2026 | |||
| 1956 | static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) | 2027 | static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) |
| 1957 | { | 2028 | { |
| 1958 | /* As per referral draft: */ | 2029 | /* As per referral draft: */ |
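nfsd4_encode_security_label() emits the label attribute as three XDR items: a 32-bit lfs (0 meaning the null translation, per the comment), a 32-bit pi, and the label itself as an XDR opaque, i.e. a length word plus data padded to a 4-byte boundary. A freestanding encoder along those lines is sketched below; the buffer handling is simplified and does not mirror the kernel's WRITE32/xdr_encode_opaque() macros.

```c
#include <arpa/inet.h>	/* htonl */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Encode lfs, pi and an opaque label into buf; returns bytes written or -1. */
static int encode_security_label(unsigned char *buf, size_t buflen,
				 const char *label, uint32_t len)
{
	size_t padded = (len + 3) & ~3u;	/* XDR opaque: pad to 4 bytes */
	size_t need = 4 + 4 + 4 + padded;	/* lfs + pi + length + data */
	uint32_t word;
	unsigned char *p = buf;

	if (buflen < need)
		return -1;

	word = htonl(0); memcpy(p, &word, 4); p += 4;	/* lfs: null translation */
	word = htonl(0); memcpy(p, &word, 4); p += 4;	/* pi: unused */
	word = htonl(len); memcpy(p, &word, 4); p += 4;	/* opaque length */
	memcpy(p, label, len);
	memset(p + len, 0, padded - len);		/* zero padding */
	return (int)need;
}

int main(void)
{
	unsigned char buf[64];
	const char *label = "system_u:object_r:etc_t:s0";	/* example label */
	int n = encode_security_label(buf, sizeof(buf), label,
				      (uint32_t)strlen(label));

	printf("encoded %d bytes\n", n);
	return 0;
}
```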
| @@ -2012,6 +2083,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
| 2012 | int err; | 2083 | int err; |
| 2013 | int aclsupport = 0; | 2084 | int aclsupport = 0; |
| 2014 | struct nfs4_acl *acl = NULL; | 2085 | struct nfs4_acl *acl = NULL; |
| 2086 | void *context = NULL; | ||
| 2087 | int contextlen; | ||
| 2088 | bool contextsupport = false; | ||
| 2015 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | 2089 | struct nfsd4_compoundres *resp = rqstp->rq_resp; |
| 2016 | u32 minorversion = resp->cstate.minorversion; | 2090 | u32 minorversion = resp->cstate.minorversion; |
| 2017 | struct path path = { | 2091 | struct path path = { |
| @@ -2065,6 +2139,21 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
| 2065 | } | 2139 | } |
| 2066 | } | 2140 | } |
| 2067 | 2141 | ||
| 2142 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
| 2143 | if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) || | ||
| 2144 | bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { | ||
| 2145 | err = security_inode_getsecctx(dentry->d_inode, | ||
| 2146 | &context, &contextlen); | ||
| 2147 | contextsupport = (err == 0); | ||
| 2148 | if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { | ||
| 2149 | if (err == -EOPNOTSUPP) | ||
| 2150 | bmval2 &= ~FATTR4_WORD2_SECURITY_LABEL; | ||
| 2151 | else if (err) | ||
| 2152 | goto out_nfserr; | ||
| 2153 | } | ||
| 2154 | } | ||
| 2155 | #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ | ||
| 2156 | |||
| 2068 | if (bmval2) { | 2157 | if (bmval2) { |
| 2069 | if ((buflen -= 16) < 0) | 2158 | if ((buflen -= 16) < 0) |
| 2070 | goto out_resource; | 2159 | goto out_resource; |
| @@ -2093,6 +2182,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
| 2093 | 2182 | ||
| 2094 | if (!aclsupport) | 2183 | if (!aclsupport) |
| 2095 | word0 &= ~FATTR4_WORD0_ACL; | 2184 | word0 &= ~FATTR4_WORD0_ACL; |
| 2185 | if (!contextsupport) | ||
| 2186 | word2 &= ~FATTR4_WORD2_SECURITY_LABEL; | ||
| 2096 | if (!word2) { | 2187 | if (!word2) { |
| 2097 | if ((buflen -= 12) < 0) | 2188 | if ((buflen -= 12) < 0) |
| 2098 | goto out_resource; | 2189 | goto out_resource; |
| @@ -2400,6 +2491,12 @@ out_acl: | |||
| 2400 | get_parent_attributes(exp, &stat); | 2491 | get_parent_attributes(exp, &stat); |
| 2401 | WRITE64(stat.ino); | 2492 | WRITE64(stat.ino); |
| 2402 | } | 2493 | } |
| 2494 | if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { | ||
| 2495 | status = nfsd4_encode_security_label(rqstp, context, | ||
| 2496 | contextlen, &p, &buflen); | ||
| 2497 | if (status) | ||
| 2498 | goto out; | ||
| 2499 | } | ||
| 2403 | if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { | 2500 | if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { |
| 2404 | WRITE32(3); | 2501 | WRITE32(3); |
| 2405 | WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); | 2502 | WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); |
| @@ -2412,6 +2509,10 @@ out_acl: | |||
| 2412 | status = nfs_ok; | 2509 | status = nfs_ok; |
| 2413 | 2510 | ||
| 2414 | out: | 2511 | out: |
| 2512 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
| 2513 | if (context) | ||
| 2514 | security_release_secctx(context, contextlen); | ||
| 2515 | #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ | ||
| 2415 | kfree(acl); | 2516 | kfree(acl); |
| 2416 | if (fhp == &tempfh) | 2517 | if (fhp == &tempfh) |
| 2417 | fh_put(&tempfh); | 2518 | fh_put(&tempfh); |
| @@ -3176,16 +3277,18 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 | |||
| 3176 | { | 3277 | { |
| 3177 | __be32 *p; | 3278 | __be32 *p; |
| 3178 | 3279 | ||
| 3179 | RESERVE_SPACE(12); | 3280 | RESERVE_SPACE(16); |
| 3180 | if (nfserr) { | 3281 | if (nfserr) { |
| 3181 | WRITE32(2); | 3282 | WRITE32(3); |
| 3283 | WRITE32(0); | ||
| 3182 | WRITE32(0); | 3284 | WRITE32(0); |
| 3183 | WRITE32(0); | 3285 | WRITE32(0); |
| 3184 | } | 3286 | } |
| 3185 | else { | 3287 | else { |
| 3186 | WRITE32(2); | 3288 | WRITE32(3); |
| 3187 | WRITE32(setattr->sa_bmval[0]); | 3289 | WRITE32(setattr->sa_bmval[0]); |
| 3188 | WRITE32(setattr->sa_bmval[1]); | 3290 | WRITE32(setattr->sa_bmval[1]); |
| 3291 | WRITE32(setattr->sa_bmval[2]); | ||
| 3189 | } | 3292 | } |
| 3190 | ADJUST_ARGS(); | 3293 | ADJUST_ARGS(); |
| 3191 | return nfserr; | 3294 | return nfserr; |
| @@ -3226,6 +3329,14 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w | |||
| 3226 | return nfserr; | 3329 | return nfserr; |
| 3227 | } | 3330 | } |
| 3228 | 3331 | ||
| 3332 | static const u32 nfs4_minimal_spo_must_enforce[2] = { | ||
| 3333 | [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | | ||
| 3334 | 1 << (OP_EXCHANGE_ID - 32) | | ||
| 3335 | 1 << (OP_CREATE_SESSION - 32) | | ||
| 3336 | 1 << (OP_DESTROY_SESSION - 32) | | ||
| 3337 | 1 << (OP_DESTROY_CLIENTID - 32) | ||
| 3338 | }; | ||
| 3339 | |||
| 3229 | static __be32 | 3340 | static __be32 |
| 3230 | nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, | 3341 | nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, |
| 3231 | struct nfsd4_exchange_id *exid) | 3342 | struct nfsd4_exchange_id *exid) |
| @@ -3264,6 +3375,20 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
| 3264 | /* state_protect4_r. Currently only support SP4_NONE */ | 3375 | /* state_protect4_r. Currently only support SP4_NONE */ |
| 3265 | BUG_ON(exid->spa_how != SP4_NONE); | 3376 | BUG_ON(exid->spa_how != SP4_NONE); |
| 3266 | WRITE32(exid->spa_how); | 3377 | WRITE32(exid->spa_how); |
| 3378 | switch (exid->spa_how) { | ||
| 3379 | case SP4_NONE: | ||
| 3380 | break; | ||
| 3381 | case SP4_MACH_CRED: | ||
| 3382 | /* spo_must_enforce bitmap: */ | ||
| 3383 | WRITE32(2); | ||
| 3384 | WRITE32(nfs4_minimal_spo_must_enforce[0]); | ||
| 3385 | WRITE32(nfs4_minimal_spo_must_enforce[1]); | ||
| 3386 | /* empty spo_must_allow bitmap: */ | ||
| 3387 | WRITE32(0); | ||
| 3388 | break; | ||
| 3389 | default: | ||
| 3390 | WARN_ON_ONCE(1); | ||
| 3391 | } | ||
| 3267 | 3392 | ||
| 3268 | /* The server_owner struct */ | 3393 | /* The server_owner struct */ |
| 3269 | WRITE64(minor_id); /* Minor id */ | 3394 | WRITE64(minor_id); /* Minor id */ |
| @@ -3635,13 +3760,17 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo | |||
| 3635 | iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; | 3760 | iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; |
| 3636 | BUG_ON(iov->iov_len > PAGE_SIZE); | 3761 | BUG_ON(iov->iov_len > PAGE_SIZE); |
| 3637 | if (nfsd4_has_session(cs)) { | 3762 | if (nfsd4_has_session(cs)) { |
| 3763 | struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); | ||
| 3764 | struct nfs4_client *clp = cs->session->se_client; | ||
| 3638 | if (cs->status != nfserr_replay_cache) { | 3765 | if (cs->status != nfserr_replay_cache) { |
| 3639 | nfsd4_store_cache_entry(resp); | 3766 | nfsd4_store_cache_entry(resp); |
| 3640 | cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; | 3767 | cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; |
| 3641 | } | 3768 | } |
| 3642 | /* Renew the clientid on success and on replay */ | 3769 | /* Renew the clientid on success and on replay */ |
| 3643 | put_client_renew(cs->session->se_client); | 3770 | spin_lock(&nn->client_lock); |
| 3644 | nfsd4_put_session(cs->session); | 3771 | nfsd4_put_session(cs->session); |
| 3772 | spin_unlock(&nn->client_lock); | ||
| 3773 | put_client_renew(clp); | ||
| 3645 | } | 3774 | } |
| 3646 | return 1; | 3775 | return 1; |
| 3647 | } | 3776 | } |
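The nfs4_minimal_spo_must_enforce[] table added above packs five session/clientid operations into the second 32-bit word of the spo_must_enforce bitmap that nfsd4_encode_exchange_id() now emits for SP4_MACH_CRED (length 2, word[0], word[1], then an empty spo_must_allow). A stand-alone sketch of that packing; the operation numbers are assumed from RFC 5661 and are not restated in this diff:

    /*
     * Illustration only: pack NFSv4.1 operation numbers into the two
     * 32-bit bitmap words used by EXCHANGE_ID's spo_must_enforce field.
     * The OP_* values are assumptions taken from RFC 5661.
     */
    #include <stdint.h>
    #include <stdio.h>

    enum {
        OP_BIND_CONN_TO_SESSION = 41,
        OP_EXCHANGE_ID          = 42,
        OP_CREATE_SESSION       = 43,
        OP_DESTROY_SESSION      = 44,
        OP_DESTROY_CLIENTID     = 57,
    };

    static void set_op(uint32_t bm[2], unsigned int op)
    {
        bm[op / 32] |= 1u << (op % 32);  /* ops 32..63 land in word [1] */
    }

    int main(void)
    {
        uint32_t bm[2] = { 0, 0 };
        unsigned int ops[] = { OP_BIND_CONN_TO_SESSION, OP_EXCHANGE_ID,
                               OP_CREATE_SESSION, OP_DESTROY_SESSION,
                               OP_DESTROY_CLIENTID };
        size_t i;

        for (i = 0; i < sizeof(ops) / sizeof(ops[0]); i++)
            set_op(bm, ops[i]);

        /* All five ops are >= 32, so word [0] stays zero. */
        printf("spo_must_enforce = { 0x%08x, 0x%08x }\n",
               (unsigned int)bm[0], (unsigned int)bm[1]);
        return 0;  /* prints { 0x00000000, 0x02001e00 } */
    }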
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 07a473fd49bc..2bbd94e51efc 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h | |||
| @@ -24,7 +24,7 @@ | |||
| 24 | /* | 24 | /* |
| 25 | * nfsd version | 25 | * nfsd version |
| 26 | */ | 26 | */ |
| 27 | #define NFSD_SUPPORTED_MINOR_VERSION 1 | 27 | #define NFSD_SUPPORTED_MINOR_VERSION 2 |
| 28 | /* | 28 | /* |
| 29 | * Maximum blocksizes supported by daemon under various circumstances. | 29 | * Maximum blocksizes supported by daemon under various circumstances. |
| 30 | */ | 30 | */ |
| @@ -243,6 +243,12 @@ void nfsd_lockd_shutdown(void); | |||
| 243 | #define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG) | 243 | #define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG) |
| 244 | #define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT) | 244 | #define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT) |
| 245 | #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) | 245 | #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) |
| 246 | #define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) | ||
| 247 | #define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) | ||
| 248 | #define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) | ||
| 249 | #define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) | ||
| 250 | #define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) | ||
| 251 | #define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) | ||
| 246 | 252 | ||
| 247 | /* error codes for internal use */ | 253 | /* error codes for internal use */ |
| 248 | /* if a request fails due to kmalloc failure, it gets dropped. | 254 | /* if a request fails due to kmalloc failure, it gets dropped. |
| @@ -322,6 +328,13 @@ void nfsd_lockd_shutdown(void); | |||
| 322 | #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ | 328 | #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ |
| 323 | (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) | 329 | (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) |
| 324 | 330 | ||
| 331 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
| 332 | #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ | ||
| 333 | (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) | ||
| 334 | #else | ||
| 335 | #define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 | ||
| 336 | #endif | ||
| 337 | |||
| 325 | static inline u32 nfsd_suppattrs0(u32 minorversion) | 338 | static inline u32 nfsd_suppattrs0(u32 minorversion) |
| 326 | { | 339 | { |
| 327 | return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 | 340 | return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 |
| @@ -336,8 +349,11 @@ static inline u32 nfsd_suppattrs1(u32 minorversion) | |||
| 336 | 349 | ||
| 337 | static inline u32 nfsd_suppattrs2(u32 minorversion) | 350 | static inline u32 nfsd_suppattrs2(u32 minorversion) |
| 338 | { | 351 | { |
| 339 | return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2 | 352 | switch (minorversion) { |
| 340 | : NFSD4_SUPPORTED_ATTRS_WORD2; | 353 | default: return NFSD4_2_SUPPORTED_ATTRS_WORD2; |
| 354 | case 1: return NFSD4_1_SUPPORTED_ATTRS_WORD2; | ||
| 355 | case 0: return NFSD4_SUPPORTED_ATTRS_WORD2; | ||
| 356 | } | ||
| 341 | } | 357 | } |
| 342 | 358 | ||
| 343 | /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ | 359 | /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ |
| @@ -350,7 +366,11 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) | |||
| 350 | #define NFSD_WRITEABLE_ATTRS_WORD1 \ | 366 | #define NFSD_WRITEABLE_ATTRS_WORD1 \ |
| 351 | (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ | 367 | (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ |
| 352 | | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) | 368 | | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) |
| 369 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
| 370 | #define NFSD_WRITEABLE_ATTRS_WORD2 FATTR4_WORD2_SECURITY_LABEL | ||
| 371 | #else | ||
| 353 | #define NFSD_WRITEABLE_ATTRS_WORD2 0 | 372 | #define NFSD_WRITEABLE_ATTRS_WORD2 0 |
| 373 | #endif | ||
| 354 | 374 | ||
| 355 | #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ | 375 | #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ |
| 356 | NFSD_WRITEABLE_ATTRS_WORD0 | 376 | NFSD_WRITEABLE_ATTRS_WORD0 |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 262df5ccbf59..6b9f48ca4c25 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
| @@ -116,7 +116,7 @@ struct svc_program nfsd_program = { | |||
| 116 | 116 | ||
| 117 | }; | 117 | }; |
| 118 | 118 | ||
| 119 | u32 nfsd_supported_minorversion; | 119 | u32 nfsd_supported_minorversion = 1; |
| 120 | 120 | ||
| 121 | int nfsd_vers(int vers, enum vers_op change) | 121 | int nfsd_vers(int vers, enum vers_op change) |
| 122 | { | 122 | { |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 274e2a114e05..424d8f5f2317 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
| @@ -246,6 +246,7 @@ struct nfs4_client { | |||
| 246 | nfs4_verifier cl_verifier; /* generated by client */ | 246 | nfs4_verifier cl_verifier; /* generated by client */ |
| 247 | time_t cl_time; /* time of last lease renewal */ | 247 | time_t cl_time; /* time of last lease renewal */ |
| 248 | struct sockaddr_storage cl_addr; /* client ipaddress */ | 248 | struct sockaddr_storage cl_addr; /* client ipaddress */ |
| 249 | bool cl_mach_cred; /* SP4_MACH_CRED in force */ | ||
| 249 | struct svc_cred cl_cred; /* setclientid principal */ | 250 | struct svc_cred cl_cred; /* setclientid principal */ |
| 250 | clientid_t cl_clientid; /* generated by server */ | 251 | clientid_t cl_clientid; /* generated by server */ |
| 251 | nfs4_verifier cl_confirm; /* generated by server */ | 252 | nfs4_verifier cl_confirm; /* generated by server */ |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a6bc8a7423db..8ff6a0019b0b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
| 29 | #include <linux/exportfs.h> | 29 | #include <linux/exportfs.h> |
| 30 | #include <linux/writeback.h> | 30 | #include <linux/writeback.h> |
| 31 | #include <linux/security.h> | ||
| 31 | 32 | ||
| 32 | #ifdef CONFIG_NFSD_V3 | 33 | #ifdef CONFIG_NFSD_V3 |
| 33 | #include "xdr3.h" | 34 | #include "xdr3.h" |
| @@ -621,6 +622,33 @@ int nfsd4_is_junction(struct dentry *dentry) | |||
| 621 | return 0; | 622 | return 0; |
| 622 | return 1; | 623 | return 1; |
| 623 | } | 624 | } |
| 625 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
| 626 | __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, | ||
| 627 | struct xdr_netobj *label) | ||
| 628 | { | ||
| 629 | __be32 error; | ||
| 630 | int host_error; | ||
| 631 | struct dentry *dentry; | ||
| 632 | |||
| 633 | error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); | ||
| 634 | if (error) | ||
| 635 | return error; | ||
| 636 | |||
| 637 | dentry = fhp->fh_dentry; | ||
| 638 | |||
| 639 | mutex_lock(&dentry->d_inode->i_mutex); | ||
| 640 | host_error = security_inode_setsecctx(dentry, label->data, label->len); | ||
| 641 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
| 642 | return nfserrno(host_error); | ||
| 643 | } | ||
| 644 | #else | ||
| 645 | __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, | ||
| 646 | struct xdr_netobj *label) | ||
| 647 | { | ||
| 648 | return nfserr_notsupp; | ||
| 649 | } | ||
| 650 | #endif | ||
| 651 | |||
| 624 | #endif /* defined(CONFIG_NFSD_V4) */ | 652 | #endif /* defined(CONFIG_NFSD_V4) */ |
| 625 | 653 | ||
| 626 | #ifdef CONFIG_NFSD_V3 | 654 | #ifdef CONFIG_NFSD_V3 |
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 5b5894159f22..a4be2e389670 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h | |||
| @@ -39,7 +39,6 @@ | |||
| 39 | typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); | 39 | typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); |
| 40 | 40 | ||
| 41 | /* nfsd/vfs.c */ | 41 | /* nfsd/vfs.c */ |
| 42 | int fh_lock_parent(struct svc_fh *, struct dentry *); | ||
| 43 | int nfsd_racache_init(int); | 42 | int nfsd_racache_init(int); |
| 44 | void nfsd_racache_shutdown(void); | 43 | void nfsd_racache_shutdown(void); |
| 45 | int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, | 44 | int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, |
| @@ -56,6 +55,8 @@ int nfsd_mountpoint(struct dentry *, struct svc_export *); | |||
| 56 | __be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, | 55 | __be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, |
| 57 | struct nfs4_acl *); | 56 | struct nfs4_acl *); |
| 58 | int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); | 57 | int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); |
| 58 | __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, | ||
| 59 | struct xdr_netobj *); | ||
| 59 | #endif /* CONFIG_NFSD_V4 */ | 60 | #endif /* CONFIG_NFSD_V4 */ |
| 60 | __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, | 61 | __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, |
| 61 | char *name, int len, struct iattr *attrs, | 62 | char *name, int len, struct iattr *attrs, |
| @@ -92,17 +93,13 @@ __be32 nfsd_remove(struct svc_rqst *, | |||
| 92 | struct svc_fh *, char *, int); | 93 | struct svc_fh *, char *, int); |
| 93 | __be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, | 94 | __be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, |
| 94 | char *name, int len); | 95 | char *name, int len); |
| 95 | int nfsd_truncate(struct svc_rqst *, struct svc_fh *, | ||
| 96 | unsigned long size); | ||
| 97 | __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, | 96 | __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, |
| 98 | loff_t *, struct readdir_cd *, filldir_t); | 97 | loff_t *, struct readdir_cd *, filldir_t); |
| 99 | __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, | 98 | __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, |
| 100 | struct kstatfs *, int access); | 99 | struct kstatfs *, int access); |
| 101 | 100 | ||
| 102 | int nfsd_notify_change(struct inode *, struct iattr *); | ||
| 103 | __be32 nfsd_permission(struct svc_rqst *, struct svc_export *, | 101 | __be32 nfsd_permission(struct svc_rqst *, struct svc_export *, |
| 104 | struct dentry *, int); | 102 | struct dentry *, int); |
| 105 | int nfsd_sync_dir(struct dentry *dp); | ||
| 106 | 103 | ||
| 107 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) | 104 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) |
| 108 | struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); | 105 | struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); |
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 3b271d2092b6..b3ed6446ed8e 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #include "state.h" | 40 | #include "state.h" |
| 41 | #include "nfsd.h" | 41 | #include "nfsd.h" |
| 42 | 42 | ||
| 43 | #define NFSD4_MAX_SEC_LABEL_LEN 2048 | ||
| 43 | #define NFSD4_MAX_TAGLEN 128 | 44 | #define NFSD4_MAX_TAGLEN 128 |
| 44 | #define XDR_LEN(n) (((n) + 3) & ~3) | 45 | #define XDR_LEN(n) (((n) + 3) & ~3) |
| 45 | 46 | ||
| @@ -118,6 +119,7 @@ struct nfsd4_create { | |||
| 118 | struct iattr cr_iattr; /* request */ | 119 | struct iattr cr_iattr; /* request */ |
| 119 | struct nfsd4_change_info cr_cinfo; /* response */ | 120 | struct nfsd4_change_info cr_cinfo; /* response */ |
| 120 | struct nfs4_acl *cr_acl; | 121 | struct nfs4_acl *cr_acl; |
| 122 | struct xdr_netobj cr_label; | ||
| 121 | }; | 123 | }; |
| 122 | #define cr_linklen u.link.namelen | 124 | #define cr_linklen u.link.namelen |
| 123 | #define cr_linkname u.link.name | 125 | #define cr_linkname u.link.name |
| @@ -246,6 +248,7 @@ struct nfsd4_open { | |||
| 246 | struct nfs4_file *op_file; /* used during processing */ | 248 | struct nfs4_file *op_file; /* used during processing */ |
| 247 | struct nfs4_ol_stateid *op_stp; /* used during processing */ | 249 | struct nfs4_ol_stateid *op_stp; /* used during processing */ |
| 248 | struct nfs4_acl *op_acl; | 250 | struct nfs4_acl *op_acl; |
| 251 | struct xdr_netobj op_label; | ||
| 249 | }; | 252 | }; |
| 250 | #define op_iattr iattr | 253 | #define op_iattr iattr |
| 251 | 254 | ||
| @@ -330,6 +333,7 @@ struct nfsd4_setattr { | |||
| 330 | u32 sa_bmval[3]; /* request */ | 333 | u32 sa_bmval[3]; /* request */ |
| 331 | struct iattr sa_iattr; /* request */ | 334 | struct iattr sa_iattr; /* request */ |
| 332 | struct nfs4_acl *sa_acl; | 335 | struct nfs4_acl *sa_acl; |
| 336 | struct xdr_netobj sa_label; | ||
| 333 | }; | 337 | }; |
| 334 | 338 | ||
| 335 | struct nfsd4_setclientid { | 339 | struct nfsd4_setclientid { |
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index eed4d7b26249..741fd02e0444 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c | |||
| @@ -398,6 +398,69 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, | |||
| 398 | } | 398 | } |
| 399 | 399 | ||
| 400 | /** | 400 | /** |
| 401 | * nilfs_palloc_count_desc_blocks - count descriptor blocks number | ||
| 402 | * @inode: inode of metadata file using this allocator | ||
| 403 | * @desc_blocks: descriptor blocks number [out] | ||
| 404 | */ | ||
| 405 | static int nilfs_palloc_count_desc_blocks(struct inode *inode, | ||
| 406 | unsigned long *desc_blocks) | ||
| 407 | { | ||
| 408 | unsigned long blknum; | ||
| 409 | int ret; | ||
| 410 | |||
| 411 | ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum); | ||
| 412 | if (likely(!ret)) | ||
| 413 | *desc_blocks = DIV_ROUND_UP( | ||
| 414 | blknum, NILFS_MDT(inode)->mi_blocks_per_desc_block); | ||
| 415 | return ret; | ||
| 416 | } | ||
| 417 | |||
| 418 | /** | ||
| 419 | * nilfs_palloc_mdt_file_can_grow - check potential opportunity for | ||
| 420 | * MDT file growing | ||
| 421 | * @inode: inode of metadata file using this allocator | ||
| 422 | * @desc_blocks: known current descriptor blocks count | ||
| 423 | */ | ||
| 424 | static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode, | ||
| 425 | unsigned long desc_blocks) | ||
| 426 | { | ||
| 427 | return (nilfs_palloc_groups_per_desc_block(inode) * desc_blocks) < | ||
| 428 | nilfs_palloc_groups_count(inode); | ||
| 429 | } | ||
| 430 | |||
| 431 | /** | ||
| 432 | * nilfs_palloc_count_max_entries - count max number of entries that can be | ||
| 433 | * described by descriptor blocks count | ||
| 434 | * @inode: inode of metadata file using this allocator | ||
| 435 | * @nused: current number of used entries | ||
| 436 | * @nmaxp: max number of entries [out] | ||
| 437 | */ | ||
| 438 | int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) | ||
| 439 | { | ||
| 440 | unsigned long desc_blocks = 0; | ||
| 441 | u64 entries_per_desc_block, nmax; | ||
| 442 | int err; | ||
| 443 | |||
| 444 | err = nilfs_palloc_count_desc_blocks(inode, &desc_blocks); | ||
| 445 | if (unlikely(err)) | ||
| 446 | return err; | ||
| 447 | |||
| 448 | entries_per_desc_block = (u64)nilfs_palloc_entries_per_group(inode) * | ||
| 449 | nilfs_palloc_groups_per_desc_block(inode); | ||
| 450 | nmax = entries_per_desc_block * desc_blocks; | ||
| 451 | |||
| 452 | if (nused == nmax && | ||
| 453 | nilfs_palloc_mdt_file_can_grow(inode, desc_blocks)) | ||
| 454 | nmax += entries_per_desc_block; | ||
| 455 | |||
| 456 | if (nused > nmax) | ||
| 457 | return -ERANGE; | ||
| 458 | |||
| 459 | *nmaxp = nmax; | ||
| 460 | return 0; | ||
| 461 | } | ||
| 462 | |||
| 463 | /** | ||
| 401 | * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object | 464 | * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object |
| 402 | * @inode: inode of metadata file using this allocator | 465 | * @inode: inode of metadata file using this allocator |
| 403 | * @req: nilfs_palloc_req structure exchanged for the allocation | 466 | * @req: nilfs_palloc_req structure exchanged for the allocation |
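nilfs_palloc_count_max_entries() above computes the current ceiling as entries_per_group * groups_per_desc_block * desc_blocks and, when the file is exactly full but more groups still fit on disk, allows one further descriptor block's worth of entries; a used count above that ceiling is reported as -ERANGE. A worked example of the arithmetic with made-up geometry:

    /* Sketch of the max-entries arithmetic; every value is hypothetical. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t entries_per_group     = 16384;
        uint64_t groups_per_desc_block = 32;
        uint64_t desc_blocks           = 4;
        uint64_t nused                 = 2000000;
        bool     can_grow              = true;  /* more groups fit on disk */

        uint64_t per_desc = entries_per_group * groups_per_desc_block;
        uint64_t nmax     = per_desc * desc_blocks;

        if (nused == nmax && can_grow)
            nmax += per_desc;                   /* one extra descriptor block */

        if (nused > nmax) {
            printf("-ERANGE: nused %llu exceeds nmax %llu\n",
                   (unsigned long long)nused, (unsigned long long)nmax);
            return 1;
        }
        printf("nmax = %llu, free = %llu\n",
               (unsigned long long)nmax, (unsigned long long)(nmax - nused));
        return 0;
    }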
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index fb7238100548..4bd6451b5703 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h | |||
| @@ -48,6 +48,8 @@ int nilfs_palloc_get_entry_block(struct inode *, __u64, int, | |||
| 48 | void *nilfs_palloc_block_get_entry(const struct inode *, __u64, | 48 | void *nilfs_palloc_block_get_entry(const struct inode *, __u64, |
| 49 | const struct buffer_head *, void *); | 49 | const struct buffer_head *, void *); |
| 50 | 50 | ||
| 51 | int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *); | ||
| 52 | |||
| 51 | /** | 53 | /** |
| 52 | * nilfs_palloc_req - persistent allocator request and reply | 54 | * nilfs_palloc_req - persistent allocator request and reply |
| 53 | * @pr_entry_nr: entry number (vblocknr or inode number) | 55 | * @pr_entry_nr: entry number (vblocknr or inode number) |
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index d8e65bde083c..6548c7851b48 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c | |||
| @@ -160,6 +160,28 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, | |||
| 160 | } | 160 | } |
| 161 | 161 | ||
| 162 | /** | 162 | /** |
| 163 | * nilfs_ifile_count_free_inodes - calculate free inodes count | ||
| 164 | * @ifile: ifile inode | ||
| 165 | * @nmaxinodes: current maximum of available inodes count [out] | ||
| 166 | * @nfreeinodes: free inodes count [out] | ||
| 167 | */ | ||
| 168 | int nilfs_ifile_count_free_inodes(struct inode *ifile, | ||
| 169 | u64 *nmaxinodes, u64 *nfreeinodes) | ||
| 170 | { | ||
| 171 | u64 nused; | ||
| 172 | int err; | ||
| 173 | |||
| 174 | *nmaxinodes = 0; | ||
| 175 | *nfreeinodes = 0; | ||
| 176 | |||
| 177 | nused = atomic64_read(&NILFS_I(ifile)->i_root->inodes_count); | ||
| 178 | err = nilfs_palloc_count_max_entries(ifile, nused, nmaxinodes); | ||
| 179 | if (likely(!err)) | ||
| 180 | *nfreeinodes = *nmaxinodes - nused; | ||
| 181 | return err; | ||
| 182 | } | ||
| 183 | |||
| 184 | /** | ||
| 163 | * nilfs_ifile_read - read or get ifile inode | 185 | * nilfs_ifile_read - read or get ifile inode |
| 164 | * @sb: super block instance | 186 | * @sb: super block instance |
| 165 | * @root: root object | 187 | * @root: root object |
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index 59b6f2b51df6..679674d13372 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h | |||
| @@ -49,6 +49,8 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); | |||
| 49 | int nilfs_ifile_delete_inode(struct inode *, ino_t); | 49 | int nilfs_ifile_delete_inode(struct inode *, ino_t); |
| 50 | int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); | 50 | int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); |
| 51 | 51 | ||
| 52 | int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *); | ||
| 53 | |||
| 52 | int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, | 54 | int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, |
| 53 | size_t inode_size, struct nilfs_inode *raw_inode, | 55 | size_t inode_size, struct nilfs_inode *raw_inode, |
| 54 | struct inode **inodep); | 56 | struct inode **inodep); |
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index bccfec8343c5..b1a5277cfd18 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
| @@ -54,7 +54,7 @@ void nilfs_inode_add_blocks(struct inode *inode, int n) | |||
| 54 | 54 | ||
| 55 | inode_add_bytes(inode, (1 << inode->i_blkbits) * n); | 55 | inode_add_bytes(inode, (1 << inode->i_blkbits) * n); |
| 56 | if (root) | 56 | if (root) |
| 57 | atomic_add(n, &root->blocks_count); | 57 | atomic64_add(n, &root->blocks_count); |
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | void nilfs_inode_sub_blocks(struct inode *inode, int n) | 60 | void nilfs_inode_sub_blocks(struct inode *inode, int n) |
| @@ -63,7 +63,7 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n) | |||
| 63 | 63 | ||
| 64 | inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); | 64 | inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); |
| 65 | if (root) | 65 | if (root) |
| 66 | atomic_sub(n, &root->blocks_count); | 66 | atomic64_sub(n, &root->blocks_count); |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | /** | 69 | /** |
| @@ -369,7 +369,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) | |||
| 369 | goto failed_ifile_create_inode; | 369 | goto failed_ifile_create_inode; |
| 370 | /* reference count of i_bh inherits from nilfs_mdt_read_block() */ | 370 | /* reference count of i_bh inherits from nilfs_mdt_read_block() */ |
| 371 | 371 | ||
| 372 | atomic_inc(&root->inodes_count); | 372 | atomic64_inc(&root->inodes_count); |
| 373 | inode_init_owner(inode, dir, mode); | 373 | inode_init_owner(inode, dir, mode); |
| 374 | inode->i_ino = ino; | 374 | inode->i_ino = ino; |
| 375 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 375 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
| @@ -801,7 +801,7 @@ void nilfs_evict_inode(struct inode *inode) | |||
| 801 | 801 | ||
| 802 | ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); | 802 | ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); |
| 803 | if (!ret) | 803 | if (!ret) |
| 804 | atomic_dec(&ii->i_root->inodes_count); | 804 | atomic64_dec(&ii->i_root->inodes_count); |
| 805 | 805 | ||
| 806 | nilfs_clear_inode(inode); | 806 | nilfs_clear_inode(inode); |
| 807 | 807 | ||
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a5752a589932..bd88a7461063 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c | |||
| @@ -835,9 +835,9 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) | |||
| 835 | raw_cp->cp_snapshot_list.ssl_next = 0; | 835 | raw_cp->cp_snapshot_list.ssl_next = 0; |
| 836 | raw_cp->cp_snapshot_list.ssl_prev = 0; | 836 | raw_cp->cp_snapshot_list.ssl_prev = 0; |
| 837 | raw_cp->cp_inodes_count = | 837 | raw_cp->cp_inodes_count = |
| 838 | cpu_to_le64(atomic_read(&sci->sc_root->inodes_count)); | 838 | cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count)); |
| 839 | raw_cp->cp_blocks_count = | 839 | raw_cp->cp_blocks_count = |
| 840 | cpu_to_le64(atomic_read(&sci->sc_root->blocks_count)); | 840 | cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count)); |
| 841 | raw_cp->cp_nblk_inc = | 841 | raw_cp->cp_nblk_inc = |
| 842 | cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); | 842 | cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); |
| 843 | raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); | 843 | raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index c7d1f9f18b09..af3ba0478cdf 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
| @@ -554,8 +554,10 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, | |||
| 554 | if (err) | 554 | if (err) |
| 555 | goto failed_bh; | 555 | goto failed_bh; |
| 556 | 556 | ||
| 557 | atomic_set(&root->inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); | 557 | atomic64_set(&root->inodes_count, |
| 558 | atomic_set(&root->blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); | 558 | le64_to_cpu(raw_cp->cp_inodes_count)); |
| 559 | atomic64_set(&root->blocks_count, | ||
| 560 | le64_to_cpu(raw_cp->cp_blocks_count)); | ||
| 559 | 561 | ||
| 560 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); | 562 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); |
| 561 | 563 | ||
| @@ -609,6 +611,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 609 | unsigned long overhead; | 611 | unsigned long overhead; |
| 610 | unsigned long nrsvblocks; | 612 | unsigned long nrsvblocks; |
| 611 | sector_t nfreeblocks; | 613 | sector_t nfreeblocks; |
| 614 | u64 nmaxinodes, nfreeinodes; | ||
| 612 | int err; | 615 | int err; |
| 613 | 616 | ||
| 614 | /* | 617 | /* |
| @@ -633,14 +636,34 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 633 | if (unlikely(err)) | 636 | if (unlikely(err)) |
| 634 | return err; | 637 | return err; |
| 635 | 638 | ||
| 639 | err = nilfs_ifile_count_free_inodes(root->ifile, | ||
| 640 | &nmaxinodes, &nfreeinodes); | ||
| 641 | if (unlikely(err)) { | ||
| 642 | printk(KERN_WARNING | ||
| 643 | "NILFS warning: fail to count free inodes: err %d.\n", | ||
| 644 | err); | ||
| 645 | if (err == -ERANGE) { | ||
| 646 | /* | ||
| 647 | * If nilfs_palloc_count_max_entries() returns | ||
| 648 | * -ERANGE error code then we simply treat | ||
| 649 | * curent inodes count as maximum possible and | ||
| 650 | * zero as free inodes value. | ||
| 651 | */ | ||
| 652 | nmaxinodes = atomic64_read(&root->inodes_count); | ||
| 653 | nfreeinodes = 0; | ||
| 654 | err = 0; | ||
| 655 | } else | ||
| 656 | return err; | ||
| 657 | } | ||
| 658 | |||
| 636 | buf->f_type = NILFS_SUPER_MAGIC; | 659 | buf->f_type = NILFS_SUPER_MAGIC; |
| 637 | buf->f_bsize = sb->s_blocksize; | 660 | buf->f_bsize = sb->s_blocksize; |
| 638 | buf->f_blocks = blocks - overhead; | 661 | buf->f_blocks = blocks - overhead; |
| 639 | buf->f_bfree = nfreeblocks; | 662 | buf->f_bfree = nfreeblocks; |
| 640 | buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? | 663 | buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? |
| 641 | (buf->f_bfree - nrsvblocks) : 0; | 664 | (buf->f_bfree - nrsvblocks) : 0; |
| 642 | buf->f_files = atomic_read(&root->inodes_count); | 665 | buf->f_files = nmaxinodes; |
| 643 | buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ | 666 | buf->f_ffree = nfreeinodes; |
| 644 | buf->f_namelen = NILFS_NAME_LEN; | 667 | buf->f_namelen = NILFS_NAME_LEN; |
| 645 | buf->f_fsid.val[0] = (u32)id; | 668 | buf->f_fsid.val[0] = (u32)id; |
| 646 | buf->f_fsid.val[1] = (u32)(id >> 32); | 669 | buf->f_fsid.val[1] = (u32)(id >> 32); |
| @@ -973,7 +996,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, | |||
| 973 | 996 | ||
| 974 | static int nilfs_tree_was_touched(struct dentry *root_dentry) | 997 | static int nilfs_tree_was_touched(struct dentry *root_dentry) |
| 975 | { | 998 | { |
| 976 | return root_dentry->d_count > 1; | 999 | return d_count(root_dentry) > 1; |
| 977 | } | 1000 | } |
| 978 | 1001 | ||
| 979 | /** | 1002 | /** |
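With the statfs changes above, f_files and f_ffree are now derived from the ifile instead of f_ffree being hard-wired to 0, and an -ERANGE result degrades gracefully to "all inodes in use". From user space the difference shows up through statvfs() (and hence df -i); a small check, assuming a hypothetical mount point:

    /* Reads the inode totals that nilfs_statfs() now fills in.
     * The mount point is an assumption for illustration. */
    #include <stdio.h>
    #include <sys/statvfs.h>

    int main(void)
    {
        struct statvfs s;

        if (statvfs("/mnt/nilfs2", &s)) {
            perror("statvfs");
            return 1;
        }
        printf("inodes: total=%llu free=%llu\n",
               (unsigned long long)s.f_files,
               (unsigned long long)s.f_ffree);
        return 0;
    }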
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 41e6a04a561f..94c451ce6d24 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
| @@ -764,8 +764,8 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) | |||
| 764 | new->ifile = NULL; | 764 | new->ifile = NULL; |
| 765 | new->nilfs = nilfs; | 765 | new->nilfs = nilfs; |
| 766 | atomic_set(&new->count, 1); | 766 | atomic_set(&new->count, 1); |
| 767 | atomic_set(&new->inodes_count, 0); | 767 | atomic64_set(&new->inodes_count, 0); |
| 768 | atomic_set(&new->blocks_count, 0); | 768 | atomic64_set(&new->blocks_count, 0); |
| 769 | 769 | ||
| 770 | rb_link_node(&new->rb_node, parent, p); | 770 | rb_link_node(&new->rb_node, parent, p); |
| 771 | rb_insert_color(&new->rb_node, &nilfs->ns_cptree); | 771 | rb_insert_color(&new->rb_node, &nilfs->ns_cptree); |
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index be1267a34cea..de8cc53b4a5c 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h | |||
| @@ -241,8 +241,8 @@ struct nilfs_root { | |||
| 241 | struct the_nilfs *nilfs; | 241 | struct the_nilfs *nilfs; |
| 242 | struct inode *ifile; | 242 | struct inode *ifile; |
| 243 | 243 | ||
| 244 | atomic_t inodes_count; | 244 | atomic64_t inodes_count; |
| 245 | atomic_t blocks_count; | 245 | atomic64_t blocks_count; |
| 246 | }; | 246 | }; |
| 247 | 247 | ||
| 248 | /* Special checkpoint number */ | 248 | /* Special checkpoint number */ |
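The inodes_count/blocks_count members switch from atomic_t to atomic64_t because atomic_t is only 32 bits wide, while the on-disk checkpoint fields they feed (cp_inodes_count and cp_blocks_count in the segment.c hunk) are 64-bit; a large filesystem could otherwise wrap the in-core counters. A trivial sketch of the wrap being avoided, with plain integers standing in for the atomics:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t c32 = UINT32_MAX;   /* a saturated 32-bit counter */
        uint64_t c64 = UINT32_MAX;

        c32 += 1;                    /* wraps back to 0 */
        c64 += 1;                    /* keeps counting */

        printf("32-bit: %u, 64-bit: %llu\n",
               (unsigned int)c32, (unsigned long long)c64);
        return 0;                    /* prints "32-bit: 0, 64-bit: 4294967296" */
    }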
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 2bfe6dc413a0..1fedd5f7ccc4 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c | |||
| @@ -31,7 +31,6 @@ int dir_notify_enable __read_mostly = 1; | |||
| 31 | static struct kmem_cache *dnotify_struct_cache __read_mostly; | 31 | static struct kmem_cache *dnotify_struct_cache __read_mostly; |
| 32 | static struct kmem_cache *dnotify_mark_cache __read_mostly; | 32 | static struct kmem_cache *dnotify_mark_cache __read_mostly; |
| 33 | static struct fsnotify_group *dnotify_group __read_mostly; | 33 | static struct fsnotify_group *dnotify_group __read_mostly; |
| 34 | static DEFINE_MUTEX(dnotify_mark_mutex); | ||
| 35 | 34 | ||
| 36 | /* | 35 | /* |
| 37 | * dnotify will attach one of these to each inode (i_fsnotify_marks) which | 36 | * dnotify will attach one of these to each inode (i_fsnotify_marks) which |
| @@ -183,7 +182,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) | |||
| 183 | return; | 182 | return; |
| 184 | dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); | 183 | dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); |
| 185 | 184 | ||
| 186 | mutex_lock(&dnotify_mark_mutex); | 185 | mutex_lock(&dnotify_group->mark_mutex); |
| 187 | 186 | ||
| 188 | spin_lock(&fsn_mark->lock); | 187 | spin_lock(&fsn_mark->lock); |
| 189 | prev = &dn_mark->dn; | 188 | prev = &dn_mark->dn; |
| @@ -199,11 +198,12 @@ void dnotify_flush(struct file *filp, fl_owner_t id) | |||
| 199 | 198 | ||
| 200 | spin_unlock(&fsn_mark->lock); | 199 | spin_unlock(&fsn_mark->lock); |
| 201 | 200 | ||
| 202 | /* nothing else could have found us thanks to the dnotify_mark_mutex */ | 201 | /* nothing else could have found us thanks to the dnotify_groups |
| 202 | mark_mutex */ | ||
| 203 | if (dn_mark->dn == NULL) | 203 | if (dn_mark->dn == NULL) |
| 204 | fsnotify_destroy_mark(fsn_mark, dnotify_group); | 204 | fsnotify_destroy_mark_locked(fsn_mark, dnotify_group); |
| 205 | 205 | ||
| 206 | mutex_unlock(&dnotify_mark_mutex); | 206 | mutex_unlock(&dnotify_group->mark_mutex); |
| 207 | 207 | ||
| 208 | fsnotify_put_mark(fsn_mark); | 208 | fsnotify_put_mark(fsn_mark); |
| 209 | } | 209 | } |
| @@ -326,7 +326,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | |||
| 326 | new_dn_mark->dn = NULL; | 326 | new_dn_mark->dn = NULL; |
| 327 | 327 | ||
| 328 | /* this is needed to prevent the fcntl/close race described below */ | 328 | /* this is needed to prevent the fcntl/close race described below */ |
| 329 | mutex_lock(&dnotify_mark_mutex); | 329 | mutex_lock(&dnotify_group->mark_mutex); |
| 330 | 330 | ||
| 331 | /* add the new_fsn_mark or find an old one. */ | 331 | /* add the new_fsn_mark or find an old one. */ |
| 332 | fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); | 332 | fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); |
| @@ -334,7 +334,8 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | |||
| 334 | dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); | 334 | dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); |
| 335 | spin_lock(&fsn_mark->lock); | 335 | spin_lock(&fsn_mark->lock); |
| 336 | } else { | 336 | } else { |
| 337 | fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, NULL, 0); | 337 | fsnotify_add_mark_locked(new_fsn_mark, dnotify_group, inode, |
| 338 | NULL, 0); | ||
| 338 | spin_lock(&new_fsn_mark->lock); | 339 | spin_lock(&new_fsn_mark->lock); |
| 339 | fsn_mark = new_fsn_mark; | 340 | fsn_mark = new_fsn_mark; |
| 340 | dn_mark = new_dn_mark; | 341 | dn_mark = new_dn_mark; |
| @@ -348,9 +349,9 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | |||
| 348 | 349 | ||
| 349 | /* if (f != filp) means that we lost a race and another task/thread | 350 | /* if (f != filp) means that we lost a race and another task/thread |
| 350 | * actually closed the fd we are still playing with before we grabbed | 351 | * actually closed the fd we are still playing with before we grabbed |
| 351 | * the dnotify_mark_mutex and fsn_mark->lock. Since closing the fd is the | 352 | * the dnotify_groups mark_mutex and fsn_mark->lock. Since closing the |
| 352 | * only time we clean up the marks we need to get our mark off | 353 | * fd is the only time we clean up the marks we need to get our mark |
| 353 | * the list. */ | 354 | * off the list. */ |
| 354 | if (f != filp) { | 355 | if (f != filp) { |
| 355 | /* if we added ourselves, shoot ourselves, it's possible that | 356 | /* if we added ourselves, shoot ourselves, it's possible that |
| 356 | * the flush actually did shoot this fsn_mark. That's fine too | 357 | * the flush actually did shoot this fsn_mark. That's fine too |
| @@ -385,9 +386,9 @@ out: | |||
| 385 | spin_unlock(&fsn_mark->lock); | 386 | spin_unlock(&fsn_mark->lock); |
| 386 | 387 | ||
| 387 | if (destroy) | 388 | if (destroy) |
| 388 | fsnotify_destroy_mark(fsn_mark, dnotify_group); | 389 | fsnotify_destroy_mark_locked(fsn_mark, dnotify_group); |
| 389 | 390 | ||
| 390 | mutex_unlock(&dnotify_mark_mutex); | 391 | mutex_unlock(&dnotify_group->mark_mutex); |
| 391 | fsnotify_put_mark(fsn_mark); | 392 | fsnotify_put_mark(fsn_mark); |
| 392 | out_err: | 393 | out_err: |
| 393 | if (new_fsn_mark) | 394 | if (new_fsn_mark) |
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 6c80083a984f..e44cb6427df3 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
| @@ -122,6 +122,7 @@ static int fill_event_metadata(struct fsnotify_group *group, | |||
| 122 | metadata->event_len = FAN_EVENT_METADATA_LEN; | 122 | metadata->event_len = FAN_EVENT_METADATA_LEN; |
| 123 | metadata->metadata_len = FAN_EVENT_METADATA_LEN; | 123 | metadata->metadata_len = FAN_EVENT_METADATA_LEN; |
| 124 | metadata->vers = FANOTIFY_METADATA_VERSION; | 124 | metadata->vers = FANOTIFY_METADATA_VERSION; |
| 125 | metadata->reserved = 0; | ||
| 125 | metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; | 126 | metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; |
| 126 | metadata->pid = pid_vnr(event->tgid); | 127 | metadata->pid = pid_vnr(event->tgid); |
| 127 | if (unlikely(event->mask & FAN_Q_OVERFLOW)) | 128 | if (unlikely(event->mask & FAN_Q_OVERFLOW)) |
| @@ -399,9 +400,6 @@ static int fanotify_release(struct inode *ignored, struct file *file) | |||
| 399 | wake_up(&group->fanotify_data.access_waitq); | 400 | wake_up(&group->fanotify_data.access_waitq); |
| 400 | #endif | 401 | #endif |
| 401 | 402 | ||
| 402 | if (file->f_flags & FASYNC) | ||
| 403 | fsnotify_fasync(-1, file, 0); | ||
| 404 | |||
| 405 | /* matches the fanotify_init->fsnotify_alloc_group */ | 403 | /* matches the fanotify_init->fsnotify_alloc_group */ |
| 406 | fsnotify_destroy_group(group); | 404 | fsnotify_destroy_group(group); |
| 407 | 405 | ||
| @@ -526,14 +524,18 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, | |||
| 526 | __u32 removed; | 524 | __u32 removed; |
| 527 | int destroy_mark; | 525 | int destroy_mark; |
| 528 | 526 | ||
| 527 | mutex_lock(&group->mark_mutex); | ||
| 529 | fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); | 528 | fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); |
| 530 | if (!fsn_mark) | 529 | if (!fsn_mark) { |
| 530 | mutex_unlock(&group->mark_mutex); | ||
| 531 | return -ENOENT; | 531 | return -ENOENT; |
| 532 | } | ||
| 532 | 533 | ||
| 533 | removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, | 534 | removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, |
| 534 | &destroy_mark); | 535 | &destroy_mark); |
| 535 | if (destroy_mark) | 536 | if (destroy_mark) |
| 536 | fsnotify_destroy_mark(fsn_mark, group); | 537 | fsnotify_destroy_mark_locked(fsn_mark, group); |
| 538 | mutex_unlock(&group->mark_mutex); | ||
| 537 | 539 | ||
| 538 | fsnotify_put_mark(fsn_mark); | 540 | fsnotify_put_mark(fsn_mark); |
| 539 | if (removed & real_mount(mnt)->mnt_fsnotify_mask) | 541 | if (removed & real_mount(mnt)->mnt_fsnotify_mask) |
| @@ -550,14 +552,19 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, | |||
| 550 | __u32 removed; | 552 | __u32 removed; |
| 551 | int destroy_mark; | 553 | int destroy_mark; |
| 552 | 554 | ||
| 555 | mutex_lock(&group->mark_mutex); | ||
| 553 | fsn_mark = fsnotify_find_inode_mark(group, inode); | 556 | fsn_mark = fsnotify_find_inode_mark(group, inode); |
| 554 | if (!fsn_mark) | 557 | if (!fsn_mark) { |
| 558 | mutex_unlock(&group->mark_mutex); | ||
| 555 | return -ENOENT; | 559 | return -ENOENT; |
| 560 | } | ||
| 556 | 561 | ||
| 557 | removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, | 562 | removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, |
| 558 | &destroy_mark); | 563 | &destroy_mark); |
| 559 | if (destroy_mark) | 564 | if (destroy_mark) |
| 560 | fsnotify_destroy_mark(fsn_mark, group); | 565 | fsnotify_destroy_mark_locked(fsn_mark, group); |
| 566 | mutex_unlock(&group->mark_mutex); | ||
| 567 | |||
| 561 | /* matches the fsnotify_find_inode_mark() */ | 568 | /* matches the fsnotify_find_inode_mark() */ |
| 562 | fsnotify_put_mark(fsn_mark); | 569 | fsnotify_put_mark(fsn_mark); |
| 563 | if (removed & inode->i_fsnotify_mask) | 570 | if (removed & inode->i_fsnotify_mask) |
| @@ -593,35 +600,55 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, | |||
| 593 | return mask & ~oldmask; | 600 | return mask & ~oldmask; |
| 594 | } | 601 | } |
| 595 | 602 | ||
| 603 | static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, | ||
| 604 | struct inode *inode, | ||
| 605 | struct vfsmount *mnt) | ||
| 606 | { | ||
| 607 | struct fsnotify_mark *mark; | ||
| 608 | int ret; | ||
| 609 | |||
| 610 | if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) | ||
| 611 | return ERR_PTR(-ENOSPC); | ||
| 612 | |||
| 613 | mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); | ||
| 614 | if (!mark) | ||
| 615 | return ERR_PTR(-ENOMEM); | ||
| 616 | |||
| 617 | fsnotify_init_mark(mark, fanotify_free_mark); | ||
| 618 | ret = fsnotify_add_mark_locked(mark, group, inode, mnt, 0); | ||
| 619 | if (ret) { | ||
| 620 | fsnotify_put_mark(mark); | ||
| 621 | return ERR_PTR(ret); | ||
| 622 | } | ||
| 623 | |||
| 624 | return mark; | ||
| 625 | } | ||
| 626 | |||
| 627 | |||
| 596 | static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, | 628 | static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, |
| 597 | struct vfsmount *mnt, __u32 mask, | 629 | struct vfsmount *mnt, __u32 mask, |
| 598 | unsigned int flags) | 630 | unsigned int flags) |
| 599 | { | 631 | { |
| 600 | struct fsnotify_mark *fsn_mark; | 632 | struct fsnotify_mark *fsn_mark; |
| 601 | __u32 added; | 633 | __u32 added; |
| 602 | int ret = 0; | ||
| 603 | 634 | ||
| 635 | mutex_lock(&group->mark_mutex); | ||
| 604 | fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); | 636 | fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); |
| 605 | if (!fsn_mark) { | 637 | if (!fsn_mark) { |
| 606 | if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) | 638 | fsn_mark = fanotify_add_new_mark(group, NULL, mnt); |
| 607 | return -ENOSPC; | 639 | if (IS_ERR(fsn_mark)) { |
| 608 | 640 | mutex_unlock(&group->mark_mutex); | |
| 609 | fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); | 641 | return PTR_ERR(fsn_mark); |
| 610 | if (!fsn_mark) | 642 | } |
| 611 | return -ENOMEM; | ||
| 612 | |||
| 613 | fsnotify_init_mark(fsn_mark, fanotify_free_mark); | ||
| 614 | ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0); | ||
| 615 | if (ret) | ||
| 616 | goto err; | ||
| 617 | } | 643 | } |
| 618 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); | 644 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); |
| 645 | mutex_unlock(&group->mark_mutex); | ||
| 619 | 646 | ||
| 620 | if (added & ~real_mount(mnt)->mnt_fsnotify_mask) | 647 | if (added & ~real_mount(mnt)->mnt_fsnotify_mask) |
| 621 | fsnotify_recalc_vfsmount_mask(mnt); | 648 | fsnotify_recalc_vfsmount_mask(mnt); |
| 622 | err: | 649 | |
| 623 | fsnotify_put_mark(fsn_mark); | 650 | fsnotify_put_mark(fsn_mark); |
| 624 | return ret; | 651 | return 0; |
| 625 | } | 652 | } |
| 626 | 653 | ||
| 627 | static int fanotify_add_inode_mark(struct fsnotify_group *group, | 654 | static int fanotify_add_inode_mark(struct fsnotify_group *group, |
| @@ -630,7 +657,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, | |||
| 630 | { | 657 | { |
| 631 | struct fsnotify_mark *fsn_mark; | 658 | struct fsnotify_mark *fsn_mark; |
| 632 | __u32 added; | 659 | __u32 added; |
| 633 | int ret = 0; | ||
| 634 | 660 | ||
| 635 | pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); | 661 | pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); |
| 636 | 662 | ||
| @@ -644,27 +670,23 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, | |||
| 644 | (atomic_read(&inode->i_writecount) > 0)) | 670 | (atomic_read(&inode->i_writecount) > 0)) |
| 645 | return 0; | 671 | return 0; |
| 646 | 672 | ||
| 673 | mutex_lock(&group->mark_mutex); | ||
| 647 | fsn_mark = fsnotify_find_inode_mark(group, inode); | 674 | fsn_mark = fsnotify_find_inode_mark(group, inode); |
| 648 | if (!fsn_mark) { | 675 | if (!fsn_mark) { |
| 649 | if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) | 676 | fsn_mark = fanotify_add_new_mark(group, inode, NULL); |
| 650 | return -ENOSPC; | 677 | if (IS_ERR(fsn_mark)) { |
| 651 | 678 | mutex_unlock(&group->mark_mutex); | |
| 652 | fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); | 679 | return PTR_ERR(fsn_mark); |
| 653 | if (!fsn_mark) | 680 | } |
| 654 | return -ENOMEM; | ||
| 655 | |||
| 656 | fsnotify_init_mark(fsn_mark, fanotify_free_mark); | ||
| 657 | ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0); | ||
| 658 | if (ret) | ||
| 659 | goto err; | ||
| 660 | } | 681 | } |
| 661 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); | 682 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); |
| 683 | mutex_unlock(&group->mark_mutex); | ||
| 662 | 684 | ||
| 663 | if (added & ~inode->i_fsnotify_mask) | 685 | if (added & ~inode->i_fsnotify_mask) |
| 664 | fsnotify_recalc_inode_mask(inode); | 686 | fsnotify_recalc_inode_mask(inode); |
| 665 | err: | 687 | |
| 666 | fsnotify_put_mark(fsn_mark); | 688 | fsnotify_put_mark(fsn_mark); |
| 667 | return ret; | 689 | return 0; |
| 668 | } | 690 | } |
| 669 | 691 | ||
| 670 | /* fanotify syscalls */ | 692 | /* fanotify syscalls */ |
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 959815c1e017..60f954a891ab 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
| @@ -636,7 +636,8 @@ static int inotify_new_watch(struct fsnotify_group *group, | |||
| 636 | goto out_err; | 636 | goto out_err; |
| 637 | 637 | ||
| 638 | /* we are on the idr, now get on the inode */ | 638 | /* we are on the idr, now get on the inode */ |
| 639 | ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, NULL, 0); | 639 | ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode, |
| 640 | NULL, 0); | ||
| 640 | if (ret) { | 641 | if (ret) { |
| 641 | /* we failed to get on the inode, get off the idr */ | 642 | /* we failed to get on the inode, get off the idr */ |
| 642 | inotify_remove_from_idr(group, tmp_i_mark); | 643 | inotify_remove_from_idr(group, tmp_i_mark); |
| @@ -660,19 +661,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod | |||
| 660 | { | 661 | { |
| 661 | int ret = 0; | 662 | int ret = 0; |
| 662 | 663 | ||
| 663 | retry: | 664 | mutex_lock(&group->mark_mutex); |
| 664 | /* try to update and existing watch with the new arg */ | 665 | /* try to update and existing watch with the new arg */ |
| 665 | ret = inotify_update_existing_watch(group, inode, arg); | 666 | ret = inotify_update_existing_watch(group, inode, arg); |
| 666 | /* no mark present, try to add a new one */ | 667 | /* no mark present, try to add a new one */ |
| 667 | if (ret == -ENOENT) | 668 | if (ret == -ENOENT) |
| 668 | ret = inotify_new_watch(group, inode, arg); | 669 | ret = inotify_new_watch(group, inode, arg); |
| 669 | /* | 670 | mutex_unlock(&group->mark_mutex); |
| 670 | * inotify_new_watch could race with another thread which did an | ||
| 671 | * inotify_new_watch between the update_existing and the add watch | ||
| 672 | * here, go back and try to update an existing mark again. | ||
| 673 | */ | ||
| 674 | if (ret == -EEXIST) | ||
| 675 | goto retry; | ||
| 676 | 671 | ||
| 677 | return ret; | 672 | return ret; |
| 678 | } | 673 | } |
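The fanotify hunks and the inotify hunk above move both the mark lookup and the mark add inside group->mark_mutex, which is why inotify's retry-on-EEXIST loop could be removed: once find and add form one critical section, another thread can no longer slip a mark in between them. A generic sketch of that shape, using pthreads and a toy list rather than the fsnotify structures:

    /* Sketch only: serialise "find or create" so two threads can never
     * both miss the lookup and both insert.  Toy list, not fsnotify. */
    #include <pthread.h>
    #include <stdlib.h>

    struct entry {
        int key;
        struct entry *next;
    };

    static struct entry *head;
    static pthread_mutex_t list_mutex = PTHREAD_MUTEX_INITIALIZER;

    struct entry *find_or_create(int key)
    {
        struct entry *e;

        pthread_mutex_lock(&list_mutex);
        for (e = head; e; e = e->next)
            if (e->key == key)
                goto out;            /* found: no duplicate insert possible */

        e = calloc(1, sizeof(*e));
        if (e) {
            e->key = key;
            e->next = head;
            head = e;                /* lookup+insert are one critical section */
        }
    out:
        pthread_mutex_unlock(&list_mutex);
        return e;
    }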
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index fc6b49bf7360..923fe4a5f503 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c | |||
| @@ -20,28 +20,29 @@ | |||
| 20 | * fsnotify inode mark locking/lifetime/and refcnting | 20 | * fsnotify inode mark locking/lifetime/and refcnting |
| 21 | * | 21 | * |
| 22 | * REFCNT: | 22 | * REFCNT: |
| 23 | * The mark->refcnt tells how many "things" in the kernel currently are | 23 | * The group->recnt and mark->refcnt tell how many "things" in the kernel |
| 24 | * referencing this object. The object typically will live inside the kernel | 24 | * currently are referencing the objects. Both kind of objects typically will |
| 25 | * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task | 25 | * live inside the kernel with a refcnt of 2, one for its creation and one for |
| 26 | * which can find this object holding the appropriete locks, can take a reference | 26 | * the reference a group and a mark hold to each other. |
| 27 | * and the object itself is guaranteed to survive until the reference is dropped. | 27 | * If you are holding the appropriate locks, you can take a reference and the |
| 28 | * object itself is guaranteed to survive until the reference is dropped. | ||
| 28 | * | 29 | * |
| 29 | * LOCKING: | 30 | * LOCKING: |
| 30 | * There are 3 spinlocks involved with fsnotify inode marks and they MUST | 31 | * There are 3 locks involved with fsnotify inode marks and they MUST be taken |
| 31 | * be taken in order as follows: | 32 | * in order as follows: |
| 32 | * | 33 | * |
| 34 | * group->mark_mutex | ||
| 33 | * mark->lock | 35 | * mark->lock |
| 34 | * group->mark_lock | ||
| 35 | * inode->i_lock | 36 | * inode->i_lock |
| 36 | * | 37 | * |
| 37 | * mark->lock protects 2 things, mark->group and mark->inode. You must hold | 38 | * group->mark_mutex protects the marks_list anchored inside a given group and |
| 38 | * that lock to dereference either of these things (they could be NULL even with | 39 | * each mark is hooked via the g_list. It also protects the groups private |
| 39 | * the lock) | 40 | * data (i.e group limits). |
| 40 | * | 41 | |
| 41 | * group->mark_lock protects the marks_list anchored inside a given group | 42 | * mark->lock protects the marks attributes like its masks and flags. |
| 42 | * and each mark is hooked via the g_list. It also sorta protects the | 43 | * Furthermore it protects the access to a reference of the group that the mark |
| 43 | * free_g_list, which when used is anchored by a private list on the stack of the | 44 | * is assigned to as well as the access to a reference of the inode/vfsmount |
| 44 | * task which held the group->mark_lock. | 45 | * that is being watched by the mark. |
| 45 | * | 46 | * |
| 46 | * inode->i_lock protects the i_fsnotify_marks list anchored inside a | 47 | * inode->i_lock protects the i_fsnotify_marks list anchored inside a |
| 47 | * given inode and each mark is hooked via the i_list. (and sorta the | 48 | * given inode and each mark is hooked via the i_list. (and sorta the |
| @@ -64,18 +65,11 @@ | |||
| 64 | * inode. We take i_lock and walk the i_fsnotify_marks safely. For each | 65 | * inode. We take i_lock and walk the i_fsnotify_marks safely. For each |
| 65 | * mark on the list we take a reference (so the mark can't disappear under us). | 66 | * mark on the list we take a reference (so the mark can't disappear under us). |
| 66 | * We remove that mark form the inode's list of marks and we add this mark to a | 67 | * We remove that mark form the inode's list of marks and we add this mark to a |
| 67 | * private list anchored on the stack using i_free_list; At this point we no | 68 | * private list anchored on the stack using i_free_list; we walk i_free_list |
| 68 | * longer fear anything finding the mark using the inode's list of marks. | 69 | * and before we destroy the mark we make sure that we dont race with a |
| 69 | * | 70 | * concurrent destroy_group by getting a ref to the marks group and taking the |
| 70 | * We can safely and locklessly run the private list on the stack of everything | 71 | * groups mutex. |
| 71 | * we just unattached from the original inode. For each mark on the private list | 72 | |
| 72 | * we grab the mark-> and can thus dereference mark->group and mark->inode. If | ||
| 73 | * we see the group and inode are not NULL we take those locks. Now holding all | ||
| 74 | * 3 locks we can completely remove the mark from other tasks finding it in the | ||
| 75 | * future. Remember, 10 things might already be referencing this mark, but they | ||
| 76 | * better be holding a ref. We drop our reference we took before we unhooked it | ||
| 77 | * from the inode. When the ref hits 0 we can free the mark. | ||
| 78 | * | ||
| 79 | * Very similarly for freeing by group, except we use free_g_list. | 73 | * Very similarly for freeing by group, except we use free_g_list. |
| 80 | * | 74 | * |
| 81 | * This has the very interesting property of being able to run concurrently with | 75 | * This has the very interesting property of being able to run concurrently with |
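The rewritten comment block documents the new lock ordering: group->mark_mutex first, then mark->lock, then inode->i_lock. A toy illustration of honouring that order on every path (pthread mutexes stand in for the kernel's mutex and spinlocks; the structures are not the fsnotify ones):

    /* Lock-ordering sketch; all types here are illustrative stand-ins. */
    #include <pthread.h>

    struct group { pthread_mutex_t mark_mutex; };
    struct mark  { pthread_mutex_t lock; };
    struct inode { pthread_mutex_t i_lock; };

    void attach_mark(struct group *g, struct mark *m, struct inode *i)
    {
        pthread_mutex_lock(&g->mark_mutex);  /* 1: group->mark_mutex */
        pthread_mutex_lock(&m->lock);        /* 2: mark->lock        */
        pthread_mutex_lock(&i->i_lock);      /* 3: inode->i_lock     */

        /* ... link the mark into the group and inode lists ... */

        pthread_mutex_unlock(&i->i_lock);
        pthread_mutex_unlock(&m->lock);
        pthread_mutex_unlock(&g->mark_mutex);
    }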
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index b8a9d87231b1..17e6bdde96c5 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
| @@ -5655,7 +5655,7 @@ int ocfs2_remove_btree_range(struct inode *inode, | |||
| 5655 | &ref_tree, NULL); | 5655 | &ref_tree, NULL); |
| 5656 | if (ret) { | 5656 | if (ret) { |
| 5657 | mlog_errno(ret); | 5657 | mlog_errno(ret); |
| 5658 | goto out; | 5658 | goto bail; |
| 5659 | } | 5659 | } |
| 5660 | 5660 | ||
| 5661 | ret = ocfs2_prepare_refcount_change_for_del(inode, | 5661 | ret = ocfs2_prepare_refcount_change_for_del(inode, |
| @@ -5666,7 +5666,7 @@ int ocfs2_remove_btree_range(struct inode *inode, | |||
| 5666 | &extra_blocks); | 5666 | &extra_blocks); |
| 5667 | if (ret < 0) { | 5667 | if (ret < 0) { |
| 5668 | mlog_errno(ret); | 5668 | mlog_errno(ret); |
| 5669 | goto out; | 5669 | goto bail; |
| 5670 | } | 5670 | } |
| 5671 | } | 5671 | } |
| 5672 | 5672 | ||
| @@ -5674,7 +5674,7 @@ int ocfs2_remove_btree_range(struct inode *inode, | |||
| 5674 | extra_blocks); | 5674 | extra_blocks); |
| 5675 | if (ret) { | 5675 | if (ret) { |
| 5676 | mlog_errno(ret); | 5676 | mlog_errno(ret); |
| 5677 | return ret; | 5677 | goto bail; |
| 5678 | } | 5678 | } |
| 5679 | 5679 | ||
| 5680 | mutex_lock(&tl_inode->i_mutex); | 5680 | mutex_lock(&tl_inode->i_mutex); |
| @@ -5734,7 +5734,7 @@ out_commit: | |||
| 5734 | ocfs2_commit_trans(osb, handle); | 5734 | ocfs2_commit_trans(osb, handle); |
| 5735 | out: | 5735 | out: |
| 5736 | mutex_unlock(&tl_inode->i_mutex); | 5736 | mutex_unlock(&tl_inode->i_mutex); |
| 5737 | 5737 | bail: | |
| 5738 | if (meta_ac) | 5738 | if (meta_ac) |
| 5739 | ocfs2_free_alloc_context(meta_ac); | 5739 | ocfs2_free_alloc_context(meta_ac); |
| 5740 | 5740 | ||
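The goto changes in ocfs2_remove_btree_range() above fix error paths that either jumped to out: — which unlocks tl_inode->i_mutex before that mutex had been taken — or returned outright and skipped freeing the allocation context; the new bail: label sits after the unlock so early failures release their resources without touching the mutex. The general cleanup-label shape, as a hedged sketch:

    /* Sketch of ordered cleanup labels: an early failure must never
     * release a lock it did not take. */
    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t io_mutex = PTHREAD_MUTEX_INITIALIZER;

    int do_work(int fail_before_lock, int fail_under_lock)
    {
        int ret = 0;
        char *buf = malloc(4096);

        if (!buf)
            return -1;

        if (fail_before_lock) {
            ret = -1;
            goto bail;               /* mutex not taken: skip the unlock */
        }

        pthread_mutex_lock(&io_mutex);
        if (fail_under_lock) {
            ret = -1;
            goto out;                /* mutex held: unlock before bailing */
        }
        /* ... the real work ... */
    out:
        pthread_mutex_unlock(&io_mutex);
    bail:
        free(buf);
        return ret;
    }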
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 42252bf64b51..5c1c864e81cc 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
| @@ -176,7 +176,7 @@ static void o2hb_dead_threshold_set(unsigned int threshold) | |||
| 176 | } | 176 | } |
| 177 | } | 177 | } |
| 178 | 178 | ||
| 179 | static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode) | 179 | static int o2hb_global_heartbeat_mode_set(unsigned int hb_mode) |
| 180 | { | 180 | { |
| 181 | int ret = -1; | 181 | int ret = -1; |
| 182 | 182 | ||
| @@ -500,7 +500,7 @@ static int o2hb_issue_node_write(struct o2hb_region *reg, | |||
| 500 | } | 500 | } |
| 501 | 501 | ||
| 502 | atomic_inc(&write_wc->wc_num_reqs); | 502 | atomic_inc(&write_wc->wc_num_reqs); |
| 503 | submit_bio(WRITE, bio); | 503 | submit_bio(WRITE_SYNC, bio); |
| 504 | 504 | ||
| 505 | status = 0; | 505 | status = 0; |
| 506 | bail: | 506 | bail: |
| @@ -2271,7 +2271,7 @@ ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, | |||
| 2271 | if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) | 2271 | if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) |
| 2272 | continue; | 2272 | continue; |
| 2273 | 2273 | ||
| 2274 | ret = o2hb_global_hearbeat_mode_set(i); | 2274 | ret = o2hb_global_heartbeat_mode_set(i); |
| 2275 | if (!ret) | 2275 | if (!ret) |
| 2276 | printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", | 2276 | printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", |
| 2277 | o2hb_heartbeat_mode_desc[i]); | 2277 | o2hb_heartbeat_mode_desc[i]); |
| @@ -2304,7 +2304,7 @@ static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { | |||
| 2304 | NULL, | 2304 | NULL, |
| 2305 | }; | 2305 | }; |
| 2306 | 2306 | ||
| 2307 | static struct configfs_item_operations o2hb_hearbeat_group_item_ops = { | 2307 | static struct configfs_item_operations o2hb_heartbeat_group_item_ops = { |
| 2308 | .show_attribute = o2hb_heartbeat_group_show, | 2308 | .show_attribute = o2hb_heartbeat_group_show, |
| 2309 | .store_attribute = o2hb_heartbeat_group_store, | 2309 | .store_attribute = o2hb_heartbeat_group_store, |
| 2310 | }; | 2310 | }; |
| @@ -2316,7 +2316,7 @@ static struct configfs_group_operations o2hb_heartbeat_group_group_ops = { | |||
| 2316 | 2316 | ||
| 2317 | static struct config_item_type o2hb_heartbeat_group_type = { | 2317 | static struct config_item_type o2hb_heartbeat_group_type = { |
| 2318 | .ct_group_ops = &o2hb_heartbeat_group_group_ops, | 2318 | .ct_group_ops = &o2hb_heartbeat_group_group_ops, |
| 2319 | .ct_item_ops = &o2hb_hearbeat_group_item_ops, | 2319 | .ct_item_ops = &o2hb_heartbeat_group_item_ops, |
| 2320 | .ct_attrs = o2hb_heartbeat_group_attrs, | 2320 | .ct_attrs = o2hb_heartbeat_group_attrs, |
| 2321 | .ct_owner = THIS_MODULE, | 2321 | .ct_owner = THIS_MODULE, |
| 2322 | }; | 2322 | }; |
| @@ -2389,6 +2389,9 @@ static int o2hb_region_pin(const char *region_uuid) | |||
| 2389 | assert_spin_locked(&o2hb_live_lock); | 2389 | assert_spin_locked(&o2hb_live_lock); |
| 2390 | 2390 | ||
| 2391 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | 2391 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { |
| 2392 | if (reg->hr_item_dropped) | ||
| 2393 | continue; | ||
| 2394 | |||
| 2392 | uuid = config_item_name(&reg->hr_item); | 2395 | uuid = config_item_name(&reg->hr_item); |
| 2393 | 2396 | ||
| 2394 | /* local heartbeat */ | 2397 | /* local heartbeat */ |
| @@ -2439,6 +2442,9 @@ static void o2hb_region_unpin(const char *region_uuid) | |||
| 2439 | assert_spin_locked(&o2hb_live_lock); | 2442 | assert_spin_locked(&o2hb_live_lock); |
| 2440 | 2443 | ||
| 2441 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | 2444 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { |
| 2445 | if (reg->hr_item_dropped) | ||
| 2446 | continue; | ||
| 2447 | |||
| 2442 | uuid = config_item_name(&reg->hr_item); | 2448 | uuid = config_item_name(&reg->hr_item); |
| 2443 | if (region_uuid) { | 2449 | if (region_uuid) { |
| 2444 | if (strcmp(region_uuid, uuid)) | 2450 | if (strcmp(region_uuid, uuid)) |
| @@ -2654,6 +2660,9 @@ int o2hb_get_all_regions(char *region_uuids, u8 max_regions) | |||
| 2654 | 2660 | ||
| 2655 | p = region_uuids; | 2661 | p = region_uuids; |
| 2656 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | 2662 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { |
| 2663 | if (reg->hr_item_dropped) | ||
| 2664 | continue; | ||
| 2665 | |||
| 2657 | mlog(0, "Region: %s\n", config_item_name(&reg->hr_item)); | 2666 | mlog(0, "Region: %s\n", config_item_name(&reg->hr_item)); |
| 2658 | if (numregs < max_regions) { | 2667 | if (numregs < max_regions) { |
| 2659 | memcpy(p, config_item_name(&reg->hr_item), | 2668 | memcpy(p, config_item_name(&reg->hr_item), |
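These heartbeat hunks correct the "hearbeat" spelling in two identifiers, submit the heartbeat write as WRITE_SYNC so the I/O scheduler does not park it, and make the three region walks skip entries whose configfs item is being dropped (hr_item_dropped). A small userspace sketch of that skip-while-iterating pattern, with invented names:

    #include <pthread.h>
    #include <stddef.h>

    struct region {
            struct region *next;
            int dropped;                /* set by the teardown path */
            char uuid[32];
    };

    static pthread_mutex_t live_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct region *all_regions;

    void for_each_live_region(void (*fn)(struct region *))
    {
            pthread_mutex_lock(&live_lock);
            for (struct region *r = all_regions; r; r = r->next) {
                    if (r->dropped)
                            continue;   /* mirrors the hr_item_dropped check */
                    fn(r);
            }
            pthread_mutex_unlock(&live_lock);
    }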
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index c19897d0fe14..1ec141e758d7 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c | |||
| @@ -264,7 +264,7 @@ void o2quo_hb_still_up(u8 node) | |||
| 264 | /* This is analogous to hb_up. as a node's connection comes up we delay the | 264 | /* This is analogous to hb_up. as a node's connection comes up we delay the |
| 265 | * quorum decision until we see it heartbeating. the hold will be dropped in | 265 | * quorum decision until we see it heartbeating. the hold will be dropped in |
| 266 | * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if | 266 | * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if |
| 267 | * it's already heartbeating we we might be dropping a hold that conn_up got. | 267 | * it's already heartbeating we might be dropping a hold that conn_up got. |
| 268 | * */ | 268 | * */ |
| 269 | void o2quo_conn_up(u8 node) | 269 | void o2quo_conn_up(u8 node) |
| 270 | { | 270 | { |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index aa88bd8bcedc..d644dc611425 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
| @@ -406,6 +406,9 @@ static void sc_kref_release(struct kref *kref) | |||
| 406 | sc->sc_node = NULL; | 406 | sc->sc_node = NULL; |
| 407 | 407 | ||
| 408 | o2net_debug_del_sc(sc); | 408 | o2net_debug_del_sc(sc); |
| 409 | |||
| 410 | if (sc->sc_page) | ||
| 411 | __free_page(sc->sc_page); | ||
| 409 | kfree(sc); | 412 | kfree(sc); |
| 410 | } | 413 | } |
| 411 | 414 | ||
| @@ -630,19 +633,19 @@ static void o2net_state_change(struct sock *sk) | |||
| 630 | state_change = sc->sc_state_change; | 633 | state_change = sc->sc_state_change; |
| 631 | 634 | ||
| 632 | switch(sk->sk_state) { | 635 | switch(sk->sk_state) { |
| 633 | /* ignore connecting sockets as they make progress */ | 636 | /* ignore connecting sockets as they make progress */ |
| 634 | case TCP_SYN_SENT: | 637 | case TCP_SYN_SENT: |
| 635 | case TCP_SYN_RECV: | 638 | case TCP_SYN_RECV: |
| 636 | break; | 639 | break; |
| 637 | case TCP_ESTABLISHED: | 640 | case TCP_ESTABLISHED: |
| 638 | o2net_sc_queue_work(sc, &sc->sc_connect_work); | 641 | o2net_sc_queue_work(sc, &sc->sc_connect_work); |
| 639 | break; | 642 | break; |
| 640 | default: | 643 | default: |
| 641 | printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT | 644 | printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT |
| 642 | " shutdown, state %d\n", | 645 | " shutdown, state %d\n", |
| 643 | SC_NODEF_ARGS(sc), sk->sk_state); | 646 | SC_NODEF_ARGS(sc), sk->sk_state); |
| 644 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | 647 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); |
| 645 | break; | 648 | break; |
| 646 | } | 649 | } |
| 647 | out: | 650 | out: |
| 648 | read_unlock(&sk->sk_callback_lock); | 651 | read_unlock(&sk->sk_callback_lock); |
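sc_kref_release() is the destructor that runs when the last reference to a socket container goes away; the added lines free the receive page hanging off the object before kfree(), closing a leak. A minimal userspace analogue (refcounting reduced to a plain counter, names invented):

    #include <stdlib.h>

    struct conn {
            int refs;
            void *page;                 /* lazily allocated receive buffer */
    };

    static void conn_release(struct conn *sc)
    {
            free(sc->page);             /* the kernel checks sc_page first; free(NULL) is harmless here */
            free(sc);
    }

    void conn_put(struct conn *sc)
    {
            if (--sc->refs == 0)        /* kref_put() would do this atomically */
                    conn_release(sc);
    }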
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 975810b98492..47e67c2d228f 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
| @@ -178,6 +178,7 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm, | |||
| 178 | lock->ml.node); | 178 | lock->ml.node); |
| 179 | } | 179 | } |
| 180 | } else { | 180 | } else { |
| 181 | status = DLM_NORMAL; | ||
| 181 | dlm_lock_get(lock); | 182 | dlm_lock_get(lock); |
| 182 | list_add_tail(&lock->list, &res->blocked); | 183 | list_add_tail(&lock->list, &res->blocked); |
| 183 | kick_thread = 1; | 184 | kick_thread = 1; |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index e68588e6b1e8..773bd32bfd8c 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
| @@ -55,9 +55,6 @@ | |||
| 55 | static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node); | 55 | static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node); |
| 56 | 56 | ||
| 57 | static int dlm_recovery_thread(void *data); | 57 | static int dlm_recovery_thread(void *data); |
| 58 | void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); | ||
| 59 | int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); | ||
| 60 | void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); | ||
| 61 | static int dlm_do_recovery(struct dlm_ctxt *dlm); | 58 | static int dlm_do_recovery(struct dlm_ctxt *dlm); |
| 62 | 59 | ||
| 63 | static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); | 60 | static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); |
| @@ -789,7 +786,7 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, | |||
| 789 | u8 dead_node) | 786 | u8 dead_node) |
| 790 | { | 787 | { |
| 791 | struct dlm_lock_request lr; | 788 | struct dlm_lock_request lr; |
| 792 | enum dlm_status ret; | 789 | int ret; |
| 793 | 790 | ||
| 794 | mlog(0, "\n"); | 791 | mlog(0, "\n"); |
| 795 | 792 | ||
| @@ -802,7 +799,6 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, | |||
| 802 | lr.dead_node = dead_node; | 799 | lr.dead_node = dead_node; |
| 803 | 800 | ||
| 804 | // send message | 801 | // send message |
| 805 | ret = DLM_NOLOCKMGR; | ||
| 806 | ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, | 802 | ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, |
| 807 | &lr, sizeof(lr), request_from, NULL); | 803 | &lr, sizeof(lr), request_from, NULL); |
| 808 | 804 | ||
| @@ -2696,6 +2692,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 2696 | dlm->name, br->node_idx, br->dead_node, | 2692 | dlm->name, br->node_idx, br->dead_node, |
| 2697 | dlm->reco.dead_node, dlm->reco.new_master); | 2693 | dlm->reco.dead_node, dlm->reco.new_master); |
| 2698 | spin_unlock(&dlm->spinlock); | 2694 | spin_unlock(&dlm->spinlock); |
| 2695 | dlm_put(dlm); | ||
| 2699 | return -EAGAIN; | 2696 | return -EAGAIN; |
| 2700 | } | 2697 | } |
| 2701 | spin_unlock(&dlm->spinlock); | 2698 | spin_unlock(&dlm->spinlock); |
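dlm_begin_reco_handler() takes a reference on the dlm context near its start; the added dlm_put() makes the -EAGAIN early return drop that reference the same way every other exit path does. A toy illustration of the get/put balance rule the fix restores (all names are placeholders):

    struct domain { int refs; };

    static void domain_get(struct domain *d) { d->refs++; }
    static void domain_put(struct domain *d) { d->refs--; }

    int begin_recovery(struct domain *d, int busy)
    {
            domain_get(d);
            if (busy) {
                    domain_put(d);      /* the put this patch adds on -EAGAIN */
                    return -1;
            }
            /* ... normal recovery path ... */
            domain_put(d);
            return 0;
    }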
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8a38714f1d92..41000f223ca4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -2646,17 +2646,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence) | |||
| 2646 | goto out; | 2646 | goto out; |
| 2647 | } | 2647 | } |
| 2648 | 2648 | ||
| 2649 | if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | 2649 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
| 2650 | ret = -EINVAL; | ||
| 2651 | if (!ret && offset > inode->i_sb->s_maxbytes) | ||
| 2652 | ret = -EINVAL; | ||
| 2653 | if (ret) | ||
| 2654 | goto out; | ||
| 2655 | |||
| 2656 | if (offset != file->f_pos) { | ||
| 2657 | file->f_pos = offset; | ||
| 2658 | file->f_version = 0; | ||
| 2659 | } | ||
| 2660 | 2650 | ||
| 2661 | out: | 2651 | out: |
| 2662 | mutex_unlock(&inode->i_mutex); | 2652 | mutex_unlock(&inode->i_mutex); |
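The deleted lines spell out exactly what the new helper call does: validate the offset against the filesystem maximum and reset f_version only when the position actually moves. A userspace approximation of that behaviour (ignoring the FMODE_UNSIGNED_OFFSET case the old code also handled):

    #include <errno.h>

    struct open_file { long long pos; unsigned long version; };

    long long setpos(struct open_file *file, long long offset, long long maxsize)
    {
            if (offset < 0 || offset > maxsize)
                    return -EINVAL;
            if (offset != file->pos) {
                    file->pos = offset;
                    file->version = 0;  /* invalidate any cached read position */
            }
            return offset;
    }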
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index a3385b63ff5e..96f9ac237e86 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
| @@ -200,7 +200,6 @@ void ocfs2_complete_quota_recovery(struct ocfs2_super *osb); | |||
| 200 | 200 | ||
| 201 | static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb) | 201 | static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb) |
| 202 | { | 202 | { |
| 203 | atomic_set(&osb->needs_checkpoint, 1); | ||
| 204 | wake_up(&osb->checkpoint_event); | 203 | wake_up(&osb->checkpoint_event); |
| 205 | } | 204 | } |
| 206 | 205 | ||
| @@ -538,7 +537,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb, | |||
| 538 | extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); | 537 | extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); |
| 539 | 538 | ||
| 540 | return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + | 539 | return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + |
| 541 | ocfs2_quota_trans_credits(sb); | 540 | ocfs2_quota_trans_credits(sb) + bits_wanted; |
| 542 | } | 541 | } |
| 543 | 542 | ||
| 544 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) | 543 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) |
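The extend-credit estimate now also reserves one journal credit per requested allocation unit, which is what the added bits_wanted term expresses. A toy restatement of the arithmetic, with the quota term reduced to a plain parameter:

    /* Illustrative only: mirrors the sum in the hunk above, nothing more. */
    static int calc_extend_credits(int bitmap_blocks, int sysfile_bitmap_blocks,
                                   int extent_blocks, int quota_credits,
                                   int bits_wanted)
    {
            return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
                   quota_credits + bits_wanted;
    }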
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b4a5cdf9dbc5..be3f8676a438 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
| @@ -522,7 +522,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, | |||
| 522 | 522 | ||
| 523 | fe->i_last_eb_blk = 0; | 523 | fe->i_last_eb_blk = 0; |
| 524 | strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); | 524 | strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); |
| 525 | le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL); | 525 | fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL); |
| 526 | fe->i_atime = fe->i_ctime = fe->i_mtime = | 526 | fe->i_atime = fe->i_ctime = fe->i_mtime = |
| 527 | cpu_to_le64(CURRENT_TIME.tv_sec); | 527 | cpu_to_le64(CURRENT_TIME.tv_sec); |
| 528 | fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = | 528 | fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = |
| @@ -773,7 +773,7 @@ static int ocfs2_remote_dentry_delete(struct dentry *dentry) | |||
| 773 | return ret; | 773 | return ret; |
| 774 | } | 774 | } |
| 775 | 775 | ||
| 776 | static inline int inode_is_unlinkable(struct inode *inode) | 776 | static inline int ocfs2_inode_is_unlinkable(struct inode *inode) |
| 777 | { | 777 | { |
| 778 | if (S_ISDIR(inode->i_mode)) { | 778 | if (S_ISDIR(inode->i_mode)) { |
| 779 | if (inode->i_nlink == 2) | 779 | if (inode->i_nlink == 2) |
| @@ -791,6 +791,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
| 791 | { | 791 | { |
| 792 | int status; | 792 | int status; |
| 793 | int child_locked = 0; | 793 | int child_locked = 0; |
| 794 | bool is_unlinkable = false; | ||
| 794 | struct inode *inode = dentry->d_inode; | 795 | struct inode *inode = dentry->d_inode; |
| 795 | struct inode *orphan_dir = NULL; | 796 | struct inode *orphan_dir = NULL; |
| 796 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | 797 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); |
| @@ -865,7 +866,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
| 865 | goto leave; | 866 | goto leave; |
| 866 | } | 867 | } |
| 867 | 868 | ||
| 868 | if (inode_is_unlinkable(inode)) { | 869 | if (ocfs2_inode_is_unlinkable(inode)) { |
| 869 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, | 870 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, |
| 870 | OCFS2_I(inode)->ip_blkno, | 871 | OCFS2_I(inode)->ip_blkno, |
| 871 | orphan_name, &orphan_insert); | 872 | orphan_name, &orphan_insert); |
| @@ -873,6 +874,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
| 873 | mlog_errno(status); | 874 | mlog_errno(status); |
| 874 | goto leave; | 875 | goto leave; |
| 875 | } | 876 | } |
| 877 | is_unlinkable = true; | ||
| 876 | } | 878 | } |
| 877 | 879 | ||
| 878 | handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb)); | 880 | handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb)); |
| @@ -892,15 +894,6 @@ static int ocfs2_unlink(struct inode *dir, | |||
| 892 | 894 | ||
| 893 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 895 | fe = (struct ocfs2_dinode *) fe_bh->b_data; |
| 894 | 896 | ||
| 895 | if (inode_is_unlinkable(inode)) { | ||
| 896 | status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name, | ||
| 897 | &orphan_insert, orphan_dir); | ||
| 898 | if (status < 0) { | ||
| 899 | mlog_errno(status); | ||
| 900 | goto leave; | ||
| 901 | } | ||
| 902 | } | ||
| 903 | |||
| 904 | /* delete the name from the parent dir */ | 897 | /* delete the name from the parent dir */ |
| 905 | status = ocfs2_delete_entry(handle, dir, &lookup); | 898 | status = ocfs2_delete_entry(handle, dir, &lookup); |
| 906 | if (status < 0) { | 899 | if (status < 0) { |
| @@ -923,6 +916,14 @@ static int ocfs2_unlink(struct inode *dir, | |||
| 923 | mlog_errno(status); | 916 | mlog_errno(status); |
| 924 | if (S_ISDIR(inode->i_mode)) | 917 | if (S_ISDIR(inode->i_mode)) |
| 925 | inc_nlink(dir); | 918 | inc_nlink(dir); |
| 919 | goto leave; | ||
| 920 | } | ||
| 921 | |||
| 922 | if (is_unlinkable) { | ||
| 923 | status = ocfs2_orphan_add(osb, handle, inode, fe_bh, | ||
| 924 | orphan_name, &orphan_insert, orphan_dir); | ||
| 925 | if (status < 0) | ||
| 926 | mlog_errno(status); | ||
| 926 | } | 927 | } |
| 927 | 928 | ||
| 928 | leave: | 929 | leave: |
| @@ -2012,6 +2013,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
| 2012 | goto leave; | 2013 | goto leave; |
| 2013 | } | 2014 | } |
| 2014 | 2015 | ||
| 2016 | /* | ||
| 2017 | * We're going to journal the change of i_flags and i_orphaned_slot. | ||
| 2018 | * It's safe anyway, though some callers may duplicate the journaling. | ||
| 2019 | * Journaling within the func just makes the logic look more | ||
| 2020 | * straightforward. | ||
| 2021 | */ | ||
| 2022 | status = ocfs2_journal_access_di(handle, | ||
| 2023 | INODE_CACHE(inode), | ||
| 2024 | fe_bh, | ||
| 2025 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2026 | if (status < 0) { | ||
| 2027 | mlog_errno(status); | ||
| 2028 | goto leave; | ||
| 2029 | } | ||
| 2030 | |||
| 2015 | /* we're a cluster, and nlink can change on disk from | 2031 | /* we're a cluster, and nlink can change on disk from |
| 2016 | * underneath us... */ | 2032 | * underneath us... */ |
| 2017 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; | 2033 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; |
| @@ -2026,25 +2042,10 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
| 2026 | orphan_dir_bh, lookup); | 2042 | orphan_dir_bh, lookup); |
| 2027 | if (status < 0) { | 2043 | if (status < 0) { |
| 2028 | mlog_errno(status); | 2044 | mlog_errno(status); |
| 2029 | goto leave; | 2045 | goto rollback; |
| 2030 | } | ||
| 2031 | |||
| 2032 | /* | ||
| 2033 | * We're going to journal the change of i_flags and i_orphaned_slot. | ||
| 2034 | * It's safe anyway, though some callers may duplicate the journaling. | ||
| 2035 | * Journaling within the func just makes the logic look more | ||
| 2036 | * straightforward. | ||
| 2037 | */ | ||
| 2038 | status = ocfs2_journal_access_di(handle, | ||
| 2039 | INODE_CACHE(inode), | ||
| 2040 | fe_bh, | ||
| 2041 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2042 | if (status < 0) { | ||
| 2043 | mlog_errno(status); | ||
| 2044 | goto leave; | ||
| 2045 | } | 2046 | } |
| 2046 | 2047 | ||
| 2047 | le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); | 2048 | fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL); |
| 2048 | OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; | 2049 | OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; |
| 2049 | 2050 | ||
| 2050 | /* Record which orphan dir our inode now resides | 2051 | /* Record which orphan dir our inode now resides |
| @@ -2057,11 +2058,16 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
| 2057 | trace_ocfs2_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno, | 2058 | trace_ocfs2_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno, |
| 2058 | osb->slot_num); | 2059 | osb->slot_num); |
| 2059 | 2060 | ||
| 2061 | rollback: | ||
| 2062 | if (status < 0) { | ||
| 2063 | if (S_ISDIR(inode->i_mode)) | ||
| 2064 | ocfs2_add_links_count(orphan_fe, -1); | ||
| 2065 | set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe)); | ||
| 2066 | } | ||
| 2067 | |||
| 2060 | leave: | 2068 | leave: |
| 2061 | brelse(orphan_dir_bh); | 2069 | brelse(orphan_dir_bh); |
| 2062 | 2070 | ||
| 2063 | if (status) | ||
| 2064 | mlog_errno(status); | ||
| 2065 | return status; | 2071 | return status; |
| 2066 | } | 2072 | } |
| 2067 | 2073 | ||
| @@ -2434,7 +2440,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | |||
| 2434 | } | 2440 | } |
| 2435 | 2441 | ||
| 2436 | di = (struct ocfs2_dinode *)di_bh->b_data; | 2442 | di = (struct ocfs2_dinode *)di_bh->b_data; |
| 2437 | le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); | 2443 | di->i_flags &= ~cpu_to_le32(OCFS2_ORPHANED_FL); |
| 2438 | di->i_orphaned_slot = 0; | 2444 | di->i_orphaned_slot = 0; |
| 2439 | set_nlink(inode, 1); | 2445 | set_nlink(inode, 1); |
| 2440 | ocfs2_set_links_count(di, inode->i_nlink); | 2446 | ocfs2_set_links_count(di, inode->i_nlink); |
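i_flags is an on-disk little-endian bitmask, so the namei hunks set OCFS2_VALID_FL and OCFS2_ORPHANED_FL with a bitwise OR of cpu_to_le32() and clear with AND-NOT, rather than le32_add_cpu(): addition carries into neighbouring bits when the flag is already present, while the bitwise forms are idempotent. A standalone demonstration of the difference (endian conversion omitted):

    #include <stdint.h>
    #include <assert.h>

    #define ORPHANED_FL 0x0002u

    int main(void)
    {
            uint32_t flags = ORPHANED_FL;           /* flag already set on disk */

            uint32_t by_add = flags + ORPHANED_FL;  /* 0x0004: wrong bit now set */
            uint32_t by_or  = flags | ORPHANED_FL;  /* still 0x0002 */

            assert(by_or == ORPHANED_FL);
            assert(by_add != ORPHANED_FL);
            return 0;
    }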
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index d355e6e36b36..3a903470c794 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
| @@ -347,7 +347,6 @@ struct ocfs2_super | |||
| 347 | struct task_struct *recovery_thread_task; | 347 | struct task_struct *recovery_thread_task; |
| 348 | int disable_recovery; | 348 | int disable_recovery; |
| 349 | wait_queue_head_t checkpoint_event; | 349 | wait_queue_head_t checkpoint_event; |
| 350 | atomic_t needs_checkpoint; | ||
| 351 | struct ocfs2_journal *journal; | 350 | struct ocfs2_journal *journal; |
| 352 | unsigned long osb_commit_interval; | 351 | unsigned long osb_commit_interval; |
| 353 | 352 | ||
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index b7e74b580c0f..5397c07ce608 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
| @@ -1422,7 +1422,7 @@ static int ocfs2_relink_block_group(handle_t *handle, | |||
| 1422 | int status; | 1422 | int status; |
| 1423 | /* there is a really tiny chance the journal calls could fail, | 1423 | /* there is a really tiny chance the journal calls could fail, |
| 1424 | * but we wouldn't want inconsistent blocks in *any* case. */ | 1424 | * but we wouldn't want inconsistent blocks in *any* case. */ |
| 1425 | u64 fe_ptr, bg_ptr, prev_bg_ptr; | 1425 | u64 bg_ptr, prev_bg_ptr; |
| 1426 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; | 1426 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; |
| 1427 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; | 1427 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; |
| 1428 | struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; | 1428 | struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; |
| @@ -1437,51 +1437,44 @@ static int ocfs2_relink_block_group(handle_t *handle, | |||
| 1437 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | 1437 | (unsigned long long)le64_to_cpu(bg->bg_blkno), |
| 1438 | (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); | 1438 | (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); |
| 1439 | 1439 | ||
| 1440 | fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); | ||
| 1441 | bg_ptr = le64_to_cpu(bg->bg_next_group); | 1440 | bg_ptr = le64_to_cpu(bg->bg_next_group); |
| 1442 | prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); | 1441 | prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); |
| 1443 | 1442 | ||
| 1444 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), | 1443 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), |
| 1445 | prev_bg_bh, | 1444 | prev_bg_bh, |
| 1446 | OCFS2_JOURNAL_ACCESS_WRITE); | 1445 | OCFS2_JOURNAL_ACCESS_WRITE); |
| 1447 | if (status < 0) { | 1446 | if (status < 0) |
| 1448 | mlog_errno(status); | 1447 | goto out; |
| 1449 | goto out_rollback; | ||
| 1450 | } | ||
| 1451 | 1448 | ||
| 1452 | prev_bg->bg_next_group = bg->bg_next_group; | 1449 | prev_bg->bg_next_group = bg->bg_next_group; |
| 1453 | ocfs2_journal_dirty(handle, prev_bg_bh); | 1450 | ocfs2_journal_dirty(handle, prev_bg_bh); |
| 1454 | 1451 | ||
| 1455 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), | 1452 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), |
| 1456 | bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); | 1453 | bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
| 1457 | if (status < 0) { | 1454 | if (status < 0) |
| 1458 | mlog_errno(status); | 1455 | goto out_rollback_prev_bg; |
| 1459 | goto out_rollback; | ||
| 1460 | } | ||
| 1461 | 1456 | ||
| 1462 | bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; | 1457 | bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; |
| 1463 | ocfs2_journal_dirty(handle, bg_bh); | 1458 | ocfs2_journal_dirty(handle, bg_bh); |
| 1464 | 1459 | ||
| 1465 | status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), | 1460 | status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), |
| 1466 | fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); | 1461 | fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
| 1467 | if (status < 0) { | 1462 | if (status < 0) |
| 1468 | mlog_errno(status); | 1463 | goto out_rollback_bg; |
| 1469 | goto out_rollback; | ||
| 1470 | } | ||
| 1471 | 1464 | ||
| 1472 | fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; | 1465 | fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; |
| 1473 | ocfs2_journal_dirty(handle, fe_bh); | 1466 | ocfs2_journal_dirty(handle, fe_bh); |
| 1474 | 1467 | ||
| 1475 | out_rollback: | 1468 | out: |
| 1476 | if (status < 0) { | 1469 | if (status < 0) |
| 1477 | fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); | ||
| 1478 | bg->bg_next_group = cpu_to_le64(bg_ptr); | ||
| 1479 | prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); | ||
| 1480 | } | ||
| 1481 | |||
| 1482 | if (status) | ||
| 1483 | mlog_errno(status); | 1470 | mlog_errno(status); |
| 1484 | return status; | 1471 | return status; |
| 1472 | |||
| 1473 | out_rollback_bg: | ||
| 1474 | bg->bg_next_group = cpu_to_le64(bg_ptr); | ||
| 1475 | out_rollback_prev_bg: | ||
| 1476 | prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); | ||
| 1477 | goto out; | ||
| 1485 | } | 1478 | } |
| 1486 | 1479 | ||
| 1487 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, | 1480 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, |
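ocfs2_relink_block_group() now rolls back through a chain of labels so each failure undoes only the pointers that were actually rewritten, and the unused fe_ptr snapshot goes away. A userspace sketch of that staged-rollback shape, with can_fail() standing in for the journal-access calls:

    static int can_fail(void) { return 0; }   /* placeholder for ocfs2_journal_access_*() */

    int relink(int *prev, int *bg, int *chain)
    {
            int old_prev = *prev, old_bg = *bg;
            int status;

            status = can_fail();
            if (status < 0)
                    goto out;               /* nothing written yet */
            *prev = 1;

            status = can_fail();
            if (status < 0)
                    goto rollback_prev;     /* undo only the first write */
            *bg = 2;

            status = can_fail();
            if (status < 0)
                    goto rollback_bg;       /* undo the second, then the first */
            *chain = 3;
    out:
            return status;

    rollback_bg:
            *bg = old_bg;
    rollback_prev:
            *prev = old_prev;
            goto out;
    }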
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 01b85165552b..854d80955bf8 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -286,10 +286,9 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) | |||
| 286 | spin_unlock(&osb->osb_lock); | 286 | spin_unlock(&osb->osb_lock); |
| 287 | 287 | ||
| 288 | out += snprintf(buf + out, len - out, | 288 | out += snprintf(buf + out, len - out, |
| 289 | "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", | 289 | "%10s => Pid: %d Interval: %lu\n", "Commit", |
| 290 | (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), | 290 | (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), |
| 291 | osb->osb_commit_interval, | 291 | osb->osb_commit_interval); |
| 292 | atomic_read(&osb->needs_checkpoint)); | ||
| 293 | 292 | ||
| 294 | out += snprintf(buf + out, len - out, | 293 | out += snprintf(buf + out, len - out, |
| 295 | "%10s => State: %d TxnId: %lu NumTxns: %d\n", | 294 | "%10s => State: %d TxnId: %lu NumTxns: %d\n", |
| @@ -2154,7 +2153,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2154 | } | 2153 | } |
| 2155 | 2154 | ||
| 2156 | init_waitqueue_head(&osb->checkpoint_event); | 2155 | init_waitqueue_head(&osb->checkpoint_event); |
| 2157 | atomic_set(&osb->needs_checkpoint, 0); | ||
| 2158 | 2156 | ||
| 2159 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 2157 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; |
| 2160 | 2158 | ||
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 2e3ea308c144..317ef0abccbb 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
| @@ -2751,7 +2751,6 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, | |||
| 2751 | { | 2751 | { |
| 2752 | int ret; | 2752 | int ret; |
| 2753 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 2753 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
| 2754 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
| 2755 | struct ocfs2_xa_loc loc; | 2754 | struct ocfs2_xa_loc loc; |
| 2756 | 2755 | ||
| 2757 | if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) | 2756 | if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) |
| @@ -2759,13 +2758,6 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, | |||
| 2759 | 2758 | ||
| 2760 | down_write(&oi->ip_alloc_sem); | 2759 | down_write(&oi->ip_alloc_sem); |
| 2761 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { | 2760 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { |
| 2762 | if (!ocfs2_xattr_has_space_inline(inode, di)) { | ||
| 2763 | ret = -ENOSPC; | ||
| 2764 | goto out; | ||
| 2765 | } | ||
| 2766 | } | ||
| 2767 | |||
| 2768 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { | ||
| 2769 | ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); | 2761 | ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); |
| 2770 | if (ret) { | 2762 | if (ret) { |
| 2771 | if (ret != -ENOSPC) | 2763 | if (ret != -ENOSPC) |
| @@ -6499,6 +6491,16 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) | |||
| 6499 | } | 6491 | } |
| 6500 | 6492 | ||
| 6501 | new_oi = OCFS2_I(args->new_inode); | 6493 | new_oi = OCFS2_I(args->new_inode); |
| 6494 | /* | ||
| 6495 | * Adjust extent record count to reserve space for extended attribute. | ||
| 6496 | * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). | ||
| 6497 | */ | ||
| 6498 | if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && | ||
| 6499 | !(ocfs2_inode_is_fast_symlink(args->new_inode))) { | ||
| 6500 | struct ocfs2_extent_list *el = &new_di->id2.i_list; | ||
| 6501 | le16_add_cpu(&el->l_count, -(inline_size / | ||
| 6502 | sizeof(struct ocfs2_extent_rec))); | ||
| 6503 | } | ||
| 6502 | spin_lock(&new_oi->ip_lock); | 6504 | spin_lock(&new_oi->ip_lock); |
| 6503 | new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; | 6505 | new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; |
| 6504 | new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); | 6506 | new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); |
diff --git a/fs/open.c b/fs/open.c --- a/fs/open.c +++ b/fs/open.c | |||
| @@ -840,11 +840,15 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o | |||
| 840 | if (flags & __O_SYNC) | 840 | if (flags & __O_SYNC) |
| 841 | flags |= O_DSYNC; | 841 | flags |= O_DSYNC; |
| 842 | 842 | ||
| 843 | /* | 843 | if (flags & O_TMPFILE) { |
| 844 | * If we have O_PATH in the open flag. Then we | 844 | if (!(flags & O_CREAT)) |
| 845 | * cannot have anything other than the below set of flags | 845 | return -EINVAL; |
| 846 | */ | 846 | acc_mode = MAY_OPEN | ACC_MODE(flags); |
| 847 | if (flags & O_PATH) { | 847 | } else if (flags & O_PATH) { |
| 848 | /* | ||
| 849 | * If we have O_PATH in the open flag. Then we | ||
| 850 | * cannot have anything other than the below set of flags | ||
| 851 | */ | ||
| 848 | flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; | 852 | flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; |
| 849 | acc_mode = 0; | 853 | acc_mode = 0; |
| 850 | } else { | 854 | } else { |
| @@ -876,7 +880,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o | |||
| 876 | lookup_flags |= LOOKUP_DIRECTORY; | 880 | lookup_flags |= LOOKUP_DIRECTORY; |
| 877 | if (!(flags & O_NOFOLLOW)) | 881 | if (!(flags & O_NOFOLLOW)) |
| 878 | lookup_flags |= LOOKUP_FOLLOW; | 882 | lookup_flags |= LOOKUP_FOLLOW; |
| 879 | return lookup_flags; | 883 | op->lookup_flags = lookup_flags; |
| 884 | return 0; | ||
| 880 | } | 885 | } |
| 881 | 886 | ||
| 882 | /** | 887 | /** |
| @@ -893,8 +898,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o | |||
| 893 | struct file *file_open_name(struct filename *name, int flags, umode_t mode) | 898 | struct file *file_open_name(struct filename *name, int flags, umode_t mode) |
| 894 | { | 899 | { |
| 895 | struct open_flags op; | 900 | struct open_flags op; |
| 896 | int lookup = build_open_flags(flags, mode, &op); | 901 | int err = build_open_flags(flags, mode, &op); |
| 897 | return do_filp_open(AT_FDCWD, name, &op, lookup); | 902 | return err ? ERR_PTR(err) : do_filp_open(AT_FDCWD, name, &op); |
| 898 | } | 903 | } |
| 899 | 904 | ||
| 900 | /** | 905 | /** |
| @@ -919,37 +924,43 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, | |||
| 919 | const char *filename, int flags) | 924 | const char *filename, int flags) |
| 920 | { | 925 | { |
| 921 | struct open_flags op; | 926 | struct open_flags op; |
| 922 | int lookup = build_open_flags(flags, 0, &op); | 927 | int err = build_open_flags(flags, 0, &op); |
| 928 | if (err) | ||
| 929 | return ERR_PTR(err); | ||
| 923 | if (flags & O_CREAT) | 930 | if (flags & O_CREAT) |
| 924 | return ERR_PTR(-EINVAL); | 931 | return ERR_PTR(-EINVAL); |
| 925 | if (!filename && (flags & O_DIRECTORY)) | 932 | if (!filename && (flags & O_DIRECTORY)) |
| 926 | if (!dentry->d_inode->i_op->lookup) | 933 | if (!dentry->d_inode->i_op->lookup) |
| 927 | return ERR_PTR(-ENOTDIR); | 934 | return ERR_PTR(-ENOTDIR); |
| 928 | return do_file_open_root(dentry, mnt, filename, &op, lookup); | 935 | return do_file_open_root(dentry, mnt, filename, &op); |
| 929 | } | 936 | } |
| 930 | EXPORT_SYMBOL(file_open_root); | 937 | EXPORT_SYMBOL(file_open_root); |
| 931 | 938 | ||
| 932 | long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) | 939 | long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) |
| 933 | { | 940 | { |
| 934 | struct open_flags op; | 941 | struct open_flags op; |
| 935 | int lookup = build_open_flags(flags, mode, &op); | 942 | int fd = build_open_flags(flags, mode, &op); |
| 936 | struct filename *tmp = getname(filename); | 943 | struct filename *tmp; |
| 937 | int fd = PTR_ERR(tmp); | 944 | |
| 938 | 945 | if (fd) | |
| 939 | if (!IS_ERR(tmp)) { | 946 | return fd; |
| 940 | fd = get_unused_fd_flags(flags); | 947 | |
| 941 | if (fd >= 0) { | 948 | tmp = getname(filename); |
| 942 | struct file *f = do_filp_open(dfd, tmp, &op, lookup); | 949 | if (IS_ERR(tmp)) |
| 943 | if (IS_ERR(f)) { | 950 | return PTR_ERR(tmp); |
| 944 | put_unused_fd(fd); | 951 | |
| 945 | fd = PTR_ERR(f); | 952 | fd = get_unused_fd_flags(flags); |
| 946 | } else { | 953 | if (fd >= 0) { |
| 947 | fsnotify_open(f); | 954 | struct file *f = do_filp_open(dfd, tmp, &op); |
| 948 | fd_install(fd, f); | 955 | if (IS_ERR(f)) { |
| 949 | } | 956 | put_unused_fd(fd); |
| 957 | fd = PTR_ERR(f); | ||
| 958 | } else { | ||
| 959 | fsnotify_open(f); | ||
| 960 | fd_install(fd, f); | ||
| 950 | } | 961 | } |
| 951 | putname(tmp); | ||
| 952 | } | 962 | } |
| 963 | putname(tmp); | ||
| 953 | return fd; | 964 | return fd; |
| 954 | } | 965 | } |
| 955 | 966 | ||
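build_open_flags() now returns 0 or a negative errno and passes the lookup flags back through op->lookup_flags, and the new O_TMPFILE branch insists on O_CREAT before deriving the access mode; file_open_name(), file_open_root() and do_sys_open() all treat a non-zero return as an error instead of a lookup-flag value. A hypothetical, heavily simplified userspace restatement of that calling convention (flag values are made up):

    #include <errno.h>

    #define X_CREAT    0x01
    #define X_TMPFILE  0x02

    struct parsed_open { int acc_mode; int lookup_flags; };

    int parse_open_flags(int flags, struct parsed_open *op)
    {
            if ((flags & X_TMPFILE) && !(flags & X_CREAT))
                    return -EINVAL;      /* mirrors the O_TMPFILE check added above */
            op->acc_mode = 0;            /* real code derives this from the flags */
            op->lookup_flags = 0;        /* handed back through the struct now */
            return 0;
    }

    int open_path(const char *name, int flags)
    {
            struct parsed_open op;
            int err = parse_open_flags(flags, &op);

            if (err)
                    return err;          /* callers no longer misread this as flags */
            (void)name;                  /* ... do_filp_open() equivalent ... */
            return 0;
    }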
diff --git a/fs/proc/base.c b/fs/proc/base.c index 0016350ad95e..1485e38daaa3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -1686,41 +1686,29 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, | |||
| 1686 | instantiate_t instantiate, struct task_struct *task, const void *ptr) | 1686 | instantiate_t instantiate, struct task_struct *task, const void *ptr) |
| 1687 | { | 1687 | { |
| 1688 | struct dentry *child, *dir = file->f_path.dentry; | 1688 | struct dentry *child, *dir = file->f_path.dentry; |
| 1689 | struct qstr qname = QSTR_INIT(name, len); | ||
| 1689 | struct inode *inode; | 1690 | struct inode *inode; |
| 1690 | struct qstr qname; | 1691 | unsigned type; |
| 1691 | ino_t ino = 0; | 1692 | ino_t ino; |
| 1692 | unsigned type = DT_UNKNOWN; | ||
| 1693 | 1693 | ||
| 1694 | qname.name = name; | 1694 | child = d_hash_and_lookup(dir, &qname); |
| 1695 | qname.len = len; | ||
| 1696 | qname.hash = full_name_hash(name, len); | ||
| 1697 | |||
| 1698 | child = d_lookup(dir, &qname); | ||
| 1699 | if (!child) { | 1695 | if (!child) { |
| 1700 | struct dentry *new; | 1696 | child = d_alloc(dir, &qname); |
| 1701 | new = d_alloc(dir, &qname); | 1697 | if (!child) |
| 1702 | if (new) { | 1698 | goto end_instantiate; |
| 1703 | child = instantiate(dir->d_inode, new, task, ptr); | 1699 | if (instantiate(dir->d_inode, child, task, ptr) < 0) { |
| 1704 | if (child) | 1700 | dput(child); |
| 1705 | dput(new); | 1701 | goto end_instantiate; |
| 1706 | else | ||
| 1707 | child = new; | ||
| 1708 | } | 1702 | } |
| 1709 | } | 1703 | } |
| 1710 | if (!child || IS_ERR(child) || !child->d_inode) | ||
| 1711 | goto end_instantiate; | ||
| 1712 | inode = child->d_inode; | 1704 | inode = child->d_inode; |
| 1713 | if (inode) { | 1705 | ino = inode->i_ino; |
| 1714 | ino = inode->i_ino; | 1706 | type = inode->i_mode >> 12; |
| 1715 | type = inode->i_mode >> 12; | ||
| 1716 | } | ||
| 1717 | dput(child); | 1707 | dput(child); |
| 1718 | end_instantiate: | ||
| 1719 | if (!ino) | ||
| 1720 | ino = find_inode_number(dir, &qname); | ||
| 1721 | if (!ino) | ||
| 1722 | ino = 1; | ||
| 1723 | return dir_emit(ctx, name, len, ino, type); | 1708 | return dir_emit(ctx, name, len, ino, type); |
| 1709 | |||
| 1710 | end_instantiate: | ||
| 1711 | return dir_emit(ctx, name, len, 1, DT_UNKNOWN); | ||
| 1724 | } | 1712 | } |
| 1725 | 1713 | ||
| 1726 | #ifdef CONFIG_CHECKPOINT_RESTORE | 1714 | #ifdef CONFIG_CHECKPOINT_RESTORE |
| @@ -1846,7 +1834,7 @@ struct map_files_info { | |||
| 1846 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ | 1834 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ |
| 1847 | }; | 1835 | }; |
| 1848 | 1836 | ||
| 1849 | static struct dentry * | 1837 | static int |
| 1850 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | 1838 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, |
| 1851 | struct task_struct *task, const void *ptr) | 1839 | struct task_struct *task, const void *ptr) |
| 1852 | { | 1840 | { |
| @@ -1856,7 +1844,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | |||
| 1856 | 1844 | ||
| 1857 | inode = proc_pid_make_inode(dir->i_sb, task); | 1845 | inode = proc_pid_make_inode(dir->i_sb, task); |
| 1858 | if (!inode) | 1846 | if (!inode) |
| 1859 | return ERR_PTR(-ENOENT); | 1847 | return -ENOENT; |
| 1860 | 1848 | ||
| 1861 | ei = PROC_I(inode); | 1849 | ei = PROC_I(inode); |
| 1862 | ei->op.proc_get_link = proc_map_files_get_link; | 1850 | ei->op.proc_get_link = proc_map_files_get_link; |
| @@ -1873,7 +1861,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | |||
| 1873 | d_set_d_op(dentry, &tid_map_files_dentry_operations); | 1861 | d_set_d_op(dentry, &tid_map_files_dentry_operations); |
| 1874 | d_add(dentry, inode); | 1862 | d_add(dentry, inode); |
| 1875 | 1863 | ||
| 1876 | return NULL; | 1864 | return 0; |
| 1877 | } | 1865 | } |
| 1878 | 1866 | ||
| 1879 | static struct dentry *proc_map_files_lookup(struct inode *dir, | 1867 | static struct dentry *proc_map_files_lookup(struct inode *dir, |
| @@ -1882,23 +1870,23 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, | |||
| 1882 | unsigned long vm_start, vm_end; | 1870 | unsigned long vm_start, vm_end; |
| 1883 | struct vm_area_struct *vma; | 1871 | struct vm_area_struct *vma; |
| 1884 | struct task_struct *task; | 1872 | struct task_struct *task; |
| 1885 | struct dentry *result; | 1873 | int result; |
| 1886 | struct mm_struct *mm; | 1874 | struct mm_struct *mm; |
| 1887 | 1875 | ||
| 1888 | result = ERR_PTR(-EPERM); | 1876 | result = -EPERM; |
| 1889 | if (!capable(CAP_SYS_ADMIN)) | 1877 | if (!capable(CAP_SYS_ADMIN)) |
| 1890 | goto out; | 1878 | goto out; |
| 1891 | 1879 | ||
| 1892 | result = ERR_PTR(-ENOENT); | 1880 | result = -ENOENT; |
| 1893 | task = get_proc_task(dir); | 1881 | task = get_proc_task(dir); |
| 1894 | if (!task) | 1882 | if (!task) |
| 1895 | goto out; | 1883 | goto out; |
| 1896 | 1884 | ||
| 1897 | result = ERR_PTR(-EACCES); | 1885 | result = -EACCES; |
| 1898 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 1886 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
| 1899 | goto out_put_task; | 1887 | goto out_put_task; |
| 1900 | 1888 | ||
| 1901 | result = ERR_PTR(-ENOENT); | 1889 | result = -ENOENT; |
| 1902 | if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) | 1890 | if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) |
| 1903 | goto out_put_task; | 1891 | goto out_put_task; |
| 1904 | 1892 | ||
| @@ -1921,7 +1909,7 @@ out_no_vma: | |||
| 1921 | out_put_task: | 1909 | out_put_task: |
| 1922 | put_task_struct(task); | 1910 | put_task_struct(task); |
| 1923 | out: | 1911 | out: |
| 1924 | return result; | 1912 | return ERR_PTR(result); |
| 1925 | } | 1913 | } |
| 1926 | 1914 | ||
| 1927 | static const struct inode_operations proc_map_files_inode_operations = { | 1915 | static const struct inode_operations proc_map_files_inode_operations = { |
| @@ -2135,13 +2123,12 @@ static const struct file_operations proc_timers_operations = { | |||
| 2135 | }; | 2123 | }; |
| 2136 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | 2124 | #endif /* CONFIG_CHECKPOINT_RESTORE */ |
| 2137 | 2125 | ||
| 2138 | static struct dentry *proc_pident_instantiate(struct inode *dir, | 2126 | static int proc_pident_instantiate(struct inode *dir, |
| 2139 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 2127 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
| 2140 | { | 2128 | { |
| 2141 | const struct pid_entry *p = ptr; | 2129 | const struct pid_entry *p = ptr; |
| 2142 | struct inode *inode; | 2130 | struct inode *inode; |
| 2143 | struct proc_inode *ei; | 2131 | struct proc_inode *ei; |
| 2144 | struct dentry *error = ERR_PTR(-ENOENT); | ||
| 2145 | 2132 | ||
| 2146 | inode = proc_pid_make_inode(dir->i_sb, task); | 2133 | inode = proc_pid_make_inode(dir->i_sb, task); |
| 2147 | if (!inode) | 2134 | if (!inode) |
| @@ -2160,9 +2147,9 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, | |||
| 2160 | d_add(dentry, inode); | 2147 | d_add(dentry, inode); |
| 2161 | /* Close the race of the process dying before we return the dentry */ | 2148 | /* Close the race of the process dying before we return the dentry */ |
| 2162 | if (pid_revalidate(dentry, 0)) | 2149 | if (pid_revalidate(dentry, 0)) |
| 2163 | error = NULL; | 2150 | return 0; |
| 2164 | out: | 2151 | out: |
| 2165 | return error; | 2152 | return -ENOENT; |
| 2166 | } | 2153 | } |
| 2167 | 2154 | ||
| 2168 | static struct dentry *proc_pident_lookup(struct inode *dir, | 2155 | static struct dentry *proc_pident_lookup(struct inode *dir, |
| @@ -2170,11 +2157,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
| 2170 | const struct pid_entry *ents, | 2157 | const struct pid_entry *ents, |
| 2171 | unsigned int nents) | 2158 | unsigned int nents) |
| 2172 | { | 2159 | { |
| 2173 | struct dentry *error; | 2160 | int error; |
| 2174 | struct task_struct *task = get_proc_task(dir); | 2161 | struct task_struct *task = get_proc_task(dir); |
| 2175 | const struct pid_entry *p, *last; | 2162 | const struct pid_entry *p, *last; |
| 2176 | 2163 | ||
| 2177 | error = ERR_PTR(-ENOENT); | 2164 | error = -ENOENT; |
| 2178 | 2165 | ||
| 2179 | if (!task) | 2166 | if (!task) |
| 2180 | goto out_no_task; | 2167 | goto out_no_task; |
| @@ -2197,7 +2184,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
| 2197 | out: | 2184 | out: |
| 2198 | put_task_struct(task); | 2185 | put_task_struct(task); |
| 2199 | out_no_task: | 2186 | out_no_task: |
| 2200 | return error; | 2187 | return ERR_PTR(error); |
| 2201 | } | 2188 | } |
| 2202 | 2189 | ||
| 2203 | static int proc_pident_readdir(struct file *file, struct dir_context *ctx, | 2190 | static int proc_pident_readdir(struct file *file, struct dir_context *ctx, |
| @@ -2780,11 +2767,10 @@ void proc_flush_task(struct task_struct *task) | |||
| 2780 | } | 2767 | } |
| 2781 | } | 2768 | } |
| 2782 | 2769 | ||
| 2783 | static struct dentry *proc_pid_instantiate(struct inode *dir, | 2770 | static int proc_pid_instantiate(struct inode *dir, |
| 2784 | struct dentry * dentry, | 2771 | struct dentry * dentry, |
| 2785 | struct task_struct *task, const void *ptr) | 2772 | struct task_struct *task, const void *ptr) |
| 2786 | { | 2773 | { |
| 2787 | struct dentry *error = ERR_PTR(-ENOENT); | ||
| 2788 | struct inode *inode; | 2774 | struct inode *inode; |
| 2789 | 2775 | ||
| 2790 | inode = proc_pid_make_inode(dir->i_sb, task); | 2776 | inode = proc_pid_make_inode(dir->i_sb, task); |
| @@ -2804,14 +2790,14 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, | |||
| 2804 | d_add(dentry, inode); | 2790 | d_add(dentry, inode); |
| 2805 | /* Close the race of the process dying before we return the dentry */ | 2791 | /* Close the race of the process dying before we return the dentry */ |
| 2806 | if (pid_revalidate(dentry, 0)) | 2792 | if (pid_revalidate(dentry, 0)) |
| 2807 | error = NULL; | 2793 | return 0; |
| 2808 | out: | 2794 | out: |
| 2809 | return error; | 2795 | return -ENOENT; |
| 2810 | } | 2796 | } |
| 2811 | 2797 | ||
| 2812 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) | 2798 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) |
| 2813 | { | 2799 | { |
| 2814 | struct dentry *result = NULL; | 2800 | int result = 0; |
| 2815 | struct task_struct *task; | 2801 | struct task_struct *task; |
| 2816 | unsigned tgid; | 2802 | unsigned tgid; |
| 2817 | struct pid_namespace *ns; | 2803 | struct pid_namespace *ns; |
| @@ -2832,7 +2818,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsign | |||
| 2832 | result = proc_pid_instantiate(dir, dentry, task, NULL); | 2818 | result = proc_pid_instantiate(dir, dentry, task, NULL); |
| 2833 | put_task_struct(task); | 2819 | put_task_struct(task); |
| 2834 | out: | 2820 | out: |
| 2835 | return result; | 2821 | return ERR_PTR(result); |
| 2836 | } | 2822 | } |
| 2837 | 2823 | ||
| 2838 | /* | 2824 | /* |
| @@ -2884,21 +2870,21 @@ retry: | |||
| 2884 | int proc_pid_readdir(struct file *file, struct dir_context *ctx) | 2870 | int proc_pid_readdir(struct file *file, struct dir_context *ctx) |
| 2885 | { | 2871 | { |
| 2886 | struct tgid_iter iter; | 2872 | struct tgid_iter iter; |
| 2887 | struct pid_namespace *ns; | 2873 | struct pid_namespace *ns = file->f_dentry->d_sb->s_fs_info; |
| 2888 | loff_t pos = ctx->pos; | 2874 | loff_t pos = ctx->pos; |
| 2889 | 2875 | ||
| 2890 | if (pos >= PID_MAX_LIMIT + TGID_OFFSET) | 2876 | if (pos >= PID_MAX_LIMIT + TGID_OFFSET) |
| 2891 | return 0; | 2877 | return 0; |
| 2892 | 2878 | ||
| 2893 | if (pos == TGID_OFFSET - 1) { | 2879 | if (pos == TGID_OFFSET - 1) { |
| 2894 | if (!proc_fill_cache(file, ctx, "self", 4, NULL, NULL, NULL)) | 2880 | struct inode *inode = ns->proc_self->d_inode; |
| 2881 | if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) | ||
| 2895 | return 0; | 2882 | return 0; |
| 2896 | iter.tgid = 0; | 2883 | iter.tgid = 0; |
| 2897 | } else { | 2884 | } else { |
| 2898 | iter.tgid = pos - TGID_OFFSET; | 2885 | iter.tgid = pos - TGID_OFFSET; |
| 2899 | } | 2886 | } |
| 2900 | iter.task = NULL; | 2887 | iter.task = NULL; |
| 2901 | ns = file->f_dentry->d_sb->s_fs_info; | ||
| 2902 | for (iter = next_tgid(ns, iter); | 2888 | for (iter = next_tgid(ns, iter); |
| 2903 | iter.task; | 2889 | iter.task; |
| 2904 | iter.tgid += 1, iter = next_tgid(ns, iter)) { | 2890 | iter.tgid += 1, iter = next_tgid(ns, iter)) { |
| @@ -3027,10 +3013,9 @@ static const struct inode_operations proc_tid_base_inode_operations = { | |||
| 3027 | .setattr = proc_setattr, | 3013 | .setattr = proc_setattr, |
| 3028 | }; | 3014 | }; |
| 3029 | 3015 | ||
| 3030 | static struct dentry *proc_task_instantiate(struct inode *dir, | 3016 | static int proc_task_instantiate(struct inode *dir, |
| 3031 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 3017 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
| 3032 | { | 3018 | { |
| 3033 | struct dentry *error = ERR_PTR(-ENOENT); | ||
| 3034 | struct inode *inode; | 3019 | struct inode *inode; |
| 3035 | inode = proc_pid_make_inode(dir->i_sb, task); | 3020 | inode = proc_pid_make_inode(dir->i_sb, task); |
| 3036 | 3021 | ||
| @@ -3049,14 +3034,14 @@ static struct dentry *proc_task_instantiate(struct inode *dir, | |||
| 3049 | d_add(dentry, inode); | 3034 | d_add(dentry, inode); |
| 3050 | /* Close the race of the process dying before we return the dentry */ | 3035 | /* Close the race of the process dying before we return the dentry */ |
| 3051 | if (pid_revalidate(dentry, 0)) | 3036 | if (pid_revalidate(dentry, 0)) |
| 3052 | error = NULL; | 3037 | return 0; |
| 3053 | out: | 3038 | out: |
| 3054 | return error; | 3039 | return -ENOENT; |
| 3055 | } | 3040 | } |
| 3056 | 3041 | ||
| 3057 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) | 3042 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) |
| 3058 | { | 3043 | { |
| 3059 | struct dentry *result = ERR_PTR(-ENOENT); | 3044 | int result = -ENOENT; |
| 3060 | struct task_struct *task; | 3045 | struct task_struct *task; |
| 3061 | struct task_struct *leader = get_proc_task(dir); | 3046 | struct task_struct *leader = get_proc_task(dir); |
| 3062 | unsigned tid; | 3047 | unsigned tid; |
| @@ -3086,7 +3071,7 @@ out_drop_task: | |||
| 3086 | out: | 3071 | out: |
| 3087 | put_task_struct(leader); | 3072 | put_task_struct(leader); |
| 3088 | out_no_task: | 3073 | out_no_task: |
| 3089 | return result; | 3074 | return ERR_PTR(result); |
| 3090 | } | 3075 | } |
| 3091 | 3076 | ||
| 3092 | /* | 3077 | /* |
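All the instantiate helpers in this file switch from returning a dentry pointer (NULL on success, ERR_PTR on failure) to returning 0 or -errno, with the lookup entry points converting back to an error pointer at the boundary; proc_fill_cache() is also rewritten around d_hash_and_lookup() and a plain dir_emit() fallback. A compact userspace analogue of the return-code convention (err_ptr() is only a stand-in for the kernel's ERR_PTR()):

    #include <errno.h>

    static void *err_ptr(long err) { return (void *)err; }  /* stand-in for ERR_PTR() */

    static int instantiate(int ok)          /* was: returned a dentry pointer */
    {
            return ok ? 0 : -ENOENT;
    }

    void *lookup(int ok)
    {
            int error = instantiate(ok);
            return error ? err_ptr(error) : NULL;   /* NULL still means success here */
    }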
diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 1441f143c43b..75f2890abbd8 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c | |||
| @@ -167,11 +167,10 @@ static int proc_fd_link(struct dentry *dentry, struct path *path) | |||
| 167 | return ret; | 167 | return ret; |
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | static struct dentry * | 170 | static int |
| 171 | proc_fd_instantiate(struct inode *dir, struct dentry *dentry, | 171 | proc_fd_instantiate(struct inode *dir, struct dentry *dentry, |
| 172 | struct task_struct *task, const void *ptr) | 172 | struct task_struct *task, const void *ptr) |
| 173 | { | 173 | { |
| 174 | struct dentry *error = ERR_PTR(-ENOENT); | ||
| 175 | unsigned fd = (unsigned long)ptr; | 174 | unsigned fd = (unsigned long)ptr; |
| 176 | struct proc_inode *ei; | 175 | struct proc_inode *ei; |
| 177 | struct inode *inode; | 176 | struct inode *inode; |
| @@ -194,9 +193,9 @@ proc_fd_instantiate(struct inode *dir, struct dentry *dentry, | |||
| 194 | 193 | ||
| 195 | /* Close the race of the process dying before we return the dentry */ | 194 | /* Close the race of the process dying before we return the dentry */ |
| 196 | if (tid_fd_revalidate(dentry, 0)) | 195 | if (tid_fd_revalidate(dentry, 0)) |
| 197 | error = NULL; | 196 | return 0; |
| 198 | out: | 197 | out: |
| 199 | return error; | 198 | return -ENOENT; |
| 200 | } | 199 | } |
| 201 | 200 | ||
| 202 | static struct dentry *proc_lookupfd_common(struct inode *dir, | 201 | static struct dentry *proc_lookupfd_common(struct inode *dir, |
| @@ -204,7 +203,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir, | |||
| 204 | instantiate_t instantiate) | 203 | instantiate_t instantiate) |
| 205 | { | 204 | { |
| 206 | struct task_struct *task = get_proc_task(dir); | 205 | struct task_struct *task = get_proc_task(dir); |
| 207 | struct dentry *result = ERR_PTR(-ENOENT); | 206 | int result = -ENOENT; |
| 208 | unsigned fd = name_to_int(dentry); | 207 | unsigned fd = name_to_int(dentry); |
| 209 | 208 | ||
| 210 | if (!task) | 209 | if (!task) |
| @@ -216,7 +215,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir, | |||
| 216 | out: | 215 | out: |
| 217 | put_task_struct(task); | 216 | put_task_struct(task); |
| 218 | out_no_task: | 217 | out_no_task: |
| 219 | return result; | 218 | return ERR_PTR(result); |
| 220 | } | 219 | } |
| 221 | 220 | ||
| 222 | static int proc_readfd_common(struct file *file, struct dir_context *ctx, | 221 | static int proc_readfd_common(struct file *file, struct dir_context *ctx, |
| @@ -300,11 +299,10 @@ const struct inode_operations proc_fd_inode_operations = { | |||
| 300 | .setattr = proc_setattr, | 299 | .setattr = proc_setattr, |
| 301 | }; | 300 | }; |
| 302 | 301 | ||
| 303 | static struct dentry * | 302 | static int |
| 304 | proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, | 303 | proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, |
| 305 | struct task_struct *task, const void *ptr) | 304 | struct task_struct *task, const void *ptr) |
| 306 | { | 305 | { |
| 307 | struct dentry *error = ERR_PTR(-ENOENT); | ||
| 308 | unsigned fd = (unsigned long)ptr; | 306 | unsigned fd = (unsigned long)ptr; |
| 309 | struct proc_inode *ei; | 307 | struct proc_inode *ei; |
| 310 | struct inode *inode; | 308 | struct inode *inode; |
| @@ -324,9 +322,9 @@ proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, | |||
| 324 | 322 | ||
| 325 | /* Close the race of the process dying before we return the dentry */ | 323 | /* Close the race of the process dying before we return the dentry */ |
| 326 | if (tid_fd_revalidate(dentry, 0)) | 324 | if (tid_fd_revalidate(dentry, 0)) |
| 327 | error = NULL; | 325 | return 0; |
| 328 | out: | 326 | out: |
| 329 | return error; | 327 | return -ENOENT; |
| 330 | } | 328 | } |
| 331 | 329 | ||
| 332 | static struct dentry * | 330 | static struct dentry * |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 4eae2e149f31..651d09a11dde 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
| @@ -170,7 +170,7 @@ extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned | |||
| 170 | extern loff_t mem_lseek(struct file *, loff_t, int); | 170 | extern loff_t mem_lseek(struct file *, loff_t, int); |
| 171 | 171 | ||
| 172 | /* Lookups */ | 172 | /* Lookups */ |
| 173 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | 173 | typedef int instantiate_t(struct inode *, struct dentry *, |
| 174 | struct task_struct *, const void *); | 174 | struct task_struct *, const void *); |
| 175 | extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int, | 175 | extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int, |
| 176 | instantiate_t, struct task_struct *, const void *); | 176 | instantiate_t, struct task_struct *, const void *); |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 0a22194e5d58..06ea155e1a59 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
| @@ -408,7 +408,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) | |||
| 408 | prpsinfo.pr_zomb = 0; | 408 | prpsinfo.pr_zomb = 0; |
| 409 | 409 | ||
| 410 | strcpy(prpsinfo.pr_fname, "vmlinux"); | 410 | strcpy(prpsinfo.pr_fname, "vmlinux"); |
| 411 | strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ); | 411 | strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs)); |
| 412 | 412 | ||
| 413 | nhdr->p_filesz += notesize(&notes[1]); | 413 | nhdr->p_filesz += notesize(&notes[1]); |
| 414 | bufp = storenote(&notes[1], bufp); | 414 | bufp = storenote(&notes[1], bufp); |
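strncpy() stops at the buffer size but leaves the destination unterminated when the source is at least that long, so pr_psargs could end up without a NUL; strlcpy() always terminates (and does not zero-pad the remainder). A standalone illustration, with a local strlcpy since not every libc ships one:

    #include <string.h>
    #include <stdio.h>

    static size_t my_strlcpy(char *dst, const char *src, size_t size)
    {
            size_t len = strlen(src);
            if (size) {
                    size_t n = len >= size ? size - 1 : len;
                    memcpy(dst, src, n);
                    dst[n] = '\0';      /* always terminated */
            }
            return len;
    }

    int main(void)
    {
            char a[8], b[8];

            strncpy(a, "command line that is too long", sizeof(a)); /* no terminator */
            my_strlcpy(b, "command line that is too long", sizeof(b)); /* "command" */
            printf("%.8s / %s\n", a, b);
            return 0;
    }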
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index f6abbbbfad8a..49a7fff2e83a 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c | |||
| @@ -187,13 +187,12 @@ static const struct inode_operations proc_ns_link_inode_operations = { | |||
| 187 | .setattr = proc_setattr, | 187 | .setattr = proc_setattr, |
| 188 | }; | 188 | }; |
| 189 | 189 | ||
| 190 | static struct dentry *proc_ns_instantiate(struct inode *dir, | 190 | static int proc_ns_instantiate(struct inode *dir, |
| 191 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 191 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
| 192 | { | 192 | { |
| 193 | const struct proc_ns_operations *ns_ops = ptr; | 193 | const struct proc_ns_operations *ns_ops = ptr; |
| 194 | struct inode *inode; | 194 | struct inode *inode; |
| 195 | struct proc_inode *ei; | 195 | struct proc_inode *ei; |
| 196 | struct dentry *error = ERR_PTR(-ENOENT); | ||
| 197 | 196 | ||
| 198 | inode = proc_pid_make_inode(dir->i_sb, task); | 197 | inode = proc_pid_make_inode(dir->i_sb, task); |
| 199 | if (!inode) | 198 | if (!inode) |
| @@ -208,9 +207,9 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, | |||
| 208 | d_add(dentry, inode); | 207 | d_add(dentry, inode); |
| 209 | /* Close the race of the process dying before we return the dentry */ | 208 | /* Close the race of the process dying before we return the dentry */ |
| 210 | if (pid_revalidate(dentry, 0)) | 209 | if (pid_revalidate(dentry, 0)) |
| 211 | error = NULL; | 210 | return 0; |
| 212 | out: | 211 | out: |
| 213 | return error; | 212 | return -ENOENT; |
| 214 | } | 213 | } |
| 215 | 214 | ||
| 216 | static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx) | 215 | static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx) |
| @@ -248,12 +247,12 @@ const struct file_operations proc_ns_dir_operations = { | |||
| 248 | static struct dentry *proc_ns_dir_lookup(struct inode *dir, | 247 | static struct dentry *proc_ns_dir_lookup(struct inode *dir, |
| 249 | struct dentry *dentry, unsigned int flags) | 248 | struct dentry *dentry, unsigned int flags) |
| 250 | { | 249 | { |
| 251 | struct dentry *error; | 250 | int error; |
| 252 | struct task_struct *task = get_proc_task(dir); | 251 | struct task_struct *task = get_proc_task(dir); |
| 253 | const struct proc_ns_operations **entry, **last; | 252 | const struct proc_ns_operations **entry, **last; |
| 254 | unsigned int len = dentry->d_name.len; | 253 | unsigned int len = dentry->d_name.len; |
| 255 | 254 | ||
| 256 | error = ERR_PTR(-ENOENT); | 255 | error = -ENOENT; |
| 257 | 256 | ||
| 258 | if (!task) | 257 | if (!task) |
| 259 | goto out_no_task; | 258 | goto out_no_task; |
| @@ -272,7 +271,7 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, | |||
| 272 | out: | 271 | out: |
| 273 | put_task_struct(task); | 272 | put_task_struct(task); |
| 274 | out_no_task: | 273 | out_no_task: |
| 275 | return error; | 274 | return ERR_PTR(error); |
| 276 | } | 275 | } |
| 277 | 276 | ||
| 278 | const struct inode_operations proc_ns_dir_inode_operations = { | 277 | const struct inode_operations proc_ns_dir_inode_operations = { |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f3a570e7c257..71290463a1d3 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
| @@ -796,15 +796,16 @@ static int sysctl_is_seen(struct ctl_table_header *p) | |||
| 796 | return res; | 796 | return res; |
| 797 | } | 797 | } |
| 798 | 798 | ||
| 799 | static int proc_sys_compare(const struct dentry *parent, | 799 | static int proc_sys_compare(const struct dentry *parent, const struct dentry *dentry, |
| 800 | const struct inode *pinode, | ||
| 801 | const struct dentry *dentry, const struct inode *inode, | ||
| 802 | unsigned int len, const char *str, const struct qstr *name) | 800 | unsigned int len, const char *str, const struct qstr *name) |
| 803 | { | 801 | { |
| 804 | struct ctl_table_header *head; | 802 | struct ctl_table_header *head; |
| 803 | struct inode *inode; | ||
| 804 | |||
| 805 | /* Although proc doesn't have negative dentries, rcu-walk means | 805 | /* Although proc doesn't have negative dentries, rcu-walk means |
| 806 | * that inode here can be NULL */ | 806 | * that inode here can be NULL */ |
| 807 | /* AV: can it, indeed? */ | 807 | /* AV: can it, indeed? */ |
| 808 | inode = ACCESS_ONCE(dentry->d_inode); | ||
| 808 | if (!inode) | 809 | if (!inode) |
| 809 | return 1; | 810 | return 1; |
| 810 | if (name->len != len) | 811 | if (name->len != len) |
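In the proc_sysctl.c hunk the comparison callback now fetches `dentry->d_inode` itself, exactly once, through `ACCESS_ONCE()`, because under RCU-walk the inode can disappear concurrently and may be observed as NULL. A rough userspace model of that single-snapshot pattern follows; the `ACCESS_ONCE()` definition mirrors the volatile-cast form used by kernels of this era, and the struct layouts are stand-ins, not kernel code.

```c
#include <stdio.h>

#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

struct inode  { long i_ino; };
struct dentry { struct inode *d_inode; };

/* returns 0 on match, 1 otherwise, like a d_compare callback */
static int compare_under_rcu(const struct dentry *dentry, long want_ino)
{
	/* one read; later uses cannot see a different value */
	struct inode *inode = ACCESS_ONCE(dentry->d_inode);

	if (!inode)		/* rcu-walk: the dentry may have gone negative */
		return 1;
	return inode->i_ino != want_ino;
}

int main(void)
{
	struct inode  ino = { .i_ino = 42 };
	struct dentry d   = { .d_inode = &ino };

	printf("match: %s\n", compare_under_rcu(&d, 42) ? "no" : "yes");
	d.d_inode = NULL;	/* simulate the dentry turning negative */
	printf("match: %s\n", compare_under_rcu(&d, 42) ? "no" : "yes");
	return 0;
}
```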
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3e636d864d56..dbf61f6174f0 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include <linux/rmap.h> | 11 | #include <linux/rmap.h> |
| 12 | #include <linux/swap.h> | 12 | #include <linux/swap.h> |
| 13 | #include <linux/swapops.h> | 13 | #include <linux/swapops.h> |
| 14 | #include <linux/mmu_notifier.h> | ||
| 14 | 15 | ||
| 15 | #include <asm/elf.h> | 16 | #include <asm/elf.h> |
| 16 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
| @@ -688,10 +689,58 @@ const struct file_operations proc_tid_smaps_operations = { | |||
| 688 | .release = seq_release_private, | 689 | .release = seq_release_private, |
| 689 | }; | 690 | }; |
| 690 | 691 | ||
| 692 | /* | ||
| 693 | * We do not want to have constant page-shift bits sitting in | ||
| 694 | * pagemap entries and are about to reuse them some time soon. | ||
| 695 | * | ||
| 696 | * Here's the "migration strategy": | ||
| 697 | * 1. when the system boots these bits remain what they are, | ||
| 698 | * but a warning about future change is printed in log; | ||
| 699 | * 2. once anyone clears soft-dirty bits via clear_refs file, | ||
| 700 | * this flag is set to denote that the user is aware of the | ||
| 701 | * new API and that the page-shift bits have changed their meaning. | ||
| 702 | * The respective warning is printed in dmesg; | ||
| 703 | * 3. In a couple of releases we will remove all the mentions | ||
| 704 | * of page-shift in pagemap entries. | ||
| 705 | */ | ||
| 706 | |||
| 707 | static bool soft_dirty_cleared __read_mostly; | ||
| 708 | |||
| 709 | enum clear_refs_types { | ||
| 710 | CLEAR_REFS_ALL = 1, | ||
| 711 | CLEAR_REFS_ANON, | ||
| 712 | CLEAR_REFS_MAPPED, | ||
| 713 | CLEAR_REFS_SOFT_DIRTY, | ||
| 714 | CLEAR_REFS_LAST, | ||
| 715 | }; | ||
| 716 | |||
| 717 | struct clear_refs_private { | ||
| 718 | struct vm_area_struct *vma; | ||
| 719 | enum clear_refs_types type; | ||
| 720 | }; | ||
| 721 | |||
| 722 | static inline void clear_soft_dirty(struct vm_area_struct *vma, | ||
| 723 | unsigned long addr, pte_t *pte) | ||
| 724 | { | ||
| 725 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
| 726 | /* | ||
| 727 | * The soft-dirty tracker uses #PF-s to catch writes | ||
| 728 | * to pages, so write-protect the pte as well. See the | ||
| 729 | * Documentation/vm/soft-dirty.txt for full description | ||
| 730 | * of how soft-dirty works. | ||
| 731 | */ | ||
| 732 | pte_t ptent = *pte; | ||
| 733 | ptent = pte_wrprotect(ptent); | ||
| 734 | ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); | ||
| 735 | set_pte_at(vma->vm_mm, addr, pte, ptent); | ||
| 736 | #endif | ||
| 737 | } | ||
| 738 | |||
| 691 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | 739 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, |
| 692 | unsigned long end, struct mm_walk *walk) | 740 | unsigned long end, struct mm_walk *walk) |
| 693 | { | 741 | { |
| 694 | struct vm_area_struct *vma = walk->private; | 742 | struct clear_refs_private *cp = walk->private; |
| 743 | struct vm_area_struct *vma = cp->vma; | ||
| 695 | pte_t *pte, ptent; | 744 | pte_t *pte, ptent; |
| 696 | spinlock_t *ptl; | 745 | spinlock_t *ptl; |
| 697 | struct page *page; | 746 | struct page *page; |
| @@ -706,6 +755,11 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
| 706 | if (!pte_present(ptent)) | 755 | if (!pte_present(ptent)) |
| 707 | continue; | 756 | continue; |
| 708 | 757 | ||
| 758 | if (cp->type == CLEAR_REFS_SOFT_DIRTY) { | ||
| 759 | clear_soft_dirty(vma, addr, pte); | ||
| 760 | continue; | ||
| 761 | } | ||
| 762 | |||
| 709 | page = vm_normal_page(vma, addr, ptent); | 763 | page = vm_normal_page(vma, addr, ptent); |
| 710 | if (!page) | 764 | if (!page) |
| 711 | continue; | 765 | continue; |
| @@ -719,10 +773,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
| 719 | return 0; | 773 | return 0; |
| 720 | } | 774 | } |
| 721 | 775 | ||
| 722 | #define CLEAR_REFS_ALL 1 | ||
| 723 | #define CLEAR_REFS_ANON 2 | ||
| 724 | #define CLEAR_REFS_MAPPED 3 | ||
| 725 | |||
| 726 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | 776 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, |
| 727 | size_t count, loff_t *ppos) | 777 | size_t count, loff_t *ppos) |
| 728 | { | 778 | { |
| @@ -730,7 +780,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
| 730 | char buffer[PROC_NUMBUF]; | 780 | char buffer[PROC_NUMBUF]; |
| 731 | struct mm_struct *mm; | 781 | struct mm_struct *mm; |
| 732 | struct vm_area_struct *vma; | 782 | struct vm_area_struct *vma; |
| 733 | int type; | 783 | enum clear_refs_types type; |
| 784 | int itype; | ||
| 734 | int rv; | 785 | int rv; |
| 735 | 786 | ||
| 736 | memset(buffer, 0, sizeof(buffer)); | 787 | memset(buffer, 0, sizeof(buffer)); |
| @@ -738,23 +789,37 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
| 738 | count = sizeof(buffer) - 1; | 789 | count = sizeof(buffer) - 1; |
| 739 | if (copy_from_user(buffer, buf, count)) | 790 | if (copy_from_user(buffer, buf, count)) |
| 740 | return -EFAULT; | 791 | return -EFAULT; |
| 741 | rv = kstrtoint(strstrip(buffer), 10, &type); | 792 | rv = kstrtoint(strstrip(buffer), 10, &itype); |
| 742 | if (rv < 0) | 793 | if (rv < 0) |
| 743 | return rv; | 794 | return rv; |
| 744 | if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) | 795 | type = (enum clear_refs_types)itype; |
| 796 | if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) | ||
| 745 | return -EINVAL; | 797 | return -EINVAL; |
| 798 | |||
| 799 | if (type == CLEAR_REFS_SOFT_DIRTY) { | ||
| 800 | soft_dirty_cleared = true; | ||
| 801 | pr_warn_once("The pagemap bits 55-60 has changed their meaning! " | ||
| 802 | "See the linux/Documentation/vm/pagemap.txt for details.\n"); | ||
| 803 | } | ||
| 804 | |||
| 746 | task = get_proc_task(file_inode(file)); | 805 | task = get_proc_task(file_inode(file)); |
| 747 | if (!task) | 806 | if (!task) |
| 748 | return -ESRCH; | 807 | return -ESRCH; |
| 749 | mm = get_task_mm(task); | 808 | mm = get_task_mm(task); |
| 750 | if (mm) { | 809 | if (mm) { |
| 810 | struct clear_refs_private cp = { | ||
| 811 | .type = type, | ||
| 812 | }; | ||
| 751 | struct mm_walk clear_refs_walk = { | 813 | struct mm_walk clear_refs_walk = { |
| 752 | .pmd_entry = clear_refs_pte_range, | 814 | .pmd_entry = clear_refs_pte_range, |
| 753 | .mm = mm, | 815 | .mm = mm, |
| 816 | .private = &cp, | ||
| 754 | }; | 817 | }; |
| 755 | down_read(&mm->mmap_sem); | 818 | down_read(&mm->mmap_sem); |
| 819 | if (type == CLEAR_REFS_SOFT_DIRTY) | ||
| 820 | mmu_notifier_invalidate_range_start(mm, 0, -1); | ||
| 756 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 821 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
| 757 | clear_refs_walk.private = vma; | 822 | cp.vma = vma; |
| 758 | if (is_vm_hugetlb_page(vma)) | 823 | if (is_vm_hugetlb_page(vma)) |
| 759 | continue; | 824 | continue; |
| 760 | /* | 825 | /* |
| @@ -773,6 +838,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
| 773 | walk_page_range(vma->vm_start, vma->vm_end, | 838 | walk_page_range(vma->vm_start, vma->vm_end, |
| 774 | &clear_refs_walk); | 839 | &clear_refs_walk); |
| 775 | } | 840 | } |
| 841 | if (type == CLEAR_REFS_SOFT_DIRTY) | ||
| 842 | mmu_notifier_invalidate_range_end(mm, 0, -1); | ||
| 776 | flush_tlb_mm(mm); | 843 | flush_tlb_mm(mm); |
| 777 | up_read(&mm->mmap_sem); | 844 | up_read(&mm->mmap_sem); |
| 778 | mmput(mm); | 845 | mmput(mm); |
| @@ -794,6 +861,7 @@ typedef struct { | |||
| 794 | struct pagemapread { | 861 | struct pagemapread { |
| 795 | int pos, len; | 862 | int pos, len; |
| 796 | pagemap_entry_t *buffer; | 863 | pagemap_entry_t *buffer; |
| 864 | bool v2; | ||
| 797 | }; | 865 | }; |
| 798 | 866 | ||
| 799 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) | 867 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) |
| @@ -807,14 +875,17 @@ struct pagemapread { | |||
| 807 | #define PM_PSHIFT_BITS 6 | 875 | #define PM_PSHIFT_BITS 6 |
| 808 | #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) | 876 | #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) |
| 809 | #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) | 877 | #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) |
| 810 | #define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) | 878 | #define __PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) |
| 811 | #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) | 879 | #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) |
| 812 | #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) | 880 | #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) |
| 881 | /* in "new" pagemap pshift bits are occupied with more status bits */ | ||
| 882 | #define PM_STATUS2(v2, x) (__PM_PSHIFT(v2 ? x : PAGE_SHIFT)) | ||
| 813 | 883 | ||
| 884 | #define __PM_SOFT_DIRTY (1LL) | ||
| 814 | #define PM_PRESENT PM_STATUS(4LL) | 885 | #define PM_PRESENT PM_STATUS(4LL) |
| 815 | #define PM_SWAP PM_STATUS(2LL) | 886 | #define PM_SWAP PM_STATUS(2LL) |
| 816 | #define PM_FILE PM_STATUS(1LL) | 887 | #define PM_FILE PM_STATUS(1LL) |
| 817 | #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) | 888 | #define PM_NOT_PRESENT(v2) PM_STATUS2(v2, 0) |
| 818 | #define PM_END_OF_BUFFER 1 | 889 | #define PM_END_OF_BUFFER 1 |
| 819 | 890 | ||
| 820 | static inline pagemap_entry_t make_pme(u64 val) | 891 | static inline pagemap_entry_t make_pme(u64 val) |
| @@ -837,7 +908,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, | |||
| 837 | struct pagemapread *pm = walk->private; | 908 | struct pagemapread *pm = walk->private; |
| 838 | unsigned long addr; | 909 | unsigned long addr; |
| 839 | int err = 0; | 910 | int err = 0; |
| 840 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); | 911 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
| 841 | 912 | ||
| 842 | for (addr = start; addr < end; addr += PAGE_SIZE) { | 913 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
| 843 | err = add_to_pagemap(addr, &pme, pm); | 914 | err = add_to_pagemap(addr, &pme, pm); |
| @@ -847,11 +918,12 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, | |||
| 847 | return err; | 918 | return err; |
| 848 | } | 919 | } |
| 849 | 920 | ||
| 850 | static void pte_to_pagemap_entry(pagemap_entry_t *pme, | 921 | static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
| 851 | struct vm_area_struct *vma, unsigned long addr, pte_t pte) | 922 | struct vm_area_struct *vma, unsigned long addr, pte_t pte) |
| 852 | { | 923 | { |
| 853 | u64 frame, flags; | 924 | u64 frame, flags; |
| 854 | struct page *page = NULL; | 925 | struct page *page = NULL; |
| 926 | int flags2 = 0; | ||
| 855 | 927 | ||
| 856 | if (pte_present(pte)) { | 928 | if (pte_present(pte)) { |
| 857 | frame = pte_pfn(pte); | 929 | frame = pte_pfn(pte); |
| @@ -866,19 +938,21 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, | |||
| 866 | if (is_migration_entry(entry)) | 938 | if (is_migration_entry(entry)) |
| 867 | page = migration_entry_to_page(entry); | 939 | page = migration_entry_to_page(entry); |
| 868 | } else { | 940 | } else { |
| 869 | *pme = make_pme(PM_NOT_PRESENT); | 941 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
| 870 | return; | 942 | return; |
| 871 | } | 943 | } |
| 872 | 944 | ||
| 873 | if (page && !PageAnon(page)) | 945 | if (page && !PageAnon(page)) |
| 874 | flags |= PM_FILE; | 946 | flags |= PM_FILE; |
| 947 | if (pte_soft_dirty(pte)) | ||
| 948 | flags2 |= __PM_SOFT_DIRTY; | ||
| 875 | 949 | ||
| 876 | *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags); | 950 | *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); |
| 877 | } | 951 | } |
| 878 | 952 | ||
| 879 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 953 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
| 880 | static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | 954 | static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
| 881 | pmd_t pmd, int offset) | 955 | pmd_t pmd, int offset, int pmd_flags2) |
| 882 | { | 956 | { |
| 883 | /* | 957 | /* |
| 884 | * Currently pmd for thp is always present because thp can not be | 958 | * Currently pmd for thp is always present because thp can not be |
| @@ -887,13 +961,13 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | |||
| 887 | */ | 961 | */ |
| 888 | if (pmd_present(pmd)) | 962 | if (pmd_present(pmd)) |
| 889 | *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | 963 | *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) |
| 890 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); | 964 | | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); |
| 891 | else | 965 | else |
| 892 | *pme = make_pme(PM_NOT_PRESENT); | 966 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
| 893 | } | 967 | } |
| 894 | #else | 968 | #else |
| 895 | static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | 969 | static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
| 896 | pmd_t pmd, int offset) | 970 | pmd_t pmd, int offset, int pmd_flags2) |
| 897 | { | 971 | { |
| 898 | } | 972 | } |
| 899 | #endif | 973 | #endif |
| @@ -905,17 +979,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
| 905 | struct pagemapread *pm = walk->private; | 979 | struct pagemapread *pm = walk->private; |
| 906 | pte_t *pte; | 980 | pte_t *pte; |
| 907 | int err = 0; | 981 | int err = 0; |
| 908 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); | 982 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
| 909 | 983 | ||
| 910 | /* find the first VMA at or above 'addr' */ | 984 | /* find the first VMA at or above 'addr' */ |
| 911 | vma = find_vma(walk->mm, addr); | 985 | vma = find_vma(walk->mm, addr); |
| 912 | if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { | 986 | if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { |
| 987 | int pmd_flags2; | ||
| 988 | |||
| 989 | pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0); | ||
| 913 | for (; addr != end; addr += PAGE_SIZE) { | 990 | for (; addr != end; addr += PAGE_SIZE) { |
| 914 | unsigned long offset; | 991 | unsigned long offset; |
| 915 | 992 | ||
| 916 | offset = (addr & ~PAGEMAP_WALK_MASK) >> | 993 | offset = (addr & ~PAGEMAP_WALK_MASK) >> |
| 917 | PAGE_SHIFT; | 994 | PAGE_SHIFT; |
| 918 | thp_pmd_to_pagemap_entry(&pme, *pmd, offset); | 995 | thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2); |
| 919 | err = add_to_pagemap(addr, &pme, pm); | 996 | err = add_to_pagemap(addr, &pme, pm); |
| 920 | if (err) | 997 | if (err) |
| 921 | break; | 998 | break; |
| @@ -932,7 +1009,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
| 932 | * and need a new, higher one */ | 1009 | * and need a new, higher one */ |
| 933 | if (vma && (addr >= vma->vm_end)) { | 1010 | if (vma && (addr >= vma->vm_end)) { |
| 934 | vma = find_vma(walk->mm, addr); | 1011 | vma = find_vma(walk->mm, addr); |
| 935 | pme = make_pme(PM_NOT_PRESENT); | 1012 | pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
| 936 | } | 1013 | } |
| 937 | 1014 | ||
| 938 | /* check that 'vma' actually covers this address, | 1015 | /* check that 'vma' actually covers this address, |
| @@ -940,7 +1017,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
| 940 | if (vma && (vma->vm_start <= addr) && | 1017 | if (vma && (vma->vm_start <= addr) && |
| 941 | !is_vm_hugetlb_page(vma)) { | 1018 | !is_vm_hugetlb_page(vma)) { |
| 942 | pte = pte_offset_map(pmd, addr); | 1019 | pte = pte_offset_map(pmd, addr); |
| 943 | pte_to_pagemap_entry(&pme, vma, addr, *pte); | 1020 | pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); |
| 944 | /* unmap before userspace copy */ | 1021 | /* unmap before userspace copy */ |
| 945 | pte_unmap(pte); | 1022 | pte_unmap(pte); |
| 946 | } | 1023 | } |
| @@ -955,14 +1032,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
| 955 | } | 1032 | } |
| 956 | 1033 | ||
| 957 | #ifdef CONFIG_HUGETLB_PAGE | 1034 | #ifdef CONFIG_HUGETLB_PAGE |
| 958 | static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, | 1035 | static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
| 959 | pte_t pte, int offset) | 1036 | pte_t pte, int offset) |
| 960 | { | 1037 | { |
| 961 | if (pte_present(pte)) | 1038 | if (pte_present(pte)) |
| 962 | *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) | 1039 | *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) |
| 963 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); | 1040 | | PM_STATUS2(pm->v2, 0) | PM_PRESENT); |
| 964 | else | 1041 | else |
| 965 | *pme = make_pme(PM_NOT_PRESENT); | 1042 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
| 966 | } | 1043 | } |
| 967 | 1044 | ||
| 968 | /* This function walks within one hugetlb entry in the single call */ | 1045 | /* This function walks within one hugetlb entry in the single call */ |
| @@ -976,7 +1053,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
| 976 | 1053 | ||
| 977 | for (; addr != end; addr += PAGE_SIZE) { | 1054 | for (; addr != end; addr += PAGE_SIZE) { |
| 978 | int offset = (addr & ~hmask) >> PAGE_SHIFT; | 1055 | int offset = (addr & ~hmask) >> PAGE_SHIFT; |
| 979 | huge_pte_to_pagemap_entry(&pme, *pte, offset); | 1056 | huge_pte_to_pagemap_entry(&pme, pm, *pte, offset); |
| 980 | err = add_to_pagemap(addr, &pme, pm); | 1057 | err = add_to_pagemap(addr, &pme, pm); |
| 981 | if (err) | 1058 | if (err) |
| 982 | return err; | 1059 | return err; |
| @@ -1038,6 +1115,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
| 1038 | if (!count) | 1115 | if (!count) |
| 1039 | goto out_task; | 1116 | goto out_task; |
| 1040 | 1117 | ||
| 1118 | pm.v2 = soft_dirty_cleared; | ||
| 1041 | pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); | 1119 | pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); |
| 1042 | pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); | 1120 | pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); |
| 1043 | ret = -ENOMEM; | 1121 | ret = -ENOMEM; |
| @@ -1110,9 +1188,18 @@ out: | |||
| 1110 | return ret; | 1188 | return ret; |
| 1111 | } | 1189 | } |
| 1112 | 1190 | ||
| 1191 | static int pagemap_open(struct inode *inode, struct file *file) | ||
| 1192 | { | ||
| 1193 | pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " | ||
| 1194 | "to stop being page-shift some time soon. See the " | ||
| 1195 | "linux/Documentation/vm/pagemap.txt for details.\n"); | ||
| 1196 | return 0; | ||
| 1197 | } | ||
| 1198 | |||
| 1113 | const struct file_operations proc_pagemap_operations = { | 1199 | const struct file_operations proc_pagemap_operations = { |
| 1114 | .llseek = mem_lseek, /* borrow this */ | 1200 | .llseek = mem_lseek, /* borrow this */ |
| 1115 | .read = pagemap_read, | 1201 | .read = pagemap_read, |
| 1202 | .open = pagemap_open, | ||
| 1116 | }; | 1203 | }; |
| 1117 | #endif /* CONFIG_PROC_PAGE_MONITOR */ | 1204 | #endif /* CONFIG_PROC_PAGE_MONITOR */ |
| 1118 | 1205 | ||
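Taken together, the task_mmu.c hunks add a fourth clear_refs mode (soft-dirty) and surface the per-page soft-dirty state in /proc/PID/pagemap. Below is a hedged userspace sketch of the intended workflow, assuming the bit layout documented in Documentation/vm/pagemap.txt after this series (bit 55 = soft-dirty) and a kernel built with CONFIG_MEM_SOFT_DIRTY.

```c
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define PM_SOFT_DIRTY_BIT (1ULL << 55)	/* per pagemap.txt after this series */

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	char *buf;
	uint64_t entry;
	int fd;

	if (posix_memalign((void **)&buf, page, page))
		return 1;
	memset(buf, 0, page);			/* make sure the page is populated */

	/* "4" == CLEAR_REFS_SOFT_DIRTY: write-protect and clear soft-dirty bits */
	fd = open("/proc/self/clear_refs", O_WRONLY);
	if (fd < 0 || write(fd, "4", 1) != 1) {
		perror("clear_refs");		/* needs CONFIG_MEM_SOFT_DIRTY */
		return 1;
	}
	close(fd);

	buf[0] = 1;				/* dirty the page again */

	fd = open("/proc/self/pagemap", O_RDONLY);
	if (fd < 0)
		return 1;
	if (pread(fd, &entry, sizeof(entry),
		  ((uintptr_t)buf / page) * sizeof(entry)) != (ssize_t)sizeof(entry))
		return 1;
	printf("page at %p soft-dirty: %s\n", (void *)buf,
	       (entry & PM_SOFT_DIRTY_BIT) ? "yes" : "no");
	close(fd);
	return 0;
}
```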
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 9610ac772d7e..061894625903 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c | |||
| @@ -20,8 +20,7 @@ static int uptime_proc_show(struct seq_file *m, void *v) | |||
| 20 | for_each_possible_cpu(i) | 20 | for_each_possible_cpu(i) |
| 21 | idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; | 21 | idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; |
| 22 | 22 | ||
| 23 | do_posix_clock_monotonic_gettime(&uptime); | 23 | get_monotonic_boottime(&uptime); |
| 24 | monotonic_to_bootbased(&uptime); | ||
| 25 | nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; | 24 | nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; |
| 26 | idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); | 25 | idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); |
| 27 | idle.tv_nsec = rem; | 26 | idle.tv_nsec = rem; |
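The uptime.c change replaces a monotonic clock read plus a manual boot-offset correction with a single boot-based read. The userspace analogue of the distinction is CLOCK_MONOTONIC versus CLOCK_BOOTTIME: the latter, like the boot-based time used here, keeps counting across suspend. A small illustrative program, not derived from this patch:

```c
#include <stdio.h>
#include <time.h>

#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7	/* older libc headers may lack the constant */
#endif

int main(void)
{
	struct timespec mono, boot;

	clock_gettime(CLOCK_MONOTONIC, &mono);	/* stops while suspended */
	clock_gettime(CLOCK_BOOTTIME, &boot);	/* keeps running across suspend */
	printf("monotonic: %ld.%09ld s\n", (long)mono.tv_sec, mono.tv_nsec);
	printf("boottime:  %ld.%09ld s\n", (long)boot.tv_sec, boot.tv_nsec);
	printf("time spent suspended: roughly %ld s\n",
	       (long)(boot.tv_sec - mono.tv_sec));
	return 0;
}
```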
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 17f7e080d7ff..28503172f2e4 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
| 21 | #include <linux/crash_dump.h> | 21 | #include <linux/crash_dump.h> |
| 22 | #include <linux/list.h> | 22 | #include <linux/list.h> |
| 23 | #include <linux/vmalloc.h> | ||
| 23 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
| 24 | #include <asm/io.h> | 25 | #include <asm/io.h> |
| 25 | #include "internal.h" | 26 | #include "internal.h" |
| @@ -32,6 +33,10 @@ static LIST_HEAD(vmcore_list); | |||
| 32 | /* Stores the pointer to the buffer containing kernel elf core headers. */ | 33 | /* Stores the pointer to the buffer containing kernel elf core headers. */ |
| 33 | static char *elfcorebuf; | 34 | static char *elfcorebuf; |
| 34 | static size_t elfcorebuf_sz; | 35 | static size_t elfcorebuf_sz; |
| 36 | static size_t elfcorebuf_sz_orig; | ||
| 37 | |||
| 38 | static char *elfnotes_buf; | ||
| 39 | static size_t elfnotes_sz; | ||
| 35 | 40 | ||
| 36 | /* Total size of vmcore file. */ | 41 | /* Total size of vmcore file. */ |
| 37 | static u64 vmcore_size; | 42 | static u64 vmcore_size; |
| @@ -118,27 +123,6 @@ static ssize_t read_from_oldmem(char *buf, size_t count, | |||
| 118 | return read; | 123 | return read; |
| 119 | } | 124 | } |
| 120 | 125 | ||
| 121 | /* Maps vmcore file offset to respective physical address in memory. */ | ||
| 122 | static u64 map_offset_to_paddr(loff_t offset, struct list_head *vc_list, | ||
| 123 | struct vmcore **m_ptr) | ||
| 124 | { | ||
| 125 | struct vmcore *m; | ||
| 126 | u64 paddr; | ||
| 127 | |||
| 128 | list_for_each_entry(m, vc_list, list) { | ||
| 129 | u64 start, end; | ||
| 130 | start = m->offset; | ||
| 131 | end = m->offset + m->size - 1; | ||
| 132 | if (offset >= start && offset <= end) { | ||
| 133 | paddr = m->paddr + offset - start; | ||
| 134 | *m_ptr = m; | ||
| 135 | return paddr; | ||
| 136 | } | ||
| 137 | } | ||
| 138 | *m_ptr = NULL; | ||
| 139 | return 0; | ||
| 140 | } | ||
| 141 | |||
| 142 | /* Read from the ELF header and then the crash dump. On error, negative value is | 126 | /* Read from the ELF header and then the crash dump. On error, negative value is |
| 143 | * returned otherwise number of bytes read are returned. | 127 | * returned otherwise number of bytes read are returned. |
| 144 | */ | 128 | */ |
| @@ -147,8 +131,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
| 147 | { | 131 | { |
| 148 | ssize_t acc = 0, tmp; | 132 | ssize_t acc = 0, tmp; |
| 149 | size_t tsz; | 133 | size_t tsz; |
| 150 | u64 start, nr_bytes; | 134 | u64 start; |
| 151 | struct vmcore *curr_m = NULL; | 135 | struct vmcore *m = NULL; |
| 152 | 136 | ||
| 153 | if (buflen == 0 || *fpos >= vmcore_size) | 137 | if (buflen == 0 || *fpos >= vmcore_size) |
| 154 | return 0; | 138 | return 0; |
| @@ -159,9 +143,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
| 159 | 143 | ||
| 160 | /* Read ELF core header */ | 144 | /* Read ELF core header */ |
| 161 | if (*fpos < elfcorebuf_sz) { | 145 | if (*fpos < elfcorebuf_sz) { |
| 162 | tsz = elfcorebuf_sz - *fpos; | 146 | tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen); |
| 163 | if (buflen < tsz) | ||
| 164 | tsz = buflen; | ||
| 165 | if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) | 147 | if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) |
| 166 | return -EFAULT; | 148 | return -EFAULT; |
| 167 | buflen -= tsz; | 149 | buflen -= tsz; |
| @@ -174,39 +156,161 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
| 174 | return acc; | 156 | return acc; |
| 175 | } | 157 | } |
| 176 | 158 | ||
| 177 | start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m); | 159 | /* Read Elf note segment */ |
| 178 | if (!curr_m) | 160 | if (*fpos < elfcorebuf_sz + elfnotes_sz) { |
| 179 | return -EINVAL; | 161 | void *kaddr; |
| 180 | |||
| 181 | while (buflen) { | ||
| 182 | tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK)); | ||
| 183 | 162 | ||
| 184 | /* Calculate left bytes in current memory segment. */ | 163 | tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); |
| 185 | nr_bytes = (curr_m->size - (start - curr_m->paddr)); | 164 | kaddr = elfnotes_buf + *fpos - elfcorebuf_sz; |
| 186 | if (tsz > nr_bytes) | 165 | if (copy_to_user(buffer, kaddr, tsz)) |
| 187 | tsz = nr_bytes; | 166 | return -EFAULT; |
| 188 | |||
| 189 | tmp = read_from_oldmem(buffer, tsz, &start, 1); | ||
| 190 | if (tmp < 0) | ||
| 191 | return tmp; | ||
| 192 | buflen -= tsz; | 167 | buflen -= tsz; |
| 193 | *fpos += tsz; | 168 | *fpos += tsz; |
| 194 | buffer += tsz; | 169 | buffer += tsz; |
| 195 | acc += tsz; | 170 | acc += tsz; |
| 196 | if (start >= (curr_m->paddr + curr_m->size)) { | 171 | |
| 197 | if (curr_m->list.next == &vmcore_list) | 172 | /* leave now if filled buffer already */ |
| 198 | return acc; /*EOF*/ | 173 | if (buflen == 0) |
| 199 | curr_m = list_entry(curr_m->list.next, | 174 | return acc; |
| 200 | struct vmcore, list); | 175 | } |
| 201 | start = curr_m->paddr; | 176 | |
| 177 | list_for_each_entry(m, &vmcore_list, list) { | ||
| 178 | if (*fpos < m->offset + m->size) { | ||
| 179 | tsz = min_t(size_t, m->offset + m->size - *fpos, buflen); | ||
| 180 | start = m->paddr + *fpos - m->offset; | ||
| 181 | tmp = read_from_oldmem(buffer, tsz, &start, 1); | ||
| 182 | if (tmp < 0) | ||
| 183 | return tmp; | ||
| 184 | buflen -= tsz; | ||
| 185 | *fpos += tsz; | ||
| 186 | buffer += tsz; | ||
| 187 | acc += tsz; | ||
| 188 | |||
| 189 | /* leave now if filled buffer already */ | ||
| 190 | if (buflen == 0) | ||
| 191 | return acc; | ||
| 202 | } | 192 | } |
| 203 | } | 193 | } |
| 194 | |||
| 204 | return acc; | 195 | return acc; |
| 205 | } | 196 | } |
| 206 | 197 | ||
| 198 | /** | ||
| 199 | * alloc_elfnotes_buf - allocate buffer for ELF note segment in | ||
| 200 | * vmalloc memory | ||
| 201 | * | ||
| 202 | * @notes_sz: size of buffer | ||
| 203 | * | ||
| 204 | * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap | ||
| 205 | * the buffer to user-space by means of remap_vmalloc_range(). | ||
| 206 | * | ||
| 207 | * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is | ||
| 208 | * disabled and there's no need to allow users to mmap the buffer. | ||
| 209 | */ | ||
| 210 | static inline char *alloc_elfnotes_buf(size_t notes_sz) | ||
| 211 | { | ||
| 212 | #ifdef CONFIG_MMU | ||
| 213 | return vmalloc_user(notes_sz); | ||
| 214 | #else | ||
| 215 | return vzalloc(notes_sz); | ||
| 216 | #endif | ||
| 217 | } | ||
| 218 | |||
| 219 | /* | ||
| 220 | * Disable mmap_vmcore() if CONFIG_MMU is not defined. MMU is | ||
| 221 | * essential for mmap_vmcore() in order to map physically | ||
| 222 | * non-contiguous objects (ELF header, ELF note segment and memory | ||
| 223 | * regions in the 1st kernel pointed to by PT_LOAD entries) into | ||
| 224 | * virtually contiguous user-space in ELF layout. | ||
| 225 | */ | ||
| 226 | #ifdef CONFIG_MMU | ||
| 227 | static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | ||
| 228 | { | ||
| 229 | size_t size = vma->vm_end - vma->vm_start; | ||
| 230 | u64 start, end, len, tsz; | ||
| 231 | struct vmcore *m; | ||
| 232 | |||
| 233 | start = (u64)vma->vm_pgoff << PAGE_SHIFT; | ||
| 234 | end = start + size; | ||
| 235 | |||
| 236 | if (size > vmcore_size || end > vmcore_size) | ||
| 237 | return -EINVAL; | ||
| 238 | |||
| 239 | if (vma->vm_flags & (VM_WRITE | VM_EXEC)) | ||
| 240 | return -EPERM; | ||
| 241 | |||
| 242 | vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); | ||
| 243 | vma->vm_flags |= VM_MIXEDMAP; | ||
| 244 | |||
| 245 | len = 0; | ||
| 246 | |||
| 247 | if (start < elfcorebuf_sz) { | ||
| 248 | u64 pfn; | ||
| 249 | |||
| 250 | tsz = min(elfcorebuf_sz - (size_t)start, size); | ||
| 251 | pfn = __pa(elfcorebuf + start) >> PAGE_SHIFT; | ||
| 252 | if (remap_pfn_range(vma, vma->vm_start, pfn, tsz, | ||
| 253 | vma->vm_page_prot)) | ||
| 254 | return -EAGAIN; | ||
| 255 | size -= tsz; | ||
| 256 | start += tsz; | ||
| 257 | len += tsz; | ||
| 258 | |||
| 259 | if (size == 0) | ||
| 260 | return 0; | ||
| 261 | } | ||
| 262 | |||
| 263 | if (start < elfcorebuf_sz + elfnotes_sz) { | ||
| 264 | void *kaddr; | ||
| 265 | |||
| 266 | tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size); | ||
| 267 | kaddr = elfnotes_buf + start - elfcorebuf_sz; | ||
| 268 | if (remap_vmalloc_range_partial(vma, vma->vm_start + len, | ||
| 269 | kaddr, tsz)) | ||
| 270 | goto fail; | ||
| 271 | size -= tsz; | ||
| 272 | start += tsz; | ||
| 273 | len += tsz; | ||
| 274 | |||
| 275 | if (size == 0) | ||
| 276 | return 0; | ||
| 277 | } | ||
| 278 | |||
| 279 | list_for_each_entry(m, &vmcore_list, list) { | ||
| 280 | if (start < m->offset + m->size) { | ||
| 281 | u64 paddr = 0; | ||
| 282 | |||
| 283 | tsz = min_t(size_t, m->offset + m->size - start, size); | ||
| 284 | paddr = m->paddr + start - m->offset; | ||
| 285 | if (remap_pfn_range(vma, vma->vm_start + len, | ||
| 286 | paddr >> PAGE_SHIFT, tsz, | ||
| 287 | vma->vm_page_prot)) | ||
| 288 | goto fail; | ||
| 289 | size -= tsz; | ||
| 290 | start += tsz; | ||
| 291 | len += tsz; | ||
| 292 | |||
| 293 | if (size == 0) | ||
| 294 | return 0; | ||
| 295 | } | ||
| 296 | } | ||
| 297 | |||
| 298 | return 0; | ||
| 299 | fail: | ||
| 300 | do_munmap(vma->vm_mm, vma->vm_start, len); | ||
| 301 | return -EAGAIN; | ||
| 302 | } | ||
| 303 | #else | ||
| 304 | static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | ||
| 305 | { | ||
| 306 | return -ENOSYS; | ||
| 307 | } | ||
| 308 | #endif | ||
| 309 | |||
| 207 | static const struct file_operations proc_vmcore_operations = { | 310 | static const struct file_operations proc_vmcore_operations = { |
| 208 | .read = read_vmcore, | 311 | .read = read_vmcore, |
| 209 | .llseek = default_llseek, | 312 | .llseek = default_llseek, |
| 313 | .mmap = mmap_vmcore, | ||
| 210 | }; | 314 | }; |
| 211 | 315 | ||
| 212 | static struct vmcore* __init get_new_element(void) | 316 | static struct vmcore* __init get_new_element(void) |
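The block above adds mmap() support to /proc/vmcore, so dump tools can map the ELF header, the merged note segment, and the old kernel's memory directly instead of funnelling everything through read(). A rough userspace sketch of what that enables follows; it is only meaningful inside a kdump capture kernel, and real consumers such as makedumpfile map far larger windows.

```c
#include <elf.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	Elf64_Ehdr *ehdr;
	int fd = open("/proc/vmcore", O_RDONLY);

	if (fd < 0) {
		perror("open /proc/vmcore");	/* not running in a crash kernel */
		return 1;
	}
	/* map the first page read-only; writable/executable mappings are refused */
	ehdr = mmap(NULL, page, PROT_READ, MAP_PRIVATE, fd, 0);
	if (ehdr == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("e_phnum = %u, e_phoff = %llu\n",
	       ehdr->e_phnum, (unsigned long long)ehdr->e_phoff);
	munmap(ehdr, page);
	close(fd);
	return 0;
}
```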
| @@ -214,61 +318,40 @@ static struct vmcore* __init get_new_element(void) | |||
| 214 | return kzalloc(sizeof(struct vmcore), GFP_KERNEL); | 318 | return kzalloc(sizeof(struct vmcore), GFP_KERNEL); |
| 215 | } | 319 | } |
| 216 | 320 | ||
| 217 | static u64 __init get_vmcore_size_elf64(char *elfptr) | 321 | static u64 __init get_vmcore_size(size_t elfsz, size_t elfnotesegsz, |
| 322 | struct list_head *vc_list) | ||
| 218 | { | 323 | { |
| 219 | int i; | ||
| 220 | u64 size; | ||
| 221 | Elf64_Ehdr *ehdr_ptr; | ||
| 222 | Elf64_Phdr *phdr_ptr; | ||
| 223 | |||
| 224 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | ||
| 225 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); | ||
| 226 | size = sizeof(Elf64_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr)); | ||
| 227 | for (i = 0; i < ehdr_ptr->e_phnum; i++) { | ||
| 228 | size += phdr_ptr->p_memsz; | ||
| 229 | phdr_ptr++; | ||
| 230 | } | ||
| 231 | return size; | ||
| 232 | } | ||
| 233 | |||
| 234 | static u64 __init get_vmcore_size_elf32(char *elfptr) | ||
| 235 | { | ||
| 236 | int i; | ||
| 237 | u64 size; | 324 | u64 size; |
| 238 | Elf32_Ehdr *ehdr_ptr; | 325 | struct vmcore *m; |
| 239 | Elf32_Phdr *phdr_ptr; | ||
| 240 | 326 | ||
| 241 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | 327 | size = elfsz + elfnotesegsz; |
| 242 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); | 328 | list_for_each_entry(m, vc_list, list) { |
| 243 | size = sizeof(Elf32_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr)); | 329 | size += m->size; |
| 244 | for (i = 0; i < ehdr_ptr->e_phnum; i++) { | ||
| 245 | size += phdr_ptr->p_memsz; | ||
| 246 | phdr_ptr++; | ||
| 247 | } | 330 | } |
| 248 | return size; | 331 | return size; |
| 249 | } | 332 | } |
| 250 | 333 | ||
| 251 | /* Merges all the PT_NOTE headers into one. */ | 334 | /** |
| 252 | static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | 335 | * update_note_header_size_elf64 - update p_memsz member of each PT_NOTE entry |
| 253 | struct list_head *vc_list) | 336 | * |
| 337 | * @ehdr_ptr: ELF header | ||
| 338 | * | ||
| 339 | * This function updates p_memsz member of each PT_NOTE entry in the | ||
| 340 | * program header table pointed to by @ehdr_ptr to real size of ELF | ||
| 341 | * note segment. | ||
| 342 | */ | ||
| 343 | static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr) | ||
| 254 | { | 344 | { |
| 255 | int i, nr_ptnote=0, rc=0; | 345 | int i, rc=0; |
| 256 | char *tmp; | 346 | Elf64_Phdr *phdr_ptr; |
| 257 | Elf64_Ehdr *ehdr_ptr; | ||
| 258 | Elf64_Phdr phdr, *phdr_ptr; | ||
| 259 | Elf64_Nhdr *nhdr_ptr; | 347 | Elf64_Nhdr *nhdr_ptr; |
| 260 | u64 phdr_sz = 0, note_off; | ||
| 261 | 348 | ||
| 262 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | 349 | phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1); |
| 263 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); | ||
| 264 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 350 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
| 265 | int j; | ||
| 266 | void *notes_section; | 351 | void *notes_section; |
| 267 | struct vmcore *new; | ||
| 268 | u64 offset, max_sz, sz, real_sz = 0; | 352 | u64 offset, max_sz, sz, real_sz = 0; |
| 269 | if (phdr_ptr->p_type != PT_NOTE) | 353 | if (phdr_ptr->p_type != PT_NOTE) |
| 270 | continue; | 354 | continue; |
| 271 | nr_ptnote++; | ||
| 272 | max_sz = phdr_ptr->p_memsz; | 355 | max_sz = phdr_ptr->p_memsz; |
| 273 | offset = phdr_ptr->p_offset; | 356 | offset = phdr_ptr->p_offset; |
| 274 | notes_section = kmalloc(max_sz, GFP_KERNEL); | 357 | notes_section = kmalloc(max_sz, GFP_KERNEL); |
| @@ -280,7 +363,7 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
| 280 | return rc; | 363 | return rc; |
| 281 | } | 364 | } |
| 282 | nhdr_ptr = notes_section; | 365 | nhdr_ptr = notes_section; |
| 283 | for (j = 0; j < max_sz; j += sz) { | 366 | while (real_sz < max_sz) { |
| 284 | if (nhdr_ptr->n_namesz == 0) | 367 | if (nhdr_ptr->n_namesz == 0) |
| 285 | break; | 368 | break; |
| 286 | sz = sizeof(Elf64_Nhdr) + | 369 | sz = sizeof(Elf64_Nhdr) + |
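The size loop being reworked here walks an ELF note segment: each Elf64_Nhdr is followed by a 4-byte-aligned name and a 4-byte-aligned descriptor, and a zero n_namesz terminates the walk. The self-contained sketch below builds one note by hand and measures it the same way; it is illustrative only, with field names and alignment taken from the standard `<elf.h>` definitions.

```c
#include <elf.h>
#include <stdio.h>
#include <string.h>

#define ALIGN4(x) (((x) + 3u) & ~3u)

int main(void)
{
	/* keep the buffer 4-byte aligned like a real note segment */
	unsigned int seg_words[32] = { 0 };
	unsigned char *seg = (unsigned char *)seg_words;
	Elf64_Nhdr nhdr = { .n_namesz = 5, .n_descsz = 6, .n_type = 1 };
	size_t off = 0, real_sz = 0;

	/* one hand-built note: header + "CORE\0" (padded) + 6 desc bytes (padded) */
	memcpy(seg + off, &nhdr, sizeof(nhdr));
	off += sizeof(nhdr);
	memcpy(seg + off, "CORE", 5);
	off += ALIGN4(nhdr.n_namesz);
	memset(seg + off, 0xab, 6);
	off += ALIGN4(nhdr.n_descsz);

	/* measure the real size the same way the updated kernel loop does */
	while (real_sz < sizeof(seg_words)) {
		Elf64_Nhdr *p = (Elf64_Nhdr *)(seg + real_sz);

		if (p->n_namesz == 0)	/* an empty header terminates the walk */
			break;
		real_sz += sizeof(*p) + ALIGN4(p->n_namesz) + ALIGN4(p->n_descsz);
	}
	printf("real note segment size: %zu bytes (built %zu)\n", real_sz, off);
	return 0;
}
```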
| @@ -289,26 +372,122 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
| 289 | real_sz += sz; | 372 | real_sz += sz; |
| 290 | nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz); | 373 | nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz); |
| 291 | } | 374 | } |
| 292 | |||
| 293 | /* Add this contiguous chunk of notes section to vmcore list.*/ | ||
| 294 | new = get_new_element(); | ||
| 295 | if (!new) { | ||
| 296 | kfree(notes_section); | ||
| 297 | return -ENOMEM; | ||
| 298 | } | ||
| 299 | new->paddr = phdr_ptr->p_offset; | ||
| 300 | new->size = real_sz; | ||
| 301 | list_add_tail(&new->list, vc_list); | ||
| 302 | phdr_sz += real_sz; | ||
| 303 | kfree(notes_section); | 375 | kfree(notes_section); |
| 376 | phdr_ptr->p_memsz = real_sz; | ||
| 377 | } | ||
| 378 | |||
| 379 | return 0; | ||
| 380 | } | ||
| 381 | |||
| 382 | /** | ||
| 383 | * get_note_number_and_size_elf64 - get the number of PT_NOTE program | ||
| 384 | * headers and sum of real size of their ELF note segment headers and | ||
| 385 | * data. | ||
| 386 | * | ||
| 387 | * @ehdr_ptr: ELF header | ||
| 388 | * @nr_ptnote: buffer for the number of PT_NOTE program headers | ||
| 389 | * @sz_ptnote: buffer for size of unique PT_NOTE program header | ||
| 390 | * | ||
| 391 | * This function is used to merge multiple PT_NOTE program headers | ||
| 392 | * into a unique single one. The resulting unique entry will have | ||
| 393 | * @sz_ptnote in its phdr->p_mem. | ||
| 394 | * | ||
| 395 | * It is assumed that program headers with PT_NOTE type pointed to by | ||
| 396 | * @ehdr_ptr has already been updated by update_note_header_size_elf64 | ||
| 397 | * and each of PT_NOTE program headers has actual ELF note segment | ||
| 398 | * size in its p_memsz member. | ||
| 399 | */ | ||
| 400 | static int __init get_note_number_and_size_elf64(const Elf64_Ehdr *ehdr_ptr, | ||
| 401 | int *nr_ptnote, u64 *sz_ptnote) | ||
| 402 | { | ||
| 403 | int i; | ||
| 404 | Elf64_Phdr *phdr_ptr; | ||
| 405 | |||
| 406 | *nr_ptnote = *sz_ptnote = 0; | ||
| 407 | |||
| 408 | phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1); | ||
| 409 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
| 410 | if (phdr_ptr->p_type != PT_NOTE) | ||
| 411 | continue; | ||
| 412 | *nr_ptnote += 1; | ||
| 413 | *sz_ptnote += phdr_ptr->p_memsz; | ||
| 414 | } | ||
| 415 | |||
| 416 | return 0; | ||
| 417 | } | ||
| 418 | |||
| 419 | /** | ||
| 420 | * copy_notes_elf64 - copy ELF note segments in a given buffer | ||
| 421 | * | ||
| 422 | * @ehdr_ptr: ELF header | ||
| 423 | * @notes_buf: buffer into which ELF note segments are copied | ||
| 424 | * | ||
| 425 | * This function is used to copy ELF note segment in the 1st kernel | ||
| 426 | * into the buffer @notes_buf in the 2nd kernel. It is assumed that | ||
| 427 | * size of the buffer @notes_buf is equal to or larger than sum of the | ||
| 428 | * real ELF note segment headers and data. | ||
| 429 | * | ||
| 430 | * It is assumed that program headers with PT_NOTE type pointed to by | ||
| 431 | * @ehdr_ptr has already been updated by update_note_header_size_elf64 | ||
| 432 | * and each of PT_NOTE program headers has actual ELF note segment | ||
| 433 | * size in its p_memsz member. | ||
| 434 | */ | ||
| 435 | static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf) | ||
| 436 | { | ||
| 437 | int i, rc=0; | ||
| 438 | Elf64_Phdr *phdr_ptr; | ||
| 439 | |||
| 440 | phdr_ptr = (Elf64_Phdr*)(ehdr_ptr + 1); | ||
| 441 | |||
| 442 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
| 443 | u64 offset; | ||
| 444 | if (phdr_ptr->p_type != PT_NOTE) | ||
| 445 | continue; | ||
| 446 | offset = phdr_ptr->p_offset; | ||
| 447 | rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); | ||
| 448 | if (rc < 0) | ||
| 449 | return rc; | ||
| 450 | notes_buf += phdr_ptr->p_memsz; | ||
| 304 | } | 451 | } |
| 305 | 452 | ||
| 453 | return 0; | ||
| 454 | } | ||
| 455 | |||
| 456 | /* Merges all the PT_NOTE headers into one. */ | ||
| 457 | static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | ||
| 458 | char **notes_buf, size_t *notes_sz) | ||
| 459 | { | ||
| 460 | int i, nr_ptnote=0, rc=0; | ||
| 461 | char *tmp; | ||
| 462 | Elf64_Ehdr *ehdr_ptr; | ||
| 463 | Elf64_Phdr phdr; | ||
| 464 | u64 phdr_sz = 0, note_off; | ||
| 465 | |||
| 466 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | ||
| 467 | |||
| 468 | rc = update_note_header_size_elf64(ehdr_ptr); | ||
| 469 | if (rc < 0) | ||
| 470 | return rc; | ||
| 471 | |||
| 472 | rc = get_note_number_and_size_elf64(ehdr_ptr, &nr_ptnote, &phdr_sz); | ||
| 473 | if (rc < 0) | ||
| 474 | return rc; | ||
| 475 | |||
| 476 | *notes_sz = roundup(phdr_sz, PAGE_SIZE); | ||
| 477 | *notes_buf = alloc_elfnotes_buf(*notes_sz); | ||
| 478 | if (!*notes_buf) | ||
| 479 | return -ENOMEM; | ||
| 480 | |||
| 481 | rc = copy_notes_elf64(ehdr_ptr, *notes_buf); | ||
| 482 | if (rc < 0) | ||
| 483 | return rc; | ||
| 484 | |||
| 306 | /* Prepare merged PT_NOTE program header. */ | 485 | /* Prepare merged PT_NOTE program header. */ |
| 307 | phdr.p_type = PT_NOTE; | 486 | phdr.p_type = PT_NOTE; |
| 308 | phdr.p_flags = 0; | 487 | phdr.p_flags = 0; |
| 309 | note_off = sizeof(Elf64_Ehdr) + | 488 | note_off = sizeof(Elf64_Ehdr) + |
| 310 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr); | 489 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr); |
| 311 | phdr.p_offset = note_off; | 490 | phdr.p_offset = roundup(note_off, PAGE_SIZE); |
| 312 | phdr.p_vaddr = phdr.p_paddr = 0; | 491 | phdr.p_vaddr = phdr.p_paddr = 0; |
| 313 | phdr.p_filesz = phdr.p_memsz = phdr_sz; | 492 | phdr.p_filesz = phdr.p_memsz = phdr_sz; |
| 314 | phdr.p_align = 0; | 493 | phdr.p_align = 0; |
| @@ -322,6 +501,8 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
| 322 | i = (nr_ptnote - 1) * sizeof(Elf64_Phdr); | 501 | i = (nr_ptnote - 1) * sizeof(Elf64_Phdr); |
| 323 | *elfsz = *elfsz - i; | 502 | *elfsz = *elfsz - i; |
| 324 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr))); | 503 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr))); |
| 504 | memset(elfptr + *elfsz, 0, i); | ||
| 505 | *elfsz = roundup(*elfsz, PAGE_SIZE); | ||
| 325 | 506 | ||
| 326 | /* Modify e_phnum to reflect merged headers. */ | 507 | /* Modify e_phnum to reflect merged headers. */ |
| 327 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; | 508 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; |
| @@ -329,27 +510,27 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
| 329 | return 0; | 510 | return 0; |
| 330 | } | 511 | } |
| 331 | 512 | ||
| 332 | /* Merges all the PT_NOTE headers into one. */ | 513 | /** |
| 333 | static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | 514 | * update_note_header_size_elf32 - update p_memsz member of each PT_NOTE entry |
| 334 | struct list_head *vc_list) | 515 | * |
| 516 | * @ehdr_ptr: ELF header | ||
| 517 | * | ||
| 518 | * This function updates p_memsz member of each PT_NOTE entry in the | ||
| 519 | * program header table pointed to by @ehdr_ptr to real size of ELF | ||
| 520 | * note segment. | ||
| 521 | */ | ||
| 522 | static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr) | ||
| 335 | { | 523 | { |
| 336 | int i, nr_ptnote=0, rc=0; | 524 | int i, rc=0; |
| 337 | char *tmp; | 525 | Elf32_Phdr *phdr_ptr; |
| 338 | Elf32_Ehdr *ehdr_ptr; | ||
| 339 | Elf32_Phdr phdr, *phdr_ptr; | ||
| 340 | Elf32_Nhdr *nhdr_ptr; | 526 | Elf32_Nhdr *nhdr_ptr; |
| 341 | u64 phdr_sz = 0, note_off; | ||
| 342 | 527 | ||
| 343 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | 528 | phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1); |
| 344 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); | ||
| 345 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 529 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
| 346 | int j; | ||
| 347 | void *notes_section; | 530 | void *notes_section; |
| 348 | struct vmcore *new; | ||
| 349 | u64 offset, max_sz, sz, real_sz = 0; | 531 | u64 offset, max_sz, sz, real_sz = 0; |
| 350 | if (phdr_ptr->p_type != PT_NOTE) | 532 | if (phdr_ptr->p_type != PT_NOTE) |
| 351 | continue; | 533 | continue; |
| 352 | nr_ptnote++; | ||
| 353 | max_sz = phdr_ptr->p_memsz; | 534 | max_sz = phdr_ptr->p_memsz; |
| 354 | offset = phdr_ptr->p_offset; | 535 | offset = phdr_ptr->p_offset; |
| 355 | notes_section = kmalloc(max_sz, GFP_KERNEL); | 536 | notes_section = kmalloc(max_sz, GFP_KERNEL); |
| @@ -361,7 +542,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
| 361 | return rc; | 542 | return rc; |
| 362 | } | 543 | } |
| 363 | nhdr_ptr = notes_section; | 544 | nhdr_ptr = notes_section; |
| 364 | for (j = 0; j < max_sz; j += sz) { | 545 | while (real_sz < max_sz) { |
| 365 | if (nhdr_ptr->n_namesz == 0) | 546 | if (nhdr_ptr->n_namesz == 0) |
| 366 | break; | 547 | break; |
| 367 | sz = sizeof(Elf32_Nhdr) + | 548 | sz = sizeof(Elf32_Nhdr) + |
| @@ -370,26 +551,122 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
| 370 | real_sz += sz; | 551 | real_sz += sz; |
| 371 | nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz); | 552 | nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz); |
| 372 | } | 553 | } |
| 373 | |||
| 374 | /* Add this contiguous chunk of notes section to vmcore list.*/ | ||
| 375 | new = get_new_element(); | ||
| 376 | if (!new) { | ||
| 377 | kfree(notes_section); | ||
| 378 | return -ENOMEM; | ||
| 379 | } | ||
| 380 | new->paddr = phdr_ptr->p_offset; | ||
| 381 | new->size = real_sz; | ||
| 382 | list_add_tail(&new->list, vc_list); | ||
| 383 | phdr_sz += real_sz; | ||
| 384 | kfree(notes_section); | 554 | kfree(notes_section); |
| 555 | phdr_ptr->p_memsz = real_sz; | ||
| 556 | } | ||
| 557 | |||
| 558 | return 0; | ||
| 559 | } | ||
| 560 | |||
| 561 | /** | ||
| 562 | * get_note_number_and_size_elf32 - get the number of PT_NOTE program | ||
| 563 | * headers and sum of real size of their ELF note segment headers and | ||
| 564 | * data. | ||
| 565 | * | ||
| 566 | * @ehdr_ptr: ELF header | ||
| 567 | * @nr_ptnote: buffer for the number of PT_NOTE program headers | ||
| 568 | * @sz_ptnote: buffer for size of unique PT_NOTE program header | ||
| 569 | * | ||
| 570 | * This function is used to merge multiple PT_NOTE program headers | ||
| 571 | * into a unique single one. The resulting unique entry will have | ||
| 572 | * @sz_ptnote in its phdr->p_mem. | ||
| 573 | * | ||
| 574 | * It is assumed that program headers with PT_NOTE type pointed to by | ||
| 575 | * @ehdr_ptr has already been updated by update_note_header_size_elf32 | ||
| 576 | * and each of PT_NOTE program headers has actual ELF note segment | ||
| 577 | * size in its p_memsz member. | ||
| 578 | */ | ||
| 579 | static int __init get_note_number_and_size_elf32(const Elf32_Ehdr *ehdr_ptr, | ||
| 580 | int *nr_ptnote, u64 *sz_ptnote) | ||
| 581 | { | ||
| 582 | int i; | ||
| 583 | Elf32_Phdr *phdr_ptr; | ||
| 584 | |||
| 585 | *nr_ptnote = *sz_ptnote = 0; | ||
| 586 | |||
| 587 | phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1); | ||
| 588 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
| 589 | if (phdr_ptr->p_type != PT_NOTE) | ||
| 590 | continue; | ||
| 591 | *nr_ptnote += 1; | ||
| 592 | *sz_ptnote += phdr_ptr->p_memsz; | ||
| 593 | } | ||
| 594 | |||
| 595 | return 0; | ||
| 596 | } | ||
| 597 | |||
| 598 | /** | ||
| 599 | * copy_notes_elf32 - copy ELF note segments in a given buffer | ||
| 600 | * | ||
| 601 | * @ehdr_ptr: ELF header | ||
| 602 | * @notes_buf: buffer into which ELF note segments are copied | ||
| 603 | * | ||
| 604 | * This function is used to copy ELF note segment in the 1st kernel | ||
| 605 | * into the buffer @notes_buf in the 2nd kernel. It is assumed that | ||
| 606 | * size of the buffer @notes_buf is equal to or larger than sum of the | ||
| 607 | * real ELF note segment headers and data. | ||
| 608 | * | ||
| 609 | * It is assumed that program headers with PT_NOTE type pointed to by | ||
| 610 | * @ehdr_ptr has already been updated by update_note_header_size_elf32 | ||
| 611 | * and each of PT_NOTE program headers has actual ELF note segment | ||
| 612 | * size in its p_memsz member. | ||
| 613 | */ | ||
| 614 | static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf) | ||
| 615 | { | ||
| 616 | int i, rc=0; | ||
| 617 | Elf32_Phdr *phdr_ptr; | ||
| 618 | |||
| 619 | phdr_ptr = (Elf32_Phdr*)(ehdr_ptr + 1); | ||
| 620 | |||
| 621 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
| 622 | u64 offset; | ||
| 623 | if (phdr_ptr->p_type != PT_NOTE) | ||
| 624 | continue; | ||
| 625 | offset = phdr_ptr->p_offset; | ||
| 626 | rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); | ||
| 627 | if (rc < 0) | ||
| 628 | return rc; | ||
| 629 | notes_buf += phdr_ptr->p_memsz; | ||
| 385 | } | 630 | } |
| 386 | 631 | ||
| 632 | return 0; | ||
| 633 | } | ||
| 634 | |||
| 635 | /* Merges all the PT_NOTE headers into one. */ | ||
| 636 | static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | ||
| 637 | char **notes_buf, size_t *notes_sz) | ||
| 638 | { | ||
| 639 | int i, nr_ptnote=0, rc=0; | ||
| 640 | char *tmp; | ||
| 641 | Elf32_Ehdr *ehdr_ptr; | ||
| 642 | Elf32_Phdr phdr; | ||
| 643 | u64 phdr_sz = 0, note_off; | ||
| 644 | |||
| 645 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | ||
| 646 | |||
| 647 | rc = update_note_header_size_elf32(ehdr_ptr); | ||
| 648 | if (rc < 0) | ||
| 649 | return rc; | ||
| 650 | |||
| 651 | rc = get_note_number_and_size_elf32(ehdr_ptr, &nr_ptnote, &phdr_sz); | ||
| 652 | if (rc < 0) | ||
| 653 | return rc; | ||
| 654 | |||
| 655 | *notes_sz = roundup(phdr_sz, PAGE_SIZE); | ||
| 656 | *notes_buf = alloc_elfnotes_buf(*notes_sz); | ||
| 657 | if (!*notes_buf) | ||
| 658 | return -ENOMEM; | ||
| 659 | |||
| 660 | rc = copy_notes_elf32(ehdr_ptr, *notes_buf); | ||
| 661 | if (rc < 0) | ||
| 662 | return rc; | ||
| 663 | |||
| 387 | /* Prepare merged PT_NOTE program header. */ | 664 | /* Prepare merged PT_NOTE program header. */ |
| 388 | phdr.p_type = PT_NOTE; | 665 | phdr.p_type = PT_NOTE; |
| 389 | phdr.p_flags = 0; | 666 | phdr.p_flags = 0; |
| 390 | note_off = sizeof(Elf32_Ehdr) + | 667 | note_off = sizeof(Elf32_Ehdr) + |
| 391 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr); | 668 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr); |
| 392 | phdr.p_offset = note_off; | 669 | phdr.p_offset = roundup(note_off, PAGE_SIZE); |
| 393 | phdr.p_vaddr = phdr.p_paddr = 0; | 670 | phdr.p_vaddr = phdr.p_paddr = 0; |
| 394 | phdr.p_filesz = phdr.p_memsz = phdr_sz; | 671 | phdr.p_filesz = phdr.p_memsz = phdr_sz; |
| 395 | phdr.p_align = 0; | 672 | phdr.p_align = 0; |
| @@ -403,6 +680,8 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
| 403 | i = (nr_ptnote - 1) * sizeof(Elf32_Phdr); | 680 | i = (nr_ptnote - 1) * sizeof(Elf32_Phdr); |
| 404 | *elfsz = *elfsz - i; | 681 | *elfsz = *elfsz - i; |
| 405 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr))); | 682 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr))); |
| 683 | memset(elfptr + *elfsz, 0, i); | ||
| 684 | *elfsz = roundup(*elfsz, PAGE_SIZE); | ||
| 406 | 685 | ||
| 407 | /* Modify e_phnum to reflect merged headers. */ | 686 | /* Modify e_phnum to reflect merged headers. */ |
| 408 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; | 687 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; |
| @@ -414,6 +693,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
| 414 | * the new offset fields of exported program headers. */ | 693 | * the new offset fields of exported program headers. */ |
| 415 | static int __init process_ptload_program_headers_elf64(char *elfptr, | 694 | static int __init process_ptload_program_headers_elf64(char *elfptr, |
| 416 | size_t elfsz, | 695 | size_t elfsz, |
| 696 | size_t elfnotes_sz, | ||
| 417 | struct list_head *vc_list) | 697 | struct list_head *vc_list) |
| 418 | { | 698 | { |
| 419 | int i; | 699 | int i; |
| @@ -425,32 +705,38 @@ static int __init process_ptload_program_headers_elf64(char *elfptr, | |||
| 425 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | 705 | ehdr_ptr = (Elf64_Ehdr *)elfptr; |
| 426 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ | 706 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ |
| 427 | 707 | ||
| 428 | /* First program header is PT_NOTE header. */ | 708 | /* Skip Elf header, program headers and Elf note segment. */ |
| 429 | vmcore_off = sizeof(Elf64_Ehdr) + | 709 | vmcore_off = elfsz + elfnotes_sz; |
| 430 | (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr) + | ||
| 431 | phdr_ptr->p_memsz; /* Note sections */ | ||
| 432 | 710 | ||
| 433 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 711 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
| 712 | u64 paddr, start, end, size; | ||
| 713 | |||
| 434 | if (phdr_ptr->p_type != PT_LOAD) | 714 | if (phdr_ptr->p_type != PT_LOAD) |
| 435 | continue; | 715 | continue; |
| 436 | 716 | ||
| 717 | paddr = phdr_ptr->p_offset; | ||
| 718 | start = rounddown(paddr, PAGE_SIZE); | ||
| 719 | end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); | ||
| 720 | size = end - start; | ||
| 721 | |||
| 437 | /* Add this contiguous chunk of memory to vmcore list.*/ | 722 | /* Add this contiguous chunk of memory to vmcore list.*/ |
| 438 | new = get_new_element(); | 723 | new = get_new_element(); |
| 439 | if (!new) | 724 | if (!new) |
| 440 | return -ENOMEM; | 725 | return -ENOMEM; |
| 441 | new->paddr = phdr_ptr->p_offset; | 726 | new->paddr = start; |
| 442 | new->size = phdr_ptr->p_memsz; | 727 | new->size = size; |
| 443 | list_add_tail(&new->list, vc_list); | 728 | list_add_tail(&new->list, vc_list); |
| 444 | 729 | ||
| 445 | /* Update the program header offset. */ | 730 | /* Update the program header offset. */ |
| 446 | phdr_ptr->p_offset = vmcore_off; | 731 | phdr_ptr->p_offset = vmcore_off + (paddr - start); |
| 447 | vmcore_off = vmcore_off + phdr_ptr->p_memsz; | 732 | vmcore_off = vmcore_off + size; |
| 448 | } | 733 | } |
| 449 | return 0; | 734 | return 0; |
| 450 | } | 735 | } |
| 451 | 736 | ||
| 452 | static int __init process_ptload_program_headers_elf32(char *elfptr, | 737 | static int __init process_ptload_program_headers_elf32(char *elfptr, |
| 453 | size_t elfsz, | 738 | size_t elfsz, |
| 739 | size_t elfnotes_sz, | ||
| 454 | struct list_head *vc_list) | 740 | struct list_head *vc_list) |
| 455 | { | 741 | { |
| 456 | int i; | 742 | int i; |
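The reworked PT_LOAD pass above expands each physical chunk to page boundaries so it can later be mmap()ed, while keeping the original sub-page displacement in the exported p_offset. A tiny worked example with made-up numbers, using simplified power-of-two versions of the roundup/rounddown helpers:

```c
#include <stdio.h>

#define PAGE_SIZE	0x1000UL
/* power-of-two-only stand-ins for the kernel helpers, fine for PAGE_SIZE */
#define rounddown(x, y)	((x) & ~((y) - 1))
#define roundup(x, y)	((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	unsigned long paddr = 0x1234, memsz = 0x2000, vmcore_off = 0x5000;
	unsigned long start = rounddown(paddr, PAGE_SIZE);
	unsigned long end   = roundup(paddr + memsz, PAGE_SIZE);

	printf("start=%#lx end=%#lx size=%#lx\n", start, end, end - start);
	printf("p_offset=%#lx\n", vmcore_off + (paddr - start));
	/* prints start=0x1000 end=0x4000 size=0x3000, p_offset=0x5234 */
	return 0;
}
```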
| @@ -462,43 +748,44 @@ static int __init process_ptload_program_headers_elf32(char *elfptr, | |||
| 462 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | 748 | ehdr_ptr = (Elf32_Ehdr *)elfptr; |
| 463 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ | 749 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ |
| 464 | 750 | ||
| 465 | /* First program header is PT_NOTE header. */ | 751 | /* Skip Elf header, program headers and Elf note segment. */ |
| 466 | vmcore_off = sizeof(Elf32_Ehdr) + | 752 | vmcore_off = elfsz + elfnotes_sz; |
| 467 | (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr) + | ||
| 468 | phdr_ptr->p_memsz; /* Note sections */ | ||
| 469 | 753 | ||
| 470 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 754 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
| 755 | u64 paddr, start, end, size; | ||
| 756 | |||
| 471 | if (phdr_ptr->p_type != PT_LOAD) | 757 | if (phdr_ptr->p_type != PT_LOAD) |
| 472 | continue; | 758 | continue; |
| 473 | 759 | ||
| 760 | paddr = phdr_ptr->p_offset; | ||
| 761 | start = rounddown(paddr, PAGE_SIZE); | ||
| 762 | end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); | ||
| 763 | size = end - start; | ||
| 764 | |||
| 474 | /* Add this contiguous chunk of memory to vmcore list.*/ | 765 | /* Add this contiguous chunk of memory to vmcore list.*/ |
| 475 | new = get_new_element(); | 766 | new = get_new_element(); |
| 476 | if (!new) | 767 | if (!new) |
| 477 | return -ENOMEM; | 768 | return -ENOMEM; |
| 478 | new->paddr = phdr_ptr->p_offset; | 769 | new->paddr = start; |
| 479 | new->size = phdr_ptr->p_memsz; | 770 | new->size = size; |
| 480 | list_add_tail(&new->list, vc_list); | 771 | list_add_tail(&new->list, vc_list); |
| 481 | 772 | ||
| 482 | /* Update the program header offset */ | 773 | /* Update the program header offset */ |
| 483 | phdr_ptr->p_offset = vmcore_off; | 774 | phdr_ptr->p_offset = vmcore_off + (paddr - start); |
| 484 | vmcore_off = vmcore_off + phdr_ptr->p_memsz; | 775 | vmcore_off = vmcore_off + size; |
| 485 | } | 776 | } |
| 486 | return 0; | 777 | return 0; |
| 487 | } | 778 | } |
| 488 | 779 | ||
| 489 | /* Sets offset fields of vmcore elements. */ | 780 | /* Sets offset fields of vmcore elements. */ |
| 490 | static void __init set_vmcore_list_offsets_elf64(char *elfptr, | 781 | static void __init set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz, |
| 491 | struct list_head *vc_list) | 782 | struct list_head *vc_list) |
| 492 | { | 783 | { |
| 493 | loff_t vmcore_off; | 784 | loff_t vmcore_off; |
| 494 | Elf64_Ehdr *ehdr_ptr; | ||
| 495 | struct vmcore *m; | 785 | struct vmcore *m; |
| 496 | 786 | ||
| 497 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | 787 | /* Skip Elf header, program headers and Elf note segment. */ |
| 498 | 788 | vmcore_off = elfsz + elfnotes_sz; | |
| 499 | /* Skip Elf header and program headers. */ | ||
| 500 | vmcore_off = sizeof(Elf64_Ehdr) + | ||
| 501 | (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr); | ||
| 502 | 789 | ||
| 503 | list_for_each_entry(m, vc_list, list) { | 790 | list_for_each_entry(m, vc_list, list) { |
| 504 | m->offset = vmcore_off; | 791 | m->offset = vmcore_off; |
| @@ -506,24 +793,12 @@ static void __init set_vmcore_list_offsets_elf64(char *elfptr, | |||
| 506 | } | 793 | } |
| 507 | } | 794 | } |
| 508 | 795 | ||
| 509 | /* Sets offset fields of vmcore elements. */ | 796 | static void free_elfcorebuf(void) |
| 510 | static void __init set_vmcore_list_offsets_elf32(char *elfptr, | ||
| 511 | struct list_head *vc_list) | ||
| 512 | { | 797 | { |
| 513 | loff_t vmcore_off; | 798 | free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig)); |
| 514 | Elf32_Ehdr *ehdr_ptr; | 799 | elfcorebuf = NULL; |
| 515 | struct vmcore *m; | 800 | vfree(elfnotes_buf); |
| 516 | 801 | elfnotes_buf = NULL; | |
| 517 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | ||
| 518 | |||
| 519 | /* Skip Elf header and program headers. */ | ||
| 520 | vmcore_off = sizeof(Elf32_Ehdr) + | ||
| 521 | (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr); | ||
| 522 | |||
| 523 | list_for_each_entry(m, vc_list, list) { | ||
| 524 | m->offset = vmcore_off; | ||
| 525 | vmcore_off += m->size; | ||
| 526 | } | ||
| 527 | } | 802 | } |
| 528 | 803 | ||
| 529 | static int __init parse_crash_elf64_headers(void) | 804 | static int __init parse_crash_elf64_headers(void) |
| @@ -554,31 +829,32 @@ static int __init parse_crash_elf64_headers(void) | |||
| 554 | } | 829 | } |
| 555 | 830 | ||
| 556 | /* Read in all elf headers. */ | 831 | /* Read in all elf headers. */ |
| 557 | elfcorebuf_sz = sizeof(Elf64_Ehdr) + ehdr.e_phnum * sizeof(Elf64_Phdr); | 832 | elfcorebuf_sz_orig = sizeof(Elf64_Ehdr) + |
| 558 | elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); | 833 | ehdr.e_phnum * sizeof(Elf64_Phdr); |
| 834 | elfcorebuf_sz = elfcorebuf_sz_orig; | ||
| 835 | elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
| 836 | get_order(elfcorebuf_sz_orig)); | ||
| 559 | if (!elfcorebuf) | 837 | if (!elfcorebuf) |
| 560 | return -ENOMEM; | 838 | return -ENOMEM; |
| 561 | addr = elfcorehdr_addr; | 839 | addr = elfcorehdr_addr; |
| 562 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); | 840 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); |
| 563 | if (rc < 0) { | 841 | if (rc < 0) |
| 564 | kfree(elfcorebuf); | 842 | goto fail; |
| 565 | return rc; | ||
| 566 | } | ||
| 567 | 843 | ||
| 568 | /* Merge all PT_NOTE headers into one. */ | 844 | /* Merge all PT_NOTE headers into one. */ |
| 569 | rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, &vmcore_list); | 845 | rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, |
| 570 | if (rc) { | 846 | &elfnotes_buf, &elfnotes_sz); |
| 571 | kfree(elfcorebuf); | 847 | if (rc) |
| 572 | return rc; | 848 | goto fail; |
| 573 | } | ||
| 574 | rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz, | 849 | rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz, |
| 575 | &vmcore_list); | 850 | elfnotes_sz, &vmcore_list); |
| 576 | if (rc) { | 851 | if (rc) |
| 577 | kfree(elfcorebuf); | 852 | goto fail; |
| 578 | return rc; | 853 | set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); |
| 579 | } | ||
| 580 | set_vmcore_list_offsets_elf64(elfcorebuf, &vmcore_list); | ||
| 581 | return 0; | 854 | return 0; |
| 855 | fail: | ||
| 856 | free_elfcorebuf(); | ||
| 857 | return rc; | ||
| 582 | } | 858 | } |
| 583 | 859 | ||
| 584 | static int __init parse_crash_elf32_headers(void) | 860 | static int __init parse_crash_elf32_headers(void) |
| @@ -609,31 +885,31 @@ static int __init parse_crash_elf32_headers(void) | |||
| 609 | } | 885 | } |
| 610 | 886 | ||
| 611 | /* Read in all elf headers. */ | 887 | /* Read in all elf headers. */ |
| 612 | elfcorebuf_sz = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); | 888 | elfcorebuf_sz_orig = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); |
| 613 | elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); | 889 | elfcorebuf_sz = elfcorebuf_sz_orig; |
| 890 | elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
| 891 | get_order(elfcorebuf_sz_orig)); | ||
| 614 | if (!elfcorebuf) | 892 | if (!elfcorebuf) |
| 615 | return -ENOMEM; | 893 | return -ENOMEM; |
| 616 | addr = elfcorehdr_addr; | 894 | addr = elfcorehdr_addr; |
| 617 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); | 895 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); |
| 618 | if (rc < 0) { | 896 | if (rc < 0) |
| 619 | kfree(elfcorebuf); | 897 | goto fail; |
| 620 | return rc; | ||
| 621 | } | ||
| 622 | 898 | ||
| 623 | /* Merge all PT_NOTE headers into one. */ | 899 | /* Merge all PT_NOTE headers into one. */ |
| 624 | rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, &vmcore_list); | 900 | rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, |
| 625 | if (rc) { | 901 | &elfnotes_buf, &elfnotes_sz); |
| 626 | kfree(elfcorebuf); | 902 | if (rc) |
| 627 | return rc; | 903 | goto fail; |
| 628 | } | ||
| 629 | rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz, | 904 | rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz, |
| 630 | &vmcore_list); | 905 | elfnotes_sz, &vmcore_list); |
| 631 | if (rc) { | 906 | if (rc) |
| 632 | kfree(elfcorebuf); | 907 | goto fail; |
| 633 | return rc; | 908 | set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); |
| 634 | } | ||
| 635 | set_vmcore_list_offsets_elf32(elfcorebuf, &vmcore_list); | ||
| 636 | return 0; | 909 | return 0; |
| 910 | fail: | ||
| 911 | free_elfcorebuf(); | ||
| 912 | return rc; | ||
| 637 | } | 913 | } |
| 638 | 914 | ||
| 639 | static int __init parse_crash_elf_headers(void) | 915 | static int __init parse_crash_elf_headers(void) |
| @@ -655,20 +931,19 @@ static int __init parse_crash_elf_headers(void) | |||
| 655 | rc = parse_crash_elf64_headers(); | 931 | rc = parse_crash_elf64_headers(); |
| 656 | if (rc) | 932 | if (rc) |
| 657 | return rc; | 933 | return rc; |
| 658 | |||
| 659 | /* Determine vmcore size. */ | ||
| 660 | vmcore_size = get_vmcore_size_elf64(elfcorebuf); | ||
| 661 | } else if (e_ident[EI_CLASS] == ELFCLASS32) { | 934 | } else if (e_ident[EI_CLASS] == ELFCLASS32) { |
| 662 | rc = parse_crash_elf32_headers(); | 935 | rc = parse_crash_elf32_headers(); |
| 663 | if (rc) | 936 | if (rc) |
| 664 | return rc; | 937 | return rc; |
| 665 | |||
| 666 | /* Determine vmcore size. */ | ||
| 667 | vmcore_size = get_vmcore_size_elf32(elfcorebuf); | ||
| 668 | } else { | 938 | } else { |
| 669 | pr_warn("Warning: Core image elf header is not sane\n"); | 939 | pr_warn("Warning: Core image elf header is not sane\n"); |
| 670 | return -EINVAL; | 940 | return -EINVAL; |
| 671 | } | 941 | } |
| 942 | |||
| 943 | /* Determine vmcore size. */ | ||
| 944 | vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz, | ||
| 945 | &vmcore_list); | ||
| 946 | |||
| 672 | return 0; | 947 | return 0; |
| 673 | } | 948 | } |
| 674 | 949 | ||
| @@ -711,7 +986,6 @@ void vmcore_cleanup(void) | |||
| 711 | list_del(&m->list); | 986 | list_del(&m->list); |
| 712 | kfree(m); | 987 | kfree(m); |
| 713 | } | 988 | } |
| 714 | kfree(elfcorebuf); | 989 | free_elfcorebuf(); |
| 715 | elfcorebuf = NULL; | ||
| 716 | } | 990 | } |
| 717 | EXPORT_SYMBOL_GPL(vmcore_cleanup); | 991 | EXPORT_SYMBOL_GPL(vmcore_cleanup); |
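The vmcore hunks above replace the per-ELF-class offset bookkeeping with page-aligned PT_LOAD chunks: each segment's physical range is expanded to page boundaries, the aligned chunk is what goes on vc_list, and the program header's p_offset keeps the segment's in-page displacement. A standalone sketch of that arithmetic, with made-up sample values and local stand-ins for the kernel's rounddown/roundup helpers (not kernel code):

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE       4096ULL                       /* assumed 4 KiB pages */
#define rounddown(x, y) ((x) - ((x) % (y)))
#define roundup(x, y)   ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	uint64_t paddr      = 0x1000a00;  /* p_offset of a PT_LOAD (physical addr) */
	uint64_t memsz      = 0x2300;     /* p_memsz of the segment */
	uint64_t vmcore_off = 0x3000;     /* running offset after headers + notes */

	uint64_t start = rounddown(paddr, PAGE_SIZE);
	uint64_t end   = roundup(paddr + memsz, PAGE_SIZE);
	uint64_t size  = end - start;     /* page-aligned chunk added to vc_list */

	/* The rewritten header points into the aligned chunk, not at its start. */
	uint64_t new_p_offset = vmcore_off + (paddr - start);

	printf("chunk paddr=%#llx size=%#llx, p_offset=%#llx, next vmcore_off=%#llx\n",
	       (unsigned long long)start, (unsigned long long)size,
	       (unsigned long long)new_p_offset,
	       (unsigned long long)(vmcore_off + size));
	return 0;
}
```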
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index 43b12807a51d..76a4eeb92982 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c | |||
| @@ -44,7 +44,7 @@ static void notrace pstore_ftrace_call(unsigned long ip, | |||
| 44 | rec.parent_ip = parent_ip; | 44 | rec.parent_ip = parent_ip; |
| 45 | pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); | 45 | pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); |
| 46 | psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, | 46 | psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, |
| 47 | sizeof(rec), psinfo); | 47 | 0, sizeof(rec), psinfo); |
| 48 | 48 | ||
| 49 | local_irq_restore(flags); | 49 | local_irq_restore(flags); |
| 50 | } | 50 | } |
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index e4bcb2cf055a..71bf5f4ae84c 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c | |||
| @@ -178,6 +178,8 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) | |||
| 178 | if (p->psi->erase) | 178 | if (p->psi->erase) |
| 179 | p->psi->erase(p->type, p->id, p->count, | 179 | p->psi->erase(p->type, p->id, p->count, |
| 180 | dentry->d_inode->i_ctime, p->psi); | 180 | dentry->d_inode->i_ctime, p->psi); |
| 181 | else | ||
| 182 | return -EPERM; | ||
| 181 | 183 | ||
| 182 | return simple_unlink(dir, dentry); | 184 | return simple_unlink(dir, dentry); |
| 183 | } | 185 | } |
| @@ -324,6 +326,15 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, | |||
| 324 | case PSTORE_TYPE_MCE: | 326 | case PSTORE_TYPE_MCE: |
| 325 | sprintf(name, "mce-%s-%lld", psname, id); | 327 | sprintf(name, "mce-%s-%lld", psname, id); |
| 326 | break; | 328 | break; |
| 329 | case PSTORE_TYPE_PPC_RTAS: | ||
| 330 | sprintf(name, "rtas-%s-%lld", psname, id); | ||
| 331 | break; | ||
| 332 | case PSTORE_TYPE_PPC_OF: | ||
| 333 | sprintf(name, "powerpc-ofw-%s-%lld", psname, id); | ||
| 334 | break; | ||
| 335 | case PSTORE_TYPE_PPC_COMMON: | ||
| 336 | sprintf(name, "powerpc-common-%s-%lld", psname, id); | ||
| 337 | break; | ||
| 327 | case PSTORE_TYPE_UNKNOWN: | 338 | case PSTORE_TYPE_UNKNOWN: |
| 328 | sprintf(name, "unknown-%s-%lld", psname, id); | 339 | sprintf(name, "unknown-%s-%lld", psname, id); |
| 329 | break; | 340 | break; |
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 86d1038b5a12..422962ae9fc2 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c | |||
| @@ -159,7 +159,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, | |||
| 159 | break; | 159 | break; |
| 160 | 160 | ||
| 161 | ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, | 161 | ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, |
| 162 | oopscount, hsize + len, psinfo); | 162 | oopscount, hsize, hsize + len, psinfo); |
| 163 | if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) | 163 | if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) |
| 164 | pstore_new_entry = 1; | 164 | pstore_new_entry = 1; |
| 165 | 165 | ||
| @@ -196,7 +196,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) | |||
| 196 | spin_lock_irqsave(&psinfo->buf_lock, flags); | 196 | spin_lock_irqsave(&psinfo->buf_lock, flags); |
| 197 | } | 197 | } |
| 198 | memcpy(psinfo->buf, s, c); | 198 | memcpy(psinfo->buf, s, c); |
| 199 | psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, c, psinfo); | 199 | psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo); |
| 200 | spin_unlock_irqrestore(&psinfo->buf_lock, flags); | 200 | spin_unlock_irqrestore(&psinfo->buf_lock, flags); |
| 201 | s += c; | 201 | s += c; |
| 202 | c = e - s; | 202 | c = e - s; |
| @@ -221,9 +221,11 @@ static void pstore_register_console(void) {} | |||
| 221 | static int pstore_write_compat(enum pstore_type_id type, | 221 | static int pstore_write_compat(enum pstore_type_id type, |
| 222 | enum kmsg_dump_reason reason, | 222 | enum kmsg_dump_reason reason, |
| 223 | u64 *id, unsigned int part, int count, | 223 | u64 *id, unsigned int part, int count, |
| 224 | size_t size, struct pstore_info *psi) | 224 | size_t hsize, size_t size, |
| 225 | struct pstore_info *psi) | ||
| 225 | { | 226 | { |
| 226 | return psi->write_buf(type, reason, id, part, psinfo->buf, size, psi); | 227 | return psi->write_buf(type, reason, id, part, psinfo->buf, hsize, |
| 228 | size, psi); | ||
| 227 | } | 229 | } |
| 228 | 230 | ||
| 229 | /* | 231 | /* |
| @@ -239,17 +241,15 @@ int pstore_register(struct pstore_info *psi) | |||
| 239 | { | 241 | { |
| 240 | struct module *owner = psi->owner; | 242 | struct module *owner = psi->owner; |
| 241 | 243 | ||
| 244 | if (backend && strcmp(backend, psi->name)) | ||
| 245 | return -EPERM; | ||
| 246 | |||
| 242 | spin_lock(&pstore_lock); | 247 | spin_lock(&pstore_lock); |
| 243 | if (psinfo) { | 248 | if (psinfo) { |
| 244 | spin_unlock(&pstore_lock); | 249 | spin_unlock(&pstore_lock); |
| 245 | return -EBUSY; | 250 | return -EBUSY; |
| 246 | } | 251 | } |
| 247 | 252 | ||
| 248 | if (backend && strcmp(backend, psi->name)) { | ||
| 249 | spin_unlock(&pstore_lock); | ||
| 250 | return -EINVAL; | ||
| 251 | } | ||
| 252 | |||
| 253 | if (!psi->write) | 253 | if (!psi->write) |
| 254 | psi->write = pstore_write_compat; | 254 | psi->write = pstore_write_compat; |
| 255 | psinfo = psi; | 255 | psinfo = psi; |
| @@ -274,6 +274,9 @@ int pstore_register(struct pstore_info *psi) | |||
| 274 | add_timer(&pstore_timer); | 274 | add_timer(&pstore_timer); |
| 275 | } | 275 | } |
| 276 | 276 | ||
| 277 | pr_info("pstore: Registered %s as persistent store backend\n", | ||
| 278 | psi->name); | ||
| 279 | |||
| 277 | return 0; | 280 | return 0; |
| 278 | } | 281 | } |
| 279 | EXPORT_SYMBOL_GPL(pstore_register); | 282 | EXPORT_SYMBOL_GPL(pstore_register); |
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 1376e5a8f0d6..a6119f9469e2 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c | |||
| @@ -195,7 +195,8 @@ static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) | |||
| 195 | static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, | 195 | static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, |
| 196 | enum kmsg_dump_reason reason, | 196 | enum kmsg_dump_reason reason, |
| 197 | u64 *id, unsigned int part, | 197 | u64 *id, unsigned int part, |
| 198 | const char *buf, size_t size, | 198 | const char *buf, |
| 199 | size_t hsize, size_t size, | ||
| 199 | struct pstore_info *psi) | 200 | struct pstore_info *psi) |
| 200 | { | 201 | { |
| 201 | struct ramoops_context *cxt = psi->data; | 202 | struct ramoops_context *cxt = psi->data; |
| @@ -399,8 +400,6 @@ static int ramoops_probe(struct platform_device *pdev) | |||
| 399 | goto fail_out; | 400 | goto fail_out; |
| 400 | } | 401 | } |
| 401 | 402 | ||
| 402 | if (!is_power_of_2(pdata->mem_size)) | ||
| 403 | pdata->mem_size = rounddown_pow_of_two(pdata->mem_size); | ||
| 404 | if (!is_power_of_2(pdata->record_size)) | 403 | if (!is_power_of_2(pdata->record_size)) |
| 405 | pdata->record_size = rounddown_pow_of_two(pdata->record_size); | 404 | pdata->record_size = rounddown_pow_of_two(pdata->record_size); |
| 406 | if (!is_power_of_2(pdata->console_size)) | 405 | if (!is_power_of_2(pdata->console_size)) |
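The pstore interface change threads a new header-size argument (hsize) through the write and write_buf callbacks, and pstore_register() now rejects a backend whose name does not match the pstore.backend= parameter with -EPERM before taking any locks. A hedged sketch of a backend against the new prototype; the backend name, function names and stub bodies are hypothetical, and struct pstore_info carries more fields (buffer, bufsize, read/erase callbacks) than shown:

```c
#include <linux/module.h>
#include <linux/pstore.h>

/* Hypothetical backend stub against the updated write_buf prototype. */
static int example_pstore_write_buf(enum pstore_type_id type,
				    enum kmsg_dump_reason reason,
				    u64 *id, unsigned int part,
				    const char *buf, size_t hsize,
				    size_t size, struct pstore_info *psi)
{
	/*
	 * For dmesg records, the first hsize bytes of buf are the header
	 * that pstore_dump() prepends; size covers header plus payload,
	 * so the payload proper starts at buf + hsize.
	 */
	pr_debug("pstore record: type=%d part=%u hsize=%zu size=%zu\n",
		 type, part, hsize, size);
	*id = 0;	/* a real backend reports the stored record's id */
	return 0;
}

static struct pstore_info example_psinfo = {
	.owner     = THIS_MODULE,
	.name      = "example",
	.write_buf = example_pstore_write_buf,
	/* .buf, .bufsize, .read, .erase, ... omitted for brevity */
};

static int __init example_pstore_init(void)
{
	/*
	 * Fails with -EPERM when pstore.backend= names a different backend;
	 * on success the new pr_info() announces the registration.
	 */
	return pstore_register(&example_psinfo);
}
```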
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 59337326e288..de272d426763 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c | |||
| @@ -46,7 +46,7 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz) | |||
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | /* increase and wrap the start pointer, returning the old value */ | 48 | /* increase and wrap the start pointer, returning the old value */ |
| 49 | static inline size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) | 49 | static size_t buffer_start_add_atomic(struct persistent_ram_zone *prz, size_t a) |
| 50 | { | 50 | { |
| 51 | int old; | 51 | int old; |
| 52 | int new; | 52 | int new; |
| @@ -62,7 +62,7 @@ static inline size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) | |||
| 62 | } | 62 | } |
| 63 | 63 | ||
| 64 | /* increase the size counter until it hits the max size */ | 64 | /* increase the size counter until it hits the max size */ |
| 65 | static inline void buffer_size_add(struct persistent_ram_zone *prz, size_t a) | 65 | static void buffer_size_add_atomic(struct persistent_ram_zone *prz, size_t a) |
| 66 | { | 66 | { |
| 67 | size_t old; | 67 | size_t old; |
| 68 | size_t new; | 68 | size_t new; |
| @@ -78,6 +78,53 @@ static inline void buffer_size_add(struct persistent_ram_zone *prz, size_t a) | |||
| 78 | } while (atomic_cmpxchg(&prz->buffer->size, old, new) != old); | 78 | } while (atomic_cmpxchg(&prz->buffer->size, old, new) != old); |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | static DEFINE_RAW_SPINLOCK(buffer_lock); | ||
| 82 | |||
| 83 | /* increase and wrap the start pointer, returning the old value */ | ||
| 84 | static size_t buffer_start_add_locked(struct persistent_ram_zone *prz, size_t a) | ||
| 85 | { | ||
| 86 | int old; | ||
| 87 | int new; | ||
| 88 | unsigned long flags; | ||
| 89 | |||
| 90 | raw_spin_lock_irqsave(&buffer_lock, flags); | ||
| 91 | |||
| 92 | old = atomic_read(&prz->buffer->start); | ||
| 93 | new = old + a; | ||
| 94 | while (unlikely(new > prz->buffer_size)) | ||
| 95 | new -= prz->buffer_size; | ||
| 96 | atomic_set(&prz->buffer->start, new); | ||
| 97 | |||
| 98 | raw_spin_unlock_irqrestore(&buffer_lock, flags); | ||
| 99 | |||
| 100 | return old; | ||
| 101 | } | ||
| 102 | |||
| 103 | /* increase the size counter until it hits the max size */ | ||
| 104 | static void buffer_size_add_locked(struct persistent_ram_zone *prz, size_t a) | ||
| 105 | { | ||
| 106 | size_t old; | ||
| 107 | size_t new; | ||
| 108 | unsigned long flags; | ||
| 109 | |||
| 110 | raw_spin_lock_irqsave(&buffer_lock, flags); | ||
| 111 | |||
| 112 | old = atomic_read(&prz->buffer->size); | ||
| 113 | if (old == prz->buffer_size) | ||
| 114 | goto exit; | ||
| 115 | |||
| 116 | new = old + a; | ||
| 117 | if (new > prz->buffer_size) | ||
| 118 | new = prz->buffer_size; | ||
| 119 | atomic_set(&prz->buffer->size, new); | ||
| 120 | |||
| 121 | exit: | ||
| 122 | raw_spin_unlock_irqrestore(&buffer_lock, flags); | ||
| 123 | } | ||
| 124 | |||
| 125 | static size_t (*buffer_start_add)(struct persistent_ram_zone *, size_t) = buffer_start_add_atomic; | ||
| 126 | static void (*buffer_size_add)(struct persistent_ram_zone *, size_t) = buffer_size_add_atomic; | ||
| 127 | |||
| 81 | static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, | 128 | static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, |
| 82 | uint8_t *data, size_t len, uint8_t *ecc) | 129 | uint8_t *data, size_t len, uint8_t *ecc) |
| 83 | { | 130 | { |
| @@ -372,6 +419,9 @@ static void *persistent_ram_iomap(phys_addr_t start, size_t size) | |||
| 372 | return NULL; | 419 | return NULL; |
| 373 | } | 420 | } |
| 374 | 421 | ||
| 422 | buffer_start_add = buffer_start_add_locked; | ||
| 423 | buffer_size_add = buffer_size_add_locked; | ||
| 424 | |||
| 375 | return ioremap(start, size); | 425 | return ioremap(start, size); |
| 376 | } | 426 | } |
| 377 | 427 | ||
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 3e64169ef527..fbad622841f9 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
| @@ -2585,7 +2585,7 @@ static int do_proc_dqstats(struct ctl_table *table, int write, | |||
| 2585 | return proc_dointvec(table, write, buffer, lenp, ppos); | 2585 | return proc_dointvec(table, write, buffer, lenp, ppos); |
| 2586 | } | 2586 | } |
| 2587 | 2587 | ||
| 2588 | static ctl_table fs_dqstats_table[] = { | 2588 | static struct ctl_table fs_dqstats_table[] = { |
| 2589 | { | 2589 | { |
| 2590 | .procname = "lookups", | 2590 | .procname = "lookups", |
| 2591 | .data = &dqstats.stat[DQST_LOOKUPS], | 2591 | .data = &dqstats.stat[DQST_LOOKUPS], |
| @@ -2654,7 +2654,7 @@ static ctl_table fs_dqstats_table[] = { | |||
| 2654 | { }, | 2654 | { }, |
| 2655 | }; | 2655 | }; |
| 2656 | 2656 | ||
| 2657 | static ctl_table fs_table[] = { | 2657 | static struct ctl_table fs_table[] = { |
| 2658 | { | 2658 | { |
| 2659 | .procname = "quota", | 2659 | .procname = "quota", |
| 2660 | .mode = 0555, | 2660 | .mode = 0555, |
| @@ -2663,7 +2663,7 @@ static ctl_table fs_table[] = { | |||
| 2663 | { }, | 2663 | { }, |
| 2664 | }; | 2664 | }; |
| 2665 | 2665 | ||
| 2666 | static ctl_table sys_table[] = { | 2666 | static struct ctl_table sys_table[] = { |
| 2667 | { | 2667 | { |
| 2668 | .procname = "fs", | 2668 | .procname = "fs", |
| 2669 | .mode = 0555, | 2669 | .mode = 0555, |
diff --git a/fs/read_write.c b/fs/read_write.c index 2cefa417be34..122a3846d9e1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
| @@ -41,8 +41,19 @@ static inline int unsigned_offsets(struct file *file) | |||
| 41 | return file->f_mode & FMODE_UNSIGNED_OFFSET; | 41 | return file->f_mode & FMODE_UNSIGNED_OFFSET; |
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | static loff_t lseek_execute(struct file *file, struct inode *inode, | 44 | /** |
| 45 | loff_t offset, loff_t maxsize) | 45 | * vfs_setpos - update the file offset for lseek |
| 46 | * @file: file structure in question | ||
| 47 | * @offset: file offset to seek to | ||
| 48 | * @maxsize: maximum file size | ||
| 49 | * | ||
| 50 | * This is a low-level filesystem helper for updating the file offset to | ||
| 51 | * the value specified by @offset if the given offset is valid and it is | ||
| 52 | * not equal to the current file offset. | ||
| 53 | * | ||
| 54 | * Return the specified offset on success and -EINVAL on invalid offset. | ||
| 55 | */ | ||
| 56 | loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) | ||
| 46 | { | 57 | { |
| 47 | if (offset < 0 && !unsigned_offsets(file)) | 58 | if (offset < 0 && !unsigned_offsets(file)) |
| 48 | return -EINVAL; | 59 | return -EINVAL; |
| @@ -55,6 +66,7 @@ static loff_t lseek_execute(struct file *file, struct inode *inode, | |||
| 55 | } | 66 | } |
| 56 | return offset; | 67 | return offset; |
| 57 | } | 68 | } |
| 69 | EXPORT_SYMBOL(vfs_setpos); | ||
| 58 | 70 | ||
| 59 | /** | 71 | /** |
| 60 | * generic_file_llseek_size - generic llseek implementation for regular files | 72 | * generic_file_llseek_size - generic llseek implementation for regular files |
| @@ -76,8 +88,6 @@ loff_t | |||
| 76 | generic_file_llseek_size(struct file *file, loff_t offset, int whence, | 88 | generic_file_llseek_size(struct file *file, loff_t offset, int whence, |
| 77 | loff_t maxsize, loff_t eof) | 89 | loff_t maxsize, loff_t eof) |
| 78 | { | 90 | { |
| 79 | struct inode *inode = file->f_mapping->host; | ||
| 80 | |||
| 81 | switch (whence) { | 91 | switch (whence) { |
| 82 | case SEEK_END: | 92 | case SEEK_END: |
| 83 | offset += eof; | 93 | offset += eof; |
| @@ -97,8 +107,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence, | |||
| 97 | * like SEEK_SET. | 107 | * like SEEK_SET. |
| 98 | */ | 108 | */ |
| 99 | spin_lock(&file->f_lock); | 109 | spin_lock(&file->f_lock); |
| 100 | offset = lseek_execute(file, inode, file->f_pos + offset, | 110 | offset = vfs_setpos(file, file->f_pos + offset, maxsize); |
| 101 | maxsize); | ||
| 102 | spin_unlock(&file->f_lock); | 111 | spin_unlock(&file->f_lock); |
| 103 | return offset; | 112 | return offset; |
| 104 | case SEEK_DATA: | 113 | case SEEK_DATA: |
| @@ -120,7 +129,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence, | |||
| 120 | break; | 129 | break; |
| 121 | } | 130 | } |
| 122 | 131 | ||
| 123 | return lseek_execute(file, inode, offset, maxsize); | 132 | return vfs_setpos(file, offset, maxsize); |
| 124 | } | 133 | } |
| 125 | EXPORT_SYMBOL(generic_file_llseek_size); | 134 | EXPORT_SYMBOL(generic_file_llseek_size); |
| 126 | 135 | ||
| @@ -145,6 +154,26 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) | |||
| 145 | EXPORT_SYMBOL(generic_file_llseek); | 154 | EXPORT_SYMBOL(generic_file_llseek); |
| 146 | 155 | ||
| 147 | /** | 156 | /** |
| 157 | * fixed_size_llseek - llseek implementation for fixed-sized devices | ||
| 158 | * @file: file structure to seek on | ||
| 159 | * @offset: file offset to seek to | ||
| 160 | * @whence: type of seek | ||
| 161 | * @size: size of the file | ||
| 162 | * | ||
| 163 | */ | ||
| 164 | loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size) | ||
| 165 | { | ||
| 166 | switch (whence) { | ||
| 167 | case SEEK_SET: case SEEK_CUR: case SEEK_END: | ||
| 168 | return generic_file_llseek_size(file, offset, whence, | ||
| 169 | size, size); | ||
| 170 | default: | ||
| 171 | return -EINVAL; | ||
| 172 | } | ||
| 173 | } | ||
| 174 | EXPORT_SYMBOL(fixed_size_llseek); | ||
| 175 | |||
| 176 | /** | ||
| 148 | * noop_llseek - No Operation Performed llseek implementation | 177 | * noop_llseek - No Operation Performed llseek implementation |
| 149 | * @file: file structure to seek on | 178 | * @file: file structure to seek on |
| 150 | * @offset: file offset to seek to | 179 | * @offset: file offset to seek to |
| @@ -296,7 +325,7 @@ out_putf: | |||
| 296 | * them to something that fits in "int" so that others | 325 | * them to something that fits in "int" so that others |
| 297 | * won't have to do range checks all the time. | 326 | * won't have to do range checks all the time. |
| 298 | */ | 327 | */ |
| 299 | int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) | 328 | int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) |
| 300 | { | 329 | { |
| 301 | struct inode *inode; | 330 | struct inode *inode; |
| 302 | loff_t pos; | 331 | loff_t pos; |
| @@ -477,7 +506,8 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) | |||
| 477 | if (f.file) { | 506 | if (f.file) { |
| 478 | loff_t pos = file_pos_read(f.file); | 507 | loff_t pos = file_pos_read(f.file); |
| 479 | ret = vfs_read(f.file, buf, count, &pos); | 508 | ret = vfs_read(f.file, buf, count, &pos); |
| 480 | file_pos_write(f.file, pos); | 509 | if (ret >= 0) |
| 510 | file_pos_write(f.file, pos); | ||
| 481 | fdput(f); | 511 | fdput(f); |
| 482 | } | 512 | } |
| 483 | return ret; | 513 | return ret; |
| @@ -492,7 +522,8 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, | |||
| 492 | if (f.file) { | 522 | if (f.file) { |
| 493 | loff_t pos = file_pos_read(f.file); | 523 | loff_t pos = file_pos_read(f.file); |
| 494 | ret = vfs_write(f.file, buf, count, &pos); | 524 | ret = vfs_write(f.file, buf, count, &pos); |
| 495 | file_pos_write(f.file, pos); | 525 | if (ret >= 0) |
| 526 | file_pos_write(f.file, pos); | ||
| 496 | fdput(f); | 527 | fdput(f); |
| 497 | } | 528 | } |
| 498 | 529 | ||
| @@ -780,7 +811,8 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, | |||
| 780 | if (f.file) { | 811 | if (f.file) { |
| 781 | loff_t pos = file_pos_read(f.file); | 812 | loff_t pos = file_pos_read(f.file); |
| 782 | ret = vfs_readv(f.file, vec, vlen, &pos); | 813 | ret = vfs_readv(f.file, vec, vlen, &pos); |
| 783 | file_pos_write(f.file, pos); | 814 | if (ret >= 0) |
| 815 | file_pos_write(f.file, pos); | ||
| 784 | fdput(f); | 816 | fdput(f); |
| 785 | } | 817 | } |
| 786 | 818 | ||
| @@ -799,7 +831,8 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, | |||
| 799 | if (f.file) { | 831 | if (f.file) { |
| 800 | loff_t pos = file_pos_read(f.file); | 832 | loff_t pos = file_pos_read(f.file); |
| 801 | ret = vfs_writev(f.file, vec, vlen, &pos); | 833 | ret = vfs_writev(f.file, vec, vlen, &pos); |
| 802 | file_pos_write(f.file, pos); | 834 | if (ret >= 0) |
| 835 | file_pos_write(f.file, pos); | ||
| 803 | fdput(f); | 836 | fdput(f); |
| 804 | } | 837 | } |
| 805 | 838 | ||
| @@ -959,7 +992,8 @@ COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd, | |||
| 959 | return -EBADF; | 992 | return -EBADF; |
| 960 | pos = f.file->f_pos; | 993 | pos = f.file->f_pos; |
| 961 | ret = compat_readv(f.file, vec, vlen, &pos); | 994 | ret = compat_readv(f.file, vec, vlen, &pos); |
| 962 | f.file->f_pos = pos; | 995 | if (ret >= 0) |
| 996 | f.file->f_pos = pos; | ||
| 963 | fdput(f); | 997 | fdput(f); |
| 964 | return ret; | 998 | return ret; |
| 965 | } | 999 | } |
| @@ -1025,7 +1059,8 @@ COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd, | |||
| 1025 | return -EBADF; | 1059 | return -EBADF; |
| 1026 | pos = f.file->f_pos; | 1060 | pos = f.file->f_pos; |
| 1027 | ret = compat_writev(f.file, vec, vlen, &pos); | 1061 | ret = compat_writev(f.file, vec, vlen, &pos); |
| 1028 | f.file->f_pos = pos; | 1062 | if (ret >= 0) |
| 1063 | f.file->f_pos = pos; | ||
| 1029 | fdput(f); | 1064 | fdput(f); |
| 1030 | return ret; | 1065 | return ret; |
| 1031 | } | 1066 | } |
| @@ -1129,7 +1164,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
| 1129 | if (in.file->f_flags & O_NONBLOCK) | 1164 | if (in.file->f_flags & O_NONBLOCK) |
| 1130 | fl = SPLICE_F_NONBLOCK; | 1165 | fl = SPLICE_F_NONBLOCK; |
| 1131 | #endif | 1166 | #endif |
| 1167 | file_start_write(out.file); | ||
| 1132 | retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); | 1168 | retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); |
| 1169 | file_end_write(out.file); | ||
| 1133 | 1170 | ||
| 1134 | if (retval > 0) { | 1171 | if (retval > 0) { |
| 1135 | add_rchar(current, retval); | 1172 | add_rchar(current, retval); |
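The read_write.c hunks export the old lseek_execute() helper as vfs_setpos() and add fixed_size_llseek() for files backed by a device whose size is known up front. A minimal sketch of how a driver would use the new helper; the device name, the size constant and the file_operations wiring are assumptions:

```c
#include <linux/module.h>
#include <linux/fs.h>

#define EXAMPLE_DEV_SIZE 4096	/* hypothetical fixed device size in bytes */

/* llseek for a fixed-size device: SEEK_SET/CUR/END, clamped to the size. */
static loff_t example_llseek(struct file *file, loff_t offset, int whence)
{
	return fixed_size_llseek(file, offset, whence, EXAMPLE_DEV_SIZE);
}

static const struct file_operations example_fops = {
	.owner  = THIS_MODULE,
	.llseek = example_llseek,
	/* .read/.write omitted */
};
```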
diff --git a/fs/select.c b/fs/select.c index 8c1c96c27062..f9f49c40cfd4 100644 --- a/fs/select.c +++ b/fs/select.c | |||
| @@ -27,6 +27,8 @@ | |||
| 27 | #include <linux/rcupdate.h> | 27 | #include <linux/rcupdate.h> |
| 28 | #include <linux/hrtimer.h> | 28 | #include <linux/hrtimer.h> |
| 29 | #include <linux/sched/rt.h> | 29 | #include <linux/sched/rt.h> |
| 30 | #include <linux/freezer.h> | ||
| 31 | #include <net/ll_poll.h> | ||
| 30 | 32 | ||
| 31 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
| 32 | 34 | ||
| @@ -236,7 +238,8 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, | |||
| 236 | 238 | ||
| 237 | set_current_state(state); | 239 | set_current_state(state); |
| 238 | if (!pwq->triggered) | 240 | if (!pwq->triggered) |
| 239 | rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); | 241 | rc = freezable_schedule_hrtimeout_range(expires, slack, |
| 242 | HRTIMER_MODE_ABS); | ||
| 240 | __set_current_state(TASK_RUNNING); | 243 | __set_current_state(TASK_RUNNING); |
| 241 | 244 | ||
| 242 | /* | 245 | /* |
| @@ -384,9 +387,10 @@ get_max: | |||
| 384 | #define POLLEX_SET (POLLPRI) | 387 | #define POLLEX_SET (POLLPRI) |
| 385 | 388 | ||
| 386 | static inline void wait_key_set(poll_table *wait, unsigned long in, | 389 | static inline void wait_key_set(poll_table *wait, unsigned long in, |
| 387 | unsigned long out, unsigned long bit) | 390 | unsigned long out, unsigned long bit, |
| 391 | unsigned int ll_flag) | ||
| 388 | { | 392 | { |
| 389 | wait->_key = POLLEX_SET; | 393 | wait->_key = POLLEX_SET | ll_flag; |
| 390 | if (in & bit) | 394 | if (in & bit) |
| 391 | wait->_key |= POLLIN_SET; | 395 | wait->_key |= POLLIN_SET; |
| 392 | if (out & bit) | 396 | if (out & bit) |
| @@ -400,6 +404,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
| 400 | poll_table *wait; | 404 | poll_table *wait; |
| 401 | int retval, i, timed_out = 0; | 405 | int retval, i, timed_out = 0; |
| 402 | unsigned long slack = 0; | 406 | unsigned long slack = 0; |
| 407 | unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; | ||
| 408 | unsigned long busy_end = 0; | ||
| 403 | 409 | ||
| 404 | rcu_read_lock(); | 410 | rcu_read_lock(); |
| 405 | retval = max_select_fd(n, fds); | 411 | retval = max_select_fd(n, fds); |
| @@ -422,6 +428,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
| 422 | retval = 0; | 428 | retval = 0; |
| 423 | for (;;) { | 429 | for (;;) { |
| 424 | unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; | 430 | unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; |
| 431 | bool can_busy_loop = false; | ||
| 425 | 432 | ||
| 426 | inp = fds->in; outp = fds->out; exp = fds->ex; | 433 | inp = fds->in; outp = fds->out; exp = fds->ex; |
| 427 | rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; | 434 | rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; |
| @@ -449,7 +456,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
| 449 | f_op = f.file->f_op; | 456 | f_op = f.file->f_op; |
| 450 | mask = DEFAULT_POLLMASK; | 457 | mask = DEFAULT_POLLMASK; |
| 451 | if (f_op && f_op->poll) { | 458 | if (f_op && f_op->poll) { |
| 452 | wait_key_set(wait, in, out, bit); | 459 | wait_key_set(wait, in, out, |
| 460 | bit, busy_flag); | ||
| 453 | mask = (*f_op->poll)(f.file, wait); | 461 | mask = (*f_op->poll)(f.file, wait); |
| 454 | } | 462 | } |
| 455 | fdput(f); | 463 | fdput(f); |
| @@ -468,6 +476,18 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
| 468 | retval++; | 476 | retval++; |
| 469 | wait->_qproc = NULL; | 477 | wait->_qproc = NULL; |
| 470 | } | 478 | } |
| 479 | /* got something, stop busy polling */ | ||
| 480 | if (retval) { | ||
| 481 | can_busy_loop = false; | ||
| 482 | busy_flag = 0; | ||
| 483 | |||
| 484 | /* | ||
| 485 | * only remember a returned | ||
| 486 | * POLL_BUSY_LOOP if we asked for it | ||
| 487 | */ | ||
| 488 | } else if (busy_flag & mask) | ||
| 489 | can_busy_loop = true; | ||
| 490 | |||
| 471 | } | 491 | } |
| 472 | } | 492 | } |
| 473 | if (res_in) | 493 | if (res_in) |
| @@ -486,6 +506,17 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
| 486 | break; | 506 | break; |
| 487 | } | 507 | } |
| 488 | 508 | ||
| 509 | /* only if found POLL_BUSY_LOOP sockets && not out of time */ | ||
| 510 | if (can_busy_loop && !need_resched()) { | ||
| 511 | if (!busy_end) { | ||
| 512 | busy_end = busy_loop_end_time(); | ||
| 513 | continue; | ||
| 514 | } | ||
| 515 | if (!busy_loop_timeout(busy_end)) | ||
| 516 | continue; | ||
| 517 | } | ||
| 518 | busy_flag = 0; | ||
| 519 | |||
| 489 | /* | 520 | /* |
| 490 | * If this is the first loop and we have a timeout | 521 | * If this is the first loop and we have a timeout |
| 491 | * given, then we convert to ktime_t and set the to | 522 | * given, then we convert to ktime_t and set the to |
| @@ -717,7 +748,9 @@ struct poll_list { | |||
| 717 | * pwait poll_table will be used by the fd-provided poll handler for waiting, | 748 | * pwait poll_table will be used by the fd-provided poll handler for waiting, |
| 718 | * if pwait->_qproc is non-NULL. | 749 | * if pwait->_qproc is non-NULL. |
| 719 | */ | 750 | */ |
| 720 | static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) | 751 | static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait, |
| 752 | bool *can_busy_poll, | ||
| 753 | unsigned int busy_flag) | ||
| 721 | { | 754 | { |
| 722 | unsigned int mask; | 755 | unsigned int mask; |
| 723 | int fd; | 756 | int fd; |
| @@ -731,7 +764,10 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) | |||
| 731 | mask = DEFAULT_POLLMASK; | 764 | mask = DEFAULT_POLLMASK; |
| 732 | if (f.file->f_op && f.file->f_op->poll) { | 765 | if (f.file->f_op && f.file->f_op->poll) { |
| 733 | pwait->_key = pollfd->events|POLLERR|POLLHUP; | 766 | pwait->_key = pollfd->events|POLLERR|POLLHUP; |
| 767 | pwait->_key |= busy_flag; | ||
| 734 | mask = f.file->f_op->poll(f.file, pwait); | 768 | mask = f.file->f_op->poll(f.file, pwait); |
| 769 | if (mask & busy_flag) | ||
| 770 | *can_busy_poll = true; | ||
| 735 | } | 771 | } |
| 736 | /* Mask out unneeded events. */ | 772 | /* Mask out unneeded events. */ |
| 737 | mask &= pollfd->events | POLLERR | POLLHUP; | 773 | mask &= pollfd->events | POLLERR | POLLHUP; |
| @@ -750,6 +786,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
| 750 | ktime_t expire, *to = NULL; | 786 | ktime_t expire, *to = NULL; |
| 751 | int timed_out = 0, count = 0; | 787 | int timed_out = 0, count = 0; |
| 752 | unsigned long slack = 0; | 788 | unsigned long slack = 0; |
| 789 | unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; | ||
| 790 | unsigned long busy_end = 0; | ||
| 753 | 791 | ||
| 754 | /* Optimise the no-wait case */ | 792 | /* Optimise the no-wait case */ |
| 755 | if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { | 793 | if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { |
| @@ -762,6 +800,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
| 762 | 800 | ||
| 763 | for (;;) { | 801 | for (;;) { |
| 764 | struct poll_list *walk; | 802 | struct poll_list *walk; |
| 803 | bool can_busy_loop = false; | ||
| 765 | 804 | ||
| 766 | for (walk = list; walk != NULL; walk = walk->next) { | 805 | for (walk = list; walk != NULL; walk = walk->next) { |
| 767 | struct pollfd * pfd, * pfd_end; | 806 | struct pollfd * pfd, * pfd_end; |
| @@ -776,9 +815,13 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
| 776 | * this. They'll get immediately deregistered | 815 | * this. They'll get immediately deregistered |
| 777 | * when we break out and return. | 816 | * when we break out and return. |
| 778 | */ | 817 | */ |
| 779 | if (do_pollfd(pfd, pt)) { | 818 | if (do_pollfd(pfd, pt, &can_busy_loop, |
| 819 | busy_flag)) { | ||
| 780 | count++; | 820 | count++; |
| 781 | pt->_qproc = NULL; | 821 | pt->_qproc = NULL; |
| 822 | /* found something, stop busy polling */ | ||
| 823 | busy_flag = 0; | ||
| 824 | can_busy_loop = false; | ||
| 782 | } | 825 | } |
| 783 | } | 826 | } |
| 784 | } | 827 | } |
| @@ -795,6 +838,17 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
| 795 | if (count || timed_out) | 838 | if (count || timed_out) |
| 796 | break; | 839 | break; |
| 797 | 840 | ||
| 841 | /* only if found POLL_BUSY_LOOP sockets && not out of time */ | ||
| 842 | if (can_busy_loop && !need_resched()) { | ||
| 843 | if (!busy_end) { | ||
| 844 | busy_end = busy_loop_end_time(); | ||
| 845 | continue; | ||
| 846 | } | ||
| 847 | if (!busy_loop_timeout(busy_end)) | ||
| 848 | continue; | ||
| 849 | } | ||
| 850 | busy_flag = 0; | ||
| 851 | |||
| 798 | /* | 852 | /* |
| 799 | * If this is the first loop and we have a timeout | 853 | * If this is the first loop and we have a timeout |
| 800 | * given, then we convert to ktime_t and set the to | 854 | * given, then we convert to ktime_t and set the to |
diff --git a/fs/seq_file.c b/fs/seq_file.c index 774c1eb7f1c9..3135c2525c76 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
| @@ -921,3 +921,57 @@ struct hlist_node *seq_hlist_next_rcu(void *v, | |||
| 921 | return rcu_dereference(node->next); | 921 | return rcu_dereference(node->next); |
| 922 | } | 922 | } |
| 923 | EXPORT_SYMBOL(seq_hlist_next_rcu); | 923 | EXPORT_SYMBOL(seq_hlist_next_rcu); |
| 924 | |||
| 925 | /** | ||
| 926 | * seq_hlist_start_percpu - start an iteration of a percpu hlist array | ||
| 927 | * @head: pointer to percpu array of struct hlist_heads | ||
| 928 | * @cpu: pointer to cpu "cursor" | ||
| 929 | * @pos: start position of sequence | ||
| 930 | * | ||
| 931 | * Called at seq_file->op->start(). | ||
| 932 | */ | ||
| 933 | struct hlist_node * | ||
| 934 | seq_hlist_start_percpu(struct hlist_head __percpu *head, int *cpu, loff_t pos) | ||
| 935 | { | ||
| 936 | struct hlist_node *node; | ||
| 937 | |||
| 938 | for_each_possible_cpu(*cpu) { | ||
| 939 | hlist_for_each(node, per_cpu_ptr(head, *cpu)) { | ||
| 940 | if (pos-- == 0) | ||
| 941 | return node; | ||
| 942 | } | ||
| 943 | } | ||
| 944 | return NULL; | ||
| 945 | } | ||
| 946 | EXPORT_SYMBOL(seq_hlist_start_percpu); | ||
| 947 | |||
| 948 | /** | ||
| 949 | * seq_hlist_next_percpu - move to the next position of the percpu hlist array | ||
| 950 | * @v: pointer to current hlist_node | ||
| 951 | * @head: pointer to percpu array of struct hlist_heads | ||
| 952 | * @cpu: pointer to cpu "cursor" | ||
| 953 | * @pos: start position of sequence | ||
| 954 | * | ||
| 955 | * Called at seq_file->op->next(). | ||
| 956 | */ | ||
| 957 | struct hlist_node * | ||
| 958 | seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head, | ||
| 959 | int *cpu, loff_t *pos) | ||
| 960 | { | ||
| 961 | struct hlist_node *node = v; | ||
| 962 | |||
| 963 | ++*pos; | ||
| 964 | |||
| 965 | if (node->next) | ||
| 966 | return node->next; | ||
| 967 | |||
| 968 | for (*cpu = cpumask_next(*cpu, cpu_possible_mask); *cpu < nr_cpu_ids; | ||
| 969 | *cpu = cpumask_next(*cpu, cpu_possible_mask)) { | ||
| 970 | struct hlist_head *bucket = per_cpu_ptr(head, *cpu); | ||
| 971 | |||
| 972 | if (!hlist_empty(bucket)) | ||
| 973 | return bucket->first; | ||
| 974 | } | ||
| 975 | return NULL; | ||
| 976 | } | ||
| 977 | EXPORT_SYMBOL(seq_hlist_next_percpu); | ||
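seq_hlist_start_percpu() and seq_hlist_next_percpu() give seq_file iterators a standard way to walk a per-cpu array of hlists while carrying the cpu cursor between calls. A sketch of the intended calling pattern; the per-cpu table, the iterator struct and how seq->private gets populated are assumptions:

```c
#include <linux/seq_file.h>
#include <linux/percpu.h>

/* Hypothetical per-cpu buckets, e.g. from alloc_percpu(struct hlist_head). */
static struct hlist_head __percpu *example_heads;

struct example_iter {
	int cpu;	/* cursor threaded through the percpu helpers */
};

static void *example_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct example_iter *iter = seq->private;

	return seq_hlist_start_percpu(example_heads, &iter->cpu, *pos);
}

static void *example_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct example_iter *iter = seq->private;

	/* advances *pos and hops to the next non-empty cpu bucket */
	return seq_hlist_next_percpu(v, example_heads, &iter->cpu, pos);
}
```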
diff --git a/fs/splice.c b/fs/splice.c index d37431dd60a1..3b7ee656f3aa 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -1098,27 +1098,13 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
| 1098 | { | 1098 | { |
| 1099 | ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, | 1099 | ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, |
| 1100 | loff_t *, size_t, unsigned int); | 1100 | loff_t *, size_t, unsigned int); |
| 1101 | int ret; | ||
| 1102 | |||
| 1103 | if (unlikely(!(out->f_mode & FMODE_WRITE))) | ||
| 1104 | return -EBADF; | ||
| 1105 | |||
| 1106 | if (unlikely(out->f_flags & O_APPEND)) | ||
| 1107 | return -EINVAL; | ||
| 1108 | |||
| 1109 | ret = rw_verify_area(WRITE, out, ppos, len); | ||
| 1110 | if (unlikely(ret < 0)) | ||
| 1111 | return ret; | ||
| 1112 | 1101 | ||
| 1113 | if (out->f_op && out->f_op->splice_write) | 1102 | if (out->f_op && out->f_op->splice_write) |
| 1114 | splice_write = out->f_op->splice_write; | 1103 | splice_write = out->f_op->splice_write; |
| 1115 | else | 1104 | else |
| 1116 | splice_write = default_file_splice_write; | 1105 | splice_write = default_file_splice_write; |
| 1117 | 1106 | ||
| 1118 | file_start_write(out); | 1107 | return splice_write(pipe, out, ppos, len, flags); |
| 1119 | ret = splice_write(pipe, out, ppos, len, flags); | ||
| 1120 | file_end_write(out); | ||
| 1121 | return ret; | ||
| 1122 | } | 1108 | } |
| 1123 | 1109 | ||
| 1124 | /* | 1110 | /* |
| @@ -1307,6 +1293,16 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | |||
| 1307 | }; | 1293 | }; |
| 1308 | long ret; | 1294 | long ret; |
| 1309 | 1295 | ||
| 1296 | if (unlikely(!(out->f_mode & FMODE_WRITE))) | ||
| 1297 | return -EBADF; | ||
| 1298 | |||
| 1299 | if (unlikely(out->f_flags & O_APPEND)) | ||
| 1300 | return -EINVAL; | ||
| 1301 | |||
| 1302 | ret = rw_verify_area(WRITE, out, opos, len); | ||
| 1303 | if (unlikely(ret < 0)) | ||
| 1304 | return ret; | ||
| 1305 | |||
| 1310 | ret = splice_direct_to_actor(in, &sd, direct_splice_actor); | 1306 | ret = splice_direct_to_actor(in, &sd, direct_splice_actor); |
| 1311 | if (ret > 0) | 1307 | if (ret > 0) |
| 1312 | *ppos = sd.pos; | 1308 | *ppos = sd.pos; |
| @@ -1362,7 +1358,19 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
| 1362 | offset = out->f_pos; | 1358 | offset = out->f_pos; |
| 1363 | } | 1359 | } |
| 1364 | 1360 | ||
| 1361 | if (unlikely(!(out->f_mode & FMODE_WRITE))) | ||
| 1362 | return -EBADF; | ||
| 1363 | |||
| 1364 | if (unlikely(out->f_flags & O_APPEND)) | ||
| 1365 | return -EINVAL; | ||
| 1366 | |||
| 1367 | ret = rw_verify_area(WRITE, out, &offset, len); | ||
| 1368 | if (unlikely(ret < 0)) | ||
| 1369 | return ret; | ||
| 1370 | |||
| 1371 | file_start_write(out); | ||
| 1365 | ret = do_splice_from(ipipe, out, &offset, len, flags); | 1372 | ret = do_splice_from(ipipe, out, &offset, len, flags); |
| 1373 | file_end_write(out); | ||
| 1366 | 1374 | ||
| 1367 | if (!off_out) | 1375 | if (!off_out) |
| 1368 | out->f_pos = offset; | 1376 | out->f_pos = offset; |
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 1c0d5f264767..731b2bbcaab3 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c | |||
| @@ -27,8 +27,7 @@ static int add_nondir(struct dentry *dentry, struct inode *inode) | |||
| 27 | return err; | 27 | return err; |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | static int sysv_hash(const struct dentry *dentry, const struct inode *inode, | 30 | static int sysv_hash(const struct dentry *dentry, struct qstr *qstr) |
| 31 | struct qstr *qstr) | ||
| 32 | { | 31 | { |
| 33 | /* Truncate the name in place, avoids having to define a compare | 32 | /* Truncate the name in place, avoids having to define a compare |
| 34 | function. */ | 33 | function. */ |
diff --git a/fs/timerfd.c b/fs/timerfd.c index 32b644f03690..929312180dd0 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | * | 8 | * |
| 9 | */ | 9 | */ |
| 10 | 10 | ||
| 11 | #include <linux/alarmtimer.h> | ||
| 11 | #include <linux/file.h> | 12 | #include <linux/file.h> |
| 12 | #include <linux/poll.h> | 13 | #include <linux/poll.h> |
| 13 | #include <linux/init.h> | 14 | #include <linux/init.h> |
| @@ -26,7 +27,10 @@ | |||
| 26 | #include <linux/rcupdate.h> | 27 | #include <linux/rcupdate.h> |
| 27 | 28 | ||
| 28 | struct timerfd_ctx { | 29 | struct timerfd_ctx { |
| 29 | struct hrtimer tmr; | 30 | union { |
| 31 | struct hrtimer tmr; | ||
| 32 | struct alarm alarm; | ||
| 33 | } t; | ||
| 30 | ktime_t tintv; | 34 | ktime_t tintv; |
| 31 | ktime_t moffs; | 35 | ktime_t moffs; |
| 32 | wait_queue_head_t wqh; | 36 | wait_queue_head_t wqh; |
| @@ -41,14 +45,19 @@ struct timerfd_ctx { | |||
| 41 | static LIST_HEAD(cancel_list); | 45 | static LIST_HEAD(cancel_list); |
| 42 | static DEFINE_SPINLOCK(cancel_lock); | 46 | static DEFINE_SPINLOCK(cancel_lock); |
| 43 | 47 | ||
| 48 | static inline bool isalarm(struct timerfd_ctx *ctx) | ||
| 49 | { | ||
| 50 | return ctx->clockid == CLOCK_REALTIME_ALARM || | ||
| 51 | ctx->clockid == CLOCK_BOOTTIME_ALARM; | ||
| 52 | } | ||
| 53 | |||
| 44 | /* | 54 | /* |
| 45 | * This gets called when the timer event triggers. We set the "expired" | 55 | * This gets called when the timer event triggers. We set the "expired" |
| 46 | * flag, but we do not re-arm the timer (in case it's necessary, | 56 | * flag, but we do not re-arm the timer (in case it's necessary, |
| 47 | * tintv.tv64 != 0) until the timer is accessed. | 57 | * tintv.tv64 != 0) until the timer is accessed. |
| 48 | */ | 58 | */ |
| 49 | static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) | 59 | static void timerfd_triggered(struct timerfd_ctx *ctx) |
| 50 | { | 60 | { |
| 51 | struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr); | ||
| 52 | unsigned long flags; | 61 | unsigned long flags; |
| 53 | 62 | ||
| 54 | spin_lock_irqsave(&ctx->wqh.lock, flags); | 63 | spin_lock_irqsave(&ctx->wqh.lock, flags); |
| @@ -56,10 +65,25 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) | |||
| 56 | ctx->ticks++; | 65 | ctx->ticks++; |
| 57 | wake_up_locked(&ctx->wqh); | 66 | wake_up_locked(&ctx->wqh); |
| 58 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); | 67 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
| 68 | } | ||
| 59 | 69 | ||
| 70 | static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) | ||
| 71 | { | ||
| 72 | struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, | ||
| 73 | t.tmr); | ||
| 74 | timerfd_triggered(ctx); | ||
| 60 | return HRTIMER_NORESTART; | 75 | return HRTIMER_NORESTART; |
| 61 | } | 76 | } |
| 62 | 77 | ||
| 78 | static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm, | ||
| 79 | ktime_t now) | ||
| 80 | { | ||
| 81 | struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx, | ||
| 82 | t.alarm); | ||
| 83 | timerfd_triggered(ctx); | ||
| 84 | return ALARMTIMER_NORESTART; | ||
| 85 | } | ||
| 86 | |||
| 63 | /* | 87 | /* |
| 64 | * Called when the clock was set to cancel the timers in the cancel | 88 | * Called when the clock was set to cancel the timers in the cancel |
| 65 | * list. This will wake up processes waiting on these timers. The | 89 | * list. This will wake up processes waiting on these timers. The |
| @@ -107,8 +131,9 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx) | |||
| 107 | 131 | ||
| 108 | static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) | 132 | static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) |
| 109 | { | 133 | { |
| 110 | if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) && | 134 | if ((ctx->clockid == CLOCK_REALTIME || |
| 111 | (flags & TFD_TIMER_CANCEL_ON_SET)) { | 135 | ctx->clockid == CLOCK_REALTIME_ALARM) && |
| 136 | (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { | ||
| 112 | if (!ctx->might_cancel) { | 137 | if (!ctx->might_cancel) { |
| 113 | ctx->might_cancel = true; | 138 | ctx->might_cancel = true; |
| 114 | spin_lock(&cancel_lock); | 139 | spin_lock(&cancel_lock); |
| @@ -124,7 +149,11 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) | |||
| 124 | { | 149 | { |
| 125 | ktime_t remaining; | 150 | ktime_t remaining; |
| 126 | 151 | ||
| 127 | remaining = hrtimer_expires_remaining(&ctx->tmr); | 152 | if (isalarm(ctx)) |
| 153 | remaining = alarm_expires_remaining(&ctx->t.alarm); | ||
| 154 | else | ||
| 155 | remaining = hrtimer_expires_remaining(&ctx->t.tmr); | ||
| 156 | |||
| 128 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; | 157 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; |
| 129 | } | 158 | } |
| 130 | 159 | ||
| @@ -142,11 +171,28 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, | |||
| 142 | ctx->expired = 0; | 171 | ctx->expired = 0; |
| 143 | ctx->ticks = 0; | 172 | ctx->ticks = 0; |
| 144 | ctx->tintv = timespec_to_ktime(ktmr->it_interval); | 173 | ctx->tintv = timespec_to_ktime(ktmr->it_interval); |
| 145 | hrtimer_init(&ctx->tmr, clockid, htmode); | 174 | |
| 146 | hrtimer_set_expires(&ctx->tmr, texp); | 175 | if (isalarm(ctx)) { |
| 147 | ctx->tmr.function = timerfd_tmrproc; | 176 | alarm_init(&ctx->t.alarm, |
| 177 | ctx->clockid == CLOCK_REALTIME_ALARM ? | ||
| 178 | ALARM_REALTIME : ALARM_BOOTTIME, | ||
| 179 | timerfd_alarmproc); | ||
| 180 | } else { | ||
| 181 | hrtimer_init(&ctx->t.tmr, clockid, htmode); | ||
| 182 | hrtimer_set_expires(&ctx->t.tmr, texp); | ||
| 183 | ctx->t.tmr.function = timerfd_tmrproc; | ||
| 184 | } | ||
| 185 | |||
| 148 | if (texp.tv64 != 0) { | 186 | if (texp.tv64 != 0) { |
| 149 | hrtimer_start(&ctx->tmr, texp, htmode); | 187 | if (isalarm(ctx)) { |
| 188 | if (flags & TFD_TIMER_ABSTIME) | ||
| 189 | alarm_start(&ctx->t.alarm, texp); | ||
| 190 | else | ||
| 191 | alarm_start_relative(&ctx->t.alarm, texp); | ||
| 192 | } else { | ||
| 193 | hrtimer_start(&ctx->t.tmr, texp, htmode); | ||
| 194 | } | ||
| 195 | |||
| 150 | if (timerfd_canceled(ctx)) | 196 | if (timerfd_canceled(ctx)) |
| 151 | return -ECANCELED; | 197 | return -ECANCELED; |
| 152 | } | 198 | } |
| @@ -158,7 +204,11 @@ static int timerfd_release(struct inode *inode, struct file *file) | |||
| 158 | struct timerfd_ctx *ctx = file->private_data; | 204 | struct timerfd_ctx *ctx = file->private_data; |
| 159 | 205 | ||
| 160 | timerfd_remove_cancel(ctx); | 206 | timerfd_remove_cancel(ctx); |
| 161 | hrtimer_cancel(&ctx->tmr); | 207 | |
| 208 | if (isalarm(ctx)) | ||
| 209 | alarm_cancel(&ctx->t.alarm); | ||
| 210 | else | ||
| 211 | hrtimer_cancel(&ctx->t.tmr); | ||
| 162 | kfree_rcu(ctx, rcu); | 212 | kfree_rcu(ctx, rcu); |
| 163 | return 0; | 213 | return 0; |
| 164 | } | 214 | } |
| @@ -215,9 +265,15 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, | |||
| 215 | * callback to avoid DoS attacks specifying a very | 265 | * callback to avoid DoS attacks specifying a very |
| 216 | * short timer period. | 266 | * short timer period. |
| 217 | */ | 267 | */ |
| 218 | ticks += hrtimer_forward_now(&ctx->tmr, | 268 | if (isalarm(ctx)) { |
| 219 | ctx->tintv) - 1; | 269 | ticks += alarm_forward_now( |
| 220 | hrtimer_restart(&ctx->tmr); | 270 | &ctx->t.alarm, ctx->tintv) - 1; |
| 271 | alarm_restart(&ctx->t.alarm); | ||
| 272 | } else { | ||
| 273 | ticks += hrtimer_forward_now(&ctx->t.tmr, | ||
| 274 | ctx->tintv) - 1; | ||
| 275 | hrtimer_restart(&ctx->t.tmr); | ||
| 276 | } | ||
| 221 | } | 277 | } |
| 222 | ctx->expired = 0; | 278 | ctx->expired = 0; |
| 223 | ctx->ticks = 0; | 279 | ctx->ticks = 0; |
| @@ -259,7 +315,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) | |||
| 259 | 315 | ||
| 260 | if ((flags & ~TFD_CREATE_FLAGS) || | 316 | if ((flags & ~TFD_CREATE_FLAGS) || |
| 261 | (clockid != CLOCK_MONOTONIC && | 317 | (clockid != CLOCK_MONOTONIC && |
| 262 | clockid != CLOCK_REALTIME)) | 318 | clockid != CLOCK_REALTIME && |
| 319 | clockid != CLOCK_REALTIME_ALARM && | ||
| 320 | clockid != CLOCK_BOOTTIME_ALARM)) | ||
| 263 | return -EINVAL; | 321 | return -EINVAL; |
| 264 | 322 | ||
| 265 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); | 323 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); |
| @@ -268,7 +326,15 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) | |||
| 268 | 326 | ||
| 269 | init_waitqueue_head(&ctx->wqh); | 327 | init_waitqueue_head(&ctx->wqh); |
| 270 | ctx->clockid = clockid; | 328 | ctx->clockid = clockid; |
| 271 | hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); | 329 | |
| 330 | if (isalarm(ctx)) | ||
| 331 | alarm_init(&ctx->t.alarm, | ||
| 332 | ctx->clockid == CLOCK_REALTIME_ALARM ? | ||
| 333 | ALARM_REALTIME : ALARM_BOOTTIME, | ||
| 334 | timerfd_alarmproc); | ||
| 335 | else | ||
| 336 | hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS); | ||
| 337 | |||
| 272 | ctx->moffs = ktime_get_monotonic_offset(); | 338 | ctx->moffs = ktime_get_monotonic_offset(); |
| 273 | 339 | ||
| 274 | ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, | 340 | ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, |
| @@ -305,8 +371,14 @@ static int do_timerfd_settime(int ufd, int flags, | |||
| 305 | */ | 371 | */ |
| 306 | for (;;) { | 372 | for (;;) { |
| 307 | spin_lock_irq(&ctx->wqh.lock); | 373 | spin_lock_irq(&ctx->wqh.lock); |
| 308 | if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) | 374 | |
| 309 | break; | 375 | if (isalarm(ctx)) { |
| 376 | if (alarm_try_to_cancel(&ctx->t.alarm) >= 0) | ||
| 377 | break; | ||
| 378 | } else { | ||
| 379 | if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0) | ||
| 380 | break; | ||
| 381 | } | ||
| 310 | spin_unlock_irq(&ctx->wqh.lock); | 382 | spin_unlock_irq(&ctx->wqh.lock); |
| 311 | cpu_relax(); | 383 | cpu_relax(); |
| 312 | } | 384 | } |
| @@ -317,8 +389,12 @@ static int do_timerfd_settime(int ufd, int flags, | |||
| 317 | * We do not update "ticks" and "expired" since the timer will be | 389 | * We do not update "ticks" and "expired" since the timer will be |
| 318 | * re-programmed again in the following timerfd_setup() call. | 390 | * re-programmed again in the following timerfd_setup() call. |
| 319 | */ | 391 | */ |
| 320 | if (ctx->expired && ctx->tintv.tv64) | 392 | if (ctx->expired && ctx->tintv.tv64) { |
| 321 | hrtimer_forward_now(&ctx->tmr, ctx->tintv); | 393 | if (isalarm(ctx)) |
| 394 | alarm_forward_now(&ctx->t.alarm, ctx->tintv); | ||
| 395 | else | ||
| 396 | hrtimer_forward_now(&ctx->t.tmr, ctx->tintv); | ||
| 397 | } | ||
| 322 | 398 | ||
| 323 | old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); | 399 | old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); |
| 324 | old->it_interval = ktime_to_timespec(ctx->tintv); | 400 | old->it_interval = ktime_to_timespec(ctx->tintv); |
| @@ -345,9 +421,18 @@ static int do_timerfd_gettime(int ufd, struct itimerspec *t) | |||
| 345 | spin_lock_irq(&ctx->wqh.lock); | 421 | spin_lock_irq(&ctx->wqh.lock); |
| 346 | if (ctx->expired && ctx->tintv.tv64) { | 422 | if (ctx->expired && ctx->tintv.tv64) { |
| 347 | ctx->expired = 0; | 423 | ctx->expired = 0; |
| 348 | ctx->ticks += | 424 | |
| 349 | hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1; | 425 | if (isalarm(ctx)) { |
| 350 | hrtimer_restart(&ctx->tmr); | 426 | ctx->ticks += |
| 427 | alarm_forward_now( | ||
| 428 | &ctx->t.alarm, ctx->tintv) - 1; | ||
| 429 | alarm_restart(&ctx->t.alarm); | ||
| 430 | } else { | ||
| 431 | ctx->ticks += | ||
| 432 | hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) | ||
| 433 | - 1; | ||
| 434 | hrtimer_restart(&ctx->t.tmr); | ||
| 435 | } | ||
| 351 | } | 436 | } |
| 352 | t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); | 437 | t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); |
| 353 | t->it_interval = ktime_to_timespec(ctx->tintv); | 438 | t->it_interval = ktime_to_timespec(ctx->tintv); |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f21acf0ef01f..879b9976c12b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
| @@ -1412,7 +1412,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1412 | 1412 | ||
| 1413 | ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s", | 1413 | ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s", |
| 1414 | c->vi.ubi_num, c->vi.vol_id, c->vi.name, | 1414 | c->vi.ubi_num, c->vi.vol_id, c->vi.name, |
| 1415 | c->ro_mount ? ", R/O mode" : NULL); | 1415 | c->ro_mount ? ", R/O mode" : ""); |
| 1416 | x = (long long)c->main_lebs * c->leb_size; | 1416 | x = (long long)c->main_lebs * c->leb_size; |
| 1417 | y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; | 1417 | y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; |
| 1418 | ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", | 1418 | ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", |
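The one-line ubifs change replaces a NULL argument to a "%s" conversion with an empty string; passing NULL for %s is undefined behaviour even though glibc happens to print "(null)". A tiny illustration of the safer pattern (not from the patch):

/* Illustration only: printing an optional suffix safely. */
#include <stdio.h>

static void report(int ro_mount)
{
        /* NULL here would be undefined behaviour for %s; use "" instead. */
        printf("mounted volume%s\n", ro_mount ? ", R/O mode" : "");
}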
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 102c072c6bbf..5f6fc17d6bc5 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
| @@ -594,6 +594,29 @@ static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
| 594 | return 0; | 594 | return 0; |
| 595 | } | 595 | } |
| 596 | 596 | ||
| 597 | static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
| 598 | { | ||
| 599 | struct inode *inode; | ||
| 600 | struct udf_inode_info *iinfo; | ||
| 601 | int err; | ||
| 602 | |||
| 603 | inode = udf_new_inode(dir, mode, &err); | ||
| 604 | if (!inode) | ||
| 605 | return err; | ||
| 606 | |||
| 607 | iinfo = UDF_I(inode); | ||
| 608 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | ||
| 609 | inode->i_data.a_ops = &udf_adinicb_aops; | ||
| 610 | else | ||
| 611 | inode->i_data.a_ops = &udf_aops; | ||
| 612 | inode->i_op = &udf_file_inode_operations; | ||
| 613 | inode->i_fop = &udf_file_operations; | ||
| 614 | mark_inode_dirty(inode); | ||
| 615 | |||
| 616 | d_tmpfile(dentry, inode); | ||
| 617 | return 0; | ||
| 618 | } | ||
| 619 | |||
| 597 | static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, | 620 | static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, |
| 598 | dev_t rdev) | 621 | dev_t rdev) |
| 599 | { | 622 | { |
| @@ -1311,6 +1334,7 @@ const struct inode_operations udf_dir_inode_operations = { | |||
| 1311 | .rmdir = udf_rmdir, | 1334 | .rmdir = udf_rmdir, |
| 1312 | .mknod = udf_mknod, | 1335 | .mknod = udf_mknod, |
| 1313 | .rename = udf_rename, | 1336 | .rename = udf_rename, |
| 1337 | .tmpfile = udf_tmpfile, | ||
| 1314 | }; | 1338 | }; |
| 1315 | const struct inode_operations udf_symlink_inode_operations = { | 1339 | const struct inode_operations udf_symlink_inode_operations = { |
| 1316 | .readlink = generic_readlink, | 1340 | .readlink = generic_readlink, |
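udf_tmpfile wires UDF into the new ->tmpfile inode operation, which backs the O_TMPFILE open flag: an unnamed inode is created (d_tmpfile above) and only becomes visible if it is linked in later. A hedged userspace sketch of the usual pattern; the mount point and final name are placeholders, not taken from the patch:

/* Sketch: create an unnamed temporary file, write it, then give it a name. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char proc_path[64];
        int fd = open("/mnt/udf", O_TMPFILE | O_WRONLY, 0600);  /* path is a directory */

        if (fd < 0)
                return 1;
        write(fd, "data\n", 5);
        /* Give the anonymous inode a name once its contents are complete. */
        snprintf(proc_path, sizeof(proc_path), "/proc/self/fd/%d", fd);
        linkat(AT_FDCWD, proc_path, AT_FDCWD, "/mnt/udf/final-name", AT_SYMLINK_FOLLOW);
        close(fd);
        return 0;
}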
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 6313b69b6644..4a4508023a3c 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile | |||
| @@ -71,6 +71,7 @@ xfs-y += xfs_alloc.o \ | |||
| 71 | xfs_dir2_sf.o \ | 71 | xfs_dir2_sf.o \ |
| 72 | xfs_ialloc.o \ | 72 | xfs_ialloc.o \ |
| 73 | xfs_ialloc_btree.o \ | 73 | xfs_ialloc_btree.o \ |
| 74 | xfs_icreate_item.o \ | ||
| 74 | xfs_inode.o \ | 75 | xfs_inode.o \ |
| 75 | xfs_log_recover.o \ | 76 | xfs_log_recover.o \ |
| 76 | xfs_mount.o \ | 77 | xfs_mount.o \ |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 5673bcfda2f0..71596e57283a 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
| @@ -175,6 +175,7 @@ xfs_alloc_compute_diff( | |||
| 175 | xfs_agblock_t wantbno, /* target starting block */ | 175 | xfs_agblock_t wantbno, /* target starting block */ |
| 176 | xfs_extlen_t wantlen, /* target length */ | 176 | xfs_extlen_t wantlen, /* target length */ |
| 177 | xfs_extlen_t alignment, /* target alignment */ | 177 | xfs_extlen_t alignment, /* target alignment */ |
| 178 | char userdata, /* are we allocating data? */ | ||
| 178 | xfs_agblock_t freebno, /* freespace's starting block */ | 179 | xfs_agblock_t freebno, /* freespace's starting block */ |
| 179 | xfs_extlen_t freelen, /* freespace's length */ | 180 | xfs_extlen_t freelen, /* freespace's length */ |
| 180 | xfs_agblock_t *newbnop) /* result: best start block from free */ | 181 | xfs_agblock_t *newbnop) /* result: best start block from free */ |
| @@ -189,7 +190,14 @@ xfs_alloc_compute_diff( | |||
| 189 | ASSERT(freelen >= wantlen); | 190 | ASSERT(freelen >= wantlen); |
| 190 | freeend = freebno + freelen; | 191 | freeend = freebno + freelen; |
| 191 | wantend = wantbno + wantlen; | 192 | wantend = wantbno + wantlen; |
| 192 | if (freebno >= wantbno) { | 193 | /* |
| 194 | * We want to allocate from the start of a free extent if it is past | ||
| 195 | * the desired block or if we are allocating user data and the free | ||
| 196 | * extent is before desired block. The second case is there to allow | ||
| 197 | * for contiguous allocation from the remaining free space if the file | ||
| 198 | * grows in the short term. | ||
| 199 | */ | ||
| 200 | if (freebno >= wantbno || (userdata && freeend < wantend)) { | ||
| 193 | if ((newbno1 = roundup(freebno, alignment)) >= freeend) | 201 | if ((newbno1 = roundup(freebno, alignment)) >= freeend) |
| 194 | newbno1 = NULLAGBLOCK; | 202 | newbno1 = NULLAGBLOCK; |
| 195 | } else if (freeend >= wantend && alignment > 1) { | 203 | } else if (freeend >= wantend && alignment > 1) { |
| @@ -805,7 +813,8 @@ xfs_alloc_find_best_extent( | |||
| 805 | xfs_alloc_fix_len(args); | 813 | xfs_alloc_fix_len(args); |
| 806 | 814 | ||
| 807 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 815 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
| 808 | args->alignment, *sbnoa, | 816 | args->alignment, |
| 817 | args->userdata, *sbnoa, | ||
| 809 | *slena, &new); | 818 | *slena, &new); |
| 810 | 819 | ||
| 811 | /* | 820 | /* |
| @@ -976,7 +985,8 @@ restart: | |||
| 976 | if (args->len < blen) | 985 | if (args->len < blen) |
| 977 | continue; | 986 | continue; |
| 978 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 987 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
| 979 | args->alignment, ltbnoa, ltlena, <new); | 988 | args->alignment, args->userdata, ltbnoa, |
| 989 | ltlena, <new); | ||
| 980 | if (ltnew != NULLAGBLOCK && | 990 | if (ltnew != NULLAGBLOCK && |
| 981 | (args->len > blen || ltdiff < bdiff)) { | 991 | (args->len > blen || ltdiff < bdiff)) { |
| 982 | bdiff = ltdiff; | 992 | bdiff = ltdiff; |
| @@ -1128,7 +1138,8 @@ restart: | |||
| 1128 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 1138 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
| 1129 | xfs_alloc_fix_len(args); | 1139 | xfs_alloc_fix_len(args); |
| 1130 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 1140 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
| 1131 | args->alignment, ltbnoa, ltlena, <new); | 1141 | args->alignment, args->userdata, ltbnoa, |
| 1142 | ltlena, <new); | ||
| 1132 | 1143 | ||
| 1133 | error = xfs_alloc_find_best_extent(args, | 1144 | error = xfs_alloc_find_best_extent(args, |
| 1134 | &bno_cur_lt, &bno_cur_gt, | 1145 | &bno_cur_lt, &bno_cur_gt, |
| @@ -1144,7 +1155,8 @@ restart: | |||
| 1144 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); | 1155 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); |
| 1145 | xfs_alloc_fix_len(args); | 1156 | xfs_alloc_fix_len(args); |
| 1146 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 1157 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
| 1147 | args->alignment, gtbnoa, gtlena, >new); | 1158 | args->alignment, args->userdata, gtbnoa, |
| 1159 | gtlena, >new); | ||
| 1148 | 1160 | ||
| 1149 | error = xfs_alloc_find_best_extent(args, | 1161 | error = xfs_alloc_find_best_extent(args, |
| 1150 | &bno_cur_gt, &bno_cur_lt, | 1162 | &bno_cur_gt, &bno_cur_lt, |
| @@ -1203,7 +1215,7 @@ restart: | |||
| 1203 | } | 1215 | } |
| 1204 | rlen = args->len; | 1216 | rlen = args->len; |
| 1205 | (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, | 1217 | (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, |
| 1206 | ltbnoa, ltlena, <new); | 1218 | args->userdata, ltbnoa, ltlena, <new); |
| 1207 | ASSERT(ltnew >= ltbno); | 1219 | ASSERT(ltnew >= ltbno); |
| 1208 | ASSERT(ltnew + rlen <= ltbnoa + ltlena); | 1220 | ASSERT(ltnew + rlen <= ltbnoa + ltlena); |
| 1209 | ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | 1221 | ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); |
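The xfs_alloc_compute_diff changes thread a userdata flag down so that user-data allocations also take the start of a free extent that lies entirely before the requested block, leaving the tail contiguous for near-term file growth. A standalone model of just that decision (simplified; the real function also handles alignment and the other cases):

/* Simplified model of the "allocate from the start of the free extent" test.
 * Types and names are illustrative, not the kernel's. */
typedef unsigned int agblock_t;

static int alloc_from_extent_start(agblock_t wantbno, agblock_t wantlen,
                                   agblock_t freebno, agblock_t freelen,
                                   int userdata)
{
        agblock_t wantend = wantbno + wantlen;
        agblock_t freeend = freebno + freelen;

        /*
         * Take the start of the free extent either when it is already past
         * the desired block, or when this is user data and the whole free
         * extent sits before the desired range: the remainder then stays
         * free and contiguous for a file that keeps growing.
         */
        return freebno >= wantbno || (userdata && freeend < wantend);
}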
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 70c43d9f72c1..1b726d626941 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h | |||
| @@ -196,6 +196,8 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; | |||
| 196 | #define XFS_BMDR_SPACE_CALC(nrecs) \ | 196 | #define XFS_BMDR_SPACE_CALC(nrecs) \ |
| 197 | (int)(sizeof(xfs_bmdr_block_t) + \ | 197 | (int)(sizeof(xfs_bmdr_block_t) + \ |
| 198 | ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) | 198 | ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) |
| 199 | #define XFS_BMAP_BMDR_SPACE(bb) \ | ||
| 200 | (XFS_BMDR_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs))) | ||
| 199 | 201 | ||
| 200 | /* | 202 | /* |
| 201 | * Maximum number of bmap btree levels. | 203 | * Maximum number of bmap btree levels. |
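The new XFS_BMAP_BMDR_SPACE macro computes how many bytes a bmap btree root with bb_numrecs records occupies (header plus one key/pointer pair per record); the xfs_dfrag.c hunk further down switches to it instead of comparing if_broot_bytes. A rough model of the arithmetic, with placeholder sizes rather than the real on-disk structure sizes:

/* Rough model of XFS_BMDR_SPACE_CALC(): header plus key/pointer pairs. */
#include <stdio.h>

#define HDR_SIZE   8    /* stand-in for sizeof(xfs_bmdr_block_t) */
#define KEY_SIZE   8    /* stand-in for sizeof(xfs_bmbt_key_t)   */
#define PTR_SIZE   8    /* stand-in for sizeof(xfs_bmbt_ptr_t)   */

static int bmdr_space(int nrecs)
{
        return HDR_SIZE + nrecs * (KEY_SIZE + PTR_SIZE);
}

int main(void)
{
        printf("root with 10 records needs %d bytes\n", bmdr_space(10));
        return 0;
}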
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 4ec431777048..bfc4e0c26fd3 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
| @@ -140,6 +140,16 @@ xfs_buf_item_size( | |||
| 140 | 140 | ||
| 141 | ASSERT(bip->bli_flags & XFS_BLI_LOGGED); | 141 | ASSERT(bip->bli_flags & XFS_BLI_LOGGED); |
| 142 | 142 | ||
| 143 | if (bip->bli_flags & XFS_BLI_ORDERED) { | ||
| 144 | /* | ||
| 145 | * The buffer has been logged just to order it. | ||
| 146 | * It is not being included in the transaction | ||
| 147 | * commit, so no vectors are used at all. | ||
| 148 | */ | ||
| 149 | trace_xfs_buf_item_size_ordered(bip); | ||
| 150 | return XFS_LOG_VEC_ORDERED; | ||
| 151 | } | ||
| 152 | |||
| 143 | /* | 153 | /* |
| 144 | * the vector count is based on the number of buffer vectors we have | 154 | * the vector count is based on the number of buffer vectors we have |
| 145 | * dirty bits in. This will only be greater than one when we have a | 155 | * dirty bits in. This will only be greater than one when we have a |
| @@ -212,6 +222,7 @@ xfs_buf_item_format_segment( | |||
| 212 | goto out; | 222 | goto out; |
| 213 | } | 223 | } |
| 214 | 224 | ||
| 225 | |||
| 215 | /* | 226 | /* |
| 216 | * Fill in an iovec for each set of contiguous chunks. | 227 | * Fill in an iovec for each set of contiguous chunks. |
| 217 | */ | 228 | */ |
| @@ -299,18 +310,36 @@ xfs_buf_item_format( | |||
| 299 | 310 | ||
| 300 | /* | 311 | /* |
| 301 | * If it is an inode buffer, transfer the in-memory state to the | 312 | * If it is an inode buffer, transfer the in-memory state to the |
| 302 | * format flags and clear the in-memory state. We do not transfer | 313 | * format flags and clear the in-memory state. |
| 314 | * | ||
| 315 | * For buffer based inode allocation, we do not transfer | ||
| 303 | * this state if the inode buffer allocation has not yet been committed | 316 | * this state if the inode buffer allocation has not yet been committed |
| 304 | * to the log as setting the XFS_BLI_INODE_BUF flag will prevent | 317 | * to the log as setting the XFS_BLI_INODE_BUF flag will prevent |
| 305 | * correct replay of the inode allocation. | 318 | * correct replay of the inode allocation. |
| 319 | * | ||
| 320 | * For icreate item based inode allocation, the buffers aren't written | ||
| 321 | * to the journal during allocation, and hence we should always tag the | ||
| 322 | * buffer as an inode buffer so that the correct unlinked list replay | ||
| 323 | * occurs during recovery. | ||
| 306 | */ | 324 | */ |
| 307 | if (bip->bli_flags & XFS_BLI_INODE_BUF) { | 325 | if (bip->bli_flags & XFS_BLI_INODE_BUF) { |
| 308 | if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && | 326 | if (xfs_sb_version_hascrc(&lip->li_mountp->m_sb) || |
| 327 | !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && | ||
| 309 | xfs_log_item_in_current_chkpt(lip))) | 328 | xfs_log_item_in_current_chkpt(lip))) |
| 310 | bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF; | 329 | bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF; |
| 311 | bip->bli_flags &= ~XFS_BLI_INODE_BUF; | 330 | bip->bli_flags &= ~XFS_BLI_INODE_BUF; |
| 312 | } | 331 | } |
| 313 | 332 | ||
| 333 | if ((bip->bli_flags & (XFS_BLI_ORDERED|XFS_BLI_STALE)) == | ||
| 334 | XFS_BLI_ORDERED) { | ||
| 335 | /* | ||
| 336 | * The buffer has been logged just to order it. It is not being | ||
| 337 | * included in the transaction commit, so don't format it. | ||
| 338 | */ | ||
| 339 | trace_xfs_buf_item_format_ordered(bip); | ||
| 340 | return; | ||
| 341 | } | ||
| 342 | |||
| 314 | for (i = 0; i < bip->bli_format_count; i++) { | 343 | for (i = 0; i < bip->bli_format_count; i++) { |
| 315 | vecp = xfs_buf_item_format_segment(bip, vecp, offset, | 344 | vecp = xfs_buf_item_format_segment(bip, vecp, offset, |
| 316 | &bip->bli_formats[i]); | 345 | &bip->bli_formats[i]); |
| @@ -340,6 +369,7 @@ xfs_buf_item_pin( | |||
| 340 | 369 | ||
| 341 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 370 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
| 342 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || | 371 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || |
| 372 | (bip->bli_flags & XFS_BLI_ORDERED) || | ||
| 343 | (bip->bli_flags & XFS_BLI_STALE)); | 373 | (bip->bli_flags & XFS_BLI_STALE)); |
| 344 | 374 | ||
| 345 | trace_xfs_buf_item_pin(bip); | 375 | trace_xfs_buf_item_pin(bip); |
| @@ -512,8 +542,9 @@ xfs_buf_item_unlock( | |||
| 512 | { | 542 | { |
| 513 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); | 543 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); |
| 514 | struct xfs_buf *bp = bip->bli_buf; | 544 | struct xfs_buf *bp = bip->bli_buf; |
| 515 | int aborted, clean, i; | 545 | bool clean; |
| 516 | uint hold; | 546 | bool aborted; |
| 547 | int flags; | ||
| 517 | 548 | ||
| 518 | /* Clear the buffer's association with this transaction. */ | 549 | /* Clear the buffer's association with this transaction. */ |
| 519 | bp->b_transp = NULL; | 550 | bp->b_transp = NULL; |
| @@ -524,23 +555,21 @@ xfs_buf_item_unlock( | |||
| 524 | * (cancelled) buffers at unpin time, but we'll never go through the | 555 | * (cancelled) buffers at unpin time, but we'll never go through the |
| 525 | * pin/unpin cycle if we abort inside commit. | 556 | * pin/unpin cycle if we abort inside commit. |
| 526 | */ | 557 | */ |
| 527 | aborted = (lip->li_flags & XFS_LI_ABORTED) != 0; | 558 | aborted = (lip->li_flags & XFS_LI_ABORTED) ? true : false; |
| 528 | |||
| 529 | /* | 559 | /* |
| 530 | * Before possibly freeing the buf item, determine if we should | 560 | * Before possibly freeing the buf item, copy the per-transaction state |
| 531 | * release the buffer at the end of this routine. | 561 | * so we can reference it safely later after clearing it from the |
| 562 | * buffer log item. | ||
| 532 | */ | 563 | */ |
| 533 | hold = bip->bli_flags & XFS_BLI_HOLD; | 564 | flags = bip->bli_flags; |
| 534 | 565 | bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); | |
| 535 | /* Clear the per transaction state. */ | ||
| 536 | bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD); | ||
| 537 | 566 | ||
| 538 | /* | 567 | /* |
| 539 | * If the buf item is marked stale, then don't do anything. We'll | 568 | * If the buf item is marked stale, then don't do anything. We'll |
| 540 | * unlock the buffer and free the buf item when the buffer is unpinned | 569 | * unlock the buffer and free the buf item when the buffer is unpinned |
| 541 | * for the last time. | 570 | * for the last time. |
| 542 | */ | 571 | */ |
| 543 | if (bip->bli_flags & XFS_BLI_STALE) { | 572 | if (flags & XFS_BLI_STALE) { |
| 544 | trace_xfs_buf_item_unlock_stale(bip); | 573 | trace_xfs_buf_item_unlock_stale(bip); |
| 545 | ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); | 574 | ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); |
| 546 | if (!aborted) { | 575 | if (!aborted) { |
| @@ -557,13 +586,19 @@ xfs_buf_item_unlock( | |||
| 557 | * be the only reference to the buf item, so we free it anyway | 586 | * be the only reference to the buf item, so we free it anyway |
| 558 | * regardless of whether it is dirty or not. A dirty abort implies a | 587 | * regardless of whether it is dirty or not. A dirty abort implies a |
| 559 | * shutdown, anyway. | 588 | * shutdown, anyway. |
| 589 | * | ||
| 590 | * Ordered buffers are dirty but may have no recorded changes, so ensure | ||
| 591 | * we only release clean items here. | ||
| 560 | */ | 592 | */ |
| 561 | clean = 1; | 593 | clean = (flags & XFS_BLI_DIRTY) ? false : true; |
| 562 | for (i = 0; i < bip->bli_format_count; i++) { | 594 | if (clean) { |
| 563 | if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map, | 595 | int i; |
| 564 | bip->bli_formats[i].blf_map_size)) { | 596 | for (i = 0; i < bip->bli_format_count; i++) { |
| 565 | clean = 0; | 597 | if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map, |
| 566 | break; | 598 | bip->bli_formats[i].blf_map_size)) { |
| 599 | clean = false; | ||
| 600 | break; | ||
| 601 | } | ||
| 567 | } | 602 | } |
| 568 | } | 603 | } |
| 569 | if (clean) | 604 | if (clean) |
| @@ -576,7 +611,7 @@ xfs_buf_item_unlock( | |||
| 576 | } else | 611 | } else |
| 577 | atomic_dec(&bip->bli_refcount); | 612 | atomic_dec(&bip->bli_refcount); |
| 578 | 613 | ||
| 579 | if (!hold) | 614 | if (!(flags & XFS_BLI_HOLD)) |
| 580 | xfs_buf_relse(bp); | 615 | xfs_buf_relse(bp); |
| 581 | } | 616 | } |
| 582 | 617 | ||
| @@ -842,12 +877,6 @@ xfs_buf_item_log( | |||
| 842 | struct xfs_buf *bp = bip->bli_buf; | 877 | struct xfs_buf *bp = bip->bli_buf; |
| 843 | 878 | ||
| 844 | /* | 879 | /* |
| 845 | * Mark the item as having some dirty data for | ||
| 846 | * quick reference in xfs_buf_item_dirty. | ||
| 847 | */ | ||
| 848 | bip->bli_flags |= XFS_BLI_DIRTY; | ||
| 849 | |||
| 850 | /* | ||
| 851 | * walk each buffer segment and mark them dirty appropriately. | 880 | * walk each buffer segment and mark them dirty appropriately. |
| 852 | */ | 881 | */ |
| 853 | start = 0; | 882 | start = 0; |
| @@ -873,7 +902,7 @@ xfs_buf_item_log( | |||
| 873 | 902 | ||
| 874 | 903 | ||
| 875 | /* | 904 | /* |
| 876 | * Return 1 if the buffer has some data that has been logged (at any | 905 | * Return 1 if the buffer has been logged or ordered in a transaction (at any |
| 877 | * point, not just the current transaction) and 0 if not. | 906 | * point, not just the current transaction) and 0 if not. |
| 878 | */ | 907 | */ |
| 879 | uint | 908 | uint |
| @@ -907,11 +936,11 @@ void | |||
| 907 | xfs_buf_item_relse( | 936 | xfs_buf_item_relse( |
| 908 | xfs_buf_t *bp) | 937 | xfs_buf_t *bp) |
| 909 | { | 938 | { |
| 910 | xfs_buf_log_item_t *bip; | 939 | xfs_buf_log_item_t *bip = bp->b_fspriv; |
| 911 | 940 | ||
| 912 | trace_xfs_buf_item_relse(bp, _RET_IP_); | 941 | trace_xfs_buf_item_relse(bp, _RET_IP_); |
| 942 | ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); | ||
| 913 | 943 | ||
| 914 | bip = bp->b_fspriv; | ||
| 915 | bp->b_fspriv = bip->bli_item.li_bio_list; | 944 | bp->b_fspriv = bip->bli_item.li_bio_list; |
| 916 | if (bp->b_fspriv == NULL) | 945 | if (bp->b_fspriv == NULL) |
| 917 | bp->b_iodone = NULL; | 946 | bp->b_iodone = NULL; |
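The xfs_buf_item_unlock rework copies bli_flags into a local before clearing the per-transaction bits, so the later stale/hold/ordered tests act on the state as it was at unlock time. A minimal standalone sketch of that "snapshot then clear" pattern, with made-up names:

/* Illustrative pattern: snapshot per-transaction flags before clearing them,
 * then make every subsequent decision from the snapshot. */
#include <stdbool.h>

#define FLAG_LOGGED   0x1
#define FLAG_HOLD     0x2
#define FLAG_ORDERED  0x4
#define FLAG_STALE    0x8

struct item { unsigned int flags; };

/* Returns true when the caller should release the underlying buffer. */
static bool unlock_item(struct item *it)
{
        unsigned int flags = it->flags;                 /* snapshot */

        it->flags &= ~(FLAG_LOGGED | FLAG_HOLD | FLAG_ORDERED);

        if (flags & FLAG_STALE)
                return false;                           /* handled at unpin time */

        return !(flags & FLAG_HOLD);
}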
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 2573d2a75fc8..0f1c247dc680 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h | |||
| @@ -120,6 +120,7 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf) | |||
| 120 | #define XFS_BLI_INODE_ALLOC_BUF 0x10 | 120 | #define XFS_BLI_INODE_ALLOC_BUF 0x10 |
| 121 | #define XFS_BLI_STALE_INODE 0x20 | 121 | #define XFS_BLI_STALE_INODE 0x20 |
| 122 | #define XFS_BLI_INODE_BUF 0x40 | 122 | #define XFS_BLI_INODE_BUF 0x40 |
| 123 | #define XFS_BLI_ORDERED 0x80 | ||
| 123 | 124 | ||
| 124 | #define XFS_BLI_FLAGS \ | 125 | #define XFS_BLI_FLAGS \ |
| 125 | { XFS_BLI_HOLD, "HOLD" }, \ | 126 | { XFS_BLI_HOLD, "HOLD" }, \ |
| @@ -128,7 +129,8 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf) | |||
| 128 | { XFS_BLI_LOGGED, "LOGGED" }, \ | 129 | { XFS_BLI_LOGGED, "LOGGED" }, \ |
| 129 | { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ | 130 | { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ |
| 130 | { XFS_BLI_STALE_INODE, "STALE_INODE" }, \ | 131 | { XFS_BLI_STALE_INODE, "STALE_INODE" }, \ |
| 131 | { XFS_BLI_INODE_BUF, "INODE_BUF" } | 132 | { XFS_BLI_INODE_BUF, "INODE_BUF" }, \ |
| 133 | { XFS_BLI_ORDERED, "ORDERED" } | ||
| 132 | 134 | ||
| 133 | 135 | ||
| 134 | #ifdef __KERNEL__ | 136 | #ifdef __KERNEL__ |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index c407e1ccff43..e36445ceaf80 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
| @@ -24,6 +24,9 @@ | |||
| 24 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
| 25 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
| 26 | #include "xfs_bmap_btree.h" | 26 | #include "xfs_bmap_btree.h" |
| 27 | #include "xfs_alloc_btree.h" | ||
| 28 | #include "xfs_ialloc_btree.h" | ||
| 29 | #include "xfs_btree.h" | ||
| 27 | #include "xfs_dinode.h" | 30 | #include "xfs_dinode.h" |
| 28 | #include "xfs_inode.h" | 31 | #include "xfs_inode.h" |
| 29 | #include "xfs_inode_item.h" | 32 | #include "xfs_inode_item.h" |
| @@ -182,7 +185,7 @@ xfs_swap_extents_check_format( | |||
| 182 | */ | 185 | */ |
| 183 | if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { | 186 | if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { |
| 184 | if (XFS_IFORK_BOFF(ip) && | 187 | if (XFS_IFORK_BOFF(ip) && |
| 185 | tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) | 188 | XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) |
| 186 | return EINVAL; | 189 | return EINVAL; |
| 187 | if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= | 190 | if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= |
| 188 | XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) | 191 | XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) |
| @@ -192,9 +195,8 @@ xfs_swap_extents_check_format( | |||
| 192 | /* Reciprocal target->temp btree format checks */ | 195 | /* Reciprocal target->temp btree format checks */ |
| 193 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { | 196 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { |
| 194 | if (XFS_IFORK_BOFF(tip) && | 197 | if (XFS_IFORK_BOFF(tip) && |
| 195 | ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) | 198 | XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) |
| 196 | return EINVAL; | 199 | return EINVAL; |
| 197 | |||
| 198 | if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= | 200 | if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= |
| 199 | XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) | 201 | XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) |
| 200 | return EINVAL; | 202 | return EINVAL; |
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index e0cc1243a8aa..2aed25cae04d 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c | |||
| @@ -1108,6 +1108,7 @@ xfs_dir2_leaf_readbuf( | |||
| 1108 | struct xfs_mount *mp = dp->i_mount; | 1108 | struct xfs_mount *mp = dp->i_mount; |
| 1109 | struct xfs_buf *bp = *bpp; | 1109 | struct xfs_buf *bp = *bpp; |
| 1110 | struct xfs_bmbt_irec *map = mip->map; | 1110 | struct xfs_bmbt_irec *map = mip->map; |
| 1111 | struct blk_plug plug; | ||
| 1111 | int error = 0; | 1112 | int error = 0; |
| 1112 | int length; | 1113 | int length; |
| 1113 | int i; | 1114 | int i; |
| @@ -1236,6 +1237,7 @@ xfs_dir2_leaf_readbuf( | |||
| 1236 | /* | 1237 | /* |
| 1237 | * Do we need more readahead? | 1238 | * Do we need more readahead? |
| 1238 | */ | 1239 | */ |
| 1240 | blk_start_plug(&plug); | ||
| 1239 | for (mip->ra_index = mip->ra_offset = i = 0; | 1241 | for (mip->ra_index = mip->ra_offset = i = 0; |
| 1240 | mip->ra_want > mip->ra_current && i < mip->map_blocks; | 1242 | mip->ra_want > mip->ra_current && i < mip->map_blocks; |
| 1241 | i += mp->m_dirblkfsbs) { | 1243 | i += mp->m_dirblkfsbs) { |
| @@ -1287,6 +1289,7 @@ xfs_dir2_leaf_readbuf( | |||
| 1287 | } | 1289 | } |
| 1288 | } | 1290 | } |
| 1289 | } | 1291 | } |
| 1292 | blk_finish_plug(&plug); | ||
| 1290 | 1293 | ||
| 1291 | out: | 1294 | out: |
| 1292 | *bpp = bp; | 1295 | *bpp = bp; |
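The readahead loop in xfs_dir2_leaf_readbuf is now wrapped in a block plug so the individual readahead submissions are batched and dispatched together when the plug is finished. A kernel-side sketch of the pattern (it will not build outside a kernel tree, and issue_readahead() is a placeholder for the real per-block call, not an existing function):

/* Sketch of the plugging pattern: batch a burst of readahead submissions so
 * the block layer can merge and dispatch them together. */
#include <linux/blkdev.h>

void issue_readahead(int block);        /* placeholder */

static void readahead_burst(int nr_blocks)
{
        struct blk_plug plug;
        int i;

        blk_start_plug(&plug);
        for (i = 0; i < nr_blocks; i++)
                issue_readahead(i);     /* queued, not yet dispatched */
        blk_finish_plug(&plug);         /* flush the batched requests */
}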
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 044e97a33c8d..f01012de06d0 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
| @@ -570,13 +570,13 @@ xfs_qm_dqtobp( | |||
| 570 | xfs_buf_t **O_bpp, | 570 | xfs_buf_t **O_bpp, |
| 571 | uint flags) | 571 | uint flags) |
| 572 | { | 572 | { |
| 573 | xfs_bmbt_irec_t map; | 573 | struct xfs_bmbt_irec map; |
| 574 | int nmaps = 1, error; | 574 | int nmaps = 1, error; |
| 575 | xfs_buf_t *bp; | 575 | struct xfs_buf *bp; |
| 576 | xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); | 576 | struct xfs_inode *quotip = xfs_dq_to_quota_inode(dqp); |
| 577 | xfs_mount_t *mp = dqp->q_mount; | 577 | struct xfs_mount *mp = dqp->q_mount; |
| 578 | xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); | 578 | xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); |
| 579 | xfs_trans_t *tp = (tpp ? *tpp : NULL); | 579 | struct xfs_trans *tp = (tpp ? *tpp : NULL); |
| 580 | 580 | ||
| 581 | dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; | 581 | dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; |
| 582 | 582 | ||
| @@ -804,7 +804,7 @@ xfs_qm_dqget( | |||
| 804 | xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ | 804 | xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ |
| 805 | { | 805 | { |
| 806 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 806 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
| 807 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); | 807 | struct radix_tree_root *tree = xfs_dquot_tree(qi, type); |
| 808 | struct xfs_dquot *dqp; | 808 | struct xfs_dquot *dqp; |
| 809 | int error; | 809 | int error; |
| 810 | 810 | ||
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 4f0ebfc43cc9..b596626249b8 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h | |||
| @@ -143,10 +143,6 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) | |||
| 143 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) | 143 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) |
| 144 | #define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) | 144 | #define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) |
| 145 | #define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) | 145 | #define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) |
| 146 | #define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo) | ||
| 147 | #define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \ | ||
| 148 | XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ | ||
| 149 | XFS_DQ_TO_QINF(dqp)->qi_gquotaip) | ||
| 150 | 146 | ||
| 151 | extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, | 147 | extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, |
| 152 | uint, struct xfs_dquot **); | 148 | uint, struct xfs_dquot **); |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 0ad2b95fca12..de3dc98f4e8f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
| @@ -1268,8 +1268,7 @@ xfs_seek_data( | |||
| 1268 | } | 1268 | } |
| 1269 | 1269 | ||
| 1270 | out: | 1270 | out: |
| 1271 | if (offset != file->f_pos) | 1271 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
| 1272 | file->f_pos = offset; | ||
| 1273 | 1272 | ||
| 1274 | out_unlock: | 1273 | out_unlock: |
| 1275 | xfs_iunlock_map_shared(ip, lock); | 1274 | xfs_iunlock_map_shared(ip, lock); |
| @@ -1377,8 +1376,7 @@ out: | |||
| 1377 | * situation in particular. | 1376 | * situation in particular. |
| 1378 | */ | 1377 | */ |
| 1379 | offset = min_t(loff_t, offset, isize); | 1378 | offset = min_t(loff_t, offset, isize); |
| 1380 | if (offset != file->f_pos) | 1379 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
| 1381 | file->f_pos = offset; | ||
| 1382 | 1380 | ||
| 1383 | out_unlock: | 1381 | out_unlock: |
| 1384 | xfs_iunlock_map_shared(ip, lock); | 1382 | xfs_iunlock_map_shared(ip, lock); |
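The xfs_seek_data/xfs_seek_hole hunks switch to vfs_setpos(), which validates the new offset against s_maxbytes instead of assigning f_pos directly. From userspace these paths are exercised through lseek with SEEK_DATA and SEEK_HOLE; a small example (the path is a placeholder):

/* Example: walk the data extents of a possibly sparse file. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/some/sparse/file", O_RDONLY);
        off_t data, hole = 0;

        if (fd < 0)
                return 1;
        while ((data = lseek(fd, hole, SEEK_DATA)) >= 0) {
                hole = lseek(fd, data, SEEK_HOLE);
                if (hole < 0)
                        break;
                printf("data: %lld..%lld\n", (long long)data, (long long)hole);
        }
        close(fd);
        return 0;
}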
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 3c3644ea825b..614eb0cc3608 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
| @@ -176,7 +176,7 @@ xfs_growfs_data_private( | |||
| 176 | if (!bp) | 176 | if (!bp) |
| 177 | return EIO; | 177 | return EIO; |
| 178 | if (bp->b_error) { | 178 | if (bp->b_error) { |
| 179 | int error = bp->b_error; | 179 | error = bp->b_error; |
| 180 | xfs_buf_relse(bp); | 180 | xfs_buf_relse(bp); |
| 181 | return error; | 181 | return error; |
| 182 | } | 182 | } |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index c8f5ae1debf2..7a0c17d7ec09 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | #include "xfs_bmap.h" | 38 | #include "xfs_bmap.h" |
| 39 | #include "xfs_cksum.h" | 39 | #include "xfs_cksum.h" |
| 40 | #include "xfs_buf_item.h" | 40 | #include "xfs_buf_item.h" |
| 41 | #include "xfs_icreate_item.h" | ||
| 41 | 42 | ||
| 42 | 43 | ||
| 43 | /* | 44 | /* |
| @@ -150,12 +151,16 @@ xfs_check_agi_freecount( | |||
| 150 | #endif | 151 | #endif |
| 151 | 152 | ||
| 152 | /* | 153 | /* |
| 153 | * Initialise a new set of inodes. | 154 | * Initialise a new set of inodes. When called without a transaction context |
| 155 | * (e.g. from recovery) we initiate a delayed write of the inode buffers rather | ||
| 156 | * than logging them (which in a transaction context puts them into the AIL | ||
| 157 | * for writeback rather than the xfsbufd queue). | ||
| 154 | */ | 158 | */ |
| 155 | STATIC int | 159 | int |
| 156 | xfs_ialloc_inode_init( | 160 | xfs_ialloc_inode_init( |
| 157 | struct xfs_mount *mp, | 161 | struct xfs_mount *mp, |
| 158 | struct xfs_trans *tp, | 162 | struct xfs_trans *tp, |
| 163 | struct list_head *buffer_list, | ||
| 159 | xfs_agnumber_t agno, | 164 | xfs_agnumber_t agno, |
| 160 | xfs_agblock_t agbno, | 165 | xfs_agblock_t agbno, |
| 161 | xfs_agblock_t length, | 166 | xfs_agblock_t length, |
| @@ -208,6 +213,18 @@ xfs_ialloc_inode_init( | |||
| 208 | version = 3; | 213 | version = 3; |
| 209 | ino = XFS_AGINO_TO_INO(mp, agno, | 214 | ino = XFS_AGINO_TO_INO(mp, agno, |
| 210 | XFS_OFFBNO_TO_AGINO(mp, agbno, 0)); | 215 | XFS_OFFBNO_TO_AGINO(mp, agbno, 0)); |
| 216 | |||
| 217 | /* | ||
| 218 | * log the initialisation that is about to take place as a | ||
| 219 | * logical operation. This means the transaction does not | ||
| 220 | * need to log the physical changes to the inode buffers as log | ||
| 221 | * recovery will know what initialisation is actually needed. | ||
| 222 | * Hence we only need to log the buffers as "ordered" buffers so | ||
| 223 | * they track in the AIL as if they were physically logged. | ||
| 224 | */ | ||
| 225 | if (tp) | ||
| 226 | xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp), | ||
| 227 | mp->m_sb.sb_inodesize, length, gen); | ||
| 211 | } else if (xfs_sb_version_hasnlink(&mp->m_sb)) | 228 | } else if (xfs_sb_version_hasnlink(&mp->m_sb)) |
| 212 | version = 2; | 229 | version = 2; |
| 213 | else | 230 | else |
| @@ -223,13 +240,8 @@ xfs_ialloc_inode_init( | |||
| 223 | XBF_UNMAPPED); | 240 | XBF_UNMAPPED); |
| 224 | if (!fbuf) | 241 | if (!fbuf) |
| 225 | return ENOMEM; | 242 | return ENOMEM; |
| 226 | /* | 243 | |
| 227 | * Initialize all inodes in this buffer and then log them. | 244 | /* Initialize the inode buffers and log them appropriately. */ |
| 228 | * | ||
| 229 | * XXX: It would be much better if we had just one transaction | ||
| 230 | * to log a whole cluster of inodes instead of all the | ||
| 231 | * individual transactions causing a lot of log traffic. | ||
| 232 | */ | ||
| 233 | fbuf->b_ops = &xfs_inode_buf_ops; | 245 | fbuf->b_ops = &xfs_inode_buf_ops; |
| 234 | xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); | 246 | xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); |
| 235 | for (i = 0; i < ninodes; i++) { | 247 | for (i = 0; i < ninodes; i++) { |
| @@ -247,18 +259,39 @@ xfs_ialloc_inode_init( | |||
| 247 | ino++; | 259 | ino++; |
| 248 | uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid); | 260 | uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid); |
| 249 | xfs_dinode_calc_crc(mp, free); | 261 | xfs_dinode_calc_crc(mp, free); |
| 250 | } else { | 262 | } else if (tp) { |
| 251 | /* just log the inode core */ | 263 | /* just log the inode core */ |
| 252 | xfs_trans_log_buf(tp, fbuf, ioffset, | 264 | xfs_trans_log_buf(tp, fbuf, ioffset, |
| 253 | ioffset + isize - 1); | 265 | ioffset + isize - 1); |
| 254 | } | 266 | } |
| 255 | } | 267 | } |
| 256 | if (version == 3) { | 268 | |
| 257 | /* need to log the entire buffer */ | 269 | if (tp) { |
| 258 | xfs_trans_log_buf(tp, fbuf, 0, | 270 | /* |
| 259 | BBTOB(fbuf->b_length) - 1); | 271 | * Mark the buffer as an inode allocation buffer so it |
| 272 | * sticks in AIL at the point of this allocation | ||
| 273 | * transaction. This ensures they are on disk before | ||
| 274 | * the tail of the log can be moved past this | ||
| 275 | * transaction (i.e. by preventing relogging from moving | ||
| 276 | * it forward in the log). | ||
| 277 | */ | ||
| 278 | xfs_trans_inode_alloc_buf(tp, fbuf); | ||
| 279 | if (version == 3) { | ||
| 280 | /* | ||
| 281 | * Mark the buffer as ordered so that it is | ||
| 282 | * not physically logged in the transaction but | ||
| 283 | * still tracked in the AIL as part of the | ||
| 284 | * transaction, and pin the log appropriately. | ||
| 285 | */ | ||
| 286 | xfs_trans_ordered_buf(tp, fbuf); | ||
| 287 | xfs_trans_log_buf(tp, fbuf, 0, | ||
| 288 | BBTOB(fbuf->b_length) - 1); | ||
| 289 | } | ||
| 290 | } else { | ||
| 291 | fbuf->b_flags |= XBF_DONE; | ||
| 292 | xfs_buf_delwri_queue(fbuf, buffer_list); | ||
| 293 | xfs_buf_relse(fbuf); | ||
| 260 | } | 294 | } |
| 261 | xfs_trans_inode_alloc_buf(tp, fbuf); | ||
| 262 | } | 295 | } |
| 263 | return 0; | 296 | return 0; |
| 264 | } | 297 | } |
| @@ -303,7 +336,7 @@ xfs_ialloc_ag_alloc( | |||
| 303 | * First try to allocate inodes contiguous with the last-allocated | 336 | * First try to allocate inodes contiguous with the last-allocated |
| 304 | * chunk of inodes. If the filesystem is striped, this will fill | 337 | * chunk of inodes. If the filesystem is striped, this will fill |
| 305 | * an entire stripe unit with inodes. | 338 | * an entire stripe unit with inodes. |
| 306 | */ | 339 | */ |
| 307 | agi = XFS_BUF_TO_AGI(agbp); | 340 | agi = XFS_BUF_TO_AGI(agbp); |
| 308 | newino = be32_to_cpu(agi->agi_newino); | 341 | newino = be32_to_cpu(agi->agi_newino); |
| 309 | agno = be32_to_cpu(agi->agi_seqno); | 342 | agno = be32_to_cpu(agi->agi_seqno); |
| @@ -402,7 +435,7 @@ xfs_ialloc_ag_alloc( | |||
| 402 | * rather than a linear progression to prevent the next generation | 435 | * rather than a linear progression to prevent the next generation |
| 403 | * number from being easily guessable. | 436 | * number from being easily guessable. |
| 404 | */ | 437 | */ |
| 405 | error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, | 438 | error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno, |
| 406 | args.len, prandom_u32()); | 439 | args.len, prandom_u32()); |
| 407 | 440 | ||
| 408 | if (error) | 441 | if (error) |
| @@ -615,8 +648,7 @@ xfs_ialloc_get_rec( | |||
| 615 | struct xfs_btree_cur *cur, | 648 | struct xfs_btree_cur *cur, |
| 616 | xfs_agino_t agino, | 649 | xfs_agino_t agino, |
| 617 | xfs_inobt_rec_incore_t *rec, | 650 | xfs_inobt_rec_incore_t *rec, |
| 618 | int *done, | 651 | int *done) |
| 619 | int left) | ||
| 620 | { | 652 | { |
| 621 | int error; | 653 | int error; |
| 622 | int i; | 654 | int i; |
| @@ -724,12 +756,12 @@ xfs_dialloc_ag( | |||
| 724 | pag->pagl_leftrec != NULLAGINO && | 756 | pag->pagl_leftrec != NULLAGINO && |
| 725 | pag->pagl_rightrec != NULLAGINO) { | 757 | pag->pagl_rightrec != NULLAGINO) { |
| 726 | error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, | 758 | error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, |
| 727 | &trec, &doneleft, 1); | 759 | &trec, &doneleft); |
| 728 | if (error) | 760 | if (error) |
| 729 | goto error1; | 761 | goto error1; |
| 730 | 762 | ||
| 731 | error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, | 763 | error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, |
| 732 | &rec, &doneright, 0); | 764 | &rec, &doneright); |
| 733 | if (error) | 765 | if (error) |
| 734 | goto error1; | 766 | goto error1; |
| 735 | } else { | 767 | } else { |
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index c8da3df271e6..68c07320f096 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h | |||
| @@ -150,6 +150,14 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino, | |||
| 150 | int xfs_inobt_get_rec(struct xfs_btree_cur *cur, | 150 | int xfs_inobt_get_rec(struct xfs_btree_cur *cur, |
| 151 | xfs_inobt_rec_incore_t *rec, int *stat); | 151 | xfs_inobt_rec_incore_t *rec, int *stat); |
| 152 | 152 | ||
| 153 | /* | ||
| 154 | * Inode chunk initialisation routine | ||
| 155 | */ | ||
| 156 | int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, | ||
| 157 | struct list_head *buffer_list, | ||
| 158 | xfs_agnumber_t agno, xfs_agblock_t agbno, | ||
| 159 | xfs_agblock_t length, unsigned int gen); | ||
| 160 | |||
| 153 | extern const struct xfs_buf_ops xfs_agi_buf_ops; | 161 | extern const struct xfs_buf_ops xfs_agi_buf_ops; |
| 154 | 162 | ||
| 155 | #endif /* __XFS_IALLOC_H__ */ | 163 | #endif /* __XFS_IALLOC_H__ */ |
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 96e344e3e927..9560dc1f15a9 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c | |||
| @@ -335,7 +335,8 @@ xfs_iget_cache_miss( | |||
| 335 | iflags = XFS_INEW; | 335 | iflags = XFS_INEW; |
| 336 | if (flags & XFS_IGET_DONTCACHE) | 336 | if (flags & XFS_IGET_DONTCACHE) |
| 337 | iflags |= XFS_IDONTCACHE; | 337 | iflags |= XFS_IDONTCACHE; |
| 338 | ip->i_udquot = ip->i_gdquot = NULL; | 338 | ip->i_udquot = NULL; |
| 339 | ip->i_gdquot = NULL; | ||
| 339 | xfs_iflags_set(ip, iflags); | 340 | xfs_iflags_set(ip, iflags); |
| 340 | 341 | ||
| 341 | /* insert the new inode */ | 342 | /* insert the new inode */ |
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index e0f138c70a2f..a01afbb3909a 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h | |||
| @@ -40,7 +40,6 @@ void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip); | |||
| 40 | int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *); | 40 | int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *); |
| 41 | void xfs_eofblocks_worker(struct work_struct *); | 41 | void xfs_eofblocks_worker(struct work_struct *); |
| 42 | 42 | ||
| 43 | int xfs_sync_inode_grab(struct xfs_inode *ip); | ||
| 44 | int xfs_inode_ag_iterator(struct xfs_mount *mp, | 43 | int xfs_inode_ag_iterator(struct xfs_mount *mp, |
| 45 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, | 44 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, |
| 46 | int flags, void *args), | 45 | int flags, void *args), |
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c new file mode 100644 index 000000000000..7716a4e7375e --- /dev/null +++ b/fs/xfs/xfs_icreate_item.c | |||
| @@ -0,0 +1,195 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2008-2010, 2013 Dave Chinner | ||
| 3 | * All Rights Reserved. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or | ||
| 6 | * modify it under the terms of the GNU General Public License as | ||
| 7 | * published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it would be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write the Free Software Foundation, | ||
| 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 17 | */ | ||
| 18 | #include "xfs.h" | ||
| 19 | #include "xfs_fs.h" | ||
| 20 | #include "xfs_types.h" | ||
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | ||
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | ||
| 25 | #include "xfs_buf_item.h" | ||
| 26 | #include "xfs_sb.h" | ||
| 27 | #include "xfs_ag.h" | ||
| 28 | #include "xfs_dir2.h" | ||
| 29 | #include "xfs_mount.h" | ||
| 30 | #include "xfs_trans_priv.h" | ||
| 31 | #include "xfs_bmap_btree.h" | ||
| 32 | #include "xfs_alloc_btree.h" | ||
| 33 | #include "xfs_ialloc_btree.h" | ||
| 34 | #include "xfs_attr_sf.h" | ||
| 35 | #include "xfs_dinode.h" | ||
| 36 | #include "xfs_inode.h" | ||
| 37 | #include "xfs_inode_item.h" | ||
| 38 | #include "xfs_btree.h" | ||
| 39 | #include "xfs_ialloc.h" | ||
| 40 | #include "xfs_error.h" | ||
| 41 | #include "xfs_icreate_item.h" | ||
| 42 | |||
| 43 | kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ | ||
| 44 | |||
| 45 | static inline struct xfs_icreate_item *ICR_ITEM(struct xfs_log_item *lip) | ||
| 46 | { | ||
| 47 | return container_of(lip, struct xfs_icreate_item, ic_item); | ||
| 48 | } | ||
| 49 | |||
| 50 | /* | ||
| 51 | * This returns the number of iovecs needed to log the given inode item. | ||
| 52 | * | ||
| 53 | * We only need one iovec for the icreate log structure. | ||
| 54 | */ | ||
| 55 | STATIC uint | ||
| 56 | xfs_icreate_item_size( | ||
| 57 | struct xfs_log_item *lip) | ||
| 58 | { | ||
| 59 | return 1; | ||
| 60 | } | ||
| 61 | |||
| 62 | /* | ||
| 63 | * This is called to fill in the vector of log iovecs for the | ||
| 64 | * given inode create log item. | ||
| 65 | */ | ||
| 66 | STATIC void | ||
| 67 | xfs_icreate_item_format( | ||
| 68 | struct xfs_log_item *lip, | ||
| 69 | struct xfs_log_iovec *log_vector) | ||
| 70 | { | ||
| 71 | struct xfs_icreate_item *icp = ICR_ITEM(lip); | ||
| 72 | |||
| 73 | log_vector->i_addr = (xfs_caddr_t)&icp->ic_format; | ||
| 74 | log_vector->i_len = sizeof(struct xfs_icreate_log); | ||
| 75 | log_vector->i_type = XLOG_REG_TYPE_ICREATE; | ||
| 76 | } | ||
| 77 | |||
| 78 | |||
| 79 | /* Pinning has no meaning for the create item, so just return. */ | ||
| 80 | STATIC void | ||
| 81 | xfs_icreate_item_pin( | ||
| 82 | struct xfs_log_item *lip) | ||
| 83 | { | ||
| 84 | } | ||
| 85 | |||
| 86 | |||
| 87 | /* Unpinning has no meaning for the create item, so just return. */ | ||
| 88 | STATIC void | ||
| 89 | xfs_icreate_item_unpin( | ||
| 90 | struct xfs_log_item *lip, | ||
| 91 | int remove) | ||
| 92 | { | ||
| 93 | } | ||
| 94 | |||
| 95 | STATIC void | ||
| 96 | xfs_icreate_item_unlock( | ||
| 97 | struct xfs_log_item *lip) | ||
| 98 | { | ||
| 99 | struct xfs_icreate_item *icp = ICR_ITEM(lip); | ||
| 100 | |||
| 101 | if (icp->ic_item.li_flags & XFS_LI_ABORTED) | ||
| 102 | kmem_zone_free(xfs_icreate_zone, icp); | ||
| 103 | return; | ||
| 104 | } | ||
| 105 | |||
| 106 | /* | ||
| 107 | * Because we have ordered buffers being tracked in the AIL for the inode | ||
| 108 | * creation, we don't need the create item after this. Hence we can free | ||
| 109 | * the log item and return -1 to tell the caller we're done with the item. | ||
| 110 | */ | ||
| 111 | STATIC xfs_lsn_t | ||
| 112 | xfs_icreate_item_committed( | ||
| 113 | struct xfs_log_item *lip, | ||
| 114 | xfs_lsn_t lsn) | ||
| 115 | { | ||
| 116 | struct xfs_icreate_item *icp = ICR_ITEM(lip); | ||
| 117 | |||
| 118 | kmem_zone_free(xfs_icreate_zone, icp); | ||
| 119 | return (xfs_lsn_t)-1; | ||
| 120 | } | ||
| 121 | |||
| 122 | /* item can never get into the AIL */ | ||
| 123 | STATIC uint | ||
| 124 | xfs_icreate_item_push( | ||
| 125 | struct xfs_log_item *lip, | ||
| 126 | struct list_head *buffer_list) | ||
| 127 | { | ||
| 128 | ASSERT(0); | ||
| 129 | return XFS_ITEM_SUCCESS; | ||
| 130 | } | ||
| 131 | |||
| 132 | /* Ordered buffers do the dependency tracking here, so this does nothing. */ | ||
| 133 | STATIC void | ||
| 134 | xfs_icreate_item_committing( | ||
| 135 | struct xfs_log_item *lip, | ||
| 136 | xfs_lsn_t lsn) | ||
| 137 | { | ||
| 138 | } | ||
| 139 | |||
| 140 | /* | ||
| 141 | * This is the ops vector shared by all icreate log items. | ||
| 142 | */ | ||
| 143 | static struct xfs_item_ops xfs_icreate_item_ops = { | ||
| 144 | .iop_size = xfs_icreate_item_size, | ||
| 145 | .iop_format = xfs_icreate_item_format, | ||
| 146 | .iop_pin = xfs_icreate_item_pin, | ||
| 147 | .iop_unpin = xfs_icreate_item_unpin, | ||
| 148 | .iop_push = xfs_icreate_item_push, | ||
| 149 | .iop_unlock = xfs_icreate_item_unlock, | ||
| 150 | .iop_committed = xfs_icreate_item_committed, | ||
| 151 | .iop_committing = xfs_icreate_item_committing, | ||
| 152 | }; | ||
| 153 | |||
| 154 | |||
| 155 | /* | ||
| 156 | * Initialize the inode create log item for a newly allocated inode chunk. | ||
| 157 | * | ||
| 158 | * Inode extents can only reside within an AG. Hence specify the starting | ||
| 159 | * block for the inode chunk by offset within an AG as well as the | ||
| 160 | * length of the allocated extent. | ||
| 161 | * | ||
| 162 | * This joins the item to the transaction and marks it dirty so | ||
| 163 | * that we don't need a separate call to do this, nor does the | ||
| 164 | * caller need to know anything about the icreate item. | ||
| 165 | */ | ||
| 166 | void | ||
| 167 | xfs_icreate_log( | ||
| 168 | struct xfs_trans *tp, | ||
| 169 | xfs_agnumber_t agno, | ||
| 170 | xfs_agblock_t agbno, | ||
| 171 | unsigned int count, | ||
| 172 | unsigned int inode_size, | ||
| 173 | xfs_agblock_t length, | ||
| 174 | unsigned int generation) | ||
| 175 | { | ||
| 176 | struct xfs_icreate_item *icp; | ||
| 177 | |||
| 178 | icp = kmem_zone_zalloc(xfs_icreate_zone, KM_SLEEP); | ||
| 179 | |||
| 180 | xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE, | ||
| 181 | &xfs_icreate_item_ops); | ||
| 182 | |||
| 183 | icp->ic_format.icl_type = XFS_LI_ICREATE; | ||
| 184 | icp->ic_format.icl_size = 1; /* single vector */ | ||
| 185 | icp->ic_format.icl_ag = cpu_to_be32(agno); | ||
| 186 | icp->ic_format.icl_agbno = cpu_to_be32(agbno); | ||
| 187 | icp->ic_format.icl_count = cpu_to_be32(count); | ||
| 188 | icp->ic_format.icl_isize = cpu_to_be32(inode_size); | ||
| 189 | icp->ic_format.icl_length = cpu_to_be32(length); | ||
| 190 | icp->ic_format.icl_gen = cpu_to_be32(generation); | ||
| 191 | |||
| 192 | xfs_trans_add_item(tp, &icp->ic_item); | ||
| 193 | tp->t_flags |= XFS_TRANS_DIRTY; | ||
| 194 | icp->ic_item.li_desc->lid_flags |= XFS_LID_DIRTY; | ||
| 195 | } | ||
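The ICR_ITEM helper above uses container_of to recover the enclosing xfs_icreate_item from its embedded xfs_log_item, the standard kernel idiom for intrusively embedded structures. A self-contained userspace illustration of the same idiom, with model structures rather than the xfs ones:

/* Userspace illustration of the container_of idiom used by ICR_ITEM(). */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct log_item { int flags; };

struct icreate_item_model {
        struct log_item item;   /* embedded, as in struct xfs_icreate_item */
        int payload;
};

int main(void)
{
        struct icreate_item_model icp = { .item = { .flags = 0 }, .payload = 42 };
        struct log_item *lip = &icp.item;
        /* Recover the containing structure from a pointer to the member. */
        struct icreate_item_model *back =
                container_of(lip, struct icreate_item_model, item);

        printf("payload = %d\n", back->payload);
        return 0;
}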
diff --git a/fs/xfs/xfs_icreate_item.h b/fs/xfs/xfs_icreate_item.h new file mode 100644 index 000000000000..88ba8aa0bc41 --- /dev/null +++ b/fs/xfs/xfs_icreate_item.h | |||
| @@ -0,0 +1,52 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2008-2010, Dave Chinner | ||
| 3 | * All Rights Reserved. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or | ||
| 6 | * modify it under the terms of the GNU General Public License as | ||
| 7 | * published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it would be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write the Free Software Foundation, | ||
| 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 17 | */ | ||
| 18 | #ifndef XFS_ICREATE_ITEM_H | ||
| 19 | #define XFS_ICREATE_ITEM_H 1 | ||
| 20 | |||
| 21 | /* | ||
| 22 | * on disk log item structure | ||
| 23 | * | ||
| 24 | * Log recovery assumes the first two entries are the type and size and they fit | ||
| 25 | * in 32 bits. Also in host order (ugh) so they have to be 32 bit aligned so | ||
| 26 | * decoding can be done correctly. | ||
| 27 | */ | ||
| 28 | struct xfs_icreate_log { | ||
| 29 | __uint16_t icl_type; /* type of log format structure */ | ||
| 30 | __uint16_t icl_size; /* size of log format structure */ | ||
| 31 | __be32 icl_ag; /* ag being allocated in */ | ||
| 32 | __be32 icl_agbno; /* start block of inode range */ | ||
| 33 | __be32 icl_count; /* number of inodes to initialise */ | ||
| 34 | __be32 icl_isize; /* size of inodes */ | ||
| 35 | __be32 icl_length; /* length of extent to initialise */ | ||
| 36 | __be32 icl_gen; /* inode generation number to use */ | ||
| 37 | }; | ||
| 38 | |||
| 39 | /* in memory log item structure */ | ||
| 40 | struct xfs_icreate_item { | ||
| 41 | struct xfs_log_item ic_item; | ||
| 42 | struct xfs_icreate_log ic_format; | ||
| 43 | }; | ||
| 44 | |||
| 45 | extern kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ | ||
| 46 | |||
| 47 | void xfs_icreate_log(struct xfs_trans *tp, xfs_agnumber_t agno, | ||
| 48 | xfs_agblock_t agbno, unsigned int count, | ||
| 49 | unsigned int inode_size, xfs_agblock_t length, | ||
| 50 | unsigned int generation); | ||
| 51 | |||
| 52 | #endif /* XFS_ICREATE_ITEM_H */ | ||
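The header comment above states the layout contract: log recovery expects the type and size as the first two 16-bit fields, packed into the first 32-bit word, with the remaining payload 32-bit aligned. A small compile-time sketch of how such assumptions can be pinned down with C11 static asserts; the model structure below mirrors the header but is not the kernel definition:

/* Illustrative compile-time checks of the stated layout assumptions. */
#include <stddef.h>
#include <stdint.h>

struct icreate_log_model {
        uint16_t icl_type;
        uint16_t icl_size;
        uint32_t icl_ag;
        /* ... remaining fields elided ... */
};

_Static_assert(offsetof(struct icreate_log_model, icl_type) == 0,
               "type must come first");
_Static_assert(offsetof(struct icreate_log_model, icl_size) == 2,
               "size must immediately follow the type");
_Static_assert(offsetof(struct icreate_log_model, icl_ag) == 4,
               "payload must start 32-bit aligned");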
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7f7be5f98f52..9ecfe1e559fc 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
| @@ -1028,6 +1028,11 @@ xfs_dinode_calc_crc( | |||
| 1028 | 1028 | ||
| 1029 | /* | 1029 | /* |
| 1030 | * Read the disk inode attributes into the in-core inode structure. | 1030 | * Read the disk inode attributes into the in-core inode structure. |
| 1031 | * | ||
| 1032 | * If we are initialising a new inode and we are not utilising the | ||
| 1033 | * XFS_MOUNT_IKEEP inode cluster mode, we can simply build the new inode core | ||
| 1034 | * with a random generation number. If we are keeping inodes around, we need to | ||
| 1035 | * read the inode cluster to get the existing generation number off disk. | ||
| 1031 | */ | 1036 | */ |
| 1032 | int | 1037 | int |
| 1033 | xfs_iread( | 1038 | xfs_iread( |
| @@ -1047,6 +1052,22 @@ xfs_iread( | |||
| 1047 | if (error) | 1052 | if (error) |
| 1048 | return error; | 1053 | return error; |
| 1049 | 1054 | ||
| 1055 | /* shortcut IO on inode allocation if possible */ | ||
| 1056 | if ((iget_flags & XFS_IGET_CREATE) && | ||
| 1057 | !(mp->m_flags & XFS_MOUNT_IKEEP)) { | ||
| 1058 | /* initialise the on-disk inode core */ | ||
| 1059 | memset(&ip->i_d, 0, sizeof(ip->i_d)); | ||
| 1060 | ip->i_d.di_magic = XFS_DINODE_MAGIC; | ||
| 1061 | ip->i_d.di_gen = prandom_u32(); | ||
| 1062 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
| 1063 | ip->i_d.di_version = 3; | ||
| 1064 | ip->i_d.di_ino = ip->i_ino; | ||
| 1065 | uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid); | ||
| 1066 | } else | ||
| 1067 | ip->i_d.di_version = 2; | ||
| 1068 | return 0; | ||
| 1069 | } | ||
| 1070 | |||
| 1050 | /* | 1071 | /* |
| 1051 | * Get pointers to the on-disk inode and the buffer containing it. | 1072 | * Get pointers to the on-disk inode and the buffer containing it. |
| 1052 | */ | 1073 | */ |
| @@ -1133,17 +1154,16 @@ xfs_iread( | |||
| 1133 | xfs_buf_set_ref(bp, XFS_INO_REF); | 1154 | xfs_buf_set_ref(bp, XFS_INO_REF); |
| 1134 | 1155 | ||
| 1135 | /* | 1156 | /* |
| 1136 | * Use xfs_trans_brelse() to release the buffer containing the | 1157 | * Use xfs_trans_brelse() to release the buffer containing the on-disk |
| 1137 | * on-disk inode, because it was acquired with xfs_trans_read_buf() | 1158 | * inode, because it was acquired with xfs_trans_read_buf() in |
| 1138 | * in xfs_imap_to_bp() above. If tp is NULL, this is just a normal | 1159 | * xfs_imap_to_bp() above. If tp is NULL, this is just a normal |
| 1139 | * brelse(). If we're within a transaction, then xfs_trans_brelse() | 1160 | * brelse(). If we're within a transaction, then xfs_trans_brelse() |
| 1140 | * will only release the buffer if it is not dirty within the | 1161 | * will only release the buffer if it is not dirty within the |
| 1141 | * transaction. It will be OK to release the buffer in this case, | 1162 | * transaction. It will be OK to release the buffer in this case, |
| 1142 | * because inodes on disk are never destroyed and we will be | 1163 | * because inodes on disk are never destroyed and we will be locking the |
| 1143 | * locking the new in-core inode before putting it in the hash | 1164 | * new in-core inode before putting it in the cache where other |
| 1144 | * table where other processes can find it. Thus we don't have | 1165 | * processes can find it. Thus we don't have to worry about the inode |
| 1145 | * to worry about the inode being changed just because we released | 1166 | * being changed just because we released the buffer. |
| 1146 | * the buffer. | ||
| 1147 | */ | 1167 | */ |
| 1148 | out_brelse: | 1168 | out_brelse: |
| 1149 | xfs_trans_brelse(tp, bp); | 1169 | xfs_trans_brelse(tp, bp); |
| @@ -2028,8 +2048,6 @@ xfs_ifree( | |||
| 2028 | int error; | 2048 | int error; |
| 2029 | int delete; | 2049 | int delete; |
| 2030 | xfs_ino_t first_ino; | 2050 | xfs_ino_t first_ino; |
| 2031 | xfs_dinode_t *dip; | ||
| 2032 | xfs_buf_t *ibp; | ||
| 2033 | 2051 | ||
| 2034 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 2052 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
| 2035 | ASSERT(ip->i_d.di_nlink == 0); | 2053 | ASSERT(ip->i_d.di_nlink == 0); |
| @@ -2042,14 +2060,13 @@ xfs_ifree( | |||
| 2042 | * Pull the on-disk inode from the AGI unlinked list. | 2060 | * Pull the on-disk inode from the AGI unlinked list. |
| 2043 | */ | 2061 | */ |
| 2044 | error = xfs_iunlink_remove(tp, ip); | 2062 | error = xfs_iunlink_remove(tp, ip); |
| 2045 | if (error != 0) { | 2063 | if (error) |
| 2046 | return error; | 2064 | return error; |
| 2047 | } | ||
| 2048 | 2065 | ||
| 2049 | error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); | 2066 | error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); |
| 2050 | if (error != 0) { | 2067 | if (error) |
| 2051 | return error; | 2068 | return error; |
| 2052 | } | 2069 | |
| 2053 | ip->i_d.di_mode = 0; /* mark incore inode as free */ | 2070 | ip->i_d.di_mode = 0; /* mark incore inode as free */ |
| 2054 | ip->i_d.di_flags = 0; | 2071 | ip->i_d.di_flags = 0; |
| 2055 | ip->i_d.di_dmevmask = 0; | 2072 | ip->i_d.di_dmevmask = 0; |
| @@ -2061,31 +2078,10 @@ xfs_ifree( | |||
| 2061 | * by reincarnations of this inode. | 2078 | * by reincarnations of this inode. |
| 2062 | */ | 2079 | */ |
| 2063 | ip->i_d.di_gen++; | 2080 | ip->i_d.di_gen++; |
| 2064 | |||
| 2065 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 2081 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
| 2066 | 2082 | ||
| 2067 | error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp, | 2083 | if (delete) |
| 2068 | 0, 0); | ||
| 2069 | if (error) | ||
| 2070 | return error; | ||
| 2071 | |||
| 2072 | /* | ||
| 2073 | * Clear the on-disk di_mode. This is to prevent xfs_bulkstat | ||
| 2074 | * from picking up this inode when it is reclaimed (its incore state | ||
| 2075 | * initialzed but not flushed to disk yet). The in-core di_mode is | ||
| 2076 | * already cleared and a corresponding transaction logged. | ||
| 2077 | * The hack here just synchronizes the in-core to on-disk | ||
| 2078 | * di_mode value in advance before the actual inode sync to disk. | ||
| 2079 | * This is OK because the inode is already unlinked and would never | ||
| 2080 | * change its di_mode again for this inode generation. | ||
| 2081 | * This is a temporary hack that would require a proper fix | ||
| 2082 | * in the future. | ||
| 2083 | */ | ||
| 2084 | dip->di_mode = 0; | ||
| 2085 | |||
| 2086 | if (delete) { | ||
| 2087 | error = xfs_ifree_cluster(ip, tp, first_ino); | 2084 | error = xfs_ifree_cluster(ip, tp, first_ino); |
| 2088 | } | ||
| 2089 | 2085 | ||
| 2090 | return error; | 2086 | return error; |
| 2091 | } | 2087 | } |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 8f8aaee7f379..6a7096422295 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
| @@ -284,6 +284,15 @@ xfs_iomap_eof_want_preallocate( | |||
| 284 | return 0; | 284 | return 0; |
| 285 | 285 | ||
| 286 | /* | 286 | /* |
| 287 | * If the file is smaller than the minimum prealloc and we are using | ||
| 288 | * dynamic preallocation, don't do any preallocation at all as it is | ||
| 289 | * likely this is the only write to the file that is going to be done. | ||
| 290 | */ | ||
| 291 | if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) && | ||
| 292 | XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks)) | ||
| 293 | return 0; | ||
| 294 | |||
| 295 | /* | ||
| 287 | * If there are any real blocks past eof, then don't | 296 | * If there are any real blocks past eof, then don't |
| 288 | * do any speculative allocation. | 297 | * do any speculative allocation. |
| 289 | */ | 298 | */ |
| @@ -345,6 +354,10 @@ xfs_iomap_eof_prealloc_initial_size( | |||
| 345 | if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) | 354 | if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) |
| 346 | return 0; | 355 | return 0; |
| 347 | 356 | ||
| 357 | /* If the file is small, then use the minimum prealloc */ | ||
| 358 | if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign)) | ||
| 359 | return 0; | ||
| 360 | |||
| 348 | /* | 361 | /* |
| 349 | * As we write multiple pages, the offset will always align to the | 362 | * As we write multiple pages, the offset will always align to the |
| 350 | * start of a page and hence point to a hole at EOF. i.e. if the size is | 363 | * start of a page and hence point to a hole at EOF. i.e. if the size is |
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ca9ecaa81112..c69bbc493cb0 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
| @@ -987,7 +987,8 @@ xfs_fiemap_format( | |||
| 987 | if (bmv->bmv_oflags & BMV_OF_PREALLOC) | 987 | if (bmv->bmv_oflags & BMV_OF_PREALLOC) |
| 988 | fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; | 988 | fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; |
| 989 | else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { | 989 | else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { |
| 990 | fiemap_flags |= FIEMAP_EXTENT_DELALLOC; | 990 | fiemap_flags |= (FIEMAP_EXTENT_DELALLOC | |
| 991 | FIEMAP_EXTENT_UNKNOWN); | ||
| 991 | physical = 0; /* no block yet */ | 992 | physical = 0; /* no block yet */ |
| 992 | } | 993 | } |
| 993 | if (bmv->bmv_oflags & BMV_OF_LAST) | 994 | if (bmv->bmv_oflags & BMV_OF_LAST) |
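Editor's note: the xfs_iops.c hunk above makes delayed-allocation extents report FIEMAP_EXTENT_UNKNOWN in addition to FIEMAP_EXTENT_DELALLOC, since no physical block exists yet. Here is a small stand-alone model of that flag translation; the constants are local stand-ins for illustration, not the uapi values from linux/fiemap.h.

#include <stdint.h>
#include <stdio.h>

#define BMV_OF_PREALLOC          0x1
#define BMV_OF_DELALLOC          0x2

#define FIEMAP_EXTENT_UNKNOWN    0x1
#define FIEMAP_EXTENT_DELALLOC   0x2
#define FIEMAP_EXTENT_UNWRITTEN  0x4

static uint32_t bmv_to_fiemap_flags(uint32_t oflags, uint64_t *physical)
{
    uint32_t flags = 0;

    if (oflags & BMV_OF_PREALLOC) {
        flags |= FIEMAP_EXTENT_UNWRITTEN;
    } else if (oflags & BMV_OF_DELALLOC) {
        /* delayed allocation: no block assigned yet, so location is unknown */
        flags |= FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN;
        *physical = 0;
    }
    return flags;
}

int main(void)
{
    uint64_t phys = 12345;
    uint32_t f = bmv_to_fiemap_flags(BMV_OF_DELALLOC, &phys);

    printf("flags=%#x physical=%llu\n", (unsigned)f, (unsigned long long)phys);
    return 0;
}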
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 2ea7d402188d..bc92c5306a17 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
| @@ -43,7 +43,7 @@ xfs_internal_inum( | |||
| 43 | { | 43 | { |
| 44 | return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || | 44 | return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || |
| 45 | (xfs_sb_version_hasquota(&mp->m_sb) && | 45 | (xfs_sb_version_hasquota(&mp->m_sb) && |
| 46 | (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); | 46 | xfs_is_quota_inode(&mp->m_sb, ino))); |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | /* | 49 | /* |
| @@ -383,11 +383,13 @@ xfs_bulkstat( | |||
| 383 | * Also start read-ahead now for this chunk. | 383 | * Also start read-ahead now for this chunk. |
| 384 | */ | 384 | */ |
| 385 | if (r.ir_freecount < XFS_INODES_PER_CHUNK) { | 385 | if (r.ir_freecount < XFS_INODES_PER_CHUNK) { |
| 386 | struct blk_plug plug; | ||
| 386 | /* | 387 | /* |
| 387 | * Loop over all clusters in the next chunk. | 388 | * Loop over all clusters in the next chunk. |
| 388 | * Do a readahead if there are any allocated | 389 | * Do a readahead if there are any allocated |
| 389 | * inodes in that cluster. | 390 | * inodes in that cluster. |
| 390 | */ | 391 | */ |
| 392 | blk_start_plug(&plug); | ||
| 391 | agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); | 393 | agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); |
| 392 | for (chunkidx = 0; | 394 | for (chunkidx = 0; |
| 393 | chunkidx < XFS_INODES_PER_CHUNK; | 395 | chunkidx < XFS_INODES_PER_CHUNK; |
| @@ -399,6 +401,7 @@ xfs_bulkstat( | |||
| 399 | agbno, nbcluster, | 401 | agbno, nbcluster, |
| 400 | &xfs_inode_buf_ops); | 402 | &xfs_inode_buf_ops); |
| 401 | } | 403 | } |
| 404 | blk_finish_plug(&plug); | ||
| 402 | irbp->ir_startino = r.ir_startino; | 405 | irbp->ir_startino = r.ir_startino; |
| 403 | irbp->ir_freecount = r.ir_freecount; | 406 | irbp->ir_freecount = r.ir_freecount; |
| 404 | irbp->ir_free = r.ir_free; | 407 | irbp->ir_free = r.ir_free; |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b345a7c85153..d852a2b3e1fd 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
| @@ -1963,6 +1963,10 @@ xlog_write_calc_vec_length( | |||
| 1963 | headers++; | 1963 | headers++; |
| 1964 | 1964 | ||
| 1965 | for (lv = log_vector; lv; lv = lv->lv_next) { | 1965 | for (lv = log_vector; lv; lv = lv->lv_next) { |
| 1966 | /* we don't write ordered log vectors */ | ||
| 1967 | if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) | ||
| 1968 | continue; | ||
| 1969 | |||
| 1966 | headers += lv->lv_niovecs; | 1970 | headers += lv->lv_niovecs; |
| 1967 | 1971 | ||
| 1968 | for (i = 0; i < lv->lv_niovecs; i++) { | 1972 | for (i = 0; i < lv->lv_niovecs; i++) { |
| @@ -2216,7 +2220,7 @@ xlog_write( | |||
| 2216 | index = 0; | 2220 | index = 0; |
| 2217 | lv = log_vector; | 2221 | lv = log_vector; |
| 2218 | vecp = lv->lv_iovecp; | 2222 | vecp = lv->lv_iovecp; |
| 2219 | while (lv && index < lv->lv_niovecs) { | 2223 | while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { |
| 2220 | void *ptr; | 2224 | void *ptr; |
| 2221 | int log_offset; | 2225 | int log_offset; |
| 2222 | 2226 | ||
| @@ -2236,13 +2240,22 @@ xlog_write( | |||
| 2236 | * This loop writes out as many regions as can fit in the amount | 2240 | * This loop writes out as many regions as can fit in the amount |
| 2237 | * of space which was allocated by xlog_state_get_iclog_space(). | 2241 | * of space which was allocated by xlog_state_get_iclog_space(). |
| 2238 | */ | 2242 | */ |
| 2239 | while (lv && index < lv->lv_niovecs) { | 2243 | while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { |
| 2240 | struct xfs_log_iovec *reg = &vecp[index]; | 2244 | struct xfs_log_iovec *reg; |
| 2241 | struct xlog_op_header *ophdr; | 2245 | struct xlog_op_header *ophdr; |
| 2242 | int start_rec_copy; | 2246 | int start_rec_copy; |
| 2243 | int copy_len; | 2247 | int copy_len; |
| 2244 | int copy_off; | 2248 | int copy_off; |
| 2249 | bool ordered = false; | ||
| 2250 | |||
| 2251 | /* ordered log vectors have no regions to write */ | ||
| 2252 | if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) { | ||
| 2253 | ASSERT(lv->lv_niovecs == 0); | ||
| 2254 | ordered = true; | ||
| 2255 | goto next_lv; | ||
| 2256 | } | ||
| 2245 | 2257 | ||
| 2258 | reg = &vecp[index]; | ||
| 2246 | ASSERT(reg->i_len % sizeof(__int32_t) == 0); | 2259 | ASSERT(reg->i_len % sizeof(__int32_t) == 0); |
| 2247 | ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0); | 2260 | ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0); |
| 2248 | 2261 | ||
| @@ -2302,12 +2315,13 @@ xlog_write( | |||
| 2302 | break; | 2315 | break; |
| 2303 | 2316 | ||
| 2304 | if (++index == lv->lv_niovecs) { | 2317 | if (++index == lv->lv_niovecs) { |
| 2318 | next_lv: | ||
| 2305 | lv = lv->lv_next; | 2319 | lv = lv->lv_next; |
| 2306 | index = 0; | 2320 | index = 0; |
| 2307 | if (lv) | 2321 | if (lv) |
| 2308 | vecp = lv->lv_iovecp; | 2322 | vecp = lv->lv_iovecp; |
| 2309 | } | 2323 | } |
| 2310 | if (record_cnt == 0) { | 2324 | if (record_cnt == 0 && ordered == false) { |
| 2311 | if (!lv) | 2325 | if (!lv) |
| 2312 | return 0; | 2326 | return 0; |
| 2313 | break; | 2327 | break; |
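Editor's note: the xfs_log.c and xfs_log.h hunks introduce "ordered" log vectors, marked by the XFS_LOG_VEC_ORDERED sentinel in lv_buf_len, which are tracked but never written, so both the iovec accounting and the write loop must skip them. Below is a simplified stand-alone illustration of that convention; the struct is a cut-down stand-in for struct xfs_log_vec.

#include <stdio.h>

#define LOG_VEC_ORDERED (-1)

struct log_vec {
    struct log_vec *next;
    int             niovecs;   /* number of regions to write */
    int             buf_len;   /* LOG_VEC_ORDERED for ordered items */
};

/* Count the op headers needed, ignoring ordered vectors entirely. */
static int count_headers(struct log_vec *lv)
{
    int headers = 0;

    for (; lv; lv = lv->next) {
        if (lv->buf_len == LOG_VEC_ORDERED)
            continue;          /* tracked for ordering, never written */
        headers += lv->niovecs;
    }
    return headers;
}

int main(void)
{
    struct log_vec ordered = { .next = NULL,     .niovecs = 0, .buf_len = LOG_VEC_ORDERED };
    struct log_vec normal  = { .next = &ordered, .niovecs = 3, .buf_len = 256 };

    printf("headers needed: %d\n", count_headers(&normal));   /* prints 3 */
    return 0;
}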
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 5caee96059df..fb630e496c12 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
| @@ -88,7 +88,8 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) | |||
| 88 | #define XLOG_REG_TYPE_UNMOUNT 17 | 88 | #define XLOG_REG_TYPE_UNMOUNT 17 |
| 89 | #define XLOG_REG_TYPE_COMMIT 18 | 89 | #define XLOG_REG_TYPE_COMMIT 18 |
| 90 | #define XLOG_REG_TYPE_TRANSHDR 19 | 90 | #define XLOG_REG_TYPE_TRANSHDR 19 |
| 91 | #define XLOG_REG_TYPE_MAX 19 | 91 | #define XLOG_REG_TYPE_ICREATE 20 |
| 92 | #define XLOG_REG_TYPE_MAX 20 | ||
| 92 | 93 | ||
| 93 | typedef struct xfs_log_iovec { | 94 | typedef struct xfs_log_iovec { |
| 94 | void *i_addr; /* beginning address of region */ | 95 | void *i_addr; /* beginning address of region */ |
| @@ -105,6 +106,8 @@ struct xfs_log_vec { | |||
| 105 | int lv_buf_len; /* size of formatted buffer */ | 106 | int lv_buf_len; /* size of formatted buffer */ |
| 106 | }; | 107 | }; |
| 107 | 108 | ||
| 109 | #define XFS_LOG_VEC_ORDERED (-1) | ||
| 110 | |||
| 108 | /* | 111 | /* |
| 109 | * Structure used to pass callback function and the function's argument | 112 | * Structure used to pass callback function and the function's argument |
| 110 | * to the log manager. | 113 | * to the log manager. |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index d0833b54e55d..02b9cf3f8252 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
| @@ -127,6 +127,7 @@ xlog_cil_prepare_log_vecs( | |||
| 127 | int index; | 127 | int index; |
| 128 | int len = 0; | 128 | int len = 0; |
| 129 | uint niovecs; | 129 | uint niovecs; |
| 130 | bool ordered = false; | ||
| 130 | 131 | ||
| 131 | /* Skip items which aren't dirty in this transaction. */ | 132 | /* Skip items which aren't dirty in this transaction. */ |
| 132 | if (!(lidp->lid_flags & XFS_LID_DIRTY)) | 133 | if (!(lidp->lid_flags & XFS_LID_DIRTY)) |
| @@ -137,14 +138,30 @@ xlog_cil_prepare_log_vecs( | |||
| 137 | if (!niovecs) | 138 | if (!niovecs) |
| 138 | continue; | 139 | continue; |
| 139 | 140 | ||
| 141 | /* | ||
| 142 | * Ordered items need to be tracked but we do not wish to write | ||
| 143 | * them. We need a logvec to track the object, but we do not | ||
| 144 | * need an iovec or buffer to be allocated for copying data. | ||
| 145 | */ | ||
| 146 | if (niovecs == XFS_LOG_VEC_ORDERED) { | ||
| 147 | ordered = true; | ||
| 148 | niovecs = 0; | ||
| 149 | } | ||
| 150 | |||
| 140 | new_lv = kmem_zalloc(sizeof(*new_lv) + | 151 | new_lv = kmem_zalloc(sizeof(*new_lv) + |
| 141 | niovecs * sizeof(struct xfs_log_iovec), | 152 | niovecs * sizeof(struct xfs_log_iovec), |
| 142 | KM_SLEEP|KM_NOFS); | 153 | KM_SLEEP|KM_NOFS); |
| 143 | 154 | ||
| 155 | new_lv->lv_item = lidp->lid_item; | ||
| 156 | new_lv->lv_niovecs = niovecs; | ||
| 157 | if (ordered) { | ||
| 158 | /* track as an ordered logvec */ | ||
| 159 | new_lv->lv_buf_len = XFS_LOG_VEC_ORDERED; | ||
| 160 | goto next; | ||
| 161 | } | ||
| 162 | |||
| 144 | /* The allocated iovec region lies beyond the log vector. */ | 163 | /* The allocated iovec region lies beyond the log vector. */ |
| 145 | new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; | 164 | new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; |
| 146 | new_lv->lv_niovecs = niovecs; | ||
| 147 | new_lv->lv_item = lidp->lid_item; | ||
| 148 | 165 | ||
| 149 | /* build the vector array and calculate its length */ | 166 | /* build the vector array and calculate its length */ |

| 150 | IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp); | 167 | IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp); |
| @@ -165,6 +182,7 @@ xlog_cil_prepare_log_vecs( | |||
| 165 | } | 182 | } |
| 166 | ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len); | 183 | ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len); |
| 167 | 184 | ||
| 185 | next: | ||
| 168 | if (!ret_lv) | 186 | if (!ret_lv) |
| 169 | ret_lv = new_lv; | 187 | ret_lv = new_lv; |
| 170 | else | 188 | else |
| @@ -191,8 +209,18 @@ xfs_cil_prepare_item( | |||
| 191 | 209 | ||
| 192 | if (old) { | 210 | if (old) { |
| 193 | /* existing lv on log item, space used is a delta */ | 211 | /* existing lv on log item, space used is a delta */ |
| 194 | ASSERT(!list_empty(&lv->lv_item->li_cil)); | 212 | ASSERT((old->lv_buf && old->lv_buf_len && old->lv_niovecs) || |
| 195 | ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); | 213 | old->lv_buf_len == XFS_LOG_VEC_ORDERED); |
| 214 | |||
| 215 | /* | ||
| 216 | * If the new item is ordered, keep the old one that is already | ||
| 217 | * tracking dirty or ordered regions | ||
| 218 | */ | ||
| 219 | if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) { | ||
| 220 | ASSERT(!lv->lv_buf); | ||
| 221 | kmem_free(lv); | ||
| 222 | return; | ||
| 223 | } | ||
| 196 | 224 | ||
| 197 | *len += lv->lv_buf_len - old->lv_buf_len; | 225 | *len += lv->lv_buf_len - old->lv_buf_len; |
| 198 | *diff_iovecs += lv->lv_niovecs - old->lv_niovecs; | 226 | *diff_iovecs += lv->lv_niovecs - old->lv_niovecs; |
| @@ -201,10 +229,11 @@ xfs_cil_prepare_item( | |||
| 201 | } else { | 229 | } else { |
| 202 | /* new lv, must pin the log item */ | 230 | /* new lv, must pin the log item */ |
| 203 | ASSERT(!lv->lv_item->li_lv); | 231 | ASSERT(!lv->lv_item->li_lv); |
| 204 | ASSERT(list_empty(&lv->lv_item->li_cil)); | ||
| 205 | 232 | ||
| 206 | *len += lv->lv_buf_len; | 233 | if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) { |
| 207 | *diff_iovecs += lv->lv_niovecs; | 234 | *len += lv->lv_buf_len; |
| 235 | *diff_iovecs += lv->lv_niovecs; | ||
| 236 | } | ||
| 208 | IOP_PIN(lv->lv_item); | 237 | IOP_PIN(lv->lv_item); |
| 209 | 238 | ||
| 210 | } | 239 | } |
| @@ -259,18 +288,24 @@ xlog_cil_insert_items( | |||
| 259 | * We can do this safely because the context can't checkpoint until we | 288 | * We can do this safely because the context can't checkpoint until we |
| 260 | * are done so it doesn't matter exactly how we update the CIL. | 289 | * are done so it doesn't matter exactly how we update the CIL. |
| 261 | */ | 290 | */ |
| 262 | for (lv = log_vector; lv; lv = lv->lv_next) | ||
| 263 | xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); | ||
| 264 | |||
| 265 | /* account for space used by new iovec headers */ | ||
| 266 | len += diff_iovecs * sizeof(xlog_op_header_t); | ||
| 267 | |||
| 268 | spin_lock(&cil->xc_cil_lock); | 291 | spin_lock(&cil->xc_cil_lock); |
| 292 | for (lv = log_vector; lv; ) { | ||
| 293 | struct xfs_log_vec *next = lv->lv_next; | ||
| 269 | 294 | ||
| 270 | /* move the items to the tail of the CIL */ | 295 | ASSERT(lv->lv_item->li_lv || list_empty(&lv->lv_item->li_cil)); |
| 271 | for (lv = log_vector; lv; lv = lv->lv_next) | 296 | lv->lv_next = NULL; |
| 297 | |||
| 298 | /* | ||
| 299 | * xfs_cil_prepare_item() may free the lv, so move the item on | ||
| 300 | * the CIL first. | ||
| 301 | */ | ||
| 272 | list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil); | 302 | list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil); |
| 303 | xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); | ||
| 304 | lv = next; | ||
| 305 | } | ||
| 273 | 306 | ||
| 307 | /* account for space used by new iovec headers */ | ||
| 308 | len += diff_iovecs * sizeof(xlog_op_header_t); | ||
| 274 | ctx->nvecs += diff_iovecs; | 309 | ctx->nvecs += diff_iovecs; |
| 275 | 310 | ||
| 276 | /* | 311 | /* |
| @@ -381,9 +416,7 @@ xlog_cil_push( | |||
| 381 | struct xfs_cil_ctx *new_ctx; | 416 | struct xfs_cil_ctx *new_ctx; |
| 382 | struct xlog_in_core *commit_iclog; | 417 | struct xlog_in_core *commit_iclog; |
| 383 | struct xlog_ticket *tic; | 418 | struct xlog_ticket *tic; |
| 384 | int num_lv; | ||
| 385 | int num_iovecs; | 419 | int num_iovecs; |
| 386 | int len; | ||
| 387 | int error = 0; | 420 | int error = 0; |
| 388 | struct xfs_trans_header thdr; | 421 | struct xfs_trans_header thdr; |
| 389 | struct xfs_log_iovec lhdr; | 422 | struct xfs_log_iovec lhdr; |
| @@ -428,12 +461,9 @@ xlog_cil_push( | |||
| 428 | * side which is currently locked out by the flush lock. | 461 | * side which is currently locked out by the flush lock. |
| 429 | */ | 462 | */ |
| 430 | lv = NULL; | 463 | lv = NULL; |
| 431 | num_lv = 0; | ||
| 432 | num_iovecs = 0; | 464 | num_iovecs = 0; |
| 433 | len = 0; | ||
| 434 | while (!list_empty(&cil->xc_cil)) { | 465 | while (!list_empty(&cil->xc_cil)) { |
| 435 | struct xfs_log_item *item; | 466 | struct xfs_log_item *item; |
| 436 | int i; | ||
| 437 | 467 | ||
| 438 | item = list_first_entry(&cil->xc_cil, | 468 | item = list_first_entry(&cil->xc_cil, |
| 439 | struct xfs_log_item, li_cil); | 469 | struct xfs_log_item, li_cil); |
| @@ -444,11 +474,7 @@ xlog_cil_push( | |||
| 444 | lv->lv_next = item->li_lv; | 474 | lv->lv_next = item->li_lv; |
| 445 | lv = item->li_lv; | 475 | lv = item->li_lv; |
| 446 | item->li_lv = NULL; | 476 | item->li_lv = NULL; |
| 447 | |||
| 448 | num_lv++; | ||
| 449 | num_iovecs += lv->lv_niovecs; | 477 | num_iovecs += lv->lv_niovecs; |
| 450 | for (i = 0; i < lv->lv_niovecs; i++) | ||
| 451 | len += lv->lv_iovecp[i].i_len; | ||
| 452 | } | 478 | } |
| 453 | 479 | ||
| 454 | /* | 480 | /* |
| @@ -701,6 +727,7 @@ xfs_log_commit_cil( | |||
| 701 | if (commit_lsn) | 727 | if (commit_lsn) |
| 702 | *commit_lsn = log->l_cilp->xc_ctx->sequence; | 728 | *commit_lsn = log->l_cilp->xc_ctx->sequence; |
| 703 | 729 | ||
| 730 | /* xlog_cil_insert_items() destroys log_vector list */ | ||
| 704 | xlog_cil_insert_items(log, log_vector, tp->t_ticket); | 731 | xlog_cil_insert_items(log, log_vector, tp->t_ticket); |
| 705 | 732 | ||
| 706 | /* check we didn't blow the reservation */ | 733 | /* check we didn't blow the reservation */ |
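Editor's note: the xfs_log_cil.c hunk rewrites the CIL insert loop to capture lv->lv_next before calling xfs_cil_prepare_item(), because that call may free the log vector it is handed. The generic pattern, shown here on a plain singly linked list with invented helpers, is to grab the next pointer before any callback that might free the current node.

#include <stdio.h>
#include <stdlib.h>

struct node {
    struct node *next;
    int          keep;   /* 0: the callback will free this node */
};

/* May free @n, mimicking xfs_cil_prepare_item() freeing an ordered logvec. */
static void prepare(struct node *n)
{
    if (!n->keep)
        free(n);
}

static void walk(struct node *head)
{
    struct node *n = head;

    while (n) {
        struct node *next = n->next;   /* grab before prepare() may free n */

        n->next = NULL;
        prepare(n);                    /* n must not be touched after this */
        n = next;
    }
}

int main(void)
{
    struct node *a = malloc(sizeof(*a));
    struct node *b = malloc(sizeof(*b));

    a->next = b;    a->keep = 0;       /* freed inside prepare() */
    b->next = NULL; b->keep = 1;
    walk(a);
    free(b);
    printf("walked safely\n");
    return 0;
}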
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 7cf5e4eafe28..6fcc910a50b9 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
| @@ -45,6 +45,7 @@ | |||
| 45 | #include "xfs_cksum.h" | 45 | #include "xfs_cksum.h" |
| 46 | #include "xfs_trace.h" | 46 | #include "xfs_trace.h" |
| 47 | #include "xfs_icache.h" | 47 | #include "xfs_icache.h" |
| 48 | #include "xfs_icreate_item.h" | ||
| 48 | 49 | ||
| 49 | /* Need all the magic numbers and buffer ops structures from these headers */ | 50 | /* Need all the magic numbers and buffer ops structures from these headers */ |
| 50 | #include "xfs_symlink.h" | 51 | #include "xfs_symlink.h" |
| @@ -1617,7 +1618,10 @@ xlog_recover_add_to_trans( | |||
| 1617 | * form the cancelled buffer table. Hence they have tobe done last. | 1618 | * form the cancelled buffer table. Hence they have tobe done last. |
| 1618 | * | 1619 | * |
| 1619 | * 3. Inode allocation buffers must be replayed before inode items that | 1620 | * 3. Inode allocation buffers must be replayed before inode items that |
| 1620 | * read the buffer and replay changes into it. | 1621 | * read the buffer and replay changes into it. For filesystems using the |
| 1622 | * ICREATE transactions, this means XFS_LI_ICREATE objects need to get | ||
| 1623 | * treated the same as inode allocation buffers as they create and | ||
| 1624 | * initialise the buffers directly. | ||
| 1621 | * | 1625 | * |
| 1622 | * 4. Inode unlink buffers must be replayed after inode items are replayed. | 1626 | * 4. Inode unlink buffers must be replayed after inode items are replayed. |
| 1623 | * This ensures that inodes are completely flushed to the inode buffer | 1627 | * This ensures that inodes are completely flushed to the inode buffer |
| @@ -1632,10 +1636,17 @@ xlog_recover_add_to_trans( | |||
| 1632 | * from all the other buffers and move them to last. | 1636 | * from all the other buffers and move them to last. |
| 1633 | * | 1637 | * |
| 1634 | * Hence, 4 lists, in order from head to tail: | 1638 | * Hence, 4 lists, in order from head to tail: |
| 1635 | * - buffer_list for all buffers except cancelled/inode unlink buffers | 1639 | * - buffer_list for all buffers except cancelled/inode unlink buffers |
| 1636 | * - item_list for all non-buffer items | 1640 | * - item_list for all non-buffer items |
| 1637 | * - inode_buffer_list for inode unlink buffers | 1641 | * - inode_buffer_list for inode unlink buffers |
| 1638 | * - cancel_list for the cancelled buffers | 1642 | * - cancel_list for the cancelled buffers |
| 1643 | * | ||
| 1644 | * Note that we add objects to the tail of the lists so that first-to-last | ||
| 1645 | * ordering is preserved within the lists. Adding objects to the head of the | ||
| 1646 | * list means when we traverse from the head we walk them in last-to-first | ||
| 1647 | * order. For cancelled buffers and inode unlink buffers this doesn't matter, | ||
| 1648 | * but for all other items there may be specific ordering that we need to | ||
| 1649 | * preserve. | ||
| 1639 | */ | 1650 | */ |
| 1640 | STATIC int | 1651 | STATIC int |
| 1641 | xlog_recover_reorder_trans( | 1652 | xlog_recover_reorder_trans( |
| @@ -1655,6 +1666,9 @@ xlog_recover_reorder_trans( | |||
| 1655 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 1666 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
| 1656 | 1667 | ||
| 1657 | switch (ITEM_TYPE(item)) { | 1668 | switch (ITEM_TYPE(item)) { |
| 1669 | case XFS_LI_ICREATE: | ||
| 1670 | list_move_tail(&item->ri_list, &buffer_list); | ||
| 1671 | break; | ||
| 1658 | case XFS_LI_BUF: | 1672 | case XFS_LI_BUF: |
| 1659 | if (buf_f->blf_flags & XFS_BLF_CANCEL) { | 1673 | if (buf_f->blf_flags & XFS_BLF_CANCEL) { |
| 1660 | trace_xfs_log_recover_item_reorder_head(log, | 1674 | trace_xfs_log_recover_item_reorder_head(log, |
| @@ -2982,6 +2996,93 @@ xlog_recover_efd_pass2( | |||
| 2982 | } | 2996 | } |
| 2983 | 2997 | ||
| 2984 | /* | 2998 | /* |
| 2999 | * This routine is called when an inode create format structure is found in a | ||
| 3000 | * committed transaction in the log. Its purpose is to initialise the inodes | ||
| 3001 | * being allocated on disk. This requires us to get inode cluster buffers that | ||
| 3002 | * match the range to be initialised, stamped with inode templates and written | ||
| 3003 | * by delayed write so that subsequent modifications will hit the cached buffer | ||
| 3004 | * and only need writing out at the end of recovery. | ||
| 3005 | */ | ||
| 3006 | STATIC int | ||
| 3007 | xlog_recover_do_icreate_pass2( | ||
| 3008 | struct xlog *log, | ||
| 3009 | struct list_head *buffer_list, | ||
| 3010 | xlog_recover_item_t *item) | ||
| 3011 | { | ||
| 3012 | struct xfs_mount *mp = log->l_mp; | ||
| 3013 | struct xfs_icreate_log *icl; | ||
| 3014 | xfs_agnumber_t agno; | ||
| 3015 | xfs_agblock_t agbno; | ||
| 3016 | unsigned int count; | ||
| 3017 | unsigned int isize; | ||
| 3018 | xfs_agblock_t length; | ||
| 3019 | |||
| 3020 | icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; | ||
| 3021 | if (icl->icl_type != XFS_LI_ICREATE) { | ||
| 3022 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); | ||
| 3023 | return EINVAL; | ||
| 3024 | } | ||
| 3025 | |||
| 3026 | if (icl->icl_size != 1) { | ||
| 3027 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size"); | ||
| 3028 | return EINVAL; | ||
| 3029 | } | ||
| 3030 | |||
| 3031 | agno = be32_to_cpu(icl->icl_ag); | ||
| 3032 | if (agno >= mp->m_sb.sb_agcount) { | ||
| 3033 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno"); | ||
| 3034 | return EINVAL; | ||
| 3035 | } | ||
| 3036 | agbno = be32_to_cpu(icl->icl_agbno); | ||
| 3037 | if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { | ||
| 3038 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno"); | ||
| 3039 | return EINVAL; | ||
| 3040 | } | ||
| 3041 | isize = be32_to_cpu(icl->icl_isize); | ||
| 3042 | if (isize != mp->m_sb.sb_inodesize) { | ||
| 3043 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize"); | ||
| 3044 | return EINVAL; | ||
| 3045 | } | ||
| 3046 | count = be32_to_cpu(icl->icl_count); | ||
| 3047 | if (!count) { | ||
| 3048 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count"); | ||
| 3049 | return EINVAL; | ||
| 3050 | } | ||
| 3051 | length = be32_to_cpu(icl->icl_length); | ||
| 3052 | if (!length || length >= mp->m_sb.sb_agblocks) { | ||
| 3053 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length"); | ||
| 3054 | return EINVAL; | ||
| 3055 | } | ||
| 3056 | |||
| 3057 | /* existing allocation is fixed value */ | ||
| 3058 | ASSERT(count == XFS_IALLOC_INODES(mp)); | ||
| 3059 | ASSERT(length == XFS_IALLOC_BLOCKS(mp)); | ||
| 3060 | if (count != XFS_IALLOC_INODES(mp) || | ||
| 3061 | length != XFS_IALLOC_BLOCKS(mp)) { | ||
| 3062 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); | ||
| 3063 | return EINVAL; | ||
| 3064 | } | ||
| 3065 | |||
| 3066 | /* | ||
| 3067 | * Inode buffers can be freed. Do not replay the inode initialisation as | ||
| 3068 | * we could be overwriting something written after this inode buffer was | ||
| 3069 | * cancelled. | ||
| 3070 | * | ||
| 3071 | * XXX: we need to iterate all buffers and only init those that are not | ||
| 3072 | * cancelled. I think that a more fine grained factoring of | ||
| 3073 | * xfs_ialloc_inode_init may be appropriate here to enable this to be | ||
| 3074 | * done easily. | ||
| 3075 | */ | ||
| 3076 | if (xlog_check_buffer_cancelled(log, | ||
| 3077 | XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0)) | ||
| 3078 | return 0; | ||
| 3079 | |||
| 3080 | xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length, | ||
| 3081 | be32_to_cpu(icl->icl_gen)); | ||
| 3082 | return 0; | ||
| 3083 | } | ||
| 3084 | |||
| 3085 | /* | ||
| 2985 | * Free up any resources allocated by the transaction | 3086 | * Free up any resources allocated by the transaction |
| 2986 | * | 3087 | * |
| 2987 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. | 3088 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. |
| @@ -3023,6 +3124,7 @@ xlog_recover_commit_pass1( | |||
| 3023 | case XFS_LI_EFI: | 3124 | case XFS_LI_EFI: |
| 3024 | case XFS_LI_EFD: | 3125 | case XFS_LI_EFD: |
| 3025 | case XFS_LI_DQUOT: | 3126 | case XFS_LI_DQUOT: |
| 3127 | case XFS_LI_ICREATE: | ||
| 3026 | /* nothing to do in pass 1 */ | 3128 | /* nothing to do in pass 1 */ |
| 3027 | return 0; | 3129 | return 0; |
| 3028 | default: | 3130 | default: |
| @@ -3053,6 +3155,8 @@ xlog_recover_commit_pass2( | |||
| 3053 | return xlog_recover_efd_pass2(log, item); | 3155 | return xlog_recover_efd_pass2(log, item); |
| 3054 | case XFS_LI_DQUOT: | 3156 | case XFS_LI_DQUOT: |
| 3055 | return xlog_recover_dquot_pass2(log, buffer_list, item); | 3157 | return xlog_recover_dquot_pass2(log, buffer_list, item); |
| 3158 | case XFS_LI_ICREATE: | ||
| 3159 | return xlog_recover_do_icreate_pass2(log, buffer_list, item); | ||
| 3056 | case XFS_LI_QUOTAOFF: | 3160 | case XFS_LI_QUOTAOFF: |
| 3057 | /* nothing to do in pass2 */ | 3161 | /* nothing to do in pass2 */ |
| 3058 | return 0; | 3162 | return 0; |
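Editor's note: the new xlog_recover_do_icreate_pass2() above range-checks every field pulled from the icreate log record against filesystem geometry before initialising any inode buffers. The condensed model below shows only that validation step; the structs and geometry numbers are invented for the example and the record is assumed to be already converted to CPU endianness.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct geom {              /* stand-in for the superblock fields used */
    uint32_t agcount;
    uint32_t agblocks;
    uint32_t inodesize;
};

struct icreate_rec {       /* stand-in for struct xfs_icreate_log */
    uint32_t ag;
    uint32_t agbno;
    uint32_t count;
    uint32_t isize;
    uint32_t length;
};

static int check_icreate(const struct geom *g, const struct icreate_rec *r)
{
    if (r->ag >= g->agcount)
        return EINVAL;     /* bad allocation group number */
    if (r->agbno == 0 || r->agbno >= g->agblocks)
        return EINVAL;     /* block outside the allocation group */
    if (r->isize != g->inodesize)
        return EINVAL;     /* inode size mismatch */
    if (r->count == 0 || r->length == 0 || r->length >= g->agblocks)
        return EINVAL;     /* empty or oversized inode chunk */
    return 0;              /* safe to initialise the cluster buffers */
}

int main(void)
{
    struct geom g = { .agcount = 4, .agblocks = 1u << 20, .inodesize = 256 };
    struct icreate_rec r = { .ag = 1, .agbno = 64, .count = 64,
                             .isize = 256, .length = 16 };

    printf("icreate record %s\n", check_icreate(&g, &r) ? "rejected" : "ok");
    return 0;
}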
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index e8e310c05097..2b0ba3581656 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
| @@ -336,6 +336,14 @@ xfs_mount_validate_sb( | |||
| 336 | return XFS_ERROR(EWRONGFS); | 336 | return XFS_ERROR(EWRONGFS); |
| 337 | } | 337 | } |
| 338 | 338 | ||
| 339 | if ((sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) && | ||
| 340 | (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | | ||
| 341 | XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD))) { | ||
| 342 | xfs_notice(mp, | ||
| 343 | "Super block has XFS_OQUOTA bits along with XFS_PQUOTA and/or XFS_GQUOTA bits.\n"); | ||
| 344 | return XFS_ERROR(EFSCORRUPTED); | ||
| 345 | } | ||
| 346 | |||
| 339 | /* | 347 | /* |
| 340 | * Version 5 superblock feature mask validation. Reject combinations the | 348 | * Version 5 superblock feature mask validation. Reject combinations the |
| 341 | * kernel cannot support up front before checking anything else. For | 349 | * kernel cannot support up front before checking anything else. For |
| @@ -561,6 +569,18 @@ out_unwind: | |||
| 561 | return error; | 569 | return error; |
| 562 | } | 570 | } |
| 563 | 571 | ||
| 572 | static void | ||
| 573 | xfs_sb_quota_from_disk(struct xfs_sb *sbp) | ||
| 574 | { | ||
| 575 | if (sbp->sb_qflags & XFS_OQUOTA_ENFD) | ||
| 576 | sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? | ||
| 577 | XFS_PQUOTA_ENFD : XFS_GQUOTA_ENFD; | ||
| 578 | if (sbp->sb_qflags & XFS_OQUOTA_CHKD) | ||
| 579 | sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? | ||
| 580 | XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD; | ||
| 581 | sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD); | ||
| 582 | } | ||
| 583 | |||
| 564 | void | 584 | void |
| 565 | xfs_sb_from_disk( | 585 | xfs_sb_from_disk( |
| 566 | struct xfs_sb *to, | 586 | struct xfs_sb *to, |
| @@ -622,6 +642,35 @@ xfs_sb_from_disk( | |||
| 622 | to->sb_lsn = be64_to_cpu(from->sb_lsn); | 642 | to->sb_lsn = be64_to_cpu(from->sb_lsn); |
| 623 | } | 643 | } |
| 624 | 644 | ||
| 645 | static inline void | ||
| 646 | xfs_sb_quota_to_disk( | ||
| 647 | xfs_dsb_t *to, | ||
| 648 | xfs_sb_t *from, | ||
| 649 | __int64_t *fields) | ||
| 650 | { | ||
| 651 | __uint16_t qflags = from->sb_qflags; | ||
| 652 | |||
| 653 | if (*fields & XFS_SB_QFLAGS) { | ||
| 654 | /* | ||
| 655 | * The in-core version of sb_qflags does not have | ||
| 656 | * XFS_OQUOTA_* flags, whereas the on-disk version | ||
| 657 | * does. So, convert incore XFS_{PG}QUOTA_* flags | ||
| 658 | * to on-disk XFS_OQUOTA_* flags. | ||
| 659 | */ | ||
| 660 | qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | | ||
| 661 | XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); | ||
| 662 | |||
| 663 | if (from->sb_qflags & | ||
| 664 | (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) | ||
| 665 | qflags |= XFS_OQUOTA_ENFD; | ||
| 666 | if (from->sb_qflags & | ||
| 667 | (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) | ||
| 668 | qflags |= XFS_OQUOTA_CHKD; | ||
| 669 | to->sb_qflags = cpu_to_be16(qflags); | ||
| 670 | *fields &= ~XFS_SB_QFLAGS; | ||
| 671 | } | ||
| 672 | } | ||
| 673 | |||
| 625 | /* | 674 | /* |
| 626 | * Copy in core superblock to ondisk one. | 675 | * Copy in core superblock to ondisk one. |
| 627 | * | 676 | * |
| @@ -643,6 +692,7 @@ xfs_sb_to_disk( | |||
| 643 | if (!fields) | 692 | if (!fields) |
| 644 | return; | 693 | return; |
| 645 | 694 | ||
| 695 | xfs_sb_quota_to_disk(to, from, &fields); | ||
| 646 | while (fields) { | 696 | while (fields) { |
| 647 | f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); | 697 | f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); |
| 648 | first = xfs_sb_info[f].offset; | 698 | first = xfs_sb_info[f].offset; |
| @@ -835,6 +885,7 @@ reread: | |||
| 835 | */ | 885 | */ |
| 836 | xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); | 886 | xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); |
| 837 | 887 | ||
| 888 | xfs_sb_quota_from_disk(&mp->m_sb); | ||
| 838 | /* | 889 | /* |
| 839 | * We must be able to do sector-sized and sector-aligned IO. | 890 | * We must be able to do sector-sized and sector-aligned IO. |
| 840 | */ | 891 | */ |
| @@ -987,42 +1038,27 @@ xfs_update_alignment(xfs_mount_t *mp) | |||
| 987 | */ | 1038 | */ |
| 988 | if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || | 1039 | if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || |
| 989 | (BBTOB(mp->m_swidth) & mp->m_blockmask)) { | 1040 | (BBTOB(mp->m_swidth) & mp->m_blockmask)) { |
| 990 | if (mp->m_flags & XFS_MOUNT_RETERR) { | 1041 | xfs_warn(mp, |
| 991 | xfs_warn(mp, "alignment check failed: " | 1042 | "alignment check failed: sunit/swidth vs. blocksize(%d)", |
| 992 | "(sunit/swidth vs. blocksize)"); | 1043 | sbp->sb_blocksize); |
| 993 | return XFS_ERROR(EINVAL); | 1044 | return XFS_ERROR(EINVAL); |
| 994 | } | ||
| 995 | mp->m_dalign = mp->m_swidth = 0; | ||
| 996 | } else { | 1045 | } else { |
| 997 | /* | 1046 | /* |
| 998 | * Convert the stripe unit and width to FSBs. | 1047 | * Convert the stripe unit and width to FSBs. |
| 999 | */ | 1048 | */ |
| 1000 | mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); | 1049 | mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); |
| 1001 | if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { | 1050 | if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { |
| 1002 | if (mp->m_flags & XFS_MOUNT_RETERR) { | ||
| 1003 | xfs_warn(mp, "alignment check failed: " | ||
| 1004 | "(sunit/swidth vs. ag size)"); | ||
| 1005 | return XFS_ERROR(EINVAL); | ||
| 1006 | } | ||
| 1007 | xfs_warn(mp, | 1051 | xfs_warn(mp, |
| 1008 | "stripe alignment turned off: sunit(%d)/swidth(%d) " | 1052 | "alignment check failed: sunit/swidth vs. agsize(%d)", |
| 1009 | "incompatible with agsize(%d)", | 1053 | sbp->sb_agblocks); |
| 1010 | mp->m_dalign, mp->m_swidth, | 1054 | return XFS_ERROR(EINVAL); |
| 1011 | sbp->sb_agblocks); | ||
| 1012 | |||
| 1013 | mp->m_dalign = 0; | ||
| 1014 | mp->m_swidth = 0; | ||
| 1015 | } else if (mp->m_dalign) { | 1055 | } else if (mp->m_dalign) { |
| 1016 | mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); | 1056 | mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); |
| 1017 | } else { | 1057 | } else { |
| 1018 | if (mp->m_flags & XFS_MOUNT_RETERR) { | 1058 | xfs_warn(mp, |
| 1019 | xfs_warn(mp, "alignment check failed: " | 1059 | "alignment check failed: sunit(%d) less than bsize(%d)", |
| 1020 | "sunit(%d) less than bsize(%d)", | 1060 | mp->m_dalign, sbp->sb_blocksize); |
| 1021 | mp->m_dalign, | 1061 | return XFS_ERROR(EINVAL); |
| 1022 | mp->m_blockmask +1); | ||
| 1023 | return XFS_ERROR(EINVAL); | ||
| 1024 | } | ||
| 1025 | mp->m_swidth = 0; | ||
| 1026 | } | 1062 | } |
| 1027 | } | 1063 | } |
| 1028 | 1064 | ||
| @@ -1039,6 +1075,10 @@ xfs_update_alignment(xfs_mount_t *mp) | |||
| 1039 | sbp->sb_width = mp->m_swidth; | 1075 | sbp->sb_width = mp->m_swidth; |
| 1040 | mp->m_update_flags |= XFS_SB_WIDTH; | 1076 | mp->m_update_flags |= XFS_SB_WIDTH; |
| 1041 | } | 1077 | } |
| 1078 | } else { | ||
| 1079 | xfs_warn(mp, | ||
| 1080 | "cannot change alignment: superblock does not support data alignment"); | ||
| 1081 | return XFS_ERROR(EINVAL); | ||
| 1042 | } | 1082 | } |
| 1043 | } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && | 1083 | } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && |
| 1044 | xfs_sb_version_hasdalign(&mp->m_sb)) { | 1084 | xfs_sb_version_hasdalign(&mp->m_sb)) { |
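Editor's note: the xfs_mount.c hunk adds xfs_sb_quota_from_disk()/xfs_sb_quota_to_disk(), which expand the combined on-disk XFS_OQUOTA_* bits into separate group/project bits in core and fold them back together on write-out. This is a toy round-trip version of that conversion; the bit values are arbitrary stand-ins, not the on-disk definitions.

#include <stdint.h>
#include <stdio.h>

#define PQUOTA_ACCT  0x01   /* project quota accounting enabled */
#define OQUOTA_ENFD  0x02   /* on-disk: "other" quota enforced */
#define OQUOTA_CHKD  0x04   /* on-disk: "other" quota checked */
#define PQUOTA_ENFD  0x08
#define GQUOTA_ENFD  0x10
#define PQUOTA_CHKD  0x20
#define GQUOTA_CHKD  0x40

static uint16_t qflags_from_disk(uint16_t q)
{
    /* OQUOTA applies to whichever of project/group accounting is active */
    if (q & OQUOTA_ENFD)
        q |= (q & PQUOTA_ACCT) ? PQUOTA_ENFD : GQUOTA_ENFD;
    if (q & OQUOTA_CHKD)
        q |= (q & PQUOTA_ACCT) ? PQUOTA_CHKD : GQUOTA_CHKD;
    return q & ~(OQUOTA_ENFD | OQUOTA_CHKD);
}

static uint16_t qflags_to_disk(uint16_t q)
{
    /* collapse the separate in-core bits back into the shared OQUOTA bits */
    if (q & (PQUOTA_ENFD | GQUOTA_ENFD))
        q |= OQUOTA_ENFD;
    if (q & (PQUOTA_CHKD | GQUOTA_CHKD))
        q |= OQUOTA_CHKD;
    return q & ~(PQUOTA_ENFD | GQUOTA_ENFD | PQUOTA_CHKD | GQUOTA_CHKD);
}

int main(void)
{
    uint16_t ondisk = PQUOTA_ACCT | OQUOTA_ENFD;
    uint16_t incore = qflags_from_disk(ondisk);

    printf("incore=%#x back-to-disk=%#x\n", incore, qflags_to_disk(incore));
    return 0;
}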
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b004cecdfb04..4e374d4a9189 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
| @@ -192,8 +192,6 @@ typedef struct xfs_mount { | |||
| 192 | xfs_dablk_t m_dirleafblk; /* blockno of dir non-data v2 */ | 192 | xfs_dablk_t m_dirleafblk; /* blockno of dir non-data v2 */ |
| 193 | xfs_dablk_t m_dirfreeblk; /* blockno of dirfreeindex v2 */ | 193 | xfs_dablk_t m_dirfreeblk; /* blockno of dirfreeindex v2 */ |
| 194 | uint m_chsize; /* size of next field */ | 194 | uint m_chsize; /* size of next field */ |
| 195 | struct xfs_chash *m_chash; /* fs private inode per-cluster | ||
| 196 | * hash table */ | ||
| 197 | atomic_t m_active_trans; /* number trans frozen */ | 195 | atomic_t m_active_trans; /* number trans frozen */ |
| 198 | #ifdef HAVE_PERCPU_SB | 196 | #ifdef HAVE_PERCPU_SB |
| 199 | xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ | 197 | xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ |
| @@ -229,8 +227,6 @@ typedef struct xfs_mount { | |||
| 229 | operations, typically for | 227 | operations, typically for |
| 230 | disk errors in metadata */ | 228 | disk errors in metadata */ |
| 231 | #define XFS_MOUNT_DISCARD (1ULL << 5) /* discard unused blocks */ | 229 | #define XFS_MOUNT_DISCARD (1ULL << 5) /* discard unused blocks */ |
| 232 | #define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to | ||
| 233 | user */ | ||
| 234 | #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment | 230 | #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment |
| 235 | allocations */ | 231 | allocations */ |
| 236 | #define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */ | 232 | #define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */ |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b75c9bb6e71e..7a3e007b49f4 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
| @@ -70,7 +70,7 @@ xfs_qm_dquot_walk( | |||
| 70 | void *data) | 70 | void *data) |
| 71 | { | 71 | { |
| 72 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 72 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
| 73 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); | 73 | struct radix_tree_root *tree = xfs_dquot_tree(qi, type); |
| 74 | uint32_t next_index; | 74 | uint32_t next_index; |
| 75 | int last_error = 0; | 75 | int last_error = 0; |
| 76 | int skipped; | 76 | int skipped; |
| @@ -189,7 +189,7 @@ xfs_qm_dqpurge( | |||
| 189 | xfs_dqfunlock(dqp); | 189 | xfs_dqfunlock(dqp); |
| 190 | xfs_dqunlock(dqp); | 190 | xfs_dqunlock(dqp); |
| 191 | 191 | ||
| 192 | radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), | 192 | radix_tree_delete(xfs_dquot_tree(qi, dqp->q_core.d_flags), |
| 193 | be32_to_cpu(dqp->q_core.d_id)); | 193 | be32_to_cpu(dqp->q_core.d_id)); |
| 194 | qi->qi_dquots--; | 194 | qi->qi_dquots--; |
| 195 | 195 | ||
| @@ -299,8 +299,10 @@ xfs_qm_mount_quotas( | |||
| 299 | */ | 299 | */ |
| 300 | if (!XFS_IS_UQUOTA_ON(mp)) | 300 | if (!XFS_IS_UQUOTA_ON(mp)) |
| 301 | mp->m_qflags &= ~XFS_UQUOTA_CHKD; | 301 | mp->m_qflags &= ~XFS_UQUOTA_CHKD; |
| 302 | if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) | 302 | if (!XFS_IS_GQUOTA_ON(mp)) |
| 303 | mp->m_qflags &= ~XFS_OQUOTA_CHKD; | 303 | mp->m_qflags &= ~XFS_GQUOTA_CHKD; |
| 304 | if (!XFS_IS_PQUOTA_ON(mp)) | ||
| 305 | mp->m_qflags &= ~XFS_PQUOTA_CHKD; | ||
| 304 | 306 | ||
| 305 | write_changes: | 307 | write_changes: |
| 306 | /* | 308 | /* |
| @@ -489,8 +491,7 @@ xfs_qm_need_dqattach( | |||
| 489 | return false; | 491 | return false; |
| 490 | if (!XFS_NOT_DQATTACHED(mp, ip)) | 492 | if (!XFS_NOT_DQATTACHED(mp, ip)) |
| 491 | return false; | 493 | return false; |
| 492 | if (ip->i_ino == mp->m_sb.sb_uquotino || | 494 | if (xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) |
| 493 | ip->i_ino == mp->m_sb.sb_gquotino) | ||
| 494 | return false; | 495 | return false; |
| 495 | return true; | 496 | return true; |
| 496 | } | 497 | } |
| @@ -606,8 +607,7 @@ xfs_qm_dqdetach( | |||
| 606 | 607 | ||
| 607 | trace_xfs_dquot_dqdetach(ip); | 608 | trace_xfs_dquot_dqdetach(ip); |
| 608 | 609 | ||
| 609 | ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); | 610 | ASSERT(!xfs_is_quota_inode(&ip->i_mount->m_sb, ip->i_ino)); |
| 610 | ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); | ||
| 611 | if (ip->i_udquot) { | 611 | if (ip->i_udquot) { |
| 612 | xfs_qm_dqrele(ip->i_udquot); | 612 | xfs_qm_dqrele(ip->i_udquot); |
| 613 | ip->i_udquot = NULL; | 613 | ip->i_udquot = NULL; |
| @@ -1152,7 +1152,7 @@ xfs_qm_dqusage_adjust( | |||
| 1152 | * rootino must have its resources accounted for, not so with the quota | 1152 | * rootino must have its resources accounted for, not so with the quota |
| 1153 | * inodes. | 1153 | * inodes. |
| 1154 | */ | 1154 | */ |
| 1155 | if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { | 1155 | if (xfs_is_quota_inode(&mp->m_sb, ino)) { |
| 1156 | *res = BULKSTAT_RV_NOTHING; | 1156 | *res = BULKSTAT_RV_NOTHING; |
| 1157 | return XFS_ERROR(EINVAL); | 1157 | return XFS_ERROR(EINVAL); |
| 1158 | } | 1158 | } |
| @@ -1262,19 +1262,20 @@ int | |||
| 1262 | xfs_qm_quotacheck( | 1262 | xfs_qm_quotacheck( |
| 1263 | xfs_mount_t *mp) | 1263 | xfs_mount_t *mp) |
| 1264 | { | 1264 | { |
| 1265 | int done, count, error, error2; | 1265 | int done, count, error, error2; |
| 1266 | xfs_ino_t lastino; | 1266 | xfs_ino_t lastino; |
| 1267 | size_t structsz; | 1267 | size_t structsz; |
| 1268 | xfs_inode_t *uip, *gip; | 1268 | uint flags; |
| 1269 | uint flags; | 1269 | LIST_HEAD (buffer_list); |
| 1270 | LIST_HEAD (buffer_list); | 1270 | struct xfs_inode *uip = mp->m_quotainfo->qi_uquotaip; |
| 1271 | struct xfs_inode *gip = mp->m_quotainfo->qi_gquotaip; | ||
| 1271 | 1272 | ||
| 1272 | count = INT_MAX; | 1273 | count = INT_MAX; |
| 1273 | structsz = 1; | 1274 | structsz = 1; |
| 1274 | lastino = 0; | 1275 | lastino = 0; |
| 1275 | flags = 0; | 1276 | flags = 0; |
| 1276 | 1277 | ||
| 1277 | ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); | 1278 | ASSERT(uip || gip); |
| 1278 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 1279 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
| 1279 | 1280 | ||
| 1280 | xfs_notice(mp, "Quotacheck needed: Please wait."); | 1281 | xfs_notice(mp, "Quotacheck needed: Please wait."); |
| @@ -1284,7 +1285,6 @@ xfs_qm_quotacheck( | |||
| 1284 | * their counters to zero. We need a clean slate. | 1285 | * their counters to zero. We need a clean slate. |
| 1285 | * We don't log our changes till later. | 1286 | * We don't log our changes till later. |
| 1286 | */ | 1287 | */ |
| 1287 | uip = mp->m_quotainfo->qi_uquotaip; | ||
| 1288 | if (uip) { | 1288 | if (uip) { |
| 1289 | error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA, | 1289 | error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA, |
| 1290 | &buffer_list); | 1290 | &buffer_list); |
| @@ -1293,14 +1293,14 @@ xfs_qm_quotacheck( | |||
| 1293 | flags |= XFS_UQUOTA_CHKD; | 1293 | flags |= XFS_UQUOTA_CHKD; |
| 1294 | } | 1294 | } |
| 1295 | 1295 | ||
| 1296 | gip = mp->m_quotainfo->qi_gquotaip; | ||
| 1297 | if (gip) { | 1296 | if (gip) { |
| 1298 | error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? | 1297 | error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? |
| 1299 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA, | 1298 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA, |
| 1300 | &buffer_list); | 1299 | &buffer_list); |
| 1301 | if (error) | 1300 | if (error) |
| 1302 | goto error_return; | 1301 | goto error_return; |
| 1303 | flags |= XFS_OQUOTA_CHKD; | 1302 | flags |= XFS_IS_GQUOTA_ON(mp) ? |
| 1303 | XFS_GQUOTA_CHKD : XFS_PQUOTA_CHKD; | ||
| 1304 | } | 1304 | } |
| 1305 | 1305 | ||
| 1306 | do { | 1306 | do { |
| @@ -1395,15 +1395,13 @@ STATIC int | |||
| 1395 | xfs_qm_init_quotainos( | 1395 | xfs_qm_init_quotainos( |
| 1396 | xfs_mount_t *mp) | 1396 | xfs_mount_t *mp) |
| 1397 | { | 1397 | { |
| 1398 | xfs_inode_t *uip, *gip; | 1398 | struct xfs_inode *uip = NULL; |
| 1399 | int error; | 1399 | struct xfs_inode *gip = NULL; |
| 1400 | __int64_t sbflags; | 1400 | int error; |
| 1401 | uint flags; | 1401 | __int64_t sbflags = 0; |
| 1402 | uint flags = 0; | ||
| 1402 | 1403 | ||
| 1403 | ASSERT(mp->m_quotainfo); | 1404 | ASSERT(mp->m_quotainfo); |
| 1404 | uip = gip = NULL; | ||
| 1405 | sbflags = 0; | ||
| 1406 | flags = 0; | ||
| 1407 | 1405 | ||
| 1408 | /* | 1406 | /* |
| 1409 | * Get the uquota and gquota inodes | 1407 | * Get the uquota and gquota inodes |
| @@ -1412,19 +1410,18 @@ xfs_qm_init_quotainos( | |||
| 1412 | if (XFS_IS_UQUOTA_ON(mp) && | 1410 | if (XFS_IS_UQUOTA_ON(mp) && |
| 1413 | mp->m_sb.sb_uquotino != NULLFSINO) { | 1411 | mp->m_sb.sb_uquotino != NULLFSINO) { |
| 1414 | ASSERT(mp->m_sb.sb_uquotino > 0); | 1412 | ASSERT(mp->m_sb.sb_uquotino > 0); |
| 1415 | if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, | 1413 | error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, |
| 1416 | 0, 0, &uip))) | 1414 | 0, 0, &uip); |
| 1415 | if (error) | ||
| 1417 | return XFS_ERROR(error); | 1416 | return XFS_ERROR(error); |
| 1418 | } | 1417 | } |
| 1419 | if (XFS_IS_OQUOTA_ON(mp) && | 1418 | if (XFS_IS_OQUOTA_ON(mp) && |
| 1420 | mp->m_sb.sb_gquotino != NULLFSINO) { | 1419 | mp->m_sb.sb_gquotino != NULLFSINO) { |
| 1421 | ASSERT(mp->m_sb.sb_gquotino > 0); | 1420 | ASSERT(mp->m_sb.sb_gquotino > 0); |
| 1422 | if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, | 1421 | error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, |
| 1423 | 0, 0, &gip))) { | 1422 | 0, 0, &gip); |
| 1424 | if (uip) | 1423 | if (error) |
| 1425 | IRELE(uip); | 1424 | goto error_rele; |
| 1426 | return XFS_ERROR(error); | ||
| 1427 | } | ||
| 1428 | } | 1425 | } |
| 1429 | } else { | 1426 | } else { |
| 1430 | flags |= XFS_QMOPT_SBVERSION; | 1427 | flags |= XFS_QMOPT_SBVERSION; |
| @@ -1439,10 +1436,11 @@ xfs_qm_init_quotainos( | |||
| 1439 | * temporarily switch to read-write to do this. | 1436 | * temporarily switch to read-write to do this. |
| 1440 | */ | 1437 | */ |
| 1441 | if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { | 1438 | if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { |
| 1442 | if ((error = xfs_qm_qino_alloc(mp, &uip, | 1439 | error = xfs_qm_qino_alloc(mp, &uip, |
| 1443 | sbflags | XFS_SB_UQUOTINO, | 1440 | sbflags | XFS_SB_UQUOTINO, |
| 1444 | flags | XFS_QMOPT_UQUOTA))) | 1441 | flags | XFS_QMOPT_UQUOTA); |
| 1445 | return XFS_ERROR(error); | 1442 | if (error) |
| 1443 | goto error_rele; | ||
| 1446 | 1444 | ||
| 1447 | flags &= ~XFS_QMOPT_SBVERSION; | 1445 | flags &= ~XFS_QMOPT_SBVERSION; |
| 1448 | } | 1446 | } |
| @@ -1451,18 +1449,21 @@ xfs_qm_init_quotainos( | |||
| 1451 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); | 1449 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); |
| 1452 | error = xfs_qm_qino_alloc(mp, &gip, | 1450 | error = xfs_qm_qino_alloc(mp, &gip, |
| 1453 | sbflags | XFS_SB_GQUOTINO, flags); | 1451 | sbflags | XFS_SB_GQUOTINO, flags); |
| 1454 | if (error) { | 1452 | if (error) |
| 1455 | if (uip) | 1453 | goto error_rele; |
| 1456 | IRELE(uip); | ||
| 1457 | |||
| 1458 | return XFS_ERROR(error); | ||
| 1459 | } | ||
| 1460 | } | 1454 | } |
| 1461 | 1455 | ||
| 1462 | mp->m_quotainfo->qi_uquotaip = uip; | 1456 | mp->m_quotainfo->qi_uquotaip = uip; |
| 1463 | mp->m_quotainfo->qi_gquotaip = gip; | 1457 | mp->m_quotainfo->qi_gquotaip = gip; |
| 1464 | 1458 | ||
| 1465 | return 0; | 1459 | return 0; |
| 1460 | |||
| 1461 | error_rele: | ||
| 1462 | if (uip) | ||
| 1463 | IRELE(uip); | ||
| 1464 | if (gip) | ||
| 1465 | IRELE(gip); | ||
| 1466 | return XFS_ERROR(error); | ||
| 1466 | } | 1467 | } |
| 1467 | 1468 | ||
| 1468 | STATIC void | 1469 | STATIC void |
| @@ -1473,7 +1474,7 @@ xfs_qm_dqfree_one( | |||
| 1473 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 1474 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
| 1474 | 1475 | ||
| 1475 | mutex_lock(&qi->qi_tree_lock); | 1476 | mutex_lock(&qi->qi_tree_lock); |
| 1476 | radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), | 1477 | radix_tree_delete(xfs_dquot_tree(qi, dqp->q_core.d_flags), |
| 1477 | be32_to_cpu(dqp->q_core.d_id)); | 1478 | be32_to_cpu(dqp->q_core.d_id)); |
| 1478 | 1479 | ||
| 1479 | qi->qi_dquots--; | 1480 | qi->qi_dquots--; |
| @@ -1659,7 +1660,8 @@ xfs_qm_vop_dqalloc( | |||
| 1659 | struct xfs_dquot **O_gdqpp) | 1660 | struct xfs_dquot **O_gdqpp) |
| 1660 | { | 1661 | { |
| 1661 | struct xfs_mount *mp = ip->i_mount; | 1662 | struct xfs_mount *mp = ip->i_mount; |
| 1662 | struct xfs_dquot *uq, *gq; | 1663 | struct xfs_dquot *uq = NULL; |
| 1664 | struct xfs_dquot *gq = NULL; | ||
| 1663 | int error; | 1665 | int error; |
| 1664 | uint lockflags; | 1666 | uint lockflags; |
| 1665 | 1667 | ||
| @@ -1684,7 +1686,6 @@ xfs_qm_vop_dqalloc( | |||
| 1684 | } | 1686 | } |
| 1685 | } | 1687 | } |
| 1686 | 1688 | ||
| 1687 | uq = gq = NULL; | ||
| 1688 | if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { | 1689 | if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { |
| 1689 | if (ip->i_d.di_uid != uid) { | 1690 | if (ip->i_d.di_uid != uid) { |
| 1690 | /* | 1691 | /* |
| @@ -1697,11 +1698,12 @@ xfs_qm_vop_dqalloc( | |||
| 1697 | * holding ilock. | 1698 | * holding ilock. |
| 1698 | */ | 1699 | */ |
| 1699 | xfs_iunlock(ip, lockflags); | 1700 | xfs_iunlock(ip, lockflags); |
| 1700 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, | 1701 | error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, |
| 1701 | XFS_DQ_USER, | 1702 | XFS_DQ_USER, |
| 1702 | XFS_QMOPT_DQALLOC | | 1703 | XFS_QMOPT_DQALLOC | |
| 1703 | XFS_QMOPT_DOWARN, | 1704 | XFS_QMOPT_DOWARN, |
| 1704 | &uq))) { | 1705 | &uq); |
| 1706 | if (error) { | ||
| 1705 | ASSERT(error != ENOENT); | 1707 | ASSERT(error != ENOENT); |
| 1706 | return error; | 1708 | return error; |
| 1707 | } | 1709 | } |
| @@ -1723,15 +1725,14 @@ xfs_qm_vop_dqalloc( | |||
| 1723 | if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { | 1725 | if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { |
| 1724 | if (ip->i_d.di_gid != gid) { | 1726 | if (ip->i_d.di_gid != gid) { |
| 1725 | xfs_iunlock(ip, lockflags); | 1727 | xfs_iunlock(ip, lockflags); |
| 1726 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, | 1728 | error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, |
| 1727 | XFS_DQ_GROUP, | 1729 | XFS_DQ_GROUP, |
| 1728 | XFS_QMOPT_DQALLOC | | 1730 | XFS_QMOPT_DQALLOC | |
| 1729 | XFS_QMOPT_DOWARN, | 1731 | XFS_QMOPT_DOWARN, |
| 1730 | &gq))) { | 1732 | &gq); |
| 1731 | if (uq) | 1733 | if (error) { |
| 1732 | xfs_qm_dqrele(uq); | ||
| 1733 | ASSERT(error != ENOENT); | 1734 | ASSERT(error != ENOENT); |
| 1734 | return error; | 1735 | goto error_rele; |
| 1735 | } | 1736 | } |
| 1736 | xfs_dqunlock(gq); | 1737 | xfs_dqunlock(gq); |
| 1737 | lockflags = XFS_ILOCK_SHARED; | 1738 | lockflags = XFS_ILOCK_SHARED; |
| @@ -1743,15 +1744,14 @@ xfs_qm_vop_dqalloc( | |||
| 1743 | } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { | 1744 | } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { |
| 1744 | if (xfs_get_projid(ip) != prid) { | 1745 | if (xfs_get_projid(ip) != prid) { |
| 1745 | xfs_iunlock(ip, lockflags); | 1746 | xfs_iunlock(ip, lockflags); |
| 1746 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, | 1747 | error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, |
| 1747 | XFS_DQ_PROJ, | 1748 | XFS_DQ_PROJ, |
| 1748 | XFS_QMOPT_DQALLOC | | 1749 | XFS_QMOPT_DQALLOC | |
| 1749 | XFS_QMOPT_DOWARN, | 1750 | XFS_QMOPT_DOWARN, |
| 1750 | &gq))) { | 1751 | &gq); |
| 1751 | if (uq) | 1752 | if (error) { |
| 1752 | xfs_qm_dqrele(uq); | ||
| 1753 | ASSERT(error != ENOENT); | 1753 | ASSERT(error != ENOENT); |
| 1754 | return (error); | 1754 | goto error_rele; |
| 1755 | } | 1755 | } |
| 1756 | xfs_dqunlock(gq); | 1756 | xfs_dqunlock(gq); |
| 1757 | lockflags = XFS_ILOCK_SHARED; | 1757 | lockflags = XFS_ILOCK_SHARED; |
| @@ -1774,6 +1774,11 @@ xfs_qm_vop_dqalloc( | |||
| 1774 | else if (gq) | 1774 | else if (gq) |
| 1775 | xfs_qm_dqrele(gq); | 1775 | xfs_qm_dqrele(gq); |
| 1776 | return 0; | 1776 | return 0; |
| 1777 | |||
| 1778 | error_rele: | ||
| 1779 | if (uq) | ||
| 1780 | xfs_qm_dqrele(uq); | ||
| 1781 | return error; | ||
| 1777 | } | 1782 | } |
| 1778 | 1783 | ||
| 1779 | /* | 1784 | /* |
| @@ -1821,29 +1826,31 @@ xfs_qm_vop_chown( | |||
| 1821 | */ | 1826 | */ |
| 1822 | int | 1827 | int |
| 1823 | xfs_qm_vop_chown_reserve( | 1828 | xfs_qm_vop_chown_reserve( |
| 1824 | xfs_trans_t *tp, | 1829 | struct xfs_trans *tp, |
| 1825 | xfs_inode_t *ip, | 1830 | struct xfs_inode *ip, |
| 1826 | xfs_dquot_t *udqp, | 1831 | struct xfs_dquot *udqp, |
| 1827 | xfs_dquot_t *gdqp, | 1832 | struct xfs_dquot *gdqp, |
| 1828 | uint flags) | 1833 | uint flags) |
| 1829 | { | 1834 | { |
| 1830 | xfs_mount_t *mp = ip->i_mount; | 1835 | struct xfs_mount *mp = ip->i_mount; |
| 1831 | uint delblks, blkflags, prjflags = 0; | 1836 | uint delblks, blkflags, prjflags = 0; |
| 1832 | xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; | 1837 | struct xfs_dquot *udq_unres = NULL; |
| 1833 | int error; | 1838 | struct xfs_dquot *gdq_unres = NULL; |
| 1839 | struct xfs_dquot *udq_delblks = NULL; | ||
| 1840 | struct xfs_dquot *gdq_delblks = NULL; | ||
| 1841 | int error; | ||
| 1834 | 1842 | ||
| 1835 | 1843 | ||
| 1836 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 1844 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
| 1837 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 1845 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
| 1838 | 1846 | ||
| 1839 | delblks = ip->i_delayed_blks; | 1847 | delblks = ip->i_delayed_blks; |
| 1840 | delblksudq = delblksgdq = unresudq = unresgdq = NULL; | ||
| 1841 | blkflags = XFS_IS_REALTIME_INODE(ip) ? | 1848 | blkflags = XFS_IS_REALTIME_INODE(ip) ? |
| 1842 | XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; | 1849 | XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; |
| 1843 | 1850 | ||
| 1844 | if (XFS_IS_UQUOTA_ON(mp) && udqp && | 1851 | if (XFS_IS_UQUOTA_ON(mp) && udqp && |
| 1845 | ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { | 1852 | ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { |
| 1846 | delblksudq = udqp; | 1853 | udq_delblks = udqp; |
| 1847 | /* | 1854 | /* |
| 1848 | * If there are delayed allocation blocks, then we have to | 1855 | * If there are delayed allocation blocks, then we have to |
| 1849 | * unreserve those from the old dquot, and add them to the | 1856 | * unreserve those from the old dquot, and add them to the |
| @@ -1851,7 +1858,7 @@ xfs_qm_vop_chown_reserve( | |||
| 1851 | */ | 1858 | */ |
| 1852 | if (delblks) { | 1859 | if (delblks) { |
| 1853 | ASSERT(ip->i_udquot); | 1860 | ASSERT(ip->i_udquot); |
| 1854 | unresudq = ip->i_udquot; | 1861 | udq_unres = ip->i_udquot; |
| 1855 | } | 1862 | } |
| 1856 | } | 1863 | } |
| 1857 | if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { | 1864 | if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { |
| @@ -1862,18 +1869,19 @@ xfs_qm_vop_chown_reserve( | |||
| 1862 | if (prjflags || | 1869 | if (prjflags || |
| 1863 | (XFS_IS_GQUOTA_ON(ip->i_mount) && | 1870 | (XFS_IS_GQUOTA_ON(ip->i_mount) && |
| 1864 | ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { | 1871 | ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { |
| 1865 | delblksgdq = gdqp; | 1872 | gdq_delblks = gdqp; |
| 1866 | if (delblks) { | 1873 | if (delblks) { |
| 1867 | ASSERT(ip->i_gdquot); | 1874 | ASSERT(ip->i_gdquot); |
| 1868 | unresgdq = ip->i_gdquot; | 1875 | gdq_unres = ip->i_gdquot; |
| 1869 | } | 1876 | } |
| 1870 | } | 1877 | } |
| 1871 | } | 1878 | } |
| 1872 | 1879 | ||
| 1873 | if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, | 1880 | error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, |
| 1874 | delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, | 1881 | udq_delblks, gdq_delblks, ip->i_d.di_nblocks, 1, |
| 1875 | flags | blkflags | prjflags))) | 1882 | flags | blkflags | prjflags); |
| 1876 | return (error); | 1883 | if (error) |
| 1884 | return error; | ||
| 1877 | 1885 | ||
| 1878 | /* | 1886 | /* |
| 1879 | * Do the delayed blks reservations/unreservations now. Since, these | 1887 | * Do the delayed blks reservations/unreservations now. Since, these |
| @@ -1885,14 +1893,15 @@ xfs_qm_vop_chown_reserve( | |||
| 1885 | /* | 1893 | /* |
| 1886 | * Do the reservations first. Unreservation can't fail. | 1894 | * Do the reservations first. Unreservation can't fail. |
| 1887 | */ | 1895 | */ |
| 1888 | ASSERT(delblksudq || delblksgdq); | 1896 | ASSERT(udq_delblks || gdq_delblks); |
| 1889 | ASSERT(unresudq || unresgdq); | 1897 | ASSERT(udq_unres || gdq_unres); |
| 1890 | if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, | 1898 | error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, |
| 1891 | delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, | 1899 | udq_delblks, gdq_delblks, (xfs_qcnt_t)delblks, 0, |
| 1892 | flags | blkflags | prjflags))) | 1900 | flags | blkflags | prjflags); |
| 1893 | return (error); | 1901 | if (error) |
| 1902 | return error; | ||
| 1894 | xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, | 1903 | xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, |
| 1895 | unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, | 1904 | udq_unres, gdq_unres, -((xfs_qcnt_t)delblks), 0, |
| 1896 | blkflags); | 1905 | blkflags); |
| 1897 | } | 1906 | } |
| 1898 | 1907 | ||
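Editor's note: several xfs_qm.c hunks above replace ad hoc unwinding (releasing uip or uq inline before each return) with a single error_rele: label that drops whatever was acquired. The sketch below shows that single-exit cleanup idiom in isolation, using made-up acquire/release helpers rather than xfs_iget/IRELE.

#include <stdio.h>
#include <stdlib.h>

struct res { const char *name; };

static struct res *acquire(const char *name, int fail)
{
    struct res *r = fail ? NULL : malloc(sizeof(*r));

    if (r)
        r->name = name;
    return r;
}

static void release(struct res *r)
{
    if (r) {                     /* mirrors the "if (uip) IRELE(uip)" guards */
        printf("released %s\n", r->name);
        free(r);
    }
}

static int init_quota_inodes(int fail_group)
{
    struct res *uq = NULL;
    struct res *gq = NULL;
    int error = 0;

    uq = acquire("uquota inode", 0);
    if (!uq) {
        error = 1;
        goto error_rele;
    }
    gq = acquire("gquota inode", fail_group);
    if (!gq) {
        error = 1;
        goto error_rele;         /* one exit path unwinds everything */
    }

    printf("both quota inodes set up\n");
    release(uq);
    release(gq);
    return 0;

error_rele:
    release(uq);
    release(gq);
    return error;
}

int main(void)
{
    return init_quota_inodes(1);
}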
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 5d16a6e6900f..bdb4f8b95207 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h | |||
| @@ -69,30 +69,62 @@ typedef struct xfs_quotainfo { | |||
| 69 | struct shrinker qi_shrinker; | 69 | struct shrinker qi_shrinker; |
| 70 | } xfs_quotainfo_t; | 70 | } xfs_quotainfo_t; |
| 71 | 71 | ||
| 72 | #define XFS_DQUOT_TREE(qi, type) \ | 72 | static inline struct radix_tree_root * |
| 73 | ((type & XFS_DQ_USER) ? \ | 73 | xfs_dquot_tree( |
| 74 | &((qi)->qi_uquota_tree) : \ | 74 | struct xfs_quotainfo *qi, |
| 75 | &((qi)->qi_gquota_tree)) | 75 | int type) |
| 76 | { | ||
| 77 | switch (type) { | ||
| 78 | case XFS_DQ_USER: | ||
| 79 | return &qi->qi_uquota_tree; | ||
| 80 | case XFS_DQ_GROUP: | ||
| 81 | case XFS_DQ_PROJ: | ||
| 82 | return &qi->qi_gquota_tree; | ||
| 83 | default: | ||
| 84 | ASSERT(0); | ||
| 85 | } | ||
| 86 | return NULL; | ||
| 87 | } | ||
| 76 | 88 | ||
| 89 | static inline struct xfs_inode * | ||
| 90 | xfs_dq_to_quota_inode(struct xfs_dquot *dqp) | ||
| 91 | { | ||
| 92 | switch (dqp->dq_flags & XFS_DQ_ALLTYPES) { | ||
| 93 | case XFS_DQ_USER: | ||
| 94 | return dqp->q_mount->m_quotainfo->qi_uquotaip; | ||
| 95 | case XFS_DQ_GROUP: | ||
| 96 | case XFS_DQ_PROJ: | ||
| 97 | return dqp->q_mount->m_quotainfo->qi_gquotaip; | ||
| 98 | default: | ||
| 99 | ASSERT(0); | ||
| 100 | } | ||
| 101 | return NULL; | ||
| 102 | } | ||
| 77 | 103 | ||
| 78 | extern int xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp, | 104 | extern int xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp, |
| 79 | unsigned int nbblks); | 105 | unsigned int nbblks); |
| 80 | extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); | 106 | extern void xfs_trans_mod_dquot(struct xfs_trans *, |
| 81 | extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, | 107 | struct xfs_dquot *, uint, long); |
| 82 | xfs_dquot_t *, xfs_dquot_t *, long, long, uint); | 108 | extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, |
| 83 | extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *); | 109 | struct xfs_mount *, struct xfs_dquot *, |
| 84 | extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *); | 110 | struct xfs_dquot *, long, long, uint); |
| 111 | extern void xfs_trans_dqjoin(struct xfs_trans *, struct xfs_dquot *); | ||
| 112 | extern void xfs_trans_log_dquot(struct xfs_trans *, struct xfs_dquot *); | ||
| 85 | 113 | ||
| 86 | /* | 114 | /* |
| 87 | * We keep the usr and grp dquots separately so that locking will be easier | 115 | * We keep the usr and grp dquots separately so that locking will be easier |
| 88 | * to do at commit time. All transactions that we know of at this point | 116 | * to do at commit time. All transactions that we know of at this point |
| 89 | * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. | 117 | * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. |
| 90 | */ | 118 | */ |
| 119 | enum { | ||
| 120 | XFS_QM_TRANS_USR = 0, | ||
| 121 | XFS_QM_TRANS_GRP, | ||
| 122 | XFS_QM_TRANS_DQTYPES | ||
| 123 | }; | ||
| 91 | #define XFS_QM_TRANS_MAXDQS 2 | 124 | #define XFS_QM_TRANS_MAXDQS 2 |
| 92 | typedef struct xfs_dquot_acct { | 125 | struct xfs_dquot_acct { |
| 93 | xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS]; | 126 | struct xfs_dqtrx dqs[XFS_QM_TRANS_DQTYPES][XFS_QM_TRANS_MAXDQS]; |
| 94 | xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS]; | 127 | }; |
| 95 | } xfs_dquot_acct_t; | ||
| 96 | 128 | ||
| 97 | /* | 129 | /* |
| 98 | * Users are allowed to have a usage exceeding their softlimit for | 130 | * Users are allowed to have a usage exceeding their softlimit for |
| @@ -106,22 +138,23 @@ typedef struct xfs_dquot_acct { | |||
| 106 | #define XFS_QM_IWARNLIMIT 5 | 138 | #define XFS_QM_IWARNLIMIT 5 |
| 107 | #define XFS_QM_RTBWARNLIMIT 5 | 139 | #define XFS_QM_RTBWARNLIMIT 5 |
| 108 | 140 | ||
| 109 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); | 141 | extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); |
| 110 | extern int xfs_qm_quotacheck(xfs_mount_t *); | 142 | extern int xfs_qm_quotacheck(struct xfs_mount *); |
| 111 | extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); | 143 | extern int xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t); |
| 112 | 144 | ||
| 113 | /* dquot stuff */ | 145 | /* dquot stuff */ |
| 114 | extern void xfs_qm_dqpurge_all(xfs_mount_t *, uint); | 146 | extern void xfs_qm_dqpurge_all(struct xfs_mount *, uint); |
| 115 | extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); | 147 | extern void xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint); |
| 116 | 148 | ||
| 117 | /* quota ops */ | 149 | /* quota ops */ |
| 118 | extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); | 150 | extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint); |
| 119 | extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, | 151 | extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t, |
| 120 | fs_disk_quota_t *); | 152 | uint, struct fs_disk_quota *); |
| 121 | extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, | 153 | extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, |
| 122 | fs_disk_quota_t *); | 154 | struct fs_disk_quota *); |
| 123 | extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); | 155 | extern int xfs_qm_scall_getqstat(struct xfs_mount *, |
| 124 | extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); | 156 | struct fs_quota_stat *); |
| 125 | extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); | 157 | extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint); |
| 158 | extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint); | ||
| 126 | 159 | ||
| 127 | #endif /* __XFS_QM_H__ */ | 160 | #endif /* __XFS_QM_H__ */ |
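The header change above swaps the XFS_DQUOT_TREE() macro for an inline helper that switches on the quota type and asserts on anything it does not recognise, and it folds the separate user/group transaction arrays into one two-dimensional dqs[type][slot] array. A compact stand-alone sketch of the type-to-tree dispatch is below; the enum values and struct fields are invented stand-ins for illustration, not the kernel's definitions.

#include <assert.h>
#include <stdio.h>

/* Stand-ins for the quota type flags; values are illustrative only. */
enum dqtype { DQ_USER = 1, DQ_PROJ = 2, DQ_GROUP = 4 };

struct quotainfo { int uquota_tree; int gquota_tree; };   /* trees elided */

/* Same shape as the new xfs_dquot_tree(): user maps to one tree, group
 * and project share the other, anything else trips an assertion. */
static int *dquot_tree(struct quotainfo *qi, enum dqtype type)
{
    switch (type) {
    case DQ_USER:
        return &qi->uquota_tree;
    case DQ_GROUP:
    case DQ_PROJ:
        return &qi->gquota_tree;
    default:
        assert(0 && "unknown quota type");
    }
    return NULL;
}

int main(void)
{
    struct quotainfo qi = { 0, 0 };

    *dquot_tree(&qi, DQ_USER)  += 1;   /* user dquots land in the first tree */
    *dquot_tree(&qi, DQ_PROJ)  += 1;   /* project shares the group tree */
    *dquot_tree(&qi, DQ_GROUP) += 1;

    printf("uquota_tree=%d gquota_tree=%d\n", qi.uquota_tree, qi.gquota_tree);
    return 0;
}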
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 6cdf6ffc36a1..a08801ae24e2 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c | |||
| @@ -117,11 +117,11 @@ xfs_qm_scall_quotaoff( | |||
| 117 | } | 117 | } |
| 118 | if (flags & XFS_GQUOTA_ACCT) { | 118 | if (flags & XFS_GQUOTA_ACCT) { |
| 119 | dqtype |= XFS_QMOPT_GQUOTA; | 119 | dqtype |= XFS_QMOPT_GQUOTA; |
| 120 | flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); | 120 | flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD); |
| 121 | inactivate_flags |= XFS_GQUOTA_ACTIVE; | 121 | inactivate_flags |= XFS_GQUOTA_ACTIVE; |
| 122 | } else if (flags & XFS_PQUOTA_ACCT) { | 122 | } else if (flags & XFS_PQUOTA_ACCT) { |
| 123 | dqtype |= XFS_QMOPT_PQUOTA; | 123 | dqtype |= XFS_QMOPT_PQUOTA; |
| 124 | flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); | 124 | flags |= (XFS_PQUOTA_CHKD | XFS_PQUOTA_ENFD); |
| 125 | inactivate_flags |= XFS_PQUOTA_ACTIVE; | 125 | inactivate_flags |= XFS_PQUOTA_ACTIVE; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| @@ -335,14 +335,14 @@ xfs_qm_scall_quotaon( | |||
| 335 | * quota acct on ondisk without m_qflags' knowing. | 335 | * quota acct on ondisk without m_qflags' knowing. |
| 336 | */ | 336 | */ |
| 337 | if (((flags & XFS_UQUOTA_ACCT) == 0 && | 337 | if (((flags & XFS_UQUOTA_ACCT) == 0 && |
| 338 | (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && | 338 | (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && |
| 339 | (flags & XFS_UQUOTA_ENFD)) | 339 | (flags & XFS_UQUOTA_ENFD)) || |
| 340 | || | 340 | ((flags & XFS_GQUOTA_ACCT) == 0 && |
| 341 | (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && | ||
| 342 | (flags & XFS_GQUOTA_ENFD)) || | ||
| 341 | ((flags & XFS_PQUOTA_ACCT) == 0 && | 343 | ((flags & XFS_PQUOTA_ACCT) == 0 && |
| 342 | (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && | 344 | (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && |
| 343 | (flags & XFS_GQUOTA_ACCT) == 0 && | 345 | (flags & XFS_PQUOTA_ENFD))) { |
| 344 | (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && | ||
| 345 | (flags & XFS_OQUOTA_ENFD))) { | ||
| 346 | xfs_debug(mp, | 346 | xfs_debug(mp, |
| 347 | "%s: Can't enforce without acct, flags=%x sbflags=%x\n", | 347 | "%s: Can't enforce without acct, flags=%x sbflags=%x\n", |
| 348 | __func__, flags, mp->m_sb.sb_qflags); | 348 | __func__, flags, mp->m_sb.sb_qflags); |
| @@ -407,11 +407,11 @@ xfs_qm_scall_getqstat( | |||
| 407 | struct fs_quota_stat *out) | 407 | struct fs_quota_stat *out) |
| 408 | { | 408 | { |
| 409 | struct xfs_quotainfo *q = mp->m_quotainfo; | 409 | struct xfs_quotainfo *q = mp->m_quotainfo; |
| 410 | struct xfs_inode *uip, *gip; | 410 | struct xfs_inode *uip = NULL; |
| 411 | bool tempuqip, tempgqip; | 411 | struct xfs_inode *gip = NULL; |
| 412 | bool tempuqip = false; | ||
| 413 | bool tempgqip = false; | ||
| 412 | 414 | ||
| 413 | uip = gip = NULL; | ||
| 414 | tempuqip = tempgqip = false; | ||
| 415 | memset(out, 0, sizeof(fs_quota_stat_t)); | 415 | memset(out, 0, sizeof(fs_quota_stat_t)); |
| 416 | 416 | ||
| 417 | out->qs_version = FS_QSTAT_VERSION; | 417 | out->qs_version = FS_QSTAT_VERSION; |
| @@ -776,9 +776,12 @@ xfs_qm_scall_getquota( | |||
| 776 | * gets turned off. No need to confuse the user level code, | 776 | * gets turned off. No need to confuse the user level code, |
| 777 | * so return zeroes in that case. | 777 | * so return zeroes in that case. |
| 778 | */ | 778 | */ |
| 779 | if ((!XFS_IS_UQUOTA_ENFORCED(mp) && dqp->q_core.d_flags == XFS_DQ_USER) || | 779 | if ((!XFS_IS_UQUOTA_ENFORCED(mp) && |
| 780 | (!XFS_IS_OQUOTA_ENFORCED(mp) && | 780 | dqp->q_core.d_flags == XFS_DQ_USER) || |
| 781 | (dqp->q_core.d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { | 781 | (!XFS_IS_GQUOTA_ENFORCED(mp) && |
| 782 | dqp->q_core.d_flags == XFS_DQ_GROUP) || | ||
| 783 | (!XFS_IS_PQUOTA_ENFORCED(mp) && | ||
| 784 | dqp->q_core.d_flags == XFS_DQ_PROJ)) { | ||
| 782 | dst->d_btimer = 0; | 785 | dst->d_btimer = 0; |
| 783 | dst->d_itimer = 0; | 786 | dst->d_itimer = 0; |
| 784 | dst->d_rtbtimer = 0; | 787 | dst->d_rtbtimer = 0; |
| @@ -786,8 +789,8 @@ xfs_qm_scall_getquota( | |||
| 786 | 789 | ||
| 787 | #ifdef DEBUG | 790 | #ifdef DEBUG |
| 788 | if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || | 791 | if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || |
| 789 | (XFS_IS_OQUOTA_ENFORCED(mp) && | 792 | (XFS_IS_GQUOTA_ENFORCED(mp) && dst->d_flags == FS_GROUP_QUOTA) || |
| 790 | (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && | 793 | (XFS_IS_PQUOTA_ENFORCED(mp) && dst->d_flags == FS_PROJ_QUOTA)) && |
| 791 | dst->d_id != 0) { | 794 | dst->d_id != 0) { |
| 792 | if ((dst->d_bcount > dst->d_blk_softlimit) && | 795 | if ((dst->d_bcount > dst->d_blk_softlimit) && |
| 793 | (dst->d_blk_softlimit > 0)) { | 796 | (dst->d_blk_softlimit > 0)) { |
| @@ -833,16 +836,16 @@ xfs_qm_export_flags( | |||
| 833 | uflags = 0; | 836 | uflags = 0; |
| 834 | if (flags & XFS_UQUOTA_ACCT) | 837 | if (flags & XFS_UQUOTA_ACCT) |
| 835 | uflags |= FS_QUOTA_UDQ_ACCT; | 838 | uflags |= FS_QUOTA_UDQ_ACCT; |
| 836 | if (flags & XFS_PQUOTA_ACCT) | ||
| 837 | uflags |= FS_QUOTA_PDQ_ACCT; | ||
| 838 | if (flags & XFS_GQUOTA_ACCT) | 839 | if (flags & XFS_GQUOTA_ACCT) |
| 839 | uflags |= FS_QUOTA_GDQ_ACCT; | 840 | uflags |= FS_QUOTA_GDQ_ACCT; |
| 841 | if (flags & XFS_PQUOTA_ACCT) | ||
| 842 | uflags |= FS_QUOTA_PDQ_ACCT; | ||
| 840 | if (flags & XFS_UQUOTA_ENFD) | 843 | if (flags & XFS_UQUOTA_ENFD) |
| 841 | uflags |= FS_QUOTA_UDQ_ENFD; | 844 | uflags |= FS_QUOTA_UDQ_ENFD; |
| 842 | if (flags & (XFS_OQUOTA_ENFD)) { | 845 | if (flags & XFS_GQUOTA_ENFD) |
| 843 | uflags |= (flags & XFS_GQUOTA_ACCT) ? | 846 | uflags |= FS_QUOTA_GDQ_ENFD; |
| 844 | FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD; | 847 | if (flags & XFS_PQUOTA_ENFD) |
| 845 | } | 848 | uflags |= FS_QUOTA_PDQ_ENFD; |
| 846 | return (uflags); | 849 | return (uflags); |
| 847 | } | 850 | } |
| 848 | 851 | ||
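With group and project enforcement carried in their own bits, xfs_qm_export_flags() above becomes a straight one-to-one translation instead of decoding the shared OQUOTA bit against the accounting flags. A hedged sketch of that mapping follows; all flag values here are made up for the example and do not match the kernel's constants.

#include <stdio.h>

/* Illustrative in-core flag bits (not the kernel's actual values). */
#define Q_UQUOTA_ENFD 0x01
#define Q_GQUOTA_ENFD 0x02
#define Q_PQUOTA_ENFD 0x04

/* Illustrative user-visible bits, standing in for the FS_QUOTA_*_ENFD set. */
#define FS_UDQ_ENFD 0x10
#define FS_GDQ_ENFD 0x20
#define FS_PDQ_ENFD 0x40

/* With separate group/project bits the translation is a plain 1:1 map;
 * no more guessing from accounting flags which type a shared bit meant. */
static unsigned int export_enfd(unsigned int flags)
{
    unsigned int uflags = 0;

    if (flags & Q_UQUOTA_ENFD)
        uflags |= FS_UDQ_ENFD;
    if (flags & Q_GQUOTA_ENFD)
        uflags |= FS_GDQ_ENFD;
    if (flags & Q_PQUOTA_ENFD)
        uflags |= FS_PDQ_ENFD;
    return uflags;
}

int main(void)
{
    printf("0x%x\n", export_enfd(Q_UQUOTA_ENFD | Q_PQUOTA_ENFD));  /* 0x50 */
    return 0;
}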
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index c38068f26c55..c3483bab9cde 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h | |||
| @@ -161,30 +161,42 @@ typedef struct xfs_qoff_logformat { | |||
| 161 | #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ | 161 | #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ |
| 162 | 162 | ||
| 163 | /* | 163 | /* |
| 164 | * Conversion to and from the combined OQUOTA flag (if necessary) | ||
| 165 | * is done only in xfs_sb_qflags_to_disk() and xfs_sb_qflags_from_disk() | ||
| 166 | */ | ||
| 167 | #define XFS_GQUOTA_ENFD 0x0080 /* group quota limits enforced */ | ||
| 168 | #define XFS_GQUOTA_CHKD 0x0100 /* quotacheck run on group quotas */ | ||
| 169 | #define XFS_PQUOTA_ENFD 0x0200 /* project quota limits enforced */ | ||
| 170 | #define XFS_PQUOTA_CHKD 0x0400 /* quotacheck run on project quotas */ | ||
| 171 | |||
| 172 | /* | ||
| 164 | * Quota Accounting/Enforcement flags | 173 | * Quota Accounting/Enforcement flags |
| 165 | */ | 174 | */ |
| 166 | #define XFS_ALL_QUOTA_ACCT \ | 175 | #define XFS_ALL_QUOTA_ACCT \ |
| 167 | (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) | 176 | (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) |
| 168 | #define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) | 177 | #define XFS_ALL_QUOTA_ENFD \ |
| 169 | #define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) | 178 | (XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_ENFD) |
| 179 | #define XFS_ALL_QUOTA_CHKD \ | ||
| 180 | (XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD | XFS_PQUOTA_CHKD) | ||
| 170 | 181 | ||
| 171 | #define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) | 182 | #define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) |
| 172 | #define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) | 183 | #define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) |
| 173 | #define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) | 184 | #define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) |
| 174 | #define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) | 185 | #define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) |
| 175 | #define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD) | 186 | #define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD) |
| 176 | #define XFS_IS_OQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_OQUOTA_ENFD) | 187 | #define XFS_IS_GQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_GQUOTA_ENFD) |
| 188 | #define XFS_IS_PQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_PQUOTA_ENFD) | ||
| 177 | 189 | ||
| 178 | /* | 190 | /* |
| 179 | * Incore only flags for quotaoff - these bits get cleared when quota(s) | 191 | * Incore only flags for quotaoff - these bits get cleared when quota(s) |
| 180 | * are in the process of getting turned off. These flags are in m_qflags but | 192 | * are in the process of getting turned off. These flags are in m_qflags but |
| 181 | * never in sb_qflags. | 193 | * never in sb_qflags. |
| 182 | */ | 194 | */ |
| 183 | #define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ | 195 | #define XFS_UQUOTA_ACTIVE 0x1000 /* uquotas are being turned off */ |
| 184 | #define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ | 196 | #define XFS_GQUOTA_ACTIVE 0x2000 /* gquotas are being turned off */ |
| 185 | #define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ | 197 | #define XFS_PQUOTA_ACTIVE 0x4000 /* pquotas are being turned off */ |
| 186 | #define XFS_ALL_QUOTA_ACTIVE \ | 198 | #define XFS_ALL_QUOTA_ACTIVE \ |
| 187 | (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE) | 199 | (XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE) |
| 188 | 200 | ||
| 189 | /* | 201 | /* |
| 190 | * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees | 202 | * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees |
| @@ -268,24 +280,23 @@ typedef struct xfs_qoff_logformat { | |||
| 268 | ((XFS_IS_UQUOTA_ON(mp) && \ | 280 | ((XFS_IS_UQUOTA_ON(mp) && \ |
| 269 | (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ | 281 | (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ |
| 270 | (XFS_IS_GQUOTA_ON(mp) && \ | 282 | (XFS_IS_GQUOTA_ON(mp) && \ |
| 271 | ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ | 283 | (mp->m_sb.sb_qflags & XFS_GQUOTA_CHKD) == 0) || \ |
| 272 | (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT))) || \ | ||
| 273 | (XFS_IS_PQUOTA_ON(mp) && \ | 284 | (XFS_IS_PQUOTA_ON(mp) && \ |
| 274 | ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ | 285 | (mp->m_sb.sb_qflags & XFS_PQUOTA_CHKD) == 0)) |
| 275 | (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT)))) | ||
| 276 | 286 | ||
| 277 | #define XFS_MOUNT_QUOTA_SET1 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ | 287 | #define XFS_MOUNT_QUOTA_SET1 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ |
| 278 | XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ | 288 | XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ |
| 279 | XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) | 289 | XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD) |
| 280 | 290 | ||
| 281 | #define XFS_MOUNT_QUOTA_SET2 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ | 291 | #define XFS_MOUNT_QUOTA_SET2 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ |
| 282 | XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ | 292 | XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ |
| 283 | XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) | 293 | XFS_PQUOTA_ENFD|XFS_PQUOTA_CHKD) |
| 284 | 294 | ||
| 285 | #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ | 295 | #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ |
| 286 | XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ | 296 | XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ |
| 287 | XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\ | 297 | XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD|\ |
| 288 | XFS_GQUOTA_ACCT) | 298 | XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD|\ |
| 299 | XFS_PQUOTA_CHKD) | ||
| 289 | 300 | ||
| 290 | 301 | ||
| 291 | /* | 302 | /* |
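Splitting XFS_OQUOTA_ENFD/CHKD into per-type bits above forces the incore-only ACTIVE flags to move up to 0x1000 and beyond so nothing overlaps. The snippet below is a small sanity check of that property, using only the bit values visible in this hunk; it is an illustration of the layout constraint, not code from the patch.

#include <stdio.h>

/* Flag values taken from the hunk above (only the ones it shows);
 * the check itself is generic and works for any candidate layout. */
static const unsigned int qflags[] = {
    0x0040,  /* GQUOTA_ACCT   */
    0x0080,  /* GQUOTA_ENFD   */
    0x0100,  /* GQUOTA_CHKD   */
    0x0200,  /* PQUOTA_ENFD   */
    0x0400,  /* PQUOTA_CHKD   */
    0x1000,  /* UQUOTA_ACTIVE (moved up to make room) */
    0x2000,  /* GQUOTA_ACTIVE */
    0x4000,  /* PQUOTA_ACTIVE */
};

int main(void)
{
    unsigned int seen = 0;
    size_t i;

    /* Each flag must be a single, previously unused bit. */
    for (i = 0; i < sizeof(qflags) / sizeof(qflags[0]); i++) {
        unsigned int f = qflags[i];

        if ((f & (f - 1)) || (seen & f)) {
            printf("collision at 0x%x\n", f);
            return 1;
        }
        seen |= f;
    }
    printf("all bits distinct, mask 0x%x\n", seen);
    return 0;
}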
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 71926d630527..20e30f93b0c7 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c | |||
| @@ -75,8 +75,10 @@ xfs_fs_set_xstate( | |||
| 75 | flags |= XFS_GQUOTA_ACCT; | 75 | flags |= XFS_GQUOTA_ACCT; |
| 76 | if (uflags & FS_QUOTA_UDQ_ENFD) | 76 | if (uflags & FS_QUOTA_UDQ_ENFD) |
| 77 | flags |= XFS_UQUOTA_ENFD; | 77 | flags |= XFS_UQUOTA_ENFD; |
| 78 | if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD)) | 78 | if (uflags & FS_QUOTA_GDQ_ENFD) |
| 79 | flags |= XFS_OQUOTA_ENFD; | 79 | flags |= XFS_GQUOTA_ENFD; |
| 80 | if (uflags & FS_QUOTA_PDQ_ENFD) | ||
| 81 | flags |= XFS_PQUOTA_ENFD; | ||
| 80 | 82 | ||
| 81 | switch (op) { | 83 | switch (op) { |
| 82 | case Q_XQUOTAON: | 84 | case Q_XQUOTAON: |
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 2de58a85833c..78f9e70b80c7 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h | |||
| @@ -618,6 +618,12 @@ xfs_sb_has_incompat_log_feature( | |||
| 618 | return (sbp->sb_features_log_incompat & feature) != 0; | 618 | return (sbp->sb_features_log_incompat & feature) != 0; |
| 619 | } | 619 | } |
| 620 | 620 | ||
| 621 | static inline bool | ||
| 622 | xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) | ||
| 623 | { | ||
| 624 | return (ino == sbp->sb_uquotino || ino == sbp->sb_gquotino); | ||
| 625 | } | ||
| 626 | |||
| 621 | /* | 627 | /* |
| 622 | * end of superblock version macros | 628 | * end of superblock version macros |
| 623 | */ | 629 | */ |
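The new xfs_is_quota_inode() helper above replaces the repeated pair of comparisons against sb_uquotino and sb_gquotino that later hunks in this diff (for example in xfs_trans_dquot.c) delete. A tiny stand-alone sketch of the same predicate, with simplified stand-in types, is shown below.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the two superblock fields the new helper consults. */
struct sb { unsigned long long uquotino; unsigned long long gquotino; };

/* Mirrors xfs_is_quota_inode(): one predicate replaces the pair of
 * open-coded inode-number comparisons at each call site. */
static bool is_quota_inode(const struct sb *sbp, unsigned long long ino)
{
    return ino == sbp->uquotino || ino == sbp->gquotino;
}

int main(void)
{
    struct sb sb = { .uquotino = 131, .gquotino = 132 };

    /* Before: ino == sb.uquotino || ino == sb.gquotino at every caller. */
    printf("%d %d\n", is_quota_inode(&sb, 131), is_quota_inode(&sb, 200));
    return 0;
}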
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 3033ba5e9762..1d68ffcdeaa7 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
| @@ -51,6 +51,7 @@ | |||
| 51 | #include "xfs_inode_item.h" | 51 | #include "xfs_inode_item.h" |
| 52 | #include "xfs_icache.h" | 52 | #include "xfs_icache.h" |
| 53 | #include "xfs_trace.h" | 53 | #include "xfs_trace.h" |
| 54 | #include "xfs_icreate_item.h" | ||
| 54 | 55 | ||
| 55 | #include <linux/namei.h> | 56 | #include <linux/namei.h> |
| 56 | #include <linux/init.h> | 57 | #include <linux/init.h> |
| @@ -359,17 +360,17 @@ xfs_parseargs( | |||
| 359 | } else if (!strcmp(this_char, MNTOPT_PQUOTA) || | 360 | } else if (!strcmp(this_char, MNTOPT_PQUOTA) || |
| 360 | !strcmp(this_char, MNTOPT_PRJQUOTA)) { | 361 | !strcmp(this_char, MNTOPT_PRJQUOTA)) { |
| 361 | mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | | 362 | mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | |
| 362 | XFS_OQUOTA_ENFD); | 363 | XFS_PQUOTA_ENFD); |
| 363 | } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { | 364 | } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { |
| 364 | mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); | 365 | mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); |
| 365 | mp->m_qflags &= ~XFS_OQUOTA_ENFD; | 366 | mp->m_qflags &= ~XFS_PQUOTA_ENFD; |
| 366 | } else if (!strcmp(this_char, MNTOPT_GQUOTA) || | 367 | } else if (!strcmp(this_char, MNTOPT_GQUOTA) || |
| 367 | !strcmp(this_char, MNTOPT_GRPQUOTA)) { | 368 | !strcmp(this_char, MNTOPT_GRPQUOTA)) { |
| 368 | mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | | 369 | mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | |
| 369 | XFS_OQUOTA_ENFD); | 370 | XFS_GQUOTA_ENFD); |
| 370 | } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { | 371 | } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { |
| 371 | mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); | 372 | mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); |
| 372 | mp->m_qflags &= ~XFS_OQUOTA_ENFD; | 373 | mp->m_qflags &= ~XFS_GQUOTA_ENFD; |
| 373 | } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { | 374 | } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { |
| 374 | xfs_warn(mp, | 375 | xfs_warn(mp, |
| 375 | "delaylog is the default now, option is deprecated."); | 376 | "delaylog is the default now, option is deprecated."); |
| @@ -439,20 +440,15 @@ xfs_parseargs( | |||
| 439 | } | 440 | } |
| 440 | 441 | ||
| 441 | done: | 442 | done: |
| 442 | if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { | 443 | if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) { |
| 443 | /* | 444 | /* |
| 444 | * At this point the superblock has not been read | 445 | * At this point the superblock has not been read |
| 445 | * in, therefore we do not know the block size. | 446 | * in, therefore we do not know the block size. |
| 446 | * Before the mount call ends we will convert | 447 | * Before the mount call ends we will convert |
| 447 | * these to FSBs. | 448 | * these to FSBs. |
| 448 | */ | 449 | */ |
| 449 | if (dsunit) { | 450 | mp->m_dalign = dsunit; |
| 450 | mp->m_dalign = dsunit; | 451 | mp->m_swidth = dswidth; |
| 451 | mp->m_flags |= XFS_MOUNT_RETERR; | ||
| 452 | } | ||
| 453 | |||
| 454 | if (dswidth) | ||
| 455 | mp->m_swidth = dswidth; | ||
| 456 | } | 452 | } |
| 457 | 453 | ||
| 458 | if (mp->m_logbufs != -1 && | 454 | if (mp->m_logbufs != -1 && |
| @@ -563,12 +559,12 @@ xfs_showargs( | |||
| 563 | /* Either project or group quotas can be active, not both */ | 559 | /* Either project or group quotas can be active, not both */ |
| 564 | 560 | ||
| 565 | if (mp->m_qflags & XFS_PQUOTA_ACCT) { | 561 | if (mp->m_qflags & XFS_PQUOTA_ACCT) { |
| 566 | if (mp->m_qflags & XFS_OQUOTA_ENFD) | 562 | if (mp->m_qflags & XFS_PQUOTA_ENFD) |
| 567 | seq_puts(m, "," MNTOPT_PRJQUOTA); | 563 | seq_puts(m, "," MNTOPT_PRJQUOTA); |
| 568 | else | 564 | else |
| 569 | seq_puts(m, "," MNTOPT_PQUOTANOENF); | 565 | seq_puts(m, "," MNTOPT_PQUOTANOENF); |
| 570 | } else if (mp->m_qflags & XFS_GQUOTA_ACCT) { | 566 | } else if (mp->m_qflags & XFS_GQUOTA_ACCT) { |
| 571 | if (mp->m_qflags & XFS_OQUOTA_ENFD) | 567 | if (mp->m_qflags & XFS_GQUOTA_ENFD) |
| 572 | seq_puts(m, "," MNTOPT_GRPQUOTA); | 568 | seq_puts(m, "," MNTOPT_GRPQUOTA); |
| 573 | else | 569 | else |
| 574 | seq_puts(m, "," MNTOPT_GQUOTANOENF); | 570 | seq_puts(m, "," MNTOPT_GQUOTANOENF); |
| @@ -1136,8 +1132,8 @@ xfs_fs_statfs( | |||
| 1136 | spin_unlock(&mp->m_sb_lock); | 1132 | spin_unlock(&mp->m_sb_lock); |
| 1137 | 1133 | ||
| 1138 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && | 1134 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && |
| 1139 | ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == | 1135 | ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == |
| 1140 | (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) | 1136 | (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) |
| 1141 | xfs_qm_statvfs(ip, statp); | 1137 | xfs_qm_statvfs(ip, statp); |
| 1142 | return 0; | 1138 | return 0; |
| 1143 | } | 1139 | } |
| @@ -1481,6 +1477,10 @@ xfs_fs_fill_super( | |||
| 1481 | sb->s_time_gran = 1; | 1477 | sb->s_time_gran = 1; |
| 1482 | set_posix_acl_flag(sb); | 1478 | set_posix_acl_flag(sb); |
| 1483 | 1479 | ||
| 1480 | /* version 5 superblocks support inode version counters. */ | ||
| 1481 | if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) | ||
| 1482 | sb->s_flags |= MS_I_VERSION; | ||
| 1483 | |||
| 1484 | error = xfs_mountfs(mp); | 1484 | error = xfs_mountfs(mp); |
| 1485 | if (error) | 1485 | if (error) |
| 1486 | goto out_filestream_unmount; | 1486 | goto out_filestream_unmount; |
| @@ -1655,9 +1655,15 @@ xfs_init_zones(void) | |||
| 1655 | KM_ZONE_SPREAD, NULL); | 1655 | KM_ZONE_SPREAD, NULL); |
| 1656 | if (!xfs_ili_zone) | 1656 | if (!xfs_ili_zone) |
| 1657 | goto out_destroy_inode_zone; | 1657 | goto out_destroy_inode_zone; |
| 1658 | xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item), | ||
| 1659 | "xfs_icr"); | ||
| 1660 | if (!xfs_icreate_zone) | ||
| 1661 | goto out_destroy_ili_zone; | ||
| 1658 | 1662 | ||
| 1659 | return 0; | 1663 | return 0; |
| 1660 | 1664 | ||
| 1665 | out_destroy_ili_zone: | ||
| 1666 | kmem_zone_destroy(xfs_ili_zone); | ||
| 1661 | out_destroy_inode_zone: | 1667 | out_destroy_inode_zone: |
| 1662 | kmem_zone_destroy(xfs_inode_zone); | 1668 | kmem_zone_destroy(xfs_inode_zone); |
| 1663 | out_destroy_efi_zone: | 1669 | out_destroy_efi_zone: |
| @@ -1696,6 +1702,7 @@ xfs_destroy_zones(void) | |||
| 1696 | * destroy caches. | 1702 | * destroy caches. |
| 1697 | */ | 1703 | */ |
| 1698 | rcu_barrier(); | 1704 | rcu_barrier(); |
| 1705 | kmem_zone_destroy(xfs_icreate_zone); | ||
| 1699 | kmem_zone_destroy(xfs_ili_zone); | 1706 | kmem_zone_destroy(xfs_ili_zone); |
| 1700 | kmem_zone_destroy(xfs_inode_zone); | 1707 | kmem_zone_destroy(xfs_inode_zone); |
| 1701 | kmem_zone_destroy(xfs_efi_zone); | 1708 | kmem_zone_destroy(xfs_efi_zone); |
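The xfs_super.c hunk above adds the xfs_icr zone and threads it into the existing goto-based unwind, so a failed allocation tears down exactly the zones created before it. Below is a minimal sketch of that init/unwind shape with placeholder zone_init()/zone_destroy() helpers standing in for the kmem_zone calls; it is a pattern illustration, not the kernel code.

#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for kmem_zone_init()/kmem_zone_destroy(). */
static void *zone_init(const char *name) { (void)name; return malloc(1); }
static void  zone_destroy(void *z)       { free(z); }

static void *inode_zone, *ili_zone, *icreate_zone;

/* The same unwind shape as the hunk: each new allocation adds one label,
 * and a failure frees everything set up before it, in reverse order. */
static int init_zones(void)
{
    inode_zone = zone_init("inode");
    if (!inode_zone)
        goto out;

    ili_zone = zone_init("ili");
    if (!ili_zone)
        goto out_destroy_inode_zone;

    icreate_zone = zone_init("icr");    /* the newly added zone */
    if (!icreate_zone)
        goto out_destroy_ili_zone;

    return 0;

out_destroy_ili_zone:
    zone_destroy(ili_zone);
out_destroy_inode_zone:
    zone_destroy(inode_zone);
out:
    return -1;
}

int main(void)
{
    printf("init_zones() -> %d\n", init_zones());
    return 0;
}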
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 195a403e1522..e830fb56e27f 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c | |||
| @@ -358,7 +358,8 @@ xfs_symlink( | |||
| 358 | int n; | 358 | int n; |
| 359 | xfs_buf_t *bp; | 359 | xfs_buf_t *bp; |
| 360 | prid_t prid; | 360 | prid_t prid; |
| 361 | struct xfs_dquot *udqp, *gdqp; | 361 | struct xfs_dquot *udqp = NULL; |
| 362 | struct xfs_dquot *gdqp = NULL; | ||
| 362 | uint resblks; | 363 | uint resblks; |
| 363 | 364 | ||
| 364 | *ipp = NULL; | 365 | *ipp = NULL; |
| @@ -585,7 +586,7 @@ xfs_symlink( | |||
| 585 | /* | 586 | /* |
| 586 | * Free a symlink that has blocks associated with it. | 587 | * Free a symlink that has blocks associated with it. |
| 587 | */ | 588 | */ |
| 588 | int | 589 | STATIC int |
| 589 | xfs_inactive_symlink_rmt( | 590 | xfs_inactive_symlink_rmt( |
| 590 | xfs_inode_t *ip, | 591 | xfs_inode_t *ip, |
| 591 | xfs_trans_t **tpp) | 592 | xfs_trans_t **tpp) |
| @@ -606,7 +607,7 @@ xfs_inactive_symlink_rmt( | |||
| 606 | 607 | ||
| 607 | tp = *tpp; | 608 | tp = *tpp; |
| 608 | mp = ip->i_mount; | 609 | mp = ip->i_mount; |
| 609 | ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip)); | 610 | ASSERT(ip->i_df.if_flags & XFS_IFEXTENTS); |
| 610 | /* | 611 | /* |
| 611 | * We're freeing a symlink that has some | 612 | * We're freeing a symlink that has some |
| 612 | * blocks allocated to it. Free the | 613 | * blocks allocated to it. Free the |
| @@ -720,3 +721,47 @@ xfs_inactive_symlink_rmt( | |||
| 720 | error0: | 721 | error0: |
| 721 | return error; | 722 | return error; |
| 722 | } | 723 | } |
| 724 | |||
| 725 | /* | ||
| 726 | * xfs_inactive_symlink - free a symlink | ||
| 727 | */ | ||
| 728 | int | ||
| 729 | xfs_inactive_symlink( | ||
| 730 | struct xfs_inode *ip, | ||
| 731 | struct xfs_trans **tp) | ||
| 732 | { | ||
| 733 | struct xfs_mount *mp = ip->i_mount; | ||
| 734 | int pathlen; | ||
| 735 | |||
| 736 | trace_xfs_inactive_symlink(ip); | ||
| 737 | |||
| 738 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
| 739 | |||
| 740 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
| 741 | return XFS_ERROR(EIO); | ||
| 742 | |||
| 743 | /* | ||
| 744 | * Zero length symlinks _can_ exist. | ||
| 745 | */ | ||
| 746 | pathlen = (int)ip->i_d.di_size; | ||
| 747 | if (!pathlen) | ||
| 748 | return 0; | ||
| 749 | |||
| 750 | if (pathlen < 0 || pathlen > MAXPATHLEN) { | ||
| 751 | xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)", | ||
| 752 | __func__, (unsigned long long)ip->i_ino, pathlen); | ||
| 753 | ASSERT(0); | ||
| 754 | return XFS_ERROR(EFSCORRUPTED); | ||
| 755 | } | ||
| 756 | |||
| 757 | if (ip->i_df.if_flags & XFS_IFINLINE) { | ||
| 758 | if (ip->i_df.if_bytes > 0) | ||
| 759 | xfs_idata_realloc(ip, -(ip->i_df.if_bytes), | ||
| 760 | XFS_DATA_FORK); | ||
| 761 | ASSERT(ip->i_df.if_bytes == 0); | ||
| 762 | return 0; | ||
| 763 | } | ||
| 764 | |||
| 765 | /* remove the remote symlink */ | ||
| 766 | return xfs_inactive_symlink_rmt(ip, tp); | ||
| 767 | } | ||
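The new xfs_inactive_symlink() above is a small dispatcher: zero-length targets need no work, an implausible di_size is treated as corruption, inline targets are freed in place, and only remote targets fall through to xfs_inactive_symlink_rmt(). A simplified user-space sketch of that decision ladder follows; the struct, the error value and the remote-removal stub are assumptions made for the example.

#include <stdio.h>

#define MAXPATHLEN   1024
#define EFSCORRUPTED 990          /* illustrative errno-style value */

struct symlink {                  /* stand-in for the inode fields used */
    int size;                     /* di_size: target path length */
    int inline_bytes;             /* bytes stored in the data fork, if any */
};

/* Same decision ladder as the new xfs_inactive_symlink():
 * zero length is fine, insane lengths are corruption, inline data is
 * freed in place, and only remote symlinks need the heavyweight path. */
static int inactive_symlink(struct symlink *ip)
{
    if (ip->size == 0)
        return 0;                         /* zero-length symlinks can exist */
    if (ip->size < 0 || ip->size > MAXPATHLEN)
        return -EFSCORRUPTED;             /* on-disk length is garbage */
    if (ip->inline_bytes > 0) {
        ip->inline_bytes = 0;             /* free the in-core copy */
        return 0;
    }
    printf("removing remote symlink blocks\n");   /* the _rmt() path */
    return 0;
}

int main(void)
{
    struct symlink inline_link = { 12, 12 };     /* short target, stored inline */
    struct symlink remote_link = { 200, 0 };     /* target lives in data blocks */
    struct symlink bad_link    = { 4096, 0 };    /* longer than MAXPATHLEN */

    printf("inline: %d\n", inactive_symlink(&inline_link));
    printf("remote: %d\n", inactive_symlink(&remote_link));
    printf("bad:    %d\n", inactive_symlink(&bad_link));
    return 0;
}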
diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h index b39398d2097c..374394880c01 100644 --- a/fs/xfs/xfs_symlink.h +++ b/fs/xfs/xfs_symlink.h | |||
| @@ -60,7 +60,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; | |||
| 60 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, | 60 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, |
| 61 | const char *target_path, umode_t mode, struct xfs_inode **ipp); | 61 | const char *target_path, umode_t mode, struct xfs_inode **ipp); |
| 62 | int xfs_readlink(struct xfs_inode *ip, char *link); | 62 | int xfs_readlink(struct xfs_inode *ip, char *link); |
| 63 | int xfs_inactive_symlink_rmt(struct xfs_inode *ip, struct xfs_trans **tpp); | 63 | int xfs_inactive_symlink(struct xfs_inode *ip, struct xfs_trans **tpp); |
| 64 | 64 | ||
| 65 | #endif /* __KERNEL__ */ | 65 | #endif /* __KERNEL__ */ |
| 66 | #endif /* __XFS_SYMLINK_H */ | 66 | #endif /* __XFS_SYMLINK_H */ |
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 2801b5ce6cdb..1743b9f8e23d 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c | |||
| @@ -25,11 +25,11 @@ static struct ctl_table_header *xfs_table_header; | |||
| 25 | #ifdef CONFIG_PROC_FS | 25 | #ifdef CONFIG_PROC_FS |
| 26 | STATIC int | 26 | STATIC int |
| 27 | xfs_stats_clear_proc_handler( | 27 | xfs_stats_clear_proc_handler( |
| 28 | ctl_table *ctl, | 28 | struct ctl_table *ctl, |
| 29 | int write, | 29 | int write, |
| 30 | void __user *buffer, | 30 | void __user *buffer, |
| 31 | size_t *lenp, | 31 | size_t *lenp, |
| 32 | loff_t *ppos) | 32 | loff_t *ppos) |
| 33 | { | 33 | { |
| 34 | int c, ret, *valp = ctl->data; | 34 | int c, ret, *valp = ctl->data; |
| 35 | __uint32_t vn_active; | 35 | __uint32_t vn_active; |
| @@ -55,11 +55,11 @@ xfs_stats_clear_proc_handler( | |||
| 55 | 55 | ||
| 56 | STATIC int | 56 | STATIC int |
| 57 | xfs_panic_mask_proc_handler( | 57 | xfs_panic_mask_proc_handler( |
| 58 | ctl_table *ctl, | 58 | struct ctl_table *ctl, |
| 59 | int write, | 59 | int write, |
| 60 | void __user *buffer, | 60 | void __user *buffer, |
| 61 | size_t *lenp, | 61 | size_t *lenp, |
| 62 | loff_t *ppos) | 62 | loff_t *ppos) |
| 63 | { | 63 | { |
| 64 | int ret, *valp = ctl->data; | 64 | int ret, *valp = ctl->data; |
| 65 | 65 | ||
| @@ -74,7 +74,7 @@ xfs_panic_mask_proc_handler( | |||
| 74 | } | 74 | } |
| 75 | #endif /* CONFIG_PROC_FS */ | 75 | #endif /* CONFIG_PROC_FS */ |
| 76 | 76 | ||
| 77 | static ctl_table xfs_table[] = { | 77 | static struct ctl_table xfs_table[] = { |
| 78 | { | 78 | { |
| 79 | .procname = "irix_sgid_inherit", | 79 | .procname = "irix_sgid_inherit", |
| 80 | .data = &xfs_params.sgid_inherit.val, | 80 | .data = &xfs_params.sgid_inherit.val, |
| @@ -227,7 +227,7 @@ static ctl_table xfs_table[] = { | |||
| 227 | {} | 227 | {} |
| 228 | }; | 228 | }; |
| 229 | 229 | ||
| 230 | static ctl_table xfs_dir_table[] = { | 230 | static struct ctl_table xfs_dir_table[] = { |
| 231 | { | 231 | { |
| 232 | .procname = "xfs", | 232 | .procname = "xfs", |
| 233 | .mode = 0555, | 233 | .mode = 0555, |
| @@ -236,7 +236,7 @@ static ctl_table xfs_dir_table[] = { | |||
| 236 | {} | 236 | {} |
| 237 | }; | 237 | }; |
| 238 | 238 | ||
| 239 | static ctl_table xfs_root_table[] = { | 239 | static struct ctl_table xfs_root_table[] = { |
| 240 | { | 240 | { |
| 241 | .procname = "fs", | 241 | .procname = "fs", |
| 242 | .mode = 0555, | 242 | .mode = 0555, |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index a04701de6bbd..47910e638c18 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
| @@ -486,9 +486,12 @@ DEFINE_EVENT(xfs_buf_item_class, name, \ | |||
| 486 | TP_PROTO(struct xfs_buf_log_item *bip), \ | 486 | TP_PROTO(struct xfs_buf_log_item *bip), \ |
| 487 | TP_ARGS(bip)) | 487 | TP_ARGS(bip)) |
| 488 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); | 488 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); |
| 489 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_ordered); | ||
| 489 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); | 490 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); |
| 490 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); | 491 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); |
| 492 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_ordered); | ||
| 491 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); | 493 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); |
| 494 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered); | ||
| 492 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); | 495 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); |
| 493 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); | 496 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); |
| 494 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); | 497 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); |
| @@ -508,6 +511,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin); | |||
| 508 | DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); | 511 | DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); |
| 509 | DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); | 512 | DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); |
| 510 | DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); | 513 | DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); |
| 514 | DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered); | ||
| 511 | 515 | ||
| 512 | DECLARE_EVENT_CLASS(xfs_lock_class, | 516 | DECLARE_EVENT_CLASS(xfs_lock_class, |
| 513 | TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, | 517 | TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, |
| @@ -571,6 +575,7 @@ DEFINE_INODE_EVENT(xfs_iget_miss); | |||
| 571 | DEFINE_INODE_EVENT(xfs_getattr); | 575 | DEFINE_INODE_EVENT(xfs_getattr); |
| 572 | DEFINE_INODE_EVENT(xfs_setattr); | 576 | DEFINE_INODE_EVENT(xfs_setattr); |
| 573 | DEFINE_INODE_EVENT(xfs_readlink); | 577 | DEFINE_INODE_EVENT(xfs_readlink); |
| 578 | DEFINE_INODE_EVENT(xfs_inactive_symlink); | ||
| 574 | DEFINE_INODE_EVENT(xfs_alloc_file_space); | 579 | DEFINE_INODE_EVENT(xfs_alloc_file_space); |
| 575 | DEFINE_INODE_EVENT(xfs_free_file_space); | 580 | DEFINE_INODE_EVENT(xfs_free_file_space); |
| 576 | DEFINE_INODE_EVENT(xfs_readdir); | 581 | DEFINE_INODE_EVENT(xfs_readdir); |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 2fd7c1ff1d21..35a229981354 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
| @@ -234,71 +234,93 @@ xfs_calc_remove_reservation( | |||
| 234 | } | 234 | } |
| 235 | 235 | ||
| 236 | /* | 236 | /* |
| 237 | * For symlink we can modify: | 237 | * For create, break it in to the two cases that the transaction |
| 238 | * covers. We start with the modify case - allocation done by modification | ||
| 239 | * of the state of existing inodes - and the allocation case. | ||
| 240 | */ | ||
| 241 | |||
| 242 | /* | ||
| 243 | * For create we can modify: | ||
| 238 | * the parent directory inode: inode size | 244 | * the parent directory inode: inode size |
| 239 | * the new inode: inode size | 245 | * the new inode: inode size |
| 240 | * the inode btree entry: 1 block | 246 | * the inode btree entry: block size |
| 247 | * the superblock for the nlink flag: sector size | ||
| 241 | * the directory btree: (max depth + v2) * dir block size | 248 | * the directory btree: (max depth + v2) * dir block size |
| 242 | * the directory inode's bmap btree: (max depth + v2) * block size | 249 | * the directory inode's bmap btree: (max depth + v2) * block size |
| 243 | * the blocks for the symlink: 1 kB | 250 | */ |
| 244 | * Or in the first xact we allocate some inodes giving: | 251 | STATIC uint |
| 252 | xfs_calc_create_resv_modify( | ||
| 253 | struct xfs_mount *mp) | ||
| 254 | { | ||
| 255 | return xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + | ||
| 256 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + | ||
| 257 | (uint)XFS_FSB_TO_B(mp, 1) + | ||
| 258 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)); | ||
| 259 | } | ||
| 260 | |||
| 261 | /* | ||
| 262 | * For create we can allocate some inodes giving: | ||
| 245 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize | 263 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize |
| 264 | * the superblock for the nlink flag: sector size | ||
| 246 | * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize | 265 | * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize |
| 247 | * the inode btree: max depth * blocksize | 266 | * the inode btree: max depth * blocksize |
| 248 | * the allocation btrees: 2 trees * (2 * max depth - 1) * block size | 267 | * the allocation btrees: 2 trees * (max depth - 1) * block size |
| 249 | */ | 268 | */ |
| 250 | STATIC uint | 269 | STATIC uint |
| 251 | xfs_calc_symlink_reservation( | 270 | xfs_calc_create_resv_alloc( |
| 271 | struct xfs_mount *mp) | ||
| 272 | { | ||
| 273 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | ||
| 274 | mp->m_sb.sb_sectsize + | ||
| 275 | xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) + | ||
| 276 | xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + | ||
| 277 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
| 278 | XFS_FSB_TO_B(mp, 1)); | ||
| 279 | } | ||
| 280 | |||
| 281 | STATIC uint | ||
| 282 | __xfs_calc_create_reservation( | ||
| 252 | struct xfs_mount *mp) | 283 | struct xfs_mount *mp) |
| 253 | { | 284 | { |
| 254 | return XFS_DQUOT_LOGRES(mp) + | 285 | return XFS_DQUOT_LOGRES(mp) + |
| 255 | MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + | 286 | MAX(xfs_calc_create_resv_alloc(mp), |
| 256 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + | 287 | xfs_calc_create_resv_modify(mp)); |
| 257 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), | ||
| 258 | XFS_FSB_TO_B(mp, 1)) + | ||
| 259 | xfs_calc_buf_res(1, 1024)), | ||
| 260 | (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | ||
| 261 | xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), | ||
| 262 | XFS_FSB_TO_B(mp, 1)) + | ||
| 263 | xfs_calc_buf_res(mp->m_in_maxlevels, | ||
| 264 | XFS_FSB_TO_B(mp, 1)) + | ||
| 265 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
| 266 | XFS_FSB_TO_B(mp, 1)))); | ||
| 267 | } | 288 | } |
| 268 | 289 | ||
| 269 | /* | 290 | /* |
| 270 | * For create we can modify: | 291 | * For icreate we can allocate some inodes giving: |
| 271 | * the parent directory inode: inode size | ||
| 272 | * the new inode: inode size | ||
| 273 | * the inode btree entry: block size | ||
| 274 | * the superblock for the nlink flag: sector size | ||
| 275 | * the directory btree: (max depth + v2) * dir block size | ||
| 276 | * the directory inode's bmap btree: (max depth + v2) * block size | ||
| 277 | * Or in the first xact we allocate some inodes giving: | ||
| 278 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize | 292 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize |
| 279 | * the superblock for the nlink flag: sector size | 293 | * the superblock for the nlink flag: sector size |
| 280 | * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize | ||
| 281 | * the inode btree: max depth * blocksize | 294 | * the inode btree: max depth * blocksize |
| 282 | * the allocation btrees: 2 trees * (max depth - 1) * block size | 295 | * the allocation btrees: 2 trees * (max depth - 1) * block size |
| 283 | */ | 296 | */ |
| 284 | STATIC uint | 297 | STATIC uint |
| 285 | xfs_calc_create_reservation( | 298 | xfs_calc_icreate_resv_alloc( |
| 286 | struct xfs_mount *mp) | 299 | struct xfs_mount *mp) |
| 287 | { | 300 | { |
| 301 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | ||
| 302 | mp->m_sb.sb_sectsize + | ||
| 303 | xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + | ||
| 304 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
| 305 | XFS_FSB_TO_B(mp, 1)); | ||
| 306 | } | ||
| 307 | |||
| 308 | STATIC uint | ||
| 309 | xfs_calc_icreate_reservation(xfs_mount_t *mp) | ||
| 310 | { | ||
| 288 | return XFS_DQUOT_LOGRES(mp) + | 311 | return XFS_DQUOT_LOGRES(mp) + |
| 289 | MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + | 312 | MAX(xfs_calc_icreate_resv_alloc(mp), |
| 290 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + | 313 | xfs_calc_create_resv_modify(mp)); |
| 291 | (uint)XFS_FSB_TO_B(mp, 1) + | 314 | } |
| 292 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), | 315 | |
| 293 | XFS_FSB_TO_B(mp, 1))), | 316 | STATIC uint |
| 294 | (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | 317 | xfs_calc_create_reservation( |
| 295 | mp->m_sb.sb_sectsize + | 318 | struct xfs_mount *mp) |
| 296 | xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), | 319 | { |
| 297 | XFS_FSB_TO_B(mp, 1)) + | 320 | if (xfs_sb_version_hascrc(&mp->m_sb)) |
| 298 | xfs_calc_buf_res(mp->m_in_maxlevels, | 321 | return xfs_calc_icreate_reservation(mp); |
| 299 | XFS_FSB_TO_B(mp, 1)) + | 322 | return __xfs_calc_create_reservation(mp); |
| 300 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | 323 | |
| 301 | XFS_FSB_TO_B(mp, 1)))); | ||
| 302 | } | 324 | } |
| 303 | 325 | ||
| 304 | /* | 326 | /* |
| @@ -311,6 +333,20 @@ xfs_calc_mkdir_reservation( | |||
| 311 | return xfs_calc_create_reservation(mp); | 333 | return xfs_calc_create_reservation(mp); |
| 312 | } | 334 | } |
| 313 | 335 | ||
| 336 | |||
| 337 | /* | ||
| 338 | * Making a new symlink is the same as creating a new file, but | ||
| 339 | * with the added blocks for remote symlink data which can be up to 1kB in | ||
| 340 | * length (MAXPATHLEN). | ||
| 341 | */ | ||
| 342 | STATIC uint | ||
| 343 | xfs_calc_symlink_reservation( | ||
| 344 | struct xfs_mount *mp) | ||
| 345 | { | ||
| 346 | return xfs_calc_create_reservation(mp) + | ||
| 347 | xfs_calc_buf_res(1, MAXPATHLEN); | ||
| 348 | } | ||
| 349 | |||
| 314 | /* | 350 | /* |
| 315 | * In freeing an inode we can modify: | 351 | * In freeing an inode we can modify: |
| 316 | * the inode being freed: inode size | 352 | * the inode being freed: inode size |
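The reservation rework above expresses each transaction reservation as the worse of two independent cases (allocate new inode chunks versus modify existing metadata), and the new symlink reservation is simply the create reservation plus one buffer of up to MAXPATHLEN of remote target data. The sketch below mirrors that max-of-two-cases structure; the buffer counts, sizes and the 128-byte overhead are invented numbers, not the real log reservation formulas.

#include <stdio.h>

#define MAXPATHLEN 1024

/* Toy per-buffer reservation: nbufs buffers of blen bytes each, plus a
 * fixed per-buffer header overhead (the number is illustrative only). */
static unsigned int buf_res(unsigned int nbufs, unsigned int blen)
{
    return nbufs * (blen + 128);
}

static unsigned int max_u(unsigned int a, unsigned int b)
{
    return a > b ? a : b;
}

/* Mirrors the split in the hunk: the create reservation is the worse of
 * the "allocate new inode chunks" and "modify existing metadata" cases.
 * The buffer counts (8 and 6) are placeholders, not XFS's real counts. */
static unsigned int create_res(unsigned int sectsize, unsigned int blocksize,
                               unsigned int inodesize)
{
    unsigned int alloc_case  = buf_res(2, sectsize) + buf_res(8, blocksize);
    unsigned int modify_case = buf_res(2, inodesize) + buf_res(1, sectsize) +
                               buf_res(6, blocksize);

    return max_u(alloc_case, modify_case);
}

int main(void)
{
    /* A symlink is a create plus one buffer for up to MAXPATHLEN of
     * remote target data, as the new xfs_calc_symlink_reservation() says. */
    unsigned int symlink = create_res(512, 4096, 256) + buf_res(1, MAXPATHLEN);

    printf("create=%u symlink=%u\n", create_res(512, 4096, 256), symlink);
    return 0;
}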
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index a44dba5b2cdb..2b4946393e30 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
| @@ -48,6 +48,7 @@ typedef struct xfs_trans_header { | |||
| 48 | #define XFS_LI_BUF 0x123c /* v2 bufs, variable sized inode bufs */ | 48 | #define XFS_LI_BUF 0x123c /* v2 bufs, variable sized inode bufs */ |
| 49 | #define XFS_LI_DQUOT 0x123d | 49 | #define XFS_LI_DQUOT 0x123d |
| 50 | #define XFS_LI_QUOTAOFF 0x123e | 50 | #define XFS_LI_QUOTAOFF 0x123e |
| 51 | #define XFS_LI_ICREATE 0x123f | ||
| 51 | 52 | ||
| 52 | #define XFS_LI_TYPE_DESC \ | 53 | #define XFS_LI_TYPE_DESC \ |
| 53 | { XFS_LI_EFI, "XFS_LI_EFI" }, \ | 54 | { XFS_LI_EFI, "XFS_LI_EFI" }, \ |
| @@ -107,7 +108,8 @@ typedef struct xfs_trans_header { | |||
| 107 | #define XFS_TRANS_SWAPEXT 40 | 108 | #define XFS_TRANS_SWAPEXT 40 |
| 108 | #define XFS_TRANS_SB_COUNT 41 | 109 | #define XFS_TRANS_SB_COUNT 41 |
| 109 | #define XFS_TRANS_CHECKPOINT 42 | 110 | #define XFS_TRANS_CHECKPOINT 42 |
| 110 | #define XFS_TRANS_TYPE_MAX 42 | 111 | #define XFS_TRANS_ICREATE 43 |
| 112 | #define XFS_TRANS_TYPE_MAX 43 | ||
| 111 | /* new transaction types need to be reflected in xfs_logprint(8) */ | 113 | /* new transaction types need to be reflected in xfs_logprint(8) */ |
| 112 | 114 | ||
| 113 | #define XFS_TRANS_TYPES \ | 115 | #define XFS_TRANS_TYPES \ |
| @@ -210,23 +212,18 @@ struct xfs_log_item_desc { | |||
| 210 | /* | 212 | /* |
| 211 | * Per-extent log reservation for the allocation btree changes | 213 | * Per-extent log reservation for the allocation btree changes |
| 212 | * involved in freeing or allocating an extent. | 214 | * involved in freeing or allocating an extent. |
| 213 | * 2 trees * (2 blocks/level * max depth - 1) * block size | 215 | * 2 trees * (2 blocks/level * max depth - 1) |
| 214 | */ | 216 | */ |
| 215 | #define XFS_ALLOCFREE_LOG_RES(mp,nx) \ | ||
| 216 | ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1))) | ||
| 217 | #define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \ | 217 | #define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \ |
| 218 | ((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1))) | 218 | ((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1))) |
| 219 | 219 | ||
| 220 | /* | 220 | /* |
| 221 | * Per-directory log reservation for any directory change. | 221 | * Per-directory log reservation for any directory change. |
| 222 | * dir blocks: (1 btree block per level + data block + free block) * dblock size | 222 | * dir blocks: (1 btree block per level + data block + free block) |
| 223 | * bmap btree: (levels + 2) * max depth * block size | 223 | * bmap btree: (levels + 2) * max depth |
| 224 | * v2 directory blocks can be fragmented below the dirblksize down to the fsb | 224 | * v2 directory blocks can be fragmented below the dirblksize down to the fsb |
| 225 | * size, so account for that in the DAENTER macros. | 225 | * size, so account for that in the DAENTER macros. |
| 226 | */ | 226 | */ |
| 227 | #define XFS_DIROP_LOG_RES(mp) \ | ||
| 228 | (XFS_FSB_TO_B(mp, XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK)) + \ | ||
| 229 | (XFS_FSB_TO_B(mp, XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1))) | ||
| 230 | #define XFS_DIROP_LOG_COUNT(mp) \ | 227 | #define XFS_DIROP_LOG_COUNT(mp) \ |
| 231 | (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \ | 228 | (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \ |
| 232 | XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1) | 229 | XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1) |
| @@ -503,6 +500,7 @@ void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *); | |||
| 503 | void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); | 500 | void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); |
| 504 | void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); | 501 | void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); |
| 505 | void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); | 502 | void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); |
| 503 | void xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *); | ||
| 506 | void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); | 504 | void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); |
| 507 | void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); | 505 | void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); |
| 508 | void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); | 506 | void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 73a5fa457e16..aa5a04b844d6 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
| @@ -397,7 +397,6 @@ shutdown_abort: | |||
| 397 | return XFS_ERROR(EIO); | 397 | return XFS_ERROR(EIO); |
| 398 | } | 398 | } |
| 399 | 399 | ||
| 400 | |||
| 401 | /* | 400 | /* |
| 402 | * Release the buffer bp which was previously acquired with one of the | 401 | * Release the buffer bp which was previously acquired with one of the |
| 403 | * xfs_trans_... buffer allocation routines if the buffer has not | 402 | * xfs_trans_... buffer allocation routines if the buffer has not |
| @@ -603,8 +602,14 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
| 603 | 602 | ||
| 604 | tp->t_flags |= XFS_TRANS_DIRTY; | 603 | tp->t_flags |= XFS_TRANS_DIRTY; |
| 605 | bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; | 604 | bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; |
| 606 | bip->bli_flags |= XFS_BLI_LOGGED; | 605 | |
| 607 | xfs_buf_item_log(bip, first, last); | 606 | /* |
| 607 | * If we have an ordered buffer we are not logging any dirty range but | ||
| 608 | * it still needs to be marked dirty and flagged as having been logged. | ||
| 609 | */ | ||
| 610 | bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED; | ||
| 611 | if (!(bip->bli_flags & XFS_BLI_ORDERED)) | ||
| 612 | xfs_buf_item_log(bip, first, last); | ||
| 608 | } | 613 | } |
| 609 | 614 | ||
| 610 | 615 | ||
| @@ -757,6 +762,29 @@ xfs_trans_inode_alloc_buf( | |||
| 757 | } | 762 | } |
| 758 | 763 | ||
| 759 | /* | 764 | /* |
| 765 | * Mark the buffer as ordered for this transaction. This means | ||
| 766 | * that the contents of the buffer are not recorded in the transaction | ||
| 767 | * but it is tracked in the AIL as though it was. This allows us | ||
| 768 | * to record logical changes in transactions rather than the physical | ||
| 769 | * changes we make to the buffer without changing writeback ordering | ||
| 770 | * constraints of metadata buffers. | ||
| 771 | */ | ||
| 772 | void | ||
| 773 | xfs_trans_ordered_buf( | ||
| 774 | struct xfs_trans *tp, | ||
| 775 | struct xfs_buf *bp) | ||
| 776 | { | ||
| 777 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
| 778 | |||
| 779 | ASSERT(bp->b_transp == tp); | ||
| 780 | ASSERT(bip != NULL); | ||
| 781 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | ||
| 782 | |||
| 783 | bip->bli_flags |= XFS_BLI_ORDERED; | ||
| 784 | trace_xfs_buf_item_ordered(bip); | ||
| 785 | } | ||
| 786 | |||
| 787 | /* | ||
| 760 | * Set the type of the buffer for log recovery so that it can correctly identify | 788 | * Set the type of the buffer for log recovery so that it can correctly identify |
| 761 | * and hence attach the correct buffer ops to the buffer after replay. | 789 | * and hence attach the correct buffer ops to the buffer after replay. |
| 762 | */ | 790 | */ |
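Ordered buffers, introduced above, are attached to the transaction and marked dirty and logged, but no byte range is ever handed to xfs_buf_item_log(), so only the logical change travels through the log while writeback ordering is preserved. A toy model of that skip is below; the flag values and the struct are simplified stand-ins, not the kernel's.

#include <stdio.h>

#define BLI_DIRTY   0x1
#define BLI_LOGGED  0x2
#define BLI_ORDERED 0x4

struct buf_item { unsigned int flags; int logged_ranges; };

/* Toy version of the xfs_trans_log_buf() change: ordered buffers are
 * marked dirty/logged so they ride through the AIL, but no byte range
 * is recorded for them in the log. */
static void trans_log_buf(struct buf_item *bip, int first, int last)
{
    bip->flags |= BLI_DIRTY | BLI_LOGGED;
    if (bip->flags & BLI_ORDERED)
        return;                       /* logical change only, no ranges */
    bip->logged_ranges++;             /* would call xfs_buf_item_log() */
    (void)first; (void)last;
}

int main(void)
{
    struct buf_item normal  = { 0, 0 };
    struct buf_item ordered = { BLI_ORDERED, 0 };   /* as xfs_trans_ordered_buf() sets */

    trans_log_buf(&normal, 0, 511);
    trans_log_buf(&ordered, 0, 511);
    printf("normal ranges=%d ordered ranges=%d\n",
           normal.logged_ranges, ordered.logged_ranges);
    return 0;
}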
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index fec75d023703..3ba64d540168 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c | |||
| @@ -103,8 +103,6 @@ xfs_trans_dup_dqinfo( | |||
| 103 | return; | 103 | return; |
| 104 | 104 | ||
| 105 | xfs_trans_alloc_dqinfo(ntp); | 105 | xfs_trans_alloc_dqinfo(ntp); |
| 106 | oqa = otp->t_dqinfo->dqa_usrdquots; | ||
| 107 | nqa = ntp->t_dqinfo->dqa_usrdquots; | ||
| 108 | 106 | ||
| 109 | /* | 107 | /* |
| 110 | * Because the quota blk reservation is carried forward, | 108 | * Because the quota blk reservation is carried forward, |
| @@ -113,7 +111,9 @@ xfs_trans_dup_dqinfo( | |||
| 113 | if(otp->t_flags & XFS_TRANS_DQ_DIRTY) | 111 | if(otp->t_flags & XFS_TRANS_DQ_DIRTY) |
| 114 | ntp->t_flags |= XFS_TRANS_DQ_DIRTY; | 112 | ntp->t_flags |= XFS_TRANS_DQ_DIRTY; |
| 115 | 113 | ||
| 116 | for (j = 0; j < 2; j++) { | 114 | for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { |
| 115 | oqa = otp->t_dqinfo->dqs[j]; | ||
| 116 | nqa = ntp->t_dqinfo->dqs[j]; | ||
| 117 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { | 117 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { |
| 118 | if (oqa[i].qt_dquot == NULL) | 118 | if (oqa[i].qt_dquot == NULL) |
| 119 | break; | 119 | break; |
| @@ -138,8 +138,6 @@ xfs_trans_dup_dqinfo( | |||
| 138 | oq->qt_ino_res = oq->qt_ino_res_used; | 138 | oq->qt_ino_res = oq->qt_ino_res_used; |
| 139 | 139 | ||
| 140 | } | 140 | } |
| 141 | oqa = otp->t_dqinfo->dqa_grpdquots; | ||
| 142 | nqa = ntp->t_dqinfo->dqa_grpdquots; | ||
| 143 | } | 141 | } |
| 144 | } | 142 | } |
| 145 | 143 | ||
| @@ -157,8 +155,7 @@ xfs_trans_mod_dquot_byino( | |||
| 157 | 155 | ||
| 158 | if (!XFS_IS_QUOTA_RUNNING(mp) || | 156 | if (!XFS_IS_QUOTA_RUNNING(mp) || |
| 159 | !XFS_IS_QUOTA_ON(mp) || | 157 | !XFS_IS_QUOTA_ON(mp) || |
| 160 | ip->i_ino == mp->m_sb.sb_uquotino || | 158 | xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) |
| 161 | ip->i_ino == mp->m_sb.sb_gquotino) | ||
| 162 | return; | 159 | return; |
| 163 | 160 | ||
| 164 | if (tp->t_dqinfo == NULL) | 161 | if (tp->t_dqinfo == NULL) |
| @@ -170,16 +167,18 @@ xfs_trans_mod_dquot_byino( | |||
| 170 | (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); | 167 | (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); |
| 171 | } | 168 | } |
| 172 | 169 | ||
| 173 | STATIC xfs_dqtrx_t * | 170 | STATIC struct xfs_dqtrx * |
| 174 | xfs_trans_get_dqtrx( | 171 | xfs_trans_get_dqtrx( |
| 175 | xfs_trans_t *tp, | 172 | struct xfs_trans *tp, |
| 176 | xfs_dquot_t *dqp) | 173 | struct xfs_dquot *dqp) |
| 177 | { | 174 | { |
| 178 | int i; | 175 | int i; |
| 179 | xfs_dqtrx_t *qa; | 176 | struct xfs_dqtrx *qa; |
| 180 | 177 | ||
| 181 | qa = XFS_QM_ISUDQ(dqp) ? | 178 | if (XFS_QM_ISUDQ(dqp)) |
| 182 | tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots; | 179 | qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_USR]; |
| 180 | else | ||
| 181 | qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_GRP]; | ||
| 183 | 182 | ||
| 184 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { | 183 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { |
| 185 | if (qa[i].qt_dquot == NULL || | 184 | if (qa[i].qt_dquot == NULL || |
| @@ -339,12 +338,10 @@ xfs_trans_apply_dquot_deltas( | |||
| 339 | return; | 338 | return; |
| 340 | 339 | ||
| 341 | ASSERT(tp->t_dqinfo); | 340 | ASSERT(tp->t_dqinfo); |
| 342 | qa = tp->t_dqinfo->dqa_usrdquots; | 341 | for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { |
| 343 | for (j = 0; j < 2; j++) { | 342 | qa = tp->t_dqinfo->dqs[j]; |
| 344 | if (qa[0].qt_dquot == NULL) { | 343 | if (qa[0].qt_dquot == NULL) |
| 345 | qa = tp->t_dqinfo->dqa_grpdquots; | ||
| 346 | continue; | 344 | continue; |
| 347 | } | ||
| 348 | 345 | ||
| 349 | /* | 346 | /* |
| 350 | * Lock all of the dquots and join them to the transaction. | 347 | * Lock all of the dquots and join them to the transaction. |
| @@ -495,10 +492,6 @@ xfs_trans_apply_dquot_deltas( | |||
| 495 | ASSERT(dqp->q_res_rtbcount >= | 492 | ASSERT(dqp->q_res_rtbcount >= |
| 496 | be64_to_cpu(dqp->q_core.d_rtbcount)); | 493 | be64_to_cpu(dqp->q_core.d_rtbcount)); |
| 497 | } | 494 | } |
| 498 | /* | ||
| 499 | * Do the group quotas next | ||
| 500 | */ | ||
| 501 | qa = tp->t_dqinfo->dqa_grpdquots; | ||
| 502 | } | 495 | } |
| 503 | } | 496 | } |
| 504 | 497 | ||
| @@ -521,9 +514,9 @@ xfs_trans_unreserve_and_mod_dquots( | |||
| 521 | if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) | 514 | if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) |
| 522 | return; | 515 | return; |
| 523 | 516 | ||
| 524 | qa = tp->t_dqinfo->dqa_usrdquots; | 517 | for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { |
| 518 | qa = tp->t_dqinfo->dqs[j]; | ||
| 525 | 519 | ||
| 526 | for (j = 0; j < 2; j++) { | ||
| 527 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { | 520 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { |
| 528 | qtrx = &qa[i]; | 521 | qtrx = &qa[i]; |
| 529 | /* | 522 | /* |
| @@ -565,7 +558,6 @@ xfs_trans_unreserve_and_mod_dquots( | |||
| 565 | xfs_dqunlock(dqp); | 558 | xfs_dqunlock(dqp); |
| 566 | 559 | ||
| 567 | } | 560 | } |
| 568 | qa = tp->t_dqinfo->dqa_grpdquots; | ||
| 569 | } | 561 | } |
| 570 | } | 562 | } |
| 571 | 563 | ||
| @@ -640,8 +632,8 @@ xfs_trans_dqresv( | |||
| 640 | if ((flags & XFS_QMOPT_FORCE_RES) == 0 && | 632 | if ((flags & XFS_QMOPT_FORCE_RES) == 0 && |
| 641 | dqp->q_core.d_id && | 633 | dqp->q_core.d_id && |
| 642 | ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || | 634 | ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || |
| 643 | (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && | 635 | (XFS_IS_GQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISGDQ(dqp)) || |
| 644 | (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { | 636 | (XFS_IS_PQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISPDQ(dqp)))) { |
| 645 | if (nblks > 0) { | 637 | if (nblks > 0) { |
| 646 | /* | 638 | /* |
| 647 | * dquot is locked already. See if we'd go over the | 639 | * dquot is locked already. See if we'd go over the |
| @@ -748,15 +740,15 @@ error_return: | |||
| 748 | */ | 740 | */ |
| 749 | int | 741 | int |
| 750 | xfs_trans_reserve_quota_bydquots( | 742 | xfs_trans_reserve_quota_bydquots( |
| 751 | xfs_trans_t *tp, | 743 | struct xfs_trans *tp, |
| 752 | xfs_mount_t *mp, | 744 | struct xfs_mount *mp, |
| 753 | xfs_dquot_t *udqp, | 745 | struct xfs_dquot *udqp, |
| 754 | xfs_dquot_t *gdqp, | 746 | struct xfs_dquot *gdqp, |
| 755 | long nblks, | 747 | long nblks, |
| 756 | long ninos, | 748 | long ninos, |
| 757 | uint flags) | 749 | uint flags) |
| 758 | { | 750 | { |
| 759 | int resvd = 0, error; | 751 | int error; |
| 760 | 752 | ||
| 761 | if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) | 753 | if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) |
| 762 | return 0; | 754 | return 0; |
| @@ -771,28 +763,24 @@ xfs_trans_reserve_quota_bydquots( | |||
| 771 | (flags & ~XFS_QMOPT_ENOSPC)); | 763 | (flags & ~XFS_QMOPT_ENOSPC)); |
| 772 | if (error) | 764 | if (error) |
| 773 | return error; | 765 | return error; |
| 774 | resvd = 1; | ||
| 775 | } | 766 | } |
| 776 | 767 | ||
| 777 | if (gdqp) { | 768 | if (gdqp) { |
| 778 | error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); | 769 | error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); |
| 779 | if (error) { | 770 | if (error) |
| 780 | /* | 771 | goto unwind_usr; |
| 781 | * can't do it, so backout previous reservation | ||
| 782 | */ | ||
| 783 | if (resvd) { | ||
| 784 | flags |= XFS_QMOPT_FORCE_RES; | ||
| 785 | xfs_trans_dqresv(tp, mp, udqp, | ||
| 786 | -nblks, -ninos, flags); | ||
| 787 | } | ||
| 788 | return error; | ||
| 789 | } | ||
| 790 | } | 772 | } |
| 791 | 773 | ||
| 792 | /* | 774 | /* |
| 793 | * Didn't change anything critical, so, no need to log | 775 | * Didn't change anything critical, so, no need to log |
| 794 | */ | 776 | */ |
| 795 | return 0; | 777 | return 0; |
| 778 | |||
| 779 | unwind_usr: | ||
| 780 | flags |= XFS_QMOPT_FORCE_RES; | ||
| 781 | if (udqp) | ||
| 782 | xfs_trans_dqresv(tp, mp, udqp, -nblks, -ninos, flags); | ||
| 783 | return error; | ||
| 796 | } | 784 | } |
| 797 | 785 | ||
| 798 | 786 | ||
| @@ -816,8 +804,7 @@ xfs_trans_reserve_quota_nblks( | |||
| 816 | if (XFS_IS_PQUOTA_ON(mp)) | 804 | if (XFS_IS_PQUOTA_ON(mp)) |
| 817 | flags |= XFS_QMOPT_ENOSPC; | 805 | flags |= XFS_QMOPT_ENOSPC; |
| 818 | 806 | ||
| 819 | ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); | 807 | ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino)); |
| 820 | ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); | ||
| 821 | 808 | ||
| 822 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 809 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
| 823 | ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == | 810 | ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == |
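With the accounting arrays folded into dqs[type][slot], the loops above no longer need the old trick of re-pointing qa at the group array at the bottom of the loop body; they simply iterate both dimensions. The short sketch below shows the same walk over a two-dimensional per-type array; the types and the printed action are placeholders for illustration.

#include <stdio.h>

enum { TRANS_USR, TRANS_GRP, TRANS_DQTYPES };   /* mirrors the new enum */
#define TRANS_MAXDQS 2

struct dqtrx { const char *dquot; long delta; };  /* heavily simplified */

struct dquot_acct { struct dqtrx dqs[TRANS_DQTYPES][TRANS_MAXDQS]; };

/* The old code walked the user slots and then reassigned the pointer to
 * the group slots inside the loop; with a 2D array the walk is just two
 * nested loops over every (type, slot) pair. */
static void apply_deltas(struct dquot_acct *acct)
{
    int j, i;

    for (j = 0; j < TRANS_DQTYPES; j++) {
        for (i = 0; i < TRANS_MAXDQS; i++) {
            struct dqtrx *q = &acct->dqs[j][i];

            if (!q->dquot)
                break;                  /* slots are filled front to back */
            printf("apply %+ld to %s\n", q->delta, q->dquot);
        }
    }
}

int main(void)
{
    struct dquot_acct acct = { .dqs = {
        [TRANS_USR] = { { "uid 1000", 8 }, { NULL, 0 } },
        [TRANS_GRP] = { { "gid 100", -3 }, { NULL, 0 } },
    } };

    apply_deltas(&acct);
    return 0;
}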
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index ac6d567704db..53dfe46f3680 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
| @@ -112,6 +112,17 @@ xfs_trans_log_inode( | |||
| 112 | ASSERT(ip->i_itemp != NULL); | 112 | ASSERT(ip->i_itemp != NULL); |
| 113 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 113 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
| 114 | 114 | ||
| 115 | /* | ||
| 116 | * First time we log the inode in a transaction, bump the inode change | ||
| 117 | * counter if it is configured for this to occur. | ||
| 118 | */ | ||
| 119 | if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && | ||
| 120 | IS_I_VERSION(VFS_I(ip))) { | ||
| 121 | inode_inc_iversion(VFS_I(ip)); | ||
| 122 | ip->i_d.di_changecount = VFS_I(ip)->i_version; | ||
| 123 | flags |= XFS_ILOG_CORE; | ||
| 124 | } | ||
| 125 | |||
| 115 | tp->t_flags |= XFS_TRANS_DIRTY; | 126 | tp->t_flags |= XFS_TRANS_DIRTY; |
| 116 | ip->i_itemp->ili_item.li_desc->lid_flags |= XFS_LID_DIRTY; | 127 | ip->i_itemp->ili_item.li_desc->lid_flags |= XFS_LID_DIRTY; |
| 117 | 128 | ||
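The xfs_trans_log_inode() change above bumps the VFS i_version only the first time the inode is dirtied within a transaction, so relogging the same inode in that transaction costs a single increment. A minimal model of that first-dirty-only bump is sketched below with a toy inode; the field names are stand-ins, not the kernel's.

#include <stdio.h>
#include <stdbool.h>

struct toy_inode {
    unsigned long long i_version;   /* change counter exposed to NFS etc. */
    bool dirty_in_trans;            /* stand-in for the XFS_LID_DIRTY bit */
};

/* Same idea as the hunk: bump i_version only on the first logging of the
 * inode within a transaction, so one transaction costs one increment no
 * matter how many times the inode is relogged inside it. */
static void trans_log_inode(struct toy_inode *ip)
{
    if (!ip->dirty_in_trans) {
        ip->i_version++;
        ip->dirty_in_trans = true;
    }
}

static void trans_commit(struct toy_inode *ip)
{
    ip->dirty_in_trans = false;     /* the next transaction starts clean */
}

int main(void)
{
    struct toy_inode ip = { 0, false };

    trans_log_inode(&ip);           /* first log in transaction: bump */
    trans_log_inode(&ip);           /* relogged, same transaction: no bump */
    trans_commit(&ip);
    trans_log_inode(&ip);           /* new transaction: bump again */
    printf("i_version=%llu\n", ip.i_version);   /* prints 2 */
    return 0;
}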
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 0176bb21f09a..42c0ef288aeb 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
| @@ -322,18 +322,9 @@ xfs_inactive( | |||
| 322 | xfs_trans_ijoin(tp, ip, 0); | 322 | xfs_trans_ijoin(tp, ip, 0); |
| 323 | 323 | ||
| 324 | if (S_ISLNK(ip->i_d.di_mode)) { | 324 | if (S_ISLNK(ip->i_d.di_mode)) { |
| 325 | /* | 325 | error = xfs_inactive_symlink(ip, &tp); |
| 326 | * Zero length symlinks _can_ exist. | 326 | if (error) |
| 327 | */ | 327 | goto out_cancel; |
| 328 | if (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) { | ||
| 329 | error = xfs_inactive_symlink_rmt(ip, &tp); | ||
| 330 | if (error) | ||
| 331 | goto out_cancel; | ||
| 332 | } else if (ip->i_df.if_bytes > 0) { | ||
| 333 | xfs_idata_realloc(ip, -(ip->i_df.if_bytes), | ||
| 334 | XFS_DATA_FORK); | ||
| 335 | ASSERT(ip->i_df.if_bytes == 0); | ||
| 336 | } | ||
| 337 | } else if (truncate) { | 328 | } else if (truncate) { |
| 338 | ip->i_d.di_size = 0; | 329 | ip->i_d.di_size = 0; |
| 339 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 330 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
