diff options
author | David Woodhouse <David.Woodhouse@intel.com> | 2012-10-09 10:03:21 -0400 |
---|---|---|
committer | David Woodhouse <David.Woodhouse@intel.com> | 2012-10-09 10:04:25 -0400 |
commit | ffe315012510165ce82e4dd4767f0a5dba9edbf7 (patch) | |
tree | f601cd980af9d0ced5ca9aedecef4fa0d2ca0e15 /fs | |
parent | e2d3a35ee427aaba99b6c68a56609ce276c51270 (diff) | |
parent | 4a8e43feeac7996b8de2d5b2823e316917493df4 (diff) |
Merge tag 'disintegrate-mtd-20121009' of git://git.infradead.org/users/dhowells/linux-headers
UAPI Disintegration 2012-10-09
Conflicts:
MAINTAINERS
arch/arm/configs/bcmring_defconfig
arch/arm/mach-imx/clk-imx51-imx53.c
drivers/mtd/nand/Kconfig
drivers/mtd/nand/bcm_umi_nand.c
drivers/mtd/nand/nand_bcm_umi.h
drivers/mtd/nand/orion_nand.c
Diffstat (limited to 'fs')
401 files changed, 14234 insertions, 7965 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 9a1d42630751..15b679166201 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c | |||
@@ -37,7 +37,7 @@ static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name) | |||
37 | return ERR_PTR(-ENOMEM); | 37 | return ERR_PTR(-ENOMEM); |
38 | size = v9fs_fid_xattr_get(fid, name, value, size); | 38 | size = v9fs_fid_xattr_get(fid, name, value, size); |
39 | if (size > 0) { | 39 | if (size > 0) { |
40 | acl = posix_acl_from_xattr(value, size); | 40 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
41 | if (IS_ERR(acl)) | 41 | if (IS_ERR(acl)) |
42 | goto err_out; | 42 | goto err_out; |
43 | } | 43 | } |
@@ -131,7 +131,7 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl) | |||
131 | buffer = kmalloc(size, GFP_KERNEL); | 131 | buffer = kmalloc(size, GFP_KERNEL); |
132 | if (!buffer) | 132 | if (!buffer) |
133 | return -ENOMEM; | 133 | return -ENOMEM; |
134 | retval = posix_acl_to_xattr(acl, buffer, size); | 134 | retval = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); |
135 | if (retval < 0) | 135 | if (retval < 0) |
136 | goto err_free_out; | 136 | goto err_free_out; |
137 | switch (type) { | 137 | switch (type) { |
@@ -251,7 +251,7 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name, | |||
251 | return PTR_ERR(acl); | 251 | return PTR_ERR(acl); |
252 | if (acl == NULL) | 252 | if (acl == NULL) |
253 | return -ENODATA; | 253 | return -ENODATA; |
254 | error = posix_acl_to_xattr(acl, buffer, size); | 254 | error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); |
255 | posix_acl_release(acl); | 255 | posix_acl_release(acl); |
256 | 256 | ||
257 | return error; | 257 | return error; |
@@ -304,7 +304,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, | |||
304 | return -EPERM; | 304 | return -EPERM; |
305 | if (value) { | 305 | if (value) { |
306 | /* update the cached acl value */ | 306 | /* update the cached acl value */ |
307 | acl = posix_acl_from_xattr(value, size); | 307 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
308 | if (IS_ERR(acl)) | 308 | if (IS_ERR(acl)) |
309 | return PTR_ERR(acl); | 309 | return PTR_ERR(acl); |
310 | else if (acl) { | 310 | else if (acl) { |
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index b85efa773949..392c5dac1981 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c | |||
@@ -560,6 +560,11 @@ static int v9fs_init_inode_cache(void) | |||
560 | */ | 560 | */ |
561 | static void v9fs_destroy_inode_cache(void) | 561 | static void v9fs_destroy_inode_cache(void) |
562 | { | 562 | { |
563 | /* | ||
564 | * Make sure all delayed rcu free inodes are flushed before we | ||
565 | * destroy cache. | ||
566 | */ | ||
567 | rcu_barrier(); | ||
563 | kmem_cache_destroy(v9fs_inode_cache); | 568 | kmem_cache_destroy(v9fs_inode_cache); |
564 | } | 569 | } |
565 | 570 | ||
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index dd6f7ee1e312..c2483e97beee 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c | |||
@@ -738,6 +738,7 @@ v9fs_cached_file_write(struct file *filp, const char __user * data, | |||
738 | static const struct vm_operations_struct v9fs_file_vm_ops = { | 738 | static const struct vm_operations_struct v9fs_file_vm_ops = { |
739 | .fault = filemap_fault, | 739 | .fault = filemap_fault, |
740 | .page_mkwrite = v9fs_vm_page_mkwrite, | 740 | .page_mkwrite = v9fs_vm_page_mkwrite, |
741 | .remap_pages = generic_file_remap_pages, | ||
741 | }; | 742 | }; |
742 | 743 | ||
743 | 744 | ||
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 022574202749..0efd1524b977 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt | |||
@@ -164,3 +164,11 @@ config BINFMT_MISC | |||
164 | You may say M here for module support and later load the module when | 164 | You may say M here for module support and later load the module when |
165 | you have use for it; the module is called binfmt_misc. If you | 165 | you have use for it; the module is called binfmt_misc. If you |
166 | don't know what to answer at this point, say Y. | 166 | don't know what to answer at this point, say Y. |
167 | |||
168 | config COREDUMP | ||
169 | bool "Enable core dump support" if EXPERT | ||
170 | default y | ||
171 | help | ||
172 | This option enables support for performing core dumps. You almost | ||
173 | certainly want to say Y here. Not necessary on systems that never | ||
174 | need debugging or only ever run flawless code. | ||
diff --git a/fs/Makefile b/fs/Makefile index 2fb977934673..1d7af79288a0 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -48,6 +48,7 @@ obj-$(CONFIG_FS_MBCACHE) += mbcache.o | |||
48 | obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o | 48 | obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o |
49 | obj-$(CONFIG_NFS_COMMON) += nfs_common/ | 49 | obj-$(CONFIG_NFS_COMMON) += nfs_common/ |
50 | obj-$(CONFIG_GENERIC_ACL) += generic_acl.o | 50 | obj-$(CONFIG_GENERIC_ACL) += generic_acl.o |
51 | obj-$(CONFIG_COREDUMP) += coredump.o | ||
51 | 52 | ||
52 | obj-$(CONFIG_FHANDLE) += fhandle.o | 53 | obj-$(CONFIG_FHANDLE) += fhandle.o |
53 | 54 | ||
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 718ac1f440c6..585adafb0cc2 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h | |||
@@ -46,8 +46,8 @@ struct adfs_sb_info { | |||
46 | struct adfs_discmap *s_map; /* bh list containing map */ | 46 | struct adfs_discmap *s_map; /* bh list containing map */ |
47 | struct adfs_dir_ops *s_dir; /* directory operations */ | 47 | struct adfs_dir_ops *s_dir; /* directory operations */ |
48 | 48 | ||
49 | uid_t s_uid; /* owner uid */ | 49 | kuid_t s_uid; /* owner uid */ |
50 | gid_t s_gid; /* owner gid */ | 50 | kgid_t s_gid; /* owner gid */ |
51 | umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ | 51 | umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ |
52 | umode_t s_other_mask; /* ADFS other perm -> unix perm */ | 52 | umode_t s_other_mask; /* ADFS other perm -> unix perm */ |
53 | int s_ftsuffix; /* ,xyz hex filetype suffix option */ | 53 | int s_ftsuffix; /* ,xyz hex filetype suffix option */ |
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 1dab6a174d6a..e9bad5093a3f 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c | |||
@@ -304,8 +304,8 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr) | |||
304 | * we can't change the UID or GID of any file - | 304 | * we can't change the UID or GID of any file - |
305 | * we have a global UID/GID in the superblock | 305 | * we have a global UID/GID in the superblock |
306 | */ | 306 | */ |
307 | if ((ia_valid & ATTR_UID && attr->ia_uid != ADFS_SB(sb)->s_uid) || | 307 | if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, ADFS_SB(sb)->s_uid)) || |
308 | (ia_valid & ATTR_GID && attr->ia_gid != ADFS_SB(sb)->s_gid)) | 308 | (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, ADFS_SB(sb)->s_gid))) |
309 | error = -EPERM; | 309 | error = -EPERM; |
310 | 310 | ||
311 | if (error) | 311 | if (error) |
diff --git a/fs/adfs/super.c b/fs/adfs/super.c index bdaec92353c2..d57122935793 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/seq_file.h> | 15 | #include <linux/seq_file.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/statfs.h> | 17 | #include <linux/statfs.h> |
18 | #include <linux/user_namespace.h> | ||
18 | #include "adfs.h" | 19 | #include "adfs.h" |
19 | #include "dir_f.h" | 20 | #include "dir_f.h" |
20 | #include "dir_fplus.h" | 21 | #include "dir_fplus.h" |
@@ -130,10 +131,10 @@ static int adfs_show_options(struct seq_file *seq, struct dentry *root) | |||
130 | { | 131 | { |
131 | struct adfs_sb_info *asb = ADFS_SB(root->d_sb); | 132 | struct adfs_sb_info *asb = ADFS_SB(root->d_sb); |
132 | 133 | ||
133 | if (asb->s_uid != 0) | 134 | if (!uid_eq(asb->s_uid, GLOBAL_ROOT_UID)) |
134 | seq_printf(seq, ",uid=%u", asb->s_uid); | 135 | seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, asb->s_uid)); |
135 | if (asb->s_gid != 0) | 136 | if (!gid_eq(asb->s_gid, GLOBAL_ROOT_GID)) |
136 | seq_printf(seq, ",gid=%u", asb->s_gid); | 137 | seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, asb->s_gid)); |
137 | if (asb->s_owner_mask != ADFS_DEFAULT_OWNER_MASK) | 138 | if (asb->s_owner_mask != ADFS_DEFAULT_OWNER_MASK) |
138 | seq_printf(seq, ",ownmask=%o", asb->s_owner_mask); | 139 | seq_printf(seq, ",ownmask=%o", asb->s_owner_mask); |
139 | if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK) | 140 | if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK) |
@@ -175,12 +176,16 @@ static int parse_options(struct super_block *sb, char *options) | |||
175 | case Opt_uid: | 176 | case Opt_uid: |
176 | if (match_int(args, &option)) | 177 | if (match_int(args, &option)) |
177 | return -EINVAL; | 178 | return -EINVAL; |
178 | asb->s_uid = option; | 179 | asb->s_uid = make_kuid(current_user_ns(), option); |
180 | if (!uid_valid(asb->s_uid)) | ||
181 | return -EINVAL; | ||
179 | break; | 182 | break; |
180 | case Opt_gid: | 183 | case Opt_gid: |
181 | if (match_int(args, &option)) | 184 | if (match_int(args, &option)) |
182 | return -EINVAL; | 185 | return -EINVAL; |
183 | asb->s_gid = option; | 186 | asb->s_gid = make_kgid(current_user_ns(), option); |
187 | if (!gid_valid(asb->s_gid)) | ||
188 | return -EINVAL; | ||
184 | break; | 189 | break; |
185 | case Opt_ownmask: | 190 | case Opt_ownmask: |
186 | if (match_octal(args, &option)) | 191 | if (match_octal(args, &option)) |
@@ -275,6 +280,11 @@ static int init_inodecache(void) | |||
275 | 280 | ||
276 | static void destroy_inodecache(void) | 281 | static void destroy_inodecache(void) |
277 | { | 282 | { |
283 | /* | ||
284 | * Make sure all delayed rcu free inodes are flushed before we | ||
285 | * destroy cache. | ||
286 | */ | ||
287 | rcu_barrier(); | ||
278 | kmem_cache_destroy(adfs_inode_cachep); | 288 | kmem_cache_destroy(adfs_inode_cachep); |
279 | } | 289 | } |
280 | 290 | ||
@@ -369,8 +379,8 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) | |||
369 | sb->s_fs_info = asb; | 379 | sb->s_fs_info = asb; |
370 | 380 | ||
371 | /* set default options */ | 381 | /* set default options */ |
372 | asb->s_uid = 0; | 382 | asb->s_uid = GLOBAL_ROOT_UID; |
373 | asb->s_gid = 0; | 383 | asb->s_gid = GLOBAL_ROOT_GID; |
374 | asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK; | 384 | asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK; |
375 | asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK; | 385 | asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK; |
376 | asb->s_ftsuffix = 0; | 386 | asb->s_ftsuffix = 0; |
diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 6e216419f340..3952121f2f28 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h | |||
@@ -88,8 +88,8 @@ struct affs_sb_info { | |||
88 | u32 s_root_block; /* FFS root block number. */ | 88 | u32 s_root_block; /* FFS root block number. */ |
89 | int s_hashsize; /* Size of hash table. */ | 89 | int s_hashsize; /* Size of hash table. */ |
90 | unsigned long s_flags; /* See below. */ | 90 | unsigned long s_flags; /* See below. */ |
91 | uid_t s_uid; /* uid to override */ | 91 | kuid_t s_uid; /* uid to override */ |
92 | gid_t s_gid; /* gid to override */ | 92 | kgid_t s_gid; /* gid to override */ |
93 | umode_t s_mode; /* mode to override */ | 93 | umode_t s_mode; /* mode to override */ |
94 | struct buffer_head *s_root_bh; /* Cached root block. */ | 94 | struct buffer_head *s_root_bh; /* Cached root block. */ |
95 | struct mutex s_bmlock; /* Protects bitmap access. */ | 95 | struct mutex s_bmlock; /* Protects bitmap access. */ |
diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 8bc4a59f4e7e..15c484268229 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c | |||
@@ -80,17 +80,17 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino) | |||
80 | if (id == 0 || sbi->s_flags & SF_SETUID) | 80 | if (id == 0 || sbi->s_flags & SF_SETUID) |
81 | inode->i_uid = sbi->s_uid; | 81 | inode->i_uid = sbi->s_uid; |
82 | else if (id == 0xFFFF && sbi->s_flags & SF_MUFS) | 82 | else if (id == 0xFFFF && sbi->s_flags & SF_MUFS) |
83 | inode->i_uid = 0; | 83 | i_uid_write(inode, 0); |
84 | else | 84 | else |
85 | inode->i_uid = id; | 85 | i_uid_write(inode, id); |
86 | 86 | ||
87 | id = be16_to_cpu(tail->gid); | 87 | id = be16_to_cpu(tail->gid); |
88 | if (id == 0 || sbi->s_flags & SF_SETGID) | 88 | if (id == 0 || sbi->s_flags & SF_SETGID) |
89 | inode->i_gid = sbi->s_gid; | 89 | inode->i_gid = sbi->s_gid; |
90 | else if (id == 0xFFFF && sbi->s_flags & SF_MUFS) | 90 | else if (id == 0xFFFF && sbi->s_flags & SF_MUFS) |
91 | inode->i_gid = 0; | 91 | i_gid_write(inode, 0); |
92 | else | 92 | else |
93 | inode->i_gid = id; | 93 | i_gid_write(inode, id); |
94 | 94 | ||
95 | switch (be32_to_cpu(tail->stype)) { | 95 | switch (be32_to_cpu(tail->stype)) { |
96 | case ST_ROOT: | 96 | case ST_ROOT: |
@@ -193,13 +193,13 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
193 | tail->size = cpu_to_be32(inode->i_size); | 193 | tail->size = cpu_to_be32(inode->i_size); |
194 | secs_to_datestamp(inode->i_mtime.tv_sec,&tail->change); | 194 | secs_to_datestamp(inode->i_mtime.tv_sec,&tail->change); |
195 | if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) { | 195 | if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) { |
196 | uid = inode->i_uid; | 196 | uid = i_uid_read(inode); |
197 | gid = inode->i_gid; | 197 | gid = i_gid_read(inode); |
198 | if (AFFS_SB(sb)->s_flags & SF_MUFS) { | 198 | if (AFFS_SB(sb)->s_flags & SF_MUFS) { |
199 | if (inode->i_uid == 0 || inode->i_uid == 0xFFFF) | 199 | if (uid == 0 || uid == 0xFFFF) |
200 | uid = inode->i_uid ^ ~0; | 200 | uid = uid ^ ~0; |
201 | if (inode->i_gid == 0 || inode->i_gid == 0xFFFF) | 201 | if (gid == 0 || gid == 0xFFFF) |
202 | gid = inode->i_gid ^ ~0; | 202 | gid = gid ^ ~0; |
203 | } | 203 | } |
204 | if (!(AFFS_SB(sb)->s_flags & SF_SETUID)) | 204 | if (!(AFFS_SB(sb)->s_flags & SF_SETUID)) |
205 | tail->uid = cpu_to_be16(uid); | 205 | tail->uid = cpu_to_be16(uid); |
diff --git a/fs/affs/super.c b/fs/affs/super.c index c70f1e5fc024..b84dc7352502 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c | |||
@@ -147,6 +147,11 @@ static int init_inodecache(void) | |||
147 | 147 | ||
148 | static void destroy_inodecache(void) | 148 | static void destroy_inodecache(void) |
149 | { | 149 | { |
150 | /* | ||
151 | * Make sure all delayed rcu free inodes are flushed before we | ||
152 | * destroy cache. | ||
153 | */ | ||
154 | rcu_barrier(); | ||
150 | kmem_cache_destroy(affs_inode_cachep); | 155 | kmem_cache_destroy(affs_inode_cachep); |
151 | } | 156 | } |
152 | 157 | ||
@@ -188,7 +193,7 @@ static const match_table_t tokens = { | |||
188 | }; | 193 | }; |
189 | 194 | ||
190 | static int | 195 | static int |
191 | parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s32 *root, | 196 | parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved, s32 *root, |
192 | int *blocksize, char **prefix, char *volume, unsigned long *mount_opts) | 197 | int *blocksize, char **prefix, char *volume, unsigned long *mount_opts) |
193 | { | 198 | { |
194 | char *p; | 199 | char *p; |
@@ -253,13 +258,17 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s | |||
253 | case Opt_setgid: | 258 | case Opt_setgid: |
254 | if (match_int(&args[0], &option)) | 259 | if (match_int(&args[0], &option)) |
255 | return 0; | 260 | return 0; |
256 | *gid = option; | 261 | *gid = make_kgid(current_user_ns(), option); |
262 | if (!gid_valid(*gid)) | ||
263 | return 0; | ||
257 | *mount_opts |= SF_SETGID; | 264 | *mount_opts |= SF_SETGID; |
258 | break; | 265 | break; |
259 | case Opt_setuid: | 266 | case Opt_setuid: |
260 | if (match_int(&args[0], &option)) | 267 | if (match_int(&args[0], &option)) |
261 | return 0; | 268 | return 0; |
262 | *uid = option; | 269 | *uid = make_kuid(current_user_ns(), option); |
270 | if (!uid_valid(*uid)) | ||
271 | return 0; | ||
263 | *mount_opts |= SF_SETUID; | 272 | *mount_opts |= SF_SETUID; |
264 | break; | 273 | break; |
265 | case Opt_verbose: | 274 | case Opt_verbose: |
@@ -301,8 +310,8 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) | |||
301 | int num_bm; | 310 | int num_bm; |
302 | int i, j; | 311 | int i, j; |
303 | s32 key; | 312 | s32 key; |
304 | uid_t uid; | 313 | kuid_t uid; |
305 | gid_t gid; | 314 | kgid_t gid; |
306 | int reserved; | 315 | int reserved; |
307 | unsigned long mount_flags; | 316 | unsigned long mount_flags; |
308 | int tmp_flags; /* fix remount prototype... */ | 317 | int tmp_flags; /* fix remount prototype... */ |
@@ -527,8 +536,8 @@ affs_remount(struct super_block *sb, int *flags, char *data) | |||
527 | { | 536 | { |
528 | struct affs_sb_info *sbi = AFFS_SB(sb); | 537 | struct affs_sb_info *sbi = AFFS_SB(sb); |
529 | int blocksize; | 538 | int blocksize; |
530 | uid_t uid; | 539 | kuid_t uid; |
531 | gid_t gid; | 540 | kgid_t gid; |
532 | int mode; | 541 | int mode; |
533 | int reserved; | 542 | int reserved; |
534 | int root_block; | 543 | int root_block; |
@@ -551,7 +560,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) | |||
551 | return -EINVAL; | 560 | return -EINVAL; |
552 | } | 561 | } |
553 | 562 | ||
554 | flush_delayed_work_sync(&sbi->sb_work); | 563 | flush_delayed_work(&sbi->sb_work); |
555 | replace_mount_options(sb, new_opts); | 564 | replace_mount_options(sb, new_opts); |
556 | 565 | ||
557 | sbi->s_flags = mount_flags; | 566 | sbi->s_flags = mount_flags; |
diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 587ef5123cd8..7ef637d7f3a5 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c | |||
@@ -351,9 +351,7 @@ void afs_dispatch_give_up_callbacks(struct work_struct *work) | |||
351 | */ | 351 | */ |
352 | void afs_flush_callback_breaks(struct afs_server *server) | 352 | void afs_flush_callback_breaks(struct afs_server *server) |
353 | { | 353 | { |
354 | cancel_delayed_work(&server->cb_break_work); | 354 | mod_delayed_work(afs_callback_update_worker, &server->cb_break_work, 0); |
355 | queue_delayed_work(afs_callback_update_worker, | ||
356 | &server->cb_break_work, 0); | ||
357 | } | 355 | } |
358 | 356 | ||
359 | #if 0 | 357 | #if 0 |
diff --git a/fs/afs/server.c b/fs/afs/server.c index d59b7516e943..f342acf3547d 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c | |||
@@ -285,12 +285,7 @@ static void afs_reap_server(struct work_struct *work) | |||
285 | expiry = server->time_of_death + afs_server_timeout; | 285 | expiry = server->time_of_death + afs_server_timeout; |
286 | if (expiry > now) { | 286 | if (expiry > now) { |
287 | delay = (expiry - now) * HZ; | 287 | delay = (expiry - now) * HZ; |
288 | if (!queue_delayed_work(afs_wq, &afs_server_reaper, | 288 | mod_delayed_work(afs_wq, &afs_server_reaper, delay); |
289 | delay)) { | ||
290 | cancel_delayed_work(&afs_server_reaper); | ||
291 | queue_delayed_work(afs_wq, &afs_server_reaper, | ||
292 | delay); | ||
293 | } | ||
294 | break; | 289 | break; |
295 | } | 290 | } |
296 | 291 | ||
@@ -323,6 +318,5 @@ static void afs_reap_server(struct work_struct *work) | |||
323 | void __exit afs_purge_servers(void) | 318 | void __exit afs_purge_servers(void) |
324 | { | 319 | { |
325 | afs_server_timeout = 0; | 320 | afs_server_timeout = 0; |
326 | cancel_delayed_work(&afs_server_reaper); | 321 | mod_delayed_work(afs_wq, &afs_server_reaper, 0); |
327 | queue_delayed_work(afs_wq, &afs_server_reaper, 0); | ||
328 | } | 322 | } |
diff --git a/fs/afs/super.c b/fs/afs/super.c index df8c6047c2a1..43165009428d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
@@ -123,6 +123,11 @@ void __exit afs_fs_exit(void) | |||
123 | BUG(); | 123 | BUG(); |
124 | } | 124 | } |
125 | 125 | ||
126 | /* | ||
127 | * Make sure all delayed rcu free inodes are flushed before we | ||
128 | * destroy cache. | ||
129 | */ | ||
130 | rcu_barrier(); | ||
126 | kmem_cache_destroy(afs_inode_cachep); | 131 | kmem_cache_destroy(afs_inode_cachep); |
127 | _leave(""); | 132 | _leave(""); |
128 | } | 133 | } |
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 431984d2e372..57bcb1596530 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c | |||
@@ -561,12 +561,7 @@ static void afs_vlocation_reaper(struct work_struct *work) | |||
561 | if (expiry > now) { | 561 | if (expiry > now) { |
562 | delay = (expiry - now) * HZ; | 562 | delay = (expiry - now) * HZ; |
563 | _debug("delay %lu", delay); | 563 | _debug("delay %lu", delay); |
564 | if (!queue_delayed_work(afs_wq, &afs_vlocation_reap, | 564 | mod_delayed_work(afs_wq, &afs_vlocation_reap, delay); |
565 | delay)) { | ||
566 | cancel_delayed_work(&afs_vlocation_reap); | ||
567 | queue_delayed_work(afs_wq, &afs_vlocation_reap, | ||
568 | delay); | ||
569 | } | ||
570 | break; | 565 | break; |
571 | } | 566 | } |
572 | 567 | ||
@@ -614,13 +609,10 @@ void afs_vlocation_purge(void) | |||
614 | spin_lock(&afs_vlocation_updates_lock); | 609 | spin_lock(&afs_vlocation_updates_lock); |
615 | list_del_init(&afs_vlocation_updates); | 610 | list_del_init(&afs_vlocation_updates); |
616 | spin_unlock(&afs_vlocation_updates_lock); | 611 | spin_unlock(&afs_vlocation_updates_lock); |
617 | cancel_delayed_work(&afs_vlocation_update); | 612 | mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0); |
618 | queue_delayed_work(afs_vlocation_update_worker, | ||
619 | &afs_vlocation_update, 0); | ||
620 | destroy_workqueue(afs_vlocation_update_worker); | 613 | destroy_workqueue(afs_vlocation_update_worker); |
621 | 614 | ||
622 | cancel_delayed_work(&afs_vlocation_reap); | 615 | mod_delayed_work(afs_wq, &afs_vlocation_reap, 0); |
623 | queue_delayed_work(afs_wq, &afs_vlocation_reap, 0); | ||
624 | } | 616 | } |
625 | 617 | ||
626 | /* | 618 | /* |
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/fcntl.h> | 14 | #include <linux/fcntl.h> |
15 | #include <linux/security.h> | 15 | #include <linux/security.h> |
16 | #include <linux/evm.h> | 16 | #include <linux/evm.h> |
17 | #include <linux/ima.h> | ||
17 | 18 | ||
18 | /** | 19 | /** |
19 | * inode_change_ok - check if attribute changes to an inode are allowed | 20 | * inode_change_ok - check if attribute changes to an inode are allowed |
@@ -247,6 +248,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr) | |||
247 | 248 | ||
248 | if (!error) { | 249 | if (!error) { |
249 | fsnotify_change(dentry, ia_valid); | 250 | fsnotify_change(dentry, ia_valid); |
251 | ima_inode_post_setattr(dentry); | ||
250 | evm_inode_post_setattr(dentry, ia_valid); | 252 | evm_inode_post_setattr(dentry, ia_valid); |
251 | } | 253 | } |
252 | 254 | ||
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index abf645c1703b..a16214109d31 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c | |||
@@ -221,20 +221,6 @@ static int test_by_type(struct path *path, void *p) | |||
221 | return ino && ino->sbi->type & *(unsigned *)p; | 221 | return ino && ino->sbi->type & *(unsigned *)p; |
222 | } | 222 | } |
223 | 223 | ||
224 | static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file) | ||
225 | { | ||
226 | struct files_struct *files = current->files; | ||
227 | struct fdtable *fdt; | ||
228 | |||
229 | spin_lock(&files->file_lock); | ||
230 | fdt = files_fdtable(files); | ||
231 | BUG_ON(fdt->fd[fd] != NULL); | ||
232 | rcu_assign_pointer(fdt->fd[fd], file); | ||
233 | __set_close_on_exec(fd, fdt); | ||
234 | spin_unlock(&files->file_lock); | ||
235 | } | ||
236 | |||
237 | |||
238 | /* | 224 | /* |
239 | * Open a file descriptor on the autofs mount point corresponding | 225 | * Open a file descriptor on the autofs mount point corresponding |
240 | * to the given path and device number (aka. new_encode_dev(sb->s_dev)). | 226 | * to the given path and device number (aka. new_encode_dev(sb->s_dev)). |
@@ -243,7 +229,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) | |||
243 | { | 229 | { |
244 | int err, fd; | 230 | int err, fd; |
245 | 231 | ||
246 | fd = get_unused_fd(); | 232 | fd = get_unused_fd_flags(O_CLOEXEC); |
247 | if (likely(fd >= 0)) { | 233 | if (likely(fd >= 0)) { |
248 | struct file *filp; | 234 | struct file *filp; |
249 | struct path path; | 235 | struct path path; |
@@ -264,7 +250,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) | |||
264 | goto out; | 250 | goto out; |
265 | } | 251 | } |
266 | 252 | ||
267 | autofs_dev_ioctl_fd_install(fd, filp); | 253 | fd_install(fd, filp); |
268 | } | 254 | } |
269 | 255 | ||
270 | return fd; | 256 | return fd; |
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 8c0e56d92938..842d00048a65 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c | |||
@@ -399,11 +399,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, | |||
399 | DPRINTK("checking mountpoint %p %.*s", | 399 | DPRINTK("checking mountpoint %p %.*s", |
400 | dentry, (int)dentry->d_name.len, dentry->d_name.name); | 400 | dentry, (int)dentry->d_name.len, dentry->d_name.name); |
401 | 401 | ||
402 | /* Path walk currently on this dentry? */ | ||
403 | ino_count = atomic_read(&ino->count) + 2; | ||
404 | if (dentry->d_count > ino_count) | ||
405 | goto next; | ||
406 | |||
407 | /* Can we umount this guy */ | 402 | /* Can we umount this guy */ |
408 | if (autofs4_mount_busy(mnt, dentry)) | 403 | if (autofs4_mount_busy(mnt, dentry)) |
409 | goto next; | 404 | goto next; |
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index da8876d38a7b..dce436e595c1 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c | |||
@@ -175,8 +175,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | |||
175 | return; | 175 | return; |
176 | } | 176 | } |
177 | 177 | ||
178 | pipe = sbi->pipe; | 178 | pipe = get_file(sbi->pipe); |
179 | get_file(pipe); | ||
180 | 179 | ||
181 | mutex_unlock(&sbi->wq_mutex); | 180 | mutex_unlock(&sbi->wq_mutex); |
182 | 181 | ||
diff --git a/fs/befs/befs.h b/fs/befs/befs.h index d9a40abda6b7..b26642839156 100644 --- a/fs/befs/befs.h +++ b/fs/befs/befs.h | |||
@@ -20,8 +20,8 @@ typedef u64 befs_blocknr_t; | |||
20 | */ | 20 | */ |
21 | 21 | ||
22 | typedef struct befs_mount_options { | 22 | typedef struct befs_mount_options { |
23 | gid_t gid; | 23 | kgid_t gid; |
24 | uid_t uid; | 24 | kuid_t uid; |
25 | int use_gid; | 25 | int use_gid; |
26 | int use_uid; | 26 | int use_uid; |
27 | int debug; | 27 | int debug; |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index cf7f3c67c8b7..2b3bda8d5e68 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/vfs.h> | 15 | #include <linux/vfs.h> |
16 | #include <linux/parser.h> | 16 | #include <linux/parser.h> |
17 | #include <linux/namei.h> | 17 | #include <linux/namei.h> |
18 | #include <linux/sched.h> | ||
18 | 19 | ||
19 | #include "befs.h" | 20 | #include "befs.h" |
20 | #include "btree.h" | 21 | #include "btree.h" |
@@ -352,9 +353,11 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) | |||
352 | */ | 353 | */ |
353 | 354 | ||
354 | inode->i_uid = befs_sb->mount_opts.use_uid ? | 355 | inode->i_uid = befs_sb->mount_opts.use_uid ? |
355 | befs_sb->mount_opts.uid : (uid_t) fs32_to_cpu(sb, raw_inode->uid); | 356 | befs_sb->mount_opts.uid : |
357 | make_kuid(&init_user_ns, fs32_to_cpu(sb, raw_inode->uid)); | ||
356 | inode->i_gid = befs_sb->mount_opts.use_gid ? | 358 | inode->i_gid = befs_sb->mount_opts.use_gid ? |
357 | befs_sb->mount_opts.gid : (gid_t) fs32_to_cpu(sb, raw_inode->gid); | 359 | befs_sb->mount_opts.gid : |
360 | make_kgid(&init_user_ns, fs32_to_cpu(sb, raw_inode->gid)); | ||
358 | 361 | ||
359 | set_nlink(inode, 1); | 362 | set_nlink(inode, 1); |
360 | 363 | ||
@@ -454,6 +457,11 @@ befs_init_inodecache(void) | |||
454 | static void | 457 | static void |
455 | befs_destroy_inodecache(void) | 458 | befs_destroy_inodecache(void) |
456 | { | 459 | { |
460 | /* | ||
461 | * Make sure all delayed rcu free inodes are flushed before we | ||
462 | * destroy cache. | ||
463 | */ | ||
464 | rcu_barrier(); | ||
457 | kmem_cache_destroy(befs_inode_cachep); | 465 | kmem_cache_destroy(befs_inode_cachep); |
458 | } | 466 | } |
459 | 467 | ||
@@ -674,10 +682,12 @@ parse_options(char *options, befs_mount_options * opts) | |||
674 | char *p; | 682 | char *p; |
675 | substring_t args[MAX_OPT_ARGS]; | 683 | substring_t args[MAX_OPT_ARGS]; |
676 | int option; | 684 | int option; |
685 | kuid_t uid; | ||
686 | kgid_t gid; | ||
677 | 687 | ||
678 | /* Initialize options */ | 688 | /* Initialize options */ |
679 | opts->uid = 0; | 689 | opts->uid = GLOBAL_ROOT_UID; |
680 | opts->gid = 0; | 690 | opts->gid = GLOBAL_ROOT_GID; |
681 | opts->use_uid = 0; | 691 | opts->use_uid = 0; |
682 | opts->use_gid = 0; | 692 | opts->use_gid = 0; |
683 | opts->iocharset = NULL; | 693 | opts->iocharset = NULL; |
@@ -696,23 +706,29 @@ parse_options(char *options, befs_mount_options * opts) | |||
696 | case Opt_uid: | 706 | case Opt_uid: |
697 | if (match_int(&args[0], &option)) | 707 | if (match_int(&args[0], &option)) |
698 | return 0; | 708 | return 0; |
699 | if (option < 0) { | 709 | uid = INVALID_UID; |
710 | if (option >= 0) | ||
711 | uid = make_kuid(current_user_ns(), option); | ||
712 | if (!uid_valid(uid)) { | ||
700 | printk(KERN_ERR "BeFS: Invalid uid %d, " | 713 | printk(KERN_ERR "BeFS: Invalid uid %d, " |
701 | "using default\n", option); | 714 | "using default\n", option); |
702 | break; | 715 | break; |
703 | } | 716 | } |
704 | opts->uid = option; | 717 | opts->uid = uid; |
705 | opts->use_uid = 1; | 718 | opts->use_uid = 1; |
706 | break; | 719 | break; |
707 | case Opt_gid: | 720 | case Opt_gid: |
708 | if (match_int(&args[0], &option)) | 721 | if (match_int(&args[0], &option)) |
709 | return 0; | 722 | return 0; |
710 | if (option < 0) { | 723 | gid = INVALID_GID; |
724 | if (option >= 0) | ||
725 | gid = make_kgid(current_user_ns(), option); | ||
726 | if (!gid_valid(gid)) { | ||
711 | printk(KERN_ERR "BeFS: Invalid gid %d, " | 727 | printk(KERN_ERR "BeFS: Invalid gid %d, " |
712 | "using default\n", option); | 728 | "using default\n", option); |
713 | break; | 729 | break; |
714 | } | 730 | } |
715 | opts->gid = option; | 731 | opts->gid = gid; |
716 | opts->use_gid = 1; | 732 | opts->use_gid = 1; |
717 | break; | 733 | break; |
718 | case Opt_charset: | 734 | case Opt_charset: |
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 9870417c26e7..737aaa3f7090 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c | |||
@@ -76,8 +76,8 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino) | |||
76 | BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock); | 76 | BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock); |
77 | BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock); | 77 | BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock); |
78 | BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); | 78 | BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); |
79 | inode->i_uid = le32_to_cpu(di->i_uid); | 79 | i_uid_write(inode, le32_to_cpu(di->i_uid)); |
80 | inode->i_gid = le32_to_cpu(di->i_gid); | 80 | i_gid_write(inode, le32_to_cpu(di->i_gid)); |
81 | set_nlink(inode, le32_to_cpu(di->i_nlink)); | 81 | set_nlink(inode, le32_to_cpu(di->i_nlink)); |
82 | inode->i_size = BFS_FILESIZE(di); | 82 | inode->i_size = BFS_FILESIZE(di); |
83 | inode->i_blocks = BFS_FILEBLOCKS(di); | 83 | inode->i_blocks = BFS_FILEBLOCKS(di); |
@@ -139,8 +139,8 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
139 | 139 | ||
140 | di->i_ino = cpu_to_le16(ino); | 140 | di->i_ino = cpu_to_le16(ino); |
141 | di->i_mode = cpu_to_le32(inode->i_mode); | 141 | di->i_mode = cpu_to_le32(inode->i_mode); |
142 | di->i_uid = cpu_to_le32(inode->i_uid); | 142 | di->i_uid = cpu_to_le32(i_uid_read(inode)); |
143 | di->i_gid = cpu_to_le32(inode->i_gid); | 143 | di->i_gid = cpu_to_le32(i_gid_read(inode)); |
144 | di->i_nlink = cpu_to_le32(inode->i_nlink); | 144 | di->i_nlink = cpu_to_le32(inode->i_nlink); |
145 | di->i_atime = cpu_to_le32(inode->i_atime.tv_sec); | 145 | di->i_atime = cpu_to_le32(inode->i_atime.tv_sec); |
146 | di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); | 146 | di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); |
@@ -280,6 +280,11 @@ static int init_inodecache(void) | |||
280 | 280 | ||
281 | static void destroy_inodecache(void) | 281 | static void destroy_inodecache(void) |
282 | { | 282 | { |
283 | /* | ||
284 | * Make sure all delayed rcu free inodes are flushed before we | ||
285 | * destroy cache. | ||
286 | */ | ||
287 | rcu_barrier(); | ||
283 | kmem_cache_destroy(bfs_inode_cachep); | 288 | kmem_cache_destroy(bfs_inode_cachep); |
284 | } | 289 | } |
285 | 290 | ||
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index d146e181d10d..0e7a6f81ae36 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c | |||
@@ -32,31 +32,8 @@ | |||
32 | 32 | ||
33 | static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); | 33 | static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); |
34 | static int load_aout_library(struct file*); | 34 | static int load_aout_library(struct file*); |
35 | static int aout_core_dump(struct coredump_params *cprm); | ||
36 | |||
37 | static struct linux_binfmt aout_format = { | ||
38 | .module = THIS_MODULE, | ||
39 | .load_binary = load_aout_binary, | ||
40 | .load_shlib = load_aout_library, | ||
41 | .core_dump = aout_core_dump, | ||
42 | .min_coredump = PAGE_SIZE | ||
43 | }; | ||
44 | |||
45 | #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) | ||
46 | |||
47 | static int set_brk(unsigned long start, unsigned long end) | ||
48 | { | ||
49 | start = PAGE_ALIGN(start); | ||
50 | end = PAGE_ALIGN(end); | ||
51 | if (end > start) { | ||
52 | unsigned long addr; | ||
53 | addr = vm_brk(start, end - start); | ||
54 | if (BAD_ADDR(addr)) | ||
55 | return addr; | ||
56 | } | ||
57 | return 0; | ||
58 | } | ||
59 | 35 | ||
36 | #ifdef CONFIG_COREDUMP | ||
60 | /* | 37 | /* |
61 | * Routine writes a core dump image in the current directory. | 38 | * Routine writes a core dump image in the current directory. |
62 | * Currently only a stub-function. | 39 | * Currently only a stub-function. |
@@ -66,7 +43,6 @@ static int set_brk(unsigned long start, unsigned long end) | |||
66 | * field, which also makes sure the core-dumps won't be recursive if the | 43 | * field, which also makes sure the core-dumps won't be recursive if the |
67 | * dumping of the process results in another error.. | 44 | * dumping of the process results in another error.. |
68 | */ | 45 | */ |
69 | |||
70 | static int aout_core_dump(struct coredump_params *cprm) | 46 | static int aout_core_dump(struct coredump_params *cprm) |
71 | { | 47 | { |
72 | struct file *file = cprm->file; | 48 | struct file *file = cprm->file; |
@@ -89,7 +65,7 @@ static int aout_core_dump(struct coredump_params *cprm) | |||
89 | current->flags |= PF_DUMPCORE; | 65 | current->flags |= PF_DUMPCORE; |
90 | strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); | 66 | strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); |
91 | dump.u_ar0 = offsetof(struct user, regs); | 67 | dump.u_ar0 = offsetof(struct user, regs); |
92 | dump.signal = cprm->signr; | 68 | dump.signal = cprm->siginfo->si_signo; |
93 | aout_dump_thread(cprm->regs, &dump); | 69 | aout_dump_thread(cprm->regs, &dump); |
94 | 70 | ||
95 | /* If the size of the dump file exceeds the rlimit, then see what would happen | 71 | /* If the size of the dump file exceeds the rlimit, then see what would happen |
@@ -135,6 +111,32 @@ end_coredump: | |||
135 | set_fs(fs); | 111 | set_fs(fs); |
136 | return has_dumped; | 112 | return has_dumped; |
137 | } | 113 | } |
114 | #else | ||
115 | #define aout_core_dump NULL | ||
116 | #endif | ||
117 | |||
118 | static struct linux_binfmt aout_format = { | ||
119 | .module = THIS_MODULE, | ||
120 | .load_binary = load_aout_binary, | ||
121 | .load_shlib = load_aout_library, | ||
122 | .core_dump = aout_core_dump, | ||
123 | .min_coredump = PAGE_SIZE | ||
124 | }; | ||
125 | |||
126 | #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) | ||
127 | |||
128 | static int set_brk(unsigned long start, unsigned long end) | ||
129 | { | ||
130 | start = PAGE_ALIGN(start); | ||
131 | end = PAGE_ALIGN(end); | ||
132 | if (end > start) { | ||
133 | unsigned long addr; | ||
134 | addr = vm_brk(start, end - start); | ||
135 | if (BAD_ADDR(addr)) | ||
136 | return addr; | ||
137 | } | ||
138 | return 0; | ||
139 | } | ||
138 | 140 | ||
139 | /* | 141 | /* |
140 | * create_aout_tables() parses the env- and arg-strings in new user | 142 | * create_aout_tables() parses the env- and arg-strings in new user |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 1b52956afe33..e800dec958c3 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/compiler.h> | 27 | #include <linux/compiler.h> |
28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
29 | #include <linux/pagemap.h> | 29 | #include <linux/pagemap.h> |
30 | #include <linux/vmalloc.h> | ||
30 | #include <linux/security.h> | 31 | #include <linux/security.h> |
31 | #include <linux/random.h> | 32 | #include <linux/random.h> |
32 | #include <linux/elf.h> | 33 | #include <linux/elf.h> |
@@ -37,6 +38,13 @@ | |||
37 | #include <asm/page.h> | 38 | #include <asm/page.h> |
38 | #include <asm/exec.h> | 39 | #include <asm/exec.h> |
39 | 40 | ||
41 | #ifndef user_long_t | ||
42 | #define user_long_t long | ||
43 | #endif | ||
44 | #ifndef user_siginfo_t | ||
45 | #define user_siginfo_t siginfo_t | ||
46 | #endif | ||
47 | |||
40 | static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); | 48 | static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); |
41 | static int load_elf_library(struct file *); | 49 | static int load_elf_library(struct file *); |
42 | static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, | 50 | static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, |
@@ -881,7 +889,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
881 | } | 889 | } |
882 | 890 | ||
883 | if (elf_interpreter) { | 891 | if (elf_interpreter) { |
884 | unsigned long uninitialized_var(interp_map_addr); | 892 | unsigned long interp_map_addr = 0; |
885 | 893 | ||
886 | elf_entry = load_elf_interp(&loc->interp_elf_ex, | 894 | elf_entry = load_elf_interp(&loc->interp_elf_ex, |
887 | interpreter, | 895 | interpreter, |
@@ -1115,7 +1123,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, | |||
1115 | if (always_dump_vma(vma)) | 1123 | if (always_dump_vma(vma)) |
1116 | goto whole; | 1124 | goto whole; |
1117 | 1125 | ||
1118 | if (vma->vm_flags & VM_NODUMP) | 1126 | if (vma->vm_flags & VM_DONTDUMP) |
1119 | return 0; | 1127 | return 0; |
1120 | 1128 | ||
1121 | /* Hugetlb memory check */ | 1129 | /* Hugetlb memory check */ |
@@ -1127,7 +1135,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, | |||
1127 | } | 1135 | } |
1128 | 1136 | ||
1129 | /* Do not dump I/O mapped devices or special mappings */ | 1137 | /* Do not dump I/O mapped devices or special mappings */ |
1130 | if (vma->vm_flags & (VM_IO | VM_RESERVED)) | 1138 | if (vma->vm_flags & VM_IO) |
1131 | return 0; | 1139 | return 0; |
1132 | 1140 | ||
1133 | /* By default, dump shared memory if mapped from an anonymous file. */ | 1141 | /* By default, dump shared memory if mapped from an anonymous file. */ |
@@ -1372,6 +1380,103 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm) | |||
1372 | fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv); | 1380 | fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv); |
1373 | } | 1381 | } |
1374 | 1382 | ||
1383 | static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, | ||
1384 | siginfo_t *siginfo) | ||
1385 | { | ||
1386 | mm_segment_t old_fs = get_fs(); | ||
1387 | set_fs(KERNEL_DS); | ||
1388 | copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo); | ||
1389 | set_fs(old_fs); | ||
1390 | fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata); | ||
1391 | } | ||
1392 | |||
1393 | #define MAX_FILE_NOTE_SIZE (4*1024*1024) | ||
1394 | /* | ||
1395 | * Format of NT_FILE note: | ||
1396 | * | ||
1397 | * long count -- how many files are mapped | ||
1398 | * long page_size -- units for file_ofs | ||
1399 | * array of [COUNT] elements of | ||
1400 | * long start | ||
1401 | * long end | ||
1402 | * long file_ofs | ||
1403 | * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... | ||
1404 | */ | ||
1405 | static void fill_files_note(struct memelfnote *note) | ||
1406 | { | ||
1407 | struct vm_area_struct *vma; | ||
1408 | unsigned count, size, names_ofs, remaining, n; | ||
1409 | user_long_t *data; | ||
1410 | user_long_t *start_end_ofs; | ||
1411 | char *name_base, *name_curpos; | ||
1412 | |||
1413 | /* *Estimated* file count and total data size needed */ | ||
1414 | count = current->mm->map_count; | ||
1415 | size = count * 64; | ||
1416 | |||
1417 | names_ofs = (2 + 3 * count) * sizeof(data[0]); | ||
1418 | alloc: | ||
1419 | if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */ | ||
1420 | goto err; | ||
1421 | size = round_up(size, PAGE_SIZE); | ||
1422 | data = vmalloc(size); | ||
1423 | if (!data) | ||
1424 | goto err; | ||
1425 | |||
1426 | start_end_ofs = data + 2; | ||
1427 | name_base = name_curpos = ((char *)data) + names_ofs; | ||
1428 | remaining = size - names_ofs; | ||
1429 | count = 0; | ||
1430 | for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { | ||
1431 | struct file *file; | ||
1432 | const char *filename; | ||
1433 | |||
1434 | file = vma->vm_file; | ||
1435 | if (!file) | ||
1436 | continue; | ||
1437 | filename = d_path(&file->f_path, name_curpos, remaining); | ||
1438 | if (IS_ERR(filename)) { | ||
1439 | if (PTR_ERR(filename) == -ENAMETOOLONG) { | ||
1440 | vfree(data); | ||
1441 | size = size * 5 / 4; | ||
1442 | goto alloc; | ||
1443 | } | ||
1444 | continue; | ||
1445 | } | ||
1446 | |||
1447 | /* d_path() fills at the end, move name down */ | ||
1448 | /* n = strlen(filename) + 1: */ | ||
1449 | n = (name_curpos + remaining) - filename; | ||
1450 | remaining = filename - name_curpos; | ||
1451 | memmove(name_curpos, filename, n); | ||
1452 | name_curpos += n; | ||
1453 | |||
1454 | *start_end_ofs++ = vma->vm_start; | ||
1455 | *start_end_ofs++ = vma->vm_end; | ||
1456 | *start_end_ofs++ = vma->vm_pgoff; | ||
1457 | count++; | ||
1458 | } | ||
1459 | |||
1460 | /* Now we know exact count of files, can store it */ | ||
1461 | data[0] = count; | ||
1462 | data[1] = PAGE_SIZE; | ||
1463 | /* | ||
1464 | * Count usually is less than current->mm->map_count, | ||
1465 | * we need to move filenames down. | ||
1466 | */ | ||
1467 | n = current->mm->map_count - count; | ||
1468 | if (n != 0) { | ||
1469 | unsigned shift_bytes = n * 3 * sizeof(data[0]); | ||
1470 | memmove(name_base - shift_bytes, name_base, | ||
1471 | name_curpos - name_base); | ||
1472 | name_curpos -= shift_bytes; | ||
1473 | } | ||
1474 | |||
1475 | size = name_curpos - (char *)data; | ||
1476 | fill_note(note, "CORE", NT_FILE, size, data); | ||
1477 | err: ; | ||
1478 | } | ||
1479 | |||
1375 | #ifdef CORE_DUMP_USE_REGSET | 1480 | #ifdef CORE_DUMP_USE_REGSET |
1376 | #include <linux/regset.h> | 1481 | #include <linux/regset.h> |
1377 | 1482 | ||
@@ -1385,7 +1490,10 @@ struct elf_thread_core_info { | |||
1385 | struct elf_note_info { | 1490 | struct elf_note_info { |
1386 | struct elf_thread_core_info *thread; | 1491 | struct elf_thread_core_info *thread; |
1387 | struct memelfnote psinfo; | 1492 | struct memelfnote psinfo; |
1493 | struct memelfnote signote; | ||
1388 | struct memelfnote auxv; | 1494 | struct memelfnote auxv; |
1495 | struct memelfnote files; | ||
1496 | user_siginfo_t csigdata; | ||
1389 | size_t size; | 1497 | size_t size; |
1390 | int thread_notes; | 1498 | int thread_notes; |
1391 | }; | 1499 | }; |
@@ -1480,7 +1588,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, | |||
1480 | 1588 | ||
1481 | static int fill_note_info(struct elfhdr *elf, int phdrs, | 1589 | static int fill_note_info(struct elfhdr *elf, int phdrs, |
1482 | struct elf_note_info *info, | 1590 | struct elf_note_info *info, |
1483 | long signr, struct pt_regs *regs) | 1591 | siginfo_t *siginfo, struct pt_regs *regs) |
1484 | { | 1592 | { |
1485 | struct task_struct *dump_task = current; | 1593 | struct task_struct *dump_task = current; |
1486 | const struct user_regset_view *view = task_user_regset_view(dump_task); | 1594 | const struct user_regset_view *view = task_user_regset_view(dump_task); |
@@ -1550,7 +1658,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, | |||
1550 | * Now fill in each thread's information. | 1658 | * Now fill in each thread's information. |
1551 | */ | 1659 | */ |
1552 | for (t = info->thread; t != NULL; t = t->next) | 1660 | for (t = info->thread; t != NULL; t = t->next) |
1553 | if (!fill_thread_core_info(t, view, signr, &info->size)) | 1661 | if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size)) |
1554 | return 0; | 1662 | return 0; |
1555 | 1663 | ||
1556 | /* | 1664 | /* |
@@ -1559,9 +1667,15 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, | |||
1559 | fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm); | 1667 | fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm); |
1560 | info->size += notesize(&info->psinfo); | 1668 | info->size += notesize(&info->psinfo); |
1561 | 1669 | ||
1670 | fill_siginfo_note(&info->signote, &info->csigdata, siginfo); | ||
1671 | info->size += notesize(&info->signote); | ||
1672 | |||
1562 | fill_auxv_note(&info->auxv, current->mm); | 1673 | fill_auxv_note(&info->auxv, current->mm); |
1563 | info->size += notesize(&info->auxv); | 1674 | info->size += notesize(&info->auxv); |
1564 | 1675 | ||
1676 | fill_files_note(&info->files); | ||
1677 | info->size += notesize(&info->files); | ||
1678 | |||
1565 | return 1; | 1679 | return 1; |
1566 | } | 1680 | } |
1567 | 1681 | ||
@@ -1588,8 +1702,12 @@ static int write_note_info(struct elf_note_info *info, | |||
1588 | 1702 | ||
1589 | if (first && !writenote(&info->psinfo, file, foffset)) | 1703 | if (first && !writenote(&info->psinfo, file, foffset)) |
1590 | return 0; | 1704 | return 0; |
1705 | if (first && !writenote(&info->signote, file, foffset)) | ||
1706 | return 0; | ||
1591 | if (first && !writenote(&info->auxv, file, foffset)) | 1707 | if (first && !writenote(&info->auxv, file, foffset)) |
1592 | return 0; | 1708 | return 0; |
1709 | if (first && !writenote(&info->files, file, foffset)) | ||
1710 | return 0; | ||
1593 | 1711 | ||
1594 | for (i = 1; i < info->thread_notes; ++i) | 1712 | for (i = 1; i < info->thread_notes; ++i) |
1595 | if (t->notes[i].data && | 1713 | if (t->notes[i].data && |
@@ -1616,6 +1734,7 @@ static void free_note_info(struct elf_note_info *info) | |||
1616 | kfree(t); | 1734 | kfree(t); |
1617 | } | 1735 | } |
1618 | kfree(info->psinfo.data); | 1736 | kfree(info->psinfo.data); |
1737 | vfree(info->files.data); | ||
1619 | } | 1738 | } |
1620 | 1739 | ||
1621 | #else | 1740 | #else |
@@ -1681,6 +1800,7 @@ struct elf_note_info { | |||
1681 | #ifdef ELF_CORE_COPY_XFPREGS | 1800 | #ifdef ELF_CORE_COPY_XFPREGS |
1682 | elf_fpxregset_t *xfpu; | 1801 | elf_fpxregset_t *xfpu; |
1683 | #endif | 1802 | #endif |
1803 | user_siginfo_t csigdata; | ||
1684 | int thread_status_size; | 1804 | int thread_status_size; |
1685 | int numnote; | 1805 | int numnote; |
1686 | }; | 1806 | }; |
@@ -1690,48 +1810,37 @@ static int elf_note_info_init(struct elf_note_info *info) | |||
1690 | memset(info, 0, sizeof(*info)); | 1810 | memset(info, 0, sizeof(*info)); |
1691 | INIT_LIST_HEAD(&info->thread_list); | 1811 | INIT_LIST_HEAD(&info->thread_list); |
1692 | 1812 | ||
1693 | /* Allocate space for six ELF notes */ | 1813 | /* Allocate space for ELF notes */ |
1694 | info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL); | 1814 | info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL); |
1695 | if (!info->notes) | 1815 | if (!info->notes) |
1696 | return 0; | 1816 | return 0; |
1697 | info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); | 1817 | info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); |
1698 | if (!info->psinfo) | 1818 | if (!info->psinfo) |
1699 | goto notes_free; | 1819 | return 0; |
1700 | info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); | 1820 | info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); |
1701 | if (!info->prstatus) | 1821 | if (!info->prstatus) |
1702 | goto psinfo_free; | 1822 | return 0; |
1703 | info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); | 1823 | info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); |
1704 | if (!info->fpu) | 1824 | if (!info->fpu) |
1705 | goto prstatus_free; | 1825 | return 0; |
1706 | #ifdef ELF_CORE_COPY_XFPREGS | 1826 | #ifdef ELF_CORE_COPY_XFPREGS |
1707 | info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); | 1827 | info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); |
1708 | if (!info->xfpu) | 1828 | if (!info->xfpu) |
1709 | goto fpu_free; | 1829 | return 0; |
1710 | #endif | 1830 | #endif |
1711 | return 1; | 1831 | return 1; |
1712 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1713 | fpu_free: | ||
1714 | kfree(info->fpu); | ||
1715 | #endif | ||
1716 | prstatus_free: | ||
1717 | kfree(info->prstatus); | ||
1718 | psinfo_free: | ||
1719 | kfree(info->psinfo); | ||
1720 | notes_free: | ||
1721 | kfree(info->notes); | ||
1722 | return 0; | ||
1723 | } | 1832 | } |
1724 | 1833 | ||
1725 | static int fill_note_info(struct elfhdr *elf, int phdrs, | 1834 | static int fill_note_info(struct elfhdr *elf, int phdrs, |
1726 | struct elf_note_info *info, | 1835 | struct elf_note_info *info, |
1727 | long signr, struct pt_regs *regs) | 1836 | siginfo_t *siginfo, struct pt_regs *regs) |
1728 | { | 1837 | { |
1729 | struct list_head *t; | 1838 | struct list_head *t; |
1730 | 1839 | ||
1731 | if (!elf_note_info_init(info)) | 1840 | if (!elf_note_info_init(info)) |
1732 | return 0; | 1841 | return 0; |
1733 | 1842 | ||
1734 | if (signr) { | 1843 | if (siginfo->si_signo) { |
1735 | struct core_thread *ct; | 1844 | struct core_thread *ct; |
1736 | struct elf_thread_status *ets; | 1845 | struct elf_thread_status *ets; |
1737 | 1846 | ||
@@ -1749,13 +1858,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, | |||
1749 | int sz; | 1858 | int sz; |
1750 | 1859 | ||
1751 | ets = list_entry(t, struct elf_thread_status, list); | 1860 | ets = list_entry(t, struct elf_thread_status, list); |
1752 | sz = elf_dump_thread_status(signr, ets); | 1861 | sz = elf_dump_thread_status(siginfo->si_signo, ets); |
1753 | info->thread_status_size += sz; | 1862 | info->thread_status_size += sz; |
1754 | } | 1863 | } |
1755 | } | 1864 | } |
1756 | /* now collect the dump for the current */ | 1865 | /* now collect the dump for the current */ |
1757 | memset(info->prstatus, 0, sizeof(*info->prstatus)); | 1866 | memset(info->prstatus, 0, sizeof(*info->prstatus)); |
1758 | fill_prstatus(info->prstatus, current, signr); | 1867 | fill_prstatus(info->prstatus, current, siginfo->si_signo); |
1759 | elf_core_copy_regs(&info->prstatus->pr_reg, regs); | 1868 | elf_core_copy_regs(&info->prstatus->pr_reg, regs); |
1760 | 1869 | ||
1761 | /* Set up header */ | 1870 | /* Set up header */ |
@@ -1772,9 +1881,11 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, | |||
1772 | fill_note(info->notes + 1, "CORE", NT_PRPSINFO, | 1881 | fill_note(info->notes + 1, "CORE", NT_PRPSINFO, |
1773 | sizeof(*info->psinfo), info->psinfo); | 1882 | sizeof(*info->psinfo), info->psinfo); |
1774 | 1883 | ||
1775 | info->numnote = 2; | 1884 | fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo); |
1885 | fill_auxv_note(info->notes + 3, current->mm); | ||
1886 | fill_files_note(info->notes + 4); | ||
1776 | 1887 | ||
1777 | fill_auxv_note(&info->notes[info->numnote++], current->mm); | 1888 | info->numnote = 5; |
1778 | 1889 | ||
1779 | /* Try to dump the FPU. */ | 1890 | /* Try to dump the FPU. */ |
1780 | info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, | 1891 | info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, |
@@ -1836,6 +1947,9 @@ static void free_note_info(struct elf_note_info *info) | |||
1836 | kfree(list_entry(tmp, struct elf_thread_status, list)); | 1947 | kfree(list_entry(tmp, struct elf_thread_status, list)); |
1837 | } | 1948 | } |
1838 | 1949 | ||
1950 | /* Free data allocated by fill_files_note(): */ | ||
1951 | vfree(info->notes[4].data); | ||
1952 | |||
1839 | kfree(info->prstatus); | 1953 | kfree(info->prstatus); |
1840 | kfree(info->psinfo); | 1954 | kfree(info->psinfo); |
1841 | kfree(info->notes); | 1955 | kfree(info->notes); |
@@ -1962,7 +2076,7 @@ static int elf_core_dump(struct coredump_params *cprm) | |||
1962 | * Collect all the non-memory information about the process for the | 2076 | * Collect all the non-memory information about the process for the |
1963 | * notes. This also sets up the file header. | 2077 | * notes. This also sets up the file header. |
1964 | */ | 2078 | */ |
1965 | if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs)) | 2079 | if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs)) |
1966 | goto cleanup; | 2080 | goto cleanup; |
1967 | 2081 | ||
1968 | has_dumped = 1; | 2082 | has_dumped = 1; |
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 3d77cf81ba3c..262db114ff01 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
@@ -1205,7 +1205,7 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags) | |||
1205 | int dump_ok; | 1205 | int dump_ok; |
1206 | 1206 | ||
1207 | /* Do not dump I/O mapped devices or special mappings */ | 1207 | /* Do not dump I/O mapped devices or special mappings */ |
1208 | if (vma->vm_flags & (VM_IO | VM_RESERVED)) { | 1208 | if (vma->vm_flags & VM_IO) { |
1209 | kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags); | 1209 | kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags); |
1210 | return 0; | 1210 | return 0; |
1211 | } | 1211 | } |
@@ -1642,7 +1642,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) | |||
1642 | goto cleanup; | 1642 | goto cleanup; |
1643 | #endif | 1643 | #endif |
1644 | 1644 | ||
1645 | if (cprm->signr) { | 1645 | if (cprm->siginfo->si_signo) { |
1646 | struct core_thread *ct; | 1646 | struct core_thread *ct; |
1647 | struct elf_thread_status *tmp; | 1647 | struct elf_thread_status *tmp; |
1648 | 1648 | ||
@@ -1661,13 +1661,13 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) | |||
1661 | int sz; | 1661 | int sz; |
1662 | 1662 | ||
1663 | tmp = list_entry(t, struct elf_thread_status, list); | 1663 | tmp = list_entry(t, struct elf_thread_status, list); |
1664 | sz = elf_dump_thread_status(cprm->signr, tmp); | 1664 | sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp); |
1665 | thread_status_size += sz; | 1665 | thread_status_size += sz; |
1666 | } | 1666 | } |
1667 | } | 1667 | } |
1668 | 1668 | ||
1669 | /* now collect the dump for the current */ | 1669 | /* now collect the dump for the current */ |
1670 | fill_prstatus(prstatus, current, cprm->signr); | 1670 | fill_prstatus(prstatus, current, cprm->siginfo->si_signo); |
1671 | elf_core_copy_regs(&prstatus->pr_reg, cprm->regs); | 1671 | elf_core_copy_regs(&prstatus->pr_reg, cprm->regs); |
1672 | 1672 | ||
1673 | segs = current->mm->map_count; | 1673 | segs = current->mm->map_count; |
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 178cb70acc26..e280352b28f9 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -107,7 +107,7 @@ static struct linux_binfmt flat_format = { | |||
107 | static int flat_core_dump(struct coredump_params *cprm) | 107 | static int flat_core_dump(struct coredump_params *cprm) |
108 | { | 108 | { |
109 | printk("Process %s:%d received signr %d and should have core dumped\n", | 109 | printk("Process %s:%d received signr %d and should have core dumped\n", |
110 | current->comm, current->pid, (int) cprm->signr); | 110 | current->comm, current->pid, (int) cprm->siginfo->si_signo); |
111 | return(1); | 111 | return(1); |
112 | } | 112 | } |
113 | 113 | ||
@@ -73,7 +73,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) | |||
73 | { | 73 | { |
74 | unsigned int sz = sizeof(struct bio) + extra_size; | 74 | unsigned int sz = sizeof(struct bio) + extra_size; |
75 | struct kmem_cache *slab = NULL; | 75 | struct kmem_cache *slab = NULL; |
76 | struct bio_slab *bslab; | 76 | struct bio_slab *bslab, *new_bio_slabs; |
77 | unsigned int i, entry = -1; | 77 | unsigned int i, entry = -1; |
78 | 78 | ||
79 | mutex_lock(&bio_slab_lock); | 79 | mutex_lock(&bio_slab_lock); |
@@ -97,11 +97,12 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) | |||
97 | 97 | ||
98 | if (bio_slab_nr == bio_slab_max && entry == -1) { | 98 | if (bio_slab_nr == bio_slab_max && entry == -1) { |
99 | bio_slab_max <<= 1; | 99 | bio_slab_max <<= 1; |
100 | bio_slabs = krealloc(bio_slabs, | 100 | new_bio_slabs = krealloc(bio_slabs, |
101 | bio_slab_max * sizeof(struct bio_slab), | 101 | bio_slab_max * sizeof(struct bio_slab), |
102 | GFP_KERNEL); | 102 | GFP_KERNEL); |
103 | if (!bio_slabs) | 103 | if (!new_bio_slabs) |
104 | goto out_unlock; | 104 | goto out_unlock; |
105 | bio_slabs = new_bio_slabs; | ||
105 | } | 106 | } |
106 | if (entry == -1) | 107 | if (entry == -1) |
107 | entry = bio_slab_nr++; | 108 | entry = bio_slab_nr++; |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 1e519195d45b..38e721b35d45 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -1578,10 +1578,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
1578 | unsigned long nr_segs, loff_t pos) | 1578 | unsigned long nr_segs, loff_t pos) |
1579 | { | 1579 | { |
1580 | struct file *file = iocb->ki_filp; | 1580 | struct file *file = iocb->ki_filp; |
1581 | struct blk_plug plug; | ||
1581 | ssize_t ret; | 1582 | ssize_t ret; |
1582 | 1583 | ||
1583 | BUG_ON(iocb->ki_pos != pos); | 1584 | BUG_ON(iocb->ki_pos != pos); |
1584 | 1585 | ||
1586 | blk_start_plug(&plug); | ||
1585 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); | 1587 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); |
1586 | if (ret > 0 || ret == -EIOCBQUEUED) { | 1588 | if (ret > 0 || ret == -EIOCBQUEUED) { |
1587 | ssize_t err; | 1589 | ssize_t err; |
@@ -1590,6 +1592,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
1590 | if (err < 0 && ret > 0) | 1592 | if (err < 0 && ret > 0) |
1591 | ret = err; | 1593 | ret = err; |
1592 | } | 1594 | } |
1595 | blk_finish_plug(&plug); | ||
1593 | return ret; | 1596 | return ret; |
1594 | } | 1597 | } |
1595 | EXPORT_SYMBOL_GPL(blkdev_aio_write); | 1598 | EXPORT_SYMBOL_GPL(blkdev_aio_write); |
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 761e2cd8fed1..0c16e3dbfd56 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
@@ -61,7 +61,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
61 | size = __btrfs_getxattr(inode, name, value, size); | 61 | size = __btrfs_getxattr(inode, name, value, size); |
62 | } | 62 | } |
63 | if (size > 0) { | 63 | if (size > 0) { |
64 | acl = posix_acl_from_xattr(value, size); | 64 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
65 | } else if (size == -ENOENT || size == -ENODATA || size == 0) { | 65 | } else if (size == -ENOENT || size == -ENODATA || size == 0) { |
66 | /* FIXME, who returns -ENOENT? I think nobody */ | 66 | /* FIXME, who returns -ENOENT? I think nobody */ |
67 | acl = NULL; | 67 | acl = NULL; |
@@ -91,7 +91,7 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name, | |||
91 | return PTR_ERR(acl); | 91 | return PTR_ERR(acl); |
92 | if (acl == NULL) | 92 | if (acl == NULL) |
93 | return -ENODATA; | 93 | return -ENODATA; |
94 | ret = posix_acl_to_xattr(acl, value, size); | 94 | ret = posix_acl_to_xattr(&init_user_ns, acl, value, size); |
95 | posix_acl_release(acl); | 95 | posix_acl_release(acl); |
96 | 96 | ||
97 | return ret; | 97 | return ret; |
@@ -141,7 +141,7 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, | |||
141 | goto out; | 141 | goto out; |
142 | } | 142 | } |
143 | 143 | ||
144 | ret = posix_acl_to_xattr(acl, value, size); | 144 | ret = posix_acl_to_xattr(&init_user_ns, acl, value, size); |
145 | if (ret < 0) | 145 | if (ret < 0) |
146 | goto out; | 146 | goto out; |
147 | } | 147 | } |
@@ -169,7 +169,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, | |||
169 | return -EOPNOTSUPP; | 169 | return -EOPNOTSUPP; |
170 | 170 | ||
171 | if (value) { | 171 | if (value) { |
172 | acl = posix_acl_from_xattr(value, size); | 172 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
173 | if (IS_ERR(acl)) | 173 | if (IS_ERR(acl)) |
174 | return PTR_ERR(acl); | 174 | return PTR_ERR(acl); |
175 | 175 | ||
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index a256f3b2a845..ff6475f409d6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -1438,10 +1438,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, | |||
1438 | ret = extent_from_logical(fs_info, logical, path, | 1438 | ret = extent_from_logical(fs_info, logical, path, |
1439 | &found_key); | 1439 | &found_key); |
1440 | btrfs_release_path(path); | 1440 | btrfs_release_path(path); |
1441 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) | ||
1442 | ret = -EINVAL; | ||
1443 | if (ret < 0) | 1441 | if (ret < 0) |
1444 | return ret; | 1442 | return ret; |
1443 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) | ||
1444 | return -EINVAL; | ||
1445 | 1445 | ||
1446 | extent_item_pos = logical - found_key.objectid; | 1446 | extent_item_pos = logical - found_key.objectid; |
1447 | ret = iterate_extent_inodes(fs_info, found_key.objectid, | 1447 | ret = iterate_extent_inodes(fs_info, found_key.objectid, |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 86eff48dab78..43d1c5a3a030 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -818,6 +818,7 @@ static void free_workspace(int type, struct list_head *workspace) | |||
818 | btrfs_compress_op[idx]->free_workspace(workspace); | 818 | btrfs_compress_op[idx]->free_workspace(workspace); |
819 | atomic_dec(alloc_workspace); | 819 | atomic_dec(alloc_workspace); |
820 | wake: | 820 | wake: |
821 | smp_mb(); | ||
821 | if (waitqueue_active(workspace_wait)) | 822 | if (waitqueue_active(workspace_wait)) |
822 | wake_up(workspace_wait); | 823 | wake_up(workspace_wait); |
823 | } | 824 | } |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9d7621f271ff..6d183f60d63a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -421,12 +421,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | |||
421 | spin_unlock(&fs_info->tree_mod_seq_lock); | 421 | spin_unlock(&fs_info->tree_mod_seq_lock); |
422 | 422 | ||
423 | /* | 423 | /* |
424 | * we removed the lowest blocker from the blocker list, so there may be | ||
425 | * more processible delayed refs. | ||
426 | */ | ||
427 | wake_up(&fs_info->tree_mod_seq_wait); | ||
428 | |||
429 | /* | ||
430 | * anything that's lower than the lowest existing (read: blocked) | 424 | * anything that's lower than the lowest existing (read: blocked) |
431 | * sequence number can be removed from the tree. | 425 | * sequence number can be removed from the tree. |
432 | */ | 426 | */ |
@@ -631,6 +625,9 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) | |||
631 | u32 nritems; | 625 | u32 nritems; |
632 | int ret; | 626 | int ret; |
633 | 627 | ||
628 | if (btrfs_header_level(eb) == 0) | ||
629 | return; | ||
630 | |||
634 | nritems = btrfs_header_nritems(eb); | 631 | nritems = btrfs_header_nritems(eb); |
635 | for (i = nritems - 1; i >= 0; i--) { | 632 | for (i = nritems - 1; i >= 0; i--) { |
636 | ret = tree_mod_log_insert_key_locked(fs_info, eb, i, | 633 | ret = tree_mod_log_insert_key_locked(fs_info, eb, i, |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4bab807227ad..9821b672f5a2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -116,7 +116,7 @@ struct btrfs_ordered_sum; | |||
116 | #define BTRFS_FREE_SPACE_OBJECTID -11ULL | 116 | #define BTRFS_FREE_SPACE_OBJECTID -11ULL |
117 | 117 | ||
118 | /* | 118 | /* |
119 | * The inode number assigned to the special inode for sotring | 119 | * The inode number assigned to the special inode for storing |
120 | * free ino cache | 120 | * free ino cache |
121 | */ | 121 | */ |
122 | #define BTRFS_FREE_INO_OBJECTID -12ULL | 122 | #define BTRFS_FREE_INO_OBJECTID -12ULL |
@@ -1252,7 +1252,6 @@ struct btrfs_fs_info { | |||
1252 | atomic_t tree_mod_seq; | 1252 | atomic_t tree_mod_seq; |
1253 | struct list_head tree_mod_seq_list; | 1253 | struct list_head tree_mod_seq_list; |
1254 | struct seq_list tree_mod_seq_elem; | 1254 | struct seq_list tree_mod_seq_elem; |
1255 | wait_queue_head_t tree_mod_seq_wait; | ||
1256 | 1255 | ||
1257 | /* this protects tree_mod_log */ | 1256 | /* this protects tree_mod_log */ |
1258 | rwlock_t tree_mod_log_lock; | 1257 | rwlock_t tree_mod_log_lock; |
@@ -3192,7 +3191,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
3192 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 3191 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, |
3193 | struct bio *bio, u32 *dst); | 3192 | struct bio *bio, u32 *dst); |
3194 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | 3193 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, |
3195 | struct bio *bio, u64 logical_offset, u32 *dst); | 3194 | struct bio *bio, u64 logical_offset); |
3196 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 3195 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
3197 | struct btrfs_root *root, | 3196 | struct btrfs_root *root, |
3198 | u64 objectid, u64 pos, | 3197 | u64 objectid, u64 pos, |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 335605c8ceab..52c85e2b95d0 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -512,8 +512,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) | |||
512 | 512 | ||
513 | rb_erase(&delayed_item->rb_node, root); | 513 | rb_erase(&delayed_item->rb_node, root); |
514 | delayed_item->delayed_node->count--; | 514 | delayed_item->delayed_node->count--; |
515 | atomic_dec(&delayed_root->items); | 515 | if (atomic_dec_return(&delayed_root->items) < |
516 | if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND && | 516 | BTRFS_DELAYED_BACKGROUND && |
517 | waitqueue_active(&delayed_root->wait)) | 517 | waitqueue_active(&delayed_root->wait)) |
518 | wake_up(&delayed_root->wait); | 518 | wake_up(&delayed_root->wait); |
519 | } | 519 | } |
@@ -1028,9 +1028,10 @@ do_again: | |||
1028 | btrfs_release_delayed_item(prev); | 1028 | btrfs_release_delayed_item(prev); |
1029 | ret = 0; | 1029 | ret = 0; |
1030 | btrfs_release_path(path); | 1030 | btrfs_release_path(path); |
1031 | if (curr) | 1031 | if (curr) { |
1032 | mutex_unlock(&node->mutex); | ||
1032 | goto do_again; | 1033 | goto do_again; |
1033 | else | 1034 | } else |
1034 | goto delete_fail; | 1035 | goto delete_fail; |
1035 | } | 1036 | } |
1036 | 1037 | ||
@@ -1055,8 +1056,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node) | |||
1055 | delayed_node->count--; | 1056 | delayed_node->count--; |
1056 | 1057 | ||
1057 | delayed_root = delayed_node->root->fs_info->delayed_root; | 1058 | delayed_root = delayed_node->root->fs_info->delayed_root; |
1058 | atomic_dec(&delayed_root->items); | 1059 | if (atomic_dec_return(&delayed_root->items) < |
1059 | if (atomic_read(&delayed_root->items) < | ||
1060 | BTRFS_DELAYED_BACKGROUND && | 1060 | BTRFS_DELAYED_BACKGROUND && |
1061 | waitqueue_active(&delayed_root->wait)) | 1061 | waitqueue_active(&delayed_root->wait)) |
1062 | wake_up(&delayed_root->wait); | 1062 | wake_up(&delayed_root->wait); |
@@ -1715,8 +1715,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, | |||
1715 | struct btrfs_inode_item *inode_item, | 1715 | struct btrfs_inode_item *inode_item, |
1716 | struct inode *inode) | 1716 | struct inode *inode) |
1717 | { | 1717 | { |
1718 | btrfs_set_stack_inode_uid(inode_item, inode->i_uid); | 1718 | btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode)); |
1719 | btrfs_set_stack_inode_gid(inode_item, inode->i_gid); | 1719 | btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode)); |
1720 | btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size); | 1720 | btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size); |
1721 | btrfs_set_stack_inode_mode(inode_item, inode->i_mode); | 1721 | btrfs_set_stack_inode_mode(inode_item, inode->i_mode); |
1722 | btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink); | 1722 | btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink); |
@@ -1764,8 +1764,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) | |||
1764 | 1764 | ||
1765 | inode_item = &delayed_node->inode_item; | 1765 | inode_item = &delayed_node->inode_item; |
1766 | 1766 | ||
1767 | inode->i_uid = btrfs_stack_inode_uid(inode_item); | 1767 | i_uid_write(inode, btrfs_stack_inode_uid(inode_item)); |
1768 | inode->i_gid = btrfs_stack_inode_gid(inode_item); | 1768 | i_gid_write(inode, btrfs_stack_inode_gid(inode_item)); |
1769 | btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); | 1769 | btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); |
1770 | inode->i_mode = btrfs_stack_inode_mode(inode_item); | 1770 | inode->i_mode = btrfs_stack_inode_mode(inode_item); |
1771 | set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); | 1771 | set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index da7419ed01bb..ae9411773397 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -38,17 +38,14 @@ | |||
38 | static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, | 38 | static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, |
39 | struct btrfs_delayed_tree_ref *ref1) | 39 | struct btrfs_delayed_tree_ref *ref1) |
40 | { | 40 | { |
41 | if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) { | 41 | if (ref1->root < ref2->root) |
42 | if (ref1->root < ref2->root) | 42 | return -1; |
43 | return -1; | 43 | if (ref1->root > ref2->root) |
44 | if (ref1->root > ref2->root) | 44 | return 1; |
45 | return 1; | 45 | if (ref1->parent < ref2->parent) |
46 | } else { | 46 | return -1; |
47 | if (ref1->parent < ref2->parent) | 47 | if (ref1->parent > ref2->parent) |
48 | return -1; | 48 | return 1; |
49 | if (ref1->parent > ref2->parent) | ||
50 | return 1; | ||
51 | } | ||
52 | return 0; | 49 | return 0; |
53 | } | 50 | } |
54 | 51 | ||
@@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2, | |||
85 | * type of the delayed backrefs and content of delayed backrefs. | 82 | * type of the delayed backrefs and content of delayed backrefs. |
86 | */ | 83 | */ |
87 | static int comp_entry(struct btrfs_delayed_ref_node *ref2, | 84 | static int comp_entry(struct btrfs_delayed_ref_node *ref2, |
88 | struct btrfs_delayed_ref_node *ref1) | 85 | struct btrfs_delayed_ref_node *ref1, |
86 | bool compare_seq) | ||
89 | { | 87 | { |
90 | if (ref1->bytenr < ref2->bytenr) | 88 | if (ref1->bytenr < ref2->bytenr) |
91 | return -1; | 89 | return -1; |
@@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2, | |||
102 | if (ref1->type > ref2->type) | 100 | if (ref1->type > ref2->type) |
103 | return 1; | 101 | return 1; |
104 | /* merging of sequenced refs is not allowed */ | 102 | /* merging of sequenced refs is not allowed */ |
105 | if (ref1->seq < ref2->seq) | 103 | if (compare_seq) { |
106 | return -1; | 104 | if (ref1->seq < ref2->seq) |
107 | if (ref1->seq > ref2->seq) | 105 | return -1; |
108 | return 1; | 106 | if (ref1->seq > ref2->seq) |
107 | return 1; | ||
108 | } | ||
109 | if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || | 109 | if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || |
110 | ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { | 110 | ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { |
111 | return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), | 111 | return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), |
@@ -139,7 +139,7 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, | |||
139 | entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, | 139 | entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, |
140 | rb_node); | 140 | rb_node); |
141 | 141 | ||
142 | cmp = comp_entry(entry, ins); | 142 | cmp = comp_entry(entry, ins, 1); |
143 | if (cmp < 0) | 143 | if (cmp < 0) |
144 | p = &(*p)->rb_left; | 144 | p = &(*p)->rb_left; |
145 | else if (cmp > 0) | 145 | else if (cmp > 0) |
@@ -233,6 +233,114 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | |||
233 | return 0; | 233 | return 0; |
234 | } | 234 | } |
235 | 235 | ||
236 | static void inline drop_delayed_ref(struct btrfs_trans_handle *trans, | ||
237 | struct btrfs_delayed_ref_root *delayed_refs, | ||
238 | struct btrfs_delayed_ref_node *ref) | ||
239 | { | ||
240 | rb_erase(&ref->rb_node, &delayed_refs->root); | ||
241 | ref->in_tree = 0; | ||
242 | btrfs_put_delayed_ref(ref); | ||
243 | delayed_refs->num_entries--; | ||
244 | if (trans->delayed_ref_updates) | ||
245 | trans->delayed_ref_updates--; | ||
246 | } | ||
247 | |||
248 | static int merge_ref(struct btrfs_trans_handle *trans, | ||
249 | struct btrfs_delayed_ref_root *delayed_refs, | ||
250 | struct btrfs_delayed_ref_node *ref, u64 seq) | ||
251 | { | ||
252 | struct rb_node *node; | ||
253 | int merged = 0; | ||
254 | int mod = 0; | ||
255 | int done = 0; | ||
256 | |||
257 | node = rb_prev(&ref->rb_node); | ||
258 | while (node) { | ||
259 | struct btrfs_delayed_ref_node *next; | ||
260 | |||
261 | next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
262 | node = rb_prev(node); | ||
263 | if (next->bytenr != ref->bytenr) | ||
264 | break; | ||
265 | if (seq && next->seq >= seq) | ||
266 | break; | ||
267 | if (comp_entry(ref, next, 0)) | ||
268 | continue; | ||
269 | |||
270 | if (ref->action == next->action) { | ||
271 | mod = next->ref_mod; | ||
272 | } else { | ||
273 | if (ref->ref_mod < next->ref_mod) { | ||
274 | struct btrfs_delayed_ref_node *tmp; | ||
275 | |||
276 | tmp = ref; | ||
277 | ref = next; | ||
278 | next = tmp; | ||
279 | done = 1; | ||
280 | } | ||
281 | mod = -next->ref_mod; | ||
282 | } | ||
283 | |||
284 | merged++; | ||
285 | drop_delayed_ref(trans, delayed_refs, next); | ||
286 | ref->ref_mod += mod; | ||
287 | if (ref->ref_mod == 0) { | ||
288 | drop_delayed_ref(trans, delayed_refs, ref); | ||
289 | break; | ||
290 | } else { | ||
291 | /* | ||
292 | * You can't have multiples of the same ref on a tree | ||
293 | * block. | ||
294 | */ | ||
295 | WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || | ||
296 | ref->type == BTRFS_SHARED_BLOCK_REF_KEY); | ||
297 | } | ||
298 | |||
299 | if (done) | ||
300 | break; | ||
301 | node = rb_prev(&ref->rb_node); | ||
302 | } | ||
303 | |||
304 | return merged; | ||
305 | } | ||
306 | |||
307 | void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, | ||
308 | struct btrfs_fs_info *fs_info, | ||
309 | struct btrfs_delayed_ref_root *delayed_refs, | ||
310 | struct btrfs_delayed_ref_head *head) | ||
311 | { | ||
312 | struct rb_node *node; | ||
313 | u64 seq = 0; | ||
314 | |||
315 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
316 | if (!list_empty(&fs_info->tree_mod_seq_list)) { | ||
317 | struct seq_list *elem; | ||
318 | |||
319 | elem = list_first_entry(&fs_info->tree_mod_seq_list, | ||
320 | struct seq_list, list); | ||
321 | seq = elem->seq; | ||
322 | } | ||
323 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
324 | |||
325 | node = rb_prev(&head->node.rb_node); | ||
326 | while (node) { | ||
327 | struct btrfs_delayed_ref_node *ref; | ||
328 | |||
329 | ref = rb_entry(node, struct btrfs_delayed_ref_node, | ||
330 | rb_node); | ||
331 | if (ref->bytenr != head->node.bytenr) | ||
332 | break; | ||
333 | |||
334 | /* We can't merge refs that are outside of our seq count */ | ||
335 | if (seq && ref->seq >= seq) | ||
336 | break; | ||
337 | if (merge_ref(trans, delayed_refs, ref, seq)) | ||
338 | node = rb_prev(&head->node.rb_node); | ||
339 | else | ||
340 | node = rb_prev(node); | ||
341 | } | ||
342 | } | ||
343 | |||
236 | int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, | 344 | int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, |
237 | struct btrfs_delayed_ref_root *delayed_refs, | 345 | struct btrfs_delayed_ref_root *delayed_refs, |
238 | u64 seq) | 346 | u64 seq) |
@@ -336,18 +444,11 @@ update_existing_ref(struct btrfs_trans_handle *trans, | |||
336 | * every changing the extent allocation tree. | 444 | * every changing the extent allocation tree. |
337 | */ | 445 | */ |
338 | existing->ref_mod--; | 446 | existing->ref_mod--; |
339 | if (existing->ref_mod == 0) { | 447 | if (existing->ref_mod == 0) |
340 | rb_erase(&existing->rb_node, | 448 | drop_delayed_ref(trans, delayed_refs, existing); |
341 | &delayed_refs->root); | 449 | else |
342 | existing->in_tree = 0; | ||
343 | btrfs_put_delayed_ref(existing); | ||
344 | delayed_refs->num_entries--; | ||
345 | if (trans->delayed_ref_updates) | ||
346 | trans->delayed_ref_updates--; | ||
347 | } else { | ||
348 | WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || | 450 | WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || |
349 | existing->type == BTRFS_SHARED_BLOCK_REF_KEY); | 451 | existing->type == BTRFS_SHARED_BLOCK_REF_KEY); |
350 | } | ||
351 | } else { | 452 | } else { |
352 | WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || | 453 | WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || |
353 | existing->type == BTRFS_SHARED_BLOCK_REF_KEY); | 454 | existing->type == BTRFS_SHARED_BLOCK_REF_KEY); |
@@ -662,9 +763,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
662 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, | 763 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, |
663 | num_bytes, parent, ref_root, level, action, | 764 | num_bytes, parent, ref_root, level, action, |
664 | for_cow); | 765 | for_cow); |
665 | if (!need_ref_seq(for_cow, ref_root) && | ||
666 | waitqueue_active(&fs_info->tree_mod_seq_wait)) | ||
667 | wake_up(&fs_info->tree_mod_seq_wait); | ||
668 | spin_unlock(&delayed_refs->lock); | 766 | spin_unlock(&delayed_refs->lock); |
669 | if (need_ref_seq(for_cow, ref_root)) | 767 | if (need_ref_seq(for_cow, ref_root)) |
670 | btrfs_qgroup_record_ref(trans, &ref->node, extent_op); | 768 | btrfs_qgroup_record_ref(trans, &ref->node, extent_op); |
@@ -713,9 +811,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
713 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, | 811 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, |
714 | num_bytes, parent, ref_root, owner, offset, | 812 | num_bytes, parent, ref_root, owner, offset, |
715 | action, for_cow); | 813 | action, for_cow); |
716 | if (!need_ref_seq(for_cow, ref_root) && | ||
717 | waitqueue_active(&fs_info->tree_mod_seq_wait)) | ||
718 | wake_up(&fs_info->tree_mod_seq_wait); | ||
719 | spin_unlock(&delayed_refs->lock); | 814 | spin_unlock(&delayed_refs->lock); |
720 | if (need_ref_seq(for_cow, ref_root)) | 815 | if (need_ref_seq(for_cow, ref_root)) |
721 | btrfs_qgroup_record_ref(trans, &ref->node, extent_op); | 816 | btrfs_qgroup_record_ref(trans, &ref->node, extent_op); |
@@ -744,8 +839,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, | |||
744 | num_bytes, BTRFS_UPDATE_DELAYED_HEAD, | 839 | num_bytes, BTRFS_UPDATE_DELAYED_HEAD, |
745 | extent_op->is_data); | 840 | extent_op->is_data); |
746 | 841 | ||
747 | if (waitqueue_active(&fs_info->tree_mod_seq_wait)) | ||
748 | wake_up(&fs_info->tree_mod_seq_wait); | ||
749 | spin_unlock(&delayed_refs->lock); | 842 | spin_unlock(&delayed_refs->lock); |
750 | return 0; | 843 | return 0; |
751 | } | 844 | } |
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 0d7c90c366b6..c9d703693df0 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -18,7 +18,7 @@ | |||
18 | #ifndef __DELAYED_REF__ | 18 | #ifndef __DELAYED_REF__ |
19 | #define __DELAYED_REF__ | 19 | #define __DELAYED_REF__ |
20 | 20 | ||
21 | /* these are the possible values of struct btrfs_delayed_ref->action */ | 21 | /* these are the possible values of struct btrfs_delayed_ref_node->action */ |
22 | #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ | 22 | #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ |
23 | #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ | 23 | #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ |
24 | #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ | 24 | #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ |
@@ -167,6 +167,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, | |||
167 | struct btrfs_trans_handle *trans, | 167 | struct btrfs_trans_handle *trans, |
168 | u64 bytenr, u64 num_bytes, | 168 | u64 bytenr, u64 num_bytes, |
169 | struct btrfs_delayed_extent_op *extent_op); | 169 | struct btrfs_delayed_extent_op *extent_op); |
170 | void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, | ||
171 | struct btrfs_fs_info *fs_info, | ||
172 | struct btrfs_delayed_ref_root *delayed_refs, | ||
173 | struct btrfs_delayed_ref_head *head); | ||
170 | 174 | ||
171 | struct btrfs_delayed_ref_head * | 175 | struct btrfs_delayed_ref_head * |
172 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); | 176 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 62e0cafd6e25..22e98e04c2ea 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -377,9 +377,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
377 | ret = read_extent_buffer_pages(io_tree, eb, start, | 377 | ret = read_extent_buffer_pages(io_tree, eb, start, |
378 | WAIT_COMPLETE, | 378 | WAIT_COMPLETE, |
379 | btree_get_extent, mirror_num); | 379 | btree_get_extent, mirror_num); |
380 | if (!ret && !verify_parent_transid(io_tree, eb, | 380 | if (!ret) { |
381 | if (!verify_parent_transid(io_tree, eb, | ||
381 | parent_transid, 0)) | 382 | parent_transid, 0)) |
382 | break; | 383 | break; |
384 | else | ||
385 | ret = -EIO; | ||
386 | } | ||
383 | 387 | ||
384 | /* | 388 | /* |
385 | * This buffer's crc is fine, but its contents are corrupted, so | 389 | * This buffer's crc is fine, but its contents are corrupted, so |
@@ -754,9 +758,7 @@ static void run_one_async_done(struct btrfs_work *work) | |||
754 | limit = btrfs_async_submit_limit(fs_info); | 758 | limit = btrfs_async_submit_limit(fs_info); |
755 | limit = limit * 2 / 3; | 759 | limit = limit * 2 / 3; |
756 | 760 | ||
757 | atomic_dec(&fs_info->nr_async_submits); | 761 | if (atomic_dec_return(&fs_info->nr_async_submits) < limit && |
758 | |||
759 | if (atomic_read(&fs_info->nr_async_submits) < limit && | ||
760 | waitqueue_active(&fs_info->async_submit_wait)) | 762 | waitqueue_active(&fs_info->async_submit_wait)) |
761 | wake_up(&fs_info->async_submit_wait); | 763 | wake_up(&fs_info->async_submit_wait); |
762 | 764 | ||
@@ -2032,8 +2034,6 @@ int open_ctree(struct super_block *sb, | |||
2032 | fs_info->free_chunk_space = 0; | 2034 | fs_info->free_chunk_space = 0; |
2033 | fs_info->tree_mod_log = RB_ROOT; | 2035 | fs_info->tree_mod_log = RB_ROOT; |
2034 | 2036 | ||
2035 | init_waitqueue_head(&fs_info->tree_mod_seq_wait); | ||
2036 | |||
2037 | /* readahead state */ | 2037 | /* readahead state */ |
2038 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); | 2038 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); |
2039 | spin_lock_init(&fs_info->reada_lock); | 2039 | spin_lock_init(&fs_info->reada_lock); |
@@ -2528,8 +2528,7 @@ retry_root_backup: | |||
2528 | goto fail_trans_kthread; | 2528 | goto fail_trans_kthread; |
2529 | 2529 | ||
2530 | /* do not make disk changes in broken FS */ | 2530 | /* do not make disk changes in broken FS */ |
2531 | if (btrfs_super_log_root(disk_super) != 0 && | 2531 | if (btrfs_super_log_root(disk_super) != 0) { |
2532 | !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { | ||
2533 | u64 bytenr = btrfs_super_log_root(disk_super); | 2532 | u64 bytenr = btrfs_super_log_root(disk_super); |
2534 | 2533 | ||
2535 | if (fs_devices->rw_devices == 0) { | 2534 | if (fs_devices->rw_devices == 0) { |
@@ -3189,30 +3188,14 @@ int close_ctree(struct btrfs_root *root) | |||
3189 | /* clear out the rbtree of defraggable inodes */ | 3188 | /* clear out the rbtree of defraggable inodes */ |
3190 | btrfs_run_defrag_inodes(fs_info); | 3189 | btrfs_run_defrag_inodes(fs_info); |
3191 | 3190 | ||
3192 | /* | ||
3193 | * Here come 2 situations when btrfs is broken to flip readonly: | ||
3194 | * | ||
3195 | * 1. when btrfs flips readonly somewhere else before | ||
3196 | * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, | ||
3197 | * and btrfs will skip to write sb directly to keep | ||
3198 | * ERROR state on disk. | ||
3199 | * | ||
3200 | * 2. when btrfs flips readonly just in btrfs_commit_super, | ||
3201 | * and in such case, btrfs cannot write sb via btrfs_commit_super, | ||
3202 | * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, | ||
3203 | * btrfs will cleanup all FS resources first and write sb then. | ||
3204 | */ | ||
3205 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 3191 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
3206 | ret = btrfs_commit_super(root); | 3192 | ret = btrfs_commit_super(root); |
3207 | if (ret) | 3193 | if (ret) |
3208 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 3194 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
3209 | } | 3195 | } |
3210 | 3196 | ||
3211 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 3197 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) |
3212 | ret = btrfs_error_commit_super(root); | 3198 | btrfs_error_commit_super(root); |
3213 | if (ret) | ||
3214 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | ||
3215 | } | ||
3216 | 3199 | ||
3217 | btrfs_put_block_group_cache(fs_info); | 3200 | btrfs_put_block_group_cache(fs_info); |
3218 | 3201 | ||
@@ -3434,18 +3417,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |||
3434 | if (read_only) | 3417 | if (read_only) |
3435 | return 0; | 3418 | return 0; |
3436 | 3419 | ||
3437 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
3438 | printk(KERN_WARNING "warning: mount fs with errors, " | ||
3439 | "running btrfsck is recommended\n"); | ||
3440 | } | ||
3441 | |||
3442 | return 0; | 3420 | return 0; |
3443 | } | 3421 | } |
3444 | 3422 | ||
3445 | int btrfs_error_commit_super(struct btrfs_root *root) | 3423 | void btrfs_error_commit_super(struct btrfs_root *root) |
3446 | { | 3424 | { |
3447 | int ret; | ||
3448 | |||
3449 | mutex_lock(&root->fs_info->cleaner_mutex); | 3425 | mutex_lock(&root->fs_info->cleaner_mutex); |
3450 | btrfs_run_delayed_iputs(root); | 3426 | btrfs_run_delayed_iputs(root); |
3451 | mutex_unlock(&root->fs_info->cleaner_mutex); | 3427 | mutex_unlock(&root->fs_info->cleaner_mutex); |
@@ -3455,10 +3431,6 @@ int btrfs_error_commit_super(struct btrfs_root *root) | |||
3455 | 3431 | ||
3456 | /* cleanup FS via transaction */ | 3432 | /* cleanup FS via transaction */ |
3457 | btrfs_cleanup_transaction(root); | 3433 | btrfs_cleanup_transaction(root); |
3458 | |||
3459 | ret = write_ctree_super(NULL, root, 0); | ||
3460 | |||
3461 | return ret; | ||
3462 | } | 3434 | } |
3463 | 3435 | ||
3464 | static void btrfs_destroy_ordered_operations(struct btrfs_root *root) | 3436 | static void btrfs_destroy_ordered_operations(struct btrfs_root *root) |
@@ -3782,14 +3754,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3782 | /* FIXME: cleanup wait for commit */ | 3754 | /* FIXME: cleanup wait for commit */ |
3783 | t->in_commit = 1; | 3755 | t->in_commit = 1; |
3784 | t->blocked = 1; | 3756 | t->blocked = 1; |
3757 | smp_mb(); | ||
3785 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) | 3758 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) |
3786 | wake_up(&root->fs_info->transaction_blocked_wait); | 3759 | wake_up(&root->fs_info->transaction_blocked_wait); |
3787 | 3760 | ||
3788 | t->blocked = 0; | 3761 | t->blocked = 0; |
3762 | smp_mb(); | ||
3789 | if (waitqueue_active(&root->fs_info->transaction_wait)) | 3763 | if (waitqueue_active(&root->fs_info->transaction_wait)) |
3790 | wake_up(&root->fs_info->transaction_wait); | 3764 | wake_up(&root->fs_info->transaction_wait); |
3791 | 3765 | ||
3792 | t->commit_done = 1; | 3766 | t->commit_done = 1; |
3767 | smp_mb(); | ||
3793 | if (waitqueue_active(&t->commit_wait)) | 3768 | if (waitqueue_active(&t->commit_wait)) |
3794 | wake_up(&t->commit_wait); | 3769 | wake_up(&t->commit_wait); |
3795 | 3770 | ||
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 95e147eea239..c5b00a735fef 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -54,7 +54,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
54 | struct btrfs_root *root, int max_mirrors); | 54 | struct btrfs_root *root, int max_mirrors); |
55 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); | 55 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); |
56 | int btrfs_commit_super(struct btrfs_root *root); | 56 | int btrfs_commit_super(struct btrfs_root *root); |
57 | int btrfs_error_commit_super(struct btrfs_root *root); | 57 | void btrfs_error_commit_super(struct btrfs_root *root); |
58 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 58 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
59 | u64 bytenr, u32 blocksize); | 59 | u64 bytenr, u32 blocksize); |
60 | struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | 60 | struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4e1b153b7c47..ba58024d40d3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -2252,6 +2252,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2252 | } | 2252 | } |
2253 | 2253 | ||
2254 | /* | 2254 | /* |
2255 | * We need to try and merge add/drops of the same ref since we | ||
2256 | * can run into issues with relocate dropping the implicit ref | ||
2257 | * and then it being added back again before the drop can | ||
2258 | * finish. If we merged anything we need to re-loop so we can | ||
2259 | * get a good ref. | ||
2260 | */ | ||
2261 | btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, | ||
2262 | locked_ref); | ||
2263 | |||
2264 | /* | ||
2255 | * locked_ref is the head node, so we have to go one | 2265 | * locked_ref is the head node, so we have to go one |
2256 | * node back for any delayed ref updates | 2266 | * node back for any delayed ref updates |
2257 | */ | 2267 | */ |
@@ -2318,12 +2328,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2318 | ref->in_tree = 0; | 2328 | ref->in_tree = 0; |
2319 | rb_erase(&ref->rb_node, &delayed_refs->root); | 2329 | rb_erase(&ref->rb_node, &delayed_refs->root); |
2320 | delayed_refs->num_entries--; | 2330 | delayed_refs->num_entries--; |
2321 | /* | 2331 | if (locked_ref) { |
2322 | * we modified num_entries, but as we're currently running | 2332 | /* |
2323 | * delayed refs, skip | 2333 | * when we play the delayed ref, also correct the |
2324 | * wake_up(&delayed_refs->seq_wait); | 2334 | * ref_mod on head |
2325 | * here. | 2335 | */ |
2326 | */ | 2336 | switch (ref->action) { |
2337 | case BTRFS_ADD_DELAYED_REF: | ||
2338 | case BTRFS_ADD_DELAYED_EXTENT: | ||
2339 | locked_ref->node.ref_mod -= ref->ref_mod; | ||
2340 | break; | ||
2341 | case BTRFS_DROP_DELAYED_REF: | ||
2342 | locked_ref->node.ref_mod += ref->ref_mod; | ||
2343 | break; | ||
2344 | default: | ||
2345 | WARN_ON(1); | ||
2346 | } | ||
2347 | } | ||
2327 | spin_unlock(&delayed_refs->lock); | 2348 | spin_unlock(&delayed_refs->lock); |
2328 | 2349 | ||
2329 | ret = run_one_delayed_ref(trans, root, ref, extent_op, | 2350 | ret = run_one_delayed_ref(trans, root, ref, extent_op, |
@@ -2350,22 +2371,6 @@ next: | |||
2350 | return count; | 2371 | return count; |
2351 | } | 2372 | } |
2352 | 2373 | ||
2353 | static void wait_for_more_refs(struct btrfs_fs_info *fs_info, | ||
2354 | struct btrfs_delayed_ref_root *delayed_refs, | ||
2355 | unsigned long num_refs, | ||
2356 | struct list_head *first_seq) | ||
2357 | { | ||
2358 | spin_unlock(&delayed_refs->lock); | ||
2359 | pr_debug("waiting for more refs (num %ld, first %p)\n", | ||
2360 | num_refs, first_seq); | ||
2361 | wait_event(fs_info->tree_mod_seq_wait, | ||
2362 | num_refs != delayed_refs->num_entries || | ||
2363 | fs_info->tree_mod_seq_list.next != first_seq); | ||
2364 | pr_debug("done waiting for more refs (num %ld, first %p)\n", | ||
2365 | delayed_refs->num_entries, fs_info->tree_mod_seq_list.next); | ||
2366 | spin_lock(&delayed_refs->lock); | ||
2367 | } | ||
2368 | |||
2369 | #ifdef SCRAMBLE_DELAYED_REFS | 2374 | #ifdef SCRAMBLE_DELAYED_REFS |
2370 | /* | 2375 | /* |
2371 | * Normally delayed refs get processed in ascending bytenr order. This | 2376 | * Normally delayed refs get processed in ascending bytenr order. This |
@@ -2460,13 +2465,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2460 | struct btrfs_delayed_ref_root *delayed_refs; | 2465 | struct btrfs_delayed_ref_root *delayed_refs; |
2461 | struct btrfs_delayed_ref_node *ref; | 2466 | struct btrfs_delayed_ref_node *ref; |
2462 | struct list_head cluster; | 2467 | struct list_head cluster; |
2463 | struct list_head *first_seq = NULL; | ||
2464 | int ret; | 2468 | int ret; |
2465 | u64 delayed_start; | 2469 | u64 delayed_start; |
2466 | int run_all = count == (unsigned long)-1; | 2470 | int run_all = count == (unsigned long)-1; |
2467 | int run_most = 0; | 2471 | int run_most = 0; |
2468 | unsigned long num_refs = 0; | 2472 | int loops; |
2469 | int consider_waiting; | ||
2470 | 2473 | ||
2471 | /* We'll clean this up in btrfs_cleanup_transaction */ | 2474 | /* We'll clean this up in btrfs_cleanup_transaction */ |
2472 | if (trans->aborted) | 2475 | if (trans->aborted) |
@@ -2484,7 +2487,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2484 | delayed_refs = &trans->transaction->delayed_refs; | 2487 | delayed_refs = &trans->transaction->delayed_refs; |
2485 | INIT_LIST_HEAD(&cluster); | 2488 | INIT_LIST_HEAD(&cluster); |
2486 | again: | 2489 | again: |
2487 | consider_waiting = 0; | 2490 | loops = 0; |
2488 | spin_lock(&delayed_refs->lock); | 2491 | spin_lock(&delayed_refs->lock); |
2489 | 2492 | ||
2490 | #ifdef SCRAMBLE_DELAYED_REFS | 2493 | #ifdef SCRAMBLE_DELAYED_REFS |
@@ -2512,31 +2515,6 @@ again: | |||
2512 | if (ret) | 2515 | if (ret) |
2513 | break; | 2516 | break; |
2514 | 2517 | ||
2515 | if (delayed_start >= delayed_refs->run_delayed_start) { | ||
2516 | if (consider_waiting == 0) { | ||
2517 | /* | ||
2518 | * btrfs_find_ref_cluster looped. let's do one | ||
2519 | * more cycle. if we don't run any delayed ref | ||
2520 | * during that cycle (because we can't because | ||
2521 | * all of them are blocked) and if the number of | ||
2522 | * refs doesn't change, we avoid busy waiting. | ||
2523 | */ | ||
2524 | consider_waiting = 1; | ||
2525 | num_refs = delayed_refs->num_entries; | ||
2526 | first_seq = root->fs_info->tree_mod_seq_list.next; | ||
2527 | } else { | ||
2528 | wait_for_more_refs(root->fs_info, delayed_refs, | ||
2529 | num_refs, first_seq); | ||
2530 | /* | ||
2531 | * after waiting, things have changed. we | ||
2532 | * dropped the lock and someone else might have | ||
2533 | * run some refs, built new clusters and so on. | ||
2534 | * therefore, we restart staleness detection. | ||
2535 | */ | ||
2536 | consider_waiting = 0; | ||
2537 | } | ||
2538 | } | ||
2539 | |||
2540 | ret = run_clustered_refs(trans, root, &cluster); | 2518 | ret = run_clustered_refs(trans, root, &cluster); |
2541 | if (ret < 0) { | 2519 | if (ret < 0) { |
2542 | spin_unlock(&delayed_refs->lock); | 2520 | spin_unlock(&delayed_refs->lock); |
@@ -2549,9 +2527,26 @@ again: | |||
2549 | if (count == 0) | 2527 | if (count == 0) |
2550 | break; | 2528 | break; |
2551 | 2529 | ||
2552 | if (ret || delayed_refs->run_delayed_start == 0) { | 2530 | if (delayed_start >= delayed_refs->run_delayed_start) { |
2531 | if (loops == 0) { | ||
2532 | /* | ||
2533 | * btrfs_find_ref_cluster looped. let's do one | ||
2534 | * more cycle. if we don't run any delayed ref | ||
2535 | * during that cycle (because we can't because | ||
2536 | * all of them are blocked), bail out. | ||
2537 | */ | ||
2538 | loops = 1; | ||
2539 | } else { | ||
2540 | /* | ||
2541 | * no runnable refs left, stop trying | ||
2542 | */ | ||
2543 | BUG_ON(run_all); | ||
2544 | break; | ||
2545 | } | ||
2546 | } | ||
2547 | if (ret) { | ||
2553 | /* refs were run, let's reset staleness detection */ | 2548 | /* refs were run, let's reset staleness detection */ |
2554 | consider_waiting = 0; | 2549 | loops = 0; |
2555 | } | 2550 | } |
2556 | } | 2551 | } |
2557 | 2552 | ||
@@ -3007,17 +3002,16 @@ again: | |||
3007 | } | 3002 | } |
3008 | spin_unlock(&block_group->lock); | 3003 | spin_unlock(&block_group->lock); |
3009 | 3004 | ||
3010 | num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); | 3005 | /* |
3006 | * Try to preallocate enough space based on how big the block group is. | ||
3007 | * Keep in mind this has to include any pinned space which could end up | ||
3008 | * taking up quite a bit since it's not folded into the other space | ||
3009 | * cache. | ||
3010 | */ | ||
3011 | num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024); | ||
3011 | if (!num_pages) | 3012 | if (!num_pages) |
3012 | num_pages = 1; | 3013 | num_pages = 1; |
3013 | 3014 | ||
3014 | /* | ||
3015 | * Just to make absolutely sure we have enough space, we're going to | ||
3016 | * preallocate 12 pages worth of space for each block group. In | ||
3017 | * practice we ought to use at most 8, but we need extra space so we can | ||
3018 | * add our header and have a terminator between the extents and the | ||
3019 | * bitmaps. | ||
3020 | */ | ||
3021 | num_pages *= 16; | 3015 | num_pages *= 16; |
3022 | num_pages *= PAGE_CACHE_SIZE; | 3016 | num_pages *= PAGE_CACHE_SIZE; |
3023 | 3017 | ||
@@ -4571,8 +4565,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4571 | if (root->fs_info->quota_enabled) { | 4565 | if (root->fs_info->quota_enabled) { |
4572 | ret = btrfs_qgroup_reserve(root, num_bytes + | 4566 | ret = btrfs_qgroup_reserve(root, num_bytes + |
4573 | nr_extents * root->leafsize); | 4567 | nr_extents * root->leafsize); |
4574 | if (ret) | 4568 | if (ret) { |
4569 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | ||
4575 | return ret; | 4570 | return ret; |
4571 | } | ||
4576 | } | 4572 | } |
4577 | 4573 | ||
4578 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); | 4574 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); |
@@ -5294,9 +5290,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
5294 | rb_erase(&head->node.rb_node, &delayed_refs->root); | 5290 | rb_erase(&head->node.rb_node, &delayed_refs->root); |
5295 | 5291 | ||
5296 | delayed_refs->num_entries--; | 5292 | delayed_refs->num_entries--; |
5297 | smp_mb(); | ||
5298 | if (waitqueue_active(&root->fs_info->tree_mod_seq_wait)) | ||
5299 | wake_up(&root->fs_info->tree_mod_seq_wait); | ||
5300 | 5293 | ||
5301 | /* | 5294 | /* |
5302 | * we don't take a ref on the node because we're removing it from the | 5295 | * we don't take a ref on the node because we're removing it from the |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 45c81bb4ac82..b08ea4717e9d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -107,6 +107,12 @@ void extent_io_exit(void) | |||
107 | list_del(&eb->leak_list); | 107 | list_del(&eb->leak_list); |
108 | kmem_cache_free(extent_buffer_cache, eb); | 108 | kmem_cache_free(extent_buffer_cache, eb); |
109 | } | 109 | } |
110 | |||
111 | /* | ||
112 | * Make sure all delayed rcu free are flushed before we | ||
113 | * destroy caches. | ||
114 | */ | ||
115 | rcu_barrier(); | ||
110 | if (extent_state_cache) | 116 | if (extent_state_cache) |
111 | kmem_cache_destroy(extent_state_cache); | 117 | kmem_cache_destroy(extent_state_cache); |
112 | if (extent_buffer_cache) | 118 | if (extent_buffer_cache) |
@@ -2330,23 +2336,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2330 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { | 2336 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { |
2331 | ret = tree->ops->readpage_end_io_hook(page, start, end, | 2337 | ret = tree->ops->readpage_end_io_hook(page, start, end, |
2332 | state, mirror); | 2338 | state, mirror); |
2333 | if (ret) { | 2339 | if (ret) |
2334 | /* no IO indicated but software detected errors | ||
2335 | * in the block, either checksum errors or | ||
2336 | * issues with the contents */ | ||
2337 | struct btrfs_root *root = | ||
2338 | BTRFS_I(page->mapping->host)->root; | ||
2339 | struct btrfs_device *device; | ||
2340 | |||
2341 | uptodate = 0; | 2340 | uptodate = 0; |
2342 | device = btrfs_find_device_for_logical( | 2341 | else |
2343 | root, start, mirror); | ||
2344 | if (device) | ||
2345 | btrfs_dev_stat_inc_and_print(device, | ||
2346 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | ||
2347 | } else { | ||
2348 | clean_io_failure(start, page); | 2342 | clean_io_failure(start, page); |
2349 | } | ||
2350 | } | 2343 | } |
2351 | 2344 | ||
2352 | if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { | 2345 | if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b45b9de0c21d..857d93cd01dc 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -272,9 +272,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | |||
272 | } | 272 | } |
273 | 273 | ||
274 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | 274 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, |
275 | struct bio *bio, u64 offset, u32 *dst) | 275 | struct bio *bio, u64 offset) |
276 | { | 276 | { |
277 | return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); | 277 | return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1); |
278 | } | 278 | } |
279 | 279 | ||
280 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 280 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5caf285c6e4d..f6b40e86121b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1599,6 +1599,7 @@ out: | |||
1599 | static const struct vm_operations_struct btrfs_file_vm_ops = { | 1599 | static const struct vm_operations_struct btrfs_file_vm_ops = { |
1600 | .fault = filemap_fault, | 1600 | .fault = filemap_fault, |
1601 | .page_mkwrite = btrfs_page_mkwrite, | 1601 | .page_mkwrite = btrfs_page_mkwrite, |
1602 | .remap_pages = generic_file_remap_pages, | ||
1602 | }; | 1603 | }; |
1603 | 1604 | ||
1604 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | 1605 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) |
@@ -1610,7 +1611,6 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | |||
1610 | 1611 | ||
1611 | file_accessed(filp); | 1612 | file_accessed(filp); |
1612 | vma->vm_ops = &btrfs_file_vm_ops; | 1613 | vma->vm_ops = &btrfs_file_vm_ops; |
1613 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
1614 | 1614 | ||
1615 | return 0; | 1615 | return 0; |
1616 | } | 1616 | } |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6e8f416773d4..a6ed6944e50c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -1008,9 +1008,7 @@ static noinline void async_cow_submit(struct btrfs_work *work) | |||
1008 | nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >> | 1008 | nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >> |
1009 | PAGE_CACHE_SHIFT; | 1009 | PAGE_CACHE_SHIFT; |
1010 | 1010 | ||
1011 | atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages); | 1011 | if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) < |
1012 | |||
1013 | if (atomic_read(&root->fs_info->async_delalloc_pages) < | ||
1014 | 5 * 1024 * 1024 && | 1012 | 5 * 1024 * 1024 && |
1015 | waitqueue_active(&root->fs_info->async_submit_wait)) | 1013 | waitqueue_active(&root->fs_info->async_submit_wait)) |
1016 | wake_up(&root->fs_info->async_submit_wait); | 1014 | wake_up(&root->fs_info->async_submit_wait); |
@@ -1885,8 +1883,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
1885 | trans = btrfs_join_transaction_nolock(root); | 1883 | trans = btrfs_join_transaction_nolock(root); |
1886 | else | 1884 | else |
1887 | trans = btrfs_join_transaction(root); | 1885 | trans = btrfs_join_transaction(root); |
1888 | if (IS_ERR(trans)) | 1886 | if (IS_ERR(trans)) { |
1889 | return PTR_ERR(trans); | 1887 | ret = PTR_ERR(trans); |
1888 | trans = NULL; | ||
1889 | goto out; | ||
1890 | } | ||
1890 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1891 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1891 | ret = btrfs_update_inode_fallback(trans, root, inode); | 1892 | ret = btrfs_update_inode_fallback(trans, root, inode); |
1892 | if (ret) /* -ENOMEM or corruption */ | 1893 | if (ret) /* -ENOMEM or corruption */ |
@@ -1970,8 +1971,8 @@ out: | |||
1970 | ordered_extent->len - 1, NULL, GFP_NOFS); | 1971 | ordered_extent->len - 1, NULL, GFP_NOFS); |
1971 | 1972 | ||
1972 | /* | 1973 | /* |
1973 | * This needs to be dont to make sure anybody waiting knows we are done | 1974 | * This needs to be done to make sure anybody waiting knows we are done |
1974 | * upating everything for this ordered extent. | 1975 | * updating everything for this ordered extent. |
1975 | */ | 1976 | */ |
1976 | btrfs_remove_ordered_extent(inode, ordered_extent); | 1977 | btrfs_remove_ordered_extent(inode, ordered_extent); |
1977 | 1978 | ||
@@ -2571,8 +2572,8 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2571 | struct btrfs_inode_item); | 2572 | struct btrfs_inode_item); |
2572 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); | 2573 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); |
2573 | set_nlink(inode, btrfs_inode_nlink(leaf, inode_item)); | 2574 | set_nlink(inode, btrfs_inode_nlink(leaf, inode_item)); |
2574 | inode->i_uid = btrfs_inode_uid(leaf, inode_item); | 2575 | i_uid_write(inode, btrfs_inode_uid(leaf, inode_item)); |
2575 | inode->i_gid = btrfs_inode_gid(leaf, inode_item); | 2576 | i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); |
2576 | btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); | 2577 | btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); |
2577 | 2578 | ||
2578 | tspec = btrfs_inode_atime(inode_item); | 2579 | tspec = btrfs_inode_atime(inode_item); |
@@ -2650,8 +2651,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2650 | struct btrfs_inode_item *item, | 2651 | struct btrfs_inode_item *item, |
2651 | struct inode *inode) | 2652 | struct inode *inode) |
2652 | { | 2653 | { |
2653 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | 2654 | btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); |
2654 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | 2655 | btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); |
2655 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); | 2656 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); |
2656 | btrfs_set_inode_mode(leaf, item, inode->i_mode); | 2657 | btrfs_set_inode_mode(leaf, item, inode->i_mode); |
2657 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); | 2658 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); |
@@ -3174,7 +3175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | |||
3174 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 3175 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
3175 | inode_inc_iversion(dir); | 3176 | inode_inc_iversion(dir); |
3176 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 3177 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
3177 | ret = btrfs_update_inode(trans, root, dir); | 3178 | ret = btrfs_update_inode_fallback(trans, root, dir); |
3178 | if (ret) | 3179 | if (ret) |
3179 | btrfs_abort_transaction(trans, root, ret); | 3180 | btrfs_abort_transaction(trans, root, ret); |
3180 | out: | 3181 | out: |
@@ -5774,18 +5775,112 @@ out: | |||
5774 | return ret; | 5775 | return ret; |
5775 | } | 5776 | } |
5776 | 5777 | ||
5778 | static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, | ||
5779 | struct extent_state **cached_state, int writing) | ||
5780 | { | ||
5781 | struct btrfs_ordered_extent *ordered; | ||
5782 | int ret = 0; | ||
5783 | |||
5784 | while (1) { | ||
5785 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5786 | 0, cached_state); | ||
5787 | /* | ||
5788 | * We're concerned with the entire range that we're going to be | ||
5789 | * doing DIO to, so we need to make sure theres no ordered | ||
5790 | * extents in this range. | ||
5791 | */ | ||
5792 | ordered = btrfs_lookup_ordered_range(inode, lockstart, | ||
5793 | lockend - lockstart + 1); | ||
5794 | |||
5795 | /* | ||
5796 | * We need to make sure there are no buffered pages in this | ||
5797 | * range either, we could have raced between the invalidate in | ||
5798 | * generic_file_direct_write and locking the extent. The | ||
5799 | * invalidate needs to happen so that reads after a write do not | ||
5800 | * get stale data. | ||
5801 | */ | ||
5802 | if (!ordered && (!writing || | ||
5803 | !test_range_bit(&BTRFS_I(inode)->io_tree, | ||
5804 | lockstart, lockend, EXTENT_UPTODATE, 0, | ||
5805 | *cached_state))) | ||
5806 | break; | ||
5807 | |||
5808 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5809 | cached_state, GFP_NOFS); | ||
5810 | |||
5811 | if (ordered) { | ||
5812 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
5813 | btrfs_put_ordered_extent(ordered); | ||
5814 | } else { | ||
5815 | /* Screw you mmap */ | ||
5816 | ret = filemap_write_and_wait_range(inode->i_mapping, | ||
5817 | lockstart, | ||
5818 | lockend); | ||
5819 | if (ret) | ||
5820 | break; | ||
5821 | |||
5822 | /* | ||
5823 | * If we found a page that couldn't be invalidated just | ||
5824 | * fall back to buffered. | ||
5825 | */ | ||
5826 | ret = invalidate_inode_pages2_range(inode->i_mapping, | ||
5827 | lockstart >> PAGE_CACHE_SHIFT, | ||
5828 | lockend >> PAGE_CACHE_SHIFT); | ||
5829 | if (ret) | ||
5830 | break; | ||
5831 | } | ||
5832 | |||
5833 | cond_resched(); | ||
5834 | } | ||
5835 | |||
5836 | return ret; | ||
5837 | } | ||
5838 | |||
5777 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | 5839 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, |
5778 | struct buffer_head *bh_result, int create) | 5840 | struct buffer_head *bh_result, int create) |
5779 | { | 5841 | { |
5780 | struct extent_map *em; | 5842 | struct extent_map *em; |
5781 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5843 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5844 | struct extent_state *cached_state = NULL; | ||
5782 | u64 start = iblock << inode->i_blkbits; | 5845 | u64 start = iblock << inode->i_blkbits; |
5846 | u64 lockstart, lockend; | ||
5783 | u64 len = bh_result->b_size; | 5847 | u64 len = bh_result->b_size; |
5784 | struct btrfs_trans_handle *trans; | 5848 | struct btrfs_trans_handle *trans; |
5849 | int unlock_bits = EXTENT_LOCKED; | ||
5850 | int ret; | ||
5851 | |||
5852 | if (create) { | ||
5853 | ret = btrfs_delalloc_reserve_space(inode, len); | ||
5854 | if (ret) | ||
5855 | return ret; | ||
5856 | unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; | ||
5857 | } else { | ||
5858 | len = min_t(u64, len, root->sectorsize); | ||
5859 | } | ||
5860 | |||
5861 | lockstart = start; | ||
5862 | lockend = start + len - 1; | ||
5863 | |||
5864 | /* | ||
5865 | * If this errors out it's because we couldn't invalidate pagecache for | ||
5866 | * this range and we need to fallback to buffered. | ||
5867 | */ | ||
5868 | if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) | ||
5869 | return -ENOTBLK; | ||
5870 | |||
5871 | if (create) { | ||
5872 | ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
5873 | lockend, EXTENT_DELALLOC, NULL, | ||
5874 | &cached_state, GFP_NOFS); | ||
5875 | if (ret) | ||
5876 | goto unlock_err; | ||
5877 | } | ||
5785 | 5878 | ||
5786 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | 5879 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); |
5787 | if (IS_ERR(em)) | 5880 | if (IS_ERR(em)) { |
5788 | return PTR_ERR(em); | 5881 | ret = PTR_ERR(em); |
5882 | goto unlock_err; | ||
5883 | } | ||
5789 | 5884 | ||
5790 | /* | 5885 | /* |
5791 | * Ok for INLINE and COMPRESSED extents we need to fallback on buffered | 5886 | * Ok for INLINE and COMPRESSED extents we need to fallback on buffered |
@@ -5804,17 +5899,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
5804 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || | 5899 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || |
5805 | em->block_start == EXTENT_MAP_INLINE) { | 5900 | em->block_start == EXTENT_MAP_INLINE) { |
5806 | free_extent_map(em); | 5901 | free_extent_map(em); |
5807 | return -ENOTBLK; | 5902 | ret = -ENOTBLK; |
5903 | goto unlock_err; | ||
5808 | } | 5904 | } |
5809 | 5905 | ||
5810 | /* Just a good old fashioned hole, return */ | 5906 | /* Just a good old fashioned hole, return */ |
5811 | if (!create && (em->block_start == EXTENT_MAP_HOLE || | 5907 | if (!create && (em->block_start == EXTENT_MAP_HOLE || |
5812 | test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | 5908 | test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { |
5813 | free_extent_map(em); | 5909 | free_extent_map(em); |
5814 | /* DIO will do one hole at a time, so just unlock a sector */ | 5910 | ret = 0; |
5815 | unlock_extent(&BTRFS_I(inode)->io_tree, start, | 5911 | goto unlock_err; |
5816 | start + root->sectorsize - 1); | ||
5817 | return 0; | ||
5818 | } | 5912 | } |
5819 | 5913 | ||
5820 | /* | 5914 | /* |
@@ -5827,8 +5921,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
5827 | * | 5921 | * |
5828 | */ | 5922 | */ |
5829 | if (!create) { | 5923 | if (!create) { |
5830 | len = em->len - (start - em->start); | 5924 | len = min(len, em->len - (start - em->start)); |
5831 | goto map; | 5925 | lockstart = start + len; |
5926 | goto unlock; | ||
5832 | } | 5927 | } |
5833 | 5928 | ||
5834 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || | 5929 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || |
@@ -5860,7 +5955,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
5860 | btrfs_end_transaction(trans, root); | 5955 | btrfs_end_transaction(trans, root); |
5861 | if (ret) { | 5956 | if (ret) { |
5862 | free_extent_map(em); | 5957 | free_extent_map(em); |
5863 | return ret; | 5958 | goto unlock_err; |
5864 | } | 5959 | } |
5865 | goto unlock; | 5960 | goto unlock; |
5866 | } | 5961 | } |
@@ -5873,14 +5968,12 @@ must_cow: | |||
5873 | */ | 5968 | */ |
5874 | len = bh_result->b_size; | 5969 | len = bh_result->b_size; |
5875 | em = btrfs_new_extent_direct(inode, em, start, len); | 5970 | em = btrfs_new_extent_direct(inode, em, start, len); |
5876 | if (IS_ERR(em)) | 5971 | if (IS_ERR(em)) { |
5877 | return PTR_ERR(em); | 5972 | ret = PTR_ERR(em); |
5973 | goto unlock_err; | ||
5974 | } | ||
5878 | len = min(len, em->len - (start - em->start)); | 5975 | len = min(len, em->len - (start - em->start)); |
5879 | unlock: | 5976 | unlock: |
5880 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1, | ||
5881 | EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1, | ||
5882 | 0, NULL, GFP_NOFS); | ||
5883 | map: | ||
5884 | bh_result->b_blocknr = (em->block_start + (start - em->start)) >> | 5977 | bh_result->b_blocknr = (em->block_start + (start - em->start)) >> |
5885 | inode->i_blkbits; | 5978 | inode->i_blkbits; |
5886 | bh_result->b_size = len; | 5979 | bh_result->b_size = len; |
@@ -5898,9 +5991,44 @@ map: | |||
5898 | i_size_write(inode, start + len); | 5991 | i_size_write(inode, start + len); |
5899 | } | 5992 | } |
5900 | 5993 | ||
5994 | /* | ||
5995 | * In the case of write we need to clear and unlock the entire range, | ||
5996 | * in the case of read we need to unlock only the end area that we | ||
5997 | * aren't using if there is any left over space. | ||
5998 | */ | ||
5999 | if (lockstart < lockend) { | ||
6000 | if (create && len < lockend - lockstart) { | ||
6001 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
6002 | lockstart + len - 1, unlock_bits, 1, 0, | ||
6003 | &cached_state, GFP_NOFS); | ||
6004 | /* | ||
6005 | * Beside unlock, we also need to cleanup reserved space | ||
6006 | * for the left range by attaching EXTENT_DO_ACCOUNTING. | ||
6007 | */ | ||
6008 | clear_extent_bit(&BTRFS_I(inode)->io_tree, | ||
6009 | lockstart + len, lockend, | ||
6010 | unlock_bits | EXTENT_DO_ACCOUNTING, | ||
6011 | 1, 0, NULL, GFP_NOFS); | ||
6012 | } else { | ||
6013 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
6014 | lockend, unlock_bits, 1, 0, | ||
6015 | &cached_state, GFP_NOFS); | ||
6016 | } | ||
6017 | } else { | ||
6018 | free_extent_state(cached_state); | ||
6019 | } | ||
6020 | |||
5901 | free_extent_map(em); | 6021 | free_extent_map(em); |
5902 | 6022 | ||
5903 | return 0; | 6023 | return 0; |
6024 | |||
6025 | unlock_err: | ||
6026 | if (create) | ||
6027 | unlock_bits |= EXTENT_DO_ACCOUNTING; | ||
6028 | |||
6029 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
6030 | unlock_bits, 1, 0, &cached_state, GFP_NOFS); | ||
6031 | return ret; | ||
5904 | } | 6032 | } |
5905 | 6033 | ||
5906 | struct btrfs_dio_private { | 6034 | struct btrfs_dio_private { |
@@ -5908,7 +6036,6 @@ struct btrfs_dio_private { | |||
5908 | u64 logical_offset; | 6036 | u64 logical_offset; |
5909 | u64 disk_bytenr; | 6037 | u64 disk_bytenr; |
5910 | u64 bytes; | 6038 | u64 bytes; |
5911 | u32 *csums; | ||
5912 | void *private; | 6039 | void *private; |
5913 | 6040 | ||
5914 | /* number of bios pending for this dio */ | 6041 | /* number of bios pending for this dio */ |
@@ -5928,7 +6055,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
5928 | struct inode *inode = dip->inode; | 6055 | struct inode *inode = dip->inode; |
5929 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6056 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5930 | u64 start; | 6057 | u64 start; |
5931 | u32 *private = dip->csums; | ||
5932 | 6058 | ||
5933 | start = dip->logical_offset; | 6059 | start = dip->logical_offset; |
5934 | do { | 6060 | do { |
@@ -5936,8 +6062,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
5936 | struct page *page = bvec->bv_page; | 6062 | struct page *page = bvec->bv_page; |
5937 | char *kaddr; | 6063 | char *kaddr; |
5938 | u32 csum = ~(u32)0; | 6064 | u32 csum = ~(u32)0; |
6065 | u64 private = ~(u32)0; | ||
5939 | unsigned long flags; | 6066 | unsigned long flags; |
5940 | 6067 | ||
6068 | if (get_state_private(&BTRFS_I(inode)->io_tree, | ||
6069 | start, &private)) | ||
6070 | goto failed; | ||
5941 | local_irq_save(flags); | 6071 | local_irq_save(flags); |
5942 | kaddr = kmap_atomic(page); | 6072 | kaddr = kmap_atomic(page); |
5943 | csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, | 6073 | csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, |
@@ -5947,18 +6077,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
5947 | local_irq_restore(flags); | 6077 | local_irq_restore(flags); |
5948 | 6078 | ||
5949 | flush_dcache_page(bvec->bv_page); | 6079 | flush_dcache_page(bvec->bv_page); |
5950 | if (csum != *private) { | 6080 | if (csum != private) { |
6081 | failed: | ||
5951 | printk(KERN_ERR "btrfs csum failed ino %llu off" | 6082 | printk(KERN_ERR "btrfs csum failed ino %llu off" |
5952 | " %llu csum %u private %u\n", | 6083 | " %llu csum %u private %u\n", |
5953 | (unsigned long long)btrfs_ino(inode), | 6084 | (unsigned long long)btrfs_ino(inode), |
5954 | (unsigned long long)start, | 6085 | (unsigned long long)start, |
5955 | csum, *private); | 6086 | csum, (unsigned)private); |
5956 | err = -EIO; | 6087 | err = -EIO; |
5957 | } | 6088 | } |
5958 | } | 6089 | } |
5959 | 6090 | ||
5960 | start += bvec->bv_len; | 6091 | start += bvec->bv_len; |
5961 | private++; | ||
5962 | bvec++; | 6092 | bvec++; |
5963 | } while (bvec <= bvec_end); | 6093 | } while (bvec <= bvec_end); |
5964 | 6094 | ||
@@ -5966,7 +6096,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
5966 | dip->logical_offset + dip->bytes - 1); | 6096 | dip->logical_offset + dip->bytes - 1); |
5967 | bio->bi_private = dip->private; | 6097 | bio->bi_private = dip->private; |
5968 | 6098 | ||
5969 | kfree(dip->csums); | ||
5970 | kfree(dip); | 6099 | kfree(dip); |
5971 | 6100 | ||
5972 | /* If we had a csum failure make sure to clear the uptodate flag */ | 6101 | /* If we had a csum failure make sure to clear the uptodate flag */ |
@@ -6072,7 +6201,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, | |||
6072 | 6201 | ||
6073 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | 6202 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, |
6074 | int rw, u64 file_offset, int skip_sum, | 6203 | int rw, u64 file_offset, int skip_sum, |
6075 | u32 *csums, int async_submit) | 6204 | int async_submit) |
6076 | { | 6205 | { |
6077 | int write = rw & REQ_WRITE; | 6206 | int write = rw & REQ_WRITE; |
6078 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6207 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -6105,8 +6234,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
6105 | if (ret) | 6234 | if (ret) |
6106 | goto err; | 6235 | goto err; |
6107 | } else if (!skip_sum) { | 6236 | } else if (!skip_sum) { |
6108 | ret = btrfs_lookup_bio_sums_dio(root, inode, bio, | 6237 | ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset); |
6109 | file_offset, csums); | ||
6110 | if (ret) | 6238 | if (ret) |
6111 | goto err; | 6239 | goto err; |
6112 | } | 6240 | } |
@@ -6132,10 +6260,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
6132 | u64 submit_len = 0; | 6260 | u64 submit_len = 0; |
6133 | u64 map_length; | 6261 | u64 map_length; |
6134 | int nr_pages = 0; | 6262 | int nr_pages = 0; |
6135 | u32 *csums = dip->csums; | ||
6136 | int ret = 0; | 6263 | int ret = 0; |
6137 | int async_submit = 0; | 6264 | int async_submit = 0; |
6138 | int write = rw & REQ_WRITE; | ||
6139 | 6265 | ||
6140 | map_length = orig_bio->bi_size; | 6266 | map_length = orig_bio->bi_size; |
6141 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | 6267 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, |
@@ -6171,16 +6297,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
6171 | atomic_inc(&dip->pending_bios); | 6297 | atomic_inc(&dip->pending_bios); |
6172 | ret = __btrfs_submit_dio_bio(bio, inode, rw, | 6298 | ret = __btrfs_submit_dio_bio(bio, inode, rw, |
6173 | file_offset, skip_sum, | 6299 | file_offset, skip_sum, |
6174 | csums, async_submit); | 6300 | async_submit); |
6175 | if (ret) { | 6301 | if (ret) { |
6176 | bio_put(bio); | 6302 | bio_put(bio); |
6177 | atomic_dec(&dip->pending_bios); | 6303 | atomic_dec(&dip->pending_bios); |
6178 | goto out_err; | 6304 | goto out_err; |
6179 | } | 6305 | } |
6180 | 6306 | ||
6181 | /* Write's use the ordered csums */ | ||
6182 | if (!write && !skip_sum) | ||
6183 | csums = csums + nr_pages; | ||
6184 | start_sector += submit_len >> 9; | 6307 | start_sector += submit_len >> 9; |
6185 | file_offset += submit_len; | 6308 | file_offset += submit_len; |
6186 | 6309 | ||
@@ -6210,7 +6333,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
6210 | 6333 | ||
6211 | submit: | 6334 | submit: |
6212 | ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, | 6335 | ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, |
6213 | csums, async_submit); | 6336 | async_submit); |
6214 | if (!ret) | 6337 | if (!ret) |
6215 | return 0; | 6338 | return 0; |
6216 | 6339 | ||
@@ -6246,17 +6369,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | |||
6246 | ret = -ENOMEM; | 6369 | ret = -ENOMEM; |
6247 | goto free_ordered; | 6370 | goto free_ordered; |
6248 | } | 6371 | } |
6249 | dip->csums = NULL; | ||
6250 | |||
6251 | /* Write's use the ordered csum stuff, so we don't need dip->csums */ | ||
6252 | if (!write && !skip_sum) { | ||
6253 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); | ||
6254 | if (!dip->csums) { | ||
6255 | kfree(dip); | ||
6256 | ret = -ENOMEM; | ||
6257 | goto free_ordered; | ||
6258 | } | ||
6259 | } | ||
6260 | 6372 | ||
6261 | dip->private = bio->bi_private; | 6373 | dip->private = bio->bi_private; |
6262 | dip->inode = inode; | 6374 | dip->inode = inode; |
@@ -6341,132 +6453,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io | |||
6341 | out: | 6453 | out: |
6342 | return retval; | 6454 | return retval; |
6343 | } | 6455 | } |
6456 | |||
6344 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | 6457 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, |
6345 | const struct iovec *iov, loff_t offset, | 6458 | const struct iovec *iov, loff_t offset, |
6346 | unsigned long nr_segs) | 6459 | unsigned long nr_segs) |
6347 | { | 6460 | { |
6348 | struct file *file = iocb->ki_filp; | 6461 | struct file *file = iocb->ki_filp; |
6349 | struct inode *inode = file->f_mapping->host; | 6462 | struct inode *inode = file->f_mapping->host; |
6350 | struct btrfs_ordered_extent *ordered; | ||
6351 | struct extent_state *cached_state = NULL; | ||
6352 | u64 lockstart, lockend; | ||
6353 | ssize_t ret; | ||
6354 | int writing = rw & WRITE; | ||
6355 | int write_bits = 0; | ||
6356 | size_t count = iov_length(iov, nr_segs); | ||
6357 | 6463 | ||
6358 | if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, | 6464 | if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, |
6359 | offset, nr_segs)) { | 6465 | offset, nr_segs)) |
6360 | return 0; | 6466 | return 0; |
6361 | } | ||
6362 | |||
6363 | lockstart = offset; | ||
6364 | lockend = offset + count - 1; | ||
6365 | 6467 | ||
6366 | if (writing) { | 6468 | return __blockdev_direct_IO(rw, iocb, inode, |
6367 | ret = btrfs_delalloc_reserve_space(inode, count); | ||
6368 | if (ret) | ||
6369 | goto out; | ||
6370 | } | ||
6371 | |||
6372 | while (1) { | ||
6373 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
6374 | 0, &cached_state); | ||
6375 | /* | ||
6376 | * We're concerned with the entire range that we're going to be | ||
6377 | * doing DIO to, so we need to make sure theres no ordered | ||
6378 | * extents in this range. | ||
6379 | */ | ||
6380 | ordered = btrfs_lookup_ordered_range(inode, lockstart, | ||
6381 | lockend - lockstart + 1); | ||
6382 | |||
6383 | /* | ||
6384 | * We need to make sure there are no buffered pages in this | ||
6385 | * range either, we could have raced between the invalidate in | ||
6386 | * generic_file_direct_write and locking the extent. The | ||
6387 | * invalidate needs to happen so that reads after a write do not | ||
6388 | * get stale data. | ||
6389 | */ | ||
6390 | if (!ordered && (!writing || | ||
6391 | !test_range_bit(&BTRFS_I(inode)->io_tree, | ||
6392 | lockstart, lockend, EXTENT_UPTODATE, 0, | ||
6393 | cached_state))) | ||
6394 | break; | ||
6395 | |||
6396 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
6397 | &cached_state, GFP_NOFS); | ||
6398 | |||
6399 | if (ordered) { | ||
6400 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
6401 | btrfs_put_ordered_extent(ordered); | ||
6402 | } else { | ||
6403 | /* Screw you mmap */ | ||
6404 | ret = filemap_write_and_wait_range(file->f_mapping, | ||
6405 | lockstart, | ||
6406 | lockend); | ||
6407 | if (ret) | ||
6408 | goto out; | ||
6409 | |||
6410 | /* | ||
6411 | * If we found a page that couldn't be invalidated just | ||
6412 | * fall back to buffered. | ||
6413 | */ | ||
6414 | ret = invalidate_inode_pages2_range(file->f_mapping, | ||
6415 | lockstart >> PAGE_CACHE_SHIFT, | ||
6416 | lockend >> PAGE_CACHE_SHIFT); | ||
6417 | if (ret) { | ||
6418 | if (ret == -EBUSY) | ||
6419 | ret = 0; | ||
6420 | goto out; | ||
6421 | } | ||
6422 | } | ||
6423 | |||
6424 | cond_resched(); | ||
6425 | } | ||
6426 | |||
6427 | /* | ||
6428 | * we don't use btrfs_set_extent_delalloc because we don't want | ||
6429 | * the dirty or uptodate bits | ||
6430 | */ | ||
6431 | if (writing) { | ||
6432 | write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING; | ||
6433 | ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
6434 | EXTENT_DELALLOC, NULL, &cached_state, | ||
6435 | GFP_NOFS); | ||
6436 | if (ret) { | ||
6437 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
6438 | lockend, EXTENT_LOCKED | write_bits, | ||
6439 | 1, 0, &cached_state, GFP_NOFS); | ||
6440 | goto out; | ||
6441 | } | ||
6442 | } | ||
6443 | |||
6444 | free_extent_state(cached_state); | ||
6445 | cached_state = NULL; | ||
6446 | |||
6447 | ret = __blockdev_direct_IO(rw, iocb, inode, | ||
6448 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, | 6469 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, |
6449 | iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, | 6470 | iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, |
6450 | btrfs_submit_direct, 0); | 6471 | btrfs_submit_direct, 0); |
6451 | |||
6452 | if (ret < 0 && ret != -EIOCBQUEUED) { | ||
6453 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset, | ||
6454 | offset + iov_length(iov, nr_segs) - 1, | ||
6455 | EXTENT_LOCKED | write_bits, 1, 0, | ||
6456 | &cached_state, GFP_NOFS); | ||
6457 | } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) { | ||
6458 | /* | ||
6459 | * We're falling back to buffered, unlock the section we didn't | ||
6460 | * do IO on. | ||
6461 | */ | ||
6462 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret, | ||
6463 | offset + iov_length(iov, nr_segs) - 1, | ||
6464 | EXTENT_LOCKED | write_bits, 1, 0, | ||
6465 | &cached_state, GFP_NOFS); | ||
6466 | } | ||
6467 | out: | ||
6468 | free_extent_state(cached_state); | ||
6469 | return ret; | ||
6470 | } | 6472 | } |
6471 | 6473 | ||
6472 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 6474 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
@@ -7074,6 +7076,11 @@ static void init_once(void *foo) | |||
7074 | 7076 | ||
7075 | void btrfs_destroy_cachep(void) | 7077 | void btrfs_destroy_cachep(void) |
7076 | { | 7078 | { |
7079 | /* | ||
7080 | * Make sure all delayed rcu free inodes are flushed before we | ||
7081 | * destroy cache. | ||
7082 | */ | ||
7083 | rcu_barrier(); | ||
7077 | if (btrfs_inode_cachep) | 7084 | if (btrfs_inode_cachep) |
7078 | kmem_cache_destroy(btrfs_inode_cachep); | 7085 | kmem_cache_destroy(btrfs_inode_cachep); |
7079 | if (btrfs_trans_handle_cachep) | 7086 | if (btrfs_trans_handle_cachep) |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7bb755677a22..47127c1bd290 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -424,7 +424,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
424 | uuid_le_gen(&new_uuid); | 424 | uuid_le_gen(&new_uuid); |
425 | memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); | 425 | memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); |
426 | root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); | 426 | root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); |
427 | root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec); | 427 | root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec); |
428 | root_item.ctime = root_item.otime; | 428 | root_item.ctime = root_item.otime; |
429 | btrfs_set_root_ctransid(&root_item, trans->transid); | 429 | btrfs_set_root_ctransid(&root_item, trans->transid); |
430 | btrfs_set_root_otransid(&root_item, trans->transid); | 430 | btrfs_set_root_otransid(&root_item, trans->transid); |
@@ -575,13 +575,13 @@ fail: | |||
575 | */ | 575 | */ |
576 | static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) | 576 | static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) |
577 | { | 577 | { |
578 | uid_t fsuid = current_fsuid(); | 578 | kuid_t fsuid = current_fsuid(); |
579 | 579 | ||
580 | if (!(dir->i_mode & S_ISVTX)) | 580 | if (!(dir->i_mode & S_ISVTX)) |
581 | return 0; | 581 | return 0; |
582 | if (inode->i_uid == fsuid) | 582 | if (uid_eq(inode->i_uid, fsuid)) |
583 | return 0; | 583 | return 0; |
584 | if (dir->i_uid == fsuid) | 584 | if (uid_eq(dir->i_uid, fsuid)) |
585 | return 0; | 585 | return 0; |
586 | return !capable(CAP_FOWNER); | 586 | return !capable(CAP_FOWNER); |
587 | } | 587 | } |
@@ -1397,7 +1397,6 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
1397 | u64 *transid, bool readonly, | 1397 | u64 *transid, bool readonly, |
1398 | struct btrfs_qgroup_inherit **inherit) | 1398 | struct btrfs_qgroup_inherit **inherit) |
1399 | { | 1399 | { |
1400 | struct file *src_file; | ||
1401 | int namelen; | 1400 | int namelen; |
1402 | int ret = 0; | 1401 | int ret = 0; |
1403 | 1402 | ||
@@ -1421,25 +1420,24 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
1421 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 1420 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
1422 | NULL, transid, readonly, inherit); | 1421 | NULL, transid, readonly, inherit); |
1423 | } else { | 1422 | } else { |
1423 | struct fd src = fdget(fd); | ||
1424 | struct inode *src_inode; | 1424 | struct inode *src_inode; |
1425 | src_file = fget(fd); | 1425 | if (!src.file) { |
1426 | if (!src_file) { | ||
1427 | ret = -EINVAL; | 1426 | ret = -EINVAL; |
1428 | goto out_drop_write; | 1427 | goto out_drop_write; |
1429 | } | 1428 | } |
1430 | 1429 | ||
1431 | src_inode = src_file->f_path.dentry->d_inode; | 1430 | src_inode = src.file->f_path.dentry->d_inode; |
1432 | if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { | 1431 | if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { |
1433 | printk(KERN_INFO "btrfs: Snapshot src from " | 1432 | printk(KERN_INFO "btrfs: Snapshot src from " |
1434 | "another FS\n"); | 1433 | "another FS\n"); |
1435 | ret = -EINVAL; | 1434 | ret = -EINVAL; |
1436 | fput(src_file); | 1435 | } else { |
1437 | goto out_drop_write; | 1436 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
1437 | BTRFS_I(src_inode)->root, | ||
1438 | transid, readonly, inherit); | ||
1438 | } | 1439 | } |
1439 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 1440 | fdput(src); |
1440 | BTRFS_I(src_inode)->root, | ||
1441 | transid, readonly, inherit); | ||
1442 | fput(src_file); | ||
1443 | } | 1441 | } |
1444 | out_drop_write: | 1442 | out_drop_write: |
1445 | mnt_drop_write_file(file); | 1443 | mnt_drop_write_file(file); |
@@ -2341,7 +2339,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2341 | { | 2339 | { |
2342 | struct inode *inode = fdentry(file)->d_inode; | 2340 | struct inode *inode = fdentry(file)->d_inode; |
2343 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2341 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2344 | struct file *src_file; | 2342 | struct fd src_file; |
2345 | struct inode *src; | 2343 | struct inode *src; |
2346 | struct btrfs_trans_handle *trans; | 2344 | struct btrfs_trans_handle *trans; |
2347 | struct btrfs_path *path; | 2345 | struct btrfs_path *path; |
@@ -2376,24 +2374,24 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2376 | if (ret) | 2374 | if (ret) |
2377 | return ret; | 2375 | return ret; |
2378 | 2376 | ||
2379 | src_file = fget(srcfd); | 2377 | src_file = fdget(srcfd); |
2380 | if (!src_file) { | 2378 | if (!src_file.file) { |
2381 | ret = -EBADF; | 2379 | ret = -EBADF; |
2382 | goto out_drop_write; | 2380 | goto out_drop_write; |
2383 | } | 2381 | } |
2384 | 2382 | ||
2385 | ret = -EXDEV; | 2383 | ret = -EXDEV; |
2386 | if (src_file->f_path.mnt != file->f_path.mnt) | 2384 | if (src_file.file->f_path.mnt != file->f_path.mnt) |
2387 | goto out_fput; | 2385 | goto out_fput; |
2388 | 2386 | ||
2389 | src = src_file->f_dentry->d_inode; | 2387 | src = src_file.file->f_dentry->d_inode; |
2390 | 2388 | ||
2391 | ret = -EINVAL; | 2389 | ret = -EINVAL; |
2392 | if (src == inode) | 2390 | if (src == inode) |
2393 | goto out_fput; | 2391 | goto out_fput; |
2394 | 2392 | ||
2395 | /* the src must be open for reading */ | 2393 | /* the src must be open for reading */ |
2396 | if (!(src_file->f_mode & FMODE_READ)) | 2394 | if (!(src_file.file->f_mode & FMODE_READ)) |
2397 | goto out_fput; | 2395 | goto out_fput; |
2398 | 2396 | ||
2399 | /* don't make the dst file partly checksummed */ | 2397 | /* don't make the dst file partly checksummed */ |
@@ -2724,7 +2722,7 @@ out_unlock: | |||
2724 | vfree(buf); | 2722 | vfree(buf); |
2725 | btrfs_free_path(path); | 2723 | btrfs_free_path(path); |
2726 | out_fput: | 2724 | out_fput: |
2727 | fput(src_file); | 2725 | fdput(src_file); |
2728 | out_drop_write: | 2726 | out_drop_write: |
2729 | mnt_drop_write_file(file); | 2727 | mnt_drop_write_file(file); |
2730 | return ret; | 2728 | return ret; |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index a44eff074805..2a1762c66041 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -67,7 +67,7 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) | |||
67 | { | 67 | { |
68 | if (eb->lock_nested) { | 68 | if (eb->lock_nested) { |
69 | read_lock(&eb->lock); | 69 | read_lock(&eb->lock); |
70 | if (&eb->lock_nested && current->pid == eb->lock_owner) { | 70 | if (eb->lock_nested && current->pid == eb->lock_owner) { |
71 | read_unlock(&eb->lock); | 71 | read_unlock(&eb->lock); |
72 | return; | 72 | return; |
73 | } | 73 | } |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index bc424ae5a81a..b65015581744 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -1364,8 +1364,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | |||
1364 | spin_lock(&fs_info->qgroup_lock); | 1364 | spin_lock(&fs_info->qgroup_lock); |
1365 | 1365 | ||
1366 | dstgroup = add_qgroup_rb(fs_info, objectid); | 1366 | dstgroup = add_qgroup_rb(fs_info, objectid); |
1367 | if (!dstgroup) | 1367 | if (IS_ERR(dstgroup)) { |
1368 | ret = PTR_ERR(dstgroup); | ||
1368 | goto unlock; | 1369 | goto unlock; |
1370 | } | ||
1369 | 1371 | ||
1370 | if (srcid) { | 1372 | if (srcid) { |
1371 | srcgroup = find_qgroup_rb(fs_info, srcid); | 1373 | srcgroup = find_qgroup_rb(fs_info, srcid); |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 48a4882d8ad5..a955669519a2 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
@@ -68,7 +68,7 @@ struct reada_extent { | |||
68 | u32 blocksize; | 68 | u32 blocksize; |
69 | int err; | 69 | int err; |
70 | struct list_head extctl; | 70 | struct list_head extctl; |
71 | struct kref refcnt; | 71 | int refcnt; |
72 | spinlock_t lock; | 72 | spinlock_t lock; |
73 | struct reada_zone *zones[BTRFS_MAX_MIRRORS]; | 73 | struct reada_zone *zones[BTRFS_MAX_MIRRORS]; |
74 | int nzones; | 74 | int nzones; |
@@ -126,7 +126,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | |||
126 | spin_lock(&fs_info->reada_lock); | 126 | spin_lock(&fs_info->reada_lock); |
127 | re = radix_tree_lookup(&fs_info->reada_tree, index); | 127 | re = radix_tree_lookup(&fs_info->reada_tree, index); |
128 | if (re) | 128 | if (re) |
129 | kref_get(&re->refcnt); | 129 | re->refcnt++; |
130 | spin_unlock(&fs_info->reada_lock); | 130 | spin_unlock(&fs_info->reada_lock); |
131 | 131 | ||
132 | if (!re) | 132 | if (!re) |
@@ -336,7 +336,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
336 | spin_lock(&fs_info->reada_lock); | 336 | spin_lock(&fs_info->reada_lock); |
337 | re = radix_tree_lookup(&fs_info->reada_tree, index); | 337 | re = radix_tree_lookup(&fs_info->reada_tree, index); |
338 | if (re) | 338 | if (re) |
339 | kref_get(&re->refcnt); | 339 | re->refcnt++; |
340 | spin_unlock(&fs_info->reada_lock); | 340 | spin_unlock(&fs_info->reada_lock); |
341 | 341 | ||
342 | if (re) | 342 | if (re) |
@@ -352,7 +352,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
352 | re->top = *top; | 352 | re->top = *top; |
353 | INIT_LIST_HEAD(&re->extctl); | 353 | INIT_LIST_HEAD(&re->extctl); |
354 | spin_lock_init(&re->lock); | 354 | spin_lock_init(&re->lock); |
355 | kref_init(&re->refcnt); | 355 | re->refcnt = 1; |
356 | 356 | ||
357 | /* | 357 | /* |
358 | * map block | 358 | * map block |
@@ -398,7 +398,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
398 | if (ret == -EEXIST) { | 398 | if (ret == -EEXIST) { |
399 | re_exist = radix_tree_lookup(&fs_info->reada_tree, index); | 399 | re_exist = radix_tree_lookup(&fs_info->reada_tree, index); |
400 | BUG_ON(!re_exist); | 400 | BUG_ON(!re_exist); |
401 | kref_get(&re_exist->refcnt); | 401 | re_exist->refcnt++; |
402 | spin_unlock(&fs_info->reada_lock); | 402 | spin_unlock(&fs_info->reada_lock); |
403 | goto error; | 403 | goto error; |
404 | } | 404 | } |
@@ -465,10 +465,6 @@ error: | |||
465 | return re_exist; | 465 | return re_exist; |
466 | } | 466 | } |
467 | 467 | ||
468 | static void reada_kref_dummy(struct kref *kr) | ||
469 | { | ||
470 | } | ||
471 | |||
472 | static void reada_extent_put(struct btrfs_fs_info *fs_info, | 468 | static void reada_extent_put(struct btrfs_fs_info *fs_info, |
473 | struct reada_extent *re) | 469 | struct reada_extent *re) |
474 | { | 470 | { |
@@ -476,7 +472,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, | |||
476 | unsigned long index = re->logical >> PAGE_CACHE_SHIFT; | 472 | unsigned long index = re->logical >> PAGE_CACHE_SHIFT; |
477 | 473 | ||
478 | spin_lock(&fs_info->reada_lock); | 474 | spin_lock(&fs_info->reada_lock); |
479 | if (!kref_put(&re->refcnt, reada_kref_dummy)) { | 475 | if (--re->refcnt) { |
480 | spin_unlock(&fs_info->reada_lock); | 476 | spin_unlock(&fs_info->reada_lock); |
481 | return; | 477 | return; |
482 | } | 478 | } |
@@ -671,7 +667,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
671 | return 0; | 667 | return 0; |
672 | } | 668 | } |
673 | dev->reada_next = re->logical + re->blocksize; | 669 | dev->reada_next = re->logical + re->blocksize; |
674 | kref_get(&re->refcnt); | 670 | re->refcnt++; |
675 | 671 | ||
676 | spin_unlock(&fs_info->reada_lock); | 672 | spin_unlock(&fs_info->reada_lock); |
677 | 673 | ||
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 6bb465cca20f..10d8e4d88071 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -544,8 +544,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, | |||
544 | struct timespec ct = CURRENT_TIME; | 544 | struct timespec ct = CURRENT_TIME; |
545 | 545 | ||
546 | spin_lock(&root->root_times_lock); | 546 | spin_lock(&root->root_times_lock); |
547 | item->ctransid = trans->transid; | 547 | item->ctransid = cpu_to_le64(trans->transid); |
548 | item->ctime.sec = cpu_to_le64(ct.tv_sec); | 548 | item->ctime.sec = cpu_to_le64(ct.tv_sec); |
549 | item->ctime.nsec = cpu_to_le64(ct.tv_nsec); | 549 | item->ctime.nsec = cpu_to_le32(ct.tv_nsec); |
550 | spin_unlock(&root->root_times_lock); | 550 | spin_unlock(&root->root_times_lock); |
551 | } | 551 | } |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f2eb24c477a3..83d6f9f9c220 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -838,7 +838,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
838 | struct btrfs_trans_handle *trans; | 838 | struct btrfs_trans_handle *trans; |
839 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | 839 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
840 | struct btrfs_root *root = fs_info->tree_root; | 840 | struct btrfs_root *root = fs_info->tree_root; |
841 | int ret; | ||
842 | 841 | ||
843 | trace_btrfs_sync_fs(wait); | 842 | trace_btrfs_sync_fs(wait); |
844 | 843 | ||
@@ -849,11 +848,17 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
849 | 848 | ||
850 | btrfs_wait_ordered_extents(root, 0, 0); | 849 | btrfs_wait_ordered_extents(root, 0, 0); |
851 | 850 | ||
852 | trans = btrfs_start_transaction(root, 0); | 851 | spin_lock(&fs_info->trans_lock); |
852 | if (!fs_info->running_transaction) { | ||
853 | spin_unlock(&fs_info->trans_lock); | ||
854 | return 0; | ||
855 | } | ||
856 | spin_unlock(&fs_info->trans_lock); | ||
857 | |||
858 | trans = btrfs_join_transaction(root); | ||
853 | if (IS_ERR(trans)) | 859 | if (IS_ERR(trans)) |
854 | return PTR_ERR(trans); | 860 | return PTR_ERR(trans); |
855 | ret = btrfs_commit_transaction(trans, root); | 861 | return btrfs_commit_transaction(trans, root); |
856 | return ret; | ||
857 | } | 862 | } |
858 | 863 | ||
859 | static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) | 864 | static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) |
@@ -1530,6 +1535,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root) | |||
1530 | while (cur_devices) { | 1535 | while (cur_devices) { |
1531 | head = &cur_devices->devices; | 1536 | head = &cur_devices->devices; |
1532 | list_for_each_entry(dev, head, dev_list) { | 1537 | list_for_each_entry(dev, head, dev_list) { |
1538 | if (dev->missing) | ||
1539 | continue; | ||
1533 | if (!first_dev || dev->devid < first_dev->devid) | 1540 | if (!first_dev || dev->devid < first_dev->devid) |
1534 | first_dev = dev; | 1541 | first_dev = dev; |
1535 | } | 1542 | } |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 17be3dedacba..27c26004e050 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -1031,6 +1031,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1031 | 1031 | ||
1032 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | 1032 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
1033 | dentry->d_name.len * 2); | 1033 | dentry->d_name.len * 2); |
1034 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | ||
1034 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 1035 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
1035 | if (ret) | 1036 | if (ret) |
1036 | goto abort_trans_dput; | 1037 | goto abort_trans_dput; |
@@ -1066,7 +1067,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1066 | memcpy(new_root_item->parent_uuid, root->root_item.uuid, | 1067 | memcpy(new_root_item->parent_uuid, root->root_item.uuid, |
1067 | BTRFS_UUID_SIZE); | 1068 | BTRFS_UUID_SIZE); |
1068 | new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); | 1069 | new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); |
1069 | new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec); | 1070 | new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec); |
1070 | btrfs_set_root_otransid(new_root_item, trans->transid); | 1071 | btrfs_set_root_otransid(new_root_item, trans->transid); |
1071 | memset(&new_root_item->stime, 0, sizeof(new_root_item->stime)); | 1072 | memset(&new_root_item->stime, 0, sizeof(new_root_item->stime)); |
1072 | memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime)); | 1073 | memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime)); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e86ae04abe6a..88b969aeeb71 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -227,9 +227,8 @@ loop_lock: | |||
227 | cur = pending; | 227 | cur = pending; |
228 | pending = pending->bi_next; | 228 | pending = pending->bi_next; |
229 | cur->bi_next = NULL; | 229 | cur->bi_next = NULL; |
230 | atomic_dec(&fs_info->nr_async_bios); | ||
231 | 230 | ||
232 | if (atomic_read(&fs_info->nr_async_bios) < limit && | 231 | if (atomic_dec_return(&fs_info->nr_async_bios) < limit && |
233 | waitqueue_active(&fs_info->async_submit_wait)) | 232 | waitqueue_active(&fs_info->async_submit_wait)) |
234 | wake_up(&fs_info->async_submit_wait); | 233 | wake_up(&fs_info->async_submit_wait); |
235 | 234 | ||
@@ -569,9 +568,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
569 | memcpy(new_device, device, sizeof(*new_device)); | 568 | memcpy(new_device, device, sizeof(*new_device)); |
570 | 569 | ||
571 | /* Safe because we are under uuid_mutex */ | 570 | /* Safe because we are under uuid_mutex */ |
572 | name = rcu_string_strdup(device->name->str, GFP_NOFS); | 571 | if (device->name) { |
573 | BUG_ON(device->name && !name); /* -ENOMEM */ | 572 | name = rcu_string_strdup(device->name->str, GFP_NOFS); |
574 | rcu_assign_pointer(new_device->name, name); | 573 | BUG_ON(device->name && !name); /* -ENOMEM */ |
574 | rcu_assign_pointer(new_device->name, name); | ||
575 | } | ||
575 | new_device->bdev = NULL; | 576 | new_device->bdev = NULL; |
576 | new_device->writeable = 0; | 577 | new_device->writeable = 0; |
577 | new_device->in_fs_metadata = 0; | 578 | new_device->in_fs_metadata = 0; |
@@ -4605,28 +4606,6 @@ int btrfs_read_sys_array(struct btrfs_root *root) | |||
4605 | return ret; | 4606 | return ret; |
4606 | } | 4607 | } |
4607 | 4608 | ||
4608 | struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, | ||
4609 | u64 logical, int mirror_num) | ||
4610 | { | ||
4611 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
4612 | int ret; | ||
4613 | u64 map_length = 0; | ||
4614 | struct btrfs_bio *bbio = NULL; | ||
4615 | struct btrfs_device *device; | ||
4616 | |||
4617 | BUG_ON(mirror_num == 0); | ||
4618 | ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio, | ||
4619 | mirror_num); | ||
4620 | if (ret) { | ||
4621 | BUG_ON(bbio != NULL); | ||
4622 | return NULL; | ||
4623 | } | ||
4624 | BUG_ON(mirror_num != bbio->mirror_num); | ||
4625 | device = bbio->stripes[mirror_num - 1].dev; | ||
4626 | kfree(bbio); | ||
4627 | return device; | ||
4628 | } | ||
4629 | |||
4630 | int btrfs_read_chunk_tree(struct btrfs_root *root) | 4609 | int btrfs_read_chunk_tree(struct btrfs_root *root) |
4631 | { | 4610 | { |
4632 | struct btrfs_path *path; | 4611 | struct btrfs_path *path; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5479325987b3..53c06af92e8d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -289,8 +289,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); | |||
289 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); | 289 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); |
290 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | 290 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, |
291 | u64 *start, u64 *max_avail); | 291 | u64 *start, u64 *max_avail); |
292 | struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, | ||
293 | u64 logical, int mirror_num); | ||
294 | void btrfs_dev_stat_print_on_error(struct btrfs_device *device); | 292 | void btrfs_dev_stat_print_on_error(struct btrfs_device *device); |
295 | void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); | 293 | void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); |
296 | int btrfs_get_dev_stats(struct btrfs_root *root, | 294 | int btrfs_get_dev_stats(struct btrfs_root *root, |
diff --git a/fs/buffer.c b/fs/buffer.c index 9f6d2e41281d..b5f044283edb 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -914,7 +914,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head) | |||
914 | /* | 914 | /* |
915 | * Initialise the state of a blockdev page's buffers. | 915 | * Initialise the state of a blockdev page's buffers. |
916 | */ | 916 | */ |
917 | static void | 917 | static sector_t |
918 | init_page_buffers(struct page *page, struct block_device *bdev, | 918 | init_page_buffers(struct page *page, struct block_device *bdev, |
919 | sector_t block, int size) | 919 | sector_t block, int size) |
920 | { | 920 | { |
@@ -936,33 +936,41 @@ init_page_buffers(struct page *page, struct block_device *bdev, | |||
936 | block++; | 936 | block++; |
937 | bh = bh->b_this_page; | 937 | bh = bh->b_this_page; |
938 | } while (bh != head); | 938 | } while (bh != head); |
939 | |||
940 | /* | ||
941 | * Caller needs to validate requested block against end of device. | ||
942 | */ | ||
943 | return end_block; | ||
939 | } | 944 | } |
940 | 945 | ||
941 | /* | 946 | /* |
942 | * Create the page-cache page that contains the requested block. | 947 | * Create the page-cache page that contains the requested block. |
943 | * | 948 | * |
944 | * This is user purely for blockdev mappings. | 949 | * This is used purely for blockdev mappings. |
945 | */ | 950 | */ |
946 | static struct page * | 951 | static int |
947 | grow_dev_page(struct block_device *bdev, sector_t block, | 952 | grow_dev_page(struct block_device *bdev, sector_t block, |
948 | pgoff_t index, int size) | 953 | pgoff_t index, int size, int sizebits) |
949 | { | 954 | { |
950 | struct inode *inode = bdev->bd_inode; | 955 | struct inode *inode = bdev->bd_inode; |
951 | struct page *page; | 956 | struct page *page; |
952 | struct buffer_head *bh; | 957 | struct buffer_head *bh; |
958 | sector_t end_block; | ||
959 | int ret = 0; /* Will call free_more_memory() */ | ||
953 | 960 | ||
954 | page = find_or_create_page(inode->i_mapping, index, | 961 | page = find_or_create_page(inode->i_mapping, index, |
955 | (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); | 962 | (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); |
956 | if (!page) | 963 | if (!page) |
957 | return NULL; | 964 | return ret; |
958 | 965 | ||
959 | BUG_ON(!PageLocked(page)); | 966 | BUG_ON(!PageLocked(page)); |
960 | 967 | ||
961 | if (page_has_buffers(page)) { | 968 | if (page_has_buffers(page)) { |
962 | bh = page_buffers(page); | 969 | bh = page_buffers(page); |
963 | if (bh->b_size == size) { | 970 | if (bh->b_size == size) { |
964 | init_page_buffers(page, bdev, block, size); | 971 | end_block = init_page_buffers(page, bdev, |
965 | return page; | 972 | index << sizebits, size); |
973 | goto done; | ||
966 | } | 974 | } |
967 | if (!try_to_free_buffers(page)) | 975 | if (!try_to_free_buffers(page)) |
968 | goto failed; | 976 | goto failed; |
@@ -982,14 +990,14 @@ grow_dev_page(struct block_device *bdev, sector_t block, | |||
982 | */ | 990 | */ |
983 | spin_lock(&inode->i_mapping->private_lock); | 991 | spin_lock(&inode->i_mapping->private_lock); |
984 | link_dev_buffers(page, bh); | 992 | link_dev_buffers(page, bh); |
985 | init_page_buffers(page, bdev, block, size); | 993 | end_block = init_page_buffers(page, bdev, index << sizebits, size); |
986 | spin_unlock(&inode->i_mapping->private_lock); | 994 | spin_unlock(&inode->i_mapping->private_lock); |
987 | return page; | 995 | done: |
988 | 996 | ret = (block < end_block) ? 1 : -ENXIO; | |
989 | failed: | 997 | failed: |
990 | unlock_page(page); | 998 | unlock_page(page); |
991 | page_cache_release(page); | 999 | page_cache_release(page); |
992 | return NULL; | 1000 | return ret; |
993 | } | 1001 | } |
994 | 1002 | ||
995 | /* | 1003 | /* |
@@ -999,7 +1007,6 @@ failed: | |||
999 | static int | 1007 | static int |
1000 | grow_buffers(struct block_device *bdev, sector_t block, int size) | 1008 | grow_buffers(struct block_device *bdev, sector_t block, int size) |
1001 | { | 1009 | { |
1002 | struct page *page; | ||
1003 | pgoff_t index; | 1010 | pgoff_t index; |
1004 | int sizebits; | 1011 | int sizebits; |
1005 | 1012 | ||
@@ -1023,22 +1030,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) | |||
1023 | bdevname(bdev, b)); | 1030 | bdevname(bdev, b)); |
1024 | return -EIO; | 1031 | return -EIO; |
1025 | } | 1032 | } |
1026 | block = index << sizebits; | 1033 | |
1027 | /* Create a page with the proper size buffers.. */ | 1034 | /* Create a page with the proper size buffers.. */ |
1028 | page = grow_dev_page(bdev, block, index, size); | 1035 | return grow_dev_page(bdev, block, index, size, sizebits); |
1029 | if (!page) | ||
1030 | return 0; | ||
1031 | unlock_page(page); | ||
1032 | page_cache_release(page); | ||
1033 | return 1; | ||
1034 | } | 1036 | } |
1035 | 1037 | ||
1036 | static struct buffer_head * | 1038 | static struct buffer_head * |
1037 | __getblk_slow(struct block_device *bdev, sector_t block, int size) | 1039 | __getblk_slow(struct block_device *bdev, sector_t block, int size) |
1038 | { | 1040 | { |
1039 | int ret; | ||
1040 | struct buffer_head *bh; | ||
1041 | |||
1042 | /* Size must be multiple of hard sectorsize */ | 1041 | /* Size must be multiple of hard sectorsize */ |
1043 | if (unlikely(size & (bdev_logical_block_size(bdev)-1) || | 1042 | if (unlikely(size & (bdev_logical_block_size(bdev)-1) || |
1044 | (size < 512 || size > PAGE_SIZE))) { | 1043 | (size < 512 || size > PAGE_SIZE))) { |
@@ -1051,21 +1050,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) | |||
1051 | return NULL; | 1050 | return NULL; |
1052 | } | 1051 | } |
1053 | 1052 | ||
1054 | retry: | 1053 | for (;;) { |
1055 | bh = __find_get_block(bdev, block, size); | 1054 | struct buffer_head *bh; |
1056 | if (bh) | 1055 | int ret; |
1057 | return bh; | ||
1058 | 1056 | ||
1059 | ret = grow_buffers(bdev, block, size); | ||
1060 | if (ret == 0) { | ||
1061 | free_more_memory(); | ||
1062 | goto retry; | ||
1063 | } else if (ret > 0) { | ||
1064 | bh = __find_get_block(bdev, block, size); | 1057 | bh = __find_get_block(bdev, block, size); |
1065 | if (bh) | 1058 | if (bh) |
1066 | return bh; | 1059 | return bh; |
1060 | |||
1061 | ret = grow_buffers(bdev, block, size); | ||
1062 | if (ret < 0) | ||
1063 | return NULL; | ||
1064 | if (ret == 0) | ||
1065 | free_more_memory(); | ||
1067 | } | 1066 | } |
1068 | return NULL; | ||
1069 | } | 1067 | } |
1070 | 1068 | ||
1071 | /* | 1069 | /* |
@@ -1321,10 +1319,6 @@ EXPORT_SYMBOL(__find_get_block); | |||
1321 | * which corresponds to the passed block_device, block and size. The | 1319 | * which corresponds to the passed block_device, block and size. The |
1322 | * returned buffer has its reference count incremented. | 1320 | * returned buffer has its reference count incremented. |
1323 | * | 1321 | * |
1324 | * __getblk() cannot fail - it just keeps trying. If you pass it an | ||
1325 | * illegal block number, __getblk() will happily return a buffer_head | ||
1326 | * which represents the non-existent block. Very weird. | ||
1327 | * | ||
1328 | * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() | 1322 | * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() |
1329 | * attempt is failing. FIXME, perhaps? | 1323 | * attempt is failing. FIXME, perhaps? |
1330 | */ | 1324 | */ |
@@ -2318,12 +2312,6 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
2318 | loff_t size; | 2312 | loff_t size; |
2319 | int ret; | 2313 | int ret; |
2320 | 2314 | ||
2321 | /* | ||
2322 | * Update file times before taking page lock. We may end up failing the | ||
2323 | * fault so this update may be superfluous but who really cares... | ||
2324 | */ | ||
2325 | file_update_time(vma->vm_file); | ||
2326 | |||
2327 | lock_page(page); | 2315 | lock_page(page); |
2328 | size = i_size_read(inode); | 2316 | size = i_size_read(inode); |
2329 | if ((page->mapping != inode->i_mapping) || | 2317 | if ((page->mapping != inode->i_mapping) || |
@@ -2361,6 +2349,13 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
2361 | struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; | 2349 | struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; |
2362 | 2350 | ||
2363 | sb_start_pagefault(sb); | 2351 | sb_start_pagefault(sb); |
2352 | |||
2353 | /* | ||
2354 | * Update file times before taking page lock. We may end up failing the | ||
2355 | * fault so this update may be superfluous but who really cares... | ||
2356 | */ | ||
2357 | file_update_time(vma->vm_file); | ||
2358 | |||
2364 | ret = __block_page_mkwrite(vma, vmf, get_block); | 2359 | ret = __block_page_mkwrite(vma, vmf, get_block); |
2365 | sb_end_pagefault(sb); | 2360 | sb_end_pagefault(sb); |
2366 | return block_page_mkwrite_return(ret); | 2361 | return block_page_mkwrite_return(ret); |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 452e71a1b753..6690269f5dde 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -205,7 +205,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
205 | dout("readpage inode %p file %p page %p index %lu\n", | 205 | dout("readpage inode %p file %p page %p index %lu\n", |
206 | inode, filp, page, page->index); | 206 | inode, filp, page, page->index); |
207 | err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, | 207 | err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, |
208 | page->index << PAGE_CACHE_SHIFT, &len, | 208 | (u64) page_offset(page), &len, |
209 | ci->i_truncate_seq, ci->i_truncate_size, | 209 | ci->i_truncate_seq, ci->i_truncate_size, |
210 | &page, 1, 0); | 210 | &page, 1, 0); |
211 | if (err == -ENOENT) | 211 | if (err == -ENOENT) |
@@ -286,7 +286,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) | |||
286 | int nr_pages = 0; | 286 | int nr_pages = 0; |
287 | int ret; | 287 | int ret; |
288 | 288 | ||
289 | off = page->index << PAGE_CACHE_SHIFT; | 289 | off = (u64) page_offset(page); |
290 | 290 | ||
291 | /* count pages */ | 291 | /* count pages */ |
292 | next_index = page->index; | 292 | next_index = page->index; |
@@ -308,8 +308,8 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) | |||
308 | NULL, 0, | 308 | NULL, 0, |
309 | ci->i_truncate_seq, ci->i_truncate_size, | 309 | ci->i_truncate_seq, ci->i_truncate_size, |
310 | NULL, false, 1, 0); | 310 | NULL, false, 1, 0); |
311 | if (!req) | 311 | if (IS_ERR(req)) |
312 | return -ENOMEM; | 312 | return PTR_ERR(req); |
313 | 313 | ||
314 | /* build page vector */ | 314 | /* build page vector */ |
315 | nr_pages = len >> PAGE_CACHE_SHIFT; | 315 | nr_pages = len >> PAGE_CACHE_SHIFT; |
@@ -426,7 +426,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
426 | struct ceph_inode_info *ci; | 426 | struct ceph_inode_info *ci; |
427 | struct ceph_fs_client *fsc; | 427 | struct ceph_fs_client *fsc; |
428 | struct ceph_osd_client *osdc; | 428 | struct ceph_osd_client *osdc; |
429 | loff_t page_off = page->index << PAGE_CACHE_SHIFT; | 429 | loff_t page_off = page_offset(page); |
430 | int len = PAGE_CACHE_SIZE; | 430 | int len = PAGE_CACHE_SIZE; |
431 | loff_t i_size; | 431 | loff_t i_size; |
432 | int err = 0; | 432 | int err = 0; |
@@ -817,8 +817,7 @@ get_more_pages: | |||
817 | /* ok */ | 817 | /* ok */ |
818 | if (locked_pages == 0) { | 818 | if (locked_pages == 0) { |
819 | /* prepare async write request */ | 819 | /* prepare async write request */ |
820 | offset = (unsigned long long)page->index | 820 | offset = (u64) page_offset(page); |
821 | << PAGE_CACHE_SHIFT; | ||
822 | len = wsize; | 821 | len = wsize; |
823 | req = ceph_osdc_new_request(&fsc->client->osdc, | 822 | req = ceph_osdc_new_request(&fsc->client->osdc, |
824 | &ci->i_layout, | 823 | &ci->i_layout, |
@@ -832,8 +831,8 @@ get_more_pages: | |||
832 | ci->i_truncate_size, | 831 | ci->i_truncate_size, |
833 | &inode->i_mtime, true, 1, 0); | 832 | &inode->i_mtime, true, 1, 0); |
834 | 833 | ||
835 | if (!req) { | 834 | if (IS_ERR(req)) { |
836 | rc = -ENOMEM; | 835 | rc = PTR_ERR(req); |
837 | unlock_page(page); | 836 | unlock_page(page); |
838 | break; | 837 | break; |
839 | } | 838 | } |
@@ -1180,7 +1179,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1180 | struct inode *inode = vma->vm_file->f_dentry->d_inode; | 1179 | struct inode *inode = vma->vm_file->f_dentry->d_inode; |
1181 | struct page *page = vmf->page; | 1180 | struct page *page = vmf->page; |
1182 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | 1181 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
1183 | loff_t off = page->index << PAGE_CACHE_SHIFT; | 1182 | loff_t off = page_offset(page); |
1184 | loff_t size, len; | 1183 | loff_t size, len; |
1185 | int ret; | 1184 | int ret; |
1186 | 1185 | ||
@@ -1225,6 +1224,7 @@ out: | |||
1225 | static struct vm_operations_struct ceph_vmops = { | 1224 | static struct vm_operations_struct ceph_vmops = { |
1226 | .fault = filemap_fault, | 1225 | .fault = filemap_fault, |
1227 | .page_mkwrite = ceph_page_mkwrite, | 1226 | .page_mkwrite = ceph_page_mkwrite, |
1227 | .remap_pages = generic_file_remap_pages, | ||
1228 | }; | 1228 | }; |
1229 | 1229 | ||
1230 | int ceph_mmap(struct file *file, struct vm_area_struct *vma) | 1230 | int ceph_mmap(struct file *file, struct vm_area_struct *vma) |
@@ -1235,6 +1235,5 @@ int ceph_mmap(struct file *file, struct vm_area_struct *vma) | |||
1235 | return -ENOEXEC; | 1235 | return -ENOEXEC; |
1236 | file_accessed(file); | 1236 | file_accessed(file); |
1237 | vma->vm_ops = &ceph_vmops; | 1237 | vma->vm_ops = &ceph_vmops; |
1238 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
1239 | return 0; | 1238 | return 0; |
1240 | } | 1239 | } |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 620daad201db..3251e9cc6401 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1005,7 +1005,7 @@ static void __queue_cap_release(struct ceph_mds_session *session, | |||
1005 | 1005 | ||
1006 | BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); | 1006 | BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); |
1007 | head = msg->front.iov_base; | 1007 | head = msg->front.iov_base; |
1008 | head->num = cpu_to_le32(le32_to_cpu(head->num) + 1); | 1008 | le32_add_cpu(&head->num, 1); |
1009 | item = msg->front.iov_base + msg->front.iov_len; | 1009 | item = msg->front.iov_base + msg->front.iov_len; |
1010 | item->ino = cpu_to_le64(ino); | 1010 | item->ino = cpu_to_le64(ino); |
1011 | item->cap_id = cpu_to_le64(cap_id); | 1011 | item->cap_id = cpu_to_le64(cap_id); |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index fb962efdacee..6d59006bfa27 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) | |||
201 | int err = -ENOMEM; | 201 | int err = -ENOMEM; |
202 | 202 | ||
203 | dout("ceph_fs_debugfs_init\n"); | 203 | dout("ceph_fs_debugfs_init\n"); |
204 | BUG_ON(!fsc->client->debugfs_dir); | ||
204 | fsc->debugfs_congestion_kb = | 205 | fsc->debugfs_congestion_kb = |
205 | debugfs_create_file("writeback_congestion_kb", | 206 | debugfs_create_file("writeback_congestion_kb", |
206 | 0600, | 207 | 0600, |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ecebbc09bfc7..5840d2aaed15 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -536,8 +536,8 @@ more: | |||
536 | do_sync, | 536 | do_sync, |
537 | ci->i_truncate_seq, ci->i_truncate_size, | 537 | ci->i_truncate_seq, ci->i_truncate_size, |
538 | &mtime, false, 2, page_align); | 538 | &mtime, false, 2, page_align); |
539 | if (!req) | 539 | if (IS_ERR(req)) |
540 | return -ENOMEM; | 540 | return PTR_ERR(req); |
541 | 541 | ||
542 | if (file->f_flags & O_DIRECT) { | 542 | if (file->f_flags & O_DIRECT) { |
543 | pages = ceph_get_direct_page_vector(data, num_pages, false); | 543 | pages = ceph_get_direct_page_vector(data, num_pages, false); |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 9fff9f3b17e4..ba95eea201bf 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -992,11 +992,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
992 | if (rinfo->head->is_dentry) { | 992 | if (rinfo->head->is_dentry) { |
993 | struct inode *dir = req->r_locked_dir; | 993 | struct inode *dir = req->r_locked_dir; |
994 | 994 | ||
995 | err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, | 995 | if (dir) { |
996 | session, req->r_request_started, -1, | 996 | err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, |
997 | &req->r_caps_reservation); | 997 | session, req->r_request_started, -1, |
998 | if (err < 0) | 998 | &req->r_caps_reservation); |
999 | return err; | 999 | if (err < 0) |
1000 | return err; | ||
1001 | } else { | ||
1002 | WARN_ON_ONCE(1); | ||
1003 | } | ||
1000 | } | 1004 | } |
1001 | 1005 | ||
1002 | /* | 1006 | /* |
@@ -1004,6 +1008,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1004 | * will have trouble splicing in the virtual snapdir later | 1008 | * will have trouble splicing in the virtual snapdir later |
1005 | */ | 1009 | */ |
1006 | if (rinfo->head->is_dentry && !req->r_aborted && | 1010 | if (rinfo->head->is_dentry && !req->r_aborted && |
1011 | req->r_locked_dir && | ||
1007 | (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, | 1012 | (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, |
1008 | fsc->mount_options->snapdir_name, | 1013 | fsc->mount_options->snapdir_name, |
1009 | req->r_dentry->d_name.len))) { | 1014 | req->r_dentry->d_name.len))) { |
@@ -1099,7 +1104,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1099 | pr_err("fill_trace bad get_inode " | 1104 | pr_err("fill_trace bad get_inode " |
1100 | "%llx.%llx\n", vino.ino, vino.snap); | 1105 | "%llx.%llx\n", vino.ino, vino.snap); |
1101 | err = PTR_ERR(in); | 1106 | err = PTR_ERR(in); |
1102 | d_delete(dn); | 1107 | d_drop(dn); |
1103 | goto done; | 1108 | goto done; |
1104 | } | 1109 | } |
1105 | dn = splice_dentry(dn, in, &have_lease, true); | 1110 | dn = splice_dentry(dn, in, &have_lease, true); |
@@ -1272,7 +1277,7 @@ retry_lookup: | |||
1272 | in = ceph_get_inode(parent->d_sb, vino); | 1277 | in = ceph_get_inode(parent->d_sb, vino); |
1273 | if (IS_ERR(in)) { | 1278 | if (IS_ERR(in)) { |
1274 | dout("new_inode badness\n"); | 1279 | dout("new_inode badness\n"); |
1275 | d_delete(dn); | 1280 | d_drop(dn); |
1276 | dput(dn); | 1281 | dput(dn); |
1277 | err = PTR_ERR(in); | 1282 | err = PTR_ERR(in); |
1278 | goto out; | 1283 | goto out; |
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 8e3fb69fbe62..36549a46e311 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -42,7 +42,8 @@ static long __validate_layout(struct ceph_mds_client *mdsc, | |||
42 | /* validate striping parameters */ | 42 | /* validate striping parameters */ |
43 | if ((l->object_size & ~PAGE_MASK) || | 43 | if ((l->object_size & ~PAGE_MASK) || |
44 | (l->stripe_unit & ~PAGE_MASK) || | 44 | (l->stripe_unit & ~PAGE_MASK) || |
45 | ((unsigned)l->object_size % (unsigned)l->stripe_unit)) | 45 | (l->stripe_unit != 0 && |
46 | ((unsigned)l->object_size % (unsigned)l->stripe_unit))) | ||
46 | return -EINVAL; | 47 | return -EINVAL; |
47 | 48 | ||
48 | /* make sure it's a valid data pool */ | 49 | /* make sure it's a valid data pool */ |
@@ -186,14 +187,18 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
186 | u64 tmp; | 187 | u64 tmp; |
187 | struct ceph_object_layout ol; | 188 | struct ceph_object_layout ol; |
188 | struct ceph_pg pgid; | 189 | struct ceph_pg pgid; |
190 | int r; | ||
189 | 191 | ||
190 | /* copy and validate */ | 192 | /* copy and validate */ |
191 | if (copy_from_user(&dl, arg, sizeof(dl))) | 193 | if (copy_from_user(&dl, arg, sizeof(dl))) |
192 | return -EFAULT; | 194 | return -EFAULT; |
193 | 195 | ||
194 | down_read(&osdc->map_sem); | 196 | down_read(&osdc->map_sem); |
195 | ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, | 197 | r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, |
196 | &dl.object_no, &dl.object_offset, &olen); | 198 | &dl.object_no, &dl.object_offset, |
199 | &olen); | ||
200 | if (r < 0) | ||
201 | return -EIO; | ||
197 | dl.file_offset -= dl.object_offset; | 202 | dl.file_offset -= dl.object_offset; |
198 | dl.object_size = ceph_file_layout_object_size(ci->i_layout); | 203 | dl.object_size = ceph_file_layout_object_size(ci->i_layout); |
199 | dl.block_size = ceph_file_layout_su(ci->i_layout); | 204 | dl.block_size = ceph_file_layout_su(ci->i_layout); |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a5a735422aa7..1bcf712655d9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -2625,7 +2625,8 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
2625 | ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", | 2625 | ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", |
2626 | session_state_name(s->s_state)); | 2626 | session_state_name(s->s_state)); |
2627 | 2627 | ||
2628 | if (memcmp(ceph_mdsmap_get_addr(oldmap, i), | 2628 | if (i >= newmap->m_max_mds || |
2629 | memcmp(ceph_mdsmap_get_addr(oldmap, i), | ||
2629 | ceph_mdsmap_get_addr(newmap, i), | 2630 | ceph_mdsmap_get_addr(newmap, i), |
2630 | sizeof(struct ceph_entity_addr))) { | 2631 | sizeof(struct ceph_entity_addr))) { |
2631 | if (s->s_state == CEPH_MDS_SESSION_OPENING) { | 2632 | if (s->s_state == CEPH_MDS_SESSION_OPENING) { |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b982239f38f9..2eb43f211325 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -307,7 +307,10 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, | |||
307 | { | 307 | { |
308 | struct ceph_mount_options *fsopt; | 308 | struct ceph_mount_options *fsopt; |
309 | const char *dev_name_end; | 309 | const char *dev_name_end; |
310 | int err = -ENOMEM; | 310 | int err; |
311 | |||
312 | if (!dev_name || !*dev_name) | ||
313 | return -EINVAL; | ||
311 | 314 | ||
312 | fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); | 315 | fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); |
313 | if (!fsopt) | 316 | if (!fsopt) |
@@ -328,21 +331,33 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, | |||
328 | fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | 331 | fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; |
329 | fsopt->congestion_kb = default_congestion_kb(); | 332 | fsopt->congestion_kb = default_congestion_kb(); |
330 | 333 | ||
331 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | 334 | /* |
335 | * Distinguish the server list from the path in "dev_name". | ||
336 | * Internally we do not include the leading '/' in the path. | ||
337 | * | ||
338 | * "dev_name" will look like: | ||
339 | * <server_spec>[,<server_spec>...]:[<path>] | ||
340 | * where | ||
341 | * <server_spec> is <ip>[:<port>] | ||
342 | * <path> is optional, but if present must begin with '/' | ||
343 | */ | ||
344 | dev_name_end = strchr(dev_name, '/'); | ||
345 | if (dev_name_end) { | ||
346 | /* skip over leading '/' for path */ | ||
347 | *path = dev_name_end + 1; | ||
348 | } else { | ||
349 | /* path is empty */ | ||
350 | dev_name_end = dev_name + strlen(dev_name); | ||
351 | *path = dev_name_end; | ||
352 | } | ||
332 | err = -EINVAL; | 353 | err = -EINVAL; |
333 | if (!dev_name) | 354 | dev_name_end--; /* back up to ':' separator */ |
334 | goto out; | 355 | if (*dev_name_end != ':') { |
335 | *path = strstr(dev_name, ":/"); | 356 | pr_err("device name is missing path (no : separator in %s)\n", |
336 | if (*path == NULL) { | ||
337 | pr_err("device name is missing path (no :/ in %s)\n", | ||
338 | dev_name); | 357 | dev_name); |
339 | goto out; | 358 | goto out; |
340 | } | 359 | } |
341 | dev_name_end = *path; | ||
342 | dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); | 360 | dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); |
343 | |||
344 | /* path on server */ | ||
345 | *path += 2; | ||
346 | dout("server path '%s'\n", *path); | 361 | dout("server path '%s'\n", *path); |
347 | 362 | ||
348 | *popt = ceph_parse_options(options, dev_name, dev_name_end, | 363 | *popt = ceph_parse_options(options, dev_name, dev_name_end, |
@@ -603,6 +618,11 @@ bad_cap: | |||
603 | 618 | ||
604 | static void destroy_caches(void) | 619 | static void destroy_caches(void) |
605 | { | 620 | { |
621 | /* | ||
622 | * Make sure all delayed rcu free inodes are flushed before we | ||
623 | * destroy cache. | ||
624 | */ | ||
625 | rcu_barrier(); | ||
606 | kmem_cache_destroy(ceph_inode_cachep); | 626 | kmem_cache_destroy(ceph_inode_cachep); |
607 | kmem_cache_destroy(ceph_cap_cachep); | 627 | kmem_cache_destroy(ceph_cap_cachep); |
608 | kmem_cache_destroy(ceph_dentry_cachep); | 628 | kmem_cache_destroy(ceph_dentry_cachep); |
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index a08306a8bec9..2075ddfffa73 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig | |||
@@ -9,13 +9,14 @@ config CIFS | |||
9 | select CRYPTO_ARC4 | 9 | select CRYPTO_ARC4 |
10 | select CRYPTO_ECB | 10 | select CRYPTO_ECB |
11 | select CRYPTO_DES | 11 | select CRYPTO_DES |
12 | select CRYPTO_SHA256 | ||
12 | help | 13 | help |
13 | This is the client VFS module for the Common Internet File System | 14 | This is the client VFS module for the Common Internet File System |
14 | (CIFS) protocol which is the successor to the Server Message Block | 15 | (CIFS) protocol which is the successor to the Server Message Block |
15 | (SMB) protocol, the native file sharing mechanism for most early | 16 | (SMB) protocol, the native file sharing mechanism for most early |
16 | PC operating systems. The CIFS protocol is fully supported by | 17 | PC operating systems. The CIFS protocol is fully supported by |
17 | file servers such as Windows 2000 (including Windows 2003, NT 4 | 18 | file servers such as Windows 2000 (including Windows 2003, Windows 2008, |
18 | and Windows XP) as well by Samba (which provides excellent CIFS | 19 | NT 4 and Windows XP) as well by Samba (which provides excellent CIFS |
19 | server support for Linux and many other operating systems). Limited | 20 | server support for Linux and many other operating systems). Limited |
20 | support for OS/2 and Windows ME and similar servers is provided as | 21 | support for OS/2 and Windows ME and similar servers is provided as |
21 | well. | 22 | well. |
@@ -114,6 +115,13 @@ config CIFS_POSIX | |||
114 | (such as Samba 3.10 and later) which can negotiate | 115 | (such as Samba 3.10 and later) which can negotiate |
115 | CIFS POSIX ACL support. If unsure, say N. | 116 | CIFS POSIX ACL support. If unsure, say N. |
116 | 117 | ||
118 | config CIFS_ACL | ||
119 | bool "Provide CIFS ACL support" | ||
120 | depends on CIFS_XATTR && KEYS | ||
121 | help | ||
122 | Allows fetching CIFS/NTFS ACL from the server. The DACL blob | ||
123 | is handed over to the application/caller. | ||
124 | |||
117 | config CIFS_DEBUG2 | 125 | config CIFS_DEBUG2 |
118 | bool "Enable additional CIFS debugging routines" | 126 | bool "Enable additional CIFS debugging routines" |
119 | depends on CIFS | 127 | depends on CIFS |
@@ -138,21 +146,6 @@ config CIFS_DFS_UPCALL | |||
138 | IP addresses) which is needed for implicit mounts of DFS junction | 146 | IP addresses) which is needed for implicit mounts of DFS junction |
139 | points. If unsure, say N. | 147 | points. If unsure, say N. |
140 | 148 | ||
141 | config CIFS_FSCACHE | ||
142 | bool "Provide CIFS client caching support" | ||
143 | depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y | ||
144 | help | ||
145 | Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data | ||
146 | to be cached locally on disk through the general filesystem cache | ||
147 | manager. If unsure, say N. | ||
148 | |||
149 | config CIFS_ACL | ||
150 | bool "Provide CIFS ACL support" | ||
151 | depends on CIFS_XATTR && KEYS | ||
152 | help | ||
153 | Allows to fetch CIFS/NTFS ACL from the server. The DACL blob | ||
154 | is handed over to the application/caller. | ||
155 | |||
156 | config CIFS_NFSD_EXPORT | 149 | config CIFS_NFSD_EXPORT |
157 | bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" | 150 | bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" |
158 | depends on CIFS && EXPERIMENTAL && BROKEN | 151 | depends on CIFS && EXPERIMENTAL && BROKEN |
@@ -161,7 +154,7 @@ config CIFS_NFSD_EXPORT | |||
161 | 154 | ||
162 | config CIFS_SMB2 | 155 | config CIFS_SMB2 |
163 | bool "SMB2 network file system support (EXPERIMENTAL)" | 156 | bool "SMB2 network file system support (EXPERIMENTAL)" |
164 | depends on EXPERIMENTAL && INET && BROKEN | 157 | depends on CIFS && EXPERIMENTAL && INET |
165 | select NLS | 158 | select NLS |
166 | select KEYS | 159 | select KEYS |
167 | select FSCACHE | 160 | select FSCACHE |
@@ -178,3 +171,12 @@ config CIFS_SMB2 | |||
178 | (compared to cifs) due to protocol improvements. | 171 | (compared to cifs) due to protocol improvements. |
179 | 172 | ||
180 | Unless you are a developer or tester, say N. | 173 | Unless you are a developer or tester, say N. |
174 | |||
175 | config CIFS_FSCACHE | ||
176 | bool "Provide CIFS client caching support" | ||
177 | depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y | ||
178 | help | ||
179 | Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data | ||
180 | to be cached locally on disk through the general filesystem cache | ||
181 | manager. If unsure, say N. | ||
182 | |||
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index feee94309271..aa0d68b086eb 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile | |||
@@ -17,4 +17,4 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o | |||
17 | cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o | 17 | cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o |
18 | 18 | ||
19 | cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o \ | 19 | cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o \ |
20 | smb2misc.o smb2pdu.o smb2inode.o | 20 | smb2misc.o smb2pdu.o smb2inode.o smb2file.o |
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 7dab9c04ad52..53cf2aabce87 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c | |||
@@ -328,7 +328,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, | |||
328 | } | 328 | } |
329 | 329 | ||
330 | ctoUTF16_out: | 330 | ctoUTF16_out: |
331 | return i; | 331 | return j; |
332 | } | 332 | } |
333 | 333 | ||
334 | #ifdef CONFIG_CIFS_SMB2 | 334 | #ifdef CONFIG_CIFS_SMB2 |
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 05f4dc263a23..2ee5c54797fa 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -1222,7 +1222,7 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, | |||
1222 | if (!open_file) | 1222 | if (!open_file) |
1223 | return get_cifs_acl_by_path(cifs_sb, path, pacllen); | 1223 | return get_cifs_acl_by_path(cifs_sb, path, pacllen); |
1224 | 1224 | ||
1225 | pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen); | 1225 | pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->fid.netfid, pacllen); |
1226 | cifsFileInfo_put(open_file); | 1226 | cifsFileInfo_put(open_file); |
1227 | return pntsd; | 1227 | return pntsd; |
1228 | } | 1228 | } |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 6a0d741159f0..652f5051be09 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "ntlmssp.h" | 29 | #include "ntlmssp.h" |
30 | #include <linux/ctype.h> | 30 | #include <linux/ctype.h> |
31 | #include <linux/random.h> | 31 | #include <linux/random.h> |
32 | #include <linux/highmem.h> | ||
32 | 33 | ||
33 | /* | 34 | /* |
34 | * Calculate and return the CIFS signature based on the mac key and SMB PDU. | 35 | * Calculate and return the CIFS signature based on the mac key and SMB PDU. |
@@ -37,11 +38,13 @@ | |||
37 | * the sequence number before this function is called. Also, this function | 38 | * the sequence number before this function is called. Also, this function |
38 | * should be called with the server->srv_mutex held. | 39 | * should be called with the server->srv_mutex held. |
39 | */ | 40 | */ |
40 | static int cifs_calc_signature(const struct kvec *iov, int n_vec, | 41 | static int cifs_calc_signature(struct smb_rqst *rqst, |
41 | struct TCP_Server_Info *server, char *signature) | 42 | struct TCP_Server_Info *server, char *signature) |
42 | { | 43 | { |
43 | int i; | 44 | int i; |
44 | int rc; | 45 | int rc; |
46 | struct kvec *iov = rqst->rq_iov; | ||
47 | int n_vec = rqst->rq_nvec; | ||
45 | 48 | ||
46 | if (iov == NULL || signature == NULL || server == NULL) | 49 | if (iov == NULL || signature == NULL || server == NULL) |
47 | return -EINVAL; | 50 | return -EINVAL; |
@@ -91,6 +94,16 @@ static int cifs_calc_signature(const struct kvec *iov, int n_vec, | |||
91 | } | 94 | } |
92 | } | 95 | } |
93 | 96 | ||
97 | /* now hash over the rq_pages array */ | ||
98 | for (i = 0; i < rqst->rq_npages; i++) { | ||
99 | struct kvec p_iov; | ||
100 | |||
101 | cifs_rqst_page_to_kvec(rqst, i, &p_iov); | ||
102 | crypto_shash_update(&server->secmech.sdescmd5->shash, | ||
103 | p_iov.iov_base, p_iov.iov_len); | ||
104 | kunmap(rqst->rq_pages[i]); | ||
105 | } | ||
106 | |||
94 | rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); | 107 | rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); |
95 | if (rc) | 108 | if (rc) |
96 | cERROR(1, "%s: Could not generate md5 hash", __func__); | 109 | cERROR(1, "%s: Could not generate md5 hash", __func__); |
@@ -99,12 +112,12 @@ static int cifs_calc_signature(const struct kvec *iov, int n_vec, | |||
99 | } | 112 | } |
100 | 113 | ||
101 | /* must be called with server->srv_mutex held */ | 114 | /* must be called with server->srv_mutex held */ |
102 | int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | 115 | int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, |
103 | __u32 *pexpected_response_sequence_number) | 116 | __u32 *pexpected_response_sequence_number) |
104 | { | 117 | { |
105 | int rc = 0; | 118 | int rc = 0; |
106 | char smb_signature[20]; | 119 | char smb_signature[20]; |
107 | struct smb_hdr *cifs_pdu = (struct smb_hdr *)iov[0].iov_base; | 120 | struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base; |
108 | 121 | ||
109 | if ((cifs_pdu == NULL) || (server == NULL)) | 122 | if ((cifs_pdu == NULL) || (server == NULL)) |
110 | return -EINVAL; | 123 | return -EINVAL; |
@@ -125,7 +138,7 @@ int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | |||
125 | *pexpected_response_sequence_number = server->sequence_number++; | 138 | *pexpected_response_sequence_number = server->sequence_number++; |
126 | server->sequence_number++; | 139 | server->sequence_number++; |
127 | 140 | ||
128 | rc = cifs_calc_signature(iov, n_vec, server, smb_signature); | 141 | rc = cifs_calc_signature(rqst, server, smb_signature); |
129 | if (rc) | 142 | if (rc) |
130 | memset(cifs_pdu->Signature.SecuritySignature, 0, 8); | 143 | memset(cifs_pdu->Signature.SecuritySignature, 0, 8); |
131 | else | 144 | else |
@@ -134,6 +147,15 @@ int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | |||
134 | return rc; | 147 | return rc; |
135 | } | 148 | } |
136 | 149 | ||
150 | int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | ||
151 | __u32 *pexpected_response_sequence) | ||
152 | { | ||
153 | struct smb_rqst rqst = { .rq_iov = iov, | ||
154 | .rq_nvec = n_vec }; | ||
155 | |||
156 | return cifs_sign_rqst(&rqst, server, pexpected_response_sequence); | ||
157 | } | ||
158 | |||
137 | /* must be called with server->srv_mutex held */ | 159 | /* must be called with server->srv_mutex held */ |
138 | int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, | 160 | int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, |
139 | __u32 *pexpected_response_sequence_number) | 161 | __u32 *pexpected_response_sequence_number) |
@@ -147,14 +169,14 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, | |||
147 | pexpected_response_sequence_number); | 169 | pexpected_response_sequence_number); |
148 | } | 170 | } |
149 | 171 | ||
150 | int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, | 172 | int cifs_verify_signature(struct smb_rqst *rqst, |
151 | struct TCP_Server_Info *server, | 173 | struct TCP_Server_Info *server, |
152 | __u32 expected_sequence_number) | 174 | __u32 expected_sequence_number) |
153 | { | 175 | { |
154 | unsigned int rc; | 176 | unsigned int rc; |
155 | char server_response_sig[8]; | 177 | char server_response_sig[8]; |
156 | char what_we_think_sig_should_be[20]; | 178 | char what_we_think_sig_should_be[20]; |
157 | struct smb_hdr *cifs_pdu = (struct smb_hdr *)iov[0].iov_base; | 179 | struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base; |
158 | 180 | ||
159 | if (cifs_pdu == NULL || server == NULL) | 181 | if (cifs_pdu == NULL || server == NULL) |
160 | return -EINVAL; | 182 | return -EINVAL; |
@@ -186,8 +208,7 @@ int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, | |||
186 | cifs_pdu->Signature.Sequence.Reserved = 0; | 208 | cifs_pdu->Signature.Sequence.Reserved = 0; |
187 | 209 | ||
188 | mutex_lock(&server->srv_mutex); | 210 | mutex_lock(&server->srv_mutex); |
189 | rc = cifs_calc_signature(iov, nr_iov, server, | 211 | rc = cifs_calc_signature(rqst, server, what_we_think_sig_should_be); |
190 | what_we_think_sig_should_be); | ||
191 | mutex_unlock(&server->srv_mutex); | 212 | mutex_unlock(&server->srv_mutex); |
192 | 213 | ||
193 | if (rc) | 214 | if (rc) |
@@ -686,12 +707,17 @@ calc_seckey(struct cifs_ses *ses) | |||
686 | void | 707 | void |
687 | cifs_crypto_shash_release(struct TCP_Server_Info *server) | 708 | cifs_crypto_shash_release(struct TCP_Server_Info *server) |
688 | { | 709 | { |
710 | if (server->secmech.hmacsha256) | ||
711 | crypto_free_shash(server->secmech.hmacsha256); | ||
712 | |||
689 | if (server->secmech.md5) | 713 | if (server->secmech.md5) |
690 | crypto_free_shash(server->secmech.md5); | 714 | crypto_free_shash(server->secmech.md5); |
691 | 715 | ||
692 | if (server->secmech.hmacmd5) | 716 | if (server->secmech.hmacmd5) |
693 | crypto_free_shash(server->secmech.hmacmd5); | 717 | crypto_free_shash(server->secmech.hmacmd5); |
694 | 718 | ||
719 | kfree(server->secmech.sdeschmacsha256); | ||
720 | |||
695 | kfree(server->secmech.sdeschmacmd5); | 721 | kfree(server->secmech.sdeschmacmd5); |
696 | 722 | ||
697 | kfree(server->secmech.sdescmd5); | 723 | kfree(server->secmech.sdescmd5); |
@@ -716,6 +742,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) | |||
716 | goto crypto_allocate_md5_fail; | 742 | goto crypto_allocate_md5_fail; |
717 | } | 743 | } |
718 | 744 | ||
745 | server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0); | ||
746 | if (IS_ERR(server->secmech.hmacsha256)) { | ||
747 | cERROR(1, "could not allocate crypto hmacsha256\n"); | ||
748 | rc = PTR_ERR(server->secmech.hmacsha256); | ||
749 | goto crypto_allocate_hmacsha256_fail; | ||
750 | } | ||
751 | |||
719 | size = sizeof(struct shash_desc) + | 752 | size = sizeof(struct shash_desc) + |
720 | crypto_shash_descsize(server->secmech.hmacmd5); | 753 | crypto_shash_descsize(server->secmech.hmacmd5); |
721 | server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); | 754 | server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); |
@@ -727,7 +760,6 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) | |||
727 | server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5; | 760 | server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5; |
728 | server->secmech.sdeschmacmd5->shash.flags = 0x0; | 761 | server->secmech.sdeschmacmd5->shash.flags = 0x0; |
729 | 762 | ||
730 | |||
731 | size = sizeof(struct shash_desc) + | 763 | size = sizeof(struct shash_desc) + |
732 | crypto_shash_descsize(server->secmech.md5); | 764 | crypto_shash_descsize(server->secmech.md5); |
733 | server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL); | 765 | server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL); |
@@ -739,12 +771,29 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) | |||
739 | server->secmech.sdescmd5->shash.tfm = server->secmech.md5; | 771 | server->secmech.sdescmd5->shash.tfm = server->secmech.md5; |
740 | server->secmech.sdescmd5->shash.flags = 0x0; | 772 | server->secmech.sdescmd5->shash.flags = 0x0; |
741 | 773 | ||
774 | size = sizeof(struct shash_desc) + | ||
775 | crypto_shash_descsize(server->secmech.hmacsha256); | ||
776 | server->secmech.sdeschmacsha256 = kmalloc(size, GFP_KERNEL); | ||
777 | if (!server->secmech.sdeschmacsha256) { | ||
778 | cERROR(1, "%s: Can't alloc hmacsha256\n", __func__); | ||
779 | rc = -ENOMEM; | ||
780 | goto crypto_allocate_hmacsha256_sdesc_fail; | ||
781 | } | ||
782 | server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256; | ||
783 | server->secmech.sdeschmacsha256->shash.flags = 0x0; | ||
784 | |||
742 | return 0; | 785 | return 0; |
743 | 786 | ||
787 | crypto_allocate_hmacsha256_sdesc_fail: | ||
788 | kfree(server->secmech.sdescmd5); | ||
789 | |||
744 | crypto_allocate_md5_sdesc_fail: | 790 | crypto_allocate_md5_sdesc_fail: |
745 | kfree(server->secmech.sdeschmacmd5); | 791 | kfree(server->secmech.sdeschmacmd5); |
746 | 792 | ||
747 | crypto_allocate_hmacmd5_sdesc_fail: | 793 | crypto_allocate_hmacmd5_sdesc_fail: |
794 | crypto_free_shash(server->secmech.hmacsha256); | ||
795 | |||
796 | crypto_allocate_hmacsha256_fail: | ||
748 | crypto_free_shash(server->secmech.md5); | 797 | crypto_free_shash(server->secmech.md5); |
749 | 798 | ||
750 | crypto_allocate_md5_fail: | 799 | crypto_allocate_md5_fail: |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index db8a404a51dd..e7931cc55d0c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/kthread.h> | 36 | #include <linux/kthread.h> |
37 | #include <linux/freezer.h> | 37 | #include <linux/freezer.h> |
38 | #include <linux/namei.h> | 38 | #include <linux/namei.h> |
39 | #include <linux/random.h> | ||
39 | #include <net/ipv6.h> | 40 | #include <net/ipv6.h> |
40 | #include "cifsfs.h" | 41 | #include "cifsfs.h" |
41 | #include "cifspdu.h" | 42 | #include "cifspdu.h" |
@@ -51,7 +52,6 @@ | |||
51 | #ifdef CONFIG_CIFS_SMB2 | 52 | #ifdef CONFIG_CIFS_SMB2 |
52 | #include "smb2pdu.h" | 53 | #include "smb2pdu.h" |
53 | #endif | 54 | #endif |
54 | #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ | ||
55 | 55 | ||
56 | int cifsFYI = 0; | 56 | int cifsFYI = 0; |
57 | int cifsERROR = 1; | 57 | int cifsERROR = 1; |
@@ -89,6 +89,10 @@ extern mempool_t *cifs_mid_poolp; | |||
89 | 89 | ||
90 | struct workqueue_struct *cifsiod_wq; | 90 | struct workqueue_struct *cifsiod_wq; |
91 | 91 | ||
92 | #ifdef CONFIG_CIFS_SMB2 | ||
93 | __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; | ||
94 | #endif | ||
95 | |||
92 | static int | 96 | static int |
93 | cifs_read_super(struct super_block *sb) | 97 | cifs_read_super(struct super_block *sb) |
94 | { | 98 | { |
@@ -160,13 +164,12 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
160 | struct super_block *sb = dentry->d_sb; | 164 | struct super_block *sb = dentry->d_sb; |
161 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | 165 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); |
162 | struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); | 166 | struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); |
163 | int rc = -EOPNOTSUPP; | 167 | struct TCP_Server_Info *server = tcon->ses->server; |
164 | unsigned int xid; | 168 | unsigned int xid; |
169 | int rc = 0; | ||
165 | 170 | ||
166 | xid = get_xid(); | 171 | xid = get_xid(); |
167 | 172 | ||
168 | buf->f_type = CIFS_MAGIC_NUMBER; | ||
169 | |||
170 | /* | 173 | /* |
171 | * PATH_MAX may be too long - it would presumably be total path, | 174 | * PATH_MAX may be too long - it would presumably be total path, |
172 | * but note that some servers (includinng Samba 3) have a shorter | 175 | * but note that some servers (includinng Samba 3) have a shorter |
@@ -178,27 +181,8 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
178 | buf->f_files = 0; /* undefined */ | 181 | buf->f_files = 0; /* undefined */ |
179 | buf->f_ffree = 0; /* unlimited */ | 182 | buf->f_ffree = 0; /* unlimited */ |
180 | 183 | ||
181 | /* | 184 | if (server->ops->queryfs) |
182 | * We could add a second check for a QFS Unix capability bit | 185 | rc = server->ops->queryfs(xid, tcon, buf); |
183 | */ | ||
184 | if ((tcon->ses->capabilities & CAP_UNIX) && | ||
185 | (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability))) | ||
186 | rc = CIFSSMBQFSPosixInfo(xid, tcon, buf); | ||
187 | |||
188 | /* | ||
189 | * Only need to call the old QFSInfo if failed on newer one, | ||
190 | * e.g. by OS/2. | ||
191 | **/ | ||
192 | if (rc && (tcon->ses->capabilities & CAP_NT_SMBS)) | ||
193 | rc = CIFSSMBQFSInfo(xid, tcon, buf); | ||
194 | |||
195 | /* | ||
196 | * Some old Windows servers also do not support level 103, retry with | ||
197 | * older level one if old server failed the previous call or we | ||
198 | * bypassed it because we detected that this was an older LANMAN sess | ||
199 | */ | ||
200 | if (rc) | ||
201 | rc = SMBOldQFSInfo(xid, tcon, buf); | ||
202 | 186 | ||
203 | free_xid(xid); | 187 | free_xid(xid); |
204 | return 0; | 188 | return 0; |
@@ -239,9 +223,10 @@ cifs_alloc_inode(struct super_block *sb) | |||
239 | return NULL; | 223 | return NULL; |
240 | cifs_inode->cifsAttrs = 0x20; /* default */ | 224 | cifs_inode->cifsAttrs = 0x20; /* default */ |
241 | cifs_inode->time = 0; | 225 | cifs_inode->time = 0; |
242 | /* Until the file is open and we have gotten oplock | 226 | /* |
243 | info back from the server, can not assume caching of | 227 | * Until the file is open and we have gotten oplock info back from the |
244 | file data or metadata */ | 228 | * server, can not assume caching of file data or metadata. |
229 | */ | ||
245 | cifs_set_oplock_level(cifs_inode, 0); | 230 | cifs_set_oplock_level(cifs_inode, 0); |
246 | cifs_inode->delete_pending = false; | 231 | cifs_inode->delete_pending = false; |
247 | cifs_inode->invalid_mapping = false; | 232 | cifs_inode->invalid_mapping = false; |
@@ -249,11 +234,16 @@ cifs_alloc_inode(struct super_block *sb) | |||
249 | cifs_inode->server_eof = 0; | 234 | cifs_inode->server_eof = 0; |
250 | cifs_inode->uniqueid = 0; | 235 | cifs_inode->uniqueid = 0; |
251 | cifs_inode->createtime = 0; | 236 | cifs_inode->createtime = 0; |
252 | 237 | #ifdef CONFIG_CIFS_SMB2 | |
253 | /* Can not set i_flags here - they get immediately overwritten | 238 | get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE); |
254 | to zero by the VFS */ | 239 | #endif |
255 | /* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME;*/ | 240 | /* |
241 | * Can not set i_flags here - they get immediately overwritten to zero | ||
242 | * by the VFS. | ||
243 | */ | ||
244 | /* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME; */ | ||
256 | INIT_LIST_HEAD(&cifs_inode->openFileList); | 245 | INIT_LIST_HEAD(&cifs_inode->openFileList); |
246 | INIT_LIST_HEAD(&cifs_inode->llist); | ||
257 | return &cifs_inode->vfs_inode; | 247 | return &cifs_inode->vfs_inode; |
258 | } | 248 | } |
259 | 249 | ||
@@ -360,7 +350,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) | |||
360 | cifs_show_security(s, tcon->ses->server); | 350 | cifs_show_security(s, tcon->ses->server); |
361 | cifs_show_cache_flavor(s, cifs_sb); | 351 | cifs_show_cache_flavor(s, cifs_sb); |
362 | 352 | ||
363 | seq_printf(s, ",unc=%s", tcon->treeName); | 353 | seq_printf(s, ",unc="); |
354 | seq_escape(s, tcon->treeName, " \t\n\\"); | ||
364 | 355 | ||
365 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) | 356 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) |
366 | seq_printf(s, ",multiuser"); | 357 | seq_printf(s, ",multiuser"); |
@@ -957,7 +948,7 @@ cifs_init_once(void *inode) | |||
957 | struct cifsInodeInfo *cifsi = inode; | 948 | struct cifsInodeInfo *cifsi = inode; |
958 | 949 | ||
959 | inode_init_once(&cifsi->vfs_inode); | 950 | inode_init_once(&cifsi->vfs_inode); |
960 | mutex_init(&cifsi->lock_mutex); | 951 | init_rwsem(&cifsi->lock_sem); |
961 | } | 952 | } |
962 | 953 | ||
963 | static int | 954 | static int |
@@ -977,6 +968,11 @@ cifs_init_inodecache(void) | |||
977 | static void | 968 | static void |
978 | cifs_destroy_inodecache(void) | 969 | cifs_destroy_inodecache(void) |
979 | { | 970 | { |
971 | /* | ||
972 | * Make sure all delayed rcu free inodes are flushed before we | ||
973 | * destroy cache. | ||
974 | */ | ||
975 | rcu_barrier(); | ||
980 | kmem_cache_destroy(cifs_inode_cachep); | 976 | kmem_cache_destroy(cifs_inode_cachep); |
981 | } | 977 | } |
982 | 978 | ||
@@ -1127,6 +1123,10 @@ init_cifs(void) | |||
1127 | spin_lock_init(&cifs_file_list_lock); | 1123 | spin_lock_init(&cifs_file_list_lock); |
1128 | spin_lock_init(&GlobalMid_Lock); | 1124 | spin_lock_init(&GlobalMid_Lock); |
1129 | 1125 | ||
1126 | #ifdef CONFIG_CIFS_SMB2 | ||
1127 | get_random_bytes(cifs_client_guid, SMB2_CLIENT_GUID_SIZE); | ||
1128 | #endif | ||
1129 | |||
1130 | if (cifs_max_pending < 2) { | 1130 | if (cifs_max_pending < 2) { |
1131 | cifs_max_pending = 2; | 1131 | cifs_max_pending = 2; |
1132 | cFYI(1, "cifs_max_pending set to min of 2"); | 1132 | cFYI(1, "cifs_max_pending set to min of 2"); |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 1c49c5a9b27a..7163419cecd9 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -128,5 +128,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); | |||
128 | extern const struct export_operations cifs_export_ops; | 128 | extern const struct export_operations cifs_export_ops; |
129 | #endif /* CONFIG_CIFS_NFSD_EXPORT */ | 129 | #endif /* CONFIG_CIFS_NFSD_EXPORT */ |
130 | 130 | ||
131 | #define CIFS_VERSION "1.78" | 131 | #define CIFS_VERSION "2.0" |
132 | #endif /* _CIFSFS_H */ | 132 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 977dc0e85ccb..f5af2527fc69 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -32,6 +32,8 @@ | |||
32 | #include "smb2pdu.h" | 32 | #include "smb2pdu.h" |
33 | #endif | 33 | #endif |
34 | 34 | ||
35 | #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ | ||
36 | |||
35 | /* | 37 | /* |
36 | * The sizes of various internal tables and strings | 38 | * The sizes of various internal tables and strings |
37 | */ | 39 | */ |
@@ -128,8 +130,10 @@ struct sdesc { | |||
128 | struct cifs_secmech { | 130 | struct cifs_secmech { |
129 | struct crypto_shash *hmacmd5; /* hmac-md5 hash function */ | 131 | struct crypto_shash *hmacmd5; /* hmac-md5 hash function */ |
130 | struct crypto_shash *md5; /* md5 hash function */ | 132 | struct crypto_shash *md5; /* md5 hash function */ |
133 | struct crypto_shash *hmacsha256; /* hmac-sha256 hash function */ | ||
131 | struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */ | 134 | struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */ |
132 | struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */ | 135 | struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */ |
136 | struct sdesc *sdeschmacsha256; /* ctxt to generate smb2 signature */ | ||
133 | }; | 137 | }; |
134 | 138 | ||
135 | /* per smb session structure/fields */ | 139 | /* per smb session structure/fields */ |
@@ -158,9 +162,24 @@ struct cifs_cred { | |||
158 | ***************************************************************** | 162 | ***************************************************************** |
159 | */ | 163 | */ |
160 | 164 | ||
165 | /* | ||
166 | * A smb_rqst represents a complete request to be issued to a server. It's | ||
167 | * formed by a kvec array, followed by an array of pages. Page data is assumed | ||
168 | * to start at the beginning of the first page. | ||
169 | */ | ||
170 | struct smb_rqst { | ||
171 | struct kvec *rq_iov; /* array of kvecs */ | ||
172 | unsigned int rq_nvec; /* number of kvecs in array */ | ||
173 | struct page **rq_pages; /* pointer to array of page ptrs */ | ||
174 | unsigned int rq_npages; /* number pages in array */ | ||
175 | unsigned int rq_pagesz; /* page size to use */ | ||
176 | unsigned int rq_tailsz; /* length of last page */ | ||
177 | }; | ||
178 | |||
161 | enum smb_version { | 179 | enum smb_version { |
162 | Smb_1 = 1, | 180 | Smb_1 = 1, |
163 | Smb_21, | 181 | Smb_21, |
182 | Smb_30, | ||
164 | }; | 183 | }; |
165 | 184 | ||
166 | struct mid_q_entry; | 185 | struct mid_q_entry; |
@@ -171,17 +190,23 @@ struct cifs_tcon; | |||
171 | struct dfs_info3_param; | 190 | struct dfs_info3_param; |
172 | struct cifs_fattr; | 191 | struct cifs_fattr; |
173 | struct smb_vol; | 192 | struct smb_vol; |
193 | struct cifs_fid; | ||
194 | struct cifs_readdata; | ||
195 | struct cifs_writedata; | ||
196 | struct cifs_io_parms; | ||
197 | struct cifs_search_info; | ||
198 | struct cifsInodeInfo; | ||
174 | 199 | ||
175 | struct smb_version_operations { | 200 | struct smb_version_operations { |
176 | int (*send_cancel)(struct TCP_Server_Info *, void *, | 201 | int (*send_cancel)(struct TCP_Server_Info *, void *, |
177 | struct mid_q_entry *); | 202 | struct mid_q_entry *); |
178 | bool (*compare_fids)(struct cifsFileInfo *, struct cifsFileInfo *); | 203 | bool (*compare_fids)(struct cifsFileInfo *, struct cifsFileInfo *); |
179 | /* setup request: allocate mid, sign message */ | 204 | /* setup request: allocate mid, sign message */ |
180 | int (*setup_request)(struct cifs_ses *, struct kvec *, unsigned int, | 205 | struct mid_q_entry *(*setup_request)(struct cifs_ses *, |
181 | struct mid_q_entry **); | 206 | struct smb_rqst *); |
182 | /* setup async request: allocate mid, sign message */ | 207 | /* setup async request: allocate mid, sign message */ |
183 | int (*setup_async_request)(struct TCP_Server_Info *, struct kvec *, | 208 | struct mid_q_entry *(*setup_async_request)(struct TCP_Server_Info *, |
184 | unsigned int, struct mid_q_entry **); | 209 | struct smb_rqst *); |
185 | /* check response: verify signature, map error */ | 210 | /* check response: verify signature, map error */ |
186 | int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *, | 211 | int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *, |
187 | bool); | 212 | bool); |
@@ -212,6 +237,10 @@ struct smb_version_operations { | |||
212 | bool (*need_neg)(struct TCP_Server_Info *); | 237 | bool (*need_neg)(struct TCP_Server_Info *); |
213 | /* negotiate to the server */ | 238 | /* negotiate to the server */ |
214 | int (*negotiate)(const unsigned int, struct cifs_ses *); | 239 | int (*negotiate)(const unsigned int, struct cifs_ses *); |
240 | /* set negotiated write size */ | ||
241 | unsigned int (*negotiate_wsize)(struct cifs_tcon *, struct smb_vol *); | ||
242 | /* set negotiated read size */ | ||
243 | unsigned int (*negotiate_rsize)(struct cifs_tcon *, struct smb_vol *); | ||
215 | /* setup smb sessionn */ | 244 | /* setup smb sessionn */ |
216 | int (*sess_setup)(const unsigned int, struct cifs_ses *, | 245 | int (*sess_setup)(const unsigned int, struct cifs_ses *, |
217 | const struct nls_table *); | 246 | const struct nls_table *); |
@@ -235,10 +264,22 @@ struct smb_version_operations { | |||
235 | int (*query_path_info)(const unsigned int, struct cifs_tcon *, | 264 | int (*query_path_info)(const unsigned int, struct cifs_tcon *, |
236 | struct cifs_sb_info *, const char *, | 265 | struct cifs_sb_info *, const char *, |
237 | FILE_ALL_INFO *, bool *); | 266 | FILE_ALL_INFO *, bool *); |
267 | /* query file data from the server */ | ||
268 | int (*query_file_info)(const unsigned int, struct cifs_tcon *, | ||
269 | struct cifs_fid *, FILE_ALL_INFO *); | ||
238 | /* get server index number */ | 270 | /* get server index number */ |
239 | int (*get_srv_inum)(const unsigned int, struct cifs_tcon *, | 271 | int (*get_srv_inum)(const unsigned int, struct cifs_tcon *, |
240 | struct cifs_sb_info *, const char *, | 272 | struct cifs_sb_info *, const char *, |
241 | u64 *uniqueid, FILE_ALL_INFO *); | 273 | u64 *uniqueid, FILE_ALL_INFO *); |
274 | /* set size by path */ | ||
275 | int (*set_path_size)(const unsigned int, struct cifs_tcon *, | ||
276 | const char *, __u64, struct cifs_sb_info *, bool); | ||
277 | /* set size by file handle */ | ||
278 | int (*set_file_size)(const unsigned int, struct cifs_tcon *, | ||
279 | struct cifsFileInfo *, __u64, bool); | ||
280 | /* set attributes */ | ||
281 | int (*set_file_info)(struct inode *, const char *, FILE_BASIC_INFO *, | ||
282 | const unsigned int); | ||
242 | /* build a full path to the root of the mount */ | 283 | /* build a full path to the root of the mount */ |
243 | char * (*build_path_to_root)(struct smb_vol *, struct cifs_sb_info *, | 284 | char * (*build_path_to_root)(struct smb_vol *, struct cifs_sb_info *, |
244 | struct cifs_tcon *); | 285 | struct cifs_tcon *); |
@@ -256,10 +297,84 @@ struct smb_version_operations { | |||
256 | /* remove directory */ | 297 | /* remove directory */ |
257 | int (*rmdir)(const unsigned int, struct cifs_tcon *, const char *, | 298 | int (*rmdir)(const unsigned int, struct cifs_tcon *, const char *, |
258 | struct cifs_sb_info *); | 299 | struct cifs_sb_info *); |
300 | /* unlink file */ | ||
301 | int (*unlink)(const unsigned int, struct cifs_tcon *, const char *, | ||
302 | struct cifs_sb_info *); | ||
303 | /* open, rename and delete file */ | ||
304 | int (*rename_pending_delete)(const char *, struct dentry *, | ||
305 | const unsigned int); | ||
306 | /* send rename request */ | ||
307 | int (*rename)(const unsigned int, struct cifs_tcon *, const char *, | ||
308 | const char *, struct cifs_sb_info *); | ||
309 | /* send create hardlink request */ | ||
310 | int (*create_hardlink)(const unsigned int, struct cifs_tcon *, | ||
311 | const char *, const char *, | ||
312 | struct cifs_sb_info *); | ||
313 | /* open a file for non-posix mounts */ | ||
314 | int (*open)(const unsigned int, struct cifs_tcon *, const char *, int, | ||
315 | int, int, struct cifs_fid *, __u32 *, FILE_ALL_INFO *, | ||
316 | struct cifs_sb_info *); | ||
317 | /* set fid protocol-specific info */ | ||
318 | void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); | ||
319 | /* close a file */ | ||
320 | void (*close)(const unsigned int, struct cifs_tcon *, | ||
321 | struct cifs_fid *); | ||
322 | /* send a flush request to the server */ | ||
323 | int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); | ||
324 | /* async read from the server */ | ||
325 | int (*async_readv)(struct cifs_readdata *); | ||
326 | /* async write to the server */ | ||
327 | int (*async_writev)(struct cifs_writedata *); | ||
328 | /* sync read from the server */ | ||
329 | int (*sync_read)(const unsigned int, struct cifsFileInfo *, | ||
330 | struct cifs_io_parms *, unsigned int *, char **, | ||
331 | int *); | ||
332 | /* sync write to the server */ | ||
333 | int (*sync_write)(const unsigned int, struct cifsFileInfo *, | ||
334 | struct cifs_io_parms *, unsigned int *, struct kvec *, | ||
335 | unsigned long); | ||
336 | /* open dir, start readdir */ | ||
337 | int (*query_dir_first)(const unsigned int, struct cifs_tcon *, | ||
338 | const char *, struct cifs_sb_info *, | ||
339 | struct cifs_fid *, __u16, | ||
340 | struct cifs_search_info *); | ||
341 | /* continue readdir */ | ||
342 | int (*query_dir_next)(const unsigned int, struct cifs_tcon *, | ||
343 | struct cifs_fid *, | ||
344 | __u16, struct cifs_search_info *srch_inf); | ||
345 | /* close dir */ | ||
346 | int (*close_dir)(const unsigned int, struct cifs_tcon *, | ||
347 | struct cifs_fid *); | ||
348 | /* calculate a size of SMB message */ | ||
349 | unsigned int (*calc_smb_size)(void *); | ||
350 | /* check for STATUS_PENDING and process it in a positive case */ | ||
351 | bool (*is_status_pending)(char *, struct TCP_Server_Info *, int); | ||
352 | /* send oplock break response */ | ||
353 | int (*oplock_response)(struct cifs_tcon *, struct cifs_fid *, | ||
354 | struct cifsInodeInfo *); | ||
355 | /* query remote filesystem */ | ||
356 | int (*queryfs)(const unsigned int, struct cifs_tcon *, | ||
357 | struct kstatfs *); | ||
358 | /* send mandatory brlock to the server */ | ||
359 | int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64, | ||
360 | __u64, __u32, int, int, bool); | ||
361 | /* unlock range of mandatory locks */ | ||
362 | int (*mand_unlock_range)(struct cifsFileInfo *, struct file_lock *, | ||
363 | const unsigned int); | ||
364 | /* push brlocks from the cache to the server */ | ||
365 | int (*push_mand_locks)(struct cifsFileInfo *); | ||
366 | /* get lease key of the inode */ | ||
367 | void (*get_lease_key)(struct inode *, struct cifs_fid *fid); | ||
368 | /* set lease key of the inode */ | ||
369 | void (*set_lease_key)(struct inode *, struct cifs_fid *fid); | ||
370 | /* generate new lease key */ | ||
371 | void (*new_lease_key)(struct cifs_fid *fid); | ||
259 | }; | 372 | }; |
260 | 373 | ||
261 | struct smb_version_values { | 374 | struct smb_version_values { |
262 | char *version_string; | 375 | char *version_string; |
376 | __u16 protocol_id; | ||
377 | __u32 req_capabilities; | ||
263 | __u32 large_lock_type; | 378 | __u32 large_lock_type; |
264 | __u32 exclusive_lock_type; | 379 | __u32 exclusive_lock_type; |
265 | __u32 shared_lock_type; | 380 | __u32 shared_lock_type; |
@@ -496,6 +611,51 @@ get_next_mid(struct TCP_Server_Info *server) | |||
496 | } | 611 | } |
497 | 612 | ||
498 | /* | 613 | /* |
614 | * When the server supports very large reads and writes via POSIX extensions, | ||
615 | * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not | ||
616 | * including the RFC1001 length. | ||
617 | * | ||
618 | * Note that this might make for "interesting" allocation problems during | ||
619 | * writeback however as we have to allocate an array of pointers for the | ||
620 | * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. | ||
621 | * | ||
622 | * For reads, there is a similar problem as we need to allocate an array | ||
623 | * of kvecs to handle the receive, though that should only need to be done | ||
624 | * once. | ||
625 | */ | ||
626 | #define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) | ||
627 | #define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4) | ||
628 | |||
629 | /* | ||
630 | * When the server doesn't allow large posix writes, only allow a rsize/wsize | ||
631 | * of 2^17-1 minus the size of the call header. That allows for a read or | ||
632 | * write up to the maximum size described by RFC1002. | ||
633 | */ | ||
634 | #define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) | ||
635 | #define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) | ||
636 | |||
637 | /* | ||
638 | * The default wsize is 1M. find_get_pages seems to return a maximum of 256 | ||
639 | * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill | ||
640 | * a single wsize request with a single call. | ||
641 | */ | ||
642 | #define CIFS_DEFAULT_IOSIZE (1024 * 1024) | ||
643 | |||
644 | /* | ||
645 | * Windows only supports a max of 60kb reads and 65535 byte writes. Default to | ||
646 | * those values when posix extensions aren't in force. In actuality here, we | ||
647 | * use 65536 to allow for a write that is a multiple of 4k. Most servers seem | ||
648 | * to be ok with the extra byte even though Windows doesn't send writes that | ||
649 | * are that large. | ||
650 | * | ||
651 | * Citation: | ||
652 | * | ||
653 | * http://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx | ||
654 | */ | ||
655 | #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) | ||
656 | #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) | ||
657 | |||
658 | /* | ||
499 | * Macros to allow the TCP_Server_Info->net field and related code to drop out | 659 | * Macros to allow the TCP_Server_Info->net field and related code to drop out |
500 | * when CONFIG_NET_NS isn't set. | 660 | * when CONFIG_NET_NS isn't set. |
501 | */ | 661 | */ |
@@ -559,6 +719,7 @@ struct cifs_ses { | |||
559 | __u16 session_flags; | 719 | __u16 session_flags; |
560 | #endif /* CONFIG_CIFS_SMB2 */ | 720 | #endif /* CONFIG_CIFS_SMB2 */ |
561 | }; | 721 | }; |
722 | |||
562 | /* no more than one of the following three session flags may be set */ | 723 | /* no more than one of the following three session flags may be set */ |
563 | #define CIFS_SES_NT4 1 | 724 | #define CIFS_SES_NT4 1 |
564 | #define CIFS_SES_OS2 2 | 725 | #define CIFS_SES_OS2 2 |
@@ -665,6 +826,7 @@ struct cifs_tcon { | |||
665 | u64 resource_id; /* server resource id */ | 826 | u64 resource_id; /* server resource id */ |
666 | struct fscache_cookie *fscache; /* cookie for share */ | 827 | struct fscache_cookie *fscache; /* cookie for share */ |
667 | #endif | 828 | #endif |
829 | struct list_head pending_opens; /* list of incomplete opens */ | ||
668 | /* BB add field for back pointer to sb struct(s)? */ | 830 | /* BB add field for back pointer to sb struct(s)? */ |
669 | }; | 831 | }; |
670 | 832 | ||
@@ -707,6 +869,15 @@ cifs_get_tlink(struct tcon_link *tlink) | |||
707 | /* This function is always expected to succeed */ | 869 | /* This function is always expected to succeed */ |
708 | extern struct cifs_tcon *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb); | 870 | extern struct cifs_tcon *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb); |
709 | 871 | ||
872 | #define CIFS_OPLOCK_NO_CHANGE 0xfe | ||
873 | |||
874 | struct cifs_pending_open { | ||
875 | struct list_head olist; | ||
876 | struct tcon_link *tlink; | ||
877 | __u8 lease_key[16]; | ||
878 | __u32 oplock; | ||
879 | }; | ||
880 | |||
710 | /* | 881 | /* |
711 | * This info hangs off the cifsFileInfo structure, pointed to by llist. | 882 | * This info hangs off the cifsFileInfo structure, pointed to by llist. |
712 | * This is used to track byte stream locks on the file | 883 | * This is used to track byte stream locks on the file |
@@ -740,16 +911,29 @@ struct cifs_search_info { | |||
740 | bool smallBuf:1; /* so we know which buf_release function to call */ | 911 | bool smallBuf:1; /* so we know which buf_release function to call */ |
741 | }; | 912 | }; |
742 | 913 | ||
914 | struct cifs_fid { | ||
915 | __u16 netfid; | ||
916 | #ifdef CONFIG_CIFS_SMB2 | ||
917 | __u64 persistent_fid; /* persist file id for smb2 */ | ||
918 | __u64 volatile_fid; /* volatile file id for smb2 */ | ||
919 | __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ | ||
920 | #endif | ||
921 | struct cifs_pending_open *pending_open; | ||
922 | }; | ||
923 | |||
924 | struct cifs_fid_locks { | ||
925 | struct list_head llist; | ||
926 | struct cifsFileInfo *cfile; /* fid that owns locks */ | ||
927 | struct list_head locks; /* locks held by fid above */ | ||
928 | }; | ||
929 | |||
743 | struct cifsFileInfo { | 930 | struct cifsFileInfo { |
744 | struct list_head tlist; /* pointer to next fid owned by tcon */ | 931 | struct list_head tlist; /* pointer to next fid owned by tcon */ |
745 | struct list_head flist; /* next fid (file instance) for this inode */ | 932 | struct list_head flist; /* next fid (file instance) for this inode */ |
746 | struct list_head llist; /* | 933 | struct cifs_fid_locks *llist; /* brlocks held by this fid */ |
747 | * brlocks held by this fid, protected by | ||
748 | * lock_mutex from cifsInodeInfo structure | ||
749 | */ | ||
750 | unsigned int uid; /* allows finding which FileInfo structure */ | 934 | unsigned int uid; /* allows finding which FileInfo structure */ |
751 | __u32 pid; /* process id who opened file */ | 935 | __u32 pid; /* process id who opened file */ |
752 | __u16 netfid; /* file id from remote */ | 936 | struct cifs_fid fid; /* file id from remote */ |
753 | /* BB add lock scope info here if needed */ ; | 937 | /* BB add lock scope info here if needed */ ; |
754 | /* lock scope id (0 if none) */ | 938 | /* lock scope id (0 if none) */ |
755 | struct dentry *dentry; | 939 | struct dentry *dentry; |
@@ -765,12 +949,60 @@ struct cifsFileInfo { | |||
765 | 949 | ||
766 | struct cifs_io_parms { | 950 | struct cifs_io_parms { |
767 | __u16 netfid; | 951 | __u16 netfid; |
952 | #ifdef CONFIG_CIFS_SMB2 | ||
953 | __u64 persistent_fid; /* persist file id for smb2 */ | ||
954 | __u64 volatile_fid; /* volatile file id for smb2 */ | ||
955 | #endif | ||
768 | __u32 pid; | 956 | __u32 pid; |
769 | __u64 offset; | 957 | __u64 offset; |
770 | unsigned int length; | 958 | unsigned int length; |
771 | struct cifs_tcon *tcon; | 959 | struct cifs_tcon *tcon; |
772 | }; | 960 | }; |
773 | 961 | ||
962 | struct cifs_readdata; | ||
963 | |||
964 | /* asynchronous read support */ | ||
965 | struct cifs_readdata { | ||
966 | struct kref refcount; | ||
967 | struct list_head list; | ||
968 | struct completion done; | ||
969 | struct cifsFileInfo *cfile; | ||
970 | struct address_space *mapping; | ||
971 | __u64 offset; | ||
972 | unsigned int bytes; | ||
973 | pid_t pid; | ||
974 | int result; | ||
975 | struct work_struct work; | ||
976 | int (*read_into_pages)(struct TCP_Server_Info *server, | ||
977 | struct cifs_readdata *rdata, | ||
978 | unsigned int len); | ||
979 | struct kvec iov; | ||
980 | unsigned int pagesz; | ||
981 | unsigned int tailsz; | ||
982 | unsigned int nr_pages; | ||
983 | struct page *pages[]; | ||
984 | }; | ||
985 | |||
986 | struct cifs_writedata; | ||
987 | |||
988 | /* asynchronous write support */ | ||
989 | struct cifs_writedata { | ||
990 | struct kref refcount; | ||
991 | struct list_head list; | ||
992 | struct completion done; | ||
993 | enum writeback_sync_modes sync_mode; | ||
994 | struct work_struct work; | ||
995 | struct cifsFileInfo *cfile; | ||
996 | __u64 offset; | ||
997 | pid_t pid; | ||
998 | unsigned int bytes; | ||
999 | int result; | ||
1000 | unsigned int pagesz; | ||
1001 | unsigned int tailsz; | ||
1002 | unsigned int nr_pages; | ||
1003 | struct page *pages[1]; | ||
1004 | }; | ||
1005 | |||
774 | /* | 1006 | /* |
775 | * Take a reference on the file private data. Must be called with | 1007 | * Take a reference on the file private data. Must be called with |
776 | * cifs_file_list_lock held. | 1008 | * cifs_file_list_lock held. |
@@ -790,11 +1022,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file); | |||
790 | 1022 | ||
791 | struct cifsInodeInfo { | 1023 | struct cifsInodeInfo { |
792 | bool can_cache_brlcks; | 1024 | bool can_cache_brlcks; |
793 | struct mutex lock_mutex; /* | 1025 | struct list_head llist; /* locks helb by this inode */ |
794 | * protect the field above and llist | 1026 | struct rw_semaphore lock_sem; /* protect the fields above */ |
795 | * from every cifsFileInfo structure | ||
796 | * from openFileList | ||
797 | */ | ||
798 | /* BB add in lists for dirty pages i.e. write caching info for oplock */ | 1027 | /* BB add in lists for dirty pages i.e. write caching info for oplock */ |
799 | struct list_head openFileList; | 1028 | struct list_head openFileList; |
800 | __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ | 1029 | __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ |
@@ -806,6 +1035,9 @@ struct cifsInodeInfo { | |||
806 | u64 server_eof; /* current file size on server -- protected by i_lock */ | 1035 | u64 server_eof; /* current file size on server -- protected by i_lock */ |
807 | u64 uniqueid; /* server inode number */ | 1036 | u64 uniqueid; /* server inode number */ |
808 | u64 createtime; /* creation time on server */ | 1037 | u64 createtime; /* creation time on server */ |
1038 | #ifdef CONFIG_CIFS_SMB2 | ||
1039 | __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for this inode */ | ||
1040 | #endif | ||
809 | #ifdef CONFIG_CIFS_FSCACHE | 1041 | #ifdef CONFIG_CIFS_FSCACHE |
810 | struct fscache_cookie *fscache; | 1042 | struct fscache_cookie *fscache; |
811 | #endif | 1043 | #endif |
@@ -1130,7 +1362,7 @@ require use of the stronger protocol */ | |||
1130 | #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ | 1362 | #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ |
1131 | #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ | 1363 | #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ |
1132 | 1364 | ||
1133 | #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2) | 1365 | #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP) |
1134 | #define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) | 1366 | #define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) |
1135 | #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) | 1367 | #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) |
1136 | /* | 1368 | /* |
@@ -1267,7 +1499,13 @@ extern mempool_t *cifs_mid_poolp; | |||
1267 | #define SMB1_VERSION_STRING "1.0" | 1499 | #define SMB1_VERSION_STRING "1.0" |
1268 | extern struct smb_version_operations smb1_operations; | 1500 | extern struct smb_version_operations smb1_operations; |
1269 | extern struct smb_version_values smb1_values; | 1501 | extern struct smb_version_values smb1_values; |
1502 | #define SMB20_VERSION_STRING "2.0" | ||
1503 | /*extern struct smb_version_operations smb20_operations; */ /* not needed yet */ | ||
1504 | extern struct smb_version_values smb20_values; | ||
1270 | #define SMB21_VERSION_STRING "2.1" | 1505 | #define SMB21_VERSION_STRING "2.1" |
1271 | extern struct smb_version_operations smb21_operations; | 1506 | extern struct smb_version_operations smb21_operations; |
1272 | extern struct smb_version_values smb21_values; | 1507 | extern struct smb_version_values smb21_values; |
1508 | #define SMB30_VERSION_STRING "3.0" | ||
1509 | /*extern struct smb_version_operations smb30_operations; */ /* not needed yet */ | ||
1510 | extern struct smb_version_values smb30_values; | ||
1273 | #endif /* _CIFS_GLOB_H */ | 1511 | #endif /* _CIFS_GLOB_H */ |
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 3fb03e2c8e86..b9d59a948a2c 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h | |||
@@ -2210,7 +2210,7 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */ | |||
2210 | __u8 DeletePending; | 2210 | __u8 DeletePending; |
2211 | __u8 Directory; | 2211 | __u8 Directory; |
2212 | __u16 Pad2; | 2212 | __u16 Pad2; |
2213 | __u64 IndexNumber; | 2213 | __le64 IndexNumber; |
2214 | __le32 EASize; | 2214 | __le32 EASize; |
2215 | __le32 AccessFlags; | 2215 | __le32 AccessFlags; |
2216 | __u64 IndexNumber1; | 2216 | __u64 IndexNumber1; |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f1bbf8305d3a..5144e9fbeb8c 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -24,6 +24,7 @@ | |||
24 | 24 | ||
25 | struct statfs; | 25 | struct statfs; |
26 | struct smb_vol; | 26 | struct smb_vol; |
27 | struct smb_rqst; | ||
27 | 28 | ||
28 | /* | 29 | /* |
29 | ***************************************************************** | 30 | ***************************************************************** |
@@ -35,6 +36,8 @@ extern struct smb_hdr *cifs_buf_get(void); | |||
35 | extern void cifs_buf_release(void *); | 36 | extern void cifs_buf_release(void *); |
36 | extern struct smb_hdr *cifs_small_buf_get(void); | 37 | extern struct smb_hdr *cifs_small_buf_get(void); |
37 | extern void cifs_small_buf_release(void *); | 38 | extern void cifs_small_buf_release(void *); |
39 | extern void cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, | ||
40 | struct kvec *iov); | ||
38 | extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *, | 41 | extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *, |
39 | unsigned int /* length */); | 42 | unsigned int /* length */); |
40 | extern unsigned int _get_xid(void); | 43 | extern unsigned int _get_xid(void); |
@@ -65,21 +68,22 @@ extern char *cifs_compose_mount_options(const char *sb_mountdata, | |||
65 | extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, | 68 | extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, |
66 | struct TCP_Server_Info *server); | 69 | struct TCP_Server_Info *server); |
67 | extern void DeleteMidQEntry(struct mid_q_entry *midEntry); | 70 | extern void DeleteMidQEntry(struct mid_q_entry *midEntry); |
71 | extern void cifs_delete_mid(struct mid_q_entry *mid); | ||
68 | extern void cifs_wake_up_task(struct mid_q_entry *mid); | 72 | extern void cifs_wake_up_task(struct mid_q_entry *mid); |
69 | extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, | 73 | extern int cifs_call_async(struct TCP_Server_Info *server, |
70 | unsigned int nvec, mid_receive_t *receive, | 74 | struct smb_rqst *rqst, |
71 | mid_callback_t *callback, void *cbdata, | 75 | mid_receive_t *receive, mid_callback_t *callback, |
72 | const int flags); | 76 | void *cbdata, const int flags); |
73 | extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *, | 77 | extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *, |
74 | struct smb_hdr * /* input */ , | 78 | struct smb_hdr * /* input */ , |
75 | struct smb_hdr * /* out */ , | 79 | struct smb_hdr * /* out */ , |
76 | int * /* bytes returned */ , const int); | 80 | int * /* bytes returned */ , const int); |
77 | extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, | 81 | extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, |
78 | char *in_buf, int flags); | 82 | char *in_buf, int flags); |
79 | extern int cifs_setup_request(struct cifs_ses *, struct kvec *, unsigned int, | 83 | extern struct mid_q_entry *cifs_setup_request(struct cifs_ses *, |
80 | struct mid_q_entry **); | 84 | struct smb_rqst *); |
81 | extern int cifs_setup_async_request(struct TCP_Server_Info *, struct kvec *, | 85 | extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *, |
82 | unsigned int, struct mid_q_entry **); | 86 | struct smb_rqst *); |
83 | extern int cifs_check_receive(struct mid_q_entry *mid, | 87 | extern int cifs_check_receive(struct mid_q_entry *mid, |
84 | struct TCP_Server_Info *server, bool log_error); | 88 | struct TCP_Server_Info *server, bool log_error); |
85 | extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, | 89 | extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, |
@@ -99,7 +103,7 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, | |||
99 | unsigned int bytes_written); | 103 | unsigned int bytes_written); |
100 | extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); | 104 | extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); |
101 | extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); | 105 | extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); |
102 | extern unsigned int smbCalcSize(struct smb_hdr *ptr); | 106 | extern unsigned int smbCalcSize(void *buf); |
103 | extern int decode_negTokenInit(unsigned char *security_blob, int length, | 107 | extern int decode_negTokenInit(unsigned char *security_blob, int length, |
104 | struct TCP_Server_Info *server); | 108 | struct TCP_Server_Info *server); |
105 | extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); | 109 | extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); |
@@ -120,10 +124,14 @@ extern u64 cifs_UnixTimeToNT(struct timespec); | |||
120 | extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, | 124 | extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, |
121 | int offset); | 125 | int offset); |
122 | extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); | 126 | extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); |
123 | 127 | extern int cifs_unlock_range(struct cifsFileInfo *cfile, | |
124 | extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle, | 128 | struct file_lock *flock, const unsigned int xid); |
125 | struct file *file, struct tcon_link *tlink, | 129 | extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile); |
126 | __u32 oplock); | 130 | |
131 | extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, | ||
132 | struct file *file, | ||
133 | struct tcon_link *tlink, | ||
134 | __u32 oplock); | ||
127 | extern int cifs_posix_open(char *full_path, struct inode **inode, | 135 | extern int cifs_posix_open(char *full_path, struct inode **inode, |
128 | struct super_block *sb, int mode, | 136 | struct super_block *sb, int mode, |
129 | unsigned int f_flags, __u32 *oplock, __u16 *netfid, | 137 | unsigned int f_flags, __u32 *oplock, __u16 *netfid, |
@@ -132,18 +140,23 @@ void cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr); | |||
132 | extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, | 140 | extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, |
133 | FILE_UNIX_BASIC_INFO *info, | 141 | FILE_UNIX_BASIC_INFO *info, |
134 | struct cifs_sb_info *cifs_sb); | 142 | struct cifs_sb_info *cifs_sb); |
143 | extern void cifs_dir_info_to_fattr(struct cifs_fattr *, FILE_DIRECTORY_INFO *, | ||
144 | struct cifs_sb_info *); | ||
135 | extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr); | 145 | extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr); |
136 | extern struct inode *cifs_iget(struct super_block *sb, | 146 | extern struct inode *cifs_iget(struct super_block *sb, |
137 | struct cifs_fattr *fattr); | 147 | struct cifs_fattr *fattr); |
138 | 148 | ||
139 | extern int cifs_get_file_info(struct file *filp); | ||
140 | extern int cifs_get_inode_info(struct inode **inode, const char *full_path, | 149 | extern int cifs_get_inode_info(struct inode **inode, const char *full_path, |
141 | FILE_ALL_INFO *data, struct super_block *sb, | 150 | FILE_ALL_INFO *data, struct super_block *sb, |
142 | int xid, const __u16 *fid); | 151 | int xid, const __u16 *fid); |
143 | extern int cifs_get_file_info_unix(struct file *filp); | ||
144 | extern int cifs_get_inode_info_unix(struct inode **pinode, | 152 | extern int cifs_get_inode_info_unix(struct inode **pinode, |
145 | const unsigned char *search_path, | 153 | const unsigned char *search_path, |
146 | struct super_block *sb, unsigned int xid); | 154 | struct super_block *sb, unsigned int xid); |
155 | extern int cifs_set_file_info(struct inode *inode, struct iattr *attrs, | ||
156 | unsigned int xid, char *full_path, __u32 dosattr); | ||
157 | extern int cifs_rename_pending_delete(const char *full_path, | ||
158 | struct dentry *dentry, | ||
159 | const unsigned int xid); | ||
147 | extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, | 160 | extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, |
148 | struct cifs_fattr *fattr, struct inode *inode, | 161 | struct cifs_fattr *fattr, struct inode *inode, |
149 | const char *path, const __u16 *pfid); | 162 | const char *path, const __u16 *pfid); |
@@ -169,6 +182,17 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data, | |||
169 | extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); | 182 | extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); |
170 | extern void cifs_umount(struct cifs_sb_info *); | 183 | extern void cifs_umount(struct cifs_sb_info *); |
171 | extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon); | 184 | extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon); |
185 | extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, | ||
186 | __u64 length, __u8 type, | ||
187 | struct cifsLockInfo **conf_lock, | ||
188 | bool rw_check); | ||
189 | extern void cifs_add_pending_open(struct cifs_fid *fid, | ||
190 | struct tcon_link *tlink, | ||
191 | struct cifs_pending_open *open); | ||
192 | extern void cifs_add_pending_open_locked(struct cifs_fid *fid, | ||
193 | struct tcon_link *tlink, | ||
194 | struct cifs_pending_open *open); | ||
195 | extern void cifs_del_pending_open(struct cifs_pending_open *open); | ||
172 | 196 | ||
173 | #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) | 197 | #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) |
174 | extern void cifs_dfs_release_automount_timer(void); | 198 | extern void cifs_dfs_release_automount_timer(void); |
@@ -179,6 +203,10 @@ extern void cifs_dfs_release_automount_timer(void); | |||
179 | void cifs_proc_init(void); | 203 | void cifs_proc_init(void); |
180 | void cifs_proc_clean(void); | 204 | void cifs_proc_clean(void); |
181 | 205 | ||
206 | extern void cifs_move_llist(struct list_head *source, struct list_head *dest); | ||
207 | extern void cifs_free_llist(struct list_head *llist); | ||
208 | extern void cifs_del_lock_waiters(struct cifsLockInfo *lock); | ||
209 | |||
182 | extern int cifs_negotiate_protocol(const unsigned int xid, | 210 | extern int cifs_negotiate_protocol(const unsigned int xid, |
183 | struct cifs_ses *ses); | 211 | struct cifs_ses *ses); |
184 | extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, | 212 | extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, |
@@ -190,10 +218,10 @@ extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses, | |||
190 | const struct nls_table *); | 218 | const struct nls_table *); |
191 | 219 | ||
192 | extern int CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, | 220 | extern int CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, |
193 | const char *searchName, const struct nls_table *nls_codepage, | 221 | const char *searchName, struct cifs_sb_info *cifs_sb, |
194 | __u16 *searchHandle, __u16 search_flags, | 222 | __u16 *searchHandle, __u16 search_flags, |
195 | struct cifs_search_info *psrch_inf, | 223 | struct cifs_search_info *psrch_inf, |
196 | int map, const char dirsep); | 224 | bool msearch); |
197 | 225 | ||
198 | extern int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, | 226 | extern int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, |
199 | __u16 searchHandle, __u16 search_flags, | 227 | __u16 searchHandle, __u16 search_flags, |
@@ -265,13 +293,11 @@ extern int CIFSSMBSetAttrLegacy(unsigned int xid, struct cifs_tcon *tcon, | |||
265 | const struct nls_table *nls_codepage); | 293 | const struct nls_table *nls_codepage); |
266 | #endif /* possibly unneeded function */ | 294 | #endif /* possibly unneeded function */ |
267 | extern int CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, | 295 | extern int CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, |
268 | const char *fileName, __u64 size, | 296 | const char *file_name, __u64 size, |
269 | bool setAllocationSizeFlag, | 297 | struct cifs_sb_info *cifs_sb, bool set_allocation); |
270 | const struct nls_table *nls_codepage, | ||
271 | int remap_special_chars); | ||
272 | extern int CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, | 298 | extern int CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, |
273 | __u64 size, __u16 fileHandle, __u32 opener_pid, | 299 | struct cifsFileInfo *cfile, __u64 size, |
274 | bool AllocSizeFlag); | 300 | bool set_allocation); |
275 | 301 | ||
276 | struct cifs_unix_set_info_args { | 302 | struct cifs_unix_set_info_args { |
277 | __u64 ctime; | 303 | __u64 ctime; |
@@ -303,22 +329,17 @@ extern int CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon, | |||
303 | const struct nls_table *nls_codepage, | 329 | const struct nls_table *nls_codepage, |
304 | int remap_special_chars); | 330 | int remap_special_chars); |
305 | extern int CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, | 331 | extern int CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, |
306 | const char *name, | 332 | const char *name, struct cifs_sb_info *cifs_sb); |
307 | const struct nls_table *nls_codepage, | ||
308 | int remap_special_chars); | ||
309 | extern int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, | 333 | extern int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, |
310 | const char *fromName, const char *toName, | 334 | const char *from_name, const char *to_name, |
311 | const struct nls_table *nls_codepage, | 335 | struct cifs_sb_info *cifs_sb); |
312 | int remap_special_chars); | ||
313 | extern int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *tcon, | 336 | extern int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *tcon, |
314 | int netfid, const char *target_name, | 337 | int netfid, const char *target_name, |
315 | const struct nls_table *nls_codepage, | 338 | const struct nls_table *nls_codepage, |
316 | int remap_special_chars); | 339 | int remap_special_chars); |
317 | extern int CIFSCreateHardLink(const unsigned int xid, | 340 | extern int CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, |
318 | struct cifs_tcon *tcon, | 341 | const char *from_name, const char *to_name, |
319 | const char *fromName, const char *toName, | 342 | struct cifs_sb_info *cifs_sb); |
320 | const struct nls_table *nls_codepage, | ||
321 | int remap_special_chars); | ||
322 | extern int CIFSUnixCreateHardLink(const unsigned int xid, | 343 | extern int CIFSUnixCreateHardLink(const unsigned int xid, |
323 | struct cifs_tcon *tcon, | 344 | struct cifs_tcon *tcon, |
324 | const char *fromName, const char *toName, | 345 | const char *fromName, const char *toName, |
@@ -367,8 +388,7 @@ extern int CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, | |||
367 | unsigned int *nbytes, const char *buf, | 388 | unsigned int *nbytes, const char *buf, |
368 | const char __user *ubuf, const int long_op); | 389 | const char __user *ubuf, const int long_op); |
369 | extern int CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, | 390 | extern int CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, |
370 | unsigned int *nbytes, struct kvec *iov, const int nvec, | 391 | unsigned int *nbytes, struct kvec *iov, const int nvec); |
371 | const int long_op); | ||
372 | extern int CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, | 392 | extern int CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, |
373 | const char *search_name, __u64 *inode_number, | 393 | const char *search_name, __u64 *inode_number, |
374 | const struct nls_table *nls_codepage, | 394 | const struct nls_table *nls_codepage, |
@@ -397,10 +417,12 @@ extern void sesInfoFree(struct cifs_ses *); | |||
397 | extern struct cifs_tcon *tconInfoAlloc(void); | 417 | extern struct cifs_tcon *tconInfoAlloc(void); |
398 | extern void tconInfoFree(struct cifs_tcon *); | 418 | extern void tconInfoFree(struct cifs_tcon *); |
399 | 419 | ||
400 | extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); | 420 | extern int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, |
421 | __u32 *pexpected_response_sequence_number); | ||
401 | extern int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *, | 422 | extern int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *, |
402 | __u32 *); | 423 | __u32 *); |
403 | extern int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, | 424 | extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); |
425 | extern int cifs_verify_signature(struct smb_rqst *rqst, | ||
404 | struct TCP_Server_Info *server, | 426 | struct TCP_Server_Info *server, |
405 | __u32 expected_sequence_number); | 427 | __u32 expected_sequence_number); |
406 | extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *, | 428 | extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *, |
@@ -462,45 +484,9 @@ extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, | |||
462 | extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, | 484 | extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, |
463 | unsigned char *p24); | 485 | unsigned char *p24); |
464 | 486 | ||
465 | /* asynchronous read support */ | ||
466 | struct cifs_readdata { | ||
467 | struct kref refcount; | ||
468 | struct list_head list; | ||
469 | struct completion done; | ||
470 | struct cifsFileInfo *cfile; | ||
471 | struct address_space *mapping; | ||
472 | __u64 offset; | ||
473 | unsigned int bytes; | ||
474 | pid_t pid; | ||
475 | int result; | ||
476 | struct list_head pages; | ||
477 | struct work_struct work; | ||
478 | int (*marshal_iov) (struct cifs_readdata *rdata, | ||
479 | unsigned int remaining); | ||
480 | unsigned int nr_iov; | ||
481 | struct kvec iov[1]; | ||
482 | }; | ||
483 | |||
484 | void cifs_readdata_release(struct kref *refcount); | 487 | void cifs_readdata_release(struct kref *refcount); |
485 | int cifs_async_readv(struct cifs_readdata *rdata); | 488 | int cifs_async_readv(struct cifs_readdata *rdata); |
486 | 489 | int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid); | |
487 | /* asynchronous write support */ | ||
488 | struct cifs_writedata { | ||
489 | struct kref refcount; | ||
490 | struct list_head list; | ||
491 | struct completion done; | ||
492 | enum writeback_sync_modes sync_mode; | ||
493 | struct work_struct work; | ||
494 | struct cifsFileInfo *cfile; | ||
495 | __u64 offset; | ||
496 | pid_t pid; | ||
497 | unsigned int bytes; | ||
498 | int result; | ||
499 | void (*marshal_iov) (struct kvec *iov, | ||
500 | struct cifs_writedata *wdata); | ||
501 | unsigned int nr_pages; | ||
502 | struct page *pages[1]; | ||
503 | }; | ||
504 | 490 | ||
505 | int cifs_async_writev(struct cifs_writedata *wdata); | 491 | int cifs_async_writev(struct cifs_writedata *wdata); |
506 | void cifs_writev_complete(struct work_struct *work); | 492 | void cifs_writev_complete(struct work_struct *work); |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 074923ce593d..76d0d2998850 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -86,32 +86,6 @@ static struct { | |||
86 | #endif /* CONFIG_CIFS_WEAK_PW_HASH */ | 86 | #endif /* CONFIG_CIFS_WEAK_PW_HASH */ |
87 | #endif /* CIFS_POSIX */ | 87 | #endif /* CIFS_POSIX */ |
88 | 88 | ||
89 | #ifdef CONFIG_HIGHMEM | ||
90 | /* | ||
91 | * On arches that have high memory, kmap address space is limited. By | ||
92 | * serializing the kmap operations on those arches, we ensure that we don't | ||
93 | * end up with a bunch of threads in writeback with partially mapped page | ||
94 | * arrays, stuck waiting for kmap to come back. That situation prevents | ||
95 | * progress and can deadlock. | ||
96 | */ | ||
97 | static DEFINE_MUTEX(cifs_kmap_mutex); | ||
98 | |||
99 | static inline void | ||
100 | cifs_kmap_lock(void) | ||
101 | { | ||
102 | mutex_lock(&cifs_kmap_mutex); | ||
103 | } | ||
104 | |||
105 | static inline void | ||
106 | cifs_kmap_unlock(void) | ||
107 | { | ||
108 | mutex_unlock(&cifs_kmap_mutex); | ||
109 | } | ||
110 | #else /* !CONFIG_HIGHMEM */ | ||
111 | #define cifs_kmap_lock() do { ; } while(0) | ||
112 | #define cifs_kmap_unlock() do { ; } while(0) | ||
113 | #endif /* CONFIG_HIGHMEM */ | ||
114 | |||
115 | /* | 89 | /* |
116 | * Mark as invalid, all open files on tree connections since they | 90 | * Mark as invalid, all open files on tree connections since they |
117 | * were closed when session to server was lost. | 91 | * were closed when session to server was lost. |
@@ -751,6 +725,8 @@ CIFSSMBEcho(struct TCP_Server_Info *server) | |||
751 | ECHO_REQ *smb; | 725 | ECHO_REQ *smb; |
752 | int rc = 0; | 726 | int rc = 0; |
753 | struct kvec iov; | 727 | struct kvec iov; |
728 | struct smb_rqst rqst = { .rq_iov = &iov, | ||
729 | .rq_nvec = 1 }; | ||
754 | 730 | ||
755 | cFYI(1, "In echo request"); | 731 | cFYI(1, "In echo request"); |
756 | 732 | ||
@@ -768,7 +744,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server) | |||
768 | iov.iov_base = smb; | 744 | iov.iov_base = smb; |
769 | iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; | 745 | iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; |
770 | 746 | ||
771 | rc = cifs_call_async(server, &iov, 1, NULL, cifs_echo_callback, | 747 | rc = cifs_call_async(server, &rqst, NULL, cifs_echo_callback, |
772 | server, CIFS_ASYNC_OP | CIFS_ECHO_OP); | 748 | server, CIFS_ASYNC_OP | CIFS_ECHO_OP); |
773 | if (rc) | 749 | if (rc) |
774 | cFYI(1, "Echo request failed: %d", rc); | 750 | cFYI(1, "Echo request failed: %d", rc); |
@@ -902,15 +878,15 @@ PsxDelete: | |||
902 | } | 878 | } |
903 | 879 | ||
904 | int | 880 | int |
905 | CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, | 881 | CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name, |
906 | const char *fileName, const struct nls_table *nls_codepage, | 882 | struct cifs_sb_info *cifs_sb) |
907 | int remap) | ||
908 | { | 883 | { |
909 | DELETE_FILE_REQ *pSMB = NULL; | 884 | DELETE_FILE_REQ *pSMB = NULL; |
910 | DELETE_FILE_RSP *pSMBr = NULL; | 885 | DELETE_FILE_RSP *pSMBr = NULL; |
911 | int rc = 0; | 886 | int rc = 0; |
912 | int bytes_returned; | 887 | int bytes_returned; |
913 | int name_len; | 888 | int name_len; |
889 | int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; | ||
914 | 890 | ||
915 | DelFileRetry: | 891 | DelFileRetry: |
916 | rc = smb_init(SMB_COM_DELETE, 1, tcon, (void **) &pSMB, | 892 | rc = smb_init(SMB_COM_DELETE, 1, tcon, (void **) &pSMB, |
@@ -919,15 +895,15 @@ DelFileRetry: | |||
919 | return rc; | 895 | return rc; |
920 | 896 | ||
921 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 897 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
922 | name_len = | 898 | name_len = cifsConvertToUTF16((__le16 *) pSMB->fileName, name, |
923 | cifsConvertToUTF16((__le16 *) pSMB->fileName, fileName, | 899 | PATH_MAX, cifs_sb->local_nls, |
924 | PATH_MAX, nls_codepage, remap); | 900 | remap); |
925 | name_len++; /* trailing null */ | 901 | name_len++; /* trailing null */ |
926 | name_len *= 2; | 902 | name_len *= 2; |
927 | } else { /* BB improve check for buffer overruns BB */ | 903 | } else { /* BB improve check for buffer overruns BB */ |
928 | name_len = strnlen(fileName, PATH_MAX); | 904 | name_len = strnlen(name, PATH_MAX); |
929 | name_len++; /* trailing null */ | 905 | name_len++; /* trailing null */ |
930 | strncpy(pSMB->fileName, fileName, name_len); | 906 | strncpy(pSMB->fileName, name, name_len); |
931 | } | 907 | } |
932 | pSMB->SearchAttributes = | 908 | pSMB->SearchAttributes = |
933 | cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM); | 909 | cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM); |
@@ -1440,7 +1416,7 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) | |||
1440 | return 0; | 1416 | return 0; |
1441 | } | 1417 | } |
1442 | 1418 | ||
1443 | static int | 1419 | int |
1444 | cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) | 1420 | cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) |
1445 | { | 1421 | { |
1446 | int length, len; | 1422 | int length, len; |
@@ -1460,10 +1436,10 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) | |||
1460 | len = min_t(unsigned int, buflen, server->vals->read_rsp_size) - | 1436 | len = min_t(unsigned int, buflen, server->vals->read_rsp_size) - |
1461 | HEADER_SIZE(server) + 1; | 1437 | HEADER_SIZE(server) + 1; |
1462 | 1438 | ||
1463 | rdata->iov[0].iov_base = buf + HEADER_SIZE(server) - 1; | 1439 | rdata->iov.iov_base = buf + HEADER_SIZE(server) - 1; |
1464 | rdata->iov[0].iov_len = len; | 1440 | rdata->iov.iov_len = len; |
1465 | 1441 | ||
1466 | length = cifs_readv_from_socket(server, rdata->iov, 1, len); | 1442 | length = cifs_readv_from_socket(server, &rdata->iov, 1, len); |
1467 | if (length < 0) | 1443 | if (length < 0) |
1468 | return length; | 1444 | return length; |
1469 | server->total_read += length; | 1445 | server->total_read += length; |
@@ -1509,19 +1485,19 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) | |||
1509 | len = data_offset - server->total_read; | 1485 | len = data_offset - server->total_read; |
1510 | if (len > 0) { | 1486 | if (len > 0) { |
1511 | /* read any junk before data into the rest of smallbuf */ | 1487 | /* read any junk before data into the rest of smallbuf */ |
1512 | rdata->iov[0].iov_base = buf + server->total_read; | 1488 | rdata->iov.iov_base = buf + server->total_read; |
1513 | rdata->iov[0].iov_len = len; | 1489 | rdata->iov.iov_len = len; |
1514 | length = cifs_readv_from_socket(server, rdata->iov, 1, len); | 1490 | length = cifs_readv_from_socket(server, &rdata->iov, 1, len); |
1515 | if (length < 0) | 1491 | if (length < 0) |
1516 | return length; | 1492 | return length; |
1517 | server->total_read += length; | 1493 | server->total_read += length; |
1518 | } | 1494 | } |
1519 | 1495 | ||
1520 | /* set up first iov for signature check */ | 1496 | /* set up first iov for signature check */ |
1521 | rdata->iov[0].iov_base = buf; | 1497 | rdata->iov.iov_base = buf; |
1522 | rdata->iov[0].iov_len = server->total_read; | 1498 | rdata->iov.iov_len = server->total_read; |
1523 | cFYI(1, "0: iov_base=%p iov_len=%zu", | 1499 | cFYI(1, "0: iov_base=%p iov_len=%zu", |
1524 | rdata->iov[0].iov_base, rdata->iov[0].iov_len); | 1500 | rdata->iov.iov_base, rdata->iov.iov_len); |
1525 | 1501 | ||
1526 | /* how much data is in the response? */ | 1502 | /* how much data is in the response? */ |
1527 | data_len = server->ops->read_data_length(buf); | 1503 | data_len = server->ops->read_data_length(buf); |
@@ -1531,23 +1507,11 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) | |||
1531 | return cifs_readv_discard(server, mid); | 1507 | return cifs_readv_discard(server, mid); |
1532 | } | 1508 | } |
1533 | 1509 | ||
1534 | /* marshal up the page array */ | 1510 | length = rdata->read_into_pages(server, rdata, data_len); |
1535 | cifs_kmap_lock(); | 1511 | if (length < 0) |
1536 | len = rdata->marshal_iov(rdata, data_len); | 1512 | return length; |
1537 | cifs_kmap_unlock(); | ||
1538 | data_len -= len; | ||
1539 | |||
1540 | /* issue the read if we have any iovecs left to fill */ | ||
1541 | if (rdata->nr_iov > 1) { | ||
1542 | length = cifs_readv_from_socket(server, &rdata->iov[1], | ||
1543 | rdata->nr_iov - 1, len); | ||
1544 | if (length < 0) | ||
1545 | return length; | ||
1546 | server->total_read += length; | ||
1547 | } else { | ||
1548 | length = 0; | ||
1549 | } | ||
1550 | 1513 | ||
1514 | server->total_read += length; | ||
1551 | rdata->bytes = length; | 1515 | rdata->bytes = length; |
1552 | 1516 | ||
1553 | cFYI(1, "total_read=%u buflen=%u remaining=%u", server->total_read, | 1517 | cFYI(1, "total_read=%u buflen=%u remaining=%u", server->total_read, |
@@ -1567,6 +1531,12 @@ cifs_readv_callback(struct mid_q_entry *mid) | |||
1567 | struct cifs_readdata *rdata = mid->callback_data; | 1531 | struct cifs_readdata *rdata = mid->callback_data; |
1568 | struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); | 1532 | struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); |
1569 | struct TCP_Server_Info *server = tcon->ses->server; | 1533 | struct TCP_Server_Info *server = tcon->ses->server; |
1534 | struct smb_rqst rqst = { .rq_iov = &rdata->iov, | ||
1535 | .rq_nvec = 1, | ||
1536 | .rq_pages = rdata->pages, | ||
1537 | .rq_npages = rdata->nr_pages, | ||
1538 | .rq_pagesz = rdata->pagesz, | ||
1539 | .rq_tailsz = rdata->tailsz }; | ||
1570 | 1540 | ||
1571 | cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__, | 1541 | cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__, |
1572 | mid->mid, mid->mid_state, rdata->result, rdata->bytes); | 1542 | mid->mid, mid->mid_state, rdata->result, rdata->bytes); |
@@ -1576,9 +1546,13 @@ cifs_readv_callback(struct mid_q_entry *mid) | |||
1576 | /* result already set, check signature */ | 1546 | /* result already set, check signature */ |
1577 | if (server->sec_mode & | 1547 | if (server->sec_mode & |
1578 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | 1548 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { |
1579 | if (cifs_verify_signature(rdata->iov, rdata->nr_iov, | 1549 | int rc = 0; |
1580 | server, mid->sequence_number + 1)) | 1550 | |
1581 | cERROR(1, "Unexpected SMB signature"); | 1551 | rc = cifs_verify_signature(&rqst, server, |
1552 | mid->sequence_number + 1); | ||
1553 | if (rc) | ||
1554 | cERROR(1, "SMB signature verification returned " | ||
1555 | "error = %d", rc); | ||
1582 | } | 1556 | } |
1583 | /* FIXME: should this be counted toward the initiating task? */ | 1557 | /* FIXME: should this be counted toward the initiating task? */ |
1584 | task_io_account_read(rdata->bytes); | 1558 | task_io_account_read(rdata->bytes); |
@@ -1605,6 +1579,8 @@ cifs_async_readv(struct cifs_readdata *rdata) | |||
1605 | READ_REQ *smb = NULL; | 1579 | READ_REQ *smb = NULL; |
1606 | int wct; | 1580 | int wct; |
1607 | struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); | 1581 | struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); |
1582 | struct smb_rqst rqst = { .rq_iov = &rdata->iov, | ||
1583 | .rq_nvec = 1 }; | ||
1608 | 1584 | ||
1609 | cFYI(1, "%s: offset=%llu bytes=%u", __func__, | 1585 | cFYI(1, "%s: offset=%llu bytes=%u", __func__, |
1610 | rdata->offset, rdata->bytes); | 1586 | rdata->offset, rdata->bytes); |
@@ -1627,7 +1603,7 @@ cifs_async_readv(struct cifs_readdata *rdata) | |||
1627 | smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16)); | 1603 | smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16)); |
1628 | 1604 | ||
1629 | smb->AndXCommand = 0xFF; /* none */ | 1605 | smb->AndXCommand = 0xFF; /* none */ |
1630 | smb->Fid = rdata->cfile->netfid; | 1606 | smb->Fid = rdata->cfile->fid.netfid; |
1631 | smb->OffsetLow = cpu_to_le32(rdata->offset & 0xFFFFFFFF); | 1607 | smb->OffsetLow = cpu_to_le32(rdata->offset & 0xFFFFFFFF); |
1632 | if (wct == 12) | 1608 | if (wct == 12) |
1633 | smb->OffsetHigh = cpu_to_le32(rdata->offset >> 32); | 1609 | smb->OffsetHigh = cpu_to_le32(rdata->offset >> 32); |
@@ -1644,13 +1620,12 @@ cifs_async_readv(struct cifs_readdata *rdata) | |||
1644 | } | 1620 | } |
1645 | 1621 | ||
1646 | /* 4 for RFC1001 length + 1 for BCC */ | 1622 | /* 4 for RFC1001 length + 1 for BCC */ |
1647 | rdata->iov[0].iov_base = smb; | 1623 | rdata->iov.iov_base = smb; |
1648 | rdata->iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; | 1624 | rdata->iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; |
1649 | 1625 | ||
1650 | kref_get(&rdata->refcount); | 1626 | kref_get(&rdata->refcount); |
1651 | rc = cifs_call_async(tcon->ses->server, rdata->iov, 1, | 1627 | rc = cifs_call_async(tcon->ses->server, &rqst, cifs_readv_receive, |
1652 | cifs_readv_receive, cifs_readv_callback, | 1628 | cifs_readv_callback, rdata, 0); |
1653 | rdata, 0); | ||
1654 | 1629 | ||
1655 | if (rc == 0) | 1630 | if (rc == 0) |
1656 | cifs_stats_inc(&tcon->stats.cifs_stats.num_reads); | 1631 | cifs_stats_inc(&tcon->stats.cifs_stats.num_reads); |
@@ -1921,6 +1896,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata) | |||
1921 | { | 1896 | { |
1922 | int i, rc; | 1897 | int i, rc; |
1923 | struct inode *inode = wdata->cfile->dentry->d_inode; | 1898 | struct inode *inode = wdata->cfile->dentry->d_inode; |
1899 | struct TCP_Server_Info *server; | ||
1924 | 1900 | ||
1925 | for (i = 0; i < wdata->nr_pages; i++) { | 1901 | for (i = 0; i < wdata->nr_pages; i++) { |
1926 | lock_page(wdata->pages[i]); | 1902 | lock_page(wdata->pages[i]); |
@@ -1928,7 +1904,8 @@ cifs_writev_requeue(struct cifs_writedata *wdata) | |||
1928 | } | 1904 | } |
1929 | 1905 | ||
1930 | do { | 1906 | do { |
1931 | rc = cifs_async_writev(wdata); | 1907 | server = tlink_tcon(wdata->cfile->tlink)->ses->server; |
1908 | rc = server->ops->async_writev(wdata); | ||
1932 | } while (rc == -EAGAIN); | 1909 | } while (rc == -EAGAIN); |
1933 | 1910 | ||
1934 | for (i = 0; i < wdata->nr_pages; i++) { | 1911 | for (i = 0; i < wdata->nr_pages; i++) { |
@@ -2048,11 +2025,12 @@ cifs_writev_callback(struct mid_q_entry *mid) | |||
2048 | int | 2025 | int |
2049 | cifs_async_writev(struct cifs_writedata *wdata) | 2026 | cifs_async_writev(struct cifs_writedata *wdata) |
2050 | { | 2027 | { |
2051 | int i, rc = -EACCES; | 2028 | int rc = -EACCES; |
2052 | WRITE_REQ *smb = NULL; | 2029 | WRITE_REQ *smb = NULL; |
2053 | int wct; | 2030 | int wct; |
2054 | struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); | 2031 | struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); |
2055 | struct kvec *iov = NULL; | 2032 | struct kvec iov; |
2033 | struct smb_rqst rqst = { }; | ||
2056 | 2034 | ||
2057 | if (tcon->ses->capabilities & CAP_LARGE_FILES) { | 2035 | if (tcon->ses->capabilities & CAP_LARGE_FILES) { |
2058 | wct = 14; | 2036 | wct = 14; |
@@ -2068,18 +2046,11 @@ cifs_async_writev(struct cifs_writedata *wdata) | |||
2068 | if (rc) | 2046 | if (rc) |
2069 | goto async_writev_out; | 2047 | goto async_writev_out; |
2070 | 2048 | ||
2071 | /* 1 iov per page + 1 for header */ | ||
2072 | iov = kzalloc((wdata->nr_pages + 1) * sizeof(*iov), GFP_NOFS); | ||
2073 | if (iov == NULL) { | ||
2074 | rc = -ENOMEM; | ||
2075 | goto async_writev_out; | ||
2076 | } | ||
2077 | |||
2078 | smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid); | 2049 | smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid); |
2079 | smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16)); | 2050 | smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16)); |
2080 | 2051 | ||
2081 | smb->AndXCommand = 0xFF; /* none */ | 2052 | smb->AndXCommand = 0xFF; /* none */ |
2082 | smb->Fid = wdata->cfile->netfid; | 2053 | smb->Fid = wdata->cfile->fid.netfid; |
2083 | smb->OffsetLow = cpu_to_le32(wdata->offset & 0xFFFFFFFF); | 2054 | smb->OffsetLow = cpu_to_le32(wdata->offset & 0xFFFFFFFF); |
2084 | if (wct == 14) | 2055 | if (wct == 14) |
2085 | smb->OffsetHigh = cpu_to_le32(wdata->offset >> 32); | 2056 | smb->OffsetHigh = cpu_to_le32(wdata->offset >> 32); |
@@ -2091,18 +2062,15 @@ cifs_async_writev(struct cifs_writedata *wdata) | |||
2091 | cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4); | 2062 | cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4); |
2092 | 2063 | ||
2093 | /* 4 for RFC1001 length + 1 for BCC */ | 2064 | /* 4 for RFC1001 length + 1 for BCC */ |
2094 | iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1; | 2065 | iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1; |
2095 | iov[0].iov_base = smb; | 2066 | iov.iov_base = smb; |
2096 | 2067 | ||
2097 | /* | 2068 | rqst.rq_iov = &iov; |
2098 | * This function should marshal up the page array into the kvec | 2069 | rqst.rq_nvec = 1; |
2099 | * array, reserving [0] for the header. It should kmap the pages | 2070 | rqst.rq_pages = wdata->pages; |
2100 | * and set the iov_len properly for each one. It may also set | 2071 | rqst.rq_npages = wdata->nr_pages; |
2101 | * wdata->bytes too. | 2072 | rqst.rq_pagesz = wdata->pagesz; |
2102 | */ | 2073 | rqst.rq_tailsz = wdata->tailsz; |
2103 | cifs_kmap_lock(); | ||
2104 | wdata->marshal_iov(iov, wdata); | ||
2105 | cifs_kmap_unlock(); | ||
2106 | 2074 | ||
2107 | cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); | 2075 | cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); |
2108 | 2076 | ||
@@ -2118,32 +2086,26 @@ cifs_async_writev(struct cifs_writedata *wdata) | |||
2118 | (struct smb_com_writex_req *)smb; | 2086 | (struct smb_com_writex_req *)smb; |
2119 | inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5); | 2087 | inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5); |
2120 | put_bcc(wdata->bytes + 5, &smbw->hdr); | 2088 | put_bcc(wdata->bytes + 5, &smbw->hdr); |
2121 | iov[0].iov_len += 4; /* pad bigger by four bytes */ | 2089 | iov.iov_len += 4; /* pad bigger by four bytes */ |
2122 | } | 2090 | } |
2123 | 2091 | ||
2124 | kref_get(&wdata->refcount); | 2092 | kref_get(&wdata->refcount); |
2125 | rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1, | 2093 | rc = cifs_call_async(tcon->ses->server, &rqst, NULL, |
2126 | NULL, cifs_writev_callback, wdata, 0); | 2094 | cifs_writev_callback, wdata, 0); |
2127 | 2095 | ||
2128 | if (rc == 0) | 2096 | if (rc == 0) |
2129 | cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); | 2097 | cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); |
2130 | else | 2098 | else |
2131 | kref_put(&wdata->refcount, cifs_writedata_release); | 2099 | kref_put(&wdata->refcount, cifs_writedata_release); |
2132 | 2100 | ||
2133 | /* send is done, unmap pages */ | ||
2134 | for (i = 0; i < wdata->nr_pages; i++) | ||
2135 | kunmap(wdata->pages[i]); | ||
2136 | |||
2137 | async_writev_out: | 2101 | async_writev_out: |
2138 | cifs_small_buf_release(smb); | 2102 | cifs_small_buf_release(smb); |
2139 | kfree(iov); | ||
2140 | return rc; | 2103 | return rc; |
2141 | } | 2104 | } |
2142 | 2105 | ||
2143 | int | 2106 | int |
2144 | CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, | 2107 | CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, |
2145 | unsigned int *nbytes, struct kvec *iov, int n_vec, | 2108 | unsigned int *nbytes, struct kvec *iov, int n_vec) |
2146 | const int long_op) | ||
2147 | { | 2109 | { |
2148 | int rc = -EACCES; | 2110 | int rc = -EACCES; |
2149 | WRITE_REQ *pSMB = NULL; | 2111 | WRITE_REQ *pSMB = NULL; |
@@ -2214,8 +2176,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, | |||
2214 | iov[0].iov_len = smb_hdr_len + 8; | 2176 | iov[0].iov_len = smb_hdr_len + 8; |
2215 | 2177 | ||
2216 | 2178 | ||
2217 | rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, | 2179 | rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, 0); |
2218 | long_op); | ||
2219 | cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); | 2180 | cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); |
2220 | if (rc) { | 2181 | if (rc) { |
2221 | cFYI(1, "Send error Write2 = %d", rc); | 2182 | cFYI(1, "Send error Write2 = %d", rc); |
@@ -2552,8 +2513,8 @@ CIFSSMBFlush(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) | |||
2552 | 2513 | ||
2553 | int | 2514 | int |
2554 | CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, | 2515 | CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, |
2555 | const char *fromName, const char *toName, | 2516 | const char *from_name, const char *to_name, |
2556 | const struct nls_table *nls_codepage, int remap) | 2517 | struct cifs_sb_info *cifs_sb) |
2557 | { | 2518 | { |
2558 | int rc = 0; | 2519 | int rc = 0; |
2559 | RENAME_REQ *pSMB = NULL; | 2520 | RENAME_REQ *pSMB = NULL; |
@@ -2561,6 +2522,7 @@ CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, | |||
2561 | int bytes_returned; | 2522 | int bytes_returned; |
2562 | int name_len, name_len2; | 2523 | int name_len, name_len2; |
2563 | __u16 count; | 2524 | __u16 count; |
2525 | int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; | ||
2564 | 2526 | ||
2565 | cFYI(1, "In CIFSSMBRename"); | 2527 | cFYI(1, "In CIFSSMBRename"); |
2566 | renameRetry: | 2528 | renameRetry: |
@@ -2575,9 +2537,9 @@ renameRetry: | |||
2575 | ATTR_DIRECTORY); | 2537 | ATTR_DIRECTORY); |
2576 | 2538 | ||
2577 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 2539 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
2578 | name_len = | 2540 | name_len = cifsConvertToUTF16((__le16 *) pSMB->OldFileName, |
2579 | cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName, | 2541 | from_name, PATH_MAX, |
2580 | PATH_MAX, nls_codepage, remap); | 2542 | cifs_sb->local_nls, remap); |
2581 | name_len++; /* trailing null */ | 2543 | name_len++; /* trailing null */ |
2582 | name_len *= 2; | 2544 | name_len *= 2; |
2583 | pSMB->OldFileName[name_len] = 0x04; /* pad */ | 2545 | pSMB->OldFileName[name_len] = 0x04; /* pad */ |
@@ -2585,17 +2547,18 @@ renameRetry: | |||
2585 | pSMB->OldFileName[name_len + 1] = 0x00; | 2547 | pSMB->OldFileName[name_len + 1] = 0x00; |
2586 | name_len2 = | 2548 | name_len2 = |
2587 | cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], | 2549 | cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], |
2588 | toName, PATH_MAX, nls_codepage, remap); | 2550 | to_name, PATH_MAX, cifs_sb->local_nls, |
2551 | remap); | ||
2589 | name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; | 2552 | name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; |
2590 | name_len2 *= 2; /* convert to bytes */ | 2553 | name_len2 *= 2; /* convert to bytes */ |
2591 | } else { /* BB improve the check for buffer overruns BB */ | 2554 | } else { /* BB improve the check for buffer overruns BB */ |
2592 | name_len = strnlen(fromName, PATH_MAX); | 2555 | name_len = strnlen(from_name, PATH_MAX); |
2593 | name_len++; /* trailing null */ | 2556 | name_len++; /* trailing null */ |
2594 | strncpy(pSMB->OldFileName, fromName, name_len); | 2557 | strncpy(pSMB->OldFileName, from_name, name_len); |
2595 | name_len2 = strnlen(toName, PATH_MAX); | 2558 | name_len2 = strnlen(to_name, PATH_MAX); |
2596 | name_len2++; /* trailing null */ | 2559 | name_len2++; /* trailing null */ |
2597 | pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */ | 2560 | pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */ |
2598 | strncpy(&pSMB->OldFileName[name_len + 1], toName, name_len2); | 2561 | strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2); |
2599 | name_len2++; /* trailing null */ | 2562 | name_len2++; /* trailing null */ |
2600 | name_len2++; /* signature byte */ | 2563 | name_len2++; /* signature byte */ |
2601 | } | 2564 | } |
@@ -2943,8 +2906,8 @@ createHardLinkRetry: | |||
2943 | 2906 | ||
2944 | int | 2907 | int |
2945 | CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, | 2908 | CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, |
2946 | const char *fromName, const char *toName, | 2909 | const char *from_name, const char *to_name, |
2947 | const struct nls_table *nls_codepage, int remap) | 2910 | struct cifs_sb_info *cifs_sb) |
2948 | { | 2911 | { |
2949 | int rc = 0; | 2912 | int rc = 0; |
2950 | NT_RENAME_REQ *pSMB = NULL; | 2913 | NT_RENAME_REQ *pSMB = NULL; |
@@ -2952,6 +2915,7 @@ CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, | |||
2952 | int bytes_returned; | 2915 | int bytes_returned; |
2953 | int name_len, name_len2; | 2916 | int name_len, name_len2; |
2954 | __u16 count; | 2917 | __u16 count; |
2918 | int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; | ||
2955 | 2919 | ||
2956 | cFYI(1, "In CIFSCreateHardLink"); | 2920 | cFYI(1, "In CIFSCreateHardLink"); |
2957 | winCreateHardLinkRetry: | 2921 | winCreateHardLinkRetry: |
@@ -2971,8 +2935,8 @@ winCreateHardLinkRetry: | |||
2971 | 2935 | ||
2972 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 2936 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
2973 | name_len = | 2937 | name_len = |
2974 | cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName, | 2938 | cifsConvertToUTF16((__le16 *) pSMB->OldFileName, from_name, |
2975 | PATH_MAX, nls_codepage, remap); | 2939 | PATH_MAX, cifs_sb->local_nls, remap); |
2976 | name_len++; /* trailing null */ | 2940 | name_len++; /* trailing null */ |
2977 | name_len *= 2; | 2941 | name_len *= 2; |
2978 | 2942 | ||
@@ -2981,17 +2945,18 @@ winCreateHardLinkRetry: | |||
2981 | pSMB->OldFileName[name_len + 1] = 0x00; /* pad */ | 2945 | pSMB->OldFileName[name_len + 1] = 0x00; /* pad */ |
2982 | name_len2 = | 2946 | name_len2 = |
2983 | cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], | 2947 | cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], |
2984 | toName, PATH_MAX, nls_codepage, remap); | 2948 | to_name, PATH_MAX, cifs_sb->local_nls, |
2949 | remap); | ||
2985 | name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; | 2950 | name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; |
2986 | name_len2 *= 2; /* convert to bytes */ | 2951 | name_len2 *= 2; /* convert to bytes */ |
2987 | } else { /* BB improve the check for buffer overruns BB */ | 2952 | } else { /* BB improve the check for buffer overruns BB */ |
2988 | name_len = strnlen(fromName, PATH_MAX); | 2953 | name_len = strnlen(from_name, PATH_MAX); |
2989 | name_len++; /* trailing null */ | 2954 | name_len++; /* trailing null */ |
2990 | strncpy(pSMB->OldFileName, fromName, name_len); | 2955 | strncpy(pSMB->OldFileName, from_name, name_len); |
2991 | name_len2 = strnlen(toName, PATH_MAX); | 2956 | name_len2 = strnlen(to_name, PATH_MAX); |
2992 | name_len2++; /* trailing null */ | 2957 | name_len2++; /* trailing null */ |
2993 | pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */ | 2958 | pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */ |
2994 | strncpy(&pSMB->OldFileName[name_len + 1], toName, name_len2); | 2959 | strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2); |
2995 | name_len2++; /* trailing null */ | 2960 | name_len2++; /* trailing null */ |
2996 | name_len2++; /* signature byte */ | 2961 | name_len2++; /* signature byte */ |
2997 | } | 2962 | } |
@@ -4249,10 +4214,9 @@ UnixQPathInfoRetry: | |||
4249 | /* xid, tcon, searchName and codepage are input parms, rest are returned */ | 4214 | /* xid, tcon, searchName and codepage are input parms, rest are returned */ |
4250 | int | 4215 | int |
4251 | CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, | 4216 | CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, |
4252 | const char *searchName, | 4217 | const char *searchName, struct cifs_sb_info *cifs_sb, |
4253 | const struct nls_table *nls_codepage, | ||
4254 | __u16 *pnetfid, __u16 search_flags, | 4218 | __u16 *pnetfid, __u16 search_flags, |
4255 | struct cifs_search_info *psrch_inf, int remap, const char dirsep) | 4219 | struct cifs_search_info *psrch_inf, bool msearch) |
4256 | { | 4220 | { |
4257 | /* level 257 SMB_ */ | 4221 | /* level 257 SMB_ */ |
4258 | TRANSACTION2_FFIRST_REQ *pSMB = NULL; | 4222 | TRANSACTION2_FFIRST_REQ *pSMB = NULL; |
@@ -4260,8 +4224,9 @@ CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, | |||
4260 | T2_FFIRST_RSP_PARMS *parms; | 4224 | T2_FFIRST_RSP_PARMS *parms; |
4261 | int rc = 0; | 4225 | int rc = 0; |
4262 | int bytes_returned = 0; | 4226 | int bytes_returned = 0; |
4263 | int name_len; | 4227 | int name_len, remap; |
4264 | __u16 params, byte_count; | 4228 | __u16 params, byte_count; |
4229 | struct nls_table *nls_codepage; | ||
4265 | 4230 | ||
4266 | cFYI(1, "In FindFirst for %s", searchName); | 4231 | cFYI(1, "In FindFirst for %s", searchName); |
4267 | 4232 | ||
@@ -4271,6 +4236,9 @@ findFirstRetry: | |||
4271 | if (rc) | 4236 | if (rc) |
4272 | return rc; | 4237 | return rc; |
4273 | 4238 | ||
4239 | nls_codepage = cifs_sb->local_nls; | ||
4240 | remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; | ||
4241 | |||
4274 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 4242 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
4275 | name_len = | 4243 | name_len = |
4276 | cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, | 4244 | cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, |
@@ -4279,24 +4247,29 @@ findFirstRetry: | |||
4279 | it got remapped to 0xF03A as if it were part of the | 4247 | it got remapped to 0xF03A as if it were part of the |
4280 | directory name instead of a wildcard */ | 4248 | directory name instead of a wildcard */ |
4281 | name_len *= 2; | 4249 | name_len *= 2; |
4282 | pSMB->FileName[name_len] = dirsep; | 4250 | if (msearch) { |
4283 | pSMB->FileName[name_len+1] = 0; | 4251 | pSMB->FileName[name_len] = CIFS_DIR_SEP(cifs_sb); |
4284 | pSMB->FileName[name_len+2] = '*'; | 4252 | pSMB->FileName[name_len+1] = 0; |
4285 | pSMB->FileName[name_len+3] = 0; | 4253 | pSMB->FileName[name_len+2] = '*'; |
4286 | name_len += 4; /* now the trailing null */ | 4254 | pSMB->FileName[name_len+3] = 0; |
4287 | pSMB->FileName[name_len] = 0; /* null terminate just in case */ | 4255 | name_len += 4; /* now the trailing null */ |
4288 | pSMB->FileName[name_len+1] = 0; | 4256 | /* null terminate just in case */ |
4289 | name_len += 2; | 4257 | pSMB->FileName[name_len] = 0; |
4258 | pSMB->FileName[name_len+1] = 0; | ||
4259 | name_len += 2; | ||
4260 | } | ||
4290 | } else { /* BB add check for overrun of SMB buf BB */ | 4261 | } else { /* BB add check for overrun of SMB buf BB */ |
4291 | name_len = strnlen(searchName, PATH_MAX); | 4262 | name_len = strnlen(searchName, PATH_MAX); |
4292 | /* BB fix here and in unicode clause above ie | 4263 | /* BB fix here and in unicode clause above ie |
4293 | if (name_len > buffersize-header) | 4264 | if (name_len > buffersize-header) |
4294 | free buffer exit; BB */ | 4265 | free buffer exit; BB */ |
4295 | strncpy(pSMB->FileName, searchName, name_len); | 4266 | strncpy(pSMB->FileName, searchName, name_len); |
4296 | pSMB->FileName[name_len] = dirsep; | 4267 | if (msearch) { |
4297 | pSMB->FileName[name_len+1] = '*'; | 4268 | pSMB->FileName[name_len] = CIFS_DIR_SEP(cifs_sb); |
4298 | pSMB->FileName[name_len+2] = 0; | 4269 | pSMB->FileName[name_len+1] = '*'; |
4299 | name_len += 3; | 4270 | pSMB->FileName[name_len+2] = 0; |
4271 | name_len += 3; | ||
4272 | } | ||
4300 | } | 4273 | } |
4301 | 4274 | ||
4302 | params = 12 + name_len /* includes null */ ; | 4275 | params = 12 + name_len /* includes null */ ; |
@@ -4384,7 +4357,8 @@ findFirstRetry: | |||
4384 | psrch_inf->last_entry = psrch_inf->srch_entries_start + | 4357 | psrch_inf->last_entry = psrch_inf->srch_entries_start + |
4385 | lnoff; | 4358 | lnoff; |
4386 | 4359 | ||
4387 | *pnetfid = parms->SearchHandle; | 4360 | if (pnetfid) |
4361 | *pnetfid = parms->SearchHandle; | ||
4388 | } else { | 4362 | } else { |
4389 | cifs_buf_release(pSMB); | 4363 | cifs_buf_release(pSMB); |
4390 | } | 4364 | } |
@@ -5412,16 +5386,16 @@ QFSPosixRetry: | |||
5412 | } | 5386 | } |
5413 | 5387 | ||
5414 | 5388 | ||
5415 | /* We can not use write of zero bytes trick to | 5389 | /* |
5416 | set file size due to need for large file support. Also note that | 5390 | * We can not use write of zero bytes trick to set file size due to need for |
5417 | this SetPathInfo is preferred to SetFileInfo based method in next | 5391 | * large file support. Also note that this SetPathInfo is preferred to |
5418 | routine which is only needed to work around a sharing violation bug | 5392 | * SetFileInfo based method in next routine which is only needed to work around |
5419 | in Samba which this routine can run into */ | 5393 | * a sharing violation bugin Samba which this routine can run into. |
5420 | 5394 | */ | |
5421 | int | 5395 | int |
5422 | CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, | 5396 | CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, |
5423 | const char *fileName, __u64 size, bool SetAllocation, | 5397 | const char *file_name, __u64 size, struct cifs_sb_info *cifs_sb, |
5424 | const struct nls_table *nls_codepage, int remap) | 5398 | bool set_allocation) |
5425 | { | 5399 | { |
5426 | struct smb_com_transaction2_spi_req *pSMB = NULL; | 5400 | struct smb_com_transaction2_spi_req *pSMB = NULL; |
5427 | struct smb_com_transaction2_spi_rsp *pSMBr = NULL; | 5401 | struct smb_com_transaction2_spi_rsp *pSMBr = NULL; |
@@ -5429,6 +5403,8 @@ CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, | |||
5429 | int name_len; | 5403 | int name_len; |
5430 | int rc = 0; | 5404 | int rc = 0; |
5431 | int bytes_returned = 0; | 5405 | int bytes_returned = 0; |
5406 | int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; | ||
5407 | |||
5432 | __u16 params, byte_count, data_count, param_offset, offset; | 5408 | __u16 params, byte_count, data_count, param_offset, offset; |
5433 | 5409 | ||
5434 | cFYI(1, "In SetEOF"); | 5410 | cFYI(1, "In SetEOF"); |
@@ -5440,14 +5416,14 @@ SetEOFRetry: | |||
5440 | 5416 | ||
5441 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 5417 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
5442 | name_len = | 5418 | name_len = |
5443 | cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, | 5419 | cifsConvertToUTF16((__le16 *) pSMB->FileName, file_name, |
5444 | PATH_MAX, nls_codepage, remap); | 5420 | PATH_MAX, cifs_sb->local_nls, remap); |
5445 | name_len++; /* trailing null */ | 5421 | name_len++; /* trailing null */ |
5446 | name_len *= 2; | 5422 | name_len *= 2; |
5447 | } else { /* BB improve the check for buffer overruns BB */ | 5423 | } else { /* BB improve the check for buffer overruns BB */ |
5448 | name_len = strnlen(fileName, PATH_MAX); | 5424 | name_len = strnlen(file_name, PATH_MAX); |
5449 | name_len++; /* trailing null */ | 5425 | name_len++; /* trailing null */ |
5450 | strncpy(pSMB->FileName, fileName, name_len); | 5426 | strncpy(pSMB->FileName, file_name, name_len); |
5451 | } | 5427 | } |
5452 | params = 6 + name_len; | 5428 | params = 6 + name_len; |
5453 | data_count = sizeof(struct file_end_of_file_info); | 5429 | data_count = sizeof(struct file_end_of_file_info); |
@@ -5461,7 +5437,7 @@ SetEOFRetry: | |||
5461 | param_offset = offsetof(struct smb_com_transaction2_spi_req, | 5437 | param_offset = offsetof(struct smb_com_transaction2_spi_req, |
5462 | InformationLevel) - 4; | 5438 | InformationLevel) - 4; |
5463 | offset = param_offset + params; | 5439 | offset = param_offset + params; |
5464 | if (SetAllocation) { | 5440 | if (set_allocation) { |
5465 | if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) | 5441 | if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) |
5466 | pSMB->InformationLevel = | 5442 | pSMB->InformationLevel = |
5467 | cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2); | 5443 | cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2); |
@@ -5508,8 +5484,8 @@ SetEOFRetry: | |||
5508 | } | 5484 | } |
5509 | 5485 | ||
5510 | int | 5486 | int |
5511 | CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size, | 5487 | CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, |
5512 | __u16 fid, __u32 pid_of_opener, bool SetAllocation) | 5488 | struct cifsFileInfo *cfile, __u64 size, bool set_allocation) |
5513 | { | 5489 | { |
5514 | struct smb_com_transaction2_sfi_req *pSMB = NULL; | 5490 | struct smb_com_transaction2_sfi_req *pSMB = NULL; |
5515 | struct file_end_of_file_info *parm_data; | 5491 | struct file_end_of_file_info *parm_data; |
@@ -5523,8 +5499,8 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size, | |||
5523 | if (rc) | 5499 | if (rc) |
5524 | return rc; | 5500 | return rc; |
5525 | 5501 | ||
5526 | pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); | 5502 | pSMB->hdr.Pid = cpu_to_le16((__u16)cfile->pid); |
5527 | pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); | 5503 | pSMB->hdr.PidHigh = cpu_to_le16((__u16)(cfile->pid >> 16)); |
5528 | 5504 | ||
5529 | params = 6; | 5505 | params = 6; |
5530 | pSMB->MaxSetupCount = 0; | 5506 | pSMB->MaxSetupCount = 0; |
@@ -5553,8 +5529,8 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size, | |||
5553 | + offset); | 5529 | + offset); |
5554 | pSMB->DataOffset = cpu_to_le16(offset); | 5530 | pSMB->DataOffset = cpu_to_le16(offset); |
5555 | parm_data->FileSize = cpu_to_le64(size); | 5531 | parm_data->FileSize = cpu_to_le64(size); |
5556 | pSMB->Fid = fid; | 5532 | pSMB->Fid = cfile->fid.netfid; |
5557 | if (SetAllocation) { | 5533 | if (set_allocation) { |
5558 | if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) | 5534 | if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) |
5559 | pSMB->InformationLevel = | 5535 | pSMB->InformationLevel = |
5560 | cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2); | 5536 | cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2); |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 6df6fa14cba8..2fdbe08a7a23 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -82,8 +82,7 @@ enum { | |||
82 | Opt_serverino, Opt_noserverino, | 82 | Opt_serverino, Opt_noserverino, |
83 | Opt_rwpidforward, Opt_cifsacl, Opt_nocifsacl, | 83 | Opt_rwpidforward, Opt_cifsacl, Opt_nocifsacl, |
84 | Opt_acl, Opt_noacl, Opt_locallease, | 84 | Opt_acl, Opt_noacl, Opt_locallease, |
85 | Opt_sign, Opt_seal, Opt_direct, | 85 | Opt_sign, Opt_seal, Opt_noac, |
86 | Opt_strictcache, Opt_noac, | ||
87 | Opt_fsc, Opt_mfsymlinks, | 86 | Opt_fsc, Opt_mfsymlinks, |
88 | Opt_multiuser, Opt_sloppy, | 87 | Opt_multiuser, Opt_sloppy, |
89 | 88 | ||
@@ -160,10 +159,6 @@ static const match_table_t cifs_mount_option_tokens = { | |||
160 | { Opt_locallease, "locallease" }, | 159 | { Opt_locallease, "locallease" }, |
161 | { Opt_sign, "sign" }, | 160 | { Opt_sign, "sign" }, |
162 | { Opt_seal, "seal" }, | 161 | { Opt_seal, "seal" }, |
163 | { Opt_direct, "direct" }, | ||
164 | { Opt_direct, "directio" }, | ||
165 | { Opt_direct, "forcedirectio" }, | ||
166 | { Opt_strictcache, "strictcache" }, | ||
167 | { Opt_noac, "noac" }, | 162 | { Opt_noac, "noac" }, |
168 | { Opt_fsc, "fsc" }, | 163 | { Opt_fsc, "fsc" }, |
169 | { Opt_mfsymlinks, "mfsymlinks" }, | 164 | { Opt_mfsymlinks, "mfsymlinks" }, |
@@ -277,6 +272,7 @@ static const match_table_t cifs_cacheflavor_tokens = { | |||
277 | static const match_table_t cifs_smb_version_tokens = { | 272 | static const match_table_t cifs_smb_version_tokens = { |
278 | { Smb_1, SMB1_VERSION_STRING }, | 273 | { Smb_1, SMB1_VERSION_STRING }, |
279 | { Smb_21, SMB21_VERSION_STRING }, | 274 | { Smb_21, SMB21_VERSION_STRING }, |
275 | { Smb_30, SMB30_VERSION_STRING }, | ||
280 | }; | 276 | }; |
281 | 277 | ||
282 | static int ip_connect(struct TCP_Server_Info *server); | 278 | static int ip_connect(struct TCP_Server_Info *server); |
@@ -819,6 +815,10 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) | |||
819 | cifs_dump_mem("Bad SMB: ", buf, | 815 | cifs_dump_mem("Bad SMB: ", buf, |
820 | min_t(unsigned int, server->total_read, 48)); | 816 | min_t(unsigned int, server->total_read, 48)); |
821 | 817 | ||
818 | if (server->ops->is_status_pending && | ||
819 | server->ops->is_status_pending(buf, server, length)) | ||
820 | return -1; | ||
821 | |||
822 | if (!mid) | 822 | if (!mid) |
823 | return length; | 823 | return length; |
824 | 824 | ||
@@ -1075,6 +1075,10 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) | |||
1075 | vol->ops = &smb21_operations; | 1075 | vol->ops = &smb21_operations; |
1076 | vol->vals = &smb21_values; | 1076 | vol->vals = &smb21_values; |
1077 | break; | 1077 | break; |
1078 | case Smb_30: | ||
1079 | vol->ops = &smb21_operations; /* currently identical with 2.1 */ | ||
1080 | vol->vals = &smb30_values; | ||
1081 | break; | ||
1078 | #endif | 1082 | #endif |
1079 | default: | 1083 | default: |
1080 | cERROR(1, "Unknown vers= option specified: %s", value); | 1084 | cERROR(1, "Unknown vers= option specified: %s", value); |
@@ -1101,8 +1105,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1101 | char *string = NULL; | 1105 | char *string = NULL; |
1102 | char *tmp_end, *value; | 1106 | char *tmp_end, *value; |
1103 | char delim; | 1107 | char delim; |
1104 | bool cache_specified = false; | ||
1105 | static bool cache_warned = false; | ||
1106 | 1108 | ||
1107 | separator[0] = ','; | 1109 | separator[0] = ','; |
1108 | separator[1] = 0; | 1110 | separator[1] = 0; |
@@ -1134,6 +1136,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1134 | /* default to using server inode numbers where available */ | 1136 | /* default to using server inode numbers where available */ |
1135 | vol->server_ino = 1; | 1137 | vol->server_ino = 1; |
1136 | 1138 | ||
1139 | /* default is to use strict cifs caching semantics */ | ||
1140 | vol->strict_io = true; | ||
1141 | |||
1137 | vol->actimeo = CIFS_DEF_ACTIMEO; | 1142 | vol->actimeo = CIFS_DEF_ACTIMEO; |
1138 | 1143 | ||
1139 | /* FIXME: add autonegotiation -- for now, SMB1 is default */ | 1144 | /* FIXME: add autonegotiation -- for now, SMB1 is default */ |
@@ -1317,22 +1322,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1317 | */ | 1322 | */ |
1318 | vol->seal = 1; | 1323 | vol->seal = 1; |
1319 | break; | 1324 | break; |
1320 | case Opt_direct: | ||
1321 | cache_specified = true; | ||
1322 | vol->direct_io = true; | ||
1323 | vol->strict_io = false; | ||
1324 | cERROR(1, "The \"directio\" option will be removed in " | ||
1325 | "3.7. Please switch to the \"cache=none\" " | ||
1326 | "option."); | ||
1327 | break; | ||
1328 | case Opt_strictcache: | ||
1329 | cache_specified = true; | ||
1330 | vol->direct_io = false; | ||
1331 | vol->strict_io = true; | ||
1332 | cERROR(1, "The \"strictcache\" option will be removed " | ||
1333 | "in 3.7. Please switch to the \"cache=strict\" " | ||
1334 | "option."); | ||
1335 | break; | ||
1336 | case Opt_noac: | 1325 | case Opt_noac: |
1337 | printk(KERN_WARNING "CIFS: Mount option noac not " | 1326 | printk(KERN_WARNING "CIFS: Mount option noac not " |
1338 | "supported. Instead set " | 1327 | "supported. Instead set " |
@@ -1676,8 +1665,13 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1676 | if (string == NULL) | 1665 | if (string == NULL) |
1677 | goto out_nomem; | 1666 | goto out_nomem; |
1678 | 1667 | ||
1679 | if (strnicmp(string, "TCP_NODELAY", 11) == 0) | 1668 | if (strnicmp(string, "TCP_NODELAY", 11) == 0) { |
1669 | printk(KERN_WARNING "CIFS: the " | ||
1670 | "sockopt=TCP_NODELAY option has been " | ||
1671 | "deprecated and will be removed " | ||
1672 | "in 3.9\n"); | ||
1680 | vol->sockopt_tcp_nodelay = 1; | 1673 | vol->sockopt_tcp_nodelay = 1; |
1674 | } | ||
1681 | break; | 1675 | break; |
1682 | case Opt_netbiosname: | 1676 | case Opt_netbiosname: |
1683 | string = match_strdup(args); | 1677 | string = match_strdup(args); |
@@ -1762,7 +1756,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1762 | goto cifs_parse_mount_err; | 1756 | goto cifs_parse_mount_err; |
1763 | break; | 1757 | break; |
1764 | case Opt_cache: | 1758 | case Opt_cache: |
1765 | cache_specified = true; | ||
1766 | string = match_strdup(args); | 1759 | string = match_strdup(args); |
1767 | if (string == NULL) | 1760 | if (string == NULL) |
1768 | goto out_nomem; | 1761 | goto out_nomem; |
@@ -1813,14 +1806,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1813 | printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " | 1806 | printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " |
1814 | "specified with no gid= option.\n"); | 1807 | "specified with no gid= option.\n"); |
1815 | 1808 | ||
1816 | /* FIXME: remove this block in 3.7 */ | ||
1817 | if (!cache_specified && !cache_warned) { | ||
1818 | cache_warned = true; | ||
1819 | printk(KERN_NOTICE "CIFS: no cache= option specified, using " | ||
1820 | "\"cache=loose\". This default will change " | ||
1821 | "to \"cache=strict\" in 3.7.\n"); | ||
1822 | } | ||
1823 | |||
1824 | kfree(mountdata_copy); | 1809 | kfree(mountdata_copy); |
1825 | return 0; | 1810 | return 0; |
1826 | 1811 | ||
@@ -2636,6 +2621,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) | |||
2636 | tcon->retry = volume_info->retry; | 2621 | tcon->retry = volume_info->retry; |
2637 | tcon->nocase = volume_info->nocase; | 2622 | tcon->nocase = volume_info->nocase; |
2638 | tcon->local_lease = volume_info->local_lease; | 2623 | tcon->local_lease = volume_info->local_lease; |
2624 | INIT_LIST_HEAD(&tcon->pending_opens); | ||
2639 | 2625 | ||
2640 | spin_lock(&cifs_tcp_ses_lock); | 2626 | spin_lock(&cifs_tcp_ses_lock); |
2641 | list_add(&tcon->tcon_list, &ses->tcon_list); | 2627 | list_add(&tcon->tcon_list, &ses->tcon_list); |
@@ -3261,146 +3247,6 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, | |||
3261 | "mount option supported"); | 3247 | "mount option supported"); |
3262 | } | 3248 | } |
3263 | 3249 | ||
3264 | /* | ||
3265 | * When the server supports very large reads and writes via POSIX extensions, | ||
3266 | * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not | ||
3267 | * including the RFC1001 length. | ||
3268 | * | ||
3269 | * Note that this might make for "interesting" allocation problems during | ||
3270 | * writeback however as we have to allocate an array of pointers for the | ||
3271 | * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. | ||
3272 | * | ||
3273 | * For reads, there is a similar problem as we need to allocate an array | ||
3274 | * of kvecs to handle the receive, though that should only need to be done | ||
3275 | * once. | ||
3276 | */ | ||
3277 | #define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) | ||
3278 | #define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4) | ||
3279 | |||
3280 | /* | ||
3281 | * When the server doesn't allow large posix writes, only allow a rsize/wsize | ||
3282 | * of 2^17-1 minus the size of the call header. That allows for a read or | ||
3283 | * write up to the maximum size described by RFC1002. | ||
3284 | */ | ||
3285 | #define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) | ||
3286 | #define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) | ||
3287 | |||
3288 | /* | ||
3289 | * The default wsize is 1M. find_get_pages seems to return a maximum of 256 | ||
3290 | * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill | ||
3291 | * a single wsize request with a single call. | ||
3292 | */ | ||
3293 | #define CIFS_DEFAULT_IOSIZE (1024 * 1024) | ||
3294 | |||
3295 | /* | ||
3296 | * Windows only supports a max of 60kb reads and 65535 byte writes. Default to | ||
3297 | * those values when posix extensions aren't in force. In actuality here, we | ||
3298 | * use 65536 to allow for a write that is a multiple of 4k. Most servers seem | ||
3299 | * to be ok with the extra byte even though Windows doesn't send writes that | ||
3300 | * are that large. | ||
3301 | * | ||
3302 | * Citation: | ||
3303 | * | ||
3304 | * http://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx | ||
3305 | */ | ||
3306 | #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) | ||
3307 | #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) | ||
3308 | |||
3309 | /* | ||
3310 | * On hosts with high memory, we can't currently support wsize/rsize that are | ||
3311 | * larger than we can kmap at once. Cap the rsize/wsize at | ||
3312 | * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request | ||
3313 | * larger than that anyway. | ||
3314 | */ | ||
3315 | #ifdef CONFIG_HIGHMEM | ||
3316 | #define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE) | ||
3317 | #else /* CONFIG_HIGHMEM */ | ||
3318 | #define CIFS_KMAP_SIZE_LIMIT (1<<24) | ||
3319 | #endif /* CONFIG_HIGHMEM */ | ||
3320 | |||
3321 | static unsigned int | ||
3322 | cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) | ||
3323 | { | ||
3324 | __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); | ||
3325 | struct TCP_Server_Info *server = tcon->ses->server; | ||
3326 | unsigned int wsize; | ||
3327 | |||
3328 | /* start with specified wsize, or default */ | ||
3329 | if (pvolume_info->wsize) | ||
3330 | wsize = pvolume_info->wsize; | ||
3331 | else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) | ||
3332 | wsize = CIFS_DEFAULT_IOSIZE; | ||
3333 | else | ||
3334 | wsize = CIFS_DEFAULT_NON_POSIX_WSIZE; | ||
3335 | |||
3336 | /* can server support 24-bit write sizes? (via UNIX extensions) */ | ||
3337 | if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) | ||
3338 | wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE); | ||
3339 | |||
3340 | /* | ||
3341 | * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set? | ||
3342 | * Limit it to max buffer offered by the server, minus the size of the | ||
3343 | * WRITEX header, not including the 4 byte RFC1001 length. | ||
3344 | */ | ||
3345 | if (!(server->capabilities & CAP_LARGE_WRITE_X) || | ||
3346 | (!(server->capabilities & CAP_UNIX) && | ||
3347 | (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)))) | ||
3348 | wsize = min_t(unsigned int, wsize, | ||
3349 | server->maxBuf - sizeof(WRITE_REQ) + 4); | ||
3350 | |||
3351 | /* limit to the amount that we can kmap at once */ | ||
3352 | wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT); | ||
3353 | |||
3354 | /* hard limit of CIFS_MAX_WSIZE */ | ||
3355 | wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); | ||
3356 | |||
3357 | return wsize; | ||
3358 | } | ||
3359 | |||
3360 | static unsigned int | ||
3361 | cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) | ||
3362 | { | ||
3363 | __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); | ||
3364 | struct TCP_Server_Info *server = tcon->ses->server; | ||
3365 | unsigned int rsize, defsize; | ||
3366 | |||
3367 | /* | ||
3368 | * Set default value... | ||
3369 | * | ||
3370 | * HACK alert! Ancient servers have very small buffers. Even though | ||
3371 | * MS-CIFS indicates that servers are only limited by the client's | ||
3372 | * bufsize for reads, testing against win98se shows that it throws | ||
3373 | * INVALID_PARAMETER errors if you try to request too large a read. | ||
3374 | * OS/2 just sends back short reads. | ||
3375 | * | ||
3376 | * If the server doesn't advertise CAP_LARGE_READ_X, then assume that | ||
3377 | * it can't handle a read request larger than its MaxBufferSize either. | ||
3378 | */ | ||
3379 | if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP)) | ||
3380 | defsize = CIFS_DEFAULT_IOSIZE; | ||
3381 | else if (server->capabilities & CAP_LARGE_READ_X) | ||
3382 | defsize = CIFS_DEFAULT_NON_POSIX_RSIZE; | ||
3383 | else | ||
3384 | defsize = server->maxBuf - sizeof(READ_RSP); | ||
3385 | |||
3386 | rsize = pvolume_info->rsize ? pvolume_info->rsize : defsize; | ||
3387 | |||
3388 | /* | ||
3389 | * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to | ||
3390 | * the client's MaxBufferSize. | ||
3391 | */ | ||
3392 | if (!(server->capabilities & CAP_LARGE_READ_X)) | ||
3393 | rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); | ||
3394 | |||
3395 | /* limit to the amount that we can kmap at once */ | ||
3396 | rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT); | ||
3397 | |||
3398 | /* hard limit of CIFS_MAX_RSIZE */ | ||
3399 | rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); | ||
3400 | |||
3401 | return rsize; | ||
3402 | } | ||
3403 | |||
3404 | static void | 3250 | static void |
3405 | cleanup_volume_info_contents(struct smb_vol *volume_info) | 3251 | cleanup_volume_info_contents(struct smb_vol *volume_info) |
3406 | { | 3252 | { |
@@ -3651,8 +3497,8 @@ try_mount_again: | |||
3651 | if (!tcon->ipc && server->ops->qfs_tcon) | 3497 | if (!tcon->ipc && server->ops->qfs_tcon) |
3652 | server->ops->qfs_tcon(xid, tcon); | 3498 | server->ops->qfs_tcon(xid, tcon); |
3653 | 3499 | ||
3654 | cifs_sb->wsize = cifs_negotiate_wsize(tcon, volume_info); | 3500 | cifs_sb->wsize = server->ops->negotiate_wsize(tcon, volume_info); |
3655 | cifs_sb->rsize = cifs_negotiate_rsize(tcon, volume_info); | 3501 | cifs_sb->rsize = server->ops->negotiate_rsize(tcon, volume_info); |
3656 | 3502 | ||
3657 | /* tune readahead according to rsize */ | 3503 | /* tune readahead according to rsize */ |
3658 | cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_CACHE_SIZE; | 3504 | cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_CACHE_SIZE; |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index cbe709ad6663..7c0a81283645 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -160,17 +160,18 @@ check_name(struct dentry *direntry) | |||
160 | static int | 160 | static int |
161 | cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | 161 | cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, |
162 | struct tcon_link *tlink, unsigned oflags, umode_t mode, | 162 | struct tcon_link *tlink, unsigned oflags, umode_t mode, |
163 | __u32 *oplock, __u16 *fileHandle, int *created) | 163 | __u32 *oplock, struct cifs_fid *fid, int *created) |
164 | { | 164 | { |
165 | int rc = -ENOENT; | 165 | int rc = -ENOENT; |
166 | int create_options = CREATE_NOT_DIR; | 166 | int create_options = CREATE_NOT_DIR; |
167 | int desiredAccess; | 167 | int desired_access; |
168 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 168 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
169 | struct cifs_tcon *tcon = tlink_tcon(tlink); | 169 | struct cifs_tcon *tcon = tlink_tcon(tlink); |
170 | char *full_path = NULL; | 170 | char *full_path = NULL; |
171 | FILE_ALL_INFO *buf = NULL; | 171 | FILE_ALL_INFO *buf = NULL; |
172 | struct inode *newinode = NULL; | 172 | struct inode *newinode = NULL; |
173 | int disposition; | 173 | int disposition; |
174 | struct TCP_Server_Info *server = tcon->ses->server; | ||
174 | 175 | ||
175 | *oplock = 0; | 176 | *oplock = 0; |
176 | if (tcon->ses->server->oplocks) | 177 | if (tcon->ses->server->oplocks) |
@@ -185,8 +186,8 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | |||
185 | if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open && | 186 | if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open && |
186 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & | 187 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & |
187 | le64_to_cpu(tcon->fsUnixInfo.Capability))) { | 188 | le64_to_cpu(tcon->fsUnixInfo.Capability))) { |
188 | rc = cifs_posix_open(full_path, &newinode, | 189 | rc = cifs_posix_open(full_path, &newinode, inode->i_sb, mode, |
189 | inode->i_sb, mode, oflags, oplock, fileHandle, xid); | 190 | oflags, oplock, &fid->netfid, xid); |
190 | switch (rc) { | 191 | switch (rc) { |
191 | case 0: | 192 | case 0: |
192 | if (newinode == NULL) { | 193 | if (newinode == NULL) { |
@@ -202,7 +203,7 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | |||
202 | * close it and proceed as if it were a normal | 203 | * close it and proceed as if it were a normal |
203 | * lookup. | 204 | * lookup. |
204 | */ | 205 | */ |
205 | CIFSSMBClose(xid, tcon, *fileHandle); | 206 | CIFSSMBClose(xid, tcon, fid->netfid); |
206 | goto cifs_create_get_file_info; | 207 | goto cifs_create_get_file_info; |
207 | } | 208 | } |
208 | /* success, no need to query */ | 209 | /* success, no need to query */ |
@@ -244,11 +245,11 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | |||
244 | */ | 245 | */ |
245 | } | 246 | } |
246 | 247 | ||
247 | desiredAccess = 0; | 248 | desired_access = 0; |
248 | if (OPEN_FMODE(oflags) & FMODE_READ) | 249 | if (OPEN_FMODE(oflags) & FMODE_READ) |
249 | desiredAccess |= GENERIC_READ; /* is this too little? */ | 250 | desired_access |= GENERIC_READ; /* is this too little? */ |
250 | if (OPEN_FMODE(oflags) & FMODE_WRITE) | 251 | if (OPEN_FMODE(oflags) & FMODE_WRITE) |
251 | desiredAccess |= GENERIC_WRITE; | 252 | desired_access |= GENERIC_WRITE; |
252 | 253 | ||
253 | disposition = FILE_OVERWRITE_IF; | 254 | disposition = FILE_OVERWRITE_IF; |
254 | if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) | 255 | if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) |
@@ -260,8 +261,15 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | |||
260 | else | 261 | else |
261 | cFYI(1, "Create flag not set in create function"); | 262 | cFYI(1, "Create flag not set in create function"); |
262 | 263 | ||
263 | /* BB add processing to set equivalent of mode - e.g. via CreateX with | 264 | /* |
264 | ACLs */ | 265 | * BB add processing to set equivalent of mode - e.g. via CreateX with |
266 | * ACLs | ||
267 | */ | ||
268 | |||
269 | if (!server->ops->open) { | ||
270 | rc = -ENOSYS; | ||
271 | goto out; | ||
272 | } | ||
265 | 273 | ||
266 | buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); | 274 | buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); |
267 | if (buf == NULL) { | 275 | if (buf == NULL) { |
@@ -279,28 +287,18 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | |||
279 | if (backup_cred(cifs_sb)) | 287 | if (backup_cred(cifs_sb)) |
280 | create_options |= CREATE_OPEN_BACKUP_INTENT; | 288 | create_options |= CREATE_OPEN_BACKUP_INTENT; |
281 | 289 | ||
282 | if (tcon->ses->capabilities & CAP_NT_SMBS) | 290 | rc = server->ops->open(xid, tcon, full_path, disposition, |
283 | rc = CIFSSMBOpen(xid, tcon, full_path, disposition, | 291 | desired_access, create_options, fid, oplock, |
284 | desiredAccess, create_options, | 292 | buf, cifs_sb); |
285 | fileHandle, oplock, buf, cifs_sb->local_nls, | ||
286 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
287 | else | ||
288 | rc = -EIO; /* no NT SMB support fall into legacy open below */ | ||
289 | |||
290 | if (rc == -EIO) { | ||
291 | /* old server, retry the open legacy style */ | ||
292 | rc = SMBLegacyOpen(xid, tcon, full_path, disposition, | ||
293 | desiredAccess, create_options, | ||
294 | fileHandle, oplock, buf, cifs_sb->local_nls, | ||
295 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
296 | } | ||
297 | if (rc) { | 293 | if (rc) { |
298 | cFYI(1, "cifs_create returned 0x%x", rc); | 294 | cFYI(1, "cifs_create returned 0x%x", rc); |
299 | goto out; | 295 | goto out; |
300 | } | 296 | } |
301 | 297 | ||
302 | /* If Open reported that we actually created a file | 298 | /* |
303 | then we now have to set the mode if possible */ | 299 | * If Open reported that we actually created a file then we now have to |
300 | * set the mode if possible. | ||
301 | */ | ||
304 | if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) { | 302 | if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) { |
305 | struct cifs_unix_set_info_args args = { | 303 | struct cifs_unix_set_info_args args = { |
306 | .mode = mode, | 304 | .mode = mode, |
@@ -321,11 +319,13 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | |||
321 | args.uid = NO_CHANGE_64; | 319 | args.uid = NO_CHANGE_64; |
322 | args.gid = NO_CHANGE_64; | 320 | args.gid = NO_CHANGE_64; |
323 | } | 321 | } |
324 | CIFSSMBUnixSetFileInfo(xid, tcon, &args, *fileHandle, | 322 | CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid->netfid, |
325 | current->tgid); | 323 | current->tgid); |
326 | } else { | 324 | } else { |
327 | /* BB implement mode setting via Windows security | 325 | /* |
328 | descriptors e.g. */ | 326 | * BB implement mode setting via Windows security |
327 | * descriptors e.g. | ||
328 | */ | ||
329 | /* CIFSSMBWinSetPerms(xid,tcon,path,mode,-1,-1,nls);*/ | 329 | /* CIFSSMBWinSetPerms(xid,tcon,path,mode,-1,-1,nls);*/ |
330 | 330 | ||
331 | /* Could set r/o dos attribute if mode & 0222 == 0 */ | 331 | /* Could set r/o dos attribute if mode & 0222 == 0 */ |
@@ -334,12 +334,14 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | |||
334 | cifs_create_get_file_info: | 334 | cifs_create_get_file_info: |
335 | /* server might mask mode so we have to query for it */ | 335 | /* server might mask mode so we have to query for it */ |
336 | if (tcon->unix_ext) | 336 | if (tcon->unix_ext) |
337 | rc = cifs_get_inode_info_unix(&newinode, full_path, | 337 | rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, |
338 | inode->i_sb, xid); | 338 | xid); |
339 | else { | 339 | else { |
340 | rc = cifs_get_inode_info(&newinode, full_path, buf, | 340 | rc = cifs_get_inode_info(&newinode, full_path, buf, inode->i_sb, |
341 | inode->i_sb, xid, fileHandle); | 341 | xid, &fid->netfid); |
342 | if (newinode) { | 342 | if (newinode) { |
343 | if (server->ops->set_lease_key) | ||
344 | server->ops->set_lease_key(newinode, fid); | ||
343 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) | 345 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) |
344 | newinode->i_mode = mode; | 346 | newinode->i_mode = mode; |
345 | if ((*oplock & CIFS_CREATE_ACTION) && | 347 | if ((*oplock & CIFS_CREATE_ACTION) && |
@@ -356,19 +358,13 @@ cifs_create_get_file_info: | |||
356 | cifs_create_set_dentry: | 358 | cifs_create_set_dentry: |
357 | if (rc != 0) { | 359 | if (rc != 0) { |
358 | cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); | 360 | cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); |
361 | if (server->ops->close) | ||
362 | server->ops->close(xid, tcon, fid); | ||
359 | goto out; | 363 | goto out; |
360 | } | 364 | } |
361 | d_drop(direntry); | 365 | d_drop(direntry); |
362 | d_add(direntry, newinode); | 366 | d_add(direntry, newinode); |
363 | 367 | ||
364 | /* ENOENT for create? How weird... */ | ||
365 | rc = -ENOENT; | ||
366 | if (!newinode) { | ||
367 | CIFSSMBClose(xid, tcon, *fileHandle); | ||
368 | goto out; | ||
369 | } | ||
370 | rc = 0; | ||
371 | |||
372 | out: | 368 | out: |
373 | kfree(buf); | 369 | kfree(buf); |
374 | kfree(full_path); | 370 | kfree(full_path); |
@@ -384,11 +380,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, | |||
384 | unsigned int xid; | 380 | unsigned int xid; |
385 | struct tcon_link *tlink; | 381 | struct tcon_link *tlink; |
386 | struct cifs_tcon *tcon; | 382 | struct cifs_tcon *tcon; |
387 | __u16 fileHandle; | 383 | struct TCP_Server_Info *server; |
384 | struct cifs_fid fid; | ||
385 | struct cifs_pending_open open; | ||
388 | __u32 oplock; | 386 | __u32 oplock; |
389 | struct cifsFileInfo *pfile_info; | 387 | struct cifsFileInfo *file_info; |
390 | 388 | ||
391 | /* Posix open is only called (at lookup time) for file create now. For | 389 | /* |
390 | * Posix open is only called (at lookup time) for file create now. For | ||
392 | * opens (rather than creates), because we do not know if it is a file | 391 | * opens (rather than creates), because we do not know if it is a file |
393 | * or directory yet, and current Samba no longer allows us to do posix | 392 | * or directory yet, and current Samba no longer allows us to do posix |
394 | * open on dirs, we could end up wasting an open call on what turns out | 393 | * open on dirs, we could end up wasting an open call on what turns out |
@@ -420,22 +419,34 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, | |||
420 | goto out_free_xid; | 419 | goto out_free_xid; |
421 | 420 | ||
422 | tcon = tlink_tcon(tlink); | 421 | tcon = tlink_tcon(tlink); |
422 | server = tcon->ses->server; | ||
423 | |||
424 | if (server->ops->new_lease_key) | ||
425 | server->ops->new_lease_key(&fid); | ||
426 | |||
427 | cifs_add_pending_open(&fid, tlink, &open); | ||
423 | 428 | ||
424 | rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, | 429 | rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, |
425 | &oplock, &fileHandle, opened); | 430 | &oplock, &fid, opened); |
426 | 431 | ||
427 | if (rc) | 432 | if (rc) { |
433 | cifs_del_pending_open(&open); | ||
428 | goto out; | 434 | goto out; |
435 | } | ||
429 | 436 | ||
430 | rc = finish_open(file, direntry, generic_file_open, opened); | 437 | rc = finish_open(file, direntry, generic_file_open, opened); |
431 | if (rc) { | 438 | if (rc) { |
432 | CIFSSMBClose(xid, tcon, fileHandle); | 439 | if (server->ops->close) |
440 | server->ops->close(xid, tcon, &fid); | ||
441 | cifs_del_pending_open(&open); | ||
433 | goto out; | 442 | goto out; |
434 | } | 443 | } |
435 | 444 | ||
436 | pfile_info = cifs_new_fileinfo(fileHandle, file, tlink, oplock); | 445 | file_info = cifs_new_fileinfo(&fid, file, tlink, oplock); |
437 | if (pfile_info == NULL) { | 446 | if (file_info == NULL) { |
438 | CIFSSMBClose(xid, tcon, fileHandle); | 447 | if (server->ops->close) |
448 | server->ops->close(xid, tcon, &fid); | ||
449 | cifs_del_pending_open(&open); | ||
439 | rc = -ENOMEM; | 450 | rc = -ENOMEM; |
440 | } | 451 | } |
441 | 452 | ||
@@ -460,7 +471,9 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, | |||
460 | */ | 471 | */ |
461 | unsigned oflags = O_EXCL | O_CREAT | O_RDWR; | 472 | unsigned oflags = O_EXCL | O_CREAT | O_RDWR; |
462 | struct tcon_link *tlink; | 473 | struct tcon_link *tlink; |
463 | __u16 fileHandle; | 474 | struct cifs_tcon *tcon; |
475 | struct TCP_Server_Info *server; | ||
476 | struct cifs_fid fid; | ||
464 | __u32 oplock; | 477 | __u32 oplock; |
465 | int created = FILE_CREATED; | 478 | int created = FILE_CREATED; |
466 | 479 | ||
@@ -472,10 +485,16 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, | |||
472 | if (IS_ERR(tlink)) | 485 | if (IS_ERR(tlink)) |
473 | goto out_free_xid; | 486 | goto out_free_xid; |
474 | 487 | ||
488 | tcon = tlink_tcon(tlink); | ||
489 | server = tcon->ses->server; | ||
490 | |||
491 | if (server->ops->new_lease_key) | ||
492 | server->ops->new_lease_key(&fid); | ||
493 | |||
475 | rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, | 494 | rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, |
476 | &oplock, &fileHandle, &created); | 495 | &oplock, &fid, &created); |
477 | if (!rc) | 496 | if (!rc && server->ops->close) |
478 | CIFSSMBClose(xid, tlink_tcon(tlink), fileHandle); | 497 | server->ops->close(xid, tcon, &fid); |
479 | 498 | ||
480 | cifs_put_tlink(tlink); | 499 | cifs_put_tlink(tlink); |
481 | out_free_xid: | 500 | out_free_xid: |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 9154192b0683..edb25b4bbb95 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -169,16 +169,20 @@ posix_open_ret: | |||
169 | 169 | ||
170 | static int | 170 | static int |
171 | cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, | 171 | cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, |
172 | struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock, | 172 | struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock, |
173 | __u16 *pnetfid, unsigned int xid) | 173 | struct cifs_fid *fid, unsigned int xid) |
174 | { | 174 | { |
175 | int rc; | 175 | int rc; |
176 | int desiredAccess; | 176 | int desired_access; |
177 | int disposition; | 177 | int disposition; |
178 | int create_options = CREATE_NOT_DIR; | 178 | int create_options = CREATE_NOT_DIR; |
179 | FILE_ALL_INFO *buf; | 179 | FILE_ALL_INFO *buf; |
180 | struct TCP_Server_Info *server = tcon->ses->server; | ||
181 | |||
182 | if (!server->ops->open) | ||
183 | return -ENOSYS; | ||
180 | 184 | ||
181 | desiredAccess = cifs_convert_flags(f_flags); | 185 | desired_access = cifs_convert_flags(f_flags); |
182 | 186 | ||
183 | /********************************************************************* | 187 | /********************************************************************* |
184 | * open flag mapping table: | 188 | * open flag mapping table: |
@@ -215,16 +219,9 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, | |||
215 | if (backup_cred(cifs_sb)) | 219 | if (backup_cred(cifs_sb)) |
216 | create_options |= CREATE_OPEN_BACKUP_INTENT; | 220 | create_options |= CREATE_OPEN_BACKUP_INTENT; |
217 | 221 | ||
218 | if (tcon->ses->capabilities & CAP_NT_SMBS) | 222 | rc = server->ops->open(xid, tcon, full_path, disposition, |
219 | rc = CIFSSMBOpen(xid, tcon, full_path, disposition, | 223 | desired_access, create_options, fid, oplock, buf, |
220 | desiredAccess, create_options, pnetfid, poplock, buf, | 224 | cifs_sb); |
221 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags | ||
222 | & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
223 | else | ||
224 | rc = SMBLegacyOpen(xid, tcon, full_path, disposition, | ||
225 | desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf, | ||
226 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags | ||
227 | & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
228 | 225 | ||
229 | if (rc) | 226 | if (rc) |
230 | goto out; | 227 | goto out; |
@@ -234,7 +231,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, | |||
234 | xid); | 231 | xid); |
235 | else | 232 | else |
236 | rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, | 233 | rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, |
237 | xid, pnetfid); | 234 | xid, &fid->netfid); |
238 | 235 | ||
239 | out: | 236 | out: |
240 | kfree(buf); | 237 | kfree(buf); |
@@ -242,48 +239,62 @@ out: | |||
242 | } | 239 | } |
243 | 240 | ||
244 | struct cifsFileInfo * | 241 | struct cifsFileInfo * |
245 | cifs_new_fileinfo(__u16 fileHandle, struct file *file, | 242 | cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, |
246 | struct tcon_link *tlink, __u32 oplock) | 243 | struct tcon_link *tlink, __u32 oplock) |
247 | { | 244 | { |
248 | struct dentry *dentry = file->f_path.dentry; | 245 | struct dentry *dentry = file->f_path.dentry; |
249 | struct inode *inode = dentry->d_inode; | 246 | struct inode *inode = dentry->d_inode; |
250 | struct cifsInodeInfo *pCifsInode = CIFS_I(inode); | 247 | struct cifsInodeInfo *cinode = CIFS_I(inode); |
251 | struct cifsFileInfo *pCifsFile; | 248 | struct cifsFileInfo *cfile; |
252 | 249 | struct cifs_fid_locks *fdlocks; | |
253 | pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); | 250 | struct cifs_tcon *tcon = tlink_tcon(tlink); |
254 | if (pCifsFile == NULL) | 251 | |
255 | return pCifsFile; | 252 | cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); |
256 | 253 | if (cfile == NULL) | |
257 | pCifsFile->count = 1; | 254 | return cfile; |
258 | pCifsFile->netfid = fileHandle; | 255 | |
259 | pCifsFile->pid = current->tgid; | 256 | fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL); |
260 | pCifsFile->uid = current_fsuid(); | 257 | if (!fdlocks) { |
261 | pCifsFile->dentry = dget(dentry); | 258 | kfree(cfile); |
262 | pCifsFile->f_flags = file->f_flags; | 259 | return NULL; |
263 | pCifsFile->invalidHandle = false; | 260 | } |
264 | pCifsFile->tlink = cifs_get_tlink(tlink); | 261 | |
265 | mutex_init(&pCifsFile->fh_mutex); | 262 | INIT_LIST_HEAD(&fdlocks->locks); |
266 | INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); | 263 | fdlocks->cfile = cfile; |
267 | INIT_LIST_HEAD(&pCifsFile->llist); | 264 | cfile->llist = fdlocks; |
265 | down_write(&cinode->lock_sem); | ||
266 | list_add(&fdlocks->llist, &cinode->llist); | ||
267 | up_write(&cinode->lock_sem); | ||
268 | |||
269 | cfile->count = 1; | ||
270 | cfile->pid = current->tgid; | ||
271 | cfile->uid = current_fsuid(); | ||
272 | cfile->dentry = dget(dentry); | ||
273 | cfile->f_flags = file->f_flags; | ||
274 | cfile->invalidHandle = false; | ||
275 | cfile->tlink = cifs_get_tlink(tlink); | ||
276 | INIT_WORK(&cfile->oplock_break, cifs_oplock_break); | ||
277 | mutex_init(&cfile->fh_mutex); | ||
268 | 278 | ||
269 | spin_lock(&cifs_file_list_lock); | 279 | spin_lock(&cifs_file_list_lock); |
270 | list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList)); | 280 | if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE) |
281 | oplock = fid->pending_open->oplock; | ||
282 | list_del(&fid->pending_open->olist); | ||
283 | |||
284 | tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock); | ||
285 | |||
286 | list_add(&cfile->tlist, &tcon->openFileList); | ||
271 | /* if readable file instance put first in list*/ | 287 | /* if readable file instance put first in list*/ |
272 | if (file->f_mode & FMODE_READ) | 288 | if (file->f_mode & FMODE_READ) |
273 | list_add(&pCifsFile->flist, &pCifsInode->openFileList); | 289 | list_add(&cfile->flist, &cinode->openFileList); |
274 | else | 290 | else |
275 | list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); | 291 | list_add_tail(&cfile->flist, &cinode->openFileList); |
276 | spin_unlock(&cifs_file_list_lock); | 292 | spin_unlock(&cifs_file_list_lock); |
277 | 293 | ||
278 | cifs_set_oplock_level(pCifsInode, oplock); | 294 | file->private_data = cfile; |
279 | pCifsInode->can_cache_brlcks = pCifsInode->clientCanCacheAll; | 295 | return cfile; |
280 | |||
281 | file->private_data = pCifsFile; | ||
282 | return pCifsFile; | ||
283 | } | 296 | } |
284 | 297 | ||
285 | static void cifs_del_lock_waiters(struct cifsLockInfo *lock); | ||
286 | |||
287 | struct cifsFileInfo * | 298 | struct cifsFileInfo * |
288 | cifsFileInfo_get(struct cifsFileInfo *cifs_file) | 299 | cifsFileInfo_get(struct cifsFileInfo *cifs_file) |
289 | { | 300 | { |
@@ -302,9 +313,12 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | |||
302 | { | 313 | { |
303 | struct inode *inode = cifs_file->dentry->d_inode; | 314 | struct inode *inode = cifs_file->dentry->d_inode; |
304 | struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); | 315 | struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); |
316 | struct TCP_Server_Info *server = tcon->ses->server; | ||
305 | struct cifsInodeInfo *cifsi = CIFS_I(inode); | 317 | struct cifsInodeInfo *cifsi = CIFS_I(inode); |
306 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 318 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
307 | struct cifsLockInfo *li, *tmp; | 319 | struct cifsLockInfo *li, *tmp; |
320 | struct cifs_fid fid; | ||
321 | struct cifs_pending_open open; | ||
308 | 322 | ||
309 | spin_lock(&cifs_file_list_lock); | 323 | spin_lock(&cifs_file_list_lock); |
310 | if (--cifs_file->count > 0) { | 324 | if (--cifs_file->count > 0) { |
@@ -312,6 +326,12 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | |||
312 | return; | 326 | return; |
313 | } | 327 | } |
314 | 328 | ||
329 | if (server->ops->get_lease_key) | ||
330 | server->ops->get_lease_key(inode, &fid); | ||
331 | |||
332 | /* store open in pending opens to make sure we don't miss lease break */ | ||
333 | cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); | ||
334 | |||
315 | /* remove it from the lists */ | 335 | /* remove it from the lists */ |
316 | list_del(&cifs_file->flist); | 336 | list_del(&cifs_file->flist); |
317 | list_del(&cifs_file->tlist); | 337 | list_del(&cifs_file->tlist); |
@@ -319,13 +339,13 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | |||
319 | if (list_empty(&cifsi->openFileList)) { | 339 | if (list_empty(&cifsi->openFileList)) { |
320 | cFYI(1, "closing last open instance for inode %p", | 340 | cFYI(1, "closing last open instance for inode %p", |
321 | cifs_file->dentry->d_inode); | 341 | cifs_file->dentry->d_inode); |
322 | 342 | /* | |
323 | /* in strict cache mode we need invalidate mapping on the last | 343 | * In strict cache mode we need invalidate mapping on the last |
324 | close because it may cause a error when we open this file | 344 | * close because it may cause a error when we open this file |
325 | again and get at least level II oplock */ | 345 | * again and get at least level II oplock. |
346 | */ | ||
326 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) | 347 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) |
327 | CIFS_I(inode)->invalid_mapping = true; | 348 | CIFS_I(inode)->invalid_mapping = true; |
328 | |||
329 | cifs_set_oplock_level(cifsi, 0); | 349 | cifs_set_oplock_level(cifsi, 0); |
330 | } | 350 | } |
331 | spin_unlock(&cifs_file_list_lock); | 351 | spin_unlock(&cifs_file_list_lock); |
@@ -333,23 +353,30 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | |||
333 | cancel_work_sync(&cifs_file->oplock_break); | 353 | cancel_work_sync(&cifs_file->oplock_break); |
334 | 354 | ||
335 | if (!tcon->need_reconnect && !cifs_file->invalidHandle) { | 355 | if (!tcon->need_reconnect && !cifs_file->invalidHandle) { |
356 | struct TCP_Server_Info *server = tcon->ses->server; | ||
336 | unsigned int xid; | 357 | unsigned int xid; |
337 | int rc; | 358 | |
338 | xid = get_xid(); | 359 | xid = get_xid(); |
339 | rc = CIFSSMBClose(xid, tcon, cifs_file->netfid); | 360 | if (server->ops->close) |
340 | free_xid(xid); | 361 | server->ops->close(xid, tcon, &cifs_file->fid); |
362 | _free_xid(xid); | ||
341 | } | 363 | } |
342 | 364 | ||
343 | /* Delete any outstanding lock records. We'll lose them when the file | 365 | cifs_del_pending_open(&open); |
366 | |||
367 | /* | ||
368 | * Delete any outstanding lock records. We'll lose them when the file | ||
344 | * is closed anyway. | 369 | * is closed anyway. |
345 | */ | 370 | */ |
346 | mutex_lock(&cifsi->lock_mutex); | 371 | down_write(&cifsi->lock_sem); |
347 | list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) { | 372 | list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { |
348 | list_del(&li->llist); | 373 | list_del(&li->llist); |
349 | cifs_del_lock_waiters(li); | 374 | cifs_del_lock_waiters(li); |
350 | kfree(li); | 375 | kfree(li); |
351 | } | 376 | } |
352 | mutex_unlock(&cifsi->lock_mutex); | 377 | list_del(&cifs_file->llist->llist); |
378 | kfree(cifs_file->llist); | ||
379 | up_write(&cifsi->lock_sem); | ||
353 | 380 | ||
354 | cifs_put_tlink(cifs_file->tlink); | 381 | cifs_put_tlink(cifs_file->tlink); |
355 | dput(cifs_file->dentry); | 382 | dput(cifs_file->dentry); |
@@ -357,17 +384,20 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | |||
357 | } | 384 | } |
358 | 385 | ||
359 | int cifs_open(struct inode *inode, struct file *file) | 386 | int cifs_open(struct inode *inode, struct file *file) |
387 | |||
360 | { | 388 | { |
361 | int rc = -EACCES; | 389 | int rc = -EACCES; |
362 | unsigned int xid; | 390 | unsigned int xid; |
363 | __u32 oplock; | 391 | __u32 oplock; |
364 | struct cifs_sb_info *cifs_sb; | 392 | struct cifs_sb_info *cifs_sb; |
393 | struct TCP_Server_Info *server; | ||
365 | struct cifs_tcon *tcon; | 394 | struct cifs_tcon *tcon; |
366 | struct tcon_link *tlink; | 395 | struct tcon_link *tlink; |
367 | struct cifsFileInfo *pCifsFile = NULL; | 396 | struct cifsFileInfo *cfile = NULL; |
368 | char *full_path = NULL; | 397 | char *full_path = NULL; |
369 | bool posix_open_ok = false; | 398 | bool posix_open_ok = false; |
370 | __u16 netfid; | 399 | struct cifs_fid fid; |
400 | struct cifs_pending_open open; | ||
371 | 401 | ||
372 | xid = get_xid(); | 402 | xid = get_xid(); |
373 | 403 | ||
@@ -378,6 +408,7 @@ int cifs_open(struct inode *inode, struct file *file) | |||
378 | return PTR_ERR(tlink); | 408 | return PTR_ERR(tlink); |
379 | } | 409 | } |
380 | tcon = tlink_tcon(tlink); | 410 | tcon = tlink_tcon(tlink); |
411 | server = tcon->ses->server; | ||
381 | 412 | ||
382 | full_path = build_path_from_dentry(file->f_path.dentry); | 413 | full_path = build_path_from_dentry(file->f_path.dentry); |
383 | if (full_path == NULL) { | 414 | if (full_path == NULL) { |
@@ -388,7 +419,7 @@ int cifs_open(struct inode *inode, struct file *file) | |||
388 | cFYI(1, "inode = 0x%p file flags are 0x%x for %s", | 419 | cFYI(1, "inode = 0x%p file flags are 0x%x for %s", |
389 | inode, file->f_flags, full_path); | 420 | inode, file->f_flags, full_path); |
390 | 421 | ||
391 | if (tcon->ses->server->oplocks) | 422 | if (server->oplocks) |
392 | oplock = REQ_OPLOCK; | 423 | oplock = REQ_OPLOCK; |
393 | else | 424 | else |
394 | oplock = 0; | 425 | oplock = 0; |
@@ -399,7 +430,7 @@ int cifs_open(struct inode *inode, struct file *file) | |||
399 | /* can not refresh inode info since size could be stale */ | 430 | /* can not refresh inode info since size could be stale */ |
400 | rc = cifs_posix_open(full_path, &inode, inode->i_sb, | 431 | rc = cifs_posix_open(full_path, &inode, inode->i_sb, |
401 | cifs_sb->mnt_file_mode /* ignored */, | 432 | cifs_sb->mnt_file_mode /* ignored */, |
402 | file->f_flags, &oplock, &netfid, xid); | 433 | file->f_flags, &oplock, &fid.netfid, xid); |
403 | if (rc == 0) { | 434 | if (rc == 0) { |
404 | cFYI(1, "posix open succeeded"); | 435 | cFYI(1, "posix open succeeded"); |
405 | posix_open_ok = true; | 436 | posix_open_ok = true; |
@@ -415,20 +446,34 @@ int cifs_open(struct inode *inode, struct file *file) | |||
415 | } else if ((rc != -EIO) && (rc != -EREMOTE) && | 446 | } else if ((rc != -EIO) && (rc != -EREMOTE) && |
416 | (rc != -EOPNOTSUPP)) /* path not found or net err */ | 447 | (rc != -EOPNOTSUPP)) /* path not found or net err */ |
417 | goto out; | 448 | goto out; |
418 | /* else fallthrough to retry open the old way on network i/o | 449 | /* |
419 | or DFS errors */ | 450 | * Else fallthrough to retry open the old way on network i/o |
451 | * or DFS errors. | ||
452 | */ | ||
420 | } | 453 | } |
421 | 454 | ||
455 | if (server->ops->get_lease_key) | ||
456 | server->ops->get_lease_key(inode, &fid); | ||
457 | |||
458 | cifs_add_pending_open(&fid, tlink, &open); | ||
459 | |||
422 | if (!posix_open_ok) { | 460 | if (!posix_open_ok) { |
461 | if (server->ops->get_lease_key) | ||
462 | server->ops->get_lease_key(inode, &fid); | ||
463 | |||
423 | rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, | 464 | rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, |
424 | file->f_flags, &oplock, &netfid, xid); | 465 | file->f_flags, &oplock, &fid, xid); |
425 | if (rc) | 466 | if (rc) { |
467 | cifs_del_pending_open(&open); | ||
426 | goto out; | 468 | goto out; |
469 | } | ||
427 | } | 470 | } |
428 | 471 | ||
429 | pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock); | 472 | cfile = cifs_new_fileinfo(&fid, file, tlink, oplock); |
430 | if (pCifsFile == NULL) { | 473 | if (cfile == NULL) { |
431 | CIFSSMBClose(xid, tcon, netfid); | 474 | if (server->ops->close) |
475 | server->ops->close(xid, tcon, &fid); | ||
476 | cifs_del_pending_open(&open); | ||
432 | rc = -ENOMEM; | 477 | rc = -ENOMEM; |
433 | goto out; | 478 | goto out; |
434 | } | 479 | } |
@@ -436,8 +481,10 @@ int cifs_open(struct inode *inode, struct file *file) | |||
436 | cifs_fscache_set_inode_cookie(inode, file); | 481 | cifs_fscache_set_inode_cookie(inode, file); |
437 | 482 | ||
438 | if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) { | 483 | if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) { |
439 | /* time to set mode which we can not set earlier due to | 484 | /* |
440 | problems creating new read-only files */ | 485 | * Time to set mode which we can not set earlier due to |
486 | * problems creating new read-only files. | ||
487 | */ | ||
441 | struct cifs_unix_set_info_args args = { | 488 | struct cifs_unix_set_info_args args = { |
442 | .mode = inode->i_mode, | 489 | .mode = inode->i_mode, |
443 | .uid = NO_CHANGE_64, | 490 | .uid = NO_CHANGE_64, |
@@ -447,8 +494,8 @@ int cifs_open(struct inode *inode, struct file *file) | |||
447 | .mtime = NO_CHANGE_64, | 494 | .mtime = NO_CHANGE_64, |
448 | .device = 0, | 495 | .device = 0, |
449 | }; | 496 | }; |
450 | CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid, | 497 | CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid, |
451 | pCifsFile->pid); | 498 | cfile->pid); |
452 | } | 499 | } |
453 | 500 | ||
454 | out: | 501 | out: |
@@ -458,59 +505,66 @@ out: | |||
458 | return rc; | 505 | return rc; |
459 | } | 506 | } |
460 | 507 | ||
461 | /* Try to reacquire byte range locks that were released when session */ | 508 | /* |
462 | /* to server was lost */ | 509 | * Try to reacquire byte range locks that were released when session |
510 | * to server was lost | ||
511 | */ | ||
463 | static int cifs_relock_file(struct cifsFileInfo *cifsFile) | 512 | static int cifs_relock_file(struct cifsFileInfo *cifsFile) |
464 | { | 513 | { |
465 | int rc = 0; | 514 | int rc = 0; |
466 | 515 | ||
467 | /* BB list all locks open on this file and relock */ | 516 | /* BB list all locks open on this file and relock */ |
468 | 517 | ||
469 | return rc; | 518 | return rc; |
470 | } | 519 | } |
471 | 520 | ||
472 | static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) | 521 | static int |
522 | cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) | ||
473 | { | 523 | { |
474 | int rc = -EACCES; | 524 | int rc = -EACCES; |
475 | unsigned int xid; | 525 | unsigned int xid; |
476 | __u32 oplock; | 526 | __u32 oplock; |
477 | struct cifs_sb_info *cifs_sb; | 527 | struct cifs_sb_info *cifs_sb; |
478 | struct cifs_tcon *tcon; | 528 | struct cifs_tcon *tcon; |
479 | struct cifsInodeInfo *pCifsInode; | 529 | struct TCP_Server_Info *server; |
530 | struct cifsInodeInfo *cinode; | ||
480 | struct inode *inode; | 531 | struct inode *inode; |
481 | char *full_path = NULL; | 532 | char *full_path = NULL; |
482 | int desiredAccess; | 533 | int desired_access; |
483 | int disposition = FILE_OPEN; | 534 | int disposition = FILE_OPEN; |
484 | int create_options = CREATE_NOT_DIR; | 535 | int create_options = CREATE_NOT_DIR; |
485 | __u16 netfid; | 536 | struct cifs_fid fid; |
486 | 537 | ||
487 | xid = get_xid(); | 538 | xid = get_xid(); |
488 | mutex_lock(&pCifsFile->fh_mutex); | 539 | mutex_lock(&cfile->fh_mutex); |
489 | if (!pCifsFile->invalidHandle) { | 540 | if (!cfile->invalidHandle) { |
490 | mutex_unlock(&pCifsFile->fh_mutex); | 541 | mutex_unlock(&cfile->fh_mutex); |
491 | rc = 0; | 542 | rc = 0; |
492 | free_xid(xid); | 543 | free_xid(xid); |
493 | return rc; | 544 | return rc; |
494 | } | 545 | } |
495 | 546 | ||
496 | inode = pCifsFile->dentry->d_inode; | 547 | inode = cfile->dentry->d_inode; |
497 | cifs_sb = CIFS_SB(inode->i_sb); | 548 | cifs_sb = CIFS_SB(inode->i_sb); |
498 | tcon = tlink_tcon(pCifsFile->tlink); | 549 | tcon = tlink_tcon(cfile->tlink); |
550 | server = tcon->ses->server; | ||
499 | 551 | ||
500 | /* can not grab rename sem here because various ops, including | 552 | /* |
501 | those that already have the rename sem can end up causing writepage | 553 | * Can not grab rename sem here because various ops, including those |
502 | to get called and if the server was down that means we end up here, | 554 | * that already have the rename sem can end up causing writepage to get |
503 | and we can never tell if the caller already has the rename_sem */ | 555 | * called and if the server was down that means we end up here, and we |
504 | full_path = build_path_from_dentry(pCifsFile->dentry); | 556 | * can never tell if the caller already has the rename_sem. |
557 | */ | ||
558 | full_path = build_path_from_dentry(cfile->dentry); | ||
505 | if (full_path == NULL) { | 559 | if (full_path == NULL) { |
506 | rc = -ENOMEM; | 560 | rc = -ENOMEM; |
507 | mutex_unlock(&pCifsFile->fh_mutex); | 561 | mutex_unlock(&cfile->fh_mutex); |
508 | free_xid(xid); | 562 | free_xid(xid); |
509 | return rc; | 563 | return rc; |
510 | } | 564 | } |
511 | 565 | ||
512 | cFYI(1, "inode = 0x%p file flags 0x%x for %s", | 566 | cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags, |
513 | inode, pCifsFile->f_flags, full_path); | 567 | full_path); |
514 | 568 | ||
515 | if (tcon->ses->server->oplocks) | 569 | if (tcon->ses->server->oplocks) |
516 | oplock = REQ_OPLOCK; | 570 | oplock = REQ_OPLOCK; |
@@ -524,69 +578,72 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) | |||
524 | * O_CREAT, O_EXCL and O_TRUNC already had their effect on the | 578 | * O_CREAT, O_EXCL and O_TRUNC already had their effect on the |
525 | * original open. Must mask them off for a reopen. | 579 | * original open. Must mask them off for a reopen. |
526 | */ | 580 | */ |
527 | unsigned int oflags = pCifsFile->f_flags & | 581 | unsigned int oflags = cfile->f_flags & |
528 | ~(O_CREAT | O_EXCL | O_TRUNC); | 582 | ~(O_CREAT | O_EXCL | O_TRUNC); |
529 | 583 | ||
530 | rc = cifs_posix_open(full_path, NULL, inode->i_sb, | 584 | rc = cifs_posix_open(full_path, NULL, inode->i_sb, |
531 | cifs_sb->mnt_file_mode /* ignored */, | 585 | cifs_sb->mnt_file_mode /* ignored */, |
532 | oflags, &oplock, &netfid, xid); | 586 | oflags, &oplock, &fid.netfid, xid); |
533 | if (rc == 0) { | 587 | if (rc == 0) { |
534 | cFYI(1, "posix reopen succeeded"); | 588 | cFYI(1, "posix reopen succeeded"); |
535 | goto reopen_success; | 589 | goto reopen_success; |
536 | } | 590 | } |
537 | /* fallthrough to retry open the old way on errors, especially | 591 | /* |
538 | in the reconnect path it is important to retry hard */ | 592 | * fallthrough to retry open the old way on errors, especially |
593 | * in the reconnect path it is important to retry hard | ||
594 | */ | ||
539 | } | 595 | } |
540 | 596 | ||
541 | desiredAccess = cifs_convert_flags(pCifsFile->f_flags); | 597 | desired_access = cifs_convert_flags(cfile->f_flags); |
542 | 598 | ||
543 | if (backup_cred(cifs_sb)) | 599 | if (backup_cred(cifs_sb)) |
544 | create_options |= CREATE_OPEN_BACKUP_INTENT; | 600 | create_options |= CREATE_OPEN_BACKUP_INTENT; |
545 | 601 | ||
546 | /* Can not refresh inode by passing in file_info buf to be returned | 602 | if (server->ops->get_lease_key) |
547 | by SMBOpen and then calling get_inode_info with returned buf | 603 | server->ops->get_lease_key(inode, &fid); |
548 | since file might have write behind data that needs to be flushed | ||
549 | and server version of file size can be stale. If we knew for sure | ||
550 | that inode was not dirty locally we could do this */ | ||
551 | 604 | ||
552 | rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess, | 605 | /* |
553 | create_options, &netfid, &oplock, NULL, | 606 | * Can not refresh inode by passing in file_info buf to be returned by |
554 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | 607 | * CIFSSMBOpen and then calling get_inode_info with returned buf since |
555 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 608 | * file might have write behind data that needs to be flushed and server |
609 | * version of file size can be stale. If we knew for sure that inode was | ||
610 | * not dirty locally we could do this. | ||
611 | */ | ||
612 | rc = server->ops->open(xid, tcon, full_path, disposition, | ||
613 | desired_access, create_options, &fid, &oplock, | ||
614 | NULL, cifs_sb); | ||
556 | if (rc) { | 615 | if (rc) { |
557 | mutex_unlock(&pCifsFile->fh_mutex); | 616 | mutex_unlock(&cfile->fh_mutex); |
558 | cFYI(1, "cifs_open returned 0x%x", rc); | 617 | cFYI(1, "cifs_reopen returned 0x%x", rc); |
559 | cFYI(1, "oplock: %d", oplock); | 618 | cFYI(1, "oplock: %d", oplock); |
560 | goto reopen_error_exit; | 619 | goto reopen_error_exit; |
561 | } | 620 | } |
562 | 621 | ||
563 | reopen_success: | 622 | reopen_success: |
564 | pCifsFile->netfid = netfid; | 623 | cfile->invalidHandle = false; |
565 | pCifsFile->invalidHandle = false; | 624 | mutex_unlock(&cfile->fh_mutex); |
566 | mutex_unlock(&pCifsFile->fh_mutex); | 625 | cinode = CIFS_I(inode); |
567 | pCifsInode = CIFS_I(inode); | ||
568 | 626 | ||
569 | if (can_flush) { | 627 | if (can_flush) { |
570 | rc = filemap_write_and_wait(inode->i_mapping); | 628 | rc = filemap_write_and_wait(inode->i_mapping); |
571 | mapping_set_error(inode->i_mapping, rc); | 629 | mapping_set_error(inode->i_mapping, rc); |
572 | 630 | ||
573 | if (tcon->unix_ext) | 631 | if (tcon->unix_ext) |
574 | rc = cifs_get_inode_info_unix(&inode, | 632 | rc = cifs_get_inode_info_unix(&inode, full_path, |
575 | full_path, inode->i_sb, xid); | 633 | inode->i_sb, xid); |
576 | else | 634 | else |
577 | rc = cifs_get_inode_info(&inode, | 635 | rc = cifs_get_inode_info(&inode, full_path, NULL, |
578 | full_path, NULL, inode->i_sb, | 636 | inode->i_sb, xid, NULL); |
579 | xid, NULL); | 637 | } |
580 | } /* else we are writing out data to server already | 638 | /* |
581 | and could deadlock if we tried to flush data, and | 639 | * Else we are writing out data to server already and could deadlock if |
582 | since we do not know if we have data that would | 640 | * we tried to flush data, and since we do not know if we have data that |
583 | invalidate the current end of file on the server | 641 | * would invalidate the current end of file on the server we can not go |
584 | we can not go to the server to get the new inod | 642 | * to the server to get the new inode info. |
585 | info */ | 643 | */ |
586 | |||
587 | cifs_set_oplock_level(pCifsInode, oplock); | ||
588 | 644 | ||
589 | cifs_relock_file(pCifsFile); | 645 | server->ops->set_fid(cfile, &fid, oplock); |
646 | cifs_relock_file(cfile); | ||
590 | 647 | ||
591 | reopen_error_exit: | 648 | reopen_error_exit: |
592 | kfree(full_path); | 649 | kfree(full_path); |
@@ -609,42 +666,48 @@ int cifs_closedir(struct inode *inode, struct file *file) | |||
609 | { | 666 | { |
610 | int rc = 0; | 667 | int rc = 0; |
611 | unsigned int xid; | 668 | unsigned int xid; |
612 | struct cifsFileInfo *pCFileStruct = file->private_data; | 669 | struct cifsFileInfo *cfile = file->private_data; |
613 | char *ptmp; | 670 | struct cifs_tcon *tcon; |
671 | struct TCP_Server_Info *server; | ||
672 | char *buf; | ||
614 | 673 | ||
615 | cFYI(1, "Closedir inode = 0x%p", inode); | 674 | cFYI(1, "Closedir inode = 0x%p", inode); |
616 | 675 | ||
676 | if (cfile == NULL) | ||
677 | return rc; | ||
678 | |||
617 | xid = get_xid(); | 679 | xid = get_xid(); |
680 | tcon = tlink_tcon(cfile->tlink); | ||
681 | server = tcon->ses->server; | ||
618 | 682 | ||
619 | if (pCFileStruct) { | 683 | cFYI(1, "Freeing private data in close dir"); |
620 | struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink); | 684 | spin_lock(&cifs_file_list_lock); |
685 | if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { | ||
686 | cfile->invalidHandle = true; | ||
687 | spin_unlock(&cifs_file_list_lock); | ||
688 | if (server->ops->close_dir) | ||
689 | rc = server->ops->close_dir(xid, tcon, &cfile->fid); | ||
690 | else | ||
691 | rc = -ENOSYS; | ||
692 | cFYI(1, "Closing uncompleted readdir with rc %d", rc); | ||
693 | /* not much we can do if it fails anyway, ignore rc */ | ||
694 | rc = 0; | ||
695 | } else | ||
696 | spin_unlock(&cifs_file_list_lock); | ||
621 | 697 | ||
622 | cFYI(1, "Freeing private data in close dir"); | 698 | buf = cfile->srch_inf.ntwrk_buf_start; |
623 | spin_lock(&cifs_file_list_lock); | 699 | if (buf) { |
624 | if (!pCFileStruct->srch_inf.endOfSearch && | 700 | cFYI(1, "closedir free smb buf in srch struct"); |
625 | !pCFileStruct->invalidHandle) { | 701 | cfile->srch_inf.ntwrk_buf_start = NULL; |
626 | pCFileStruct->invalidHandle = true; | 702 | if (cfile->srch_inf.smallBuf) |
627 | spin_unlock(&cifs_file_list_lock); | 703 | cifs_small_buf_release(buf); |
628 | rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); | 704 | else |
629 | cFYI(1, "Closing uncompleted readdir with rc %d", | 705 | cifs_buf_release(buf); |
630 | rc); | ||
631 | /* not much we can do if it fails anyway, ignore rc */ | ||
632 | rc = 0; | ||
633 | } else | ||
634 | spin_unlock(&cifs_file_list_lock); | ||
635 | ptmp = pCFileStruct->srch_inf.ntwrk_buf_start; | ||
636 | if (ptmp) { | ||
637 | cFYI(1, "closedir free smb buf in srch struct"); | ||
638 | pCFileStruct->srch_inf.ntwrk_buf_start = NULL; | ||
639 | if (pCFileStruct->srch_inf.smallBuf) | ||
640 | cifs_small_buf_release(ptmp); | ||
641 | else | ||
642 | cifs_buf_release(ptmp); | ||
643 | } | ||
644 | cifs_put_tlink(pCFileStruct->tlink); | ||
645 | kfree(file->private_data); | ||
646 | file->private_data = NULL; | ||
647 | } | 706 | } |
707 | |||
708 | cifs_put_tlink(cfile->tlink); | ||
709 | kfree(file->private_data); | ||
710 | file->private_data = NULL; | ||
648 | /* BB can we lock the filestruct while this is going on? */ | 711 | /* BB can we lock the filestruct while this is going on? */ |
649 | free_xid(xid); | 712 | free_xid(xid); |
650 | return rc; | 713 | return rc; |
@@ -666,7 +729,7 @@ cifs_lock_init(__u64 offset, __u64 length, __u8 type) | |||
666 | return lock; | 729 | return lock; |
667 | } | 730 | } |
668 | 731 | ||
669 | static void | 732 | void |
670 | cifs_del_lock_waiters(struct cifsLockInfo *lock) | 733 | cifs_del_lock_waiters(struct cifsLockInfo *lock) |
671 | { | 734 | { |
672 | struct cifsLockInfo *li, *tmp; | 735 | struct cifsLockInfo *li, *tmp; |
@@ -677,45 +740,47 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock) | |||
677 | } | 740 | } |
678 | 741 | ||
679 | static bool | 742 | static bool |
680 | cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, | 743 | cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, |
681 | __u64 length, __u8 type, struct cifsFileInfo *cur, | 744 | __u64 length, __u8 type, struct cifsFileInfo *cfile, |
682 | struct cifsLockInfo **conf_lock) | 745 | struct cifsLockInfo **conf_lock, bool rw_check) |
683 | { | 746 | { |
684 | struct cifsLockInfo *li; | 747 | struct cifsLockInfo *li; |
748 | struct cifsFileInfo *cur_cfile = fdlocks->cfile; | ||
685 | struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; | 749 | struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; |
686 | 750 | ||
687 | list_for_each_entry(li, &cfile->llist, llist) { | 751 | list_for_each_entry(li, &fdlocks->locks, llist) { |
688 | if (offset + length <= li->offset || | 752 | if (offset + length <= li->offset || |
689 | offset >= li->offset + li->length) | 753 | offset >= li->offset + li->length) |
690 | continue; | 754 | continue; |
691 | else if ((type & server->vals->shared_lock_type) && | 755 | if (rw_check && server->ops->compare_fids(cfile, cur_cfile) && |
692 | ((server->ops->compare_fids(cur, cfile) && | 756 | current->tgid == li->pid) |
693 | current->tgid == li->pid) || type == li->type)) | ||
694 | continue; | 757 | continue; |
695 | else { | 758 | if ((type & server->vals->shared_lock_type) && |
759 | ((server->ops->compare_fids(cfile, cur_cfile) && | ||
760 | current->tgid == li->pid) || type == li->type)) | ||
761 | continue; | ||
762 | if (conf_lock) | ||
696 | *conf_lock = li; | 763 | *conf_lock = li; |
697 | return true; | 764 | return true; |
698 | } | ||
699 | } | 765 | } |
700 | return false; | 766 | return false; |
701 | } | 767 | } |
702 | 768 | ||
703 | static bool | 769 | bool |
704 | cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, | 770 | cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, |
705 | __u8 type, struct cifsLockInfo **conf_lock) | 771 | __u8 type, struct cifsLockInfo **conf_lock, |
772 | bool rw_check) | ||
706 | { | 773 | { |
707 | bool rc = false; | 774 | bool rc = false; |
708 | struct cifsFileInfo *fid, *tmp; | 775 | struct cifs_fid_locks *cur; |
709 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); | 776 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); |
710 | 777 | ||
711 | spin_lock(&cifs_file_list_lock); | 778 | list_for_each_entry(cur, &cinode->llist, llist) { |
712 | list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) { | 779 | rc = cifs_find_fid_lock_conflict(cur, offset, length, type, |
713 | rc = cifs_find_fid_lock_conflict(fid, offset, length, type, | 780 | cfile, conf_lock, rw_check); |
714 | cfile, conf_lock); | ||
715 | if (rc) | 781 | if (rc) |
716 | break; | 782 | break; |
717 | } | 783 | } |
718 | spin_unlock(&cifs_file_list_lock); | ||
719 | 784 | ||
720 | return rc; | 785 | return rc; |
721 | } | 786 | } |
@@ -737,10 +802,10 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length, | |||
737 | struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; | 802 | struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; |
738 | bool exist; | 803 | bool exist; |
739 | 804 | ||
740 | mutex_lock(&cinode->lock_mutex); | 805 | down_read(&cinode->lock_sem); |
741 | 806 | ||
742 | exist = cifs_find_lock_conflict(cfile, offset, length, type, | 807 | exist = cifs_find_lock_conflict(cfile, offset, length, type, |
743 | &conf_lock); | 808 | &conf_lock, false); |
744 | if (exist) { | 809 | if (exist) { |
745 | flock->fl_start = conf_lock->offset; | 810 | flock->fl_start = conf_lock->offset; |
746 | flock->fl_end = conf_lock->offset + conf_lock->length - 1; | 811 | flock->fl_end = conf_lock->offset + conf_lock->length - 1; |
@@ -754,7 +819,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length, | |||
754 | else | 819 | else |
755 | flock->fl_type = F_UNLCK; | 820 | flock->fl_type = F_UNLCK; |
756 | 821 | ||
757 | mutex_unlock(&cinode->lock_mutex); | 822 | up_read(&cinode->lock_sem); |
758 | return rc; | 823 | return rc; |
759 | } | 824 | } |
760 | 825 | ||
@@ -762,9 +827,9 @@ static void | |||
762 | cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) | 827 | cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) |
763 | { | 828 | { |
764 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); | 829 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); |
765 | mutex_lock(&cinode->lock_mutex); | 830 | down_write(&cinode->lock_sem); |
766 | list_add_tail(&lock->llist, &cfile->llist); | 831 | list_add_tail(&lock->llist, &cfile->llist->locks); |
767 | mutex_unlock(&cinode->lock_mutex); | 832 | up_write(&cinode->lock_sem); |
768 | } | 833 | } |
769 | 834 | ||
770 | /* | 835 | /* |
@@ -784,13 +849,13 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, | |||
784 | 849 | ||
785 | try_again: | 850 | try_again: |
786 | exist = false; | 851 | exist = false; |
787 | mutex_lock(&cinode->lock_mutex); | 852 | down_write(&cinode->lock_sem); |
788 | 853 | ||
789 | exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, | 854 | exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, |
790 | lock->type, &conf_lock); | 855 | lock->type, &conf_lock, false); |
791 | if (!exist && cinode->can_cache_brlcks) { | 856 | if (!exist && cinode->can_cache_brlcks) { |
792 | list_add_tail(&lock->llist, &cfile->llist); | 857 | list_add_tail(&lock->llist, &cfile->llist->locks); |
793 | mutex_unlock(&cinode->lock_mutex); | 858 | up_write(&cinode->lock_sem); |
794 | return rc; | 859 | return rc; |
795 | } | 860 | } |
796 | 861 | ||
@@ -800,17 +865,17 @@ try_again: | |||
800 | rc = -EACCES; | 865 | rc = -EACCES; |
801 | else { | 866 | else { |
802 | list_add_tail(&lock->blist, &conf_lock->blist); | 867 | list_add_tail(&lock->blist, &conf_lock->blist); |
803 | mutex_unlock(&cinode->lock_mutex); | 868 | up_write(&cinode->lock_sem); |
804 | rc = wait_event_interruptible(lock->block_q, | 869 | rc = wait_event_interruptible(lock->block_q, |
805 | (lock->blist.prev == &lock->blist) && | 870 | (lock->blist.prev == &lock->blist) && |
806 | (lock->blist.next == &lock->blist)); | 871 | (lock->blist.next == &lock->blist)); |
807 | if (!rc) | 872 | if (!rc) |
808 | goto try_again; | 873 | goto try_again; |
809 | mutex_lock(&cinode->lock_mutex); | 874 | down_write(&cinode->lock_sem); |
810 | list_del_init(&lock->blist); | 875 | list_del_init(&lock->blist); |
811 | } | 876 | } |
812 | 877 | ||
813 | mutex_unlock(&cinode->lock_mutex); | 878 | up_write(&cinode->lock_sem); |
814 | return rc; | 879 | return rc; |
815 | } | 880 | } |
816 | 881 | ||
@@ -831,7 +896,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock) | |||
831 | if ((flock->fl_flags & FL_POSIX) == 0) | 896 | if ((flock->fl_flags & FL_POSIX) == 0) |
832 | return 1; | 897 | return 1; |
833 | 898 | ||
834 | mutex_lock(&cinode->lock_mutex); | 899 | down_read(&cinode->lock_sem); |
835 | posix_test_lock(file, flock); | 900 | posix_test_lock(file, flock); |
836 | 901 | ||
837 | if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) { | 902 | if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) { |
@@ -839,7 +904,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock) | |||
839 | rc = 1; | 904 | rc = 1; |
840 | } | 905 | } |
841 | 906 | ||
842 | mutex_unlock(&cinode->lock_mutex); | 907 | up_read(&cinode->lock_sem); |
843 | return rc; | 908 | return rc; |
844 | } | 909 | } |
845 | 910 | ||
@@ -859,14 +924,14 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock) | |||
859 | return rc; | 924 | return rc; |
860 | 925 | ||
861 | try_again: | 926 | try_again: |
862 | mutex_lock(&cinode->lock_mutex); | 927 | down_write(&cinode->lock_sem); |
863 | if (!cinode->can_cache_brlcks) { | 928 | if (!cinode->can_cache_brlcks) { |
864 | mutex_unlock(&cinode->lock_mutex); | 929 | up_write(&cinode->lock_sem); |
865 | return rc; | 930 | return rc; |
866 | } | 931 | } |
867 | 932 | ||
868 | rc = posix_lock_file(file, flock, NULL); | 933 | rc = posix_lock_file(file, flock, NULL); |
869 | mutex_unlock(&cinode->lock_mutex); | 934 | up_write(&cinode->lock_sem); |
870 | if (rc == FILE_LOCK_DEFERRED) { | 935 | if (rc == FILE_LOCK_DEFERRED) { |
871 | rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); | 936 | rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); |
872 | if (!rc) | 937 | if (!rc) |
@@ -876,7 +941,7 @@ try_again: | |||
876 | return rc; | 941 | return rc; |
877 | } | 942 | } |
878 | 943 | ||
879 | static int | 944 | int |
880 | cifs_push_mandatory_locks(struct cifsFileInfo *cfile) | 945 | cifs_push_mandatory_locks(struct cifsFileInfo *cfile) |
881 | { | 946 | { |
882 | unsigned int xid; | 947 | unsigned int xid; |
@@ -893,9 +958,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
893 | xid = get_xid(); | 958 | xid = get_xid(); |
894 | tcon = tlink_tcon(cfile->tlink); | 959 | tcon = tlink_tcon(cfile->tlink); |
895 | 960 | ||
896 | mutex_lock(&cinode->lock_mutex); | 961 | /* we are going to update can_cache_brlcks here - need a write access */ |
962 | down_write(&cinode->lock_sem); | ||
897 | if (!cinode->can_cache_brlcks) { | 963 | if (!cinode->can_cache_brlcks) { |
898 | mutex_unlock(&cinode->lock_mutex); | 964 | up_write(&cinode->lock_sem); |
899 | free_xid(xid); | 965 | free_xid(xid); |
900 | return rc; | 966 | return rc; |
901 | } | 967 | } |
@@ -906,7 +972,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
906 | */ | 972 | */ |
907 | max_buf = tcon->ses->server->maxBuf; | 973 | max_buf = tcon->ses->server->maxBuf; |
908 | if (!max_buf) { | 974 | if (!max_buf) { |
909 | mutex_unlock(&cinode->lock_mutex); | 975 | up_write(&cinode->lock_sem); |
910 | free_xid(xid); | 976 | free_xid(xid); |
911 | return -EINVAL; | 977 | return -EINVAL; |
912 | } | 978 | } |
@@ -915,15 +981,15 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
915 | sizeof(LOCKING_ANDX_RANGE); | 981 | sizeof(LOCKING_ANDX_RANGE); |
916 | buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); | 982 | buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); |
917 | if (!buf) { | 983 | if (!buf) { |
918 | mutex_unlock(&cinode->lock_mutex); | 984 | up_write(&cinode->lock_sem); |
919 | free_xid(xid); | 985 | free_xid(xid); |
920 | return rc; | 986 | return -ENOMEM; |
921 | } | 987 | } |
922 | 988 | ||
923 | for (i = 0; i < 2; i++) { | 989 | for (i = 0; i < 2; i++) { |
924 | cur = buf; | 990 | cur = buf; |
925 | num = 0; | 991 | num = 0; |
926 | list_for_each_entry_safe(li, tmp, &cfile->llist, llist) { | 992 | list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { |
927 | if (li->type != types[i]) | 993 | if (li->type != types[i]) |
928 | continue; | 994 | continue; |
929 | cur->Pid = cpu_to_le16(li->pid); | 995 | cur->Pid = cpu_to_le16(li->pid); |
@@ -932,7 +998,8 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
932 | cur->OffsetLow = cpu_to_le32((u32)li->offset); | 998 | cur->OffsetLow = cpu_to_le32((u32)li->offset); |
933 | cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); | 999 | cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); |
934 | if (++num == max_num) { | 1000 | if (++num == max_num) { |
935 | stored_rc = cifs_lockv(xid, tcon, cfile->netfid, | 1001 | stored_rc = cifs_lockv(xid, tcon, |
1002 | cfile->fid.netfid, | ||
936 | (__u8)li->type, 0, num, | 1003 | (__u8)li->type, 0, num, |
937 | buf); | 1004 | buf); |
938 | if (stored_rc) | 1005 | if (stored_rc) |
@@ -944,7 +1011,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
944 | } | 1011 | } |
945 | 1012 | ||
946 | if (num) { | 1013 | if (num) { |
947 | stored_rc = cifs_lockv(xid, tcon, cfile->netfid, | 1014 | stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, |
948 | (__u8)types[i], 0, num, buf); | 1015 | (__u8)types[i], 0, num, buf); |
949 | if (stored_rc) | 1016 | if (stored_rc) |
950 | rc = stored_rc; | 1017 | rc = stored_rc; |
@@ -952,7 +1019,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
952 | } | 1019 | } |
953 | 1020 | ||
954 | cinode->can_cache_brlcks = false; | 1021 | cinode->can_cache_brlcks = false; |
955 | mutex_unlock(&cinode->lock_mutex); | 1022 | up_write(&cinode->lock_sem); |
956 | 1023 | ||
957 | kfree(buf); | 1024 | kfree(buf); |
958 | free_xid(xid); | 1025 | free_xid(xid); |
@@ -987,9 +1054,10 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
987 | 1054 | ||
988 | xid = get_xid(); | 1055 | xid = get_xid(); |
989 | 1056 | ||
990 | mutex_lock(&cinode->lock_mutex); | 1057 | /* we are going to update can_cache_brlcks here - need a write access */ |
1058 | down_write(&cinode->lock_sem); | ||
991 | if (!cinode->can_cache_brlcks) { | 1059 | if (!cinode->can_cache_brlcks) { |
992 | mutex_unlock(&cinode->lock_mutex); | 1060 | up_write(&cinode->lock_sem); |
993 | free_xid(xid); | 1061 | free_xid(xid); |
994 | return rc; | 1062 | return rc; |
995 | } | 1063 | } |
@@ -1005,7 +1073,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
1005 | 1073 | ||
1006 | /* | 1074 | /* |
1007 | * Allocating count locks is enough because no FL_POSIX locks can be | 1075 | * Allocating count locks is enough because no FL_POSIX locks can be |
1008 | * added to the list while we are holding cinode->lock_mutex that | 1076 | * added to the list while we are holding cinode->lock_sem that |
1009 | * protects locking operations of this inode. | 1077 | * protects locking operations of this inode. |
1010 | */ | 1078 | */ |
1011 | for (; i < count; i++) { | 1079 | for (; i < count; i++) { |
@@ -1038,7 +1106,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
1038 | type = CIFS_WRLCK; | 1106 | type = CIFS_WRLCK; |
1039 | lck = list_entry(el, struct lock_to_push, llist); | 1107 | lck = list_entry(el, struct lock_to_push, llist); |
1040 | lck->pid = flock->fl_pid; | 1108 | lck->pid = flock->fl_pid; |
1041 | lck->netfid = cfile->netfid; | 1109 | lck->netfid = cfile->fid.netfid; |
1042 | lck->length = length; | 1110 | lck->length = length; |
1043 | lck->type = type; | 1111 | lck->type = type; |
1044 | lck->offset = flock->fl_start; | 1112 | lck->offset = flock->fl_start; |
@@ -1060,7 +1128,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
1060 | 1128 | ||
1061 | out: | 1129 | out: |
1062 | cinode->can_cache_brlcks = false; | 1130 | cinode->can_cache_brlcks = false; |
1063 | mutex_unlock(&cinode->lock_mutex); | 1131 | up_write(&cinode->lock_sem); |
1064 | 1132 | ||
1065 | free_xid(xid); | 1133 | free_xid(xid); |
1066 | return rc; | 1134 | return rc; |
@@ -1083,7 +1151,7 @@ cifs_push_locks(struct cifsFileInfo *cfile) | |||
1083 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) | 1151 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) |
1084 | return cifs_push_posix_locks(cfile); | 1152 | return cifs_push_posix_locks(cfile); |
1085 | 1153 | ||
1086 | return cifs_push_mandatory_locks(cfile); | 1154 | return tcon->ses->server->ops->push_mand_locks(cfile); |
1087 | } | 1155 | } |
1088 | 1156 | ||
1089 | static void | 1157 | static void |
@@ -1104,7 +1172,8 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, | |||
1104 | if (flock->fl_flags & FL_LEASE) | 1172 | if (flock->fl_flags & FL_LEASE) |
1105 | cFYI(1, "Lease on file - not implemented yet"); | 1173 | cFYI(1, "Lease on file - not implemented yet"); |
1106 | if (flock->fl_flags & | 1174 | if (flock->fl_flags & |
1107 | (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE))) | 1175 | (~(FL_POSIX | FL_FLOCK | FL_SLEEP | |
1176 | FL_ACCESS | FL_LEASE | FL_CLOSE))) | ||
1108 | cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags); | 1177 | cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags); |
1109 | 1178 | ||
1110 | *type = server->vals->large_lock_type; | 1179 | *type = server->vals->large_lock_type; |
@@ -1134,15 +1203,6 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, | |||
1134 | } | 1203 | } |
1135 | 1204 | ||
1136 | static int | 1205 | static int |
1137 | cifs_mandatory_lock(unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, | ||
1138 | __u64 length, __u32 type, int lock, int unlock, bool wait) | ||
1139 | { | ||
1140 | return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->netfid, | ||
1141 | current->tgid, length, offset, unlock, lock, | ||
1142 | (__u8)type, wait, 0); | ||
1143 | } | ||
1144 | |||
1145 | static int | ||
1146 | cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, | 1206 | cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, |
1147 | bool wait_flag, bool posix_lck, unsigned int xid) | 1207 | bool wait_flag, bool posix_lck, unsigned int xid) |
1148 | { | 1208 | { |
@@ -1151,7 +1211,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, | |||
1151 | struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; | 1211 | struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; |
1152 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | 1212 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); |
1153 | struct TCP_Server_Info *server = tcon->ses->server; | 1213 | struct TCP_Server_Info *server = tcon->ses->server; |
1154 | __u16 netfid = cfile->netfid; | 1214 | __u16 netfid = cfile->fid.netfid; |
1155 | 1215 | ||
1156 | if (posix_lck) { | 1216 | if (posix_lck) { |
1157 | int posix_lock_type; | 1217 | int posix_lock_type; |
@@ -1175,11 +1235,11 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, | |||
1175 | return rc; | 1235 | return rc; |
1176 | 1236 | ||
1177 | /* BB we could chain these into one lock request BB */ | 1237 | /* BB we could chain these into one lock request BB */ |
1178 | rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type, | 1238 | rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type, |
1179 | 1, 0, false); | 1239 | 1, 0, false); |
1180 | if (rc == 0) { | 1240 | if (rc == 0) { |
1181 | rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, | 1241 | rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, |
1182 | type, 0, 1, false); | 1242 | type, 0, 1, false); |
1183 | flock->fl_type = F_UNLCK; | 1243 | flock->fl_type = F_UNLCK; |
1184 | if (rc != 0) | 1244 | if (rc != 0) |
1185 | cERROR(1, "Error unlocking previously locked " | 1245 | cERROR(1, "Error unlocking previously locked " |
@@ -1192,13 +1252,14 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, | |||
1192 | return 0; | 1252 | return 0; |
1193 | } | 1253 | } |
1194 | 1254 | ||
1195 | rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, | 1255 | type &= ~server->vals->exclusive_lock_type; |
1196 | type | server->vals->shared_lock_type, 1, 0, | 1256 | |
1197 | false); | 1257 | rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, |
1258 | type | server->vals->shared_lock_type, | ||
1259 | 1, 0, false); | ||
1198 | if (rc == 0) { | 1260 | if (rc == 0) { |
1199 | rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, | 1261 | rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, |
1200 | type | server->vals->shared_lock_type, | 1262 | type | server->vals->shared_lock_type, 0, 1, false); |
1201 | 0, 1, false); | ||
1202 | flock->fl_type = F_RDLCK; | 1263 | flock->fl_type = F_RDLCK; |
1203 | if (rc != 0) | 1264 | if (rc != 0) |
1204 | cERROR(1, "Error unlocking previously locked " | 1265 | cERROR(1, "Error unlocking previously locked " |
@@ -1209,7 +1270,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, | |||
1209 | return 0; | 1270 | return 0; |
1210 | } | 1271 | } |
1211 | 1272 | ||
1212 | static void | 1273 | void |
1213 | cifs_move_llist(struct list_head *source, struct list_head *dest) | 1274 | cifs_move_llist(struct list_head *source, struct list_head *dest) |
1214 | { | 1275 | { |
1215 | struct list_head *li, *tmp; | 1276 | struct list_head *li, *tmp; |
@@ -1217,7 +1278,7 @@ cifs_move_llist(struct list_head *source, struct list_head *dest) | |||
1217 | list_move(li, dest); | 1278 | list_move(li, dest); |
1218 | } | 1279 | } |
1219 | 1280 | ||
1220 | static void | 1281 | void |
1221 | cifs_free_llist(struct list_head *llist) | 1282 | cifs_free_llist(struct list_head *llist) |
1222 | { | 1283 | { |
1223 | struct cifsLockInfo *li, *tmp; | 1284 | struct cifsLockInfo *li, *tmp; |
@@ -1228,7 +1289,7 @@ cifs_free_llist(struct list_head *llist) | |||
1228 | } | 1289 | } |
1229 | } | 1290 | } |
1230 | 1291 | ||
1231 | static int | 1292 | int |
1232 | cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, | 1293 | cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, |
1233 | unsigned int xid) | 1294 | unsigned int xid) |
1234 | { | 1295 | { |
@@ -1260,11 +1321,11 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, | |||
1260 | if (!buf) | 1321 | if (!buf) |
1261 | return -ENOMEM; | 1322 | return -ENOMEM; |
1262 | 1323 | ||
1263 | mutex_lock(&cinode->lock_mutex); | 1324 | down_write(&cinode->lock_sem); |
1264 | for (i = 0; i < 2; i++) { | 1325 | for (i = 0; i < 2; i++) { |
1265 | cur = buf; | 1326 | cur = buf; |
1266 | num = 0; | 1327 | num = 0; |
1267 | list_for_each_entry_safe(li, tmp, &cfile->llist, llist) { | 1328 | list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { |
1268 | if (flock->fl_start > li->offset || | 1329 | if (flock->fl_start > li->offset || |
1269 | (flock->fl_start + length) < | 1330 | (flock->fl_start + length) < |
1270 | (li->offset + li->length)) | 1331 | (li->offset + li->length)) |
@@ -1295,7 +1356,8 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, | |||
1295 | */ | 1356 | */ |
1296 | list_move(&li->llist, &tmp_llist); | 1357 | list_move(&li->llist, &tmp_llist); |
1297 | if (++num == max_num) { | 1358 | if (++num == max_num) { |
1298 | stored_rc = cifs_lockv(xid, tcon, cfile->netfid, | 1359 | stored_rc = cifs_lockv(xid, tcon, |
1360 | cfile->fid.netfid, | ||
1299 | li->type, num, 0, buf); | 1361 | li->type, num, 0, buf); |
1300 | if (stored_rc) { | 1362 | if (stored_rc) { |
1301 | /* | 1363 | /* |
@@ -1304,7 +1366,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, | |||
1304 | * list to the head of the file's list. | 1366 | * list to the head of the file's list. |
1305 | */ | 1367 | */ |
1306 | cifs_move_llist(&tmp_llist, | 1368 | cifs_move_llist(&tmp_llist, |
1307 | &cfile->llist); | 1369 | &cfile->llist->locks); |
1308 | rc = stored_rc; | 1370 | rc = stored_rc; |
1309 | } else | 1371 | } else |
1310 | /* | 1372 | /* |
@@ -1318,23 +1380,24 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, | |||
1318 | cur++; | 1380 | cur++; |
1319 | } | 1381 | } |
1320 | if (num) { | 1382 | if (num) { |
1321 | stored_rc = cifs_lockv(xid, tcon, cfile->netfid, | 1383 | stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, |
1322 | types[i], num, 0, buf); | 1384 | types[i], num, 0, buf); |
1323 | if (stored_rc) { | 1385 | if (stored_rc) { |
1324 | cifs_move_llist(&tmp_llist, &cfile->llist); | 1386 | cifs_move_llist(&tmp_llist, |
1387 | &cfile->llist->locks); | ||
1325 | rc = stored_rc; | 1388 | rc = stored_rc; |
1326 | } else | 1389 | } else |
1327 | cifs_free_llist(&tmp_llist); | 1390 | cifs_free_llist(&tmp_llist); |
1328 | } | 1391 | } |
1329 | } | 1392 | } |
1330 | 1393 | ||
1331 | mutex_unlock(&cinode->lock_mutex); | 1394 | up_write(&cinode->lock_sem); |
1332 | kfree(buf); | 1395 | kfree(buf); |
1333 | return rc; | 1396 | return rc; |
1334 | } | 1397 | } |
1335 | 1398 | ||
1336 | static int | 1399 | static int |
1337 | cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, | 1400 | cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, |
1338 | bool wait_flag, bool posix_lck, int lock, int unlock, | 1401 | bool wait_flag, bool posix_lck, int lock, int unlock, |
1339 | unsigned int xid) | 1402 | unsigned int xid) |
1340 | { | 1403 | { |
@@ -1343,7 +1406,6 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, | |||
1343 | struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; | 1406 | struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; |
1344 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | 1407 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); |
1345 | struct TCP_Server_Info *server = tcon->ses->server; | 1408 | struct TCP_Server_Info *server = tcon->ses->server; |
1346 | __u16 netfid = cfile->netfid; | ||
1347 | 1409 | ||
1348 | if (posix_lck) { | 1410 | if (posix_lck) { |
1349 | int posix_lock_type; | 1411 | int posix_lock_type; |
@@ -1360,9 +1422,9 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, | |||
1360 | if (unlock == 1) | 1422 | if (unlock == 1) |
1361 | posix_lock_type = CIFS_UNLCK; | 1423 | posix_lock_type = CIFS_UNLCK; |
1362 | 1424 | ||
1363 | rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid, | 1425 | rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, |
1364 | flock->fl_start, length, NULL, | 1426 | current->tgid, flock->fl_start, length, |
1365 | posix_lock_type, wait_flag); | 1427 | NULL, posix_lock_type, wait_flag); |
1366 | goto out; | 1428 | goto out; |
1367 | } | 1429 | } |
1368 | 1430 | ||
@@ -1379,8 +1441,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, | |||
1379 | if (rc <= 0) | 1441 | if (rc <= 0) |
1380 | goto out; | 1442 | goto out; |
1381 | 1443 | ||
1382 | rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, | 1444 | rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, |
1383 | type, 1, 0, wait_flag); | 1445 | type, 1, 0, wait_flag); |
1384 | if (rc) { | 1446 | if (rc) { |
1385 | kfree(lock); | 1447 | kfree(lock); |
1386 | goto out; | 1448 | goto out; |
@@ -1388,7 +1450,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, | |||
1388 | 1450 | ||
1389 | cifs_lock_add(cfile, lock); | 1451 | cifs_lock_add(cfile, lock); |
1390 | } else if (unlock) | 1452 | } else if (unlock) |
1391 | rc = cifs_unlock_range(cfile, flock, xid); | 1453 | rc = server->ops->mand_unlock_range(cfile, flock, xid); |
1392 | 1454 | ||
1393 | out: | 1455 | out: |
1394 | if (flock->fl_flags & FL_POSIX) | 1456 | if (flock->fl_flags & FL_POSIX) |
@@ -1423,7 +1485,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) | |||
1423 | tcon->ses->server); | 1485 | tcon->ses->server); |
1424 | 1486 | ||
1425 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 1487 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
1426 | netfid = cfile->netfid; | 1488 | netfid = cfile->fid.netfid; |
1427 | cinode = CIFS_I(file->f_path.dentry->d_inode); | 1489 | cinode = CIFS_I(file->f_path.dentry->d_inode); |
1428 | 1490 | ||
1429 | if (cap_unix(tcon->ses) && | 1491 | if (cap_unix(tcon->ses) && |
@@ -1469,15 +1531,16 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, | |||
1469 | cifsi->server_eof = end_of_write; | 1531 | cifsi->server_eof = end_of_write; |
1470 | } | 1532 | } |
1471 | 1533 | ||
1472 | static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, | 1534 | static ssize_t |
1473 | const char *write_data, size_t write_size, | 1535 | cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, |
1474 | loff_t *poffset) | 1536 | size_t write_size, loff_t *offset) |
1475 | { | 1537 | { |
1476 | int rc = 0; | 1538 | int rc = 0; |
1477 | unsigned int bytes_written = 0; | 1539 | unsigned int bytes_written = 0; |
1478 | unsigned int total_written; | 1540 | unsigned int total_written; |
1479 | struct cifs_sb_info *cifs_sb; | 1541 | struct cifs_sb_info *cifs_sb; |
1480 | struct cifs_tcon *pTcon; | 1542 | struct cifs_tcon *tcon; |
1543 | struct TCP_Server_Info *server; | ||
1481 | unsigned int xid; | 1544 | unsigned int xid; |
1482 | struct dentry *dentry = open_file->dentry; | 1545 | struct dentry *dentry = open_file->dentry; |
1483 | struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode); | 1546 | struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode); |
@@ -1486,9 +1549,13 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, | |||
1486 | cifs_sb = CIFS_SB(dentry->d_sb); | 1549 | cifs_sb = CIFS_SB(dentry->d_sb); |
1487 | 1550 | ||
1488 | cFYI(1, "write %zd bytes to offset %lld of %s", write_size, | 1551 | cFYI(1, "write %zd bytes to offset %lld of %s", write_size, |
1489 | *poffset, dentry->d_name.name); | 1552 | *offset, dentry->d_name.name); |
1490 | 1553 | ||
1491 | pTcon = tlink_tcon(open_file->tlink); | 1554 | tcon = tlink_tcon(open_file->tlink); |
1555 | server = tcon->ses->server; | ||
1556 | |||
1557 | if (!server->ops->sync_write) | ||
1558 | return -ENOSYS; | ||
1492 | 1559 | ||
1493 | xid = get_xid(); | 1560 | xid = get_xid(); |
1494 | 1561 | ||
@@ -1514,13 +1581,12 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, | |||
1514 | /* iov[0] is reserved for smb header */ | 1581 | /* iov[0] is reserved for smb header */ |
1515 | iov[1].iov_base = (char *)write_data + total_written; | 1582 | iov[1].iov_base = (char *)write_data + total_written; |
1516 | iov[1].iov_len = len; | 1583 | iov[1].iov_len = len; |
1517 | io_parms.netfid = open_file->netfid; | ||
1518 | io_parms.pid = pid; | 1584 | io_parms.pid = pid; |
1519 | io_parms.tcon = pTcon; | 1585 | io_parms.tcon = tcon; |
1520 | io_parms.offset = *poffset; | 1586 | io_parms.offset = *offset; |
1521 | io_parms.length = len; | 1587 | io_parms.length = len; |
1522 | rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov, | 1588 | rc = server->ops->sync_write(xid, open_file, &io_parms, |
1523 | 1, 0); | 1589 | &bytes_written, iov, 1); |
1524 | } | 1590 | } |
1525 | if (rc || (bytes_written == 0)) { | 1591 | if (rc || (bytes_written == 0)) { |
1526 | if (total_written) | 1592 | if (total_written) |
@@ -1531,18 +1597,18 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, | |||
1531 | } | 1597 | } |
1532 | } else { | 1598 | } else { |
1533 | spin_lock(&dentry->d_inode->i_lock); | 1599 | spin_lock(&dentry->d_inode->i_lock); |
1534 | cifs_update_eof(cifsi, *poffset, bytes_written); | 1600 | cifs_update_eof(cifsi, *offset, bytes_written); |
1535 | spin_unlock(&dentry->d_inode->i_lock); | 1601 | spin_unlock(&dentry->d_inode->i_lock); |
1536 | *poffset += bytes_written; | 1602 | *offset += bytes_written; |
1537 | } | 1603 | } |
1538 | } | 1604 | } |
1539 | 1605 | ||
1540 | cifs_stats_bytes_written(pTcon, total_written); | 1606 | cifs_stats_bytes_written(tcon, total_written); |
1541 | 1607 | ||
1542 | if (total_written > 0) { | 1608 | if (total_written > 0) { |
1543 | spin_lock(&dentry->d_inode->i_lock); | 1609 | spin_lock(&dentry->d_inode->i_lock); |
1544 | if (*poffset > dentry->d_inode->i_size) | 1610 | if (*offset > dentry->d_inode->i_size) |
1545 | i_size_write(dentry->d_inode, *poffset); | 1611 | i_size_write(dentry->d_inode, *offset); |
1546 | spin_unlock(&dentry->d_inode->i_lock); | 1612 | spin_unlock(&dentry->d_inode->i_lock); |
1547 | } | 1613 | } |
1548 | mark_inode_dirty_sync(dentry->d_inode); | 1614 | mark_inode_dirty_sync(dentry->d_inode); |
@@ -1718,27 +1784,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) | |||
1718 | return rc; | 1784 | return rc; |
1719 | } | 1785 | } |
1720 | 1786 | ||
1721 | /* | ||
1722 | * Marshal up the iov array, reserving the first one for the header. Also, | ||
1723 | * set wdata->bytes. | ||
1724 | */ | ||
1725 | static void | ||
1726 | cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata) | ||
1727 | { | ||
1728 | int i; | ||
1729 | struct inode *inode = wdata->cfile->dentry->d_inode; | ||
1730 | loff_t size = i_size_read(inode); | ||
1731 | |||
1732 | /* marshal up the pages into iov array */ | ||
1733 | wdata->bytes = 0; | ||
1734 | for (i = 0; i < wdata->nr_pages; i++) { | ||
1735 | iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]), | ||
1736 | (loff_t)PAGE_CACHE_SIZE); | ||
1737 | iov[i + 1].iov_base = kmap(wdata->pages[i]); | ||
1738 | wdata->bytes += iov[i + 1].iov_len; | ||
1739 | } | ||
1740 | } | ||
1741 | |||
1742 | static int cifs_writepages(struct address_space *mapping, | 1787 | static int cifs_writepages(struct address_space *mapping, |
1743 | struct writeback_control *wbc) | 1788 | struct writeback_control *wbc) |
1744 | { | 1789 | { |
@@ -1746,8 +1791,10 @@ static int cifs_writepages(struct address_space *mapping, | |||
1746 | bool done = false, scanned = false, range_whole = false; | 1791 | bool done = false, scanned = false, range_whole = false; |
1747 | pgoff_t end, index; | 1792 | pgoff_t end, index; |
1748 | struct cifs_writedata *wdata; | 1793 | struct cifs_writedata *wdata; |
1794 | struct TCP_Server_Info *server; | ||
1749 | struct page *page; | 1795 | struct page *page; |
1750 | int rc = 0; | 1796 | int rc = 0; |
1797 | loff_t isize = i_size_read(mapping->host); | ||
1751 | 1798 | ||
1752 | /* | 1799 | /* |
1753 | * If wsize is smaller than the page cache size, default to writing | 1800 | * If wsize is smaller than the page cache size, default to writing |
@@ -1852,7 +1899,7 @@ retry: | |||
1852 | */ | 1899 | */ |
1853 | set_page_writeback(page); | 1900 | set_page_writeback(page); |
1854 | 1901 | ||
1855 | if (page_offset(page) >= mapping->host->i_size) { | 1902 | if (page_offset(page) >= isize) { |
1856 | done = true; | 1903 | done = true; |
1857 | unlock_page(page); | 1904 | unlock_page(page); |
1858 | end_page_writeback(page); | 1905 | end_page_writeback(page); |
@@ -1883,7 +1930,12 @@ retry: | |||
1883 | wdata->sync_mode = wbc->sync_mode; | 1930 | wdata->sync_mode = wbc->sync_mode; |
1884 | wdata->nr_pages = nr_pages; | 1931 | wdata->nr_pages = nr_pages; |
1885 | wdata->offset = page_offset(wdata->pages[0]); | 1932 | wdata->offset = page_offset(wdata->pages[0]); |
1886 | wdata->marshal_iov = cifs_writepages_marshal_iov; | 1933 | wdata->pagesz = PAGE_CACHE_SIZE; |
1934 | wdata->tailsz = | ||
1935 | min(isize - page_offset(wdata->pages[nr_pages - 1]), | ||
1936 | (loff_t)PAGE_CACHE_SIZE); | ||
1937 | wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + | ||
1938 | wdata->tailsz; | ||
1887 | 1939 | ||
1888 | do { | 1940 | do { |
1889 | if (wdata->cfile != NULL) | 1941 | if (wdata->cfile != NULL) |
@@ -1896,7 +1948,8 @@ retry: | |||
1896 | break; | 1948 | break; |
1897 | } | 1949 | } |
1898 | wdata->pid = wdata->cfile->pid; | 1950 | wdata->pid = wdata->cfile->pid; |
1899 | rc = cifs_async_writev(wdata); | 1951 | server = tlink_tcon(wdata->cfile->tlink)->ses->server; |
1952 | rc = server->ops->async_writev(wdata); | ||
1900 | } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN); | 1953 | } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN); |
1901 | 1954 | ||
1902 | for (i = 0; i < nr_pages; ++i) | 1955 | for (i = 0; i < nr_pages; ++i) |
@@ -2054,6 +2107,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, | |||
2054 | unsigned int xid; | 2107 | unsigned int xid; |
2055 | int rc = 0; | 2108 | int rc = 0; |
2056 | struct cifs_tcon *tcon; | 2109 | struct cifs_tcon *tcon; |
2110 | struct TCP_Server_Info *server; | ||
2057 | struct cifsFileInfo *smbfile = file->private_data; | 2111 | struct cifsFileInfo *smbfile = file->private_data; |
2058 | struct inode *inode = file->f_path.dentry->d_inode; | 2112 | struct inode *inode = file->f_path.dentry->d_inode; |
2059 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 2113 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
@@ -2077,8 +2131,13 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, | |||
2077 | } | 2131 | } |
2078 | 2132 | ||
2079 | tcon = tlink_tcon(smbfile->tlink); | 2133 | tcon = tlink_tcon(smbfile->tlink); |
2080 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) | 2134 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { |
2081 | rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); | 2135 | server = tcon->ses->server; |
2136 | if (server->ops->flush) | ||
2137 | rc = server->ops->flush(xid, tcon, &smbfile->fid); | ||
2138 | else | ||
2139 | rc = -ENOSYS; | ||
2140 | } | ||
2082 | 2141 | ||
2083 | free_xid(xid); | 2142 | free_xid(xid); |
2084 | mutex_unlock(&inode->i_mutex); | 2143 | mutex_unlock(&inode->i_mutex); |
@@ -2090,6 +2149,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
2090 | unsigned int xid; | 2149 | unsigned int xid; |
2091 | int rc = 0; | 2150 | int rc = 0; |
2092 | struct cifs_tcon *tcon; | 2151 | struct cifs_tcon *tcon; |
2152 | struct TCP_Server_Info *server; | ||
2093 | struct cifsFileInfo *smbfile = file->private_data; | 2153 | struct cifsFileInfo *smbfile = file->private_data; |
2094 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 2154 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
2095 | struct inode *inode = file->f_mapping->host; | 2155 | struct inode *inode = file->f_mapping->host; |
@@ -2105,8 +2165,13 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
2105 | file->f_path.dentry->d_name.name, datasync); | 2165 | file->f_path.dentry->d_name.name, datasync); |
2106 | 2166 | ||
2107 | tcon = tlink_tcon(smbfile->tlink); | 2167 | tcon = tlink_tcon(smbfile->tlink); |
2108 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) | 2168 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { |
2109 | rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); | 2169 | server = tcon->ses->server; |
2170 | if (server->ops->flush) | ||
2171 | rc = server->ops->flush(xid, tcon, &smbfile->fid); | ||
2172 | else | ||
2173 | rc = -ENOSYS; | ||
2174 | } | ||
2110 | 2175 | ||
2111 | free_xid(xid); | 2176 | free_xid(xid); |
2112 | mutex_unlock(&inode->i_mutex); | 2177 | mutex_unlock(&inode->i_mutex); |
@@ -2172,20 +2237,6 @@ size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len) | |||
2172 | } | 2237 | } |
2173 | 2238 | ||
2174 | static void | 2239 | static void |
2175 | cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata) | ||
2176 | { | ||
2177 | int i; | ||
2178 | size_t bytes = wdata->bytes; | ||
2179 | |||
2180 | /* marshal up the pages into iov array */ | ||
2181 | for (i = 0; i < wdata->nr_pages; i++) { | ||
2182 | iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE); | ||
2183 | iov[i + 1].iov_base = kmap(wdata->pages[i]); | ||
2184 | bytes -= iov[i + 1].iov_len; | ||
2185 | } | ||
2186 | } | ||
2187 | |||
2188 | static void | ||
2189 | cifs_uncached_writev_complete(struct work_struct *work) | 2240 | cifs_uncached_writev_complete(struct work_struct *work) |
2190 | { | 2241 | { |
2191 | int i; | 2242 | int i; |
@@ -2215,6 +2266,9 @@ static int | |||
2215 | cifs_uncached_retry_writev(struct cifs_writedata *wdata) | 2266 | cifs_uncached_retry_writev(struct cifs_writedata *wdata) |
2216 | { | 2267 | { |
2217 | int rc; | 2268 | int rc; |
2269 | struct TCP_Server_Info *server; | ||
2270 | |||
2271 | server = tlink_tcon(wdata->cfile->tlink)->ses->server; | ||
2218 | 2272 | ||
2219 | do { | 2273 | do { |
2220 | if (wdata->cfile->invalidHandle) { | 2274 | if (wdata->cfile->invalidHandle) { |
@@ -2222,7 +2276,7 @@ cifs_uncached_retry_writev(struct cifs_writedata *wdata) | |||
2222 | if (rc != 0) | 2276 | if (rc != 0) |
2223 | continue; | 2277 | continue; |
2224 | } | 2278 | } |
2225 | rc = cifs_async_writev(wdata); | 2279 | rc = server->ops->async_writev(wdata); |
2226 | } while (rc == -EAGAIN); | 2280 | } while (rc == -EAGAIN); |
2227 | 2281 | ||
2228 | return rc; | 2282 | return rc; |
@@ -2257,6 +2311,10 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, | |||
2257 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 2311 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
2258 | open_file = file->private_data; | 2312 | open_file = file->private_data; |
2259 | tcon = tlink_tcon(open_file->tlink); | 2313 | tcon = tlink_tcon(open_file->tlink); |
2314 | |||
2315 | if (!tcon->ses->server->ops->async_writev) | ||
2316 | return -ENOSYS; | ||
2317 | |||
2260 | offset = *poffset; | 2318 | offset = *poffset; |
2261 | 2319 | ||
2262 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) | 2320 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) |
@@ -2298,7 +2356,8 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, | |||
2298 | wdata->cfile = cifsFileInfo_get(open_file); | 2356 | wdata->cfile = cifsFileInfo_get(open_file); |
2299 | wdata->pid = pid; | 2357 | wdata->pid = pid; |
2300 | wdata->bytes = cur_len; | 2358 | wdata->bytes = cur_len; |
2301 | wdata->marshal_iov = cifs_uncached_marshal_iov; | 2359 | wdata->pagesz = PAGE_SIZE; |
2360 | wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); | ||
2302 | rc = cifs_uncached_retry_writev(wdata); | 2361 | rc = cifs_uncached_retry_writev(wdata); |
2303 | if (rc) { | 2362 | if (rc) { |
2304 | kref_put(&wdata->refcount, cifs_writedata_release); | 2363 | kref_put(&wdata->refcount, cifs_writedata_release); |
@@ -2376,40 +2435,110 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, | |||
2376 | return written; | 2435 | return written; |
2377 | } | 2436 | } |
2378 | 2437 | ||
2379 | ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, | 2438 | static ssize_t |
2380 | unsigned long nr_segs, loff_t pos) | 2439 | cifs_writev(struct kiocb *iocb, const struct iovec *iov, |
2440 | unsigned long nr_segs, loff_t pos) | ||
2381 | { | 2441 | { |
2382 | struct inode *inode; | 2442 | struct file *file = iocb->ki_filp; |
2443 | struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; | ||
2444 | struct inode *inode = file->f_mapping->host; | ||
2445 | struct cifsInodeInfo *cinode = CIFS_I(inode); | ||
2446 | struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; | ||
2447 | ssize_t rc = -EACCES; | ||
2383 | 2448 | ||
2384 | inode = iocb->ki_filp->f_path.dentry->d_inode; | 2449 | BUG_ON(iocb->ki_pos != pos); |
2385 | 2450 | ||
2386 | if (CIFS_I(inode)->clientCanCacheAll) | 2451 | sb_start_write(inode->i_sb); |
2387 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
2388 | 2452 | ||
2389 | /* | 2453 | /* |
2390 | * In strict cache mode we need to write the data to the server exactly | 2454 | * We need to hold the sem to be sure nobody modifies lock list |
2391 | * from the pos to pos+len-1 rather than flush all affected pages | 2455 | * with a brlock that prevents writing. |
2392 | * because it may cause a error with mandatory locks on these pages but | ||
2393 | * not on the region from pos to ppos+len-1. | ||
2394 | */ | 2456 | */ |
2457 | down_read(&cinode->lock_sem); | ||
2458 | if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), | ||
2459 | server->vals->exclusive_lock_type, NULL, | ||
2460 | true)) { | ||
2461 | mutex_lock(&inode->i_mutex); | ||
2462 | rc = __generic_file_aio_write(iocb, iov, nr_segs, | ||
2463 | &iocb->ki_pos); | ||
2464 | mutex_unlock(&inode->i_mutex); | ||
2465 | } | ||
2395 | 2466 | ||
2396 | return cifs_user_writev(iocb, iov, nr_segs, pos); | 2467 | if (rc > 0 || rc == -EIOCBQUEUED) { |
2468 | ssize_t err; | ||
2469 | |||
2470 | err = generic_write_sync(file, pos, rc); | ||
2471 | if (err < 0 && rc > 0) | ||
2472 | rc = err; | ||
2473 | } | ||
2474 | |||
2475 | up_read(&cinode->lock_sem); | ||
2476 | sb_end_write(inode->i_sb); | ||
2477 | return rc; | ||
2478 | } | ||
2479 | |||
2480 | ssize_t | ||
2481 | cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, | ||
2482 | unsigned long nr_segs, loff_t pos) | ||
2483 | { | ||
2484 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; | ||
2485 | struct cifsInodeInfo *cinode = CIFS_I(inode); | ||
2486 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
2487 | struct cifsFileInfo *cfile = (struct cifsFileInfo *) | ||
2488 | iocb->ki_filp->private_data; | ||
2489 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | ||
2490 | |||
2491 | #ifdef CONFIG_CIFS_SMB2 | ||
2492 | /* | ||
2493 | * If we have an oplock for read and want to write a data to the file | ||
2494 | * we need to store it in the page cache and then push it to the server | ||
2495 | * to be sure the next read will get a valid data. | ||
2496 | */ | ||
2497 | if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead) { | ||
2498 | ssize_t written; | ||
2499 | int rc; | ||
2500 | |||
2501 | written = generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
2502 | rc = filemap_fdatawrite(inode->i_mapping); | ||
2503 | if (rc) | ||
2504 | return (ssize_t)rc; | ||
2505 | |||
2506 | return written; | ||
2507 | } | ||
2508 | #endif | ||
2509 | |||
2510 | /* | ||
2511 | * For non-oplocked files in strict cache mode we need to write the data | ||
2512 | * to the server exactly from the pos to pos+len-1 rather than flush all | ||
2513 | * affected pages because it may cause a error with mandatory locks on | ||
2514 | * these pages but not on the region from pos to ppos+len-1. | ||
2515 | */ | ||
2516 | |||
2517 | if (!cinode->clientCanCacheAll) | ||
2518 | return cifs_user_writev(iocb, iov, nr_segs, pos); | ||
2519 | |||
2520 | if (cap_unix(tcon->ses) && | ||
2521 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && | ||
2522 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) | ||
2523 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
2524 | |||
2525 | return cifs_writev(iocb, iov, nr_segs, pos); | ||
2397 | } | 2526 | } |
2398 | 2527 | ||
2399 | static struct cifs_readdata * | 2528 | static struct cifs_readdata * |
2400 | cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete) | 2529 | cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete) |
2401 | { | 2530 | { |
2402 | struct cifs_readdata *rdata; | 2531 | struct cifs_readdata *rdata; |
2403 | 2532 | ||
2404 | rdata = kzalloc(sizeof(*rdata) + | 2533 | rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages), |
2405 | sizeof(struct kvec) * nr_vecs, GFP_KERNEL); | 2534 | GFP_KERNEL); |
2406 | if (rdata != NULL) { | 2535 | if (rdata != NULL) { |
2407 | kref_init(&rdata->refcount); | 2536 | kref_init(&rdata->refcount); |
2408 | INIT_LIST_HEAD(&rdata->list); | 2537 | INIT_LIST_HEAD(&rdata->list); |
2409 | init_completion(&rdata->done); | 2538 | init_completion(&rdata->done); |
2410 | INIT_WORK(&rdata->work, complete); | 2539 | INIT_WORK(&rdata->work, complete); |
2411 | INIT_LIST_HEAD(&rdata->pages); | ||
2412 | } | 2540 | } |
2541 | |||
2413 | return rdata; | 2542 | return rdata; |
2414 | } | 2543 | } |
2415 | 2544 | ||
@@ -2426,25 +2555,25 @@ cifs_readdata_release(struct kref *refcount) | |||
2426 | } | 2555 | } |
2427 | 2556 | ||
2428 | static int | 2557 | static int |
2429 | cifs_read_allocate_pages(struct list_head *list, unsigned int npages) | 2558 | cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages) |
2430 | { | 2559 | { |
2431 | int rc = 0; | 2560 | int rc = 0; |
2432 | struct page *page, *tpage; | 2561 | struct page *page; |
2433 | unsigned int i; | 2562 | unsigned int i; |
2434 | 2563 | ||
2435 | for (i = 0; i < npages; i++) { | 2564 | for (i = 0; i < nr_pages; i++) { |
2436 | page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM); | 2565 | page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM); |
2437 | if (!page) { | 2566 | if (!page) { |
2438 | rc = -ENOMEM; | 2567 | rc = -ENOMEM; |
2439 | break; | 2568 | break; |
2440 | } | 2569 | } |
2441 | list_add(&page->lru, list); | 2570 | rdata->pages[i] = page; |
2442 | } | 2571 | } |
2443 | 2572 | ||
2444 | if (rc) { | 2573 | if (rc) { |
2445 | list_for_each_entry_safe(page, tpage, list, lru) { | 2574 | for (i = 0; i < nr_pages; i++) { |
2446 | list_del(&page->lru); | 2575 | put_page(rdata->pages[i]); |
2447 | put_page(page); | 2576 | rdata->pages[i] = NULL; |
2448 | } | 2577 | } |
2449 | } | 2578 | } |
2450 | return rc; | 2579 | return rc; |
@@ -2453,13 +2582,13 @@ cifs_read_allocate_pages(struct list_head *list, unsigned int npages) | |||
2453 | static void | 2582 | static void |
2454 | cifs_uncached_readdata_release(struct kref *refcount) | 2583 | cifs_uncached_readdata_release(struct kref *refcount) |
2455 | { | 2584 | { |
2456 | struct page *page, *tpage; | ||
2457 | struct cifs_readdata *rdata = container_of(refcount, | 2585 | struct cifs_readdata *rdata = container_of(refcount, |
2458 | struct cifs_readdata, refcount); | 2586 | struct cifs_readdata, refcount); |
2587 | unsigned int i; | ||
2459 | 2588 | ||
2460 | list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { | 2589 | for (i = 0; i < rdata->nr_pages; i++) { |
2461 | list_del(&page->lru); | 2590 | put_page(rdata->pages[i]); |
2462 | put_page(page); | 2591 | rdata->pages[i] = NULL; |
2463 | } | 2592 | } |
2464 | cifs_readdata_release(refcount); | 2593 | cifs_readdata_release(refcount); |
2465 | } | 2594 | } |
@@ -2468,6 +2597,9 @@ static int | |||
2468 | cifs_retry_async_readv(struct cifs_readdata *rdata) | 2597 | cifs_retry_async_readv(struct cifs_readdata *rdata) |
2469 | { | 2598 | { |
2470 | int rc; | 2599 | int rc; |
2600 | struct TCP_Server_Info *server; | ||
2601 | |||
2602 | server = tlink_tcon(rdata->cfile->tlink)->ses->server; | ||
2471 | 2603 | ||
2472 | do { | 2604 | do { |
2473 | if (rdata->cfile->invalidHandle) { | 2605 | if (rdata->cfile->invalidHandle) { |
@@ -2475,7 +2607,7 @@ cifs_retry_async_readv(struct cifs_readdata *rdata) | |||
2475 | if (rc != 0) | 2607 | if (rc != 0) |
2476 | continue; | 2608 | continue; |
2477 | } | 2609 | } |
2478 | rc = cifs_async_readv(rdata); | 2610 | rc = server->ops->async_readv(rdata); |
2479 | } while (rc == -EAGAIN); | 2611 | } while (rc == -EAGAIN); |
2480 | 2612 | ||
2481 | return rc; | 2613 | return rc; |
@@ -2500,17 +2632,18 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov, | |||
2500 | int rc = 0; | 2632 | int rc = 0; |
2501 | struct iov_iter ii; | 2633 | struct iov_iter ii; |
2502 | size_t pos = rdata->offset - offset; | 2634 | size_t pos = rdata->offset - offset; |
2503 | struct page *page, *tpage; | ||
2504 | ssize_t remaining = rdata->bytes; | 2635 | ssize_t remaining = rdata->bytes; |
2505 | unsigned char *pdata; | 2636 | unsigned char *pdata; |
2637 | unsigned int i; | ||
2506 | 2638 | ||
2507 | /* set up iov_iter and advance to the correct offset */ | 2639 | /* set up iov_iter and advance to the correct offset */ |
2508 | iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0); | 2640 | iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0); |
2509 | iov_iter_advance(&ii, pos); | 2641 | iov_iter_advance(&ii, pos); |
2510 | 2642 | ||
2511 | *copied = 0; | 2643 | *copied = 0; |
2512 | list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { | 2644 | for (i = 0; i < rdata->nr_pages; i++) { |
2513 | ssize_t copy; | 2645 | ssize_t copy; |
2646 | struct page *page = rdata->pages[i]; | ||
2514 | 2647 | ||
2515 | /* copy a whole page or whatever's left */ | 2648 | /* copy a whole page or whatever's left */ |
2516 | copy = min_t(ssize_t, remaining, PAGE_SIZE); | 2649 | copy = min_t(ssize_t, remaining, PAGE_SIZE); |
@@ -2530,9 +2663,6 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov, | |||
2530 | iov_iter_advance(&ii, copy); | 2663 | iov_iter_advance(&ii, copy); |
2531 | } | 2664 | } |
2532 | } | 2665 | } |
2533 | |||
2534 | list_del(&page->lru); | ||
2535 | put_page(page); | ||
2536 | } | 2666 | } |
2537 | 2667 | ||
2538 | return rc; | 2668 | return rc; |
@@ -2544,59 +2674,56 @@ cifs_uncached_readv_complete(struct work_struct *work) | |||
2544 | struct cifs_readdata *rdata = container_of(work, | 2674 | struct cifs_readdata *rdata = container_of(work, |
2545 | struct cifs_readdata, work); | 2675 | struct cifs_readdata, work); |
2546 | 2676 | ||
2547 | /* if the result is non-zero then the pages weren't kmapped */ | ||
2548 | if (rdata->result == 0) { | ||
2549 | struct page *page; | ||
2550 | |||
2551 | list_for_each_entry(page, &rdata->pages, lru) | ||
2552 | kunmap(page); | ||
2553 | } | ||
2554 | |||
2555 | complete(&rdata->done); | 2677 | complete(&rdata->done); |
2556 | kref_put(&rdata->refcount, cifs_uncached_readdata_release); | 2678 | kref_put(&rdata->refcount, cifs_uncached_readdata_release); |
2557 | } | 2679 | } |
2558 | 2680 | ||
2559 | static int | 2681 | static int |
2560 | cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata, | 2682 | cifs_uncached_read_into_pages(struct TCP_Server_Info *server, |
2561 | unsigned int remaining) | 2683 | struct cifs_readdata *rdata, unsigned int len) |
2562 | { | 2684 | { |
2563 | int len = 0; | 2685 | int total_read = 0, result = 0; |
2564 | struct page *page, *tpage; | 2686 | unsigned int i; |
2687 | unsigned int nr_pages = rdata->nr_pages; | ||
2688 | struct kvec iov; | ||
2689 | |||
2690 | rdata->tailsz = PAGE_SIZE; | ||
2691 | for (i = 0; i < nr_pages; i++) { | ||
2692 | struct page *page = rdata->pages[i]; | ||
2565 | 2693 | ||
2566 | rdata->nr_iov = 1; | 2694 | if (len >= PAGE_SIZE) { |
2567 | list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { | ||
2568 | if (remaining >= PAGE_SIZE) { | ||
2569 | /* enough data to fill the page */ | 2695 | /* enough data to fill the page */ |
2570 | rdata->iov[rdata->nr_iov].iov_base = kmap(page); | 2696 | iov.iov_base = kmap(page); |
2571 | rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE; | 2697 | iov.iov_len = PAGE_SIZE; |
2572 | cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", | 2698 | cFYI(1, "%u: iov_base=%p iov_len=%zu", |
2573 | rdata->nr_iov, page->index, | 2699 | i, iov.iov_base, iov.iov_len); |
2574 | rdata->iov[rdata->nr_iov].iov_base, | 2700 | len -= PAGE_SIZE; |
2575 | rdata->iov[rdata->nr_iov].iov_len); | 2701 | } else if (len > 0) { |
2576 | ++rdata->nr_iov; | ||
2577 | len += PAGE_SIZE; | ||
2578 | remaining -= PAGE_SIZE; | ||
2579 | } else if (remaining > 0) { | ||
2580 | /* enough for partial page, fill and zero the rest */ | 2702 | /* enough for partial page, fill and zero the rest */ |
2581 | rdata->iov[rdata->nr_iov].iov_base = kmap(page); | 2703 | iov.iov_base = kmap(page); |
2582 | rdata->iov[rdata->nr_iov].iov_len = remaining; | 2704 | iov.iov_len = len; |
2583 | cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", | 2705 | cFYI(1, "%u: iov_base=%p iov_len=%zu", |
2584 | rdata->nr_iov, page->index, | 2706 | i, iov.iov_base, iov.iov_len); |
2585 | rdata->iov[rdata->nr_iov].iov_base, | 2707 | memset(iov.iov_base + len, '\0', PAGE_SIZE - len); |
2586 | rdata->iov[rdata->nr_iov].iov_len); | 2708 | rdata->tailsz = len; |
2587 | memset(rdata->iov[rdata->nr_iov].iov_base + remaining, | 2709 | len = 0; |
2588 | '\0', PAGE_SIZE - remaining); | ||
2589 | ++rdata->nr_iov; | ||
2590 | len += remaining; | ||
2591 | remaining = 0; | ||
2592 | } else { | 2710 | } else { |
2593 | /* no need to hold page hostage */ | 2711 | /* no need to hold page hostage */ |
2594 | list_del(&page->lru); | 2712 | rdata->pages[i] = NULL; |
2713 | rdata->nr_pages--; | ||
2595 | put_page(page); | 2714 | put_page(page); |
2715 | continue; | ||
2596 | } | 2716 | } |
2717 | |||
2718 | result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len); | ||
2719 | kunmap(page); | ||
2720 | if (result < 0) | ||
2721 | break; | ||
2722 | |||
2723 | total_read += result; | ||
2597 | } | 2724 | } |
2598 | 2725 | ||
2599 | return len; | 2726 | return total_read > 0 ? total_read : result; |
2600 | } | 2727 | } |
2601 | 2728 | ||
2602 | static ssize_t | 2729 | static ssize_t |
@@ -2627,6 +2754,9 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, | |||
2627 | open_file = file->private_data; | 2754 | open_file = file->private_data; |
2628 | tcon = tlink_tcon(open_file->tlink); | 2755 | tcon = tlink_tcon(open_file->tlink); |
2629 | 2756 | ||
2757 | if (!tcon->ses->server->ops->async_readv) | ||
2758 | return -ENOSYS; | ||
2759 | |||
2630 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) | 2760 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) |
2631 | pid = open_file->pid; | 2761 | pid = open_file->pid; |
2632 | else | 2762 | else |
@@ -2647,15 +2777,17 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, | |||
2647 | goto error; | 2777 | goto error; |
2648 | } | 2778 | } |
2649 | 2779 | ||
2650 | rc = cifs_read_allocate_pages(&rdata->pages, npages); | 2780 | rc = cifs_read_allocate_pages(rdata, npages); |
2651 | if (rc) | 2781 | if (rc) |
2652 | goto error; | 2782 | goto error; |
2653 | 2783 | ||
2654 | rdata->cfile = cifsFileInfo_get(open_file); | 2784 | rdata->cfile = cifsFileInfo_get(open_file); |
2785 | rdata->nr_pages = npages; | ||
2655 | rdata->offset = offset; | 2786 | rdata->offset = offset; |
2656 | rdata->bytes = cur_len; | 2787 | rdata->bytes = cur_len; |
2657 | rdata->pid = pid; | 2788 | rdata->pid = pid; |
2658 | rdata->marshal_iov = cifs_uncached_read_marshal_iov; | 2789 | rdata->pagesz = PAGE_SIZE; |
2790 | rdata->read_into_pages = cifs_uncached_read_into_pages; | ||
2659 | 2791 | ||
2660 | rc = cifs_retry_async_readv(rdata); | 2792 | rc = cifs_retry_async_readv(rdata); |
2661 | error: | 2793 | error: |
@@ -2706,6 +2838,10 @@ restart_loop: | |||
2706 | cifs_stats_bytes_read(tcon, total_read); | 2838 | cifs_stats_bytes_read(tcon, total_read); |
2707 | *poffset += total_read; | 2839 | *poffset += total_read; |
2708 | 2840 | ||
2841 | /* mask nodata case */ | ||
2842 | if (rc == -ENODATA) | ||
2843 | rc = 0; | ||
2844 | |||
2709 | return total_read ? total_read : rc; | 2845 | return total_read ? total_read : rc; |
2710 | } | 2846 | } |
2711 | 2847 | ||
@@ -2721,15 +2857,17 @@ ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, | |||
2721 | return read; | 2857 | return read; |
2722 | } | 2858 | } |
2723 | 2859 | ||
2724 | ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, | 2860 | ssize_t |
2725 | unsigned long nr_segs, loff_t pos) | 2861 | cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, |
2862 | unsigned long nr_segs, loff_t pos) | ||
2726 | { | 2863 | { |
2727 | struct inode *inode; | 2864 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; |
2728 | 2865 | struct cifsInodeInfo *cinode = CIFS_I(inode); | |
2729 | inode = iocb->ki_filp->f_path.dentry->d_inode; | 2866 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
2730 | 2867 | struct cifsFileInfo *cfile = (struct cifsFileInfo *) | |
2731 | if (CIFS_I(inode)->clientCanCacheRead) | 2868 | iocb->ki_filp->private_data; |
2732 | return generic_file_aio_read(iocb, iov, nr_segs, pos); | 2869 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); |
2870 | int rc = -EACCES; | ||
2733 | 2871 | ||
2734 | /* | 2872 | /* |
2735 | * In strict cache mode we need to read from the server all the time | 2873 | * In strict cache mode we need to read from the server all the time |
@@ -2739,12 +2877,29 @@ ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, | |||
2739 | * on pages affected by this read but not on the region from pos to | 2877 | * on pages affected by this read but not on the region from pos to |
2740 | * pos+len-1. | 2878 | * pos+len-1. |
2741 | */ | 2879 | */ |
2880 | if (!cinode->clientCanCacheRead) | ||
2881 | return cifs_user_readv(iocb, iov, nr_segs, pos); | ||
2882 | |||
2883 | if (cap_unix(tcon->ses) && | ||
2884 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && | ||
2885 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) | ||
2886 | return generic_file_aio_read(iocb, iov, nr_segs, pos); | ||
2742 | 2887 | ||
2743 | return cifs_user_readv(iocb, iov, nr_segs, pos); | 2888 | /* |
2889 | * We need to hold the sem to be sure nobody modifies lock list | ||
2890 | * with a brlock that prevents reading. | ||
2891 | */ | ||
2892 | down_read(&cinode->lock_sem); | ||
2893 | if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), | ||
2894 | tcon->ses->server->vals->shared_lock_type, | ||
2895 | NULL, true)) | ||
2896 | rc = generic_file_aio_read(iocb, iov, nr_segs, pos); | ||
2897 | up_read(&cinode->lock_sem); | ||
2898 | return rc; | ||
2744 | } | 2899 | } |
2745 | 2900 | ||
2746 | static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | 2901 | static ssize_t |
2747 | loff_t *poffset) | 2902 | cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) |
2748 | { | 2903 | { |
2749 | int rc = -EACCES; | 2904 | int rc = -EACCES; |
2750 | unsigned int bytes_read = 0; | 2905 | unsigned int bytes_read = 0; |
@@ -2753,8 +2908,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | |||
2753 | unsigned int rsize; | 2908 | unsigned int rsize; |
2754 | struct cifs_sb_info *cifs_sb; | 2909 | struct cifs_sb_info *cifs_sb; |
2755 | struct cifs_tcon *tcon; | 2910 | struct cifs_tcon *tcon; |
2911 | struct TCP_Server_Info *server; | ||
2756 | unsigned int xid; | 2912 | unsigned int xid; |
2757 | char *current_offset; | 2913 | char *cur_offset; |
2758 | struct cifsFileInfo *open_file; | 2914 | struct cifsFileInfo *open_file; |
2759 | struct cifs_io_parms io_parms; | 2915 | struct cifs_io_parms io_parms; |
2760 | int buf_type = CIFS_NO_BUFFER; | 2916 | int buf_type = CIFS_NO_BUFFER; |
@@ -2773,6 +2929,12 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | |||
2773 | } | 2929 | } |
2774 | open_file = file->private_data; | 2930 | open_file = file->private_data; |
2775 | tcon = tlink_tcon(open_file->tlink); | 2931 | tcon = tlink_tcon(open_file->tlink); |
2932 | server = tcon->ses->server; | ||
2933 | |||
2934 | if (!server->ops->sync_read) { | ||
2935 | free_xid(xid); | ||
2936 | return -ENOSYS; | ||
2937 | } | ||
2776 | 2938 | ||
2777 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) | 2939 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) |
2778 | pid = open_file->pid; | 2940 | pid = open_file->pid; |
@@ -2782,9 +2944,8 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | |||
2782 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) | 2944 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) |
2783 | cFYI(1, "attempting read on write only file instance"); | 2945 | cFYI(1, "attempting read on write only file instance"); |
2784 | 2946 | ||
2785 | for (total_read = 0, current_offset = read_data; | 2947 | for (total_read = 0, cur_offset = read_data; read_size > total_read; |
2786 | read_size > total_read; | 2948 | total_read += bytes_read, cur_offset += bytes_read) { |
2787 | total_read += bytes_read, current_offset += bytes_read) { | ||
2788 | current_read_size = min_t(uint, read_size - total_read, rsize); | 2949 | current_read_size = min_t(uint, read_size - total_read, rsize); |
2789 | /* | 2950 | /* |
2790 | * For windows me and 9x we do not want to request more than it | 2951 | * For windows me and 9x we do not want to request more than it |
@@ -2802,13 +2963,13 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | |||
2802 | if (rc != 0) | 2963 | if (rc != 0) |
2803 | break; | 2964 | break; |
2804 | } | 2965 | } |
2805 | io_parms.netfid = open_file->netfid; | ||
2806 | io_parms.pid = pid; | 2966 | io_parms.pid = pid; |
2807 | io_parms.tcon = tcon; | 2967 | io_parms.tcon = tcon; |
2808 | io_parms.offset = *poffset; | 2968 | io_parms.offset = *offset; |
2809 | io_parms.length = current_read_size; | 2969 | io_parms.length = current_read_size; |
2810 | rc = CIFSSMBRead(xid, &io_parms, &bytes_read, | 2970 | rc = server->ops->sync_read(xid, open_file, &io_parms, |
2811 | ¤t_offset, &buf_type); | 2971 | &bytes_read, &cur_offset, |
2972 | &buf_type); | ||
2812 | } | 2973 | } |
2813 | if (rc || (bytes_read == 0)) { | 2974 | if (rc || (bytes_read == 0)) { |
2814 | if (total_read) { | 2975 | if (total_read) { |
@@ -2819,7 +2980,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | |||
2819 | } | 2980 | } |
2820 | } else { | 2981 | } else { |
2821 | cifs_stats_bytes_read(tcon, total_read); | 2982 | cifs_stats_bytes_read(tcon, total_read); |
2822 | *poffset += bytes_read; | 2983 | *offset += bytes_read; |
2823 | } | 2984 | } |
2824 | } | 2985 | } |
2825 | free_xid(xid); | 2986 | free_xid(xid); |
@@ -2842,6 +3003,7 @@ cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
2842 | static struct vm_operations_struct cifs_file_vm_ops = { | 3003 | static struct vm_operations_struct cifs_file_vm_ops = { |
2843 | .fault = filemap_fault, | 3004 | .fault = filemap_fault, |
2844 | .page_mkwrite = cifs_page_mkwrite, | 3005 | .page_mkwrite = cifs_page_mkwrite, |
3006 | .remap_pages = generic_file_remap_pages, | ||
2845 | }; | 3007 | }; |
2846 | 3008 | ||
2847 | int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) | 3009 | int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) |
@@ -2885,16 +3047,16 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
2885 | static void | 3047 | static void |
2886 | cifs_readv_complete(struct work_struct *work) | 3048 | cifs_readv_complete(struct work_struct *work) |
2887 | { | 3049 | { |
3050 | unsigned int i; | ||
2888 | struct cifs_readdata *rdata = container_of(work, | 3051 | struct cifs_readdata *rdata = container_of(work, |
2889 | struct cifs_readdata, work); | 3052 | struct cifs_readdata, work); |
2890 | struct page *page, *tpage; | ||
2891 | 3053 | ||
2892 | list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { | 3054 | for (i = 0; i < rdata->nr_pages; i++) { |
2893 | list_del(&page->lru); | 3055 | struct page *page = rdata->pages[i]; |
3056 | |||
2894 | lru_cache_add_file(page); | 3057 | lru_cache_add_file(page); |
2895 | 3058 | ||
2896 | if (rdata->result == 0) { | 3059 | if (rdata->result == 0) { |
2897 | kunmap(page); | ||
2898 | flush_dcache_page(page); | 3060 | flush_dcache_page(page); |
2899 | SetPageUptodate(page); | 3061 | SetPageUptodate(page); |
2900 | } | 3062 | } |
@@ -2905,49 +3067,48 @@ cifs_readv_complete(struct work_struct *work) | |||
2905 | cifs_readpage_to_fscache(rdata->mapping->host, page); | 3067 | cifs_readpage_to_fscache(rdata->mapping->host, page); |
2906 | 3068 | ||
2907 | page_cache_release(page); | 3069 | page_cache_release(page); |
3070 | rdata->pages[i] = NULL; | ||
2908 | } | 3071 | } |
2909 | kref_put(&rdata->refcount, cifs_readdata_release); | 3072 | kref_put(&rdata->refcount, cifs_readdata_release); |
2910 | } | 3073 | } |
2911 | 3074 | ||
2912 | static int | 3075 | static int |
2913 | cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining) | 3076 | cifs_readpages_read_into_pages(struct TCP_Server_Info *server, |
3077 | struct cifs_readdata *rdata, unsigned int len) | ||
2914 | { | 3078 | { |
2915 | int len = 0; | 3079 | int total_read = 0, result = 0; |
2916 | struct page *page, *tpage; | 3080 | unsigned int i; |
2917 | u64 eof; | 3081 | u64 eof; |
2918 | pgoff_t eof_index; | 3082 | pgoff_t eof_index; |
3083 | unsigned int nr_pages = rdata->nr_pages; | ||
3084 | struct kvec iov; | ||
2919 | 3085 | ||
2920 | /* determine the eof that the server (probably) has */ | 3086 | /* determine the eof that the server (probably) has */ |
2921 | eof = CIFS_I(rdata->mapping->host)->server_eof; | 3087 | eof = CIFS_I(rdata->mapping->host)->server_eof; |
2922 | eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; | 3088 | eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; |
2923 | cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index); | 3089 | cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index); |
2924 | 3090 | ||
2925 | rdata->nr_iov = 1; | 3091 | rdata->tailsz = PAGE_CACHE_SIZE; |
2926 | list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { | 3092 | for (i = 0; i < nr_pages; i++) { |
2927 | if (remaining >= PAGE_CACHE_SIZE) { | 3093 | struct page *page = rdata->pages[i]; |
3094 | |||
3095 | if (len >= PAGE_CACHE_SIZE) { | ||
2928 | /* enough data to fill the page */ | 3096 | /* enough data to fill the page */ |
2929 | rdata->iov[rdata->nr_iov].iov_base = kmap(page); | 3097 | iov.iov_base = kmap(page); |
2930 | rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE; | 3098 | iov.iov_len = PAGE_CACHE_SIZE; |
2931 | cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", | 3099 | cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", |
2932 | rdata->nr_iov, page->index, | 3100 | i, page->index, iov.iov_base, iov.iov_len); |
2933 | rdata->iov[rdata->nr_iov].iov_base, | 3101 | len -= PAGE_CACHE_SIZE; |
2934 | rdata->iov[rdata->nr_iov].iov_len); | 3102 | } else if (len > 0) { |
2935 | ++rdata->nr_iov; | ||
2936 | len += PAGE_CACHE_SIZE; | ||
2937 | remaining -= PAGE_CACHE_SIZE; | ||
2938 | } else if (remaining > 0) { | ||
2939 | /* enough for partial page, fill and zero the rest */ | 3103 | /* enough for partial page, fill and zero the rest */ |
2940 | rdata->iov[rdata->nr_iov].iov_base = kmap(page); | 3104 | iov.iov_base = kmap(page); |
2941 | rdata->iov[rdata->nr_iov].iov_len = remaining; | 3105 | iov.iov_len = len; |
2942 | cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", | 3106 | cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", |
2943 | rdata->nr_iov, page->index, | 3107 | i, page->index, iov.iov_base, iov.iov_len); |
2944 | rdata->iov[rdata->nr_iov].iov_base, | 3108 | memset(iov.iov_base + len, |
2945 | rdata->iov[rdata->nr_iov].iov_len); | 3109 | '\0', PAGE_CACHE_SIZE - len); |
2946 | memset(rdata->iov[rdata->nr_iov].iov_base + remaining, | 3110 | rdata->tailsz = len; |
2947 | '\0', PAGE_CACHE_SIZE - remaining); | 3111 | len = 0; |
2948 | ++rdata->nr_iov; | ||
2949 | len += remaining; | ||
2950 | remaining = 0; | ||
2951 | } else if (page->index > eof_index) { | 3112 | } else if (page->index > eof_index) { |
2952 | /* | 3113 | /* |
2953 | * The VFS will not try to do readahead past the | 3114 | * The VFS will not try to do readahead past the |
@@ -2958,22 +3119,33 @@ cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining) | |||
2958 | * fill them until the writes are flushed. | 3119 | * fill them until the writes are flushed. |
2959 | */ | 3120 | */ |
2960 | zero_user(page, 0, PAGE_CACHE_SIZE); | 3121 | zero_user(page, 0, PAGE_CACHE_SIZE); |
2961 | list_del(&page->lru); | ||
2962 | lru_cache_add_file(page); | 3122 | lru_cache_add_file(page); |
2963 | flush_dcache_page(page); | 3123 | flush_dcache_page(page); |
2964 | SetPageUptodate(page); | 3124 | SetPageUptodate(page); |
2965 | unlock_page(page); | 3125 | unlock_page(page); |
2966 | page_cache_release(page); | 3126 | page_cache_release(page); |
3127 | rdata->pages[i] = NULL; | ||
3128 | rdata->nr_pages--; | ||
3129 | continue; | ||
2967 | } else { | 3130 | } else { |
2968 | /* no need to hold page hostage */ | 3131 | /* no need to hold page hostage */ |
2969 | list_del(&page->lru); | ||
2970 | lru_cache_add_file(page); | 3132 | lru_cache_add_file(page); |
2971 | unlock_page(page); | 3133 | unlock_page(page); |
2972 | page_cache_release(page); | 3134 | page_cache_release(page); |
3135 | rdata->pages[i] = NULL; | ||
3136 | rdata->nr_pages--; | ||
3137 | continue; | ||
2973 | } | 3138 | } |
3139 | |||
3140 | result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len); | ||
3141 | kunmap(page); | ||
3142 | if (result < 0) | ||
3143 | break; | ||
3144 | |||
3145 | total_read += result; | ||
2974 | } | 3146 | } |
2975 | 3147 | ||
2976 | return len; | 3148 | return total_read > 0 ? total_read : result; |
2977 | } | 3149 | } |
2978 | 3150 | ||
2979 | static int cifs_readpages(struct file *file, struct address_space *mapping, | 3151 | static int cifs_readpages(struct file *file, struct address_space *mapping, |
@@ -3027,6 +3199,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, | |||
3027 | * the rdata->pages, then we want them in increasing order. | 3199 | * the rdata->pages, then we want them in increasing order. |
3028 | */ | 3200 | */ |
3029 | while (!list_empty(page_list)) { | 3201 | while (!list_empty(page_list)) { |
3202 | unsigned int i; | ||
3030 | unsigned int bytes = PAGE_CACHE_SIZE; | 3203 | unsigned int bytes = PAGE_CACHE_SIZE; |
3031 | unsigned int expected_index; | 3204 | unsigned int expected_index; |
3032 | unsigned int nr_pages = 1; | 3205 | unsigned int nr_pages = 1; |
@@ -3096,14 +3269,18 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, | |||
3096 | rdata->offset = offset; | 3269 | rdata->offset = offset; |
3097 | rdata->bytes = bytes; | 3270 | rdata->bytes = bytes; |
3098 | rdata->pid = pid; | 3271 | rdata->pid = pid; |
3099 | rdata->marshal_iov = cifs_readpages_marshal_iov; | 3272 | rdata->pagesz = PAGE_CACHE_SIZE; |
3100 | list_splice_init(&tmplist, &rdata->pages); | 3273 | rdata->read_into_pages = cifs_readpages_read_into_pages; |
3274 | |||
3275 | list_for_each_entry_safe(page, tpage, &tmplist, lru) { | ||
3276 | list_del(&page->lru); | ||
3277 | rdata->pages[rdata->nr_pages++] = page; | ||
3278 | } | ||
3101 | 3279 | ||
3102 | rc = cifs_retry_async_readv(rdata); | 3280 | rc = cifs_retry_async_readv(rdata); |
3103 | if (rc != 0) { | 3281 | if (rc != 0) { |
3104 | list_for_each_entry_safe(page, tpage, &rdata->pages, | 3282 | for (i = 0; i < rdata->nr_pages; i++) { |
3105 | lru) { | 3283 | page = rdata->pages[i]; |
3106 | list_del(&page->lru); | ||
3107 | lru_cache_add_file(page); | 3284 | lru_cache_add_file(page); |
3108 | unlock_page(page); | 3285 | unlock_page(page); |
3109 | page_cache_release(page); | 3286 | page_cache_release(page); |
@@ -3347,6 +3524,7 @@ void cifs_oplock_break(struct work_struct *work) | |||
3347 | oplock_break); | 3524 | oplock_break); |
3348 | struct inode *inode = cfile->dentry->d_inode; | 3525 | struct inode *inode = cfile->dentry->d_inode; |
3349 | struct cifsInodeInfo *cinode = CIFS_I(inode); | 3526 | struct cifsInodeInfo *cinode = CIFS_I(inode); |
3527 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | ||
3350 | int rc = 0; | 3528 | int rc = 0; |
3351 | 3529 | ||
3352 | if (inode && S_ISREG(inode->i_mode)) { | 3530 | if (inode && S_ISREG(inode->i_mode)) { |
@@ -3374,10 +3552,8 @@ void cifs_oplock_break(struct work_struct *work) | |||
3374 | * disconnected since oplock already released by the server | 3552 | * disconnected since oplock already released by the server |
3375 | */ | 3553 | */ |
3376 | if (!cfile->oplock_break_cancelled) { | 3554 | if (!cfile->oplock_break_cancelled) { |
3377 | rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid, | 3555 | rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid, |
3378 | current->tgid, 0, 0, 0, 0, | 3556 | cinode); |
3379 | LOCKING_ANDX_OPLOCK_RELEASE, false, | ||
3380 | cinode->clientCanCacheRead ? 1 : 0); | ||
3381 | cFYI(1, "Oplock release rc = %d", rc); | 3557 | cFYI(1, "Oplock release rc = %d", rc); |
3382 | } | 3558 | } |
3383 | } | 3559 | } |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7354877fa3bd..afdff79651f1 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -124,10 +124,10 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) | |||
124 | { | 124 | { |
125 | struct cifsInodeInfo *cifs_i = CIFS_I(inode); | 125 | struct cifsInodeInfo *cifs_i = CIFS_I(inode); |
126 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 126 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
127 | unsigned long oldtime = cifs_i->time; | ||
128 | 127 | ||
129 | cifs_revalidate_cache(inode, fattr); | 128 | cifs_revalidate_cache(inode, fattr); |
130 | 129 | ||
130 | spin_lock(&inode->i_lock); | ||
131 | inode->i_atime = fattr->cf_atime; | 131 | inode->i_atime = fattr->cf_atime; |
132 | inode->i_mtime = fattr->cf_mtime; | 132 | inode->i_mtime = fattr->cf_mtime; |
133 | inode->i_ctime = fattr->cf_ctime; | 133 | inode->i_ctime = fattr->cf_ctime; |
@@ -148,9 +148,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) | |||
148 | else | 148 | else |
149 | cifs_i->time = jiffies; | 149 | cifs_i->time = jiffies; |
150 | 150 | ||
151 | cFYI(1, "inode 0x%p old_time=%ld new_time=%ld", inode, | ||
152 | oldtime, cifs_i->time); | ||
153 | |||
154 | cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING; | 151 | cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING; |
155 | 152 | ||
156 | cifs_i->server_eof = fattr->cf_eof; | 153 | cifs_i->server_eof = fattr->cf_eof; |
@@ -158,7 +155,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) | |||
158 | * Can't safely change the file size here if the client is writing to | 155 | * Can't safely change the file size here if the client is writing to |
159 | * it due to potential races. | 156 | * it due to potential races. |
160 | */ | 157 | */ |
161 | spin_lock(&inode->i_lock); | ||
162 | if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) { | 158 | if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) { |
163 | i_size_write(inode, fattr->cf_eof); | 159 | i_size_write(inode, fattr->cf_eof); |
164 | 160 | ||
@@ -286,7 +282,8 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) | |||
286 | fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL; | 282 | fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL; |
287 | } | 283 | } |
288 | 284 | ||
289 | int cifs_get_file_info_unix(struct file *filp) | 285 | static int |
286 | cifs_get_file_info_unix(struct file *filp) | ||
290 | { | 287 | { |
291 | int rc; | 288 | int rc; |
292 | unsigned int xid; | 289 | unsigned int xid; |
@@ -298,7 +295,7 @@ int cifs_get_file_info_unix(struct file *filp) | |||
298 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | 295 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); |
299 | 296 | ||
300 | xid = get_xid(); | 297 | xid = get_xid(); |
301 | rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data); | 298 | rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->fid.netfid, &find_data); |
302 | if (!rc) { | 299 | if (!rc) { |
303 | cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb); | 300 | cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb); |
304 | } else if (rc == -EREMOTE) { | 301 | } else if (rc == -EREMOTE) { |
@@ -554,7 +551,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, | |||
554 | fattr->cf_gid = cifs_sb->mnt_gid; | 551 | fattr->cf_gid = cifs_sb->mnt_gid; |
555 | } | 552 | } |
556 | 553 | ||
557 | int cifs_get_file_info(struct file *filp) | 554 | static int |
555 | cifs_get_file_info(struct file *filp) | ||
558 | { | 556 | { |
559 | int rc; | 557 | int rc; |
560 | unsigned int xid; | 558 | unsigned int xid; |
@@ -564,9 +562,13 @@ int cifs_get_file_info(struct file *filp) | |||
564 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 562 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
565 | struct cifsFileInfo *cfile = filp->private_data; | 563 | struct cifsFileInfo *cfile = filp->private_data; |
566 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | 564 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); |
565 | struct TCP_Server_Info *server = tcon->ses->server; | ||
566 | |||
567 | if (!server->ops->query_file_info) | ||
568 | return -ENOSYS; | ||
567 | 569 | ||
568 | xid = get_xid(); | 570 | xid = get_xid(); |
569 | rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data); | 571 | rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data); |
570 | switch (rc) { | 572 | switch (rc) { |
571 | case 0: | 573 | case 0: |
572 | cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false); | 574 | cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false); |
@@ -605,7 +607,9 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, | |||
605 | FILE_ALL_INFO *data, struct super_block *sb, int xid, | 607 | FILE_ALL_INFO *data, struct super_block *sb, int xid, |
606 | const __u16 *fid) | 608 | const __u16 *fid) |
607 | { | 609 | { |
608 | int rc = 0, tmprc; | 610 | bool validinum = false; |
611 | __u16 srchflgs; | ||
612 | int rc = 0, tmprc = ENOSYS; | ||
609 | struct cifs_tcon *tcon; | 613 | struct cifs_tcon *tcon; |
610 | struct TCP_Server_Info *server; | 614 | struct TCP_Server_Info *server; |
611 | struct tcon_link *tlink; | 615 | struct tcon_link *tlink; |
@@ -613,6 +617,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, | |||
613 | char *buf = NULL; | 617 | char *buf = NULL; |
614 | bool adjust_tz = false; | 618 | bool adjust_tz = false; |
615 | struct cifs_fattr fattr; | 619 | struct cifs_fattr fattr; |
620 | struct cifs_search_info *srchinf = NULL; | ||
616 | 621 | ||
617 | tlink = cifs_sb_tlink(cifs_sb); | 622 | tlink = cifs_sb_tlink(cifs_sb); |
618 | if (IS_ERR(tlink)) | 623 | if (IS_ERR(tlink)) |
@@ -651,9 +656,38 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, | |||
651 | } else if (rc == -EREMOTE) { | 656 | } else if (rc == -EREMOTE) { |
652 | cifs_create_dfs_fattr(&fattr, sb); | 657 | cifs_create_dfs_fattr(&fattr, sb); |
653 | rc = 0; | 658 | rc = 0; |
654 | } else { | 659 | } else if (rc == -EACCES && backup_cred(cifs_sb)) { |
660 | srchinf = kzalloc(sizeof(struct cifs_search_info), | ||
661 | GFP_KERNEL); | ||
662 | if (srchinf == NULL) { | ||
663 | rc = -ENOMEM; | ||
664 | goto cgii_exit; | ||
665 | } | ||
666 | |||
667 | srchinf->endOfSearch = false; | ||
668 | srchinf->info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; | ||
669 | |||
670 | srchflgs = CIFS_SEARCH_CLOSE_ALWAYS | | ||
671 | CIFS_SEARCH_CLOSE_AT_END | | ||
672 | CIFS_SEARCH_BACKUP_SEARCH; | ||
673 | |||
674 | rc = CIFSFindFirst(xid, tcon, full_path, | ||
675 | cifs_sb, NULL, srchflgs, srchinf, false); | ||
676 | if (!rc) { | ||
677 | data = | ||
678 | (FILE_ALL_INFO *)srchinf->srch_entries_start; | ||
679 | |||
680 | cifs_dir_info_to_fattr(&fattr, | ||
681 | (FILE_DIRECTORY_INFO *)data, cifs_sb); | ||
682 | fattr.cf_uniqueid = le64_to_cpu( | ||
683 | ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId); | ||
684 | validinum = true; | ||
685 | |||
686 | cifs_buf_release(srchinf->ntwrk_buf_start); | ||
687 | } | ||
688 | kfree(srchinf); | ||
689 | } else | ||
655 | goto cgii_exit; | 690 | goto cgii_exit; |
656 | } | ||
657 | 691 | ||
658 | /* | 692 | /* |
659 | * If an inode wasn't passed in, then get the inode number | 693 | * If an inode wasn't passed in, then get the inode number |
@@ -664,23 +698,21 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, | |||
664 | */ | 698 | */ |
665 | if (*inode == NULL) { | 699 | if (*inode == NULL) { |
666 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { | 700 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { |
667 | if (server->ops->get_srv_inum) | 701 | if (validinum == false) { |
668 | tmprc = server->ops->get_srv_inum(xid, tcon, | 702 | if (server->ops->get_srv_inum) |
669 | cifs_sb, full_path, &fattr.cf_uniqueid, | 703 | tmprc = server->ops->get_srv_inum(xid, |
670 | data); | 704 | tcon, cifs_sb, full_path, |
671 | else | 705 | &fattr.cf_uniqueid, data); |
672 | tmprc = -ENOSYS; | 706 | if (tmprc) { |
673 | if (tmprc || !fattr.cf_uniqueid) { | 707 | cFYI(1, "GetSrvInodeNum rc %d", tmprc); |
674 | cFYI(1, "GetSrvInodeNum rc %d", tmprc); | 708 | fattr.cf_uniqueid = iunique(sb, ROOT_I); |
675 | fattr.cf_uniqueid = iunique(sb, ROOT_I); | 709 | cifs_autodisable_serverino(cifs_sb); |
676 | cifs_autodisable_serverino(cifs_sb); | 710 | } |
677 | } | 711 | } |
678 | } else { | 712 | } else |
679 | fattr.cf_uniqueid = iunique(sb, ROOT_I); | 713 | fattr.cf_uniqueid = iunique(sb, ROOT_I); |
680 | } | 714 | } else |
681 | } else { | ||
682 | fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid; | 715 | fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid; |
683 | } | ||
684 | 716 | ||
685 | /* query for SFU type info if supported and needed */ | 717 | /* query for SFU type info if supported and needed */ |
686 | if (fattr.cf_cifsattrs & ATTR_SYSTEM && | 718 | if (fattr.cf_cifsattrs & ATTR_SYSTEM && |
@@ -859,12 +891,14 @@ struct inode *cifs_root_iget(struct super_block *sb) | |||
859 | 891 | ||
860 | if (rc && tcon->ipc) { | 892 | if (rc && tcon->ipc) { |
861 | cFYI(1, "ipc connection - fake read inode"); | 893 | cFYI(1, "ipc connection - fake read inode"); |
894 | spin_lock(&inode->i_lock); | ||
862 | inode->i_mode |= S_IFDIR; | 895 | inode->i_mode |= S_IFDIR; |
863 | set_nlink(inode, 2); | 896 | set_nlink(inode, 2); |
864 | inode->i_op = &cifs_ipc_inode_ops; | 897 | inode->i_op = &cifs_ipc_inode_ops; |
865 | inode->i_fop = &simple_dir_operations; | 898 | inode->i_fop = &simple_dir_operations; |
866 | inode->i_uid = cifs_sb->mnt_uid; | 899 | inode->i_uid = cifs_sb->mnt_uid; |
867 | inode->i_gid = cifs_sb->mnt_gid; | 900 | inode->i_gid = cifs_sb->mnt_gid; |
901 | spin_unlock(&inode->i_lock); | ||
868 | } else if (rc) { | 902 | } else if (rc) { |
869 | iget_failed(inode); | 903 | iget_failed(inode); |
870 | inode = ERR_PTR(rc); | 904 | inode = ERR_PTR(rc); |
@@ -878,25 +912,22 @@ out: | |||
878 | return inode; | 912 | return inode; |
879 | } | 913 | } |
880 | 914 | ||
881 | static int | 915 | int |
882 | cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, | 916 | cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, |
883 | char *full_path, __u32 dosattr) | 917 | char *full_path, __u32 dosattr) |
884 | { | 918 | { |
885 | int rc; | ||
886 | int oplock = 0; | ||
887 | __u16 netfid; | ||
888 | __u32 netpid; | ||
889 | bool set_time = false; | 919 | bool set_time = false; |
890 | struct cifsFileInfo *open_file; | ||
891 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | ||
892 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 920 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
893 | struct tcon_link *tlink = NULL; | 921 | struct TCP_Server_Info *server; |
894 | struct cifs_tcon *pTcon; | ||
895 | FILE_BASIC_INFO info_buf; | 922 | FILE_BASIC_INFO info_buf; |
896 | 923 | ||
897 | if (attrs == NULL) | 924 | if (attrs == NULL) |
898 | return -EINVAL; | 925 | return -EINVAL; |
899 | 926 | ||
927 | server = cifs_sb_master_tcon(cifs_sb)->ses->server; | ||
928 | if (!server->ops->set_file_info) | ||
929 | return -ENOSYS; | ||
930 | |||
900 | if (attrs->ia_valid & ATTR_ATIME) { | 931 | if (attrs->ia_valid & ATTR_ATIME) { |
901 | set_time = true; | 932 | set_time = true; |
902 | info_buf.LastAccessTime = | 933 | info_buf.LastAccessTime = |
@@ -927,81 +958,17 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, | |||
927 | info_buf.CreationTime = 0; /* don't change */ | 958 | info_buf.CreationTime = 0; /* don't change */ |
928 | info_buf.Attributes = cpu_to_le32(dosattr); | 959 | info_buf.Attributes = cpu_to_le32(dosattr); |
929 | 960 | ||
930 | /* | 961 | return server->ops->set_file_info(inode, full_path, &info_buf, xid); |
931 | * If the file is already open for write, just use that fileid | ||
932 | */ | ||
933 | open_file = find_writable_file(cifsInode, true); | ||
934 | if (open_file) { | ||
935 | netfid = open_file->netfid; | ||
936 | netpid = open_file->pid; | ||
937 | pTcon = tlink_tcon(open_file->tlink); | ||
938 | goto set_via_filehandle; | ||
939 | } | ||
940 | |||
941 | tlink = cifs_sb_tlink(cifs_sb); | ||
942 | if (IS_ERR(tlink)) { | ||
943 | rc = PTR_ERR(tlink); | ||
944 | tlink = NULL; | ||
945 | goto out; | ||
946 | } | ||
947 | pTcon = tlink_tcon(tlink); | ||
948 | |||
949 | /* | ||
950 | * NT4 apparently returns success on this call, but it doesn't | ||
951 | * really work. | ||
952 | */ | ||
953 | if (!(pTcon->ses->flags & CIFS_SES_NT4)) { | ||
954 | rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, | ||
955 | &info_buf, cifs_sb->local_nls, | ||
956 | cifs_sb->mnt_cifs_flags & | ||
957 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
958 | if (rc == 0) { | ||
959 | cifsInode->cifsAttrs = dosattr; | ||
960 | goto out; | ||
961 | } else if (rc != -EOPNOTSUPP && rc != -EINVAL) | ||
962 | goto out; | ||
963 | } | ||
964 | |||
965 | cFYI(1, "calling SetFileInfo since SetPathInfo for " | ||
966 | "times not supported by this server"); | ||
967 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, | ||
968 | SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, | ||
969 | CREATE_NOT_DIR, &netfid, &oplock, | ||
970 | NULL, cifs_sb->local_nls, | ||
971 | cifs_sb->mnt_cifs_flags & | ||
972 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
973 | |||
974 | if (rc != 0) { | ||
975 | if (rc == -EIO) | ||
976 | rc = -EINVAL; | ||
977 | goto out; | ||
978 | } | ||
979 | |||
980 | netpid = current->tgid; | ||
981 | |||
982 | set_via_filehandle: | ||
983 | rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid); | ||
984 | if (!rc) | ||
985 | cifsInode->cifsAttrs = dosattr; | ||
986 | |||
987 | if (open_file == NULL) | ||
988 | CIFSSMBClose(xid, pTcon, netfid); | ||
989 | else | ||
990 | cifsFileInfo_put(open_file); | ||
991 | out: | ||
992 | if (tlink != NULL) | ||
993 | cifs_put_tlink(tlink); | ||
994 | return rc; | ||
995 | } | 962 | } |
996 | 963 | ||
997 | /* | 964 | /* |
998 | * open the given file (if it isn't already), set the DELETE_ON_CLOSE bit | 965 | * Open the given file (if it isn't already), set the DELETE_ON_CLOSE bit |
999 | * and rename it to a random name that hopefully won't conflict with | 966 | * and rename it to a random name that hopefully won't conflict with |
1000 | * anything else. | 967 | * anything else. |
1001 | */ | 968 | */ |
1002 | static int | 969 | int |
1003 | cifs_rename_pending_delete(char *full_path, struct dentry *dentry, | 970 | cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, |
1004 | unsigned int xid) | 971 | const unsigned int xid) |
1005 | { | 972 | { |
1006 | int oplock = 0; | 973 | int oplock = 0; |
1007 | int rc; | 974 | int rc; |
@@ -1110,6 +1077,15 @@ undo_setattr: | |||
1110 | goto out_close; | 1077 | goto out_close; |
1111 | } | 1078 | } |
1112 | 1079 | ||
1080 | /* copied from fs/nfs/dir.c with small changes */ | ||
1081 | static void | ||
1082 | cifs_drop_nlink(struct inode *inode) | ||
1083 | { | ||
1084 | spin_lock(&inode->i_lock); | ||
1085 | if (inode->i_nlink > 0) | ||
1086 | drop_nlink(inode); | ||
1087 | spin_unlock(&inode->i_lock); | ||
1088 | } | ||
1113 | 1089 | ||
1114 | /* | 1090 | /* |
1115 | * If dentry->d_inode is null (usually meaning the cached dentry | 1091 | * If dentry->d_inode is null (usually meaning the cached dentry |
@@ -1129,6 +1105,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) | |||
1129 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | 1105 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); |
1130 | struct tcon_link *tlink; | 1106 | struct tcon_link *tlink; |
1131 | struct cifs_tcon *tcon; | 1107 | struct cifs_tcon *tcon; |
1108 | struct TCP_Server_Info *server; | ||
1132 | struct iattr *attrs = NULL; | 1109 | struct iattr *attrs = NULL; |
1133 | __u32 dosattr = 0, origattr = 0; | 1110 | __u32 dosattr = 0, origattr = 0; |
1134 | 1111 | ||
@@ -1138,6 +1115,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) | |||
1138 | if (IS_ERR(tlink)) | 1115 | if (IS_ERR(tlink)) |
1139 | return PTR_ERR(tlink); | 1116 | return PTR_ERR(tlink); |
1140 | tcon = tlink_tcon(tlink); | 1117 | tcon = tlink_tcon(tlink); |
1118 | server = tcon->ses->server; | ||
1141 | 1119 | ||
1142 | xid = get_xid(); | 1120 | xid = get_xid(); |
1143 | 1121 | ||
@@ -1160,19 +1138,28 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) | |||
1160 | } | 1138 | } |
1161 | 1139 | ||
1162 | retry_std_delete: | 1140 | retry_std_delete: |
1163 | rc = CIFSSMBDelFile(xid, tcon, full_path, cifs_sb->local_nls, | 1141 | if (!server->ops->unlink) { |
1164 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 1142 | rc = -ENOSYS; |
1143 | goto psx_del_no_retry; | ||
1144 | } | ||
1145 | |||
1146 | rc = server->ops->unlink(xid, tcon, full_path, cifs_sb); | ||
1165 | 1147 | ||
1166 | psx_del_no_retry: | 1148 | psx_del_no_retry: |
1167 | if (!rc) { | 1149 | if (!rc) { |
1168 | if (inode) | 1150 | if (inode) |
1169 | drop_nlink(inode); | 1151 | cifs_drop_nlink(inode); |
1170 | } else if (rc == -ENOENT) { | 1152 | } else if (rc == -ENOENT) { |
1171 | d_drop(dentry); | 1153 | d_drop(dentry); |
1172 | } else if (rc == -ETXTBSY) { | 1154 | } else if (rc == -ETXTBSY) { |
1173 | rc = cifs_rename_pending_delete(full_path, dentry, xid); | 1155 | if (server->ops->rename_pending_delete) { |
1174 | if (rc == 0) | 1156 | rc = server->ops->rename_pending_delete(full_path, |
1175 | drop_nlink(inode); | 1157 | dentry, xid); |
1158 | if (rc == 0) | ||
1159 | cifs_drop_nlink(inode); | ||
1160 | } | ||
1161 | if (rc == -ETXTBSY) | ||
1162 | rc = -EBUSY; | ||
1176 | } else if ((rc == -EACCES) && (dosattr == 0) && inode) { | 1163 | } else if ((rc == -EACCES) && (dosattr == 0) && inode) { |
1177 | attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); | 1164 | attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); |
1178 | if (attrs == NULL) { | 1165 | if (attrs == NULL) { |
@@ -1220,33 +1207,33 @@ unlink_out: | |||
1220 | } | 1207 | } |
1221 | 1208 | ||
1222 | static int | 1209 | static int |
1223 | cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, | 1210 | cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode, |
1224 | const char *full_path, struct cifs_sb_info *cifs_sb, | 1211 | const char *full_path, struct cifs_sb_info *cifs_sb, |
1225 | struct cifs_tcon *tcon, const unsigned int xid) | 1212 | struct cifs_tcon *tcon, const unsigned int xid) |
1226 | { | 1213 | { |
1227 | int rc = 0; | 1214 | int rc = 0; |
1228 | struct inode *newinode = NULL; | 1215 | struct inode *inode = NULL; |
1229 | 1216 | ||
1230 | if (tcon->unix_ext) | 1217 | if (tcon->unix_ext) |
1231 | rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, | 1218 | rc = cifs_get_inode_info_unix(&inode, full_path, parent->i_sb, |
1232 | xid); | 1219 | xid); |
1233 | else | 1220 | else |
1234 | rc = cifs_get_inode_info(&newinode, full_path, NULL, | 1221 | rc = cifs_get_inode_info(&inode, full_path, NULL, parent->i_sb, |
1235 | inode->i_sb, xid, NULL); | 1222 | xid, NULL); |
1223 | |||
1236 | if (rc) | 1224 | if (rc) |
1237 | return rc; | 1225 | return rc; |
1238 | 1226 | ||
1239 | d_instantiate(dentry, newinode); | ||
1240 | /* | 1227 | /* |
1241 | * setting nlink not necessary except in cases where we failed to get it | 1228 | * setting nlink not necessary except in cases where we failed to get it |
1242 | * from the server or was set bogus | 1229 | * from the server or was set bogus. Also, since this is a brand new |
1230 | * inode, no need to grab the i_lock before setting the i_nlink. | ||
1243 | */ | 1231 | */ |
1244 | if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2)) | 1232 | if (inode->i_nlink < 2) |
1245 | set_nlink(dentry->d_inode, 2); | 1233 | set_nlink(inode, 2); |
1246 | |||
1247 | mode &= ~current_umask(); | 1234 | mode &= ~current_umask(); |
1248 | /* must turn on setgid bit if parent dir has it */ | 1235 | /* must turn on setgid bit if parent dir has it */ |
1249 | if (inode->i_mode & S_ISGID) | 1236 | if (parent->i_mode & S_ISGID) |
1250 | mode |= S_ISGID; | 1237 | mode |= S_ISGID; |
1251 | 1238 | ||
1252 | if (tcon->unix_ext) { | 1239 | if (tcon->unix_ext) { |
@@ -1259,8 +1246,8 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, | |||
1259 | }; | 1246 | }; |
1260 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { | 1247 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { |
1261 | args.uid = (__u64)current_fsuid(); | 1248 | args.uid = (__u64)current_fsuid(); |
1262 | if (inode->i_mode & S_ISGID) | 1249 | if (parent->i_mode & S_ISGID) |
1263 | args.gid = (__u64)inode->i_gid; | 1250 | args.gid = (__u64)parent->i_gid; |
1264 | else | 1251 | else |
1265 | args.gid = (__u64)current_fsgid(); | 1252 | args.gid = (__u64)current_fsgid(); |
1266 | } else { | 1253 | } else { |
@@ -1275,22 +1262,20 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, | |||
1275 | struct TCP_Server_Info *server = tcon->ses->server; | 1262 | struct TCP_Server_Info *server = tcon->ses->server; |
1276 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && | 1263 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && |
1277 | (mode & S_IWUGO) == 0 && server->ops->mkdir_setinfo) | 1264 | (mode & S_IWUGO) == 0 && server->ops->mkdir_setinfo) |
1278 | server->ops->mkdir_setinfo(newinode, full_path, cifs_sb, | 1265 | server->ops->mkdir_setinfo(inode, full_path, cifs_sb, |
1279 | tcon, xid); | 1266 | tcon, xid); |
1280 | if (dentry->d_inode) { | 1267 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) |
1281 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) | 1268 | inode->i_mode = (mode | S_IFDIR); |
1282 | dentry->d_inode->i_mode = (mode | S_IFDIR); | 1269 | |
1283 | 1270 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { | |
1284 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { | 1271 | inode->i_uid = current_fsuid(); |
1285 | dentry->d_inode->i_uid = current_fsuid(); | 1272 | if (inode->i_mode & S_ISGID) |
1286 | if (inode->i_mode & S_ISGID) | 1273 | inode->i_gid = parent->i_gid; |
1287 | dentry->d_inode->i_gid = inode->i_gid; | 1274 | else |
1288 | else | 1275 | inode->i_gid = current_fsgid(); |
1289 | dentry->d_inode->i_gid = | ||
1290 | current_fsgid(); | ||
1291 | } | ||
1292 | } | 1276 | } |
1293 | } | 1277 | } |
1278 | d_instantiate(dentry, inode); | ||
1294 | return rc; | 1279 | return rc; |
1295 | } | 1280 | } |
1296 | 1281 | ||
@@ -1487,29 +1472,32 @@ rmdir_exit: | |||
1487 | } | 1472 | } |
1488 | 1473 | ||
1489 | static int | 1474 | static int |
1490 | cifs_do_rename(unsigned int xid, struct dentry *from_dentry, | 1475 | cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, |
1491 | const char *fromPath, struct dentry *to_dentry, | 1476 | const char *from_path, struct dentry *to_dentry, |
1492 | const char *toPath) | 1477 | const char *to_path) |
1493 | { | 1478 | { |
1494 | struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb); | 1479 | struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb); |
1495 | struct tcon_link *tlink; | 1480 | struct tcon_link *tlink; |
1496 | struct cifs_tcon *pTcon; | 1481 | struct cifs_tcon *tcon; |
1482 | struct TCP_Server_Info *server; | ||
1497 | __u16 srcfid; | 1483 | __u16 srcfid; |
1498 | int oplock, rc; | 1484 | int oplock, rc; |
1499 | 1485 | ||
1500 | tlink = cifs_sb_tlink(cifs_sb); | 1486 | tlink = cifs_sb_tlink(cifs_sb); |
1501 | if (IS_ERR(tlink)) | 1487 | if (IS_ERR(tlink)) |
1502 | return PTR_ERR(tlink); | 1488 | return PTR_ERR(tlink); |
1503 | pTcon = tlink_tcon(tlink); | 1489 | tcon = tlink_tcon(tlink); |
1490 | server = tcon->ses->server; | ||
1491 | |||
1492 | if (!server->ops->rename) | ||
1493 | return -ENOSYS; | ||
1504 | 1494 | ||
1505 | /* try path-based rename first */ | 1495 | /* try path-based rename first */ |
1506 | rc = CIFSSMBRename(xid, pTcon, fromPath, toPath, cifs_sb->local_nls, | 1496 | rc = server->ops->rename(xid, tcon, from_path, to_path, cifs_sb); |
1507 | cifs_sb->mnt_cifs_flags & | ||
1508 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1509 | 1497 | ||
1510 | /* | 1498 | /* |
1511 | * don't bother with rename by filehandle unless file is busy and | 1499 | * Don't bother with rename by filehandle unless file is busy and |
1512 | * source Note that cross directory moves do not work with | 1500 | * source. Note that cross directory moves do not work with |
1513 | * rename by filehandle to various Windows servers. | 1501 | * rename by filehandle to various Windows servers. |
1514 | */ | 1502 | */ |
1515 | if (rc == 0 || rc != -ETXTBSY) | 1503 | if (rc == 0 || rc != -ETXTBSY) |
@@ -1520,29 +1508,28 @@ cifs_do_rename(unsigned int xid, struct dentry *from_dentry, | |||
1520 | goto do_rename_exit; | 1508 | goto do_rename_exit; |
1521 | 1509 | ||
1522 | /* open the file to be renamed -- we need DELETE perms */ | 1510 | /* open the file to be renamed -- we need DELETE perms */ |
1523 | rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, | 1511 | rc = CIFSSMBOpen(xid, tcon, from_path, FILE_OPEN, DELETE, |
1524 | CREATE_NOT_DIR, &srcfid, &oplock, NULL, | 1512 | CREATE_NOT_DIR, &srcfid, &oplock, NULL, |
1525 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | 1513 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & |
1526 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1514 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1527 | |||
1528 | if (rc == 0) { | 1515 | if (rc == 0) { |
1529 | rc = CIFSSMBRenameOpenFile(xid, pTcon, srcfid, | 1516 | rc = CIFSSMBRenameOpenFile(xid, tcon, srcfid, |
1530 | (const char *) to_dentry->d_name.name, | 1517 | (const char *) to_dentry->d_name.name, |
1531 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | 1518 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & |
1532 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1519 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1533 | 1520 | CIFSSMBClose(xid, tcon, srcfid); | |
1534 | CIFSSMBClose(xid, pTcon, srcfid); | ||
1535 | } | 1521 | } |
1536 | do_rename_exit: | 1522 | do_rename_exit: |
1537 | cifs_put_tlink(tlink); | 1523 | cifs_put_tlink(tlink); |
1538 | return rc; | 1524 | return rc; |
1539 | } | 1525 | } |
1540 | 1526 | ||
1541 | int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, | 1527 | int |
1542 | struct inode *target_dir, struct dentry *target_dentry) | 1528 | cifs_rename(struct inode *source_dir, struct dentry *source_dentry, |
1529 | struct inode *target_dir, struct dentry *target_dentry) | ||
1543 | { | 1530 | { |
1544 | char *fromName = NULL; | 1531 | char *from_name = NULL; |
1545 | char *toName = NULL; | 1532 | char *to_name = NULL; |
1546 | struct cifs_sb_info *cifs_sb; | 1533 | struct cifs_sb_info *cifs_sb; |
1547 | struct tcon_link *tlink; | 1534 | struct tcon_link *tlink; |
1548 | struct cifs_tcon *tcon; | 1535 | struct cifs_tcon *tcon; |
@@ -1563,25 +1550,25 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, | |||
1563 | * we already have the rename sem so we do not need to | 1550 | * we already have the rename sem so we do not need to |
1564 | * grab it again here to protect the path integrity | 1551 | * grab it again here to protect the path integrity |
1565 | */ | 1552 | */ |
1566 | fromName = build_path_from_dentry(source_dentry); | 1553 | from_name = build_path_from_dentry(source_dentry); |
1567 | if (fromName == NULL) { | 1554 | if (from_name == NULL) { |
1568 | rc = -ENOMEM; | 1555 | rc = -ENOMEM; |
1569 | goto cifs_rename_exit; | 1556 | goto cifs_rename_exit; |
1570 | } | 1557 | } |
1571 | 1558 | ||
1572 | toName = build_path_from_dentry(target_dentry); | 1559 | to_name = build_path_from_dentry(target_dentry); |
1573 | if (toName == NULL) { | 1560 | if (to_name == NULL) { |
1574 | rc = -ENOMEM; | 1561 | rc = -ENOMEM; |
1575 | goto cifs_rename_exit; | 1562 | goto cifs_rename_exit; |
1576 | } | 1563 | } |
1577 | 1564 | ||
1578 | rc = cifs_do_rename(xid, source_dentry, fromName, | 1565 | rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, |
1579 | target_dentry, toName); | 1566 | to_name); |
1580 | 1567 | ||
1581 | if (rc == -EEXIST && tcon->unix_ext) { | 1568 | if (rc == -EEXIST && tcon->unix_ext) { |
1582 | /* | 1569 | /* |
1583 | * Are src and dst hardlinks of same inode? We can | 1570 | * Are src and dst hardlinks of same inode? We can only tell |
1584 | * only tell with unix extensions enabled | 1571 | * with unix extensions enabled. |
1585 | */ | 1572 | */ |
1586 | info_buf_source = | 1573 | info_buf_source = |
1587 | kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), | 1574 | kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), |
@@ -1592,19 +1579,19 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, | |||
1592 | } | 1579 | } |
1593 | 1580 | ||
1594 | info_buf_target = info_buf_source + 1; | 1581 | info_buf_target = info_buf_source + 1; |
1595 | tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName, | 1582 | tmprc = CIFSSMBUnixQPathInfo(xid, tcon, from_name, |
1596 | info_buf_source, | 1583 | info_buf_source, |
1597 | cifs_sb->local_nls, | 1584 | cifs_sb->local_nls, |
1598 | cifs_sb->mnt_cifs_flags & | 1585 | cifs_sb->mnt_cifs_flags & |
1599 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1586 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1600 | if (tmprc != 0) | 1587 | if (tmprc != 0) |
1601 | goto unlink_target; | 1588 | goto unlink_target; |
1602 | 1589 | ||
1603 | tmprc = CIFSSMBUnixQPathInfo(xid, tcon, toName, | 1590 | tmprc = CIFSSMBUnixQPathInfo(xid, tcon, to_name, |
1604 | info_buf_target, | 1591 | info_buf_target, |
1605 | cifs_sb->local_nls, | 1592 | cifs_sb->local_nls, |
1606 | cifs_sb->mnt_cifs_flags & | 1593 | cifs_sb->mnt_cifs_flags & |
1607 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1594 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1608 | 1595 | ||
1609 | if (tmprc == 0 && (info_buf_source->UniqueId == | 1596 | if (tmprc == 0 && (info_buf_source->UniqueId == |
1610 | info_buf_target->UniqueId)) { | 1597 | info_buf_target->UniqueId)) { |
@@ -1612,8 +1599,11 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, | |||
1612 | rc = 0; | 1599 | rc = 0; |
1613 | goto cifs_rename_exit; | 1600 | goto cifs_rename_exit; |
1614 | } | 1601 | } |
1615 | } /* else ... BB we could add the same check for Windows by | 1602 | } |
1616 | checking the UniqueId via FILE_INTERNAL_INFO */ | 1603 | /* |
1604 | * else ... BB we could add the same check for Windows by | ||
1605 | * checking the UniqueId via FILE_INTERNAL_INFO | ||
1606 | */ | ||
1617 | 1607 | ||
1618 | unlink_target: | 1608 | unlink_target: |
1619 | /* Try unlinking the target dentry if it's not negative */ | 1609 | /* Try unlinking the target dentry if it's not negative */ |
@@ -1621,15 +1611,14 @@ unlink_target: | |||
1621 | tmprc = cifs_unlink(target_dir, target_dentry); | 1611 | tmprc = cifs_unlink(target_dir, target_dentry); |
1622 | if (tmprc) | 1612 | if (tmprc) |
1623 | goto cifs_rename_exit; | 1613 | goto cifs_rename_exit; |
1624 | 1614 | rc = cifs_do_rename(xid, source_dentry, from_name, | |
1625 | rc = cifs_do_rename(xid, source_dentry, fromName, | 1615 | target_dentry, to_name); |
1626 | target_dentry, toName); | ||
1627 | } | 1616 | } |
1628 | 1617 | ||
1629 | cifs_rename_exit: | 1618 | cifs_rename_exit: |
1630 | kfree(info_buf_source); | 1619 | kfree(info_buf_source); |
1631 | kfree(fromName); | 1620 | kfree(from_name); |
1632 | kfree(toName); | 1621 | kfree(to_name); |
1633 | free_xid(xid); | 1622 | free_xid(xid); |
1634 | cifs_put_tlink(tlink); | 1623 | cifs_put_tlink(tlink); |
1635 | return rc; | 1624 | return rc; |
@@ -1854,7 +1843,8 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
1854 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | 1843 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); |
1855 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 1844 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
1856 | struct tcon_link *tlink = NULL; | 1845 | struct tcon_link *tlink = NULL; |
1857 | struct cifs_tcon *pTcon = NULL; | 1846 | struct cifs_tcon *tcon = NULL; |
1847 | struct TCP_Server_Info *server; | ||
1858 | struct cifs_io_parms io_parms; | 1848 | struct cifs_io_parms io_parms; |
1859 | 1849 | ||
1860 | /* | 1850 | /* |
@@ -1868,19 +1858,21 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
1868 | */ | 1858 | */ |
1869 | open_file = find_writable_file(cifsInode, true); | 1859 | open_file = find_writable_file(cifsInode, true); |
1870 | if (open_file) { | 1860 | if (open_file) { |
1871 | __u16 nfid = open_file->netfid; | 1861 | tcon = tlink_tcon(open_file->tlink); |
1872 | __u32 npid = open_file->pid; | 1862 | server = tcon->ses->server; |
1873 | pTcon = tlink_tcon(open_file->tlink); | 1863 | if (server->ops->set_file_size) |
1874 | rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, | 1864 | rc = server->ops->set_file_size(xid, tcon, open_file, |
1875 | npid, false); | 1865 | attrs->ia_size, false); |
1866 | else | ||
1867 | rc = -ENOSYS; | ||
1876 | cifsFileInfo_put(open_file); | 1868 | cifsFileInfo_put(open_file); |
1877 | cFYI(1, "SetFSize for attrs rc = %d", rc); | 1869 | cFYI(1, "SetFSize for attrs rc = %d", rc); |
1878 | if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { | 1870 | if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { |
1879 | unsigned int bytes_written; | 1871 | unsigned int bytes_written; |
1880 | 1872 | ||
1881 | io_parms.netfid = nfid; | 1873 | io_parms.netfid = open_file->fid.netfid; |
1882 | io_parms.pid = npid; | 1874 | io_parms.pid = open_file->pid; |
1883 | io_parms.tcon = pTcon; | 1875 | io_parms.tcon = tcon; |
1884 | io_parms.offset = 0; | 1876 | io_parms.offset = 0; |
1885 | io_parms.length = attrs->ia_size; | 1877 | io_parms.length = attrs->ia_size; |
1886 | rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, | 1878 | rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, |
@@ -1890,52 +1882,55 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
1890 | } else | 1882 | } else |
1891 | rc = -EINVAL; | 1883 | rc = -EINVAL; |
1892 | 1884 | ||
1893 | if (rc != 0) { | 1885 | if (!rc) |
1894 | if (pTcon == NULL) { | 1886 | goto set_size_out; |
1895 | tlink = cifs_sb_tlink(cifs_sb); | ||
1896 | if (IS_ERR(tlink)) | ||
1897 | return PTR_ERR(tlink); | ||
1898 | pTcon = tlink_tcon(tlink); | ||
1899 | } | ||
1900 | 1887 | ||
1901 | /* Set file size by pathname rather than by handle | 1888 | if (tcon == NULL) { |
1902 | either because no valid, writeable file handle for | 1889 | tlink = cifs_sb_tlink(cifs_sb); |
1903 | it was found or because there was an error setting | 1890 | if (IS_ERR(tlink)) |
1904 | it by handle */ | 1891 | return PTR_ERR(tlink); |
1905 | rc = CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, | 1892 | tcon = tlink_tcon(tlink); |
1906 | false, cifs_sb->local_nls, | 1893 | server = tcon->ses->server; |
1894 | } | ||
1895 | |||
1896 | /* | ||
1897 | * Set file size by pathname rather than by handle either because no | ||
1898 | * valid, writeable file handle for it was found or because there was | ||
1899 | * an error setting it by handle. | ||
1900 | */ | ||
1901 | if (server->ops->set_path_size) | ||
1902 | rc = server->ops->set_path_size(xid, tcon, full_path, | ||
1903 | attrs->ia_size, cifs_sb, false); | ||
1904 | else | ||
1905 | rc = -ENOSYS; | ||
1906 | cFYI(1, "SetEOF by path (setattrs) rc = %d", rc); | ||
1907 | if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { | ||
1908 | __u16 netfid; | ||
1909 | int oplock = 0; | ||
1910 | |||
1911 | rc = SMBLegacyOpen(xid, tcon, full_path, FILE_OPEN, | ||
1912 | GENERIC_WRITE, CREATE_NOT_DIR, &netfid, | ||
1913 | &oplock, NULL, cifs_sb->local_nls, | ||
1907 | cifs_sb->mnt_cifs_flags & | 1914 | cifs_sb->mnt_cifs_flags & |
1908 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1915 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1909 | cFYI(1, "SetEOF by path (setattrs) rc = %d", rc); | 1916 | if (rc == 0) { |
1910 | if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { | 1917 | unsigned int bytes_written; |
1911 | __u16 netfid; | 1918 | |
1912 | int oplock = 0; | 1919 | io_parms.netfid = netfid; |
1913 | 1920 | io_parms.pid = current->tgid; | |
1914 | rc = SMBLegacyOpen(xid, pTcon, full_path, | 1921 | io_parms.tcon = tcon; |
1915 | FILE_OPEN, GENERIC_WRITE, | 1922 | io_parms.offset = 0; |
1916 | CREATE_NOT_DIR, &netfid, &oplock, NULL, | 1923 | io_parms.length = attrs->ia_size; |
1917 | cifs_sb->local_nls, | 1924 | rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, NULL, |
1918 | cifs_sb->mnt_cifs_flags & | 1925 | NULL, 1); |
1919 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1926 | cFYI(1, "wrt seteof rc %d", rc); |
1920 | if (rc == 0) { | 1927 | CIFSSMBClose(xid, tcon, netfid); |
1921 | unsigned int bytes_written; | ||
1922 | |||
1923 | io_parms.netfid = netfid; | ||
1924 | io_parms.pid = current->tgid; | ||
1925 | io_parms.tcon = pTcon; | ||
1926 | io_parms.offset = 0; | ||
1927 | io_parms.length = attrs->ia_size; | ||
1928 | rc = CIFSSMBWrite(xid, &io_parms, | ||
1929 | &bytes_written, | ||
1930 | NULL, NULL, 1); | ||
1931 | cFYI(1, "wrt seteof rc %d", rc); | ||
1932 | CIFSSMBClose(xid, pTcon, netfid); | ||
1933 | } | ||
1934 | } | 1928 | } |
1935 | if (tlink) | ||
1936 | cifs_put_tlink(tlink); | ||
1937 | } | 1929 | } |
1930 | if (tlink) | ||
1931 | cifs_put_tlink(tlink); | ||
1938 | 1932 | ||
1933 | set_size_out: | ||
1939 | if (rc == 0) { | 1934 | if (rc == 0) { |
1940 | cifsInode->server_eof = attrs->ia_size; | 1935 | cifsInode->server_eof = attrs->ia_size; |
1941 | cifs_setsize(inode, attrs->ia_size); | 1936 | cifs_setsize(inode, attrs->ia_size); |
@@ -2042,7 +2037,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) | |||
2042 | args->device = 0; | 2037 | args->device = 0; |
2043 | open_file = find_writable_file(cifsInode, true); | 2038 | open_file = find_writable_file(cifsInode, true); |
2044 | if (open_file) { | 2039 | if (open_file) { |
2045 | u16 nfid = open_file->netfid; | 2040 | u16 nfid = open_file->fid.netfid; |
2046 | u32 npid = open_file->pid; | 2041 | u32 npid = open_file->pid; |
2047 | pTcon = tlink_tcon(open_file->tlink); | 2042 | pTcon = tlink_tcon(open_file->tlink); |
2048 | rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid); | 2043 | rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid); |
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index ae082a66de2f..fd5009d56f9f 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c | |||
@@ -28,8 +28,6 @@ | |||
28 | #include "cifs_debug.h" | 28 | #include "cifs_debug.h" |
29 | #include "cifsfs.h" | 29 | #include "cifsfs.h" |
30 | 30 | ||
31 | #define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2) | ||
32 | |||
33 | long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) | 31 | long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) |
34 | { | 32 | { |
35 | struct inode *inode = filep->f_dentry->d_inode; | 33 | struct inode *inode = filep->f_dentry->d_inode; |
@@ -51,23 +49,6 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) | |||
51 | cifs_sb = CIFS_SB(inode->i_sb); | 49 | cifs_sb = CIFS_SB(inode->i_sb); |
52 | 50 | ||
53 | switch (command) { | 51 | switch (command) { |
54 | static bool warned = false; | ||
55 | case CIFS_IOC_CHECKUMOUNT: | ||
56 | if (!warned) { | ||
57 | warned = true; | ||
58 | cERROR(1, "the CIFS_IOC_CHECKMOUNT ioctl will " | ||
59 | "be deprecated in 3.7. Please " | ||
60 | "migrate away from the use of " | ||
61 | "umount.cifs"); | ||
62 | } | ||
63 | cFYI(1, "User unmount attempted"); | ||
64 | if (cifs_sb->mnt_uid == current_uid()) | ||
65 | rc = 0; | ||
66 | else { | ||
67 | rc = -EACCES; | ||
68 | cFYI(1, "uids do not match"); | ||
69 | } | ||
70 | break; | ||
71 | #ifdef CONFIG_CIFS_POSIX | 52 | #ifdef CONFIG_CIFS_POSIX |
72 | case FS_IOC_GETFLAGS: | 53 | case FS_IOC_GETFLAGS: |
73 | if (pSMBFile == NULL) | 54 | if (pSMBFile == NULL) |
@@ -75,8 +56,9 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) | |||
75 | tcon = tlink_tcon(pSMBFile->tlink); | 56 | tcon = tlink_tcon(pSMBFile->tlink); |
76 | caps = le64_to_cpu(tcon->fsUnixInfo.Capability); | 57 | caps = le64_to_cpu(tcon->fsUnixInfo.Capability); |
77 | if (CIFS_UNIX_EXTATTR_CAP & caps) { | 58 | if (CIFS_UNIX_EXTATTR_CAP & caps) { |
78 | rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid, | 59 | rc = CIFSGetExtAttr(xid, tcon, |
79 | &ExtAttrBits, &ExtAttrMask); | 60 | pSMBFile->fid.netfid, |
61 | &ExtAttrBits, &ExtAttrMask); | ||
80 | if (rc == 0) | 62 | if (rc == 0) |
81 | rc = put_user(ExtAttrBits & | 63 | rc = put_user(ExtAttrBits & |
82 | FS_FL_USER_VISIBLE, | 64 | FS_FL_USER_VISIBLE, |
@@ -94,8 +76,12 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) | |||
94 | rc = -EFAULT; | 76 | rc = -EFAULT; |
95 | break; | 77 | break; |
96 | } | 78 | } |
97 | /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid, | 79 | /* |
98 | extAttrBits, &ExtAttrMask);*/ | 80 | * rc = CIFSGetExtAttr(xid, tcon, |
81 | * pSMBFile->fid.netfid, | ||
82 | * extAttrBits, | ||
83 | * &ExtAttrMask); | ||
84 | */ | ||
99 | } | 85 | } |
100 | cFYI(1, "set flags not implemented yet"); | 86 | cFYI(1, "set flags not implemented yet"); |
101 | break; | 87 | break; |
diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 09e4b3ae4564..51dc2fb6e854 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c | |||
@@ -391,70 +391,86 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, | |||
391 | { | 391 | { |
392 | int rc = -EACCES; | 392 | int rc = -EACCES; |
393 | unsigned int xid; | 393 | unsigned int xid; |
394 | char *fromName = NULL; | 394 | char *from_name = NULL; |
395 | char *toName = NULL; | 395 | char *to_name = NULL; |
396 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 396 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
397 | struct tcon_link *tlink; | 397 | struct tcon_link *tlink; |
398 | struct cifs_tcon *pTcon; | 398 | struct cifs_tcon *tcon; |
399 | struct TCP_Server_Info *server; | ||
399 | struct cifsInodeInfo *cifsInode; | 400 | struct cifsInodeInfo *cifsInode; |
400 | 401 | ||
401 | tlink = cifs_sb_tlink(cifs_sb); | 402 | tlink = cifs_sb_tlink(cifs_sb); |
402 | if (IS_ERR(tlink)) | 403 | if (IS_ERR(tlink)) |
403 | return PTR_ERR(tlink); | 404 | return PTR_ERR(tlink); |
404 | pTcon = tlink_tcon(tlink); | 405 | tcon = tlink_tcon(tlink); |
405 | 406 | ||
406 | xid = get_xid(); | 407 | xid = get_xid(); |
407 | 408 | ||
408 | fromName = build_path_from_dentry(old_file); | 409 | from_name = build_path_from_dentry(old_file); |
409 | toName = build_path_from_dentry(direntry); | 410 | to_name = build_path_from_dentry(direntry); |
410 | if ((fromName == NULL) || (toName == NULL)) { | 411 | if ((from_name == NULL) || (to_name == NULL)) { |
411 | rc = -ENOMEM; | 412 | rc = -ENOMEM; |
412 | goto cifs_hl_exit; | 413 | goto cifs_hl_exit; |
413 | } | 414 | } |
414 | 415 | ||
415 | if (pTcon->unix_ext) | 416 | if (tcon->unix_ext) |
416 | rc = CIFSUnixCreateHardLink(xid, pTcon, fromName, toName, | 417 | rc = CIFSUnixCreateHardLink(xid, tcon, from_name, to_name, |
417 | cifs_sb->local_nls, | 418 | cifs_sb->local_nls, |
418 | cifs_sb->mnt_cifs_flags & | 419 | cifs_sb->mnt_cifs_flags & |
419 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 420 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
420 | else { | 421 | else { |
421 | rc = CIFSCreateHardLink(xid, pTcon, fromName, toName, | 422 | server = tcon->ses->server; |
422 | cifs_sb->local_nls, | 423 | if (!server->ops->create_hardlink) |
423 | cifs_sb->mnt_cifs_flags & | 424 | return -ENOSYS; |
424 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 425 | rc = server->ops->create_hardlink(xid, tcon, from_name, to_name, |
426 | cifs_sb); | ||
425 | if ((rc == -EIO) || (rc == -EINVAL)) | 427 | if ((rc == -EIO) || (rc == -EINVAL)) |
426 | rc = -EOPNOTSUPP; | 428 | rc = -EOPNOTSUPP; |
427 | } | 429 | } |
428 | 430 | ||
429 | d_drop(direntry); /* force new lookup from server of target */ | 431 | d_drop(direntry); /* force new lookup from server of target */ |
430 | 432 | ||
431 | /* if source file is cached (oplocked) revalidate will not go to server | 433 | /* |
432 | until the file is closed or oplock broken so update nlinks locally */ | 434 | * if source file is cached (oplocked) revalidate will not go to server |
435 | * until the file is closed or oplock broken so update nlinks locally | ||
436 | */ | ||
433 | if (old_file->d_inode) { | 437 | if (old_file->d_inode) { |
434 | cifsInode = CIFS_I(old_file->d_inode); | 438 | cifsInode = CIFS_I(old_file->d_inode); |
435 | if (rc == 0) { | 439 | if (rc == 0) { |
440 | spin_lock(&old_file->d_inode->i_lock); | ||
436 | inc_nlink(old_file->d_inode); | 441 | inc_nlink(old_file->d_inode); |
437 | /* BB should we make this contingent on superblock flag NOATIME? */ | 442 | spin_unlock(&old_file->d_inode->i_lock); |
438 | /* old_file->d_inode->i_ctime = CURRENT_TIME;*/ | 443 | /* |
439 | /* parent dir timestamps will update from srv | 444 | * BB should we make this contingent on superblock flag |
440 | within a second, would it really be worth it | 445 | * NOATIME? |
441 | to set the parent dir cifs inode time to zero | 446 | */ |
442 | to force revalidate (faster) for it too? */ | 447 | /* old_file->d_inode->i_ctime = CURRENT_TIME; */ |
448 | /* | ||
449 | * parent dir timestamps will update from srv within a | ||
450 | * second, would it really be worth it to set the parent | ||
451 | * dir cifs inode time to zero to force revalidate | ||
452 | * (faster) for it too? | ||
453 | */ | ||
443 | } | 454 | } |
444 | /* if not oplocked will force revalidate to get info | 455 | /* |
445 | on source file from srv */ | 456 | * if not oplocked will force revalidate to get info on source |
457 | * file from srv | ||
458 | */ | ||
446 | cifsInode->time = 0; | 459 | cifsInode->time = 0; |
447 | 460 | ||
448 | /* Will update parent dir timestamps from srv within a second. | 461 | /* |
449 | Would it really be worth it to set the parent dir (cifs | 462 | * Will update parent dir timestamps from srv within a second. |
450 | inode) time field to zero to force revalidate on parent | 463 | * Would it really be worth it to set the parent dir (cifs |
451 | directory faster ie | 464 | * inode) time field to zero to force revalidate on parent |
452 | CIFS_I(inode)->time = 0; */ | 465 | * directory faster ie |
466 | * | ||
467 | * CIFS_I(inode)->time = 0; | ||
468 | */ | ||
453 | } | 469 | } |
454 | 470 | ||
455 | cifs_hl_exit: | 471 | cifs_hl_exit: |
456 | kfree(fromName); | 472 | kfree(from_name); |
457 | kfree(toName); | 473 | kfree(to_name); |
458 | free_xid(xid); | 474 | free_xid(xid); |
459 | cifs_put_tlink(tlink); | 475 | cifs_put_tlink(tlink); |
460 | return rc; | 476 | return rc; |
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index ce41fee07e5b..3a00c0d0cead 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -466,7 +466,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) | |||
466 | list_for_each(tmp2, &tcon->openFileList) { | 466 | list_for_each(tmp2, &tcon->openFileList) { |
467 | netfile = list_entry(tmp2, struct cifsFileInfo, | 467 | netfile = list_entry(tmp2, struct cifsFileInfo, |
468 | tlist); | 468 | tlist); |
469 | if (pSMB->Fid != netfile->netfid) | 469 | if (pSMB->Fid != netfile->fid.netfid) |
470 | continue; | 470 | continue; |
471 | 471 | ||
472 | cFYI(1, "file id match, oplock break"); | 472 | cFYI(1, "file id match, oplock break"); |
@@ -579,3 +579,33 @@ backup_cred(struct cifs_sb_info *cifs_sb) | |||
579 | 579 | ||
580 | return false; | 580 | return false; |
581 | } | 581 | } |
582 | |||
583 | void | ||
584 | cifs_del_pending_open(struct cifs_pending_open *open) | ||
585 | { | ||
586 | spin_lock(&cifs_file_list_lock); | ||
587 | list_del(&open->olist); | ||
588 | spin_unlock(&cifs_file_list_lock); | ||
589 | } | ||
590 | |||
591 | void | ||
592 | cifs_add_pending_open_locked(struct cifs_fid *fid, struct tcon_link *tlink, | ||
593 | struct cifs_pending_open *open) | ||
594 | { | ||
595 | #ifdef CONFIG_CIFS_SMB2 | ||
596 | memcpy(open->lease_key, fid->lease_key, SMB2_LEASE_KEY_SIZE); | ||
597 | #endif | ||
598 | open->oplock = CIFS_OPLOCK_NO_CHANGE; | ||
599 | open->tlink = tlink; | ||
600 | fid->pending_open = open; | ||
601 | list_add_tail(&open->olist, &tlink_tcon(tlink)->pending_opens); | ||
602 | } | ||
603 | |||
604 | void | ||
605 | cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink, | ||
606 | struct cifs_pending_open *open) | ||
607 | { | ||
608 | spin_lock(&cifs_file_list_lock); | ||
609 | cifs_add_pending_open_locked(fid, tlink, open); | ||
610 | spin_unlock(&cifs_file_list_lock); | ||
611 | } | ||
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 581c225f7f50..d5ce9e26696c 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c | |||
@@ -110,7 +110,7 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = { | |||
110 | {ERRnoroom, -ENOSPC}, | 110 | {ERRnoroom, -ENOSPC}, |
111 | {ERRrmuns, -EUSERS}, | 111 | {ERRrmuns, -EUSERS}, |
112 | {ERRtimeout, -ETIME}, | 112 | {ERRtimeout, -ETIME}, |
113 | {ERRnoresource, -ENOBUFS}, | 113 | {ERRnoresource, -EREMOTEIO}, |
114 | {ERRtoomanyuids, -EUSERS}, | 114 | {ERRtoomanyuids, -EUSERS}, |
115 | {ERRbaduid, -EACCES}, | 115 | {ERRbaduid, -EACCES}, |
116 | {ERRusempx, -EIO}, | 116 | {ERRusempx, -EIO}, |
@@ -412,7 +412,7 @@ static const struct { | |||
412 | from NT_STATUS_INSUFFICIENT_RESOURCES to | 412 | from NT_STATUS_INSUFFICIENT_RESOURCES to |
413 | NT_STATUS_INSUFF_SERVER_RESOURCES during the session setup } */ | 413 | NT_STATUS_INSUFF_SERVER_RESOURCES during the session setup } */ |
414 | { | 414 | { |
415 | ERRDOS, ERRnomem, NT_STATUS_INSUFFICIENT_RESOURCES}, { | 415 | ERRDOS, ERRnoresource, NT_STATUS_INSUFFICIENT_RESOURCES}, { |
416 | ERRDOS, ERRbadpath, NT_STATUS_DFS_EXIT_PATH_FOUND}, { | 416 | ERRDOS, ERRbadpath, NT_STATUS_DFS_EXIT_PATH_FOUND}, { |
417 | ERRDOS, 23, NT_STATUS_DEVICE_DATA_ERROR}, { | 417 | ERRDOS, 23, NT_STATUS_DEVICE_DATA_ERROR}, { |
418 | ERRHRD, ERRgeneral, NT_STATUS_DEVICE_NOT_CONNECTED}, { | 418 | ERRHRD, ERRgeneral, NT_STATUS_DEVICE_NOT_CONNECTED}, { |
@@ -682,7 +682,7 @@ static const struct { | |||
682 | ERRHRD, ERRgeneral, NT_STATUS_NO_USER_SESSION_KEY}, { | 682 | ERRHRD, ERRgeneral, NT_STATUS_NO_USER_SESSION_KEY}, { |
683 | ERRDOS, 59, NT_STATUS_USER_SESSION_DELETED}, { | 683 | ERRDOS, 59, NT_STATUS_USER_SESSION_DELETED}, { |
684 | ERRHRD, ERRgeneral, NT_STATUS_RESOURCE_LANG_NOT_FOUND}, { | 684 | ERRHRD, ERRgeneral, NT_STATUS_RESOURCE_LANG_NOT_FOUND}, { |
685 | ERRDOS, ERRnomem, NT_STATUS_INSUFF_SERVER_RESOURCES}, { | 685 | ERRDOS, ERRnoresource, NT_STATUS_INSUFF_SERVER_RESOURCES}, { |
686 | ERRHRD, ERRgeneral, NT_STATUS_INVALID_BUFFER_SIZE}, { | 686 | ERRHRD, ERRgeneral, NT_STATUS_INVALID_BUFFER_SIZE}, { |
687 | ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_COMPONENT}, { | 687 | ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_COMPONENT}, { |
688 | ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_WILDCARD}, { | 688 | ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_WILDCARD}, { |
@@ -913,8 +913,9 @@ map_smb_to_linux_error(char *buf, bool logErr) | |||
913 | * portion, the number of word parameters and the data portion of the message | 913 | * portion, the number of word parameters and the data portion of the message |
914 | */ | 914 | */ |
915 | unsigned int | 915 | unsigned int |
916 | smbCalcSize(struct smb_hdr *ptr) | 916 | smbCalcSize(void *buf) |
917 | { | 917 | { |
918 | struct smb_hdr *ptr = (struct smb_hdr *)buf; | ||
918 | return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) + | 919 | return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) + |
919 | 2 /* size of the bcc field */ + get_bcc(ptr)); | 920 | 2 /* size of the bcc field */ + get_bcc(ptr)); |
920 | } | 921 | } |
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index d87f82678bc7..f9b5d3d6cf33 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -151,7 +151,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) | |||
151 | } | 151 | } |
152 | } | 152 | } |
153 | 153 | ||
154 | static void | 154 | void |
155 | cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, | 155 | cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, |
156 | struct cifs_sb_info *cifs_sb) | 156 | struct cifs_sb_info *cifs_sb) |
157 | { | 157 | { |
@@ -220,7 +220,8 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb, | |||
220 | } | 220 | } |
221 | */ | 221 | */ |
222 | 222 | ||
223 | static int initiate_cifs_search(const unsigned int xid, struct file *file) | 223 | static int |
224 | initiate_cifs_search(const unsigned int xid, struct file *file) | ||
224 | { | 225 | { |
225 | __u16 search_flags; | 226 | __u16 search_flags; |
226 | int rc = 0; | 227 | int rc = 0; |
@@ -229,6 +230,7 @@ static int initiate_cifs_search(const unsigned int xid, struct file *file) | |||
229 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 230 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
230 | struct tcon_link *tlink = NULL; | 231 | struct tcon_link *tlink = NULL; |
231 | struct cifs_tcon *tcon; | 232 | struct cifs_tcon *tcon; |
233 | struct TCP_Server_Info *server; | ||
232 | 234 | ||
233 | if (file->private_data == NULL) { | 235 | if (file->private_data == NULL) { |
234 | tlink = cifs_sb_tlink(cifs_sb); | 236 | tlink = cifs_sb_tlink(cifs_sb); |
@@ -248,6 +250,13 @@ static int initiate_cifs_search(const unsigned int xid, struct file *file) | |||
248 | tcon = tlink_tcon(cifsFile->tlink); | 250 | tcon = tlink_tcon(cifsFile->tlink); |
249 | } | 251 | } |
250 | 252 | ||
253 | server = tcon->ses->server; | ||
254 | |||
255 | if (!server->ops->query_dir_first) { | ||
256 | rc = -ENOSYS; | ||
257 | goto error_exit; | ||
258 | } | ||
259 | |||
251 | cifsFile->invalidHandle = true; | 260 | cifsFile->invalidHandle = true; |
252 | cifsFile->srch_inf.endOfSearch = false; | 261 | cifsFile->srch_inf.endOfSearch = false; |
253 | 262 | ||
@@ -278,10 +287,10 @@ ffirst_retry: | |||
278 | if (backup_cred(cifs_sb)) | 287 | if (backup_cred(cifs_sb)) |
279 | search_flags |= CIFS_SEARCH_BACKUP_SEARCH; | 288 | search_flags |= CIFS_SEARCH_BACKUP_SEARCH; |
280 | 289 | ||
281 | rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb->local_nls, | 290 | rc = server->ops->query_dir_first(xid, tcon, full_path, cifs_sb, |
282 | &cifsFile->netfid, search_flags, &cifsFile->srch_inf, | 291 | &cifsFile->fid, search_flags, |
283 | cifs_sb->mnt_cifs_flags & | 292 | &cifsFile->srch_inf); |
284 | CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); | 293 | |
285 | if (rc == 0) | 294 | if (rc == 0) |
286 | cifsFile->invalidHandle = false; | 295 | cifsFile->invalidHandle = false; |
287 | /* BB add following call to handle readdir on new NTFS symlink errors | 296 | /* BB add following call to handle readdir on new NTFS symlink errors |
@@ -501,62 +510,67 @@ static int cifs_save_resume_key(const char *current_entry, | |||
501 | return rc; | 510 | return rc; |
502 | } | 511 | } |
503 | 512 | ||
504 | /* find the corresponding entry in the search */ | 513 | /* |
505 | /* Note that the SMB server returns search entries for . and .. which | 514 | * Find the corresponding entry in the search. Note that the SMB server returns |
506 | complicates logic here if we choose to parse for them and we do not | 515 | * search entries for . and .. which complicates logic here if we choose to |
507 | assume that they are located in the findfirst return buffer.*/ | 516 | * parse for them and we do not assume that they are located in the findfirst |
508 | /* We start counting in the buffer with entry 2 and increment for every | 517 | * return buffer. We start counting in the buffer with entry 2 and increment for |
509 | entry (do not increment for . or .. entry) */ | 518 | * every entry (do not increment for . or .. entry). |
510 | static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon, | 519 | */ |
511 | struct file *file, char **ppCurrentEntry, int *num_to_ret) | 520 | static int |
521 | find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, | ||
522 | struct file *file, char **current_entry, int *num_to_ret) | ||
512 | { | 523 | { |
513 | __u16 search_flags; | 524 | __u16 search_flags; |
514 | int rc = 0; | 525 | int rc = 0; |
515 | int pos_in_buf = 0; | 526 | int pos_in_buf = 0; |
516 | loff_t first_entry_in_buffer; | 527 | loff_t first_entry_in_buffer; |
517 | loff_t index_to_find = file->f_pos; | 528 | loff_t index_to_find = file->f_pos; |
518 | struct cifsFileInfo *cifsFile = file->private_data; | 529 | struct cifsFileInfo *cfile = file->private_data; |
519 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 530 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
531 | struct TCP_Server_Info *server = tcon->ses->server; | ||
520 | /* check if index in the buffer */ | 532 | /* check if index in the buffer */ |
521 | 533 | ||
522 | if ((cifsFile == NULL) || (ppCurrentEntry == NULL) || | 534 | if (!server->ops->query_dir_first || !server->ops->query_dir_next) |
523 | (num_to_ret == NULL)) | 535 | return -ENOSYS; |
536 | |||
537 | if ((cfile == NULL) || (current_entry == NULL) || (num_to_ret == NULL)) | ||
524 | return -ENOENT; | 538 | return -ENOENT; |
525 | 539 | ||
526 | *ppCurrentEntry = NULL; | 540 | *current_entry = NULL; |
527 | first_entry_in_buffer = | 541 | first_entry_in_buffer = cfile->srch_inf.index_of_last_entry - |
528 | cifsFile->srch_inf.index_of_last_entry - | 542 | cfile->srch_inf.entries_in_buffer; |
529 | cifsFile->srch_inf.entries_in_buffer; | ||
530 | 543 | ||
531 | /* if first entry in buf is zero then is first buffer | 544 | /* |
532 | in search response data which means it is likely . and .. | 545 | * If first entry in buf is zero then is first buffer |
533 | will be in this buffer, although some servers do not return | 546 | * in search response data which means it is likely . and .. |
534 | . and .. for the root of a drive and for those we need | 547 | * will be in this buffer, although some servers do not return |
535 | to start two entries earlier */ | 548 | * . and .. for the root of a drive and for those we need |
549 | * to start two entries earlier. | ||
550 | */ | ||
536 | 551 | ||
537 | dump_cifs_file_struct(file, "In fce "); | 552 | dump_cifs_file_struct(file, "In fce "); |
538 | if (((index_to_find < cifsFile->srch_inf.index_of_last_entry) && | 553 | if (((index_to_find < cfile->srch_inf.index_of_last_entry) && |
539 | is_dir_changed(file)) || | 554 | is_dir_changed(file)) || (index_to_find < first_entry_in_buffer)) { |
540 | (index_to_find < first_entry_in_buffer)) { | ||
541 | /* close and restart search */ | 555 | /* close and restart search */ |
542 | cFYI(1, "search backing up - close and restart search"); | 556 | cFYI(1, "search backing up - close and restart search"); |
543 | spin_lock(&cifs_file_list_lock); | 557 | spin_lock(&cifs_file_list_lock); |
544 | if (!cifsFile->srch_inf.endOfSearch && | 558 | if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { |
545 | !cifsFile->invalidHandle) { | 559 | cfile->invalidHandle = true; |
546 | cifsFile->invalidHandle = true; | ||
547 | spin_unlock(&cifs_file_list_lock); | 560 | spin_unlock(&cifs_file_list_lock); |
548 | CIFSFindClose(xid, pTcon, cifsFile->netfid); | 561 | if (server->ops->close) |
562 | server->ops->close(xid, tcon, &cfile->fid); | ||
549 | } else | 563 | } else |
550 | spin_unlock(&cifs_file_list_lock); | 564 | spin_unlock(&cifs_file_list_lock); |
551 | if (cifsFile->srch_inf.ntwrk_buf_start) { | 565 | if (cfile->srch_inf.ntwrk_buf_start) { |
552 | cFYI(1, "freeing SMB ff cache buf on search rewind"); | 566 | cFYI(1, "freeing SMB ff cache buf on search rewind"); |
553 | if (cifsFile->srch_inf.smallBuf) | 567 | if (cfile->srch_inf.smallBuf) |
554 | cifs_small_buf_release(cifsFile->srch_inf. | 568 | cifs_small_buf_release(cfile->srch_inf. |
555 | ntwrk_buf_start); | 569 | ntwrk_buf_start); |
556 | else | 570 | else |
557 | cifs_buf_release(cifsFile->srch_inf. | 571 | cifs_buf_release(cfile->srch_inf. |
558 | ntwrk_buf_start); | 572 | ntwrk_buf_start); |
559 | cifsFile->srch_inf.ntwrk_buf_start = NULL; | 573 | cfile->srch_inf.ntwrk_buf_start = NULL; |
560 | } | 574 | } |
561 | rc = initiate_cifs_search(xid, file); | 575 | rc = initiate_cifs_search(xid, file); |
562 | if (rc) { | 576 | if (rc) { |
@@ -565,65 +579,64 @@ static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon, | |||
565 | return rc; | 579 | return rc; |
566 | } | 580 | } |
567 | /* FindFirst/Next set last_entry to NULL on malformed reply */ | 581 | /* FindFirst/Next set last_entry to NULL on malformed reply */ |
568 | if (cifsFile->srch_inf.last_entry) | 582 | if (cfile->srch_inf.last_entry) |
569 | cifs_save_resume_key(cifsFile->srch_inf.last_entry, | 583 | cifs_save_resume_key(cfile->srch_inf.last_entry, cfile); |
570 | cifsFile); | ||
571 | } | 584 | } |
572 | 585 | ||
573 | search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME; | 586 | search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME; |
574 | if (backup_cred(cifs_sb)) | 587 | if (backup_cred(cifs_sb)) |
575 | search_flags |= CIFS_SEARCH_BACKUP_SEARCH; | 588 | search_flags |= CIFS_SEARCH_BACKUP_SEARCH; |
576 | 589 | ||
577 | while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && | 590 | while ((index_to_find >= cfile->srch_inf.index_of_last_entry) && |
578 | (rc == 0) && !cifsFile->srch_inf.endOfSearch) { | 591 | (rc == 0) && !cfile->srch_inf.endOfSearch) { |
579 | cFYI(1, "calling findnext2"); | 592 | cFYI(1, "calling findnext2"); |
580 | rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, search_flags, | 593 | rc = server->ops->query_dir_next(xid, tcon, &cfile->fid, |
581 | &cifsFile->srch_inf); | 594 | search_flags, |
595 | &cfile->srch_inf); | ||
582 | /* FindFirst/Next set last_entry to NULL on malformed reply */ | 596 | /* FindFirst/Next set last_entry to NULL on malformed reply */ |
583 | if (cifsFile->srch_inf.last_entry) | 597 | if (cfile->srch_inf.last_entry) |
584 | cifs_save_resume_key(cifsFile->srch_inf.last_entry, | 598 | cifs_save_resume_key(cfile->srch_inf.last_entry, cfile); |
585 | cifsFile); | ||
586 | if (rc) | 599 | if (rc) |
587 | return -ENOENT; | 600 | return -ENOENT; |
588 | } | 601 | } |
589 | if (index_to_find < cifsFile->srch_inf.index_of_last_entry) { | 602 | if (index_to_find < cfile->srch_inf.index_of_last_entry) { |
590 | /* we found the buffer that contains the entry */ | 603 | /* we found the buffer that contains the entry */ |
591 | /* scan and find it */ | 604 | /* scan and find it */ |
592 | int i; | 605 | int i; |
593 | char *current_entry; | 606 | char *cur_ent; |
594 | char *end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + | 607 | char *end_of_smb = cfile->srch_inf.ntwrk_buf_start + |
595 | smbCalcSize((struct smb_hdr *) | 608 | server->ops->calc_smb_size( |
596 | cifsFile->srch_inf.ntwrk_buf_start); | 609 | cfile->srch_inf.ntwrk_buf_start); |
597 | 610 | ||
598 | current_entry = cifsFile->srch_inf.srch_entries_start; | 611 | cur_ent = cfile->srch_inf.srch_entries_start; |
599 | first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry | 612 | first_entry_in_buffer = cfile->srch_inf.index_of_last_entry |
600 | - cifsFile->srch_inf.entries_in_buffer; | 613 | - cfile->srch_inf.entries_in_buffer; |
601 | pos_in_buf = index_to_find - first_entry_in_buffer; | 614 | pos_in_buf = index_to_find - first_entry_in_buffer; |
602 | cFYI(1, "found entry - pos_in_buf %d", pos_in_buf); | 615 | cFYI(1, "found entry - pos_in_buf %d", pos_in_buf); |
603 | 616 | ||
604 | for (i = 0; (i < (pos_in_buf)) && (current_entry != NULL); i++) { | 617 | for (i = 0; (i < (pos_in_buf)) && (cur_ent != NULL); i++) { |
605 | /* go entry by entry figuring out which is first */ | 618 | /* go entry by entry figuring out which is first */ |
606 | current_entry = nxt_dir_entry(current_entry, end_of_smb, | 619 | cur_ent = nxt_dir_entry(cur_ent, end_of_smb, |
607 | cifsFile->srch_inf.info_level); | 620 | cfile->srch_inf.info_level); |
608 | } | 621 | } |
609 | if ((current_entry == NULL) && (i < pos_in_buf)) { | 622 | if ((cur_ent == NULL) && (i < pos_in_buf)) { |
610 | /* BB fixme - check if we should flag this error */ | 623 | /* BB fixme - check if we should flag this error */ |
611 | cERROR(1, "reached end of buf searching for pos in buf" | 624 | cERROR(1, "reached end of buf searching for pos in buf" |
612 | " %d index to find %lld rc %d", | 625 | " %d index to find %lld rc %d", pos_in_buf, |
613 | pos_in_buf, index_to_find, rc); | 626 | index_to_find, rc); |
614 | } | 627 | } |
615 | rc = 0; | 628 | rc = 0; |
616 | *ppCurrentEntry = current_entry; | 629 | *current_entry = cur_ent; |
617 | } else { | 630 | } else { |
618 | cFYI(1, "index not in buffer - could not findnext into it"); | 631 | cFYI(1, "index not in buffer - could not findnext into it"); |
619 | return 0; | 632 | return 0; |
620 | } | 633 | } |
621 | 634 | ||
622 | if (pos_in_buf >= cifsFile->srch_inf.entries_in_buffer) { | 635 | if (pos_in_buf >= cfile->srch_inf.entries_in_buffer) { |
623 | cFYI(1, "can not return entries pos_in_buf beyond last"); | 636 | cFYI(1, "can not return entries pos_in_buf beyond last"); |
624 | *num_to_ret = 0; | 637 | *num_to_ret = 0; |
625 | } else | 638 | } else |
626 | *num_to_ret = cifsFile->srch_inf.entries_in_buffer - pos_in_buf; | 639 | *num_to_ret = cfile->srch_inf.entries_in_buffer - pos_in_buf; |
627 | 640 | ||
628 | return rc; | 641 | return rc; |
629 | } | 642 | } |
@@ -723,7 +736,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | |||
723 | int rc = 0; | 736 | int rc = 0; |
724 | unsigned int xid; | 737 | unsigned int xid; |
725 | int i; | 738 | int i; |
726 | struct cifs_tcon *pTcon; | 739 | struct cifs_tcon *tcon; |
727 | struct cifsFileInfo *cifsFile = NULL; | 740 | struct cifsFileInfo *cifsFile = NULL; |
728 | char *current_entry; | 741 | char *current_entry; |
729 | int num_to_fill = 0; | 742 | int num_to_fill = 0; |
@@ -781,12 +794,12 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | |||
781 | } | 794 | } |
782 | } /* else { | 795 | } /* else { |
783 | cifsFile->invalidHandle = true; | 796 | cifsFile->invalidHandle = true; |
784 | CIFSFindClose(xid, pTcon, cifsFile->netfid); | 797 | tcon->ses->server->close(xid, tcon, &cifsFile->fid); |
785 | } */ | 798 | } */ |
786 | 799 | ||
787 | pTcon = tlink_tcon(cifsFile->tlink); | 800 | tcon = tlink_tcon(cifsFile->tlink); |
788 | rc = find_cifs_entry(xid, pTcon, file, | 801 | rc = find_cifs_entry(xid, tcon, file, ¤t_entry, |
789 | ¤t_entry, &num_to_fill); | 802 | &num_to_fill); |
790 | if (rc) { | 803 | if (rc) { |
791 | cFYI(1, "fce error %d", rc); | 804 | cFYI(1, "fce error %d", rc); |
792 | goto rddir2_exit; | 805 | goto rddir2_exit; |
@@ -798,7 +811,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | |||
798 | } | 811 | } |
799 | cFYI(1, "loop through %d times filling dir for net buf %p", | 812 | cFYI(1, "loop through %d times filling dir for net buf %p", |
800 | num_to_fill, cifsFile->srch_inf.ntwrk_buf_start); | 813 | num_to_fill, cifsFile->srch_inf.ntwrk_buf_start); |
801 | max_len = smbCalcSize((struct smb_hdr *) | 814 | max_len = tcon->ses->server->ops->calc_smb_size( |
802 | cifsFile->srch_inf.ntwrk_buf_start); | 815 | cifsFile->srch_inf.ntwrk_buf_start); |
803 | end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; | 816 | end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; |
804 | 817 | ||
@@ -815,10 +828,12 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | |||
815 | num_to_fill, i); | 828 | num_to_fill, i); |
816 | break; | 829 | break; |
817 | } | 830 | } |
818 | /* if buggy server returns . and .. late do | 831 | /* |
819 | we want to check for that here? */ | 832 | * if buggy server returns . and .. late do we want to |
820 | rc = cifs_filldir(current_entry, file, | 833 | * check for that here? |
821 | filldir, direntry, tmp_buf, max_len); | 834 | */ |
835 | rc = cifs_filldir(current_entry, file, filldir, | ||
836 | direntry, tmp_buf, max_len); | ||
822 | if (rc == -EOVERFLOW) { | 837 | if (rc == -EOVERFLOW) { |
823 | rc = 0; | 838 | rc = 0; |
824 | break; | 839 | break; |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 382c06d01b38..76809f4d3428 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -876,7 +876,8 @@ ssetup_ntlmssp_authenticate: | |||
876 | pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; | 876 | pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; |
877 | smb_buf = (struct smb_hdr *)iov[0].iov_base; | 877 | smb_buf = (struct smb_hdr *)iov[0].iov_base; |
878 | 878 | ||
879 | if ((type == RawNTLMSSP) && (smb_buf->Status.CifsError == | 879 | if ((type == RawNTLMSSP) && (resp_buf_type != CIFS_NO_BUFFER) && |
880 | (smb_buf->Status.CifsError == | ||
880 | cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) { | 881 | cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) { |
881 | if (phase != NtLmNegotiate) { | 882 | if (phase != NtLmNegotiate) { |
882 | cERROR(1, "Unexpected more processing error"); | 883 | cERROR(1, "Unexpected more processing error"); |
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 3129ac74b819..56cc4be87807 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c | |||
@@ -17,6 +17,8 @@ | |||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/pagemap.h> | ||
21 | #include <linux/vfs.h> | ||
20 | #include "cifsglob.h" | 22 | #include "cifsglob.h" |
21 | #include "cifsproto.h" | 23 | #include "cifsproto.h" |
22 | #include "cifs_debug.h" | 24 | #include "cifs_debug.h" |
@@ -63,7 +65,7 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf, | |||
63 | static bool | 65 | static bool |
64 | cifs_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2) | 66 | cifs_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2) |
65 | { | 67 | { |
66 | return ob1->netfid == ob2->netfid; | 68 | return ob1->fid.netfid == ob2->fid.netfid; |
67 | } | 69 | } |
68 | 70 | ||
69 | static unsigned int | 71 | static unsigned int |
@@ -410,6 +412,83 @@ cifs_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
410 | return rc; | 412 | return rc; |
411 | } | 413 | } |
412 | 414 | ||
415 | static unsigned int | ||
416 | cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) | ||
417 | { | ||
418 | __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); | ||
419 | struct TCP_Server_Info *server = tcon->ses->server; | ||
420 | unsigned int wsize; | ||
421 | |||
422 | /* start with specified wsize, or default */ | ||
423 | if (volume_info->wsize) | ||
424 | wsize = volume_info->wsize; | ||
425 | else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) | ||
426 | wsize = CIFS_DEFAULT_IOSIZE; | ||
427 | else | ||
428 | wsize = CIFS_DEFAULT_NON_POSIX_WSIZE; | ||
429 | |||
430 | /* can server support 24-bit write sizes? (via UNIX extensions) */ | ||
431 | if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) | ||
432 | wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE); | ||
433 | |||
434 | /* | ||
435 | * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set? | ||
436 | * Limit it to max buffer offered by the server, minus the size of the | ||
437 | * WRITEX header, not including the 4 byte RFC1001 length. | ||
438 | */ | ||
439 | if (!(server->capabilities & CAP_LARGE_WRITE_X) || | ||
440 | (!(server->capabilities & CAP_UNIX) && | ||
441 | (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)))) | ||
442 | wsize = min_t(unsigned int, wsize, | ||
443 | server->maxBuf - sizeof(WRITE_REQ) + 4); | ||
444 | |||
445 | /* hard limit of CIFS_MAX_WSIZE */ | ||
446 | wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); | ||
447 | |||
448 | return wsize; | ||
449 | } | ||
450 | |||
451 | static unsigned int | ||
452 | cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) | ||
453 | { | ||
454 | __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); | ||
455 | struct TCP_Server_Info *server = tcon->ses->server; | ||
456 | unsigned int rsize, defsize; | ||
457 | |||
458 | /* | ||
459 | * Set default value... | ||
460 | * | ||
461 | * HACK alert! Ancient servers have very small buffers. Even though | ||
462 | * MS-CIFS indicates that servers are only limited by the client's | ||
463 | * bufsize for reads, testing against win98se shows that it throws | ||
464 | * INVALID_PARAMETER errors if you try to request too large a read. | ||
465 | * OS/2 just sends back short reads. | ||
466 | * | ||
467 | * If the server doesn't advertise CAP_LARGE_READ_X, then assume that | ||
468 | * it can't handle a read request larger than its MaxBufferSize either. | ||
469 | */ | ||
470 | if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP)) | ||
471 | defsize = CIFS_DEFAULT_IOSIZE; | ||
472 | else if (server->capabilities & CAP_LARGE_READ_X) | ||
473 | defsize = CIFS_DEFAULT_NON_POSIX_RSIZE; | ||
474 | else | ||
475 | defsize = server->maxBuf - sizeof(READ_RSP); | ||
476 | |||
477 | rsize = volume_info->rsize ? volume_info->rsize : defsize; | ||
478 | |||
479 | /* | ||
480 | * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to | ||
481 | * the client's MaxBufferSize. | ||
482 | */ | ||
483 | if (!(server->capabilities & CAP_LARGE_READ_X)) | ||
484 | rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); | ||
485 | |||
486 | /* hard limit of CIFS_MAX_RSIZE */ | ||
487 | rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); | ||
488 | |||
489 | return rsize; | ||
490 | } | ||
491 | |||
413 | static void | 492 | static void |
414 | cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) | 493 | cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) |
415 | { | 494 | { |
@@ -489,6 +568,13 @@ cifs_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon, | |||
489 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 568 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
490 | } | 569 | } |
491 | 570 | ||
571 | static int | ||
572 | cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, | ||
573 | struct cifs_fid *fid, FILE_ALL_INFO *data) | ||
574 | { | ||
575 | return CIFSSMBQFileInfo(xid, tcon, fid->netfid, data); | ||
576 | } | ||
577 | |||
492 | static char * | 578 | static char * |
493 | cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, | 579 | cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, |
494 | struct cifs_tcon *tcon) | 580 | struct cifs_tcon *tcon) |
@@ -607,6 +693,219 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path, | |||
607 | cifsInode->cifsAttrs = dosattrs; | 693 | cifsInode->cifsAttrs = dosattrs; |
608 | } | 694 | } |
609 | 695 | ||
696 | static int | ||
697 | cifs_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, | ||
698 | int disposition, int desired_access, int create_options, | ||
699 | struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf, | ||
700 | struct cifs_sb_info *cifs_sb) | ||
701 | { | ||
702 | if (!(tcon->ses->capabilities & CAP_NT_SMBS)) | ||
703 | return SMBLegacyOpen(xid, tcon, path, disposition, | ||
704 | desired_access, create_options, | ||
705 | &fid->netfid, oplock, buf, | ||
706 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags | ||
707 | & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
708 | return CIFSSMBOpen(xid, tcon, path, disposition, desired_access, | ||
709 | create_options, &fid->netfid, oplock, buf, | ||
710 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | ||
711 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
712 | } | ||
713 | |||
714 | static void | ||
715 | cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) | ||
716 | { | ||
717 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); | ||
718 | cfile->fid.netfid = fid->netfid; | ||
719 | cifs_set_oplock_level(cinode, oplock); | ||
720 | cinode->can_cache_brlcks = cinode->clientCanCacheAll; | ||
721 | } | ||
722 | |||
723 | static void | ||
724 | cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, | ||
725 | struct cifs_fid *fid) | ||
726 | { | ||
727 | CIFSSMBClose(xid, tcon, fid->netfid); | ||
728 | } | ||
729 | |||
730 | static int | ||
731 | cifs_flush_file(const unsigned int xid, struct cifs_tcon *tcon, | ||
732 | struct cifs_fid *fid) | ||
733 | { | ||
734 | return CIFSSMBFlush(xid, tcon, fid->netfid); | ||
735 | } | ||
736 | |||
737 | static int | ||
738 | cifs_sync_read(const unsigned int xid, struct cifsFileInfo *cfile, | ||
739 | struct cifs_io_parms *parms, unsigned int *bytes_read, | ||
740 | char **buf, int *buf_type) | ||
741 | { | ||
742 | parms->netfid = cfile->fid.netfid; | ||
743 | return CIFSSMBRead(xid, parms, bytes_read, buf, buf_type); | ||
744 | } | ||
745 | |||
746 | static int | ||
747 | cifs_sync_write(const unsigned int xid, struct cifsFileInfo *cfile, | ||
748 | struct cifs_io_parms *parms, unsigned int *written, | ||
749 | struct kvec *iov, unsigned long nr_segs) | ||
750 | { | ||
751 | |||
752 | parms->netfid = cfile->fid.netfid; | ||
753 | return CIFSSMBWrite2(xid, parms, written, iov, nr_segs); | ||
754 | } | ||
755 | |||
756 | static int | ||
757 | smb_set_file_info(struct inode *inode, const char *full_path, | ||
758 | FILE_BASIC_INFO *buf, const unsigned int xid) | ||
759 | { | ||
760 | int oplock = 0; | ||
761 | int rc; | ||
762 | __u16 netfid; | ||
763 | __u32 netpid; | ||
764 | struct cifsFileInfo *open_file; | ||
765 | struct cifsInodeInfo *cinode = CIFS_I(inode); | ||
766 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
767 | struct tcon_link *tlink = NULL; | ||
768 | struct cifs_tcon *tcon; | ||
769 | FILE_BASIC_INFO info_buf; | ||
770 | |||
771 | /* if the file is already open for write, just use that fileid */ | ||
772 | open_file = find_writable_file(cinode, true); | ||
773 | if (open_file) { | ||
774 | netfid = open_file->fid.netfid; | ||
775 | netpid = open_file->pid; | ||
776 | tcon = tlink_tcon(open_file->tlink); | ||
777 | goto set_via_filehandle; | ||
778 | } | ||
779 | |||
780 | tlink = cifs_sb_tlink(cifs_sb); | ||
781 | if (IS_ERR(tlink)) { | ||
782 | rc = PTR_ERR(tlink); | ||
783 | tlink = NULL; | ||
784 | goto out; | ||
785 | } | ||
786 | tcon = tlink_tcon(tlink); | ||
787 | |||
788 | /* | ||
789 | * NT4 apparently returns success on this call, but it doesn't really | ||
790 | * work. | ||
791 | */ | ||
792 | if (!(tcon->ses->flags & CIFS_SES_NT4)) { | ||
793 | rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, | ||
794 | cifs_sb->local_nls, | ||
795 | cifs_sb->mnt_cifs_flags & | ||
796 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
797 | if (rc == 0) { | ||
798 | cinode->cifsAttrs = le32_to_cpu(buf->Attributes); | ||
799 | goto out; | ||
800 | } else if (rc != -EOPNOTSUPP && rc != -EINVAL) | ||
801 | goto out; | ||
802 | } | ||
803 | |||
804 | cFYI(1, "calling SetFileInfo since SetPathInfo for times not supported " | ||
805 | "by this server"); | ||
806 | rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, | ||
807 | SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR, | ||
808 | &netfid, &oplock, NULL, cifs_sb->local_nls, | ||
809 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
810 | |||
811 | if (rc != 0) { | ||
812 | if (rc == -EIO) | ||
813 | rc = -EINVAL; | ||
814 | goto out; | ||
815 | } | ||
816 | |||
817 | netpid = current->tgid; | ||
818 | |||
819 | set_via_filehandle: | ||
820 | rc = CIFSSMBSetFileInfo(xid, tcon, &info_buf, netfid, netpid); | ||
821 | if (!rc) | ||
822 | cinode->cifsAttrs = le32_to_cpu(buf->Attributes); | ||
823 | |||
824 | if (open_file == NULL) | ||
825 | CIFSSMBClose(xid, tcon, netfid); | ||
826 | else | ||
827 | cifsFileInfo_put(open_file); | ||
828 | out: | ||
829 | if (tlink != NULL) | ||
830 | cifs_put_tlink(tlink); | ||
831 | return rc; | ||
832 | } | ||
833 | |||
834 | static int | ||
835 | cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, | ||
836 | const char *path, struct cifs_sb_info *cifs_sb, | ||
837 | struct cifs_fid *fid, __u16 search_flags, | ||
838 | struct cifs_search_info *srch_inf) | ||
839 | { | ||
840 | return CIFSFindFirst(xid, tcon, path, cifs_sb, | ||
841 | &fid->netfid, search_flags, srch_inf, true); | ||
842 | } | ||
843 | |||
844 | static int | ||
845 | cifs_query_dir_next(const unsigned int xid, struct cifs_tcon *tcon, | ||
846 | struct cifs_fid *fid, __u16 search_flags, | ||
847 | struct cifs_search_info *srch_inf) | ||
848 | { | ||
849 | return CIFSFindNext(xid, tcon, fid->netfid, search_flags, srch_inf); | ||
850 | } | ||
851 | |||
852 | static int | ||
853 | cifs_close_dir(const unsigned int xid, struct cifs_tcon *tcon, | ||
854 | struct cifs_fid *fid) | ||
855 | { | ||
856 | return CIFSFindClose(xid, tcon, fid->netfid); | ||
857 | } | ||
858 | |||
859 | static int | ||
860 | cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, | ||
861 | struct cifsInodeInfo *cinode) | ||
862 | { | ||
863 | return CIFSSMBLock(0, tcon, fid->netfid, current->tgid, 0, 0, 0, 0, | ||
864 | LOCKING_ANDX_OPLOCK_RELEASE, false, | ||
865 | cinode->clientCanCacheRead ? 1 : 0); | ||
866 | } | ||
867 | |||
868 | static int | ||
869 | cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon, | ||
870 | struct kstatfs *buf) | ||
871 | { | ||
872 | int rc = -EOPNOTSUPP; | ||
873 | |||
874 | buf->f_type = CIFS_MAGIC_NUMBER; | ||
875 | |||
876 | /* | ||
877 | * We could add a second check for a QFS Unix capability bit | ||
878 | */ | ||
879 | if ((tcon->ses->capabilities & CAP_UNIX) && | ||
880 | (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability))) | ||
881 | rc = CIFSSMBQFSPosixInfo(xid, tcon, buf); | ||
882 | |||
883 | /* | ||
884 | * Only need to call the old QFSInfo if failed on newer one, | ||
885 | * e.g. by OS/2. | ||
886 | **/ | ||
887 | if (rc && (tcon->ses->capabilities & CAP_NT_SMBS)) | ||
888 | rc = CIFSSMBQFSInfo(xid, tcon, buf); | ||
889 | |||
890 | /* | ||
891 | * Some old Windows servers also do not support level 103, retry with | ||
892 | * older level one if old server failed the previous call or we | ||
893 | * bypassed it because we detected that this was an older LANMAN sess | ||
894 | */ | ||
895 | if (rc) | ||
896 | rc = SMBOldQFSInfo(xid, tcon, buf); | ||
897 | return rc; | ||
898 | } | ||
899 | |||
900 | static int | ||
901 | cifs_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, | ||
902 | __u64 length, __u32 type, int lock, int unlock, bool wait) | ||
903 | { | ||
904 | return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->fid.netfid, | ||
905 | current->tgid, length, offset, unlock, lock, | ||
906 | (__u8)type, wait, 0); | ||
907 | } | ||
908 | |||
610 | struct smb_version_operations smb1_operations = { | 909 | struct smb_version_operations smb1_operations = { |
611 | .send_cancel = send_nt_cancel, | 910 | .send_cancel = send_nt_cancel, |
612 | .compare_fids = cifs_compare_fids, | 911 | .compare_fids = cifs_compare_fids, |
@@ -630,6 +929,8 @@ struct smb_version_operations smb1_operations = { | |||
630 | .check_trans2 = cifs_check_trans2, | 929 | .check_trans2 = cifs_check_trans2, |
631 | .need_neg = cifs_need_neg, | 930 | .need_neg = cifs_need_neg, |
632 | .negotiate = cifs_negotiate, | 931 | .negotiate = cifs_negotiate, |
932 | .negotiate_wsize = cifs_negotiate_wsize, | ||
933 | .negotiate_rsize = cifs_negotiate_rsize, | ||
633 | .sess_setup = CIFS_SessSetup, | 934 | .sess_setup = CIFS_SessSetup, |
634 | .logoff = CIFSSMBLogoff, | 935 | .logoff = CIFSSMBLogoff, |
635 | .tree_connect = CIFSTCon, | 936 | .tree_connect = CIFSTCon, |
@@ -638,12 +939,37 @@ struct smb_version_operations smb1_operations = { | |||
638 | .qfs_tcon = cifs_qfs_tcon, | 939 | .qfs_tcon = cifs_qfs_tcon, |
639 | .is_path_accessible = cifs_is_path_accessible, | 940 | .is_path_accessible = cifs_is_path_accessible, |
640 | .query_path_info = cifs_query_path_info, | 941 | .query_path_info = cifs_query_path_info, |
942 | .query_file_info = cifs_query_file_info, | ||
641 | .get_srv_inum = cifs_get_srv_inum, | 943 | .get_srv_inum = cifs_get_srv_inum, |
944 | .set_path_size = CIFSSMBSetEOF, | ||
945 | .set_file_size = CIFSSMBSetFileSize, | ||
946 | .set_file_info = smb_set_file_info, | ||
642 | .build_path_to_root = cifs_build_path_to_root, | 947 | .build_path_to_root = cifs_build_path_to_root, |
643 | .echo = CIFSSMBEcho, | 948 | .echo = CIFSSMBEcho, |
644 | .mkdir = CIFSSMBMkDir, | 949 | .mkdir = CIFSSMBMkDir, |
645 | .mkdir_setinfo = cifs_mkdir_setinfo, | 950 | .mkdir_setinfo = cifs_mkdir_setinfo, |
646 | .rmdir = CIFSSMBRmDir, | 951 | .rmdir = CIFSSMBRmDir, |
952 | .unlink = CIFSSMBDelFile, | ||
953 | .rename_pending_delete = cifs_rename_pending_delete, | ||
954 | .rename = CIFSSMBRename, | ||
955 | .create_hardlink = CIFSCreateHardLink, | ||
956 | .open = cifs_open_file, | ||
957 | .set_fid = cifs_set_fid, | ||
958 | .close = cifs_close_file, | ||
959 | .flush = cifs_flush_file, | ||
960 | .async_readv = cifs_async_readv, | ||
961 | .async_writev = cifs_async_writev, | ||
962 | .sync_read = cifs_sync_read, | ||
963 | .sync_write = cifs_sync_write, | ||
964 | .query_dir_first = cifs_query_dir_first, | ||
965 | .query_dir_next = cifs_query_dir_next, | ||
966 | .close_dir = cifs_close_dir, | ||
967 | .calc_smb_size = smbCalcSize, | ||
968 | .oplock_response = cifs_oplock_response, | ||
969 | .queryfs = cifs_queryfs, | ||
970 | .mand_lock = cifs_mand_lock, | ||
971 | .mand_unlock_range = cifs_unlock_range, | ||
972 | .push_mand_locks = cifs_push_mandatory_locks, | ||
647 | }; | 973 | }; |
648 | 974 | ||
649 | struct smb_version_values smb1_values = { | 975 | struct smb_version_values smb1_values = { |
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c new file mode 100644 index 000000000000..a93eec30a50d --- /dev/null +++ b/fs/cifs/smb2file.c | |||
@@ -0,0 +1,302 @@ | |||
1 | /* | ||
2 | * fs/cifs/smb2file.c | ||
3 | * | ||
4 | * Copyright (C) International Business Machines Corp., 2002, 2011 | ||
5 | * Author(s): Steve French (sfrench@us.ibm.com), | ||
6 | * Pavel Shilovsky ((pshilovsky@samba.org) 2012 | ||
7 | * | ||
8 | * This library is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU Lesser General Public License as published | ||
10 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This library is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
16 | * the GNU Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public License | ||
19 | * along with this library; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | #include <linux/fs.h> | ||
23 | #include <linux/stat.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/pagemap.h> | ||
26 | #include <asm/div64.h> | ||
27 | #include "cifsfs.h" | ||
28 | #include "cifspdu.h" | ||
29 | #include "cifsglob.h" | ||
30 | #include "cifsproto.h" | ||
31 | #include "cifs_debug.h" | ||
32 | #include "cifs_fs_sb.h" | ||
33 | #include "cifs_unicode.h" | ||
34 | #include "fscache.h" | ||
35 | #include "smb2proto.h" | ||
36 | |||
37 | void | ||
38 | smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) | ||
39 | { | ||
40 | oplock &= 0xFF; | ||
41 | if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) | ||
42 | return; | ||
43 | if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) { | ||
44 | cinode->clientCanCacheAll = true; | ||
45 | cinode->clientCanCacheRead = true; | ||
46 | cFYI(1, "Exclusive Oplock granted on inode %p", | ||
47 | &cinode->vfs_inode); | ||
48 | } else if (oplock == SMB2_OPLOCK_LEVEL_II) { | ||
49 | cinode->clientCanCacheAll = false; | ||
50 | cinode->clientCanCacheRead = true; | ||
51 | cFYI(1, "Level II Oplock granted on inode %p", | ||
52 | &cinode->vfs_inode); | ||
53 | } else { | ||
54 | cinode->clientCanCacheAll = false; | ||
55 | cinode->clientCanCacheRead = false; | ||
56 | } | ||
57 | } | ||
58 | |||
59 | int | ||
60 | smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, | ||
61 | int disposition, int desired_access, int create_options, | ||
62 | struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf, | ||
63 | struct cifs_sb_info *cifs_sb) | ||
64 | { | ||
65 | int rc; | ||
66 | __le16 *smb2_path; | ||
67 | struct smb2_file_all_info *smb2_data = NULL; | ||
68 | __u8 smb2_oplock[17]; | ||
69 | |||
70 | smb2_path = cifs_convert_path_to_utf16(path, cifs_sb); | ||
71 | if (smb2_path == NULL) { | ||
72 | rc = -ENOMEM; | ||
73 | goto out; | ||
74 | } | ||
75 | |||
76 | smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, | ||
77 | GFP_KERNEL); | ||
78 | if (smb2_data == NULL) { | ||
79 | rc = -ENOMEM; | ||
80 | goto out; | ||
81 | } | ||
82 | |||
83 | desired_access |= FILE_READ_ATTRIBUTES; | ||
84 | *smb2_oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE; | ||
85 | |||
86 | if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) | ||
87 | memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE); | ||
88 | |||
89 | rc = SMB2_open(xid, tcon, smb2_path, &fid->persistent_fid, | ||
90 | &fid->volatile_fid, desired_access, disposition, | ||
91 | 0, 0, smb2_oplock, smb2_data); | ||
92 | if (rc) | ||
93 | goto out; | ||
94 | |||
95 | if (buf) { | ||
96 | /* open response does not have IndexNumber field - get it */ | ||
97 | rc = SMB2_get_srv_num(xid, tcon, fid->persistent_fid, | ||
98 | fid->volatile_fid, | ||
99 | &smb2_data->IndexNumber); | ||
100 | if (rc) { | ||
101 | /* let get_inode_info disable server inode numbers */ | ||
102 | smb2_data->IndexNumber = 0; | ||
103 | rc = 0; | ||
104 | } | ||
105 | move_smb2_info_to_cifs(buf, smb2_data); | ||
106 | } | ||
107 | |||
108 | *oplock = *smb2_oplock; | ||
109 | out: | ||
110 | kfree(smb2_data); | ||
111 | kfree(smb2_path); | ||
112 | return rc; | ||
113 | } | ||
114 | |||
115 | int | ||
116 | smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, | ||
117 | const unsigned int xid) | ||
118 | { | ||
119 | int rc = 0, stored_rc; | ||
120 | unsigned int max_num, num = 0, max_buf; | ||
121 | struct smb2_lock_element *buf, *cur; | ||
122 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | ||
123 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); | ||
124 | struct cifsLockInfo *li, *tmp; | ||
125 | __u64 length = 1 + flock->fl_end - flock->fl_start; | ||
126 | struct list_head tmp_llist; | ||
127 | |||
128 | INIT_LIST_HEAD(&tmp_llist); | ||
129 | |||
130 | /* | ||
131 | * Accessing maxBuf is racy with cifs_reconnect - need to store value | ||
132 | * and check it for zero before using. | ||
133 | */ | ||
134 | max_buf = tcon->ses->server->maxBuf; | ||
135 | if (!max_buf) | ||
136 | return -EINVAL; | ||
137 | |||
138 | max_num = max_buf / sizeof(struct smb2_lock_element); | ||
139 | buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL); | ||
140 | if (!buf) | ||
141 | return -ENOMEM; | ||
142 | |||
143 | cur = buf; | ||
144 | |||
145 | down_write(&cinode->lock_sem); | ||
146 | list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { | ||
147 | if (flock->fl_start > li->offset || | ||
148 | (flock->fl_start + length) < | ||
149 | (li->offset + li->length)) | ||
150 | continue; | ||
151 | if (current->tgid != li->pid) | ||
152 | continue; | ||
153 | if (cinode->can_cache_brlcks) { | ||
154 | /* | ||
155 | * We can cache brlock requests - simply remove a lock | ||
156 | * from the file's list. | ||
157 | */ | ||
158 | list_del(&li->llist); | ||
159 | cifs_del_lock_waiters(li); | ||
160 | kfree(li); | ||
161 | continue; | ||
162 | } | ||
163 | cur->Length = cpu_to_le64(li->length); | ||
164 | cur->Offset = cpu_to_le64(li->offset); | ||
165 | cur->Flags = cpu_to_le32(SMB2_LOCKFLAG_UNLOCK); | ||
166 | /* | ||
167 | * We need to save a lock here to let us add it again to the | ||
168 | * file's list if the unlock range request fails on the server. | ||
169 | */ | ||
170 | list_move(&li->llist, &tmp_llist); | ||
171 | if (++num == max_num) { | ||
172 | stored_rc = smb2_lockv(xid, tcon, | ||
173 | cfile->fid.persistent_fid, | ||
174 | cfile->fid.volatile_fid, | ||
175 | current->tgid, num, buf); | ||
176 | if (stored_rc) { | ||
177 | /* | ||
178 | * We failed on the unlock range request - add | ||
179 | * all locks from the tmp list to the head of | ||
180 | * the file's list. | ||
181 | */ | ||
182 | cifs_move_llist(&tmp_llist, | ||
183 | &cfile->llist->locks); | ||
184 | rc = stored_rc; | ||
185 | } else | ||
186 | /* | ||
187 | * The unlock range request succeed - free the | ||
188 | * tmp list. | ||
189 | */ | ||
190 | cifs_free_llist(&tmp_llist); | ||
191 | cur = buf; | ||
192 | num = 0; | ||
193 | } else | ||
194 | cur++; | ||
195 | } | ||
196 | if (num) { | ||
197 | stored_rc = smb2_lockv(xid, tcon, cfile->fid.persistent_fid, | ||
198 | cfile->fid.volatile_fid, current->tgid, | ||
199 | num, buf); | ||
200 | if (stored_rc) { | ||
201 | cifs_move_llist(&tmp_llist, &cfile->llist->locks); | ||
202 | rc = stored_rc; | ||
203 | } else | ||
204 | cifs_free_llist(&tmp_llist); | ||
205 | } | ||
206 | up_write(&cinode->lock_sem); | ||
207 | |||
208 | kfree(buf); | ||
209 | return rc; | ||
210 | } | ||
211 | |||
212 | static int | ||
213 | smb2_push_mand_fdlocks(struct cifs_fid_locks *fdlocks, const unsigned int xid, | ||
214 | struct smb2_lock_element *buf, unsigned int max_num) | ||
215 | { | ||
216 | int rc = 0, stored_rc; | ||
217 | struct cifsFileInfo *cfile = fdlocks->cfile; | ||
218 | struct cifsLockInfo *li; | ||
219 | unsigned int num = 0; | ||
220 | struct smb2_lock_element *cur = buf; | ||
221 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | ||
222 | |||
223 | list_for_each_entry(li, &fdlocks->locks, llist) { | ||
224 | cur->Length = cpu_to_le64(li->length); | ||
225 | cur->Offset = cpu_to_le64(li->offset); | ||
226 | cur->Flags = cpu_to_le32(li->type | | ||
227 | SMB2_LOCKFLAG_FAIL_IMMEDIATELY); | ||
228 | if (++num == max_num) { | ||
229 | stored_rc = smb2_lockv(xid, tcon, | ||
230 | cfile->fid.persistent_fid, | ||
231 | cfile->fid.volatile_fid, | ||
232 | current->tgid, num, buf); | ||
233 | if (stored_rc) | ||
234 | rc = stored_rc; | ||
235 | cur = buf; | ||
236 | num = 0; | ||
237 | } else | ||
238 | cur++; | ||
239 | } | ||
240 | if (num) { | ||
241 | stored_rc = smb2_lockv(xid, tcon, | ||
242 | cfile->fid.persistent_fid, | ||
243 | cfile->fid.volatile_fid, | ||
244 | current->tgid, num, buf); | ||
245 | if (stored_rc) | ||
246 | rc = stored_rc; | ||
247 | } | ||
248 | |||
249 | return rc; | ||
250 | } | ||
251 | |||
252 | int | ||
253 | smb2_push_mandatory_locks(struct cifsFileInfo *cfile) | ||
254 | { | ||
255 | int rc = 0, stored_rc; | ||
256 | unsigned int xid; | ||
257 | unsigned int max_num, max_buf; | ||
258 | struct smb2_lock_element *buf; | ||
259 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); | ||
260 | struct cifs_fid_locks *fdlocks; | ||
261 | |||
262 | xid = get_xid(); | ||
263 | /* we are going to update can_cache_brlcks here - need a write access */ | ||
264 | down_write(&cinode->lock_sem); | ||
265 | if (!cinode->can_cache_brlcks) { | ||
266 | up_write(&cinode->lock_sem); | ||
267 | free_xid(xid); | ||
268 | return rc; | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * Accessing maxBuf is racy with cifs_reconnect - need to store value | ||
273 | * and check it for zero before using. | ||
274 | */ | ||
275 | max_buf = tlink_tcon(cfile->tlink)->ses->server->maxBuf; | ||
276 | if (!max_buf) { | ||
277 | up_write(&cinode->lock_sem); | ||
278 | free_xid(xid); | ||
279 | return -EINVAL; | ||
280 | } | ||
281 | |||
282 | max_num = max_buf / sizeof(struct smb2_lock_element); | ||
283 | buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL); | ||
284 | if (!buf) { | ||
285 | up_write(&cinode->lock_sem); | ||
286 | free_xid(xid); | ||
287 | return -ENOMEM; | ||
288 | } | ||
289 | |||
290 | list_for_each_entry(fdlocks, &cinode->llist, llist) { | ||
291 | stored_rc = smb2_push_mand_fdlocks(fdlocks, xid, buf, max_num); | ||
292 | if (stored_rc) | ||
293 | rc = stored_rc; | ||
294 | } | ||
295 | |||
296 | cinode->can_cache_brlcks = false; | ||
297 | kfree(buf); | ||
298 | |||
299 | up_write(&cinode->lock_sem); | ||
300 | free_xid(xid); | ||
301 | return rc; | ||
302 | } | ||
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index 33c1d89090c0..7c0e2143e775 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h | |||
@@ -23,6 +23,8 @@ | |||
23 | #ifndef _SMB2_GLOB_H | 23 | #ifndef _SMB2_GLOB_H |
24 | #define _SMB2_GLOB_H | 24 | #define _SMB2_GLOB_H |
25 | 25 | ||
26 | #define SMB2_MAGIC_NUMBER 0xFE534D42 | ||
27 | |||
26 | /* | 28 | /* |
27 | ***************************************************************** | 29 | ***************************************************************** |
28 | * Constants go here | 30 | * Constants go here |
@@ -40,5 +42,17 @@ | |||
40 | #define SMB2_OP_MKDIR 5 | 42 | #define SMB2_OP_MKDIR 5 |
41 | #define SMB2_OP_RENAME 6 | 43 | #define SMB2_OP_RENAME 6 |
42 | #define SMB2_OP_DELETE 7 | 44 | #define SMB2_OP_DELETE 7 |
45 | #define SMB2_OP_HARDLINK 8 | ||
46 | #define SMB2_OP_SET_EOF 9 | ||
47 | |||
48 | /* Used when constructing chained read requests. */ | ||
49 | #define CHAINED_REQUEST 1 | ||
50 | #define START_OF_CHAIN 2 | ||
51 | #define END_OF_CHAIN 4 | ||
52 | #define RELATED_REQUEST 8 | ||
53 | |||
54 | #define SMB2_SIGNATURE_SIZE (16) | ||
55 | #define SMB2_NTLMV2_SESSKEY_SIZE (16) | ||
56 | #define SMB2_HMACSHA256_SIZE (32) | ||
43 | 57 | ||
44 | #endif /* _SMB2_GLOB_H */ | 58 | #endif /* _SMB2_GLOB_H */ |
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 2aa5cb08c526..706482452df4 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c | |||
@@ -47,6 +47,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, | |||
47 | int rc, tmprc = 0; | 47 | int rc, tmprc = 0; |
48 | u64 persistent_fid, volatile_fid; | 48 | u64 persistent_fid, volatile_fid; |
49 | __le16 *utf16_path; | 49 | __le16 *utf16_path; |
50 | __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; | ||
50 | 51 | ||
51 | utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); | 52 | utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); |
52 | if (!utf16_path) | 53 | if (!utf16_path) |
@@ -54,7 +55,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, | |||
54 | 55 | ||
55 | rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, | 56 | rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, |
56 | desired_access, create_disposition, file_attributes, | 57 | desired_access, create_disposition, file_attributes, |
57 | create_options); | 58 | create_options, &oplock, NULL); |
58 | if (rc) { | 59 | if (rc) { |
59 | kfree(utf16_path); | 60 | kfree(utf16_path); |
60 | return rc; | 61 | return rc; |
@@ -74,6 +75,22 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, | |||
74 | * SMB2_open() call. | 75 | * SMB2_open() call. |
75 | */ | 76 | */ |
76 | break; | 77 | break; |
78 | case SMB2_OP_RENAME: | ||
79 | tmprc = SMB2_rename(xid, tcon, persistent_fid, volatile_fid, | ||
80 | (__le16 *)data); | ||
81 | break; | ||
82 | case SMB2_OP_HARDLINK: | ||
83 | tmprc = SMB2_set_hardlink(xid, tcon, persistent_fid, | ||
84 | volatile_fid, (__le16 *)data); | ||
85 | break; | ||
86 | case SMB2_OP_SET_EOF: | ||
87 | tmprc = SMB2_set_eof(xid, tcon, persistent_fid, volatile_fid, | ||
88 | current->tgid, (__le64 *)data); | ||
89 | break; | ||
90 | case SMB2_OP_SET_INFO: | ||
91 | tmprc = SMB2_set_info(xid, tcon, persistent_fid, volatile_fid, | ||
92 | (FILE_BASIC_INFO *)data); | ||
93 | break; | ||
77 | default: | 94 | default: |
78 | cERROR(1, "Invalid command"); | 95 | cERROR(1, "Invalid command"); |
79 | break; | 96 | break; |
@@ -86,7 +103,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, | |||
86 | return rc; | 103 | return rc; |
87 | } | 104 | } |
88 | 105 | ||
89 | static void | 106 | void |
90 | move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src) | 107 | move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src) |
91 | { | 108 | { |
92 | memcpy(dst, src, (size_t)(&src->CurrentByteOffset) - (size_t)src); | 109 | memcpy(dst, src, (size_t)(&src->CurrentByteOffset) - (size_t)src); |
@@ -161,3 +178,80 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, | |||
161 | 0, CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE, | 178 | 0, CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE, |
162 | NULL, SMB2_OP_DELETE); | 179 | NULL, SMB2_OP_DELETE); |
163 | } | 180 | } |
181 | |||
182 | int | ||
183 | smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, | ||
184 | struct cifs_sb_info *cifs_sb) | ||
185 | { | ||
186 | return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, | ||
187 | 0, CREATE_DELETE_ON_CLOSE, NULL, | ||
188 | SMB2_OP_DELETE); | ||
189 | } | ||
190 | |||
191 | static int | ||
192 | smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, | ||
193 | const char *from_name, const char *to_name, | ||
194 | struct cifs_sb_info *cifs_sb, __u32 access, int command) | ||
195 | { | ||
196 | __le16 *smb2_to_name = NULL; | ||
197 | int rc; | ||
198 | |||
199 | smb2_to_name = cifs_convert_path_to_utf16(to_name, cifs_sb); | ||
200 | if (smb2_to_name == NULL) { | ||
201 | rc = -ENOMEM; | ||
202 | goto smb2_rename_path; | ||
203 | } | ||
204 | |||
205 | rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, access, | ||
206 | FILE_OPEN, 0, 0, smb2_to_name, command); | ||
207 | smb2_rename_path: | ||
208 | kfree(smb2_to_name); | ||
209 | return rc; | ||
210 | } | ||
211 | |||
212 | int | ||
213 | smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, | ||
214 | const char *from_name, const char *to_name, | ||
215 | struct cifs_sb_info *cifs_sb) | ||
216 | { | ||
217 | return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, | ||
218 | DELETE, SMB2_OP_RENAME); | ||
219 | } | ||
220 | |||
221 | int | ||
222 | smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, | ||
223 | const char *from_name, const char *to_name, | ||
224 | struct cifs_sb_info *cifs_sb) | ||
225 | { | ||
226 | return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, | ||
227 | FILE_READ_ATTRIBUTES, SMB2_OP_HARDLINK); | ||
228 | } | ||
229 | |||
230 | int | ||
231 | smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, | ||
232 | const char *full_path, __u64 size, | ||
233 | struct cifs_sb_info *cifs_sb, bool set_alloc) | ||
234 | { | ||
235 | __le64 eof = cpu_to_le64(size); | ||
236 | return smb2_open_op_close(xid, tcon, cifs_sb, full_path, | ||
237 | FILE_WRITE_DATA, FILE_OPEN, 0, 0, &eof, | ||
238 | SMB2_OP_SET_EOF); | ||
239 | } | ||
240 | |||
241 | int | ||
242 | smb2_set_file_info(struct inode *inode, const char *full_path, | ||
243 | FILE_BASIC_INFO *buf, const unsigned int xid) | ||
244 | { | ||
245 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
246 | struct tcon_link *tlink; | ||
247 | int rc; | ||
248 | |||
249 | tlink = cifs_sb_tlink(cifs_sb); | ||
250 | if (IS_ERR(tlink)) | ||
251 | return PTR_ERR(tlink); | ||
252 | rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path, | ||
253 | FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, 0, buf, | ||
254 | SMB2_OP_SET_INFO); | ||
255 | cifs_put_tlink(tlink); | ||
256 | return rc; | ||
257 | } | ||
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index be41478acc05..494c912c76fe 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c | |||
@@ -453,7 +453,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { | |||
453 | {STATUS_FILE_INVALID, -EIO, "STATUS_FILE_INVALID"}, | 453 | {STATUS_FILE_INVALID, -EIO, "STATUS_FILE_INVALID"}, |
454 | {STATUS_ALLOTTED_SPACE_EXCEEDED, -EIO, | 454 | {STATUS_ALLOTTED_SPACE_EXCEEDED, -EIO, |
455 | "STATUS_ALLOTTED_SPACE_EXCEEDED"}, | 455 | "STATUS_ALLOTTED_SPACE_EXCEEDED"}, |
456 | {STATUS_INSUFFICIENT_RESOURCES, -EIO, "STATUS_INSUFFICIENT_RESOURCES"}, | 456 | {STATUS_INSUFFICIENT_RESOURCES, -EREMOTEIO, |
457 | "STATUS_INSUFFICIENT_RESOURCES"}, | ||
457 | {STATUS_DFS_EXIT_PATH_FOUND, -EIO, "STATUS_DFS_EXIT_PATH_FOUND"}, | 458 | {STATUS_DFS_EXIT_PATH_FOUND, -EIO, "STATUS_DFS_EXIT_PATH_FOUND"}, |
458 | {STATUS_DEVICE_DATA_ERROR, -EIO, "STATUS_DEVICE_DATA_ERROR"}, | 459 | {STATUS_DEVICE_DATA_ERROR, -EIO, "STATUS_DEVICE_DATA_ERROR"}, |
459 | {STATUS_DEVICE_NOT_CONNECTED, -EIO, "STATUS_DEVICE_NOT_CONNECTED"}, | 460 | {STATUS_DEVICE_NOT_CONNECTED, -EIO, "STATUS_DEVICE_NOT_CONNECTED"}, |
@@ -2455,7 +2456,8 @@ map_smb2_to_linux_error(char *buf, bool log_err) | |||
2455 | return 0; | 2456 | return 0; |
2456 | 2457 | ||
2457 | /* mask facility */ | 2458 | /* mask facility */ |
2458 | if (log_err && (smb2err != (STATUS_MORE_PROCESSING_REQUIRED))) | 2459 | if (log_err && (smb2err != STATUS_MORE_PROCESSING_REQUIRED) && |
2460 | (smb2err != STATUS_END_OF_FILE)) | ||
2459 | smb2_print_status(smb2err); | 2461 | smb2_print_status(smb2err); |
2460 | else if (cifsFYI & CIFS_RC) | 2462 | else if (cifsFYI & CIFS_RC) |
2461 | smb2_print_status(smb2err); | 2463 | smb2_print_status(smb2err); |
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index a4ff5d547554..7b1c5e3287fb 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c | |||
@@ -52,7 +52,8 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid) | |||
52 | cERROR(1, "Bad protocol string signature header %x", | 52 | cERROR(1, "Bad protocol string signature header %x", |
53 | *(unsigned int *) hdr->ProtocolId); | 53 | *(unsigned int *) hdr->ProtocolId); |
54 | if (mid != hdr->MessageId) | 54 | if (mid != hdr->MessageId) |
55 | cERROR(1, "Mids do not match"); | 55 | cERROR(1, "Mids do not match: %llu and %llu", mid, |
56 | hdr->MessageId); | ||
56 | } | 57 | } |
57 | cERROR(1, "Bad SMB detected. The Mid=%llu", hdr->MessageId); | 58 | cERROR(1, "Bad SMB detected. The Mid=%llu", hdr->MessageId); |
58 | return 1; | 59 | return 1; |
@@ -107,7 +108,7 @@ smb2_check_message(char *buf, unsigned int length) | |||
107 | * ie Validate the wct via smb2_struct_sizes table above | 108 | * ie Validate the wct via smb2_struct_sizes table above |
108 | */ | 109 | */ |
109 | 110 | ||
110 | if (length < 2 + sizeof(struct smb2_hdr)) { | 111 | if (length < sizeof(struct smb2_pdu)) { |
111 | if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) { | 112 | if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) { |
112 | pdu->StructureSize2 = 0; | 113 | pdu->StructureSize2 = 0; |
113 | /* | 114 | /* |
@@ -121,15 +122,15 @@ smb2_check_message(char *buf, unsigned int length) | |||
121 | return 1; | 122 | return 1; |
122 | } | 123 | } |
123 | if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) { | 124 | if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) { |
124 | cERROR(1, "SMB length greater than maximum, mid=%lld", mid); | 125 | cERROR(1, "SMB length greater than maximum, mid=%llu", mid); |
125 | return 1; | 126 | return 1; |
126 | } | 127 | } |
127 | 128 | ||
128 | if (check_smb2_hdr(hdr, mid)) | 129 | if (check_smb2_hdr(hdr, mid)) |
129 | return 1; | 130 | return 1; |
130 | 131 | ||
131 | if (hdr->StructureSize != SMB2_HEADER_SIZE) { | 132 | if (hdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) { |
132 | cERROR(1, "Illegal structure size %d", | 133 | cERROR(1, "Illegal structure size %u", |
133 | le16_to_cpu(hdr->StructureSize)); | 134 | le16_to_cpu(hdr->StructureSize)); |
134 | return 1; | 135 | return 1; |
135 | } | 136 | } |
@@ -141,12 +142,19 @@ smb2_check_message(char *buf, unsigned int length) | |||
141 | } | 142 | } |
142 | 143 | ||
143 | if (smb2_rsp_struct_sizes[command] != pdu->StructureSize2) { | 144 | if (smb2_rsp_struct_sizes[command] != pdu->StructureSize2) { |
144 | if (hdr->Status == 0 || | 145 | if (command != SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0 || |
145 | pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2) { | 146 | pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2)) { |
146 | /* error packets have 9 byte structure size */ | 147 | /* error packets have 9 byte structure size */ |
147 | cERROR(1, "Illegal response size %u for command %d", | 148 | cERROR(1, "Illegal response size %u for command %d", |
148 | le16_to_cpu(pdu->StructureSize2), command); | 149 | le16_to_cpu(pdu->StructureSize2), command); |
149 | return 1; | 150 | return 1; |
151 | } else if (command == SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0) | ||
152 | && (le16_to_cpu(pdu->StructureSize2) != 44) | ||
153 | && (le16_to_cpu(pdu->StructureSize2) != 36)) { | ||
154 | /* special case for SMB2.1 lease break message */ | ||
155 | cERROR(1, "Illegal response size %d for oplock break", | ||
156 | le16_to_cpu(pdu->StructureSize2)); | ||
157 | return 1; | ||
150 | } | 158 | } |
151 | } | 159 | } |
152 | 160 | ||
@@ -161,8 +169,12 @@ smb2_check_message(char *buf, unsigned int length) | |||
161 | if (4 + len != clc_len) { | 169 | if (4 + len != clc_len) { |
162 | cFYI(1, "Calculated size %u length %u mismatch mid %llu", | 170 | cFYI(1, "Calculated size %u length %u mismatch mid %llu", |
163 | clc_len, 4 + len, mid); | 171 | clc_len, 4 + len, mid); |
164 | if (clc_len == 4 + len + 1) /* BB FIXME (fix samba) */ | 172 | /* Windows 7 server returns 24 bytes more */ |
165 | return 0; /* BB workaround Samba 3 bug SessSetup rsp */ | 173 | if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE) |
174 | return 0; | ||
175 | /* server can return one byte more */ | ||
176 | if (clc_len == 4 + len + 1) | ||
177 | return 0; | ||
166 | return 1; | 178 | return 1; |
167 | } | 179 | } |
168 | return 0; | 180 | return 0; |
@@ -242,7 +254,15 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) | |||
242 | ((struct smb2_query_info_rsp *)hdr)->OutputBufferLength); | 254 | ((struct smb2_query_info_rsp *)hdr)->OutputBufferLength); |
243 | break; | 255 | break; |
244 | case SMB2_READ: | 256 | case SMB2_READ: |
257 | *off = ((struct smb2_read_rsp *)hdr)->DataOffset; | ||
258 | *len = le32_to_cpu(((struct smb2_read_rsp *)hdr)->DataLength); | ||
259 | break; | ||
245 | case SMB2_QUERY_DIRECTORY: | 260 | case SMB2_QUERY_DIRECTORY: |
261 | *off = le16_to_cpu( | ||
262 | ((struct smb2_query_directory_rsp *)hdr)->OutputBufferOffset); | ||
263 | *len = le32_to_cpu( | ||
264 | ((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength); | ||
265 | break; | ||
246 | case SMB2_IOCTL: | 266 | case SMB2_IOCTL: |
247 | case SMB2_CHANGE_NOTIFY: | 267 | case SMB2_CHANGE_NOTIFY: |
248 | default: | 268 | default: |
@@ -285,8 +305,9 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) | |||
285 | * portion, the number of word parameters and the data portion of the message. | 305 | * portion, the number of word parameters and the data portion of the message. |
286 | */ | 306 | */ |
287 | unsigned int | 307 | unsigned int |
288 | smb2_calc_size(struct smb2_hdr *hdr) | 308 | smb2_calc_size(void *buf) |
289 | { | 309 | { |
310 | struct smb2_hdr *hdr = (struct smb2_hdr *)buf; | ||
290 | struct smb2_pdu *pdu = (struct smb2_pdu *)hdr; | 311 | struct smb2_pdu *pdu = (struct smb2_pdu *)hdr; |
291 | int offset; /* the offset from the beginning of SMB to data area */ | 312 | int offset; /* the offset from the beginning of SMB to data area */ |
292 | int data_length; /* the length of the variable length data area */ | 313 | int data_length; /* the length of the variable length data area */ |
@@ -345,3 +366,218 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) | |||
345 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 366 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
346 | return to; | 367 | return to; |
347 | } | 368 | } |
369 | |||
370 | __le32 | ||
371 | smb2_get_lease_state(struct cifsInodeInfo *cinode) | ||
372 | { | ||
373 | if (cinode->clientCanCacheAll) | ||
374 | return SMB2_LEASE_WRITE_CACHING | SMB2_LEASE_READ_CACHING; | ||
375 | else if (cinode->clientCanCacheRead) | ||
376 | return SMB2_LEASE_READ_CACHING; | ||
377 | return 0; | ||
378 | } | ||
379 | |||
380 | __u8 smb2_map_lease_to_oplock(__le32 lease_state) | ||
381 | { | ||
382 | if (lease_state & SMB2_LEASE_WRITE_CACHING) { | ||
383 | if (lease_state & SMB2_LEASE_HANDLE_CACHING) | ||
384 | return SMB2_OPLOCK_LEVEL_BATCH; | ||
385 | else | ||
386 | return SMB2_OPLOCK_LEVEL_EXCLUSIVE; | ||
387 | } else if (lease_state & SMB2_LEASE_READ_CACHING) | ||
388 | return SMB2_OPLOCK_LEVEL_II; | ||
389 | return 0; | ||
390 | } | ||
391 | |||
392 | struct smb2_lease_break_work { | ||
393 | struct work_struct lease_break; | ||
394 | struct tcon_link *tlink; | ||
395 | __u8 lease_key[16]; | ||
396 | __le32 lease_state; | ||
397 | }; | ||
398 | |||
399 | static void | ||
400 | cifs_ses_oplock_break(struct work_struct *work) | ||
401 | { | ||
402 | struct smb2_lease_break_work *lw = container_of(work, | ||
403 | struct smb2_lease_break_work, lease_break); | ||
404 | int rc; | ||
405 | |||
406 | rc = SMB2_lease_break(0, tlink_tcon(lw->tlink), lw->lease_key, | ||
407 | lw->lease_state); | ||
408 | cFYI(1, "Lease release rc %d", rc); | ||
409 | cifs_put_tlink(lw->tlink); | ||
410 | kfree(lw); | ||
411 | } | ||
412 | |||
413 | static bool | ||
414 | smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) | ||
415 | { | ||
416 | struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; | ||
417 | struct list_head *tmp, *tmp1, *tmp2; | ||
418 | struct cifs_ses *ses; | ||
419 | struct cifs_tcon *tcon; | ||
420 | struct cifsInodeInfo *cinode; | ||
421 | struct cifsFileInfo *cfile; | ||
422 | struct cifs_pending_open *open; | ||
423 | struct smb2_lease_break_work *lw; | ||
424 | bool found; | ||
425 | int ack_req = le32_to_cpu(rsp->Flags & | ||
426 | SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED); | ||
427 | |||
428 | lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); | ||
429 | if (!lw) { | ||
430 | cERROR(1, "Memory allocation failed during lease break check"); | ||
431 | return false; | ||
432 | } | ||
433 | |||
434 | INIT_WORK(&lw->lease_break, cifs_ses_oplock_break); | ||
435 | lw->lease_state = rsp->NewLeaseState; | ||
436 | |||
437 | cFYI(1, "Checking for lease break"); | ||
438 | |||
439 | /* look up tcon based on tid & uid */ | ||
440 | spin_lock(&cifs_tcp_ses_lock); | ||
441 | list_for_each(tmp, &server->smb_ses_list) { | ||
442 | ses = list_entry(tmp, struct cifs_ses, smb_ses_list); | ||
443 | |||
444 | spin_lock(&cifs_file_list_lock); | ||
445 | list_for_each(tmp1, &ses->tcon_list) { | ||
446 | tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); | ||
447 | |||
448 | cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); | ||
449 | list_for_each(tmp2, &tcon->openFileList) { | ||
450 | cfile = list_entry(tmp2, struct cifsFileInfo, | ||
451 | tlist); | ||
452 | cinode = CIFS_I(cfile->dentry->d_inode); | ||
453 | |||
454 | if (memcmp(cinode->lease_key, rsp->LeaseKey, | ||
455 | SMB2_LEASE_KEY_SIZE)) | ||
456 | continue; | ||
457 | |||
458 | cFYI(1, "found in the open list"); | ||
459 | cFYI(1, "lease key match, lease break 0x%d", | ||
460 | le32_to_cpu(rsp->NewLeaseState)); | ||
461 | |||
462 | smb2_set_oplock_level(cinode, | ||
463 | smb2_map_lease_to_oplock(rsp->NewLeaseState)); | ||
464 | |||
465 | if (ack_req) | ||
466 | cfile->oplock_break_cancelled = false; | ||
467 | else | ||
468 | cfile->oplock_break_cancelled = true; | ||
469 | |||
470 | queue_work(cifsiod_wq, &cfile->oplock_break); | ||
471 | |||
472 | spin_unlock(&cifs_file_list_lock); | ||
473 | spin_unlock(&cifs_tcp_ses_lock); | ||
474 | return true; | ||
475 | } | ||
476 | |||
477 | found = false; | ||
478 | list_for_each_entry(open, &tcon->pending_opens, olist) { | ||
479 | if (memcmp(open->lease_key, rsp->LeaseKey, | ||
480 | SMB2_LEASE_KEY_SIZE)) | ||
481 | continue; | ||
482 | |||
483 | if (!found && ack_req) { | ||
484 | found = true; | ||
485 | memcpy(lw->lease_key, open->lease_key, | ||
486 | SMB2_LEASE_KEY_SIZE); | ||
487 | lw->tlink = cifs_get_tlink(open->tlink); | ||
488 | queue_work(cifsiod_wq, | ||
489 | &lw->lease_break); | ||
490 | } | ||
491 | |||
492 | cFYI(1, "found in the pending open list"); | ||
493 | cFYI(1, "lease key match, lease break 0x%d", | ||
494 | le32_to_cpu(rsp->NewLeaseState)); | ||
495 | |||
496 | open->oplock = | ||
497 | smb2_map_lease_to_oplock(rsp->NewLeaseState); | ||
498 | } | ||
499 | if (found) { | ||
500 | spin_unlock(&cifs_file_list_lock); | ||
501 | spin_unlock(&cifs_tcp_ses_lock); | ||
502 | return true; | ||
503 | } | ||
504 | } | ||
505 | spin_unlock(&cifs_file_list_lock); | ||
506 | } | ||
507 | spin_unlock(&cifs_tcp_ses_lock); | ||
508 | kfree(lw); | ||
509 | cFYI(1, "Can not process lease break - no lease matched"); | ||
510 | return false; | ||
511 | } | ||
512 | |||
513 | bool | ||
514 | smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) | ||
515 | { | ||
516 | struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer; | ||
517 | struct list_head *tmp, *tmp1, *tmp2; | ||
518 | struct cifs_ses *ses; | ||
519 | struct cifs_tcon *tcon; | ||
520 | struct cifsInodeInfo *cinode; | ||
521 | struct cifsFileInfo *cfile; | ||
522 | |||
523 | cFYI(1, "Checking for oplock break"); | ||
524 | |||
525 | if (rsp->hdr.Command != SMB2_OPLOCK_BREAK) | ||
526 | return false; | ||
527 | |||
528 | if (rsp->StructureSize != | ||
529 | smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { | ||
530 | if (le16_to_cpu(rsp->StructureSize) == 44) | ||
531 | return smb2_is_valid_lease_break(buffer, server); | ||
532 | else | ||
533 | return false; | ||
534 | } | ||
535 | |||
536 | cFYI(1, "oplock level 0x%d", rsp->OplockLevel); | ||
537 | |||
538 | /* look up tcon based on tid & uid */ | ||
539 | spin_lock(&cifs_tcp_ses_lock); | ||
540 | list_for_each(tmp, &server->smb_ses_list) { | ||
541 | ses = list_entry(tmp, struct cifs_ses, smb_ses_list); | ||
542 | list_for_each(tmp1, &ses->tcon_list) { | ||
543 | tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); | ||
544 | |||
545 | cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); | ||
546 | spin_lock(&cifs_file_list_lock); | ||
547 | list_for_each(tmp2, &tcon->openFileList) { | ||
548 | cfile = list_entry(tmp2, struct cifsFileInfo, | ||
549 | tlist); | ||
550 | if (rsp->PersistentFid != | ||
551 | cfile->fid.persistent_fid || | ||
552 | rsp->VolatileFid != | ||
553 | cfile->fid.volatile_fid) | ||
554 | continue; | ||
555 | |||
556 | cFYI(1, "file id match, oplock break"); | ||
557 | cinode = CIFS_I(cfile->dentry->d_inode); | ||
558 | |||
559 | if (!cinode->clientCanCacheAll && | ||
560 | rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE) | ||
561 | cfile->oplock_break_cancelled = true; | ||
562 | else | ||
563 | cfile->oplock_break_cancelled = false; | ||
564 | |||
565 | smb2_set_oplock_level(cinode, | ||
566 | rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0); | ||
567 | |||
568 | queue_work(cifsiod_wq, &cfile->oplock_break); | ||
569 | |||
570 | spin_unlock(&cifs_file_list_lock); | ||
571 | spin_unlock(&cifs_tcp_ses_lock); | ||
572 | return true; | ||
573 | } | ||
574 | spin_unlock(&cifs_file_list_lock); | ||
575 | spin_unlock(&cifs_tcp_ses_lock); | ||
576 | cFYI(1, "No matching file for oplock break"); | ||
577 | return true; | ||
578 | } | ||
579 | } | ||
580 | spin_unlock(&cifs_tcp_ses_lock); | ||
581 | cFYI(1, "Can not process oplock break for non-existent connection"); | ||
582 | return false; | ||
583 | } | ||
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 826209bf3684..4d9dbe0b7385 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c | |||
@@ -17,11 +17,15 @@ | |||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/pagemap.h> | ||
21 | #include <linux/vfs.h> | ||
20 | #include "cifsglob.h" | 22 | #include "cifsglob.h" |
21 | #include "smb2pdu.h" | 23 | #include "smb2pdu.h" |
22 | #include "smb2proto.h" | 24 | #include "smb2proto.h" |
23 | #include "cifsproto.h" | 25 | #include "cifsproto.h" |
24 | #include "cifs_debug.h" | 26 | #include "cifs_debug.h" |
27 | #include "smb2status.h" | ||
28 | #include "smb2glob.h" | ||
25 | 29 | ||
26 | static int | 30 | static int |
27 | change_conf(struct TCP_Server_Info *server) | 31 | change_conf(struct TCP_Server_Info *server) |
@@ -63,6 +67,17 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add, | |||
63 | server->in_flight--; | 67 | server->in_flight--; |
64 | if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP) | 68 | if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP) |
65 | rc = change_conf(server); | 69 | rc = change_conf(server); |
70 | /* | ||
71 | * Sometimes server returns 0 credits on oplock break ack - we need to | ||
72 | * rebalance credits in this case. | ||
73 | */ | ||
74 | else if (server->in_flight > 0 && server->oplock_credits == 0 && | ||
75 | server->oplocks) { | ||
76 | if (server->credits > 1) { | ||
77 | server->credits--; | ||
78 | server->oplock_credits++; | ||
79 | } | ||
80 | } | ||
66 | spin_unlock(&server->req_lock); | 81 | spin_unlock(&server->req_lock); |
67 | wake_up(&server->request_q); | 82 | wake_up(&server->request_q); |
68 | if (rc) | 83 | if (rc) |
@@ -157,6 +172,42 @@ smb2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
157 | return rc; | 172 | return rc; |
158 | } | 173 | } |
159 | 174 | ||
175 | static unsigned int | ||
176 | smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) | ||
177 | { | ||
178 | struct TCP_Server_Info *server = tcon->ses->server; | ||
179 | unsigned int wsize; | ||
180 | |||
181 | /* start with specified wsize, or default */ | ||
182 | wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE; | ||
183 | wsize = min_t(unsigned int, wsize, server->max_write); | ||
184 | /* | ||
185 | * limit write size to 2 ** 16, because we don't support multicredit | ||
186 | * requests now. | ||
187 | */ | ||
188 | wsize = min_t(unsigned int, wsize, 2 << 15); | ||
189 | |||
190 | return wsize; | ||
191 | } | ||
192 | |||
193 | static unsigned int | ||
194 | smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) | ||
195 | { | ||
196 | struct TCP_Server_Info *server = tcon->ses->server; | ||
197 | unsigned int rsize; | ||
198 | |||
199 | /* start with specified rsize, or default */ | ||
200 | rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE; | ||
201 | rsize = min_t(unsigned int, rsize, server->max_read); | ||
202 | /* | ||
203 | * limit write size to 2 ** 16, because we don't support multicredit | ||
204 | * requests now. | ||
205 | */ | ||
206 | rsize = min_t(unsigned int, rsize, 2 << 15); | ||
207 | |||
208 | return rsize; | ||
209 | } | ||
210 | |||
160 | static int | 211 | static int |
161 | smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, | 212 | smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, |
162 | struct cifs_sb_info *cifs_sb, const char *full_path) | 213 | struct cifs_sb_info *cifs_sb, const char *full_path) |
@@ -164,13 +215,14 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, | |||
164 | int rc; | 215 | int rc; |
165 | __u64 persistent_fid, volatile_fid; | 216 | __u64 persistent_fid, volatile_fid; |
166 | __le16 *utf16_path; | 217 | __le16 *utf16_path; |
218 | __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; | ||
167 | 219 | ||
168 | utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); | 220 | utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); |
169 | if (!utf16_path) | 221 | if (!utf16_path) |
170 | return -ENOMEM; | 222 | return -ENOMEM; |
171 | 223 | ||
172 | rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, | 224 | rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, |
173 | FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0); | 225 | FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL); |
174 | if (rc) { | 226 | if (rc) { |
175 | kfree(utf16_path); | 227 | kfree(utf16_path); |
176 | return rc; | 228 | return rc; |
@@ -190,6 +242,26 @@ smb2_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon, | |||
190 | return 0; | 242 | return 0; |
191 | } | 243 | } |
192 | 244 | ||
245 | static int | ||
246 | smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, | ||
247 | struct cifs_fid *fid, FILE_ALL_INFO *data) | ||
248 | { | ||
249 | int rc; | ||
250 | struct smb2_file_all_info *smb2_data; | ||
251 | |||
252 | smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, | ||
253 | GFP_KERNEL); | ||
254 | if (smb2_data == NULL) | ||
255 | return -ENOMEM; | ||
256 | |||
257 | rc = SMB2_query_info(xid, tcon, fid->persistent_fid, fid->volatile_fid, | ||
258 | smb2_data); | ||
259 | if (!rc) | ||
260 | move_smb2_info_to_cifs(data, smb2_data); | ||
261 | kfree(smb2_data); | ||
262 | return rc; | ||
263 | } | ||
264 | |||
193 | static char * | 265 | static char * |
194 | smb2_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, | 266 | smb2_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, |
195 | struct cifs_tcon *tcon) | 267 | struct cifs_tcon *tcon) |
@@ -292,7 +364,221 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) | |||
292 | #endif | 364 | #endif |
293 | } | 365 | } |
294 | 366 | ||
367 | static void | ||
368 | smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) | ||
369 | { | ||
370 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); | ||
371 | cfile->fid.persistent_fid = fid->persistent_fid; | ||
372 | cfile->fid.volatile_fid = fid->volatile_fid; | ||
373 | smb2_set_oplock_level(cinode, oplock); | ||
374 | cinode->can_cache_brlcks = cinode->clientCanCacheAll; | ||
375 | } | ||
376 | |||
377 | static void | ||
378 | smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, | ||
379 | struct cifs_fid *fid) | ||
380 | { | ||
381 | SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); | ||
382 | } | ||
383 | |||
384 | static int | ||
385 | smb2_flush_file(const unsigned int xid, struct cifs_tcon *tcon, | ||
386 | struct cifs_fid *fid) | ||
387 | { | ||
388 | return SMB2_flush(xid, tcon, fid->persistent_fid, fid->volatile_fid); | ||
389 | } | ||
390 | |||
391 | static unsigned int | ||
392 | smb2_read_data_offset(char *buf) | ||
393 | { | ||
394 | struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf; | ||
395 | return rsp->DataOffset; | ||
396 | } | ||
397 | |||
398 | static unsigned int | ||
399 | smb2_read_data_length(char *buf) | ||
400 | { | ||
401 | struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf; | ||
402 | return le32_to_cpu(rsp->DataLength); | ||
403 | } | ||
404 | |||
405 | |||
406 | static int | ||
407 | smb2_sync_read(const unsigned int xid, struct cifsFileInfo *cfile, | ||
408 | struct cifs_io_parms *parms, unsigned int *bytes_read, | ||
409 | char **buf, int *buf_type) | ||
410 | { | ||
411 | parms->persistent_fid = cfile->fid.persistent_fid; | ||
412 | parms->volatile_fid = cfile->fid.volatile_fid; | ||
413 | return SMB2_read(xid, parms, bytes_read, buf, buf_type); | ||
414 | } | ||
415 | |||
416 | static int | ||
417 | smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile, | ||
418 | struct cifs_io_parms *parms, unsigned int *written, | ||
419 | struct kvec *iov, unsigned long nr_segs) | ||
420 | { | ||
421 | |||
422 | parms->persistent_fid = cfile->fid.persistent_fid; | ||
423 | parms->volatile_fid = cfile->fid.volatile_fid; | ||
424 | return SMB2_write(xid, parms, written, iov, nr_segs); | ||
425 | } | ||
426 | |||
427 | static int | ||
428 | smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, | ||
429 | struct cifsFileInfo *cfile, __u64 size, bool set_alloc) | ||
430 | { | ||
431 | __le64 eof = cpu_to_le64(size); | ||
432 | return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, | ||
433 | cfile->fid.volatile_fid, cfile->pid, &eof); | ||
434 | } | ||
435 | |||
436 | static int | ||
437 | smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, | ||
438 | const char *path, struct cifs_sb_info *cifs_sb, | ||
439 | struct cifs_fid *fid, __u16 search_flags, | ||
440 | struct cifs_search_info *srch_inf) | ||
441 | { | ||
442 | __le16 *utf16_path; | ||
443 | int rc; | ||
444 | __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; | ||
445 | __u64 persistent_fid, volatile_fid; | ||
446 | |||
447 | utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); | ||
448 | if (!utf16_path) | ||
449 | return -ENOMEM; | ||
450 | |||
451 | rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, | ||
452 | FILE_READ_ATTRIBUTES | FILE_READ_DATA, FILE_OPEN, 0, 0, | ||
453 | &oplock, NULL); | ||
454 | kfree(utf16_path); | ||
455 | if (rc) { | ||
456 | cERROR(1, "open dir failed"); | ||
457 | return rc; | ||
458 | } | ||
459 | |||
460 | srch_inf->entries_in_buffer = 0; | ||
461 | srch_inf->index_of_last_entry = 0; | ||
462 | fid->persistent_fid = persistent_fid; | ||
463 | fid->volatile_fid = volatile_fid; | ||
464 | |||
465 | rc = SMB2_query_directory(xid, tcon, persistent_fid, volatile_fid, 0, | ||
466 | srch_inf); | ||
467 | if (rc) { | ||
468 | cERROR(1, "query directory failed"); | ||
469 | SMB2_close(xid, tcon, persistent_fid, volatile_fid); | ||
470 | } | ||
471 | return rc; | ||
472 | } | ||
473 | |||
474 | static int | ||
475 | smb2_query_dir_next(const unsigned int xid, struct cifs_tcon *tcon, | ||
476 | struct cifs_fid *fid, __u16 search_flags, | ||
477 | struct cifs_search_info *srch_inf) | ||
478 | { | ||
479 | return SMB2_query_directory(xid, tcon, fid->persistent_fid, | ||
480 | fid->volatile_fid, 0, srch_inf); | ||
481 | } | ||
482 | |||
483 | static int | ||
484 | smb2_close_dir(const unsigned int xid, struct cifs_tcon *tcon, | ||
485 | struct cifs_fid *fid) | ||
486 | { | ||
487 | return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); | ||
488 | } | ||
489 | |||
490 | /* | ||
491 | * If we negotiate SMB2 protocol and get STATUS_PENDING - update | ||
492 | * the number of credits and return true. Otherwise - return false. | ||
493 | */ | ||
494 | static bool | ||
495 | smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length) | ||
496 | { | ||
497 | struct smb2_hdr *hdr = (struct smb2_hdr *)buf; | ||
498 | |||
499 | if (hdr->Status != STATUS_PENDING) | ||
500 | return false; | ||
501 | |||
502 | if (!length) { | ||
503 | spin_lock(&server->req_lock); | ||
504 | server->credits += le16_to_cpu(hdr->CreditRequest); | ||
505 | spin_unlock(&server->req_lock); | ||
506 | wake_up(&server->request_q); | ||
507 | } | ||
508 | |||
509 | return true; | ||
510 | } | ||
511 | |||
512 | static int | ||
513 | smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, | ||
514 | struct cifsInodeInfo *cinode) | ||
515 | { | ||
516 | if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) | ||
517 | return SMB2_lease_break(0, tcon, cinode->lease_key, | ||
518 | smb2_get_lease_state(cinode)); | ||
519 | |||
520 | return SMB2_oplock_break(0, tcon, fid->persistent_fid, | ||
521 | fid->volatile_fid, | ||
522 | cinode->clientCanCacheRead ? 1 : 0); | ||
523 | } | ||
524 | |||
525 | static int | ||
526 | smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, | ||
527 | struct kstatfs *buf) | ||
528 | { | ||
529 | int rc; | ||
530 | u64 persistent_fid, volatile_fid; | ||
531 | __le16 srch_path = 0; /* Null - open root of share */ | ||
532 | u8 oplock = SMB2_OPLOCK_LEVEL_NONE; | ||
533 | |||
534 | rc = SMB2_open(xid, tcon, &srch_path, &persistent_fid, &volatile_fid, | ||
535 | FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL); | ||
536 | if (rc) | ||
537 | return rc; | ||
538 | buf->f_type = SMB2_MAGIC_NUMBER; | ||
539 | rc = SMB2_QFS_info(xid, tcon, persistent_fid, volatile_fid, buf); | ||
540 | SMB2_close(xid, tcon, persistent_fid, volatile_fid); | ||
541 | return rc; | ||
542 | } | ||
543 | |||
544 | static bool | ||
545 | smb2_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2) | ||
546 | { | ||
547 | return ob1->fid.persistent_fid == ob2->fid.persistent_fid && | ||
548 | ob1->fid.volatile_fid == ob2->fid.volatile_fid; | ||
549 | } | ||
550 | |||
551 | static int | ||
552 | smb2_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, | ||
553 | __u64 length, __u32 type, int lock, int unlock, bool wait) | ||
554 | { | ||
555 | if (unlock && !lock) | ||
556 | type = SMB2_LOCKFLAG_UNLOCK; | ||
557 | return SMB2_lock(xid, tlink_tcon(cfile->tlink), | ||
558 | cfile->fid.persistent_fid, cfile->fid.volatile_fid, | ||
559 | current->tgid, length, offset, type, wait); | ||
560 | } | ||
561 | |||
562 | static void | ||
563 | smb2_get_lease_key(struct inode *inode, struct cifs_fid *fid) | ||
564 | { | ||
565 | memcpy(fid->lease_key, CIFS_I(inode)->lease_key, SMB2_LEASE_KEY_SIZE); | ||
566 | } | ||
567 | |||
568 | static void | ||
569 | smb2_set_lease_key(struct inode *inode, struct cifs_fid *fid) | ||
570 | { | ||
571 | memcpy(CIFS_I(inode)->lease_key, fid->lease_key, SMB2_LEASE_KEY_SIZE); | ||
572 | } | ||
573 | |||
574 | static void | ||
575 | smb2_new_lease_key(struct cifs_fid *fid) | ||
576 | { | ||
577 | get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE); | ||
578 | } | ||
579 | |||
295 | struct smb_version_operations smb21_operations = { | 580 | struct smb_version_operations smb21_operations = { |
581 | .compare_fids = smb2_compare_fids, | ||
296 | .setup_request = smb2_setup_request, | 582 | .setup_request = smb2_setup_request, |
297 | .setup_async_request = smb2_setup_async_request, | 583 | .setup_async_request = smb2_setup_async_request, |
298 | .check_receive = smb2_check_receive, | 584 | .check_receive = smb2_check_receive, |
@@ -301,13 +587,19 @@ struct smb_version_operations smb21_operations = { | |||
301 | .get_credits_field = smb2_get_credits_field, | 587 | .get_credits_field = smb2_get_credits_field, |
302 | .get_credits = smb2_get_credits, | 588 | .get_credits = smb2_get_credits, |
303 | .get_next_mid = smb2_get_next_mid, | 589 | .get_next_mid = smb2_get_next_mid, |
590 | .read_data_offset = smb2_read_data_offset, | ||
591 | .read_data_length = smb2_read_data_length, | ||
592 | .map_error = map_smb2_to_linux_error, | ||
304 | .find_mid = smb2_find_mid, | 593 | .find_mid = smb2_find_mid, |
305 | .check_message = smb2_check_message, | 594 | .check_message = smb2_check_message, |
306 | .dump_detail = smb2_dump_detail, | 595 | .dump_detail = smb2_dump_detail, |
307 | .clear_stats = smb2_clear_stats, | 596 | .clear_stats = smb2_clear_stats, |
308 | .print_stats = smb2_print_stats, | 597 | .print_stats = smb2_print_stats, |
598 | .is_oplock_break = smb2_is_valid_oplock_break, | ||
309 | .need_neg = smb2_need_neg, | 599 | .need_neg = smb2_need_neg, |
310 | .negotiate = smb2_negotiate, | 600 | .negotiate = smb2_negotiate, |
601 | .negotiate_wsize = smb2_negotiate_wsize, | ||
602 | .negotiate_rsize = smb2_negotiate_rsize, | ||
311 | .sess_setup = SMB2_sess_setup, | 603 | .sess_setup = SMB2_sess_setup, |
312 | .logoff = SMB2_logoff, | 604 | .logoff = SMB2_logoff, |
313 | .tree_connect = SMB2_tcon, | 605 | .tree_connect = SMB2_tcon, |
@@ -317,16 +609,68 @@ struct smb_version_operations smb21_operations = { | |||
317 | .echo = SMB2_echo, | 609 | .echo = SMB2_echo, |
318 | .query_path_info = smb2_query_path_info, | 610 | .query_path_info = smb2_query_path_info, |
319 | .get_srv_inum = smb2_get_srv_inum, | 611 | .get_srv_inum = smb2_get_srv_inum, |
612 | .query_file_info = smb2_query_file_info, | ||
613 | .set_path_size = smb2_set_path_size, | ||
614 | .set_file_size = smb2_set_file_size, | ||
615 | .set_file_info = smb2_set_file_info, | ||
320 | .build_path_to_root = smb2_build_path_to_root, | 616 | .build_path_to_root = smb2_build_path_to_root, |
321 | .mkdir = smb2_mkdir, | 617 | .mkdir = smb2_mkdir, |
322 | .mkdir_setinfo = smb2_mkdir_setinfo, | 618 | .mkdir_setinfo = smb2_mkdir_setinfo, |
323 | .rmdir = smb2_rmdir, | 619 | .rmdir = smb2_rmdir, |
620 | .unlink = smb2_unlink, | ||
621 | .rename = smb2_rename_path, | ||
622 | .create_hardlink = smb2_create_hardlink, | ||
623 | .open = smb2_open_file, | ||
624 | .set_fid = smb2_set_fid, | ||
625 | .close = smb2_close_file, | ||
626 | .flush = smb2_flush_file, | ||
627 | .async_readv = smb2_async_readv, | ||
628 | .async_writev = smb2_async_writev, | ||
629 | .sync_read = smb2_sync_read, | ||
630 | .sync_write = smb2_sync_write, | ||
631 | .query_dir_first = smb2_query_dir_first, | ||
632 | .query_dir_next = smb2_query_dir_next, | ||
633 | .close_dir = smb2_close_dir, | ||
634 | .calc_smb_size = smb2_calc_size, | ||
635 | .is_status_pending = smb2_is_status_pending, | ||
636 | .oplock_response = smb2_oplock_response, | ||
637 | .queryfs = smb2_queryfs, | ||
638 | .mand_lock = smb2_mand_lock, | ||
639 | .mand_unlock_range = smb2_unlock_range, | ||
640 | .push_mand_locks = smb2_push_mandatory_locks, | ||
641 | .get_lease_key = smb2_get_lease_key, | ||
642 | .set_lease_key = smb2_set_lease_key, | ||
643 | .new_lease_key = smb2_new_lease_key, | ||
324 | }; | 644 | }; |
325 | 645 | ||
326 | struct smb_version_values smb21_values = { | 646 | struct smb_version_values smb21_values = { |
327 | .version_string = SMB21_VERSION_STRING, | 647 | .version_string = SMB21_VERSION_STRING, |
648 | .protocol_id = SMB21_PROT_ID, | ||
649 | .req_capabilities = 0, /* MBZ on negotiate req until SMB3 dialect */ | ||
650 | .large_lock_type = 0, | ||
651 | .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, | ||
652 | .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, | ||
653 | .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, | ||
654 | .header_size = sizeof(struct smb2_hdr), | ||
655 | .max_header_size = MAX_SMB2_HDR_SIZE, | ||
656 | .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, | ||
657 | .lock_cmd = SMB2_LOCK, | ||
658 | .cap_unix = 0, | ||
659 | .cap_nt_find = SMB2_NT_FIND, | ||
660 | .cap_large_files = SMB2_LARGE_FILES, | ||
661 | }; | ||
662 | |||
663 | struct smb_version_values smb30_values = { | ||
664 | .version_string = SMB30_VERSION_STRING, | ||
665 | .protocol_id = SMB30_PROT_ID, | ||
666 | .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, | ||
667 | .large_lock_type = 0, | ||
668 | .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, | ||
669 | .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, | ||
670 | .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, | ||
328 | .header_size = sizeof(struct smb2_hdr), | 671 | .header_size = sizeof(struct smb2_hdr), |
329 | .max_header_size = MAX_SMB2_HDR_SIZE, | 672 | .max_header_size = MAX_SMB2_HDR_SIZE, |
673 | .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, | ||
330 | .lock_cmd = SMB2_LOCK, | 674 | .lock_cmd = SMB2_LOCK, |
331 | .cap_unix = 0, | 675 | .cap_unix = 0, |
332 | .cap_nt_find = SMB2_NT_FIND, | 676 | .cap_nt_find = SMB2_NT_FIND, |
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 62b3f17d0613..cf33622cdac8 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * fs/cifs/smb2pdu.c | 2 | * fs/cifs/smb2pdu.c |
3 | * | 3 | * |
4 | * Copyright (C) International Business Machines Corp., 2009, 2011 | 4 | * Copyright (C) International Business Machines Corp., 2009, 2012 |
5 | * Etersoft, 2012 | 5 | * Etersoft, 2012 |
6 | * Author(s): Steve French (sfrench@us.ibm.com) | 6 | * Author(s): Steve French (sfrench@us.ibm.com) |
7 | * Pavel Shilovsky (pshilovsky@samba.org) 2012 | 7 | * Pavel Shilovsky (pshilovsky@samba.org) 2012 |
@@ -31,7 +31,9 @@ | |||
31 | #include <linux/fs.h> | 31 | #include <linux/fs.h> |
32 | #include <linux/kernel.h> | 32 | #include <linux/kernel.h> |
33 | #include <linux/vfs.h> | 33 | #include <linux/vfs.h> |
34 | #include <linux/task_io_accounting_ops.h> | ||
34 | #include <linux/uaccess.h> | 35 | #include <linux/uaccess.h> |
36 | #include <linux/pagemap.h> | ||
35 | #include <linux/xattr.h> | 37 | #include <linux/xattr.h> |
36 | #include "smb2pdu.h" | 38 | #include "smb2pdu.h" |
37 | #include "cifsglob.h" | 39 | #include "cifsglob.h" |
@@ -42,6 +44,8 @@ | |||
42 | #include "cifs_debug.h" | 44 | #include "cifs_debug.h" |
43 | #include "ntlmssp.h" | 45 | #include "ntlmssp.h" |
44 | #include "smb2status.h" | 46 | #include "smb2status.h" |
47 | #include "smb2glob.h" | ||
48 | #include "cifspdu.h" | ||
45 | 49 | ||
46 | /* | 50 | /* |
47 | * The following table defines the expected "StructureSize" of SMB2 requests | 51 | * The following table defines the expected "StructureSize" of SMB2 requests |
@@ -115,9 +119,9 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ , | |||
115 | /* BB how does SMB2 do case sensitive? */ | 119 | /* BB how does SMB2 do case sensitive? */ |
116 | /* if (tcon->nocase) | 120 | /* if (tcon->nocase) |
117 | hdr->Flags |= SMBFLG_CASELESS; */ | 121 | hdr->Flags |= SMBFLG_CASELESS; */ |
118 | /* if (tcon->ses && tcon->ses->server && | 122 | if (tcon->ses && tcon->ses->server && |
119 | (tcon->ses->server->sec_mode & SECMODE_SIGN_REQUIRED)) | 123 | (tcon->ses->server->sec_mode & SECMODE_SIGN_REQUIRED)) |
120 | hdr->Flags |= SMB2_FLAGS_SIGNED; */ | 124 | hdr->Flags |= SMB2_FLAGS_SIGNED; |
121 | out: | 125 | out: |
122 | pdu->StructureSize2 = cpu_to_le16(parmsize); | 126 | pdu->StructureSize2 = cpu_to_le16(parmsize); |
123 | return; | 127 | return; |
@@ -300,24 +304,6 @@ free_rsp_buf(int resp_buftype, void *rsp) | |||
300 | cifs_buf_release(rsp); | 304 | cifs_buf_release(rsp); |
301 | } | 305 | } |
302 | 306 | ||
303 | #define SMB2_NUM_PROT 1 | ||
304 | |||
305 | #define SMB2_PROT 0 | ||
306 | #define SMB21_PROT 1 | ||
307 | #define BAD_PROT 0xFFFF | ||
308 | |||
309 | #define SMB2_PROT_ID 0x0202 | ||
310 | #define SMB21_PROT_ID 0x0210 | ||
311 | #define BAD_PROT_ID 0xFFFF | ||
312 | |||
313 | static struct { | ||
314 | int index; | ||
315 | __le16 name; | ||
316 | } smb2protocols[] = { | ||
317 | {SMB2_PROT, cpu_to_le16(SMB2_PROT_ID)}, | ||
318 | {SMB21_PROT, cpu_to_le16(SMB21_PROT_ID)}, | ||
319 | {BAD_PROT, cpu_to_le16(BAD_PROT_ID)} | ||
320 | }; | ||
321 | 307 | ||
322 | /* | 308 | /* |
323 | * | 309 | * |
@@ -344,7 +330,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
344 | int resp_buftype; | 330 | int resp_buftype; |
345 | struct TCP_Server_Info *server; | 331 | struct TCP_Server_Info *server; |
346 | unsigned int sec_flags; | 332 | unsigned int sec_flags; |
347 | u16 i; | ||
348 | u16 temp = 0; | 333 | u16 temp = 0; |
349 | int blob_offset, blob_length; | 334 | int blob_offset, blob_length; |
350 | char *security_blob; | 335 | char *security_blob; |
@@ -373,11 +358,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
373 | 358 | ||
374 | req->hdr.SessionId = 0; | 359 | req->hdr.SessionId = 0; |
375 | 360 | ||
376 | for (i = 0; i < SMB2_NUM_PROT; i++) | 361 | req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); |
377 | req->Dialects[i] = smb2protocols[i].name; | ||
378 | 362 | ||
379 | req->DialectCount = cpu_to_le16(i); | 363 | req->DialectCount = cpu_to_le16(1); /* One vers= at a time for now */ |
380 | inc_rfc1001_len(req, i * 2); | 364 | inc_rfc1001_len(req, 2); |
381 | 365 | ||
382 | /* only one of SMB2 signing flags may be set in SMB2 request */ | 366 | /* only one of SMB2 signing flags may be set in SMB2 request */ |
383 | if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) | 367 | if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) |
@@ -387,7 +371,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
387 | 371 | ||
388 | req->SecurityMode = cpu_to_le16(temp); | 372 | req->SecurityMode = cpu_to_le16(temp); |
389 | 373 | ||
390 | req->Capabilities = cpu_to_le32(SMB2_GLOBAL_CAP_DFS); | 374 | req->Capabilities = cpu_to_le32(ses->server->vals->req_capabilities); |
375 | |||
376 | memcpy(req->ClientGUID, cifs_client_guid, SMB2_CLIENT_GUID_SIZE); | ||
391 | 377 | ||
392 | iov[0].iov_base = (char *)req; | 378 | iov[0].iov_base = (char *)req; |
393 | /* 4 for rfc1002 length field */ | 379 | /* 4 for rfc1002 length field */ |
@@ -403,17 +389,16 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
403 | if (rc != 0) | 389 | if (rc != 0) |
404 | goto neg_exit; | 390 | goto neg_exit; |
405 | 391 | ||
406 | if (rsp == NULL) { | ||
407 | rc = -EIO; | ||
408 | goto neg_exit; | ||
409 | } | ||
410 | |||
411 | cFYI(1, "mode 0x%x", rsp->SecurityMode); | 392 | cFYI(1, "mode 0x%x", rsp->SecurityMode); |
412 | 393 | ||
413 | if (rsp->DialectRevision == smb2protocols[SMB21_PROT].name) | 394 | /* BB we may eventually want to match the negotiated vs. requested |
395 | dialect, even though we are only requesting one at a time */ | ||
396 | if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) | ||
397 | cFYI(1, "negotiated smb2.0 dialect"); | ||
398 | else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) | ||
414 | cFYI(1, "negotiated smb2.1 dialect"); | 399 | cFYI(1, "negotiated smb2.1 dialect"); |
415 | else if (rsp->DialectRevision == smb2protocols[SMB2_PROT].name) | 400 | else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID)) |
416 | cFYI(1, "negotiated smb2 dialect"); | 401 | cFYI(1, "negotiated smb3.0 dialect"); |
417 | else { | 402 | else { |
418 | cERROR(1, "Illegal dialect returned by server %d", | 403 | cERROR(1, "Illegal dialect returned by server %d", |
419 | le16_to_cpu(rsp->DialectRevision)); | 404 | le16_to_cpu(rsp->DialectRevision)); |
@@ -438,6 +423,38 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
438 | rc = -EIO; | 423 | rc = -EIO; |
439 | goto neg_exit; | 424 | goto neg_exit; |
440 | } | 425 | } |
426 | |||
427 | cFYI(1, "sec_flags 0x%x", sec_flags); | ||
428 | if (sec_flags & CIFSSEC_MUST_SIGN) { | ||
429 | cFYI(1, "Signing required"); | ||
430 | if (!(server->sec_mode & (SMB2_NEGOTIATE_SIGNING_REQUIRED | | ||
431 | SMB2_NEGOTIATE_SIGNING_ENABLED))) { | ||
432 | cERROR(1, "signing required but server lacks support"); | ||
433 | rc = -EOPNOTSUPP; | ||
434 | goto neg_exit; | ||
435 | } | ||
436 | server->sec_mode |= SECMODE_SIGN_REQUIRED; | ||
437 | } else if (sec_flags & CIFSSEC_MAY_SIGN) { | ||
438 | cFYI(1, "Signing optional"); | ||
439 | if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { | ||
440 | cFYI(1, "Server requires signing"); | ||
441 | server->sec_mode |= SECMODE_SIGN_REQUIRED; | ||
442 | } else { | ||
443 | server->sec_mode &= | ||
444 | ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); | ||
445 | } | ||
446 | } else { | ||
447 | cFYI(1, "Signing disabled"); | ||
448 | if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { | ||
449 | cERROR(1, "Server requires packet signing to be enabled" | ||
450 | " in /proc/fs/cifs/SecurityFlags."); | ||
451 | rc = -EOPNOTSUPP; | ||
452 | goto neg_exit; | ||
453 | } | ||
454 | server->sec_mode &= | ||
455 | ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); | ||
456 | } | ||
457 | |||
441 | #ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */ | 458 | #ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */ |
442 | rc = decode_neg_token_init(security_blob, blob_length, | 459 | rc = decode_neg_token_init(security_blob, blob_length, |
443 | &server->sec_type); | 460 | &server->sec_type); |
@@ -599,13 +616,14 @@ ssetup_ntlmssp_authenticate: | |||
599 | 616 | ||
600 | kfree(security_blob); | 617 | kfree(security_blob); |
601 | rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base; | 618 | rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base; |
602 | if (rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) { | 619 | if (resp_buftype != CIFS_NO_BUFFER && |
620 | rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) { | ||
603 | if (phase != NtLmNegotiate) { | 621 | if (phase != NtLmNegotiate) { |
604 | cERROR(1, "Unexpected more processing error"); | 622 | cERROR(1, "Unexpected more processing error"); |
605 | goto ssetup_exit; | 623 | goto ssetup_exit; |
606 | } | 624 | } |
607 | if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 != | 625 | if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 != |
608 | le16_to_cpu(rsp->SecurityBufferOffset)) { | 626 | le16_to_cpu(rsp->SecurityBufferOffset)) { |
609 | cERROR(1, "Invalid security buffer offset %d", | 627 | cERROR(1, "Invalid security buffer offset %d", |
610 | le16_to_cpu(rsp->SecurityBufferOffset)); | 628 | le16_to_cpu(rsp->SecurityBufferOffset)); |
611 | rc = -EIO; | 629 | rc = -EIO; |
@@ -631,11 +649,6 @@ ssetup_ntlmssp_authenticate: | |||
631 | if (rc != 0) | 649 | if (rc != 0) |
632 | goto ssetup_exit; | 650 | goto ssetup_exit; |
633 | 651 | ||
634 | if (rsp == NULL) { | ||
635 | rc = -EIO; | ||
636 | goto ssetup_exit; | ||
637 | } | ||
638 | |||
639 | ses->session_flags = le16_to_cpu(rsp->SessionFlags); | 652 | ses->session_flags = le16_to_cpu(rsp->SessionFlags); |
640 | ssetup_exit: | 653 | ssetup_exit: |
641 | free_rsp_buf(resp_buftype, rsp); | 654 | free_rsp_buf(resp_buftype, rsp); |
@@ -666,6 +679,8 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) | |||
666 | 679 | ||
667 | /* since no tcon, smb2_init can not do this, so do here */ | 680 | /* since no tcon, smb2_init can not do this, so do here */ |
668 | req->hdr.SessionId = ses->Suid; | 681 | req->hdr.SessionId = ses->Suid; |
682 | if (server->sec_mode & SECMODE_SIGN_REQUIRED) | ||
683 | req->hdr.Flags |= SMB2_FLAGS_SIGNED; | ||
669 | 684 | ||
670 | rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0); | 685 | rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0); |
671 | /* | 686 | /* |
@@ -753,11 +768,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, | |||
753 | goto tcon_error_exit; | 768 | goto tcon_error_exit; |
754 | } | 769 | } |
755 | 770 | ||
756 | if (rsp == NULL) { | ||
757 | rc = -EIO; | ||
758 | goto tcon_exit; | ||
759 | } | ||
760 | |||
761 | if (tcon == NULL) { | 771 | if (tcon == NULL) { |
762 | ses->ipc_tid = rsp->hdr.TreeId; | 772 | ses->ipc_tid = rsp->hdr.TreeId; |
763 | goto tcon_exit; | 773 | goto tcon_exit; |
@@ -830,18 +840,87 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) | |||
830 | return rc; | 840 | return rc; |
831 | } | 841 | } |
832 | 842 | ||
843 | static struct create_lease * | ||
844 | create_lease_buf(u8 *lease_key, u8 oplock) | ||
845 | { | ||
846 | struct create_lease *buf; | ||
847 | |||
848 | buf = kmalloc(sizeof(struct create_lease), GFP_KERNEL); | ||
849 | if (!buf) | ||
850 | return NULL; | ||
851 | |||
852 | memset(buf, 0, sizeof(struct create_lease)); | ||
853 | |||
854 | buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); | ||
855 | buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); | ||
856 | if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) | ||
857 | buf->lcontext.LeaseState = SMB2_LEASE_WRITE_CACHING | | ||
858 | SMB2_LEASE_READ_CACHING; | ||
859 | else if (oplock == SMB2_OPLOCK_LEVEL_II) | ||
860 | buf->lcontext.LeaseState = SMB2_LEASE_READ_CACHING; | ||
861 | else if (oplock == SMB2_OPLOCK_LEVEL_BATCH) | ||
862 | buf->lcontext.LeaseState = SMB2_LEASE_HANDLE_CACHING | | ||
863 | SMB2_LEASE_READ_CACHING | | ||
864 | SMB2_LEASE_WRITE_CACHING; | ||
865 | |||
866 | buf->ccontext.DataOffset = cpu_to_le16(offsetof | ||
867 | (struct create_lease, lcontext)); | ||
868 | buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context)); | ||
869 | buf->ccontext.NameOffset = cpu_to_le16(offsetof | ||
870 | (struct create_lease, Name)); | ||
871 | buf->ccontext.NameLength = cpu_to_le16(4); | ||
872 | buf->Name[0] = 'R'; | ||
873 | buf->Name[1] = 'q'; | ||
874 | buf->Name[2] = 'L'; | ||
875 | buf->Name[3] = 's'; | ||
876 | return buf; | ||
877 | } | ||
878 | |||
879 | static __u8 | ||
880 | parse_lease_state(struct smb2_create_rsp *rsp) | ||
881 | { | ||
882 | char *data_offset; | ||
883 | struct create_lease *lc; | ||
884 | bool found = false; | ||
885 | |||
886 | data_offset = (char *)rsp; | ||
887 | data_offset += 4 + le32_to_cpu(rsp->CreateContextsOffset); | ||
888 | lc = (struct create_lease *)data_offset; | ||
889 | do { | ||
890 | char *name = le16_to_cpu(lc->ccontext.NameOffset) + (char *)lc; | ||
891 | if (le16_to_cpu(lc->ccontext.NameLength) != 4 || | ||
892 | strncmp(name, "RqLs", 4)) { | ||
893 | lc = (struct create_lease *)((char *)lc | ||
894 | + le32_to_cpu(lc->ccontext.Next)); | ||
895 | continue; | ||
896 | } | ||
897 | if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) | ||
898 | return SMB2_OPLOCK_LEVEL_NOCHANGE; | ||
899 | found = true; | ||
900 | break; | ||
901 | } while (le32_to_cpu(lc->ccontext.Next) != 0); | ||
902 | |||
903 | if (!found) | ||
904 | return 0; | ||
905 | |||
906 | return smb2_map_lease_to_oplock(lc->lcontext.LeaseState); | ||
907 | } | ||
908 | |||
833 | int | 909 | int |
834 | SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, | 910 | SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, |
835 | u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access, | 911 | u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access, |
836 | __u32 create_disposition, __u32 file_attributes, __u32 create_options) | 912 | __u32 create_disposition, __u32 file_attributes, __u32 create_options, |
913 | __u8 *oplock, struct smb2_file_all_info *buf) | ||
837 | { | 914 | { |
838 | struct smb2_create_req *req; | 915 | struct smb2_create_req *req; |
839 | struct smb2_create_rsp *rsp; | 916 | struct smb2_create_rsp *rsp; |
840 | struct TCP_Server_Info *server; | 917 | struct TCP_Server_Info *server; |
841 | struct cifs_ses *ses = tcon->ses; | 918 | struct cifs_ses *ses = tcon->ses; |
842 | struct kvec iov[2]; | 919 | struct kvec iov[3]; |
843 | int resp_buftype; | 920 | int resp_buftype; |
844 | int uni_path_len; | 921 | int uni_path_len; |
922 | __le16 *copy_path = NULL; | ||
923 | int copy_size; | ||
845 | int rc = 0; | 924 | int rc = 0; |
846 | int num_iovecs = 2; | 925 | int num_iovecs = 2; |
847 | 926 | ||
@@ -856,10 +935,6 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, | |||
856 | if (rc) | 935 | if (rc) |
857 | return rc; | 936 | return rc; |
858 | 937 | ||
859 | if (enable_oplocks) | ||
860 | req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_BATCH; | ||
861 | else | ||
862 | req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE; | ||
863 | req->ImpersonationLevel = IL_IMPERSONATION; | 938 | req->ImpersonationLevel = IL_IMPERSONATION; |
864 | req->DesiredAccess = cpu_to_le32(desired_access); | 939 | req->DesiredAccess = cpu_to_le32(desired_access); |
865 | /* File attributes ignored on open (used in create though) */ | 940 | /* File attributes ignored on open (used in create though) */ |
@@ -869,7 +944,7 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, | |||
869 | req->CreateOptions = cpu_to_le32(create_options); | 944 | req->CreateOptions = cpu_to_le32(create_options); |
870 | uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; | 945 | uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; |
871 | req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) | 946 | req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) |
872 | - 1 /* pad */ - 4 /* do not count rfc1001 len field */); | 947 | - 8 /* pad */ - 4 /* do not count rfc1001 len field */); |
873 | 948 | ||
874 | iov[0].iov_base = (char *)req; | 949 | iov[0].iov_base = (char *)req; |
875 | /* 4 for rfc1002 length field */ | 950 | /* 4 for rfc1002 length field */ |
@@ -880,6 +955,20 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, | |||
880 | req->NameLength = cpu_to_le16(uni_path_len - 2); | 955 | req->NameLength = cpu_to_le16(uni_path_len - 2); |
881 | /* -1 since last byte is buf[0] which is sent below (path) */ | 956 | /* -1 since last byte is buf[0] which is sent below (path) */ |
882 | iov[0].iov_len--; | 957 | iov[0].iov_len--; |
958 | if (uni_path_len % 8 != 0) { | ||
959 | copy_size = uni_path_len / 8 * 8; | ||
960 | if (copy_size < uni_path_len) | ||
961 | copy_size += 8; | ||
962 | |||
963 | copy_path = kzalloc(copy_size, GFP_KERNEL); | ||
964 | if (!copy_path) | ||
965 | return -ENOMEM; | ||
966 | memcpy((char *)copy_path, (const char *)path, | ||
967 | uni_path_len); | ||
968 | uni_path_len = copy_size; | ||
969 | path = copy_path; | ||
970 | } | ||
971 | |||
883 | iov[1].iov_len = uni_path_len; | 972 | iov[1].iov_len = uni_path_len; |
884 | iov[1].iov_base = path; | 973 | iov[1].iov_base = path; |
885 | /* | 974 | /* |
@@ -888,10 +977,37 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, | |||
888 | */ | 977 | */ |
889 | inc_rfc1001_len(req, uni_path_len - 1); | 978 | inc_rfc1001_len(req, uni_path_len - 1); |
890 | } else { | 979 | } else { |
980 | iov[0].iov_len += 7; | ||
981 | req->hdr.smb2_buf_length = cpu_to_be32(be32_to_cpu( | ||
982 | req->hdr.smb2_buf_length) + 8 - 1); | ||
891 | num_iovecs = 1; | 983 | num_iovecs = 1; |
892 | req->NameLength = 0; | 984 | req->NameLength = 0; |
893 | } | 985 | } |
894 | 986 | ||
987 | if (!server->oplocks) | ||
988 | *oplock = SMB2_OPLOCK_LEVEL_NONE; | ||
989 | |||
990 | if (!(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) || | ||
991 | *oplock == SMB2_OPLOCK_LEVEL_NONE) | ||
992 | req->RequestedOplockLevel = *oplock; | ||
993 | else { | ||
994 | iov[num_iovecs].iov_base = create_lease_buf(oplock+1, *oplock); | ||
995 | if (iov[num_iovecs].iov_base == NULL) { | ||
996 | cifs_small_buf_release(req); | ||
997 | kfree(copy_path); | ||
998 | return -ENOMEM; | ||
999 | } | ||
1000 | iov[num_iovecs].iov_len = sizeof(struct create_lease); | ||
1001 | req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE; | ||
1002 | req->CreateContextsOffset = cpu_to_le32( | ||
1003 | sizeof(struct smb2_create_req) - 4 - 8 + | ||
1004 | iov[num_iovecs-1].iov_len); | ||
1005 | req->CreateContextsLength = cpu_to_le32( | ||
1006 | sizeof(struct create_lease)); | ||
1007 | inc_rfc1001_len(&req->hdr, sizeof(struct create_lease)); | ||
1008 | num_iovecs++; | ||
1009 | } | ||
1010 | |||
895 | rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); | 1011 | rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); |
896 | rsp = (struct smb2_create_rsp *)iov[0].iov_base; | 1012 | rsp = (struct smb2_create_rsp *)iov[0].iov_base; |
897 | 1013 | ||
@@ -900,13 +1016,24 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, | |||
900 | goto creat_exit; | 1016 | goto creat_exit; |
901 | } | 1017 | } |
902 | 1018 | ||
903 | if (rsp == NULL) { | ||
904 | rc = -EIO; | ||
905 | goto creat_exit; | ||
906 | } | ||
907 | *persistent_fid = rsp->PersistentFileId; | 1019 | *persistent_fid = rsp->PersistentFileId; |
908 | *volatile_fid = rsp->VolatileFileId; | 1020 | *volatile_fid = rsp->VolatileFileId; |
1021 | |||
1022 | if (buf) { | ||
1023 | memcpy(buf, &rsp->CreationTime, 32); | ||
1024 | buf->AllocationSize = rsp->AllocationSize; | ||
1025 | buf->EndOfFile = rsp->EndofFile; | ||
1026 | buf->Attributes = rsp->FileAttributes; | ||
1027 | buf->NumberOfLinks = cpu_to_le32(1); | ||
1028 | buf->DeletePending = 0; | ||
1029 | } | ||
1030 | |||
1031 | if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) | ||
1032 | *oplock = parse_lease_state(rsp); | ||
1033 | else | ||
1034 | *oplock = rsp->OplockLevel; | ||
909 | creat_exit: | 1035 | creat_exit: |
1036 | kfree(copy_path); | ||
910 | free_rsp_buf(resp_buftype, rsp); | 1037 | free_rsp_buf(resp_buftype, rsp); |
911 | return rc; | 1038 | return rc; |
912 | } | 1039 | } |
@@ -950,11 +1077,6 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, | |||
950 | goto close_exit; | 1077 | goto close_exit; |
951 | } | 1078 | } |
952 | 1079 | ||
953 | if (rsp == NULL) { | ||
954 | rc = -EIO; | ||
955 | goto close_exit; | ||
956 | } | ||
957 | |||
958 | /* BB FIXME - decode close response, update inode for caching */ | 1080 | /* BB FIXME - decode close response, update inode for caching */ |
959 | 1081 | ||
960 | close_exit: | 1082 | close_exit: |
@@ -1019,10 +1141,10 @@ validate_and_copy_buf(unsigned int offset, unsigned int buffer_length, | |||
1019 | return 0; | 1141 | return 0; |
1020 | } | 1142 | } |
1021 | 1143 | ||
1022 | int | 1144 | static int |
1023 | SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, | 1145 | query_info(const unsigned int xid, struct cifs_tcon *tcon, |
1024 | u64 persistent_fid, u64 volatile_fid, | 1146 | u64 persistent_fid, u64 volatile_fid, u8 info_class, |
1025 | struct smb2_file_all_info *data) | 1147 | size_t output_len, size_t min_len, void *data) |
1026 | { | 1148 | { |
1027 | struct smb2_query_info_req *req; | 1149 | struct smb2_query_info_req *req; |
1028 | struct smb2_query_info_rsp *rsp = NULL; | 1150 | struct smb2_query_info_rsp *rsp = NULL; |
@@ -1044,37 +1166,56 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, | |||
1044 | return rc; | 1166 | return rc; |
1045 | 1167 | ||
1046 | req->InfoType = SMB2_O_INFO_FILE; | 1168 | req->InfoType = SMB2_O_INFO_FILE; |
1047 | req->FileInfoClass = FILE_ALL_INFORMATION; | 1169 | req->FileInfoClass = info_class; |
1048 | req->PersistentFileId = persistent_fid; | 1170 | req->PersistentFileId = persistent_fid; |
1049 | req->VolatileFileId = volatile_fid; | 1171 | req->VolatileFileId = volatile_fid; |
1050 | /* 4 for rfc1002 length field and 1 for Buffer */ | 1172 | /* 4 for rfc1002 length field and 1 for Buffer */ |
1051 | req->InputBufferOffset = | 1173 | req->InputBufferOffset = |
1052 | cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4); | 1174 | cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4); |
1053 | req->OutputBufferLength = | 1175 | req->OutputBufferLength = cpu_to_le32(output_len); |
1054 | cpu_to_le32(sizeof(struct smb2_file_all_info) + MAX_NAME * 2); | ||
1055 | 1176 | ||
1056 | iov[0].iov_base = (char *)req; | 1177 | iov[0].iov_base = (char *)req; |
1057 | /* 4 for rfc1002 length field */ | 1178 | /* 4 for rfc1002 length field */ |
1058 | iov[0].iov_len = get_rfc1002_length(req) + 4; | 1179 | iov[0].iov_len = get_rfc1002_length(req) + 4; |
1059 | 1180 | ||
1060 | rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); | 1181 | rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); |
1182 | rsp = (struct smb2_query_info_rsp *)iov[0].iov_base; | ||
1183 | |||
1061 | if (rc) { | 1184 | if (rc) { |
1062 | cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); | 1185 | cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); |
1063 | goto qinf_exit; | 1186 | goto qinf_exit; |
1064 | } | 1187 | } |
1065 | 1188 | ||
1066 | rsp = (struct smb2_query_info_rsp *)iov[0].iov_base; | ||
1067 | |||
1068 | rc = validate_and_copy_buf(le16_to_cpu(rsp->OutputBufferOffset), | 1189 | rc = validate_and_copy_buf(le16_to_cpu(rsp->OutputBufferOffset), |
1069 | le32_to_cpu(rsp->OutputBufferLength), | 1190 | le32_to_cpu(rsp->OutputBufferLength), |
1070 | &rsp->hdr, sizeof(struct smb2_file_all_info), | 1191 | &rsp->hdr, min_len, data); |
1071 | (char *)data); | ||
1072 | 1192 | ||
1073 | qinf_exit: | 1193 | qinf_exit: |
1074 | free_rsp_buf(resp_buftype, rsp); | 1194 | free_rsp_buf(resp_buftype, rsp); |
1075 | return rc; | 1195 | return rc; |
1076 | } | 1196 | } |
1077 | 1197 | ||
1198 | int | ||
1199 | SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, | ||
1200 | u64 persistent_fid, u64 volatile_fid, | ||
1201 | struct smb2_file_all_info *data) | ||
1202 | { | ||
1203 | return query_info(xid, tcon, persistent_fid, volatile_fid, | ||
1204 | FILE_ALL_INFORMATION, | ||
1205 | sizeof(struct smb2_file_all_info) + MAX_NAME * 2, | ||
1206 | sizeof(struct smb2_file_all_info), data); | ||
1207 | } | ||
1208 | |||
1209 | int | ||
1210 | SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, | ||
1211 | u64 persistent_fid, u64 volatile_fid, __le64 *uniqueid) | ||
1212 | { | ||
1213 | return query_info(xid, tcon, persistent_fid, volatile_fid, | ||
1214 | FILE_INTERNAL_INFORMATION, | ||
1215 | sizeof(struct smb2_file_internal_info), | ||
1216 | sizeof(struct smb2_file_internal_info), uniqueid); | ||
1217 | } | ||
1218 | |||
1078 | /* | 1219 | /* |
1079 | * This is a no-op for now. We're not really interested in the reply, but | 1220 | * This is a no-op for now. We're not really interested in the reply, but |
1080 | * rather in the fact that the server sent one and that server->lstrp | 1221 | * rather in the fact that the server sent one and that server->lstrp |
@@ -1102,6 +1243,8 @@ SMB2_echo(struct TCP_Server_Info *server) | |||
1102 | struct smb2_echo_req *req; | 1243 | struct smb2_echo_req *req; |
1103 | int rc = 0; | 1244 | int rc = 0; |
1104 | struct kvec iov; | 1245 | struct kvec iov; |
1246 | struct smb_rqst rqst = { .rq_iov = &iov, | ||
1247 | .rq_nvec = 1 }; | ||
1105 | 1248 | ||
1106 | cFYI(1, "In echo request"); | 1249 | cFYI(1, "In echo request"); |
1107 | 1250 | ||
@@ -1115,7 +1258,7 @@ SMB2_echo(struct TCP_Server_Info *server) | |||
1115 | /* 4 for rfc1002 length field */ | 1258 | /* 4 for rfc1002 length field */ |
1116 | iov.iov_len = get_rfc1002_length(req) + 4; | 1259 | iov.iov_len = get_rfc1002_length(req) + 4; |
1117 | 1260 | ||
1118 | rc = cifs_call_async(server, &iov, 1, NULL, smb2_echo_callback, server, | 1261 | rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, server, |
1119 | CIFS_ECHO_OP); | 1262 | CIFS_ECHO_OP); |
1120 | if (rc) | 1263 | if (rc) |
1121 | cFYI(1, "Echo request failed: %d", rc); | 1264 | cFYI(1, "Echo request failed: %d", rc); |
@@ -1123,3 +1266,945 @@ SMB2_echo(struct TCP_Server_Info *server) | |||
1123 | cifs_small_buf_release(req); | 1266 | cifs_small_buf_release(req); |
1124 | return rc; | 1267 | return rc; |
1125 | } | 1268 | } |
1269 | |||
1270 | int | ||
1271 | SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, | ||
1272 | u64 volatile_fid) | ||
1273 | { | ||
1274 | struct smb2_flush_req *req; | ||
1275 | struct TCP_Server_Info *server; | ||
1276 | struct cifs_ses *ses = tcon->ses; | ||
1277 | struct kvec iov[1]; | ||
1278 | int resp_buftype; | ||
1279 | int rc = 0; | ||
1280 | |||
1281 | cFYI(1, "Flush"); | ||
1282 | |||
1283 | if (ses && (ses->server)) | ||
1284 | server = ses->server; | ||
1285 | else | ||
1286 | return -EIO; | ||
1287 | |||
1288 | rc = small_smb2_init(SMB2_FLUSH, tcon, (void **) &req); | ||
1289 | if (rc) | ||
1290 | return rc; | ||
1291 | |||
1292 | req->PersistentFileId = persistent_fid; | ||
1293 | req->VolatileFileId = volatile_fid; | ||
1294 | |||
1295 | iov[0].iov_base = (char *)req; | ||
1296 | /* 4 for rfc1002 length field */ | ||
1297 | iov[0].iov_len = get_rfc1002_length(req) + 4; | ||
1298 | |||
1299 | rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); | ||
1300 | |||
1301 | if ((rc != 0) && tcon) | ||
1302 | cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE); | ||
1303 | |||
1304 | free_rsp_buf(resp_buftype, iov[0].iov_base); | ||
1305 | return rc; | ||
1306 | } | ||
1307 | |||
1308 | /* | ||
1309 | * To form a chain of read requests, any read requests after the first should | ||
1310 | * have the end_of_chain boolean set to true. | ||
1311 | */ | ||
1312 | static int | ||
1313 | smb2_new_read_req(struct kvec *iov, struct cifs_io_parms *io_parms, | ||
1314 | unsigned int remaining_bytes, int request_type) | ||
1315 | { | ||
1316 | int rc = -EACCES; | ||
1317 | struct smb2_read_req *req = NULL; | ||
1318 | |||
1319 | rc = small_smb2_init(SMB2_READ, io_parms->tcon, (void **) &req); | ||
1320 | if (rc) | ||
1321 | return rc; | ||
1322 | if (io_parms->tcon->ses->server == NULL) | ||
1323 | return -ECONNABORTED; | ||
1324 | |||
1325 | req->hdr.ProcessId = cpu_to_le32(io_parms->pid); | ||
1326 | |||
1327 | req->PersistentFileId = io_parms->persistent_fid; | ||
1328 | req->VolatileFileId = io_parms->volatile_fid; | ||
1329 | req->ReadChannelInfoOffset = 0; /* reserved */ | ||
1330 | req->ReadChannelInfoLength = 0; /* reserved */ | ||
1331 | req->Channel = 0; /* reserved */ | ||
1332 | req->MinimumCount = 0; | ||
1333 | req->Length = cpu_to_le32(io_parms->length); | ||
1334 | req->Offset = cpu_to_le64(io_parms->offset); | ||
1335 | |||
1336 | if (request_type & CHAINED_REQUEST) { | ||
1337 | if (!(request_type & END_OF_CHAIN)) { | ||
1338 | /* 4 for rfc1002 length field */ | ||
1339 | req->hdr.NextCommand = | ||
1340 | cpu_to_le32(get_rfc1002_length(req) + 4); | ||
1341 | } else /* END_OF_CHAIN */ | ||
1342 | req->hdr.NextCommand = 0; | ||
1343 | if (request_type & RELATED_REQUEST) { | ||
1344 | req->hdr.Flags |= SMB2_FLAGS_RELATED_OPERATIONS; | ||
1345 | /* | ||
1346 | * Related requests use info from previous read request | ||
1347 | * in chain. | ||
1348 | */ | ||
1349 | req->hdr.SessionId = 0xFFFFFFFF; | ||
1350 | req->hdr.TreeId = 0xFFFFFFFF; | ||
1351 | req->PersistentFileId = 0xFFFFFFFF; | ||
1352 | req->VolatileFileId = 0xFFFFFFFF; | ||
1353 | } | ||
1354 | } | ||
1355 | if (remaining_bytes > io_parms->length) | ||
1356 | req->RemainingBytes = cpu_to_le32(remaining_bytes); | ||
1357 | else | ||
1358 | req->RemainingBytes = 0; | ||
1359 | |||
1360 | iov[0].iov_base = (char *)req; | ||
1361 | /* 4 for rfc1002 length field */ | ||
1362 | iov[0].iov_len = get_rfc1002_length(req) + 4; | ||
1363 | return rc; | ||
1364 | } | ||
1365 | |||
1366 | static void | ||
1367 | smb2_readv_callback(struct mid_q_entry *mid) | ||
1368 | { | ||
1369 | struct cifs_readdata *rdata = mid->callback_data; | ||
1370 | struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); | ||
1371 | struct TCP_Server_Info *server = tcon->ses->server; | ||
1372 | struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov.iov_base; | ||
1373 | unsigned int credits_received = 1; | ||
1374 | struct smb_rqst rqst = { .rq_iov = &rdata->iov, | ||
1375 | .rq_nvec = 1, | ||
1376 | .rq_pages = rdata->pages, | ||
1377 | .rq_npages = rdata->nr_pages, | ||
1378 | .rq_pagesz = rdata->pagesz, | ||
1379 | .rq_tailsz = rdata->tailsz }; | ||
1380 | |||
1381 | cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__, | ||
1382 | mid->mid, mid->mid_state, rdata->result, rdata->bytes); | ||
1383 | |||
1384 | switch (mid->mid_state) { | ||
1385 | case MID_RESPONSE_RECEIVED: | ||
1386 | credits_received = le16_to_cpu(buf->CreditRequest); | ||
1387 | /* result already set, check signature */ | ||
1388 | if (server->sec_mode & | ||
1389 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | ||
1390 | int rc; | ||
1391 | |||
1392 | rc = smb2_verify_signature(&rqst, server); | ||
1393 | if (rc) | ||
1394 | cERROR(1, "SMB signature verification returned " | ||
1395 | "error = %d", rc); | ||
1396 | } | ||
1397 | /* FIXME: should this be counted toward the initiating task? */ | ||
1398 | task_io_account_read(rdata->bytes); | ||
1399 | cifs_stats_bytes_read(tcon, rdata->bytes); | ||
1400 | break; | ||
1401 | case MID_REQUEST_SUBMITTED: | ||
1402 | case MID_RETRY_NEEDED: | ||
1403 | rdata->result = -EAGAIN; | ||
1404 | break; | ||
1405 | default: | ||
1406 | if (rdata->result != -ENODATA) | ||
1407 | rdata->result = -EIO; | ||
1408 | } | ||
1409 | |||
1410 | if (rdata->result) | ||
1411 | cifs_stats_fail_inc(tcon, SMB2_READ_HE); | ||
1412 | |||
1413 | queue_work(cifsiod_wq, &rdata->work); | ||
1414 | DeleteMidQEntry(mid); | ||
1415 | add_credits(server, credits_received, 0); | ||
1416 | } | ||
1417 | |||
1418 | /* smb2_async_readv - send an async write, and set up mid to handle result */ | ||
1419 | int | ||
1420 | smb2_async_readv(struct cifs_readdata *rdata) | ||
1421 | { | ||
1422 | int rc; | ||
1423 | struct smb2_hdr *buf; | ||
1424 | struct cifs_io_parms io_parms; | ||
1425 | struct smb_rqst rqst = { .rq_iov = &rdata->iov, | ||
1426 | .rq_nvec = 1 }; | ||
1427 | |||
1428 | cFYI(1, "%s: offset=%llu bytes=%u", __func__, | ||
1429 | rdata->offset, rdata->bytes); | ||
1430 | |||
1431 | io_parms.tcon = tlink_tcon(rdata->cfile->tlink); | ||
1432 | io_parms.offset = rdata->offset; | ||
1433 | io_parms.length = rdata->bytes; | ||
1434 | io_parms.persistent_fid = rdata->cfile->fid.persistent_fid; | ||
1435 | io_parms.volatile_fid = rdata->cfile->fid.volatile_fid; | ||
1436 | io_parms.pid = rdata->pid; | ||
1437 | rc = smb2_new_read_req(&rdata->iov, &io_parms, 0, 0); | ||
1438 | if (rc) | ||
1439 | return rc; | ||
1440 | |||
1441 | buf = (struct smb2_hdr *)rdata->iov.iov_base; | ||
1442 | /* 4 for rfc1002 length field */ | ||
1443 | rdata->iov.iov_len = get_rfc1002_length(rdata->iov.iov_base) + 4; | ||
1444 | |||
1445 | kref_get(&rdata->refcount); | ||
1446 | rc = cifs_call_async(io_parms.tcon->ses->server, &rqst, | ||
1447 | cifs_readv_receive, smb2_readv_callback, | ||
1448 | rdata, 0); | ||
1449 | if (rc) { | ||
1450 | kref_put(&rdata->refcount, cifs_readdata_release); | ||
1451 | cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); | ||
1452 | } | ||
1453 | |||
1454 | cifs_small_buf_release(buf); | ||
1455 | return rc; | ||
1456 | } | ||
1457 | |||
1458 | int | ||
1459 | SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, | ||
1460 | unsigned int *nbytes, char **buf, int *buf_type) | ||
1461 | { | ||
1462 | int resp_buftype, rc = -EACCES; | ||
1463 | struct smb2_read_rsp *rsp = NULL; | ||
1464 | struct kvec iov[1]; | ||
1465 | |||
1466 | *nbytes = 0; | ||
1467 | rc = smb2_new_read_req(iov, io_parms, 0, 0); | ||
1468 | if (rc) | ||
1469 | return rc; | ||
1470 | |||
1471 | rc = SendReceive2(xid, io_parms->tcon->ses, iov, 1, | ||
1472 | &resp_buftype, CIFS_LOG_ERROR); | ||
1473 | |||
1474 | rsp = (struct smb2_read_rsp *)iov[0].iov_base; | ||
1475 | |||
1476 | if (rsp->hdr.Status == STATUS_END_OF_FILE) { | ||
1477 | free_rsp_buf(resp_buftype, iov[0].iov_base); | ||
1478 | return 0; | ||
1479 | } | ||
1480 | |||
1481 | if (rc) { | ||
1482 | cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); | ||
1483 | cERROR(1, "Send error in read = %d", rc); | ||
1484 | } else { | ||
1485 | *nbytes = le32_to_cpu(rsp->DataLength); | ||
1486 | if ((*nbytes > CIFS_MAX_MSGSIZE) || | ||
1487 | (*nbytes > io_parms->length)) { | ||
1488 | cFYI(1, "bad length %d for count %d", *nbytes, | ||
1489 | io_parms->length); | ||
1490 | rc = -EIO; | ||
1491 | *nbytes = 0; | ||
1492 | } | ||
1493 | } | ||
1494 | |||
1495 | if (*buf) { | ||
1496 | memcpy(*buf, (char *)rsp->hdr.ProtocolId + rsp->DataOffset, | ||
1497 | *nbytes); | ||
1498 | free_rsp_buf(resp_buftype, iov[0].iov_base); | ||
1499 | } else if (resp_buftype != CIFS_NO_BUFFER) { | ||
1500 | *buf = iov[0].iov_base; | ||
1501 | if (resp_buftype == CIFS_SMALL_BUFFER) | ||
1502 | *buf_type = CIFS_SMALL_BUFFER; | ||
1503 | else if (resp_buftype == CIFS_LARGE_BUFFER) | ||
1504 | *buf_type = CIFS_LARGE_BUFFER; | ||
1505 | } | ||
1506 | return rc; | ||
1507 | } | ||
1508 | |||
1509 | /* | ||
1510 | * Check the mid_state and signature on received buffer (if any), and queue the | ||
1511 | * workqueue completion task. | ||
1512 | */ | ||
1513 | static void | ||
1514 | smb2_writev_callback(struct mid_q_entry *mid) | ||
1515 | { | ||
1516 | struct cifs_writedata *wdata = mid->callback_data; | ||
1517 | struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); | ||
1518 | unsigned int written; | ||
1519 | struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf; | ||
1520 | unsigned int credits_received = 1; | ||
1521 | |||
1522 | switch (mid->mid_state) { | ||
1523 | case MID_RESPONSE_RECEIVED: | ||
1524 | credits_received = le16_to_cpu(rsp->hdr.CreditRequest); | ||
1525 | wdata->result = smb2_check_receive(mid, tcon->ses->server, 0); | ||
1526 | if (wdata->result != 0) | ||
1527 | break; | ||
1528 | |||
1529 | written = le32_to_cpu(rsp->DataLength); | ||
1530 | /* | ||
1531 | * Mask off high 16 bits when bytes written as returned | ||
1532 | * by the server is greater than bytes requested by the | ||
1533 | * client. OS/2 servers are known to set incorrect | ||
1534 | * CountHigh values. | ||
1535 | */ | ||
1536 | if (written > wdata->bytes) | ||
1537 | written &= 0xFFFF; | ||
1538 | |||
1539 | if (written < wdata->bytes) | ||
1540 | wdata->result = -ENOSPC; | ||
1541 | else | ||
1542 | wdata->bytes = written; | ||
1543 | break; | ||
1544 | case MID_REQUEST_SUBMITTED: | ||
1545 | case MID_RETRY_NEEDED: | ||
1546 | wdata->result = -EAGAIN; | ||
1547 | break; | ||
1548 | default: | ||
1549 | wdata->result = -EIO; | ||
1550 | break; | ||
1551 | } | ||
1552 | |||
1553 | if (wdata->result) | ||
1554 | cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); | ||
1555 | |||
1556 | queue_work(cifsiod_wq, &wdata->work); | ||
1557 | DeleteMidQEntry(mid); | ||
1558 | add_credits(tcon->ses->server, credits_received, 0); | ||
1559 | } | ||
1560 | |||
1561 | /* smb2_async_writev - send an async write, and set up mid to handle result */ | ||
1562 | int | ||
1563 | smb2_async_writev(struct cifs_writedata *wdata) | ||
1564 | { | ||
1565 | int rc = -EACCES; | ||
1566 | struct smb2_write_req *req = NULL; | ||
1567 | struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); | ||
1568 | struct kvec iov; | ||
1569 | struct smb_rqst rqst; | ||
1570 | |||
1571 | rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req); | ||
1572 | if (rc) | ||
1573 | goto async_writev_out; | ||
1574 | |||
1575 | req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid); | ||
1576 | |||
1577 | req->PersistentFileId = wdata->cfile->fid.persistent_fid; | ||
1578 | req->VolatileFileId = wdata->cfile->fid.volatile_fid; | ||
1579 | req->WriteChannelInfoOffset = 0; | ||
1580 | req->WriteChannelInfoLength = 0; | ||
1581 | req->Channel = 0; | ||
1582 | req->Offset = cpu_to_le64(wdata->offset); | ||
1583 | /* 4 for rfc1002 length field */ | ||
1584 | req->DataOffset = cpu_to_le16( | ||
1585 | offsetof(struct smb2_write_req, Buffer) - 4); | ||
1586 | req->RemainingBytes = 0; | ||
1587 | |||
1588 | /* 4 for rfc1002 length field and 1 for Buffer */ | ||
1589 | iov.iov_len = get_rfc1002_length(req) + 4 - 1; | ||
1590 | iov.iov_base = req; | ||
1591 | |||
1592 | rqst.rq_iov = &iov; | ||
1593 | rqst.rq_nvec = 1; | ||
1594 | rqst.rq_pages = wdata->pages; | ||
1595 | rqst.rq_npages = wdata->nr_pages; | ||
1596 | rqst.rq_pagesz = wdata->pagesz; | ||
1597 | rqst.rq_tailsz = wdata->tailsz; | ||
1598 | |||
1599 | cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); | ||
1600 | |||
1601 | req->Length = cpu_to_le32(wdata->bytes); | ||
1602 | |||
1603 | inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */); | ||
1604 | |||
1605 | kref_get(&wdata->refcount); | ||
1606 | rc = cifs_call_async(tcon->ses->server, &rqst, NULL, | ||
1607 | smb2_writev_callback, wdata, 0); | ||
1608 | |||
1609 | if (rc) { | ||
1610 | kref_put(&wdata->refcount, cifs_writedata_release); | ||
1611 | cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); | ||
1612 | } | ||
1613 | |||
1614 | async_writev_out: | ||
1615 | cifs_small_buf_release(req); | ||
1616 | return rc; | ||
1617 | } | ||
1618 | |||
1619 | /* | ||
1620 | * SMB2_write function gets iov pointer to kvec array with n_vec as a length. | ||
1621 | * The length field from io_parms must be at least 1 and indicates a number of | ||
1622 | * elements with data to write that begins with position 1 in iov array. All | ||
1623 | * data length is specified by count. | ||
1624 | */ | ||
1625 | int | ||
1626 | SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, | ||
1627 | unsigned int *nbytes, struct kvec *iov, int n_vec) | ||
1628 | { | ||
1629 | int rc = 0; | ||
1630 | struct smb2_write_req *req = NULL; | ||
1631 | struct smb2_write_rsp *rsp = NULL; | ||
1632 | int resp_buftype; | ||
1633 | *nbytes = 0; | ||
1634 | |||
1635 | if (n_vec < 1) | ||
1636 | return rc; | ||
1637 | |||
1638 | rc = small_smb2_init(SMB2_WRITE, io_parms->tcon, (void **) &req); | ||
1639 | if (rc) | ||
1640 | return rc; | ||
1641 | |||
1642 | if (io_parms->tcon->ses->server == NULL) | ||
1643 | return -ECONNABORTED; | ||
1644 | |||
1645 | req->hdr.ProcessId = cpu_to_le32(io_parms->pid); | ||
1646 | |||
1647 | req->PersistentFileId = io_parms->persistent_fid; | ||
1648 | req->VolatileFileId = io_parms->volatile_fid; | ||
1649 | req->WriteChannelInfoOffset = 0; | ||
1650 | req->WriteChannelInfoLength = 0; | ||
1651 | req->Channel = 0; | ||
1652 | req->Length = cpu_to_le32(io_parms->length); | ||
1653 | req->Offset = cpu_to_le64(io_parms->offset); | ||
1654 | /* 4 for rfc1002 length field */ | ||
1655 | req->DataOffset = cpu_to_le16( | ||
1656 | offsetof(struct smb2_write_req, Buffer) - 4); | ||
1657 | req->RemainingBytes = 0; | ||
1658 | |||
1659 | iov[0].iov_base = (char *)req; | ||
1660 | /* 4 for rfc1002 length field and 1 for Buffer */ | ||
1661 | iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; | ||
1662 | |||
1663 | /* length of entire message including data to be written */ | ||
1664 | inc_rfc1001_len(req, io_parms->length - 1 /* Buffer */); | ||
1665 | |||
1666 | rc = SendReceive2(xid, io_parms->tcon->ses, iov, n_vec + 1, | ||
1667 | &resp_buftype, 0); | ||
1668 | rsp = (struct smb2_write_rsp *)iov[0].iov_base; | ||
1669 | |||
1670 | if (rc) { | ||
1671 | cifs_stats_fail_inc(io_parms->tcon, SMB2_WRITE_HE); | ||
1672 | cERROR(1, "Send error in write = %d", rc); | ||
1673 | } else | ||
1674 | *nbytes = le32_to_cpu(rsp->DataLength); | ||
1675 | |||
1676 | free_rsp_buf(resp_buftype, rsp); | ||
1677 | return rc; | ||
1678 | } | ||
1679 | |||
1680 | static unsigned int | ||
1681 | num_entries(char *bufstart, char *end_of_buf, char **lastentry, size_t size) | ||
1682 | { | ||
1683 | int len; | ||
1684 | unsigned int entrycount = 0; | ||
1685 | unsigned int next_offset = 0; | ||
1686 | FILE_DIRECTORY_INFO *entryptr; | ||
1687 | |||
1688 | if (bufstart == NULL) | ||
1689 | return 0; | ||
1690 | |||
1691 | entryptr = (FILE_DIRECTORY_INFO *)bufstart; | ||
1692 | |||
1693 | while (1) { | ||
1694 | entryptr = (FILE_DIRECTORY_INFO *) | ||
1695 | ((char *)entryptr + next_offset); | ||
1696 | |||
1697 | if ((char *)entryptr + size > end_of_buf) { | ||
1698 | cERROR(1, "malformed search entry would overflow"); | ||
1699 | break; | ||
1700 | } | ||
1701 | |||
1702 | len = le32_to_cpu(entryptr->FileNameLength); | ||
1703 | if ((char *)entryptr + len + size > end_of_buf) { | ||
1704 | cERROR(1, "directory entry name would overflow frame " | ||
1705 | "end of buf %p", end_of_buf); | ||
1706 | break; | ||
1707 | } | ||
1708 | |||
1709 | *lastentry = (char *)entryptr; | ||
1710 | entrycount++; | ||
1711 | |||
1712 | next_offset = le32_to_cpu(entryptr->NextEntryOffset); | ||
1713 | if (!next_offset) | ||
1714 | break; | ||
1715 | } | ||
1716 | |||
1717 | return entrycount; | ||
1718 | } | ||
1719 | |||
1720 | /* | ||
1721 | * Readdir/FindFirst | ||
1722 | */ | ||
1723 | int | ||
1724 | SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, | ||
1725 | u64 persistent_fid, u64 volatile_fid, int index, | ||
1726 | struct cifs_search_info *srch_inf) | ||
1727 | { | ||
1728 | struct smb2_query_directory_req *req; | ||
1729 | struct smb2_query_directory_rsp *rsp = NULL; | ||
1730 | struct kvec iov[2]; | ||
1731 | int rc = 0; | ||
1732 | int len; | ||
1733 | int resp_buftype; | ||
1734 | unsigned char *bufptr; | ||
1735 | struct TCP_Server_Info *server; | ||
1736 | struct cifs_ses *ses = tcon->ses; | ||
1737 | __le16 asteriks = cpu_to_le16('*'); | ||
1738 | char *end_of_smb; | ||
1739 | unsigned int output_size = CIFSMaxBufSize; | ||
1740 | size_t info_buf_size; | ||
1741 | |||
1742 | if (ses && (ses->server)) | ||
1743 | server = ses->server; | ||
1744 | else | ||
1745 | return -EIO; | ||
1746 | |||
1747 | rc = small_smb2_init(SMB2_QUERY_DIRECTORY, tcon, (void **) &req); | ||
1748 | if (rc) | ||
1749 | return rc; | ||
1750 | |||
1751 | switch (srch_inf->info_level) { | ||
1752 | case SMB_FIND_FILE_DIRECTORY_INFO: | ||
1753 | req->FileInformationClass = FILE_DIRECTORY_INFORMATION; | ||
1754 | info_buf_size = sizeof(FILE_DIRECTORY_INFO) - 1; | ||
1755 | break; | ||
1756 | case SMB_FIND_FILE_ID_FULL_DIR_INFO: | ||
1757 | req->FileInformationClass = FILEID_FULL_DIRECTORY_INFORMATION; | ||
1758 | info_buf_size = sizeof(SEARCH_ID_FULL_DIR_INFO) - 1; | ||
1759 | break; | ||
1760 | default: | ||
1761 | cERROR(1, "info level %u isn't supported", | ||
1762 | srch_inf->info_level); | ||
1763 | rc = -EINVAL; | ||
1764 | goto qdir_exit; | ||
1765 | } | ||
1766 | |||
1767 | req->FileIndex = cpu_to_le32(index); | ||
1768 | req->PersistentFileId = persistent_fid; | ||
1769 | req->VolatileFileId = volatile_fid; | ||
1770 | |||
1771 | len = 0x2; | ||
1772 | bufptr = req->Buffer; | ||
1773 | memcpy(bufptr, &asteriks, len); | ||
1774 | |||
1775 | req->FileNameOffset = | ||
1776 | cpu_to_le16(sizeof(struct smb2_query_directory_req) - 1 - 4); | ||
1777 | req->FileNameLength = cpu_to_le16(len); | ||
1778 | /* | ||
1779 | * BB could be 30 bytes or so longer if we used SMB2 specific | ||
1780 | * buffer lengths, but this is safe and close enough. | ||
1781 | */ | ||
1782 | output_size = min_t(unsigned int, output_size, server->maxBuf); | ||
1783 | output_size = min_t(unsigned int, output_size, 2 << 15); | ||
1784 | req->OutputBufferLength = cpu_to_le32(output_size); | ||
1785 | |||
1786 | iov[0].iov_base = (char *)req; | ||
1787 | /* 4 for RFC1001 length and 1 for Buffer */ | ||
1788 | iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; | ||
1789 | |||
1790 | iov[1].iov_base = (char *)(req->Buffer); | ||
1791 | iov[1].iov_len = len; | ||
1792 | |||
1793 | inc_rfc1001_len(req, len - 1 /* Buffer */); | ||
1794 | |||
1795 | rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, 0); | ||
1796 | rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base; | ||
1797 | |||
1798 | if (rc) { | ||
1799 | cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); | ||
1800 | goto qdir_exit; | ||
1801 | } | ||
1802 | |||
1803 | rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset), | ||
1804 | le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr, | ||
1805 | info_buf_size); | ||
1806 | if (rc) | ||
1807 | goto qdir_exit; | ||
1808 | |||
1809 | srch_inf->unicode = true; | ||
1810 | |||
1811 | if (srch_inf->ntwrk_buf_start) { | ||
1812 | if (srch_inf->smallBuf) | ||
1813 | cifs_small_buf_release(srch_inf->ntwrk_buf_start); | ||
1814 | else | ||
1815 | cifs_buf_release(srch_inf->ntwrk_buf_start); | ||
1816 | } | ||
1817 | srch_inf->ntwrk_buf_start = (char *)rsp; | ||
1818 | srch_inf->srch_entries_start = srch_inf->last_entry = 4 /* rfclen */ + | ||
1819 | (char *)&rsp->hdr + le16_to_cpu(rsp->OutputBufferOffset); | ||
1820 | /* 4 for rfc1002 length field */ | ||
1821 | end_of_smb = get_rfc1002_length(rsp) + 4 + (char *)&rsp->hdr; | ||
1822 | srch_inf->entries_in_buffer = | ||
1823 | num_entries(srch_inf->srch_entries_start, end_of_smb, | ||
1824 | &srch_inf->last_entry, info_buf_size); | ||
1825 | srch_inf->index_of_last_entry += srch_inf->entries_in_buffer; | ||
1826 | cFYI(1, "num entries %d last_index %lld srch start %p srch end %p", | ||
1827 | srch_inf->entries_in_buffer, srch_inf->index_of_last_entry, | ||
1828 | srch_inf->srch_entries_start, srch_inf->last_entry); | ||
1829 | if (resp_buftype == CIFS_LARGE_BUFFER) | ||
1830 | srch_inf->smallBuf = false; | ||
1831 | else if (resp_buftype == CIFS_SMALL_BUFFER) | ||
1832 | srch_inf->smallBuf = true; | ||
1833 | else | ||
1834 | cERROR(1, "illegal search buffer type"); | ||
1835 | |||
1836 | if (rsp->hdr.Status == STATUS_NO_MORE_FILES) | ||
1837 | srch_inf->endOfSearch = 1; | ||
1838 | else | ||
1839 | srch_inf->endOfSearch = 0; | ||
1840 | |||
1841 | return rc; | ||
1842 | |||
1843 | qdir_exit: | ||
1844 | free_rsp_buf(resp_buftype, rsp); | ||
1845 | return rc; | ||
1846 | } | ||
1847 | |||
1848 | static int | ||
1849 | send_set_info(const unsigned int xid, struct cifs_tcon *tcon, | ||
1850 | u64 persistent_fid, u64 volatile_fid, u32 pid, int info_class, | ||
1851 | unsigned int num, void **data, unsigned int *size) | ||
1852 | { | ||
1853 | struct smb2_set_info_req *req; | ||
1854 | struct smb2_set_info_rsp *rsp = NULL; | ||
1855 | struct kvec *iov; | ||
1856 | int rc = 0; | ||
1857 | int resp_buftype; | ||
1858 | unsigned int i; | ||
1859 | struct TCP_Server_Info *server; | ||
1860 | struct cifs_ses *ses = tcon->ses; | ||
1861 | |||
1862 | if (ses && (ses->server)) | ||
1863 | server = ses->server; | ||
1864 | else | ||
1865 | return -EIO; | ||
1866 | |||
1867 | if (!num) | ||
1868 | return -EINVAL; | ||
1869 | |||
1870 | iov = kmalloc(sizeof(struct kvec) * num, GFP_KERNEL); | ||
1871 | if (!iov) | ||
1872 | return -ENOMEM; | ||
1873 | |||
1874 | rc = small_smb2_init(SMB2_SET_INFO, tcon, (void **) &req); | ||
1875 | if (rc) { | ||
1876 | kfree(iov); | ||
1877 | return rc; | ||
1878 | } | ||
1879 | |||
1880 | req->hdr.ProcessId = cpu_to_le32(pid); | ||
1881 | |||
1882 | req->InfoType = SMB2_O_INFO_FILE; | ||
1883 | req->FileInfoClass = info_class; | ||
1884 | req->PersistentFileId = persistent_fid; | ||
1885 | req->VolatileFileId = volatile_fid; | ||
1886 | |||
1887 | /* 4 for RFC1001 length and 1 for Buffer */ | ||
1888 | req->BufferOffset = | ||
1889 | cpu_to_le16(sizeof(struct smb2_set_info_req) - 1 - 4); | ||
1890 | req->BufferLength = cpu_to_le32(*size); | ||
1891 | |||
1892 | inc_rfc1001_len(req, *size - 1 /* Buffer */); | ||
1893 | |||
1894 | memcpy(req->Buffer, *data, *size); | ||
1895 | |||
1896 | iov[0].iov_base = (char *)req; | ||
1897 | /* 4 for RFC1001 length */ | ||
1898 | iov[0].iov_len = get_rfc1002_length(req) + 4; | ||
1899 | |||
1900 | for (i = 1; i < num; i++) { | ||
1901 | inc_rfc1001_len(req, size[i]); | ||
1902 | le32_add_cpu(&req->BufferLength, size[i]); | ||
1903 | iov[i].iov_base = (char *)data[i]; | ||
1904 | iov[i].iov_len = size[i]; | ||
1905 | } | ||
1906 | |||
1907 | rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0); | ||
1908 | rsp = (struct smb2_set_info_rsp *)iov[0].iov_base; | ||
1909 | |||
1910 | if (rc != 0) { | ||
1911 | cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE); | ||
1912 | goto out; | ||
1913 | } | ||
1914 | out: | ||
1915 | free_rsp_buf(resp_buftype, rsp); | ||
1916 | kfree(iov); | ||
1917 | return rc; | ||
1918 | } | ||
1919 | |||
1920 | int | ||
1921 | SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon, | ||
1922 | u64 persistent_fid, u64 volatile_fid, __le16 *target_file) | ||
1923 | { | ||
1924 | struct smb2_file_rename_info info; | ||
1925 | void **data; | ||
1926 | unsigned int size[2]; | ||
1927 | int rc; | ||
1928 | int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX)); | ||
1929 | |||
1930 | data = kmalloc(sizeof(void *) * 2, GFP_KERNEL); | ||
1931 | if (!data) | ||
1932 | return -ENOMEM; | ||
1933 | |||
1934 | info.ReplaceIfExists = 1; /* 1 = replace existing target with new */ | ||
1935 | /* 0 = fail if target already exists */ | ||
1936 | info.RootDirectory = 0; /* MBZ for network ops (why does spec say?) */ | ||
1937 | info.FileNameLength = cpu_to_le32(len); | ||
1938 | |||
1939 | data[0] = &info; | ||
1940 | size[0] = sizeof(struct smb2_file_rename_info); | ||
1941 | |||
1942 | data[1] = target_file; | ||
1943 | size[1] = len + 2 /* null */; | ||
1944 | |||
1945 | rc = send_set_info(xid, tcon, persistent_fid, volatile_fid, | ||
1946 | current->tgid, FILE_RENAME_INFORMATION, 2, data, | ||
1947 | size); | ||
1948 | kfree(data); | ||
1949 | return rc; | ||
1950 | } | ||
1951 | |||
1952 | int | ||
1953 | SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon, | ||
1954 | u64 persistent_fid, u64 volatile_fid, __le16 *target_file) | ||
1955 | { | ||
1956 | struct smb2_file_link_info info; | ||
1957 | void **data; | ||
1958 | unsigned int size[2]; | ||
1959 | int rc; | ||
1960 | int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX)); | ||
1961 | |||
1962 | data = kmalloc(sizeof(void *) * 2, GFP_KERNEL); | ||
1963 | if (!data) | ||
1964 | return -ENOMEM; | ||
1965 | |||
1966 | info.ReplaceIfExists = 0; /* 1 = replace existing link with new */ | ||
1967 | /* 0 = fail if link already exists */ | ||
1968 | info.RootDirectory = 0; /* MBZ for network ops (why does spec say?) */ | ||
1969 | info.FileNameLength = cpu_to_le32(len); | ||
1970 | |||
1971 | data[0] = &info; | ||
1972 | size[0] = sizeof(struct smb2_file_link_info); | ||
1973 | |||
1974 | data[1] = target_file; | ||
1975 | size[1] = len + 2 /* null */; | ||
1976 | |||
1977 | rc = send_set_info(xid, tcon, persistent_fid, volatile_fid, | ||
1978 | current->tgid, FILE_LINK_INFORMATION, 2, data, size); | ||
1979 | kfree(data); | ||
1980 | return rc; | ||
1981 | } | ||
1982 | |||
1983 | int | ||
1984 | SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, | ||
1985 | u64 volatile_fid, u32 pid, __le64 *eof) | ||
1986 | { | ||
1987 | struct smb2_file_eof_info info; | ||
1988 | void *data; | ||
1989 | unsigned int size; | ||
1990 | |||
1991 | info.EndOfFile = *eof; | ||
1992 | |||
1993 | data = &info; | ||
1994 | size = sizeof(struct smb2_file_eof_info); | ||
1995 | |||
1996 | return send_set_info(xid, tcon, persistent_fid, volatile_fid, pid, | ||
1997 | FILE_END_OF_FILE_INFORMATION, 1, &data, &size); | ||
1998 | } | ||
1999 | |||
2000 | int | ||
2001 | SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, | ||
2002 | u64 persistent_fid, u64 volatile_fid, FILE_BASIC_INFO *buf) | ||
2003 | { | ||
2004 | unsigned int size; | ||
2005 | size = sizeof(FILE_BASIC_INFO); | ||
2006 | return send_set_info(xid, tcon, persistent_fid, volatile_fid, | ||
2007 | current->tgid, FILE_BASIC_INFORMATION, 1, | ||
2008 | (void **)&buf, &size); | ||
2009 | } | ||
2010 | |||
2011 | int | ||
2012 | SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, | ||
2013 | const u64 persistent_fid, const u64 volatile_fid, | ||
2014 | __u8 oplock_level) | ||
2015 | { | ||
2016 | int rc; | ||
2017 | struct smb2_oplock_break *req = NULL; | ||
2018 | |||
2019 | cFYI(1, "SMB2_oplock_break"); | ||
2020 | rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req); | ||
2021 | |||
2022 | if (rc) | ||
2023 | return rc; | ||
2024 | |||
2025 | req->VolatileFid = volatile_fid; | ||
2026 | req->PersistentFid = persistent_fid; | ||
2027 | req->OplockLevel = oplock_level; | ||
2028 | req->hdr.CreditRequest = cpu_to_le16(1); | ||
2029 | |||
2030 | rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, CIFS_OBREAK_OP); | ||
2031 | /* SMB2 buffer freed by function above */ | ||
2032 | |||
2033 | if (rc) { | ||
2034 | cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); | ||
2035 | cFYI(1, "Send error in Oplock Break = %d", rc); | ||
2036 | } | ||
2037 | |||
2038 | return rc; | ||
2039 | } | ||
2040 | |||
2041 | static void | ||
2042 | copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf, | ||
2043 | struct kstatfs *kst) | ||
2044 | { | ||
2045 | kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) * | ||
2046 | le32_to_cpu(pfs_inf->SectorsPerAllocationUnit); | ||
2047 | kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits); | ||
2048 | kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits); | ||
2049 | kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); | ||
2050 | return; | ||
2051 | } | ||
2052 | |||
2053 | static int | ||
2054 | build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level, | ||
2055 | int outbuf_len, u64 persistent_fid, u64 volatile_fid) | ||
2056 | { | ||
2057 | int rc; | ||
2058 | struct smb2_query_info_req *req; | ||
2059 | |||
2060 | cFYI(1, "Query FSInfo level %d", level); | ||
2061 | |||
2062 | if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) | ||
2063 | return -EIO; | ||
2064 | |||
2065 | rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req); | ||
2066 | if (rc) | ||
2067 | return rc; | ||
2068 | |||
2069 | req->InfoType = SMB2_O_INFO_FILESYSTEM; | ||
2070 | req->FileInfoClass = level; | ||
2071 | req->PersistentFileId = persistent_fid; | ||
2072 | req->VolatileFileId = volatile_fid; | ||
2073 | /* 4 for rfc1002 length field and 1 for pad */ | ||
2074 | req->InputBufferOffset = | ||
2075 | cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4); | ||
2076 | req->OutputBufferLength = cpu_to_le32( | ||
2077 | outbuf_len + sizeof(struct smb2_query_info_rsp) - 1 - 4); | ||
2078 | |||
2079 | iov->iov_base = (char *)req; | ||
2080 | /* 4 for rfc1002 length field */ | ||
2081 | iov->iov_len = get_rfc1002_length(req) + 4; | ||
2082 | return 0; | ||
2083 | } | ||
2084 | |||
2085 | int | ||
2086 | SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, | ||
2087 | u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata) | ||
2088 | { | ||
2089 | struct smb2_query_info_rsp *rsp = NULL; | ||
2090 | struct kvec iov; | ||
2091 | int rc = 0; | ||
2092 | int resp_buftype; | ||
2093 | struct cifs_ses *ses = tcon->ses; | ||
2094 | struct smb2_fs_full_size_info *info = NULL; | ||
2095 | |||
2096 | rc = build_qfs_info_req(&iov, tcon, FS_FULL_SIZE_INFORMATION, | ||
2097 | sizeof(struct smb2_fs_full_size_info), | ||
2098 | persistent_fid, volatile_fid); | ||
2099 | if (rc) | ||
2100 | return rc; | ||
2101 | |||
2102 | rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, 0); | ||
2103 | if (rc) { | ||
2104 | cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); | ||
2105 | goto qinf_exit; | ||
2106 | } | ||
2107 | rsp = (struct smb2_query_info_rsp *)iov.iov_base; | ||
2108 | |||
2109 | info = (struct smb2_fs_full_size_info *)(4 /* RFC1001 len */ + | ||
2110 | le16_to_cpu(rsp->OutputBufferOffset) + (char *)&rsp->hdr); | ||
2111 | rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset), | ||
2112 | le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr, | ||
2113 | sizeof(struct smb2_fs_full_size_info)); | ||
2114 | if (!rc) | ||
2115 | copy_fs_info_to_kstatfs(info, fsdata); | ||
2116 | |||
2117 | qinf_exit: | ||
2118 | free_rsp_buf(resp_buftype, iov.iov_base); | ||
2119 | return rc; | ||
2120 | } | ||
2121 | |||
2122 | int | ||
2123 | smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, | ||
2124 | const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid, | ||
2125 | const __u32 num_lock, struct smb2_lock_element *buf) | ||
2126 | { | ||
2127 | int rc = 0; | ||
2128 | struct smb2_lock_req *req = NULL; | ||
2129 | struct kvec iov[2]; | ||
2130 | int resp_buf_type; | ||
2131 | unsigned int count; | ||
2132 | |||
2133 | cFYI(1, "smb2_lockv num lock %d", num_lock); | ||
2134 | |||
2135 | rc = small_smb2_init(SMB2_LOCK, tcon, (void **) &req); | ||
2136 | if (rc) | ||
2137 | return rc; | ||
2138 | |||
2139 | req->hdr.ProcessId = cpu_to_le32(pid); | ||
2140 | req->LockCount = cpu_to_le16(num_lock); | ||
2141 | |||
2142 | req->PersistentFileId = persist_fid; | ||
2143 | req->VolatileFileId = volatile_fid; | ||
2144 | |||
2145 | count = num_lock * sizeof(struct smb2_lock_element); | ||
2146 | inc_rfc1001_len(req, count - sizeof(struct smb2_lock_element)); | ||
2147 | |||
2148 | iov[0].iov_base = (char *)req; | ||
2149 | /* 4 for rfc1002 length field and count for all locks */ | ||
2150 | iov[0].iov_len = get_rfc1002_length(req) + 4 - count; | ||
2151 | iov[1].iov_base = (char *)buf; | ||
2152 | iov[1].iov_len = count; | ||
2153 | |||
2154 | cifs_stats_inc(&tcon->stats.cifs_stats.num_locks); | ||
2155 | rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP); | ||
2156 | if (rc) { | ||
2157 | cFYI(1, "Send error in smb2_lockv = %d", rc); | ||
2158 | cifs_stats_fail_inc(tcon, SMB2_LOCK_HE); | ||
2159 | } | ||
2160 | |||
2161 | return rc; | ||
2162 | } | ||
2163 | |||
2164 | int | ||
2165 | SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon, | ||
2166 | const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid, | ||
2167 | const __u64 length, const __u64 offset, const __u32 lock_flags, | ||
2168 | const bool wait) | ||
2169 | { | ||
2170 | struct smb2_lock_element lock; | ||
2171 | |||
2172 | lock.Offset = cpu_to_le64(offset); | ||
2173 | lock.Length = cpu_to_le64(length); | ||
2174 | lock.Flags = cpu_to_le32(lock_flags); | ||
2175 | if (!wait && lock_flags != SMB2_LOCKFLAG_UNLOCK) | ||
2176 | lock.Flags |= cpu_to_le32(SMB2_LOCKFLAG_FAIL_IMMEDIATELY); | ||
2177 | |||
2178 | return smb2_lockv(xid, tcon, persist_fid, volatile_fid, pid, 1, &lock); | ||
2179 | } | ||
2180 | |||
2181 | int | ||
2182 | SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, | ||
2183 | __u8 *lease_key, const __le32 lease_state) | ||
2184 | { | ||
2185 | int rc; | ||
2186 | struct smb2_lease_ack *req = NULL; | ||
2187 | |||
2188 | cFYI(1, "SMB2_lease_break"); | ||
2189 | rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req); | ||
2190 | |||
2191 | if (rc) | ||
2192 | return rc; | ||
2193 | |||
2194 | req->hdr.CreditRequest = cpu_to_le16(1); | ||
2195 | req->StructureSize = cpu_to_le16(36); | ||
2196 | inc_rfc1001_len(req, 12); | ||
2197 | |||
2198 | memcpy(req->LeaseKey, lease_key, 16); | ||
2199 | req->LeaseState = lease_state; | ||
2200 | |||
2201 | rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, CIFS_OBREAK_OP); | ||
2202 | /* SMB2 buffer freed by function above */ | ||
2203 | |||
2204 | if (rc) { | ||
2205 | cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); | ||
2206 | cFYI(1, "Send error in Lease Break = %d", rc); | ||
2207 | } | ||
2208 | |||
2209 | return rc; | ||
2210 | } | ||
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index f37a1b41b402..4cb4ced258cb 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h | |||
@@ -87,10 +87,6 @@ | |||
87 | 87 | ||
88 | #define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe) | 88 | #define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe) |
89 | 89 | ||
90 | #define SMB2_HEADER_SIZE __constant_le16_to_cpu(64) | ||
91 | |||
92 | #define SMB2_ERROR_STRUCTURE_SIZE2 __constant_le16_to_cpu(9) | ||
93 | |||
94 | /* | 90 | /* |
95 | * SMB2 Header Definition | 91 | * SMB2 Header Definition |
96 | * | 92 | * |
@@ -99,6 +95,9 @@ | |||
99 | * "PDU" : "Protocol Data Unit" (ie a network "frame") | 95 | * "PDU" : "Protocol Data Unit" (ie a network "frame") |
100 | * | 96 | * |
101 | */ | 97 | */ |
98 | |||
99 | #define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64) | ||
100 | |||
102 | struct smb2_hdr { | 101 | struct smb2_hdr { |
103 | __be32 smb2_buf_length; /* big endian on wire */ | 102 | __be32 smb2_buf_length; /* big endian on wire */ |
104 | /* length is only two or three bytes - with | 103 | /* length is only two or three bytes - with |
@@ -140,6 +139,9 @@ struct smb2_pdu { | |||
140 | * command code name for the struct. Note that structures must be packed. | 139 | * command code name for the struct. Note that structures must be packed. |
141 | * | 140 | * |
142 | */ | 141 | */ |
142 | |||
143 | #define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9) | ||
144 | |||
143 | struct smb2_err_rsp { | 145 | struct smb2_err_rsp { |
144 | struct smb2_hdr hdr; | 146 | struct smb2_hdr hdr; |
145 | __le16 StructureSize; | 147 | __le16 StructureSize; |
@@ -148,6 +150,10 @@ struct smb2_err_rsp { | |||
148 | __u8 ErrorData[1]; /* variable length */ | 150 | __u8 ErrorData[1]; /* variable length */ |
149 | } __packed; | 151 | } __packed; |
150 | 152 | ||
153 | #define SMB2_CLIENT_GUID_SIZE 16 | ||
154 | |||
155 | extern __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; | ||
156 | |||
151 | struct smb2_negotiate_req { | 157 | struct smb2_negotiate_req { |
152 | struct smb2_hdr hdr; | 158 | struct smb2_hdr hdr; |
153 | __le16 StructureSize; /* Must be 36 */ | 159 | __le16 StructureSize; /* Must be 36 */ |
@@ -155,11 +161,17 @@ struct smb2_negotiate_req { | |||
155 | __le16 SecurityMode; | 161 | __le16 SecurityMode; |
156 | __le16 Reserved; /* MBZ */ | 162 | __le16 Reserved; /* MBZ */ |
157 | __le32 Capabilities; | 163 | __le32 Capabilities; |
158 | __u8 ClientGUID[16]; /* MBZ */ | 164 | __u8 ClientGUID[SMB2_CLIENT_GUID_SIZE]; |
159 | __le64 ClientStartTime; /* MBZ */ | 165 | __le64 ClientStartTime; /* MBZ */ |
160 | __le16 Dialects[2]; /* variable length */ | 166 | __le16 Dialects[1]; /* One dialect (vers=) at a time for now */ |
161 | } __packed; | 167 | } __packed; |
162 | 168 | ||
169 | /* Dialects */ | ||
170 | #define SMB20_PROT_ID 0x0202 | ||
171 | #define SMB21_PROT_ID 0x0210 | ||
172 | #define SMB30_PROT_ID 0x0300 | ||
173 | #define BAD_PROT_ID 0xFFFF | ||
174 | |||
163 | /* SecurityMode flags */ | 175 | /* SecurityMode flags */ |
164 | #define SMB2_NEGOTIATE_SIGNING_ENABLED 0x0001 | 176 | #define SMB2_NEGOTIATE_SIGNING_ENABLED 0x0001 |
165 | #define SMB2_NEGOTIATE_SIGNING_REQUIRED 0x0002 | 177 | #define SMB2_NEGOTIATE_SIGNING_REQUIRED 0x0002 |
@@ -167,6 +179,10 @@ struct smb2_negotiate_req { | |||
167 | #define SMB2_GLOBAL_CAP_DFS 0x00000001 | 179 | #define SMB2_GLOBAL_CAP_DFS 0x00000001 |
168 | #define SMB2_GLOBAL_CAP_LEASING 0x00000002 /* Resp only New to SMB2.1 */ | 180 | #define SMB2_GLOBAL_CAP_LEASING 0x00000002 /* Resp only New to SMB2.1 */ |
169 | #define SMB2_GLOBAL_CAP_LARGE_MTU 0X00000004 /* Resp only New to SMB2.1 */ | 181 | #define SMB2_GLOBAL_CAP_LARGE_MTU 0X00000004 /* Resp only New to SMB2.1 */ |
182 | #define SMB2_GLOBAL_CAP_MULTI_CHANNEL 0x00000008 /* New to SMB3 */ | ||
183 | #define SMB2_GLOBAL_CAP_PERSISTENT_HANDLES 0x00000010 /* New to SMB3 */ | ||
184 | #define SMB2_GLOBAL_CAP_DIRECTORY_LEASING 0x00000020 /* New to SMB3 */ | ||
185 | #define SMB2_GLOBAL_CAP_ENCRYPTION 0x00000040 /* New to SMB3 */ | ||
170 | /* Internal types */ | 186 | /* Internal types */ |
171 | #define SMB2_NT_FIND 0x00100000 | 187 | #define SMB2_NT_FIND 0x00100000 |
172 | #define SMB2_LARGE_FILES 0x00200000 | 188 | #define SMB2_LARGE_FILES 0x00200000 |
@@ -305,6 +321,8 @@ struct smb2_tree_disconnect_rsp { | |||
305 | #define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08 | 321 | #define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08 |
306 | #define SMB2_OPLOCK_LEVEL_BATCH 0x09 | 322 | #define SMB2_OPLOCK_LEVEL_BATCH 0x09 |
307 | #define SMB2_OPLOCK_LEVEL_LEASE 0xFF | 323 | #define SMB2_OPLOCK_LEVEL_LEASE 0xFF |
324 | /* Non-spec internal type */ | ||
325 | #define SMB2_OPLOCK_LEVEL_NOCHANGE 0x99 | ||
308 | 326 | ||
309 | /* Desired Access Flags */ | 327 | /* Desired Access Flags */ |
310 | #define FILE_READ_DATA_LE cpu_to_le32(0x00000001) | 328 | #define FILE_READ_DATA_LE cpu_to_le32(0x00000001) |
@@ -402,7 +420,7 @@ struct smb2_create_req { | |||
402 | __le16 NameLength; | 420 | __le16 NameLength; |
403 | __le32 CreateContextsOffset; | 421 | __le32 CreateContextsOffset; |
404 | __le32 CreateContextsLength; | 422 | __le32 CreateContextsLength; |
405 | __u8 Buffer[1]; | 423 | __u8 Buffer[8]; |
406 | } __packed; | 424 | } __packed; |
407 | 425 | ||
408 | struct smb2_create_rsp { | 426 | struct smb2_create_rsp { |
@@ -426,6 +444,39 @@ struct smb2_create_rsp { | |||
426 | __u8 Buffer[1]; | 444 | __u8 Buffer[1]; |
427 | } __packed; | 445 | } __packed; |
428 | 446 | ||
447 | struct create_context { | ||
448 | __le32 Next; | ||
449 | __le16 NameOffset; | ||
450 | __le16 NameLength; | ||
451 | __le16 Reserved; | ||
452 | __le16 DataOffset; | ||
453 | __le32 DataLength; | ||
454 | __u8 Buffer[0]; | ||
455 | } __packed; | ||
456 | |||
457 | #define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00) | ||
458 | #define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01) | ||
459 | #define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02) | ||
460 | #define SMB2_LEASE_WRITE_CACHING __constant_cpu_to_le32(0x04) | ||
461 | |||
462 | #define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02) | ||
463 | |||
464 | #define SMB2_LEASE_KEY_SIZE 16 | ||
465 | |||
466 | struct lease_context { | ||
467 | __le64 LeaseKeyLow; | ||
468 | __le64 LeaseKeyHigh; | ||
469 | __le32 LeaseState; | ||
470 | __le32 LeaseFlags; | ||
471 | __le64 LeaseDuration; | ||
472 | } __packed; | ||
473 | |||
474 | struct create_lease { | ||
475 | struct create_context ccontext; | ||
476 | __u8 Name[8]; | ||
477 | struct lease_context lcontext; | ||
478 | } __packed; | ||
479 | |||
429 | /* Currently defined values for close flags */ | 480 | /* Currently defined values for close flags */ |
430 | #define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) | 481 | #define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) |
431 | struct smb2_close_req { | 482 | struct smb2_close_req { |
@@ -451,6 +502,108 @@ struct smb2_close_rsp { | |||
451 | __le32 Attributes; | 502 | __le32 Attributes; |
452 | } __packed; | 503 | } __packed; |
453 | 504 | ||
505 | struct smb2_flush_req { | ||
506 | struct smb2_hdr hdr; | ||
507 | __le16 StructureSize; /* Must be 24 */ | ||
508 | __le16 Reserved1; | ||
509 | __le32 Reserved2; | ||
510 | __u64 PersistentFileId; /* opaque endianness */ | ||
511 | __u64 VolatileFileId; /* opaque endianness */ | ||
512 | } __packed; | ||
513 | |||
514 | struct smb2_flush_rsp { | ||
515 | struct smb2_hdr hdr; | ||
516 | __le16 StructureSize; | ||
517 | __le16 Reserved; | ||
518 | } __packed; | ||
519 | |||
520 | struct smb2_read_req { | ||
521 | struct smb2_hdr hdr; | ||
522 | __le16 StructureSize; /* Must be 49 */ | ||
523 | __u8 Padding; /* offset from start of SMB2 header to place read */ | ||
524 | __u8 Reserved; | ||
525 | __le32 Length; | ||
526 | __le64 Offset; | ||
527 | __u64 PersistentFileId; /* opaque endianness */ | ||
528 | __u64 VolatileFileId; /* opaque endianness */ | ||
529 | __le32 MinimumCount; | ||
530 | __le32 Channel; /* Reserved MBZ */ | ||
531 | __le32 RemainingBytes; | ||
532 | __le16 ReadChannelInfoOffset; /* Reserved MBZ */ | ||
533 | __le16 ReadChannelInfoLength; /* Reserved MBZ */ | ||
534 | __u8 Buffer[1]; | ||
535 | } __packed; | ||
536 | |||
537 | struct smb2_read_rsp { | ||
538 | struct smb2_hdr hdr; | ||
539 | __le16 StructureSize; /* Must be 17 */ | ||
540 | __u8 DataOffset; | ||
541 | __u8 Reserved; | ||
542 | __le32 DataLength; | ||
543 | __le32 DataRemaining; | ||
544 | __u32 Reserved2; | ||
545 | __u8 Buffer[1]; | ||
546 | } __packed; | ||
547 | |||
548 | /* For write request Flags field below the following flag is defined: */ | ||
549 | #define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001 | ||
550 | |||
551 | struct smb2_write_req { | ||
552 | struct smb2_hdr hdr; | ||
553 | __le16 StructureSize; /* Must be 49 */ | ||
554 | __le16 DataOffset; /* offset from start of SMB2 header to write data */ | ||
555 | __le32 Length; | ||
556 | __le64 Offset; | ||
557 | __u64 PersistentFileId; /* opaque endianness */ | ||
558 | __u64 VolatileFileId; /* opaque endianness */ | ||
559 | __le32 Channel; /* Reserved MBZ */ | ||
560 | __le32 RemainingBytes; | ||
561 | __le16 WriteChannelInfoOffset; /* Reserved MBZ */ | ||
562 | __le16 WriteChannelInfoLength; /* Reserved MBZ */ | ||
563 | __le32 Flags; | ||
564 | __u8 Buffer[1]; | ||
565 | } __packed; | ||
566 | |||
567 | struct smb2_write_rsp { | ||
568 | struct smb2_hdr hdr; | ||
569 | __le16 StructureSize; /* Must be 17 */ | ||
570 | __u8 DataOffset; | ||
571 | __u8 Reserved; | ||
572 | __le32 DataLength; | ||
573 | __le32 DataRemaining; | ||
574 | __u32 Reserved2; | ||
575 | __u8 Buffer[1]; | ||
576 | } __packed; | ||
577 | |||
578 | #define SMB2_LOCKFLAG_SHARED_LOCK 0x0001 | ||
579 | #define SMB2_LOCKFLAG_EXCLUSIVE_LOCK 0x0002 | ||
580 | #define SMB2_LOCKFLAG_UNLOCK 0x0004 | ||
581 | #define SMB2_LOCKFLAG_FAIL_IMMEDIATELY 0x0010 | ||
582 | |||
583 | struct smb2_lock_element { | ||
584 | __le64 Offset; | ||
585 | __le64 Length; | ||
586 | __le32 Flags; | ||
587 | __le32 Reserved; | ||
588 | } __packed; | ||
589 | |||
590 | struct smb2_lock_req { | ||
591 | struct smb2_hdr hdr; | ||
592 | __le16 StructureSize; /* Must be 48 */ | ||
593 | __le16 LockCount; | ||
594 | __le32 Reserved; | ||
595 | __u64 PersistentFileId; /* opaque endianness */ | ||
596 | __u64 VolatileFileId; /* opaque endianness */ | ||
597 | /* Followed by at least one */ | ||
598 | struct smb2_lock_element locks[1]; | ||
599 | } __packed; | ||
600 | |||
601 | struct smb2_lock_rsp { | ||
602 | struct smb2_hdr hdr; | ||
603 | __le16 StructureSize; /* Must be 4 */ | ||
604 | __le16 Reserved; | ||
605 | } __packed; | ||
606 | |||
454 | struct smb2_echo_req { | 607 | struct smb2_echo_req { |
455 | struct smb2_hdr hdr; | 608 | struct smb2_hdr hdr; |
456 | __le16 StructureSize; /* Must be 4 */ | 609 | __le16 StructureSize; /* Must be 4 */ |
@@ -463,6 +616,34 @@ struct smb2_echo_rsp { | |||
463 | __u16 Reserved; | 616 | __u16 Reserved; |
464 | } __packed; | 617 | } __packed; |
465 | 618 | ||
619 | /* search (query_directory) Flags field */ | ||
620 | #define SMB2_RESTART_SCANS 0x01 | ||
621 | #define SMB2_RETURN_SINGLE_ENTRY 0x02 | ||
622 | #define SMB2_INDEX_SPECIFIED 0x04 | ||
623 | #define SMB2_REOPEN 0x10 | ||
624 | |||
625 | struct smb2_query_directory_req { | ||
626 | struct smb2_hdr hdr; | ||
627 | __le16 StructureSize; /* Must be 33 */ | ||
628 | __u8 FileInformationClass; | ||
629 | __u8 Flags; | ||
630 | __le32 FileIndex; | ||
631 | __u64 PersistentFileId; /* opaque endianness */ | ||
632 | __u64 VolatileFileId; /* opaque endianness */ | ||
633 | __le16 FileNameOffset; | ||
634 | __le16 FileNameLength; | ||
635 | __le32 OutputBufferLength; | ||
636 | __u8 Buffer[1]; | ||
637 | } __packed; | ||
638 | |||
639 | struct smb2_query_directory_rsp { | ||
640 | struct smb2_hdr hdr; | ||
641 | __le16 StructureSize; /* Must be 9 */ | ||
642 | __le16 OutputBufferOffset; | ||
643 | __le32 OutputBufferLength; | ||
644 | __u8 Buffer[1]; | ||
645 | } __packed; | ||
646 | |||
466 | /* Possible InfoType values */ | 647 | /* Possible InfoType values */ |
467 | #define SMB2_O_INFO_FILE 0x01 | 648 | #define SMB2_O_INFO_FILE 0x01 |
468 | #define SMB2_O_INFO_FILESYSTEM 0x02 | 649 | #define SMB2_O_INFO_FILESYSTEM 0x02 |
@@ -493,11 +674,84 @@ struct smb2_query_info_rsp { | |||
493 | __u8 Buffer[1]; | 674 | __u8 Buffer[1]; |
494 | } __packed; | 675 | } __packed; |
495 | 676 | ||
677 | struct smb2_set_info_req { | ||
678 | struct smb2_hdr hdr; | ||
679 | __le16 StructureSize; /* Must be 33 */ | ||
680 | __u8 InfoType; | ||
681 | __u8 FileInfoClass; | ||
682 | __le32 BufferLength; | ||
683 | __le16 BufferOffset; | ||
684 | __u16 Reserved; | ||
685 | __le32 AdditionalInformation; | ||
686 | __u64 PersistentFileId; /* opaque endianness */ | ||
687 | __u64 VolatileFileId; /* opaque endianness */ | ||
688 | __u8 Buffer[1]; | ||
689 | } __packed; | ||
690 | |||
691 | struct smb2_set_info_rsp { | ||
692 | struct smb2_hdr hdr; | ||
693 | __le16 StructureSize; /* Must be 2 */ | ||
694 | } __packed; | ||
695 | |||
696 | struct smb2_oplock_break { | ||
697 | struct smb2_hdr hdr; | ||
698 | __le16 StructureSize; /* Must be 24 */ | ||
699 | __u8 OplockLevel; | ||
700 | __u8 Reserved; | ||
701 | __le32 Reserved2; | ||
702 | __u64 PersistentFid; | ||
703 | __u64 VolatileFid; | ||
704 | } __packed; | ||
705 | |||
706 | #define SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED cpu_to_le32(0x01) | ||
707 | |||
708 | struct smb2_lease_break { | ||
709 | struct smb2_hdr hdr; | ||
710 | __le16 StructureSize; /* Must be 44 */ | ||
711 | __le16 Reserved; | ||
712 | __le32 Flags; | ||
713 | __u8 LeaseKey[16]; | ||
714 | __le32 CurrentLeaseState; | ||
715 | __le32 NewLeaseState; | ||
716 | __le32 BreakReason; | ||
717 | __le32 AccessMaskHint; | ||
718 | __le32 ShareMaskHint; | ||
719 | } __packed; | ||
720 | |||
721 | struct smb2_lease_ack { | ||
722 | struct smb2_hdr hdr; | ||
723 | __le16 StructureSize; /* Must be 36 */ | ||
724 | __le16 Reserved; | ||
725 | __le32 Flags; | ||
726 | __u8 LeaseKey[16]; | ||
727 | __le32 LeaseState; | ||
728 | __le64 LeaseDuration; | ||
729 | } __packed; | ||
730 | |||
496 | /* | 731 | /* |
497 | * PDU infolevel structure definitions | 732 | * PDU infolevel structure definitions |
498 | * BB consider moving to a different header | 733 | * BB consider moving to a different header |
499 | */ | 734 | */ |
500 | 735 | ||
736 | /* File System Information Classes */ | ||
737 | #define FS_VOLUME_INFORMATION 1 /* Query */ | ||
738 | #define FS_LABEL_INFORMATION 2 /* Set */ | ||
739 | #define FS_SIZE_INFORMATION 3 /* Query */ | ||
740 | #define FS_DEVICE_INFORMATION 4 /* Query */ | ||
741 | #define FS_ATTRIBUTE_INFORMATION 5 /* Query */ | ||
742 | #define FS_CONTROL_INFORMATION 6 /* Query, Set */ | ||
743 | #define FS_FULL_SIZE_INFORMATION 7 /* Query */ | ||
744 | #define FS_OBJECT_ID_INFORMATION 8 /* Query, Set */ | ||
745 | #define FS_DRIVER_PATH_INFORMATION 9 /* Query */ | ||
746 | |||
747 | struct smb2_fs_full_size_info { | ||
748 | __le64 TotalAllocationUnits; | ||
749 | __le64 CallerAvailableAllocationUnits; | ||
750 | __le64 ActualAvailableAllocationUnits; | ||
751 | __le32 SectorsPerAllocationUnit; | ||
752 | __le32 BytesPerSector; | ||
753 | } __packed; | ||
754 | |||
501 | /* partial list of QUERY INFO levels */ | 755 | /* partial list of QUERY INFO levels */ |
502 | #define FILE_DIRECTORY_INFORMATION 1 | 756 | #define FILE_DIRECTORY_INFORMATION 1 |
503 | #define FILE_FULL_DIRECTORY_INFORMATION 2 | 757 | #define FILE_FULL_DIRECTORY_INFORMATION 2 |
@@ -546,6 +800,28 @@ struct smb2_query_info_rsp { | |||
546 | #define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50 | 800 | #define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50 |
547 | #define FILE_STANDARD_LINK_INFORMATION 54 | 801 | #define FILE_STANDARD_LINK_INFORMATION 54 |
548 | 802 | ||
803 | struct smb2_file_internal_info { | ||
804 | __le64 IndexNumber; | ||
805 | } __packed; /* level 6 Query */ | ||
806 | |||
807 | struct smb2_file_rename_info { /* encoding of request for level 10 */ | ||
808 | __u8 ReplaceIfExists; /* 1 = replace existing target with new */ | ||
809 | /* 0 = fail if target already exists */ | ||
810 | __u8 Reserved[7]; | ||
811 | __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ | ||
812 | __le32 FileNameLength; | ||
813 | char FileName[0]; /* New name to be assigned */ | ||
814 | } __packed; /* level 10 Set */ | ||
815 | |||
816 | struct smb2_file_link_info { /* encoding of request for level 11 */ | ||
817 | __u8 ReplaceIfExists; /* 1 = replace existing link with new */ | ||
818 | /* 0 = fail if link already exists */ | ||
819 | __u8 Reserved[7]; | ||
820 | __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ | ||
821 | __le32 FileNameLength; | ||
822 | char FileName[0]; /* Name to be assigned to new link */ | ||
823 | } __packed; /* level 11 Set */ | ||
824 | |||
549 | /* | 825 | /* |
550 | * This level 18, although with struct with same name is different from cifs | 826 | * This level 18, although with struct with same name is different from cifs |
551 | * level 0x107. Level 0x107 has an extra u64 between AccessFlags and | 827 | * level 0x107. Level 0x107 has an extra u64 between AccessFlags and |
@@ -574,4 +850,8 @@ struct smb2_file_all_info { /* data block encoding of response to level 18 */ | |||
574 | char FileName[1]; | 850 | char FileName[1]; |
575 | } __packed; /* level 18 Query */ | 851 | } __packed; /* level 18 Query */ |
576 | 852 | ||
853 | struct smb2_file_eof_info { /* encoding of request for level 10 */ | ||
854 | __le64 EndOfFile; /* new end of file value */ | ||
855 | } __packed; /* level 20 Set */ | ||
856 | |||
577 | #endif /* _SMB2PDU_H */ | 857 | #endif /* _SMB2PDU_H */ |
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index bfaa7b148afd..7d25f8b14f93 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/key-type.h> | 26 | #include <linux/key-type.h> |
27 | 27 | ||
28 | struct statfs; | 28 | struct statfs; |
29 | struct smb_rqst; | ||
29 | 30 | ||
30 | /* | 31 | /* |
31 | ***************************************************************** | 32 | ***************************************************************** |
@@ -34,24 +35,35 @@ struct statfs; | |||
34 | */ | 35 | */ |
35 | extern int map_smb2_to_linux_error(char *buf, bool log_err); | 36 | extern int map_smb2_to_linux_error(char *buf, bool log_err); |
36 | extern int smb2_check_message(char *buf, unsigned int length); | 37 | extern int smb2_check_message(char *buf, unsigned int length); |
37 | extern unsigned int smb2_calc_size(struct smb2_hdr *hdr); | 38 | extern unsigned int smb2_calc_size(void *buf); |
38 | extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr); | 39 | extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr); |
39 | extern __le16 *cifs_convert_path_to_utf16(const char *from, | 40 | extern __le16 *cifs_convert_path_to_utf16(const char *from, |
40 | struct cifs_sb_info *cifs_sb); | 41 | struct cifs_sb_info *cifs_sb); |
41 | 42 | ||
43 | extern int smb2_verify_signature(struct smb_rqst *, struct TCP_Server_Info *); | ||
42 | extern int smb2_check_receive(struct mid_q_entry *mid, | 44 | extern int smb2_check_receive(struct mid_q_entry *mid, |
43 | struct TCP_Server_Info *server, bool log_error); | 45 | struct TCP_Server_Info *server, bool log_error); |
44 | extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, | 46 | extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses, |
45 | unsigned int nvec, struct mid_q_entry **ret_mid); | 47 | struct smb_rqst *rqst); |
46 | extern int smb2_setup_async_request(struct TCP_Server_Info *server, | 48 | extern struct mid_q_entry *smb2_setup_async_request( |
47 | struct kvec *iov, unsigned int nvec, | 49 | struct TCP_Server_Info *server, struct smb_rqst *rqst); |
48 | struct mid_q_entry **ret_mid); | ||
49 | extern void smb2_echo_request(struct work_struct *work); | 50 | extern void smb2_echo_request(struct work_struct *work); |
51 | extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); | ||
52 | extern __u8 smb2_map_lease_to_oplock(__le32 lease_state); | ||
53 | extern bool smb2_is_valid_oplock_break(char *buffer, | ||
54 | struct TCP_Server_Info *srv); | ||
50 | 55 | ||
56 | extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst, | ||
57 | struct smb2_file_all_info *src); | ||
51 | extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, | 58 | extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, |
52 | struct cifs_sb_info *cifs_sb, | 59 | struct cifs_sb_info *cifs_sb, |
53 | const char *full_path, FILE_ALL_INFO *data, | 60 | const char *full_path, FILE_ALL_INFO *data, |
54 | bool *adjust_tz); | 61 | bool *adjust_tz); |
62 | extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, | ||
63 | const char *full_path, __u64 size, | ||
64 | struct cifs_sb_info *cifs_sb, bool set_alloc); | ||
65 | extern int smb2_set_file_info(struct inode *inode, const char *full_path, | ||
66 | FILE_BASIC_INFO *buf, const unsigned int xid); | ||
55 | extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, | 67 | extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, |
56 | const char *name, struct cifs_sb_info *cifs_sb); | 68 | const char *name, struct cifs_sb_info *cifs_sb); |
57 | extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, | 69 | extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, |
@@ -59,6 +71,24 @@ extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, | |||
59 | struct cifs_tcon *tcon, const unsigned int xid); | 71 | struct cifs_tcon *tcon, const unsigned int xid); |
60 | extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, | 72 | extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, |
61 | const char *name, struct cifs_sb_info *cifs_sb); | 73 | const char *name, struct cifs_sb_info *cifs_sb); |
74 | extern int smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, | ||
75 | const char *name, struct cifs_sb_info *cifs_sb); | ||
76 | extern int smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, | ||
77 | const char *from_name, const char *to_name, | ||
78 | struct cifs_sb_info *cifs_sb); | ||
79 | extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, | ||
80 | const char *from_name, const char *to_name, | ||
81 | struct cifs_sb_info *cifs_sb); | ||
82 | |||
83 | extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, | ||
84 | const char *full_path, int disposition, | ||
85 | int desired_access, int create_options, | ||
86 | struct cifs_fid *fid, __u32 *oplock, | ||
87 | FILE_ALL_INFO *buf, struct cifs_sb_info *cifs_sb); | ||
88 | extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); | ||
89 | extern int smb2_unlock_range(struct cifsFileInfo *cfile, | ||
90 | struct file_lock *flock, const unsigned int xid); | ||
91 | extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile); | ||
62 | 92 | ||
63 | /* | 93 | /* |
64 | * SMB2 Worker functions - most of protocol specific implementation details | 94 | * SMB2 Worker functions - most of protocol specific implementation details |
@@ -75,12 +105,55 @@ extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon); | |||
75 | extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, | 105 | extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, |
76 | __le16 *path, u64 *persistent_fid, u64 *volatile_fid, | 106 | __le16 *path, u64 *persistent_fid, u64 *volatile_fid, |
77 | __u32 desired_access, __u32 create_disposition, | 107 | __u32 desired_access, __u32 create_disposition, |
78 | __u32 file_attributes, __u32 create_options); | 108 | __u32 file_attributes, __u32 create_options, |
109 | __u8 *oplock, struct smb2_file_all_info *buf); | ||
79 | extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, | 110 | extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, |
80 | u64 persistent_file_id, u64 volatile_file_id); | 111 | u64 persistent_file_id, u64 volatile_file_id); |
112 | extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, | ||
113 | u64 persistent_file_id, u64 volatile_file_id); | ||
81 | extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, | 114 | extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, |
82 | u64 persistent_file_id, u64 volatile_file_id, | 115 | u64 persistent_file_id, u64 volatile_file_id, |
83 | struct smb2_file_all_info *data); | 116 | struct smb2_file_all_info *data); |
117 | extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, | ||
118 | u64 persistent_fid, u64 volatile_fid, | ||
119 | __le64 *uniqueid); | ||
120 | extern int smb2_async_readv(struct cifs_readdata *rdata); | ||
121 | extern int SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, | ||
122 | unsigned int *nbytes, char **buf, int *buf_type); | ||
123 | extern int smb2_async_writev(struct cifs_writedata *wdata); | ||
124 | extern int SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, | ||
125 | unsigned int *nbytes, struct kvec *iov, int n_vec); | ||
84 | extern int SMB2_echo(struct TCP_Server_Info *server); | 126 | extern int SMB2_echo(struct TCP_Server_Info *server); |
127 | extern int SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, | ||
128 | u64 persistent_fid, u64 volatile_fid, int index, | ||
129 | struct cifs_search_info *srch_inf); | ||
130 | extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon, | ||
131 | u64 persistent_fid, u64 volatile_fid, | ||
132 | __le16 *target_file); | ||
133 | extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon, | ||
134 | u64 persistent_fid, u64 volatile_fid, | ||
135 | __le16 *target_file); | ||
136 | extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, | ||
137 | u64 persistent_fid, u64 volatile_fid, u32 pid, | ||
138 | __le64 *eof); | ||
139 | extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, | ||
140 | u64 persistent_fid, u64 volatile_fid, | ||
141 | FILE_BASIC_INFO *buf); | ||
142 | extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, | ||
143 | const u64 persistent_fid, const u64 volatile_fid, | ||
144 | const __u8 oplock_level); | ||
145 | extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, | ||
146 | u64 persistent_file_id, u64 volatile_file_id, | ||
147 | struct kstatfs *FSData); | ||
148 | extern int SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon, | ||
149 | const __u64 persist_fid, const __u64 volatile_fid, | ||
150 | const __u32 pid, const __u64 length, const __u64 offset, | ||
151 | const __u32 lockFlags, const bool wait); | ||
152 | extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, | ||
153 | const __u64 persist_fid, const __u64 volatile_fid, | ||
154 | const __u32 pid, const __u32 num_lock, | ||
155 | struct smb2_lock_element *buf); | ||
156 | extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, | ||
157 | __u8 *lease_key, const __le32 lease_state); | ||
85 | 158 | ||
86 | #endif /* _SMB2PROTO_H */ | 159 | #endif /* _SMB2PROTO_H */ |
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 31f5d420b3ea..2a5fdf26f79f 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c | |||
@@ -30,12 +30,156 @@ | |||
30 | #include <linux/uaccess.h> | 30 | #include <linux/uaccess.h> |
31 | #include <asm/processor.h> | 31 | #include <asm/processor.h> |
32 | #include <linux/mempool.h> | 32 | #include <linux/mempool.h> |
33 | #include <linux/highmem.h> | ||
33 | #include "smb2pdu.h" | 34 | #include "smb2pdu.h" |
34 | #include "cifsglob.h" | 35 | #include "cifsglob.h" |
35 | #include "cifsproto.h" | 36 | #include "cifsproto.h" |
36 | #include "smb2proto.h" | 37 | #include "smb2proto.h" |
37 | #include "cifs_debug.h" | 38 | #include "cifs_debug.h" |
38 | #include "smb2status.h" | 39 | #include "smb2status.h" |
40 | #include "smb2glob.h" | ||
41 | |||
42 | static int | ||
43 | smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) | ||
44 | { | ||
45 | int i, rc; | ||
46 | unsigned char smb2_signature[SMB2_HMACSHA256_SIZE]; | ||
47 | unsigned char *sigptr = smb2_signature; | ||
48 | struct kvec *iov = rqst->rq_iov; | ||
49 | int n_vec = rqst->rq_nvec; | ||
50 | struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; | ||
51 | |||
52 | memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); | ||
53 | memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); | ||
54 | |||
55 | rc = crypto_shash_setkey(server->secmech.hmacsha256, | ||
56 | server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); | ||
57 | if (rc) { | ||
58 | cERROR(1, "%s: Could not update with response\n", __func__); | ||
59 | return rc; | ||
60 | } | ||
61 | |||
62 | rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); | ||
63 | if (rc) { | ||
64 | cERROR(1, "%s: Could not init md5\n", __func__); | ||
65 | return rc; | ||
66 | } | ||
67 | |||
68 | for (i = 0; i < n_vec; i++) { | ||
69 | if (iov[i].iov_len == 0) | ||
70 | continue; | ||
71 | if (iov[i].iov_base == NULL) { | ||
72 | cERROR(1, "null iovec entry"); | ||
73 | return -EIO; | ||
74 | } | ||
75 | /* | ||
76 | * The first entry includes a length field (which does not get | ||
77 | * signed that occupies the first 4 bytes before the header). | ||
78 | */ | ||
79 | if (i == 0) { | ||
80 | if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ | ||
81 | break; /* nothing to sign or corrupt header */ | ||
82 | rc = | ||
83 | crypto_shash_update( | ||
84 | &server->secmech.sdeschmacsha256->shash, | ||
85 | iov[i].iov_base + 4, iov[i].iov_len - 4); | ||
86 | } else { | ||
87 | rc = | ||
88 | crypto_shash_update( | ||
89 | &server->secmech.sdeschmacsha256->shash, | ||
90 | iov[i].iov_base, iov[i].iov_len); | ||
91 | } | ||
92 | if (rc) { | ||
93 | cERROR(1, "%s: Could not update with payload\n", | ||
94 | __func__); | ||
95 | return rc; | ||
96 | } | ||
97 | } | ||
98 | |||
99 | /* now hash over the rq_pages array */ | ||
100 | for (i = 0; i < rqst->rq_npages; i++) { | ||
101 | struct kvec p_iov; | ||
102 | |||
103 | cifs_rqst_page_to_kvec(rqst, i, &p_iov); | ||
104 | crypto_shash_update(&server->secmech.sdeschmacsha256->shash, | ||
105 | p_iov.iov_base, p_iov.iov_len); | ||
106 | kunmap(rqst->rq_pages[i]); | ||
107 | } | ||
108 | |||
109 | rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash, | ||
110 | sigptr); | ||
111 | if (rc) | ||
112 | cERROR(1, "%s: Could not generate sha256 hash\n", __func__); | ||
113 | |||
114 | memcpy(smb2_pdu->Signature, sigptr, SMB2_SIGNATURE_SIZE); | ||
115 | |||
116 | return rc; | ||
117 | } | ||
118 | |||
119 | /* must be called with server->srv_mutex held */ | ||
120 | static int | ||
121 | smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) | ||
122 | { | ||
123 | int rc = 0; | ||
124 | struct smb2_hdr *smb2_pdu = rqst->rq_iov[0].iov_base; | ||
125 | |||
126 | if (!(smb2_pdu->Flags & SMB2_FLAGS_SIGNED) || | ||
127 | server->tcpStatus == CifsNeedNegotiate) | ||
128 | return rc; | ||
129 | |||
130 | if (!server->session_estab) { | ||
131 | strncpy(smb2_pdu->Signature, "BSRSPYL", 8); | ||
132 | return rc; | ||
133 | } | ||
134 | |||
135 | rc = smb2_calc_signature(rqst, server); | ||
136 | |||
137 | return rc; | ||
138 | } | ||
139 | |||
140 | int | ||
141 | smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) | ||
142 | { | ||
143 | unsigned int rc; | ||
144 | char server_response_sig[16]; | ||
145 | struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)rqst->rq_iov[0].iov_base; | ||
146 | |||
147 | if ((smb2_pdu->Command == SMB2_NEGOTIATE) || | ||
148 | (smb2_pdu->Command == SMB2_OPLOCK_BREAK) || | ||
149 | (!server->session_estab)) | ||
150 | return 0; | ||
151 | |||
152 | /* | ||
153 | * BB what if signatures are supposed to be on for session but | ||
154 | * server does not send one? BB | ||
155 | */ | ||
156 | |||
157 | /* Do not need to verify session setups with signature "BSRSPYL " */ | ||
158 | if (memcmp(smb2_pdu->Signature, "BSRSPYL ", 8) == 0) | ||
159 | cFYI(1, "dummy signature received for smb command 0x%x", | ||
160 | smb2_pdu->Command); | ||
161 | |||
162 | /* | ||
163 | * Save off the origiginal signature so we can modify the smb and check | ||
164 | * our calculated signature against what the server sent. | ||
165 | */ | ||
166 | memcpy(server_response_sig, smb2_pdu->Signature, SMB2_SIGNATURE_SIZE); | ||
167 | |||
168 | memset(smb2_pdu->Signature, 0, SMB2_SIGNATURE_SIZE); | ||
169 | |||
170 | mutex_lock(&server->srv_mutex); | ||
171 | rc = smb2_calc_signature(rqst, server); | ||
172 | mutex_unlock(&server->srv_mutex); | ||
173 | |||
174 | if (rc) | ||
175 | return rc; | ||
176 | |||
177 | if (memcmp(server_response_sig, smb2_pdu->Signature, | ||
178 | SMB2_SIGNATURE_SIZE)) | ||
179 | return -EACCES; | ||
180 | else | ||
181 | return 0; | ||
182 | } | ||
39 | 183 | ||
40 | /* | 184 | /* |
41 | * Set message id for the request. Should be called after wait_for_free_request | 185 | * Set message id for the request. Should be called after wait_for_free_request |
@@ -115,58 +259,66 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, | |||
115 | bool log_error) | 259 | bool log_error) |
116 | { | 260 | { |
117 | unsigned int len = get_rfc1002_length(mid->resp_buf); | 261 | unsigned int len = get_rfc1002_length(mid->resp_buf); |
262 | struct kvec iov; | ||
263 | struct smb_rqst rqst = { .rq_iov = &iov, | ||
264 | .rq_nvec = 1 }; | ||
265 | |||
266 | iov.iov_base = (char *)mid->resp_buf; | ||
267 | iov.iov_len = get_rfc1002_length(mid->resp_buf) + 4; | ||
118 | 268 | ||
119 | dump_smb(mid->resp_buf, min_t(u32, 80, len)); | 269 | dump_smb(mid->resp_buf, min_t(u32, 80, len)); |
120 | /* convert the length into a more usable form */ | 270 | /* convert the length into a more usable form */ |
121 | /* BB - uncomment with SMB2 signing implementation */ | 271 | if ((len > 24) && |
122 | /* if ((len > 24) && | ||
123 | (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) { | 272 | (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) { |
124 | if (smb2_verify_signature(mid->resp_buf, server)) | 273 | int rc; |
125 | cERROR(1, "Unexpected SMB signature"); | 274 | |
126 | } */ | 275 | rc = smb2_verify_signature(&rqst, server); |
276 | if (rc) | ||
277 | cERROR(1, "SMB signature verification returned error = " | ||
278 | "%d", rc); | ||
279 | } | ||
127 | 280 | ||
128 | return map_smb2_to_linux_error(mid->resp_buf, log_error); | 281 | return map_smb2_to_linux_error(mid->resp_buf, log_error); |
129 | } | 282 | } |
130 | 283 | ||
131 | int | 284 | struct mid_q_entry * |
132 | smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, | 285 | smb2_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst) |
133 | unsigned int nvec, struct mid_q_entry **ret_mid) | ||
134 | { | 286 | { |
135 | int rc; | 287 | int rc; |
136 | struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base; | 288 | struct smb2_hdr *hdr = (struct smb2_hdr *)rqst->rq_iov[0].iov_base; |
137 | struct mid_q_entry *mid; | 289 | struct mid_q_entry *mid; |
138 | 290 | ||
139 | smb2_seq_num_into_buf(ses->server, hdr); | 291 | smb2_seq_num_into_buf(ses->server, hdr); |
140 | 292 | ||
141 | rc = smb2_get_mid_entry(ses, hdr, &mid); | 293 | rc = smb2_get_mid_entry(ses, hdr, &mid); |
142 | if (rc) | 294 | if (rc) |
143 | return rc; | 295 | return ERR_PTR(rc); |
144 | /* rc = smb2_sign_smb2(iov, nvec, ses->server); | 296 | rc = smb2_sign_rqst(rqst, ses->server); |
145 | if (rc) | 297 | if (rc) { |
146 | delete_mid(mid); */ | 298 | cifs_delete_mid(mid); |
147 | *ret_mid = mid; | 299 | return ERR_PTR(rc); |
148 | return rc; | 300 | } |
301 | return mid; | ||
149 | } | 302 | } |
150 | 303 | ||
151 | int | 304 | struct mid_q_entry * |
152 | smb2_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, | 305 | smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) |
153 | unsigned int nvec, struct mid_q_entry **ret_mid) | ||
154 | { | 306 | { |
155 | int rc = 0; | 307 | int rc; |
156 | struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base; | 308 | struct smb2_hdr *hdr = (struct smb2_hdr *)rqst->rq_iov[0].iov_base; |
157 | struct mid_q_entry *mid; | 309 | struct mid_q_entry *mid; |
158 | 310 | ||
159 | smb2_seq_num_into_buf(server, hdr); | 311 | smb2_seq_num_into_buf(server, hdr); |
160 | 312 | ||
161 | mid = smb2_mid_entry_alloc(hdr, server); | 313 | mid = smb2_mid_entry_alloc(hdr, server); |
162 | if (mid == NULL) | 314 | if (mid == NULL) |
163 | return -ENOMEM; | 315 | return ERR_PTR(-ENOMEM); |
164 | 316 | ||
165 | /* rc = smb2_sign_smb2(iov, nvec, server); | 317 | rc = smb2_sign_rqst(rqst, server); |
166 | if (rc) { | 318 | if (rc) { |
167 | DeleteMidQEntry(mid); | 319 | DeleteMidQEntry(mid); |
168 | return rc; | 320 | return ERR_PTR(rc); |
169 | }*/ | 321 | } |
170 | *ret_mid = mid; | 322 | |
171 | return rc; | 323 | return mid; |
172 | } | 324 | } |
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 83867ef348df..2126ab185045 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -27,6 +27,8 @@ | |||
27 | #include <linux/net.h> | 27 | #include <linux/net.h> |
28 | #include <linux/delay.h> | 28 | #include <linux/delay.h> |
29 | #include <linux/freezer.h> | 29 | #include <linux/freezer.h> |
30 | #include <linux/tcp.h> | ||
31 | #include <linux/highmem.h> | ||
30 | #include <asm/uaccess.h> | 32 | #include <asm/uaccess.h> |
31 | #include <asm/processor.h> | 33 | #include <asm/processor.h> |
32 | #include <linux/mempool.h> | 34 | #include <linux/mempool.h> |
@@ -109,8 +111,8 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) | |||
109 | mempool_free(midEntry, cifs_mid_poolp); | 111 | mempool_free(midEntry, cifs_mid_poolp); |
110 | } | 112 | } |
111 | 113 | ||
112 | static void | 114 | void |
113 | delete_mid(struct mid_q_entry *mid) | 115 | cifs_delete_mid(struct mid_q_entry *mid) |
114 | { | 116 | { |
115 | spin_lock(&GlobalMid_Lock); | 117 | spin_lock(&GlobalMid_Lock); |
116 | list_del(&mid->qhead); | 118 | list_del(&mid->qhead); |
@@ -119,18 +121,29 @@ delete_mid(struct mid_q_entry *mid) | |||
119 | DeleteMidQEntry(mid); | 121 | DeleteMidQEntry(mid); |
120 | } | 122 | } |
121 | 123 | ||
124 | /* | ||
125 | * smb_send_kvec - send an array of kvecs to the server | ||
126 | * @server: Server to send the data to | ||
127 | * @iov: Pointer to array of kvecs | ||
128 | * @n_vec: length of kvec array | ||
129 | * @sent: amount of data sent on socket is stored here | ||
130 | * | ||
131 | * Our basic "send data to server" function. Should be called with srv_mutex | ||
132 | * held. The caller is responsible for handling the results. | ||
133 | */ | ||
122 | static int | 134 | static int |
123 | smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) | 135 | smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec, |
136 | size_t *sent) | ||
124 | { | 137 | { |
125 | int rc = 0; | 138 | int rc = 0; |
126 | int i = 0; | 139 | int i = 0; |
127 | struct msghdr smb_msg; | 140 | struct msghdr smb_msg; |
128 | unsigned int len = iov[0].iov_len; | 141 | unsigned int remaining; |
129 | unsigned int total_len; | 142 | size_t first_vec = 0; |
130 | int first_vec = 0; | ||
131 | unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); | ||
132 | struct socket *ssocket = server->ssocket; | 143 | struct socket *ssocket = server->ssocket; |
133 | 144 | ||
145 | *sent = 0; | ||
146 | |||
134 | if (ssocket == NULL) | 147 | if (ssocket == NULL) |
135 | return -ENOTSOCK; /* BB eventually add reconnect code here */ | 148 | return -ENOTSOCK; /* BB eventually add reconnect code here */ |
136 | 149 | ||
@@ -143,56 +156,60 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) | |||
143 | else | 156 | else |
144 | smb_msg.msg_flags = MSG_NOSIGNAL; | 157 | smb_msg.msg_flags = MSG_NOSIGNAL; |
145 | 158 | ||
146 | total_len = 0; | 159 | remaining = 0; |
147 | for (i = 0; i < n_vec; i++) | 160 | for (i = 0; i < n_vec; i++) |
148 | total_len += iov[i].iov_len; | 161 | remaining += iov[i].iov_len; |
149 | |||
150 | cFYI(1, "Sending smb: total_len %d", total_len); | ||
151 | dump_smb(iov[0].iov_base, len); | ||
152 | 162 | ||
153 | i = 0; | 163 | i = 0; |
154 | while (total_len) { | 164 | while (remaining) { |
165 | /* | ||
166 | * If blocking send, we try 3 times, since each can block | ||
167 | * for 5 seconds. For nonblocking we have to try more | ||
168 | * but wait increasing amounts of time allowing time for | ||
169 | * socket to clear. The overall time we wait in either | ||
170 | * case to send on the socket is about 15 seconds. | ||
171 | * Similarly we wait for 15 seconds for a response from | ||
172 | * the server in SendReceive[2] for the server to send | ||
173 | * a response back for most types of requests (except | ||
174 | * SMB Write past end of file which can be slow, and | ||
175 | * blocking lock operations). NFS waits slightly longer | ||
176 | * than CIFS, but this can make it take longer for | ||
177 | * nonresponsive servers to be detected and 15 seconds | ||
178 | * is more than enough time for modern networks to | ||
179 | * send a packet. In most cases if we fail to send | ||
180 | * after the retries we will kill the socket and | ||
181 | * reconnect which may clear the network problem. | ||
182 | */ | ||
155 | rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec], | 183 | rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec], |
156 | n_vec - first_vec, total_len); | 184 | n_vec - first_vec, remaining); |
157 | if ((rc == -ENOSPC) || (rc == -EAGAIN)) { | 185 | if (rc == -ENOSPC || rc == -EAGAIN) { |
158 | i++; | 186 | i++; |
159 | /* | 187 | if (i >= 14 || (!server->noblocksnd && (i > 2))) { |
160 | * If blocking send we try 3 times, since each can block | 188 | cERROR(1, "sends on sock %p stuck for 15 " |
161 | * for 5 seconds. For nonblocking we have to try more | 189 | "seconds", ssocket); |
162 | * but wait increasing amounts of time allowing time for | ||
163 | * socket to clear. The overall time we wait in either | ||
164 | * case to send on the socket is about 15 seconds. | ||
165 | * Similarly we wait for 15 seconds for a response from | ||
166 | * the server in SendReceive[2] for the server to send | ||
167 | * a response back for most types of requests (except | ||
168 | * SMB Write past end of file which can be slow, and | ||
169 | * blocking lock operations). NFS waits slightly longer | ||
170 | * than CIFS, but this can make it take longer for | ||
171 | * nonresponsive servers to be detected and 15 seconds | ||
172 | * is more than enough time for modern networks to | ||
173 | * send a packet. In most cases if we fail to send | ||
174 | * after the retries we will kill the socket and | ||
175 | * reconnect which may clear the network problem. | ||
176 | */ | ||
177 | if ((i >= 14) || (!server->noblocksnd && (i > 2))) { | ||
178 | cERROR(1, "sends on sock %p stuck for 15 seconds", | ||
179 | ssocket); | ||
180 | rc = -EAGAIN; | 190 | rc = -EAGAIN; |
181 | break; | 191 | break; |
182 | } | 192 | } |
183 | msleep(1 << i); | 193 | msleep(1 << i); |
184 | continue; | 194 | continue; |
185 | } | 195 | } |
196 | |||
186 | if (rc < 0) | 197 | if (rc < 0) |
187 | break; | 198 | break; |
188 | 199 | ||
189 | if (rc == total_len) { | 200 | /* send was at least partially successful */ |
190 | total_len = 0; | 201 | *sent += rc; |
202 | |||
203 | if (rc == remaining) { | ||
204 | remaining = 0; | ||
191 | break; | 205 | break; |
192 | } else if (rc > total_len) { | 206 | } |
193 | cERROR(1, "sent %d requested %d", rc, total_len); | 207 | |
208 | if (rc > remaining) { | ||
209 | cERROR(1, "sent %d requested %d", rc, remaining); | ||
194 | break; | 210 | break; |
195 | } | 211 | } |
212 | |||
196 | if (rc == 0) { | 213 | if (rc == 0) { |
197 | /* should never happen, letting socket clear before | 214 | /* should never happen, letting socket clear before |
198 | retrying is our only obvious option here */ | 215 | retrying is our only obvious option here */ |
@@ -200,7 +217,9 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) | |||
200 | msleep(500); | 217 | msleep(500); |
201 | continue; | 218 | continue; |
202 | } | 219 | } |
203 | total_len -= rc; | 220 | |
221 | remaining -= rc; | ||
222 | |||
204 | /* the line below resets i */ | 223 | /* the line below resets i */ |
205 | for (i = first_vec; i < n_vec; i++) { | 224 | for (i = first_vec; i < n_vec; i++) { |
206 | if (iov[i].iov_len) { | 225 | if (iov[i].iov_len) { |
@@ -215,16 +234,97 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) | |||
215 | } | 234 | } |
216 | } | 235 | } |
217 | } | 236 | } |
237 | |||
218 | i = 0; /* in case we get ENOSPC on the next send */ | 238 | i = 0; /* in case we get ENOSPC on the next send */ |
239 | rc = 0; | ||
219 | } | 240 | } |
241 | return rc; | ||
242 | } | ||
243 | |||
244 | /** | ||
245 | * rqst_page_to_kvec - Turn a slot in the smb_rqst page array into a kvec | ||
246 | * @rqst: pointer to smb_rqst | ||
247 | * @idx: index into the array of the page | ||
248 | * @iov: pointer to struct kvec that will hold the result | ||
249 | * | ||
250 | * Helper function to convert a slot in the rqst->rq_pages array into a kvec. | ||
251 | * The page will be kmapped and the address placed into iov_base. The length | ||
252 | * will then be adjusted according to the ptailoff. | ||
253 | */ | ||
254 | void | ||
255 | cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, | ||
256 | struct kvec *iov) | ||
257 | { | ||
258 | /* | ||
259 | * FIXME: We could avoid this kmap altogether if we used | ||
260 | * kernel_sendpage instead of kernel_sendmsg. That will only | ||
261 | * work if signing is disabled though as sendpage inlines the | ||
262 | * page directly into the fraglist. If userspace modifies the | ||
263 | * page after we calculate the signature, then the server will | ||
264 | * reject it and may break the connection. kernel_sendmsg does | ||
265 | * an extra copy of the data and avoids that issue. | ||
266 | */ | ||
267 | iov->iov_base = kmap(rqst->rq_pages[idx]); | ||
268 | |||
269 | /* if last page, don't send beyond this offset into page */ | ||
270 | if (idx == (rqst->rq_npages - 1)) | ||
271 | iov->iov_len = rqst->rq_tailsz; | ||
272 | else | ||
273 | iov->iov_len = rqst->rq_pagesz; | ||
274 | } | ||
275 | |||
276 | static int | ||
277 | smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) | ||
278 | { | ||
279 | int rc; | ||
280 | struct kvec *iov = rqst->rq_iov; | ||
281 | int n_vec = rqst->rq_nvec; | ||
282 | unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); | ||
283 | unsigned int i; | ||
284 | size_t total_len = 0, sent; | ||
285 | struct socket *ssocket = server->ssocket; | ||
286 | int val = 1; | ||
287 | |||
288 | cFYI(1, "Sending smb: smb_len=%u", smb_buf_length); | ||
289 | dump_smb(iov[0].iov_base, iov[0].iov_len); | ||
290 | |||
291 | /* cork the socket */ | ||
292 | kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK, | ||
293 | (char *)&val, sizeof(val)); | ||
294 | |||
295 | rc = smb_send_kvec(server, iov, n_vec, &sent); | ||
296 | if (rc < 0) | ||
297 | goto uncork; | ||
298 | |||
299 | total_len += sent; | ||
300 | |||
301 | /* now walk the page array and send each page in it */ | ||
302 | for (i = 0; i < rqst->rq_npages; i++) { | ||
303 | struct kvec p_iov; | ||
304 | |||
305 | cifs_rqst_page_to_kvec(rqst, i, &p_iov); | ||
306 | rc = smb_send_kvec(server, &p_iov, 1, &sent); | ||
307 | kunmap(rqst->rq_pages[i]); | ||
308 | if (rc < 0) | ||
309 | break; | ||
310 | |||
311 | total_len += sent; | ||
312 | } | ||
313 | |||
314 | uncork: | ||
315 | /* uncork it */ | ||
316 | val = 0; | ||
317 | kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK, | ||
318 | (char *)&val, sizeof(val)); | ||
220 | 319 | ||
221 | if ((total_len > 0) && (total_len != smb_buf_length + 4)) { | 320 | if ((total_len > 0) && (total_len != smb_buf_length + 4)) { |
222 | cFYI(1, "partial send (%d remaining), terminating session", | 321 | cFYI(1, "partial send (wanted=%u sent=%zu): terminating " |
223 | total_len); | 322 | "session", smb_buf_length + 4, total_len); |
224 | /* If we have only sent part of an SMB then the next SMB | 323 | /* |
225 | could be taken as the remainder of this one. We need | 324 | * If we have only sent part of an SMB then the next SMB could |
226 | to kill the socket so the server throws away the partial | 325 | * be taken as the remainder of this one. We need to kill the |
227 | SMB */ | 326 | * socket so the server throws away the partial SMB |
327 | */ | ||
228 | server->tcpStatus = CifsNeedReconnect; | 328 | server->tcpStatus = CifsNeedReconnect; |
229 | } | 329 | } |
230 | 330 | ||
@@ -236,6 +336,15 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) | |||
236 | return rc; | 336 | return rc; |
237 | } | 337 | } |
238 | 338 | ||
339 | static int | ||
340 | smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) | ||
341 | { | ||
342 | struct smb_rqst rqst = { .rq_iov = iov, | ||
343 | .rq_nvec = n_vec }; | ||
344 | |||
345 | return smb_send_rqst(server, &rqst); | ||
346 | } | ||
347 | |||
239 | int | 348 | int |
240 | smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, | 349 | smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, |
241 | unsigned int smb_buf_length) | 350 | unsigned int smb_buf_length) |
@@ -345,12 +454,11 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) | |||
345 | return 0; | 454 | return 0; |
346 | } | 455 | } |
347 | 456 | ||
348 | int | 457 | struct mid_q_entry * |
349 | cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, | 458 | cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) |
350 | unsigned int nvec, struct mid_q_entry **ret_mid) | ||
351 | { | 459 | { |
352 | int rc; | 460 | int rc; |
353 | struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base; | 461 | struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; |
354 | struct mid_q_entry *mid; | 462 | struct mid_q_entry *mid; |
355 | 463 | ||
356 | /* enable signing if server requires it */ | 464 | /* enable signing if server requires it */ |
@@ -359,16 +467,15 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, | |||
359 | 467 | ||
360 | mid = AllocMidQEntry(hdr, server); | 468 | mid = AllocMidQEntry(hdr, server); |
361 | if (mid == NULL) | 469 | if (mid == NULL) |
362 | return -ENOMEM; | 470 | return ERR_PTR(-ENOMEM); |
363 | 471 | ||
364 | rc = cifs_sign_smbv(iov, nvec, server, &mid->sequence_number); | 472 | rc = cifs_sign_rqst(rqst, server, &mid->sequence_number); |
365 | if (rc) { | 473 | if (rc) { |
366 | DeleteMidQEntry(mid); | 474 | DeleteMidQEntry(mid); |
367 | return rc; | 475 | return ERR_PTR(rc); |
368 | } | 476 | } |
369 | 477 | ||
370 | *ret_mid = mid; | 478 | return mid; |
371 | return 0; | ||
372 | } | 479 | } |
373 | 480 | ||
374 | /* | 481 | /* |
@@ -376,9 +483,9 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, | |||
376 | * the result. Caller is responsible for dealing with timeouts. | 483 | * the result. Caller is responsible for dealing with timeouts. |
377 | */ | 484 | */ |
378 | int | 485 | int |
379 | cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, | 486 | cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, |
380 | unsigned int nvec, mid_receive_t *receive, | 487 | mid_receive_t *receive, mid_callback_t *callback, |
381 | mid_callback_t *callback, void *cbdata, const int flags) | 488 | void *cbdata, const int flags) |
382 | { | 489 | { |
383 | int rc, timeout, optype; | 490 | int rc, timeout, optype; |
384 | struct mid_q_entry *mid; | 491 | struct mid_q_entry *mid; |
@@ -391,12 +498,12 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, | |||
391 | return rc; | 498 | return rc; |
392 | 499 | ||
393 | mutex_lock(&server->srv_mutex); | 500 | mutex_lock(&server->srv_mutex); |
394 | rc = server->ops->setup_async_request(server, iov, nvec, &mid); | 501 | mid = server->ops->setup_async_request(server, rqst); |
395 | if (rc) { | 502 | if (IS_ERR(mid)) { |
396 | mutex_unlock(&server->srv_mutex); | 503 | mutex_unlock(&server->srv_mutex); |
397 | add_credits(server, 1, optype); | 504 | add_credits(server, 1, optype); |
398 | wake_up(&server->request_q); | 505 | wake_up(&server->request_q); |
399 | return rc; | 506 | return PTR_ERR(mid); |
400 | } | 507 | } |
401 | 508 | ||
402 | mid->receive = receive; | 509 | mid->receive = receive; |
@@ -411,7 +518,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, | |||
411 | 518 | ||
412 | 519 | ||
413 | cifs_in_send_inc(server); | 520 | cifs_in_send_inc(server); |
414 | rc = smb_sendv(server, iov, nvec); | 521 | rc = smb_send_rqst(server, rqst); |
415 | cifs_in_send_dec(server); | 522 | cifs_in_send_dec(server); |
416 | cifs_save_when_sent(mid); | 523 | cifs_save_when_sent(mid); |
417 | mutex_unlock(&server->srv_mutex); | 524 | mutex_unlock(&server->srv_mutex); |
@@ -419,7 +526,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, | |||
419 | if (rc == 0) | 526 | if (rc == 0) |
420 | return 0; | 527 | return 0; |
421 | 528 | ||
422 | delete_mid(mid); | 529 | cifs_delete_mid(mid); |
423 | add_credits(server, 1, optype); | 530 | add_credits(server, 1, optype); |
424 | wake_up(&server->request_q); | 531 | wake_up(&server->request_q); |
425 | return rc; | 532 | return rc; |
@@ -503,35 +610,40 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, | |||
503 | /* convert the length into a more usable form */ | 610 | /* convert the length into a more usable form */ |
504 | if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | 611 | if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { |
505 | struct kvec iov; | 612 | struct kvec iov; |
613 | int rc = 0; | ||
614 | struct smb_rqst rqst = { .rq_iov = &iov, | ||
615 | .rq_nvec = 1 }; | ||
506 | 616 | ||
507 | iov.iov_base = mid->resp_buf; | 617 | iov.iov_base = mid->resp_buf; |
508 | iov.iov_len = len; | 618 | iov.iov_len = len; |
509 | /* FIXME: add code to kill session */ | 619 | /* FIXME: add code to kill session */ |
510 | if (cifs_verify_signature(&iov, 1, server, | 620 | rc = cifs_verify_signature(&rqst, server, |
511 | mid->sequence_number + 1) != 0) | 621 | mid->sequence_number + 1); |
512 | cERROR(1, "Unexpected SMB signature"); | 622 | if (rc) |
623 | cERROR(1, "SMB signature verification returned error = " | ||
624 | "%d", rc); | ||
513 | } | 625 | } |
514 | 626 | ||
515 | /* BB special case reconnect tid and uid here? */ | 627 | /* BB special case reconnect tid and uid here? */ |
516 | return map_smb_to_linux_error(mid->resp_buf, log_error); | 628 | return map_smb_to_linux_error(mid->resp_buf, log_error); |
517 | } | 629 | } |
518 | 630 | ||
519 | int | 631 | struct mid_q_entry * |
520 | cifs_setup_request(struct cifs_ses *ses, struct kvec *iov, | 632 | cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst) |
521 | unsigned int nvec, struct mid_q_entry **ret_mid) | ||
522 | { | 633 | { |
523 | int rc; | 634 | int rc; |
524 | struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base; | 635 | struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; |
525 | struct mid_q_entry *mid; | 636 | struct mid_q_entry *mid; |
526 | 637 | ||
527 | rc = allocate_mid(ses, hdr, &mid); | 638 | rc = allocate_mid(ses, hdr, &mid); |
528 | if (rc) | 639 | if (rc) |
529 | return rc; | 640 | return ERR_PTR(rc); |
530 | rc = cifs_sign_smbv(iov, nvec, ses->server, &mid->sequence_number); | 641 | rc = cifs_sign_rqst(rqst, ses->server, &mid->sequence_number); |
531 | if (rc) | 642 | if (rc) { |
532 | delete_mid(mid); | 643 | cifs_delete_mid(mid); |
533 | *ret_mid = mid; | 644 | return ERR_PTR(rc); |
534 | return rc; | 645 | } |
646 | return mid; | ||
535 | } | 647 | } |
536 | 648 | ||
537 | int | 649 | int |
@@ -544,6 +656,8 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, | |||
544 | struct mid_q_entry *midQ; | 656 | struct mid_q_entry *midQ; |
545 | char *buf = iov[0].iov_base; | 657 | char *buf = iov[0].iov_base; |
546 | unsigned int credits = 1; | 658 | unsigned int credits = 1; |
659 | struct smb_rqst rqst = { .rq_iov = iov, | ||
660 | .rq_nvec = n_vec }; | ||
547 | 661 | ||
548 | timeout = flags & CIFS_TIMEOUT_MASK; | 662 | timeout = flags & CIFS_TIMEOUT_MASK; |
549 | optype = flags & CIFS_OP_MASK; | 663 | optype = flags & CIFS_OP_MASK; |
@@ -581,13 +695,13 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, | |||
581 | 695 | ||
582 | mutex_lock(&ses->server->srv_mutex); | 696 | mutex_lock(&ses->server->srv_mutex); |
583 | 697 | ||
584 | rc = ses->server->ops->setup_request(ses, iov, n_vec, &midQ); | 698 | midQ = ses->server->ops->setup_request(ses, &rqst); |
585 | if (rc) { | 699 | if (IS_ERR(midQ)) { |
586 | mutex_unlock(&ses->server->srv_mutex); | 700 | mutex_unlock(&ses->server->srv_mutex); |
587 | cifs_small_buf_release(buf); | 701 | cifs_small_buf_release(buf); |
588 | /* Update # of requests on wire to server */ | 702 | /* Update # of requests on wire to server */ |
589 | add_credits(ses->server, 1, optype); | 703 | add_credits(ses->server, 1, optype); |
590 | return rc; | 704 | return PTR_ERR(midQ); |
591 | } | 705 | } |
592 | 706 | ||
593 | midQ->mid_state = MID_REQUEST_SUBMITTED; | 707 | midQ->mid_state = MID_REQUEST_SUBMITTED; |
@@ -649,11 +763,11 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, | |||
649 | rc = ses->server->ops->check_receive(midQ, ses->server, | 763 | rc = ses->server->ops->check_receive(midQ, ses->server, |
650 | flags & CIFS_LOG_ERROR); | 764 | flags & CIFS_LOG_ERROR); |
651 | 765 | ||
652 | /* mark it so buf will not be freed by delete_mid */ | 766 | /* mark it so buf will not be freed by cifs_delete_mid */ |
653 | if ((flags & CIFS_NO_RESP) == 0) | 767 | if ((flags & CIFS_NO_RESP) == 0) |
654 | midQ->resp_buf = NULL; | 768 | midQ->resp_buf = NULL; |
655 | out: | 769 | out: |
656 | delete_mid(midQ); | 770 | cifs_delete_mid(midQ); |
657 | add_credits(ses->server, credits, optype); | 771 | add_credits(ses->server, credits, optype); |
658 | 772 | ||
659 | return rc; | 773 | return rc; |
@@ -759,7 +873,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, | |||
759 | memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); | 873 | memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); |
760 | rc = cifs_check_receive(midQ, ses->server, 0); | 874 | rc = cifs_check_receive(midQ, ses->server, 0); |
761 | out: | 875 | out: |
762 | delete_mid(midQ); | 876 | cifs_delete_mid(midQ); |
763 | add_credits(ses->server, 1, 0); | 877 | add_credits(ses->server, 1, 0); |
764 | 878 | ||
765 | return rc; | 879 | return rc; |
@@ -843,7 +957,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, | |||
843 | 957 | ||
844 | rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); | 958 | rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); |
845 | if (rc) { | 959 | if (rc) { |
846 | delete_mid(midQ); | 960 | cifs_delete_mid(midQ); |
847 | mutex_unlock(&ses->server->srv_mutex); | 961 | mutex_unlock(&ses->server->srv_mutex); |
848 | return rc; | 962 | return rc; |
849 | } | 963 | } |
@@ -856,7 +970,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, | |||
856 | mutex_unlock(&ses->server->srv_mutex); | 970 | mutex_unlock(&ses->server->srv_mutex); |
857 | 971 | ||
858 | if (rc < 0) { | 972 | if (rc < 0) { |
859 | delete_mid(midQ); | 973 | cifs_delete_mid(midQ); |
860 | return rc; | 974 | return rc; |
861 | } | 975 | } |
862 | 976 | ||
@@ -877,7 +991,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, | |||
877 | blocking lock to return. */ | 991 | blocking lock to return. */ |
878 | rc = send_cancel(ses->server, in_buf, midQ); | 992 | rc = send_cancel(ses->server, in_buf, midQ); |
879 | if (rc) { | 993 | if (rc) { |
880 | delete_mid(midQ); | 994 | cifs_delete_mid(midQ); |
881 | return rc; | 995 | return rc; |
882 | } | 996 | } |
883 | } else { | 997 | } else { |
@@ -889,7 +1003,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, | |||
889 | /* If we get -ENOLCK back the lock may have | 1003 | /* If we get -ENOLCK back the lock may have |
890 | already been removed. Don't exit in this case. */ | 1004 | already been removed. Don't exit in this case. */ |
891 | if (rc && rc != -ENOLCK) { | 1005 | if (rc && rc != -ENOLCK) { |
892 | delete_mid(midQ); | 1006 | cifs_delete_mid(midQ); |
893 | return rc; | 1007 | return rc; |
894 | } | 1008 | } |
895 | } | 1009 | } |
@@ -926,7 +1040,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, | |||
926 | memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); | 1040 | memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); |
927 | rc = cifs_check_receive(midQ, ses->server, 0); | 1041 | rc = cifs_check_receive(midQ, ses->server, 0); |
928 | out: | 1042 | out: |
929 | delete_mid(midQ); | 1043 | cifs_delete_mid(midQ); |
930 | if (rstart && rc == -EACCES) | 1044 | if (rstart && rc == -EACCES) |
931 | return -ERESTARTSYS; | 1045 | return -ERESTARTSYS; |
932 | return rc; | 1046 | return rc; |
diff --git a/fs/coda/inode.c b/fs/coda/inode.c index f1813120d753..be2aa4909487 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c | |||
@@ -85,6 +85,11 @@ int coda_init_inodecache(void) | |||
85 | 85 | ||
86 | void coda_destroy_inodecache(void) | 86 | void coda_destroy_inodecache(void) |
87 | { | 87 | { |
88 | /* | ||
89 | * Make sure all delayed rcu free inodes are flushed before we | ||
90 | * destroy cache. | ||
91 | */ | ||
92 | rcu_barrier(); | ||
88 | kmem_cache_destroy(coda_inode_cachep); | 93 | kmem_cache_destroy(coda_inode_cachep); |
89 | } | 94 | } |
90 | 95 | ||
@@ -107,43 +112,41 @@ static const struct super_operations coda_super_operations = | |||
107 | 112 | ||
108 | static int get_device_index(struct coda_mount_data *data) | 113 | static int get_device_index(struct coda_mount_data *data) |
109 | { | 114 | { |
110 | struct file *file; | 115 | struct fd f; |
111 | struct inode *inode; | 116 | struct inode *inode; |
112 | int idx; | 117 | int idx; |
113 | 118 | ||
114 | if(data == NULL) { | 119 | if (data == NULL) { |
115 | printk("coda_read_super: Bad mount data\n"); | 120 | printk("coda_read_super: Bad mount data\n"); |
116 | return -1; | 121 | return -1; |
117 | } | 122 | } |
118 | 123 | ||
119 | if(data->version != CODA_MOUNT_VERSION) { | 124 | if (data->version != CODA_MOUNT_VERSION) { |
120 | printk("coda_read_super: Bad mount version\n"); | 125 | printk("coda_read_super: Bad mount version\n"); |
121 | return -1; | 126 | return -1; |
122 | } | 127 | } |
123 | 128 | ||
124 | file = fget(data->fd); | 129 | f = fdget(data->fd); |
125 | inode = NULL; | 130 | if (!f.file) |
126 | if(file) | 131 | goto Ebadf; |
127 | inode = file->f_path.dentry->d_inode; | 132 | inode = f.file->f_path.dentry->d_inode; |
128 | 133 | if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) { | |
129 | if(!inode || !S_ISCHR(inode->i_mode) || | 134 | fdput(f); |
130 | imajor(inode) != CODA_PSDEV_MAJOR) { | 135 | goto Ebadf; |
131 | if(file) | ||
132 | fput(file); | ||
133 | |||
134 | printk("coda_read_super: Bad file\n"); | ||
135 | return -1; | ||
136 | } | 136 | } |
137 | 137 | ||
138 | idx = iminor(inode); | 138 | idx = iminor(inode); |
139 | fput(file); | 139 | fdput(f); |
140 | 140 | ||
141 | if(idx < 0 || idx >= MAX_CODADEVS) { | 141 | if (idx < 0 || idx >= MAX_CODADEVS) { |
142 | printk("coda_read_super: Bad minor number\n"); | 142 | printk("coda_read_super: Bad minor number\n"); |
143 | return -1; | 143 | return -1; |
144 | } | 144 | } |
145 | 145 | ||
146 | return idx; | 146 | return idx; |
147 | Ebadf: | ||
148 | printk("coda_read_super: Bad file\n"); | ||
149 | return -1; | ||
147 | } | 150 | } |
148 | 151 | ||
149 | static int coda_fill_super(struct super_block *sb, void *data, int silent) | 152 | static int coda_fill_super(struct super_block *sb, void *data, int silent) |
diff --git a/fs/compat.c b/fs/compat.c index 6161255fac45..b7a24d0ca30d 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -870,22 +870,20 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, | |||
870 | struct compat_old_linux_dirent __user *dirent, unsigned int count) | 870 | struct compat_old_linux_dirent __user *dirent, unsigned int count) |
871 | { | 871 | { |
872 | int error; | 872 | int error; |
873 | struct file *file; | 873 | struct fd f = fdget(fd); |
874 | int fput_needed; | ||
875 | struct compat_readdir_callback buf; | 874 | struct compat_readdir_callback buf; |
876 | 875 | ||
877 | file = fget_light(fd, &fput_needed); | 876 | if (!f.file) |
878 | if (!file) | ||
879 | return -EBADF; | 877 | return -EBADF; |
880 | 878 | ||
881 | buf.result = 0; | 879 | buf.result = 0; |
882 | buf.dirent = dirent; | 880 | buf.dirent = dirent; |
883 | 881 | ||
884 | error = vfs_readdir(file, compat_fillonedir, &buf); | 882 | error = vfs_readdir(f.file, compat_fillonedir, &buf); |
885 | if (buf.result) | 883 | if (buf.result) |
886 | error = buf.result; | 884 | error = buf.result; |
887 | 885 | ||
888 | fput_light(file, fput_needed); | 886 | fdput(f); |
889 | return error; | 887 | return error; |
890 | } | 888 | } |
891 | 889 | ||
@@ -949,17 +947,16 @@ efault: | |||
949 | asmlinkage long compat_sys_getdents(unsigned int fd, | 947 | asmlinkage long compat_sys_getdents(unsigned int fd, |
950 | struct compat_linux_dirent __user *dirent, unsigned int count) | 948 | struct compat_linux_dirent __user *dirent, unsigned int count) |
951 | { | 949 | { |
952 | struct file * file; | 950 | struct fd f; |
953 | struct compat_linux_dirent __user * lastdirent; | 951 | struct compat_linux_dirent __user * lastdirent; |
954 | struct compat_getdents_callback buf; | 952 | struct compat_getdents_callback buf; |
955 | int fput_needed; | ||
956 | int error; | 953 | int error; |
957 | 954 | ||
958 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 955 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
959 | return -EFAULT; | 956 | return -EFAULT; |
960 | 957 | ||
961 | file = fget_light(fd, &fput_needed); | 958 | f = fdget(fd); |
962 | if (!file) | 959 | if (!f.file) |
963 | return -EBADF; | 960 | return -EBADF; |
964 | 961 | ||
965 | buf.current_dir = dirent; | 962 | buf.current_dir = dirent; |
@@ -967,17 +964,17 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
967 | buf.count = count; | 964 | buf.count = count; |
968 | buf.error = 0; | 965 | buf.error = 0; |
969 | 966 | ||
970 | error = vfs_readdir(file, compat_filldir, &buf); | 967 | error = vfs_readdir(f.file, compat_filldir, &buf); |
971 | if (error >= 0) | 968 | if (error >= 0) |
972 | error = buf.error; | 969 | error = buf.error; |
973 | lastdirent = buf.previous; | 970 | lastdirent = buf.previous; |
974 | if (lastdirent) { | 971 | if (lastdirent) { |
975 | if (put_user(file->f_pos, &lastdirent->d_off)) | 972 | if (put_user(f.file->f_pos, &lastdirent->d_off)) |
976 | error = -EFAULT; | 973 | error = -EFAULT; |
977 | else | 974 | else |
978 | error = count - buf.count; | 975 | error = count - buf.count; |
979 | } | 976 | } |
980 | fput_light(file, fput_needed); | 977 | fdput(f); |
981 | return error; | 978 | return error; |
982 | } | 979 | } |
983 | 980 | ||
@@ -1035,17 +1032,16 @@ efault: | |||
1035 | asmlinkage long compat_sys_getdents64(unsigned int fd, | 1032 | asmlinkage long compat_sys_getdents64(unsigned int fd, |
1036 | struct linux_dirent64 __user * dirent, unsigned int count) | 1033 | struct linux_dirent64 __user * dirent, unsigned int count) |
1037 | { | 1034 | { |
1038 | struct file * file; | 1035 | struct fd f; |
1039 | struct linux_dirent64 __user * lastdirent; | 1036 | struct linux_dirent64 __user * lastdirent; |
1040 | struct compat_getdents_callback64 buf; | 1037 | struct compat_getdents_callback64 buf; |
1041 | int fput_needed; | ||
1042 | int error; | 1038 | int error; |
1043 | 1039 | ||
1044 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 1040 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
1045 | return -EFAULT; | 1041 | return -EFAULT; |
1046 | 1042 | ||
1047 | file = fget_light(fd, &fput_needed); | 1043 | f = fdget(fd); |
1048 | if (!file) | 1044 | if (!f.file) |
1049 | return -EBADF; | 1045 | return -EBADF; |
1050 | 1046 | ||
1051 | buf.current_dir = dirent; | 1047 | buf.current_dir = dirent; |
@@ -1053,18 +1049,18 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, | |||
1053 | buf.count = count; | 1049 | buf.count = count; |
1054 | buf.error = 0; | 1050 | buf.error = 0; |
1055 | 1051 | ||
1056 | error = vfs_readdir(file, compat_filldir64, &buf); | 1052 | error = vfs_readdir(f.file, compat_filldir64, &buf); |
1057 | if (error >= 0) | 1053 | if (error >= 0) |
1058 | error = buf.error; | 1054 | error = buf.error; |
1059 | lastdirent = buf.previous; | 1055 | lastdirent = buf.previous; |
1060 | if (lastdirent) { | 1056 | if (lastdirent) { |
1061 | typeof(lastdirent->d_off) d_off = file->f_pos; | 1057 | typeof(lastdirent->d_off) d_off = f.file->f_pos; |
1062 | if (__put_user_unaligned(d_off, &lastdirent->d_off)) | 1058 | if (__put_user_unaligned(d_off, &lastdirent->d_off)) |
1063 | error = -EFAULT; | 1059 | error = -EFAULT; |
1064 | else | 1060 | else |
1065 | error = count - buf.count; | 1061 | error = count - buf.count; |
1066 | } | 1062 | } |
1067 | fput_light(file, fput_needed); | 1063 | fdput(f); |
1068 | return error; | 1064 | return error; |
1069 | } | 1065 | } |
1070 | #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ | 1066 | #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ |
@@ -1152,15 +1148,16 @@ asmlinkage ssize_t | |||
1152 | compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, | 1148 | compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, |
1153 | unsigned long vlen) | 1149 | unsigned long vlen) |
1154 | { | 1150 | { |
1155 | struct file *file; | 1151 | struct fd f = fdget(fd); |
1156 | int fput_needed; | ||
1157 | ssize_t ret; | 1152 | ssize_t ret; |
1153 | loff_t pos; | ||
1158 | 1154 | ||
1159 | file = fget_light(fd, &fput_needed); | 1155 | if (!f.file) |
1160 | if (!file) | ||
1161 | return -EBADF; | 1156 | return -EBADF; |
1162 | ret = compat_readv(file, vec, vlen, &file->f_pos); | 1157 | pos = f.file->f_pos; |
1163 | fput_light(file, fput_needed); | 1158 | ret = compat_readv(f.file, vec, vlen, &pos); |
1159 | f.file->f_pos = pos; | ||
1160 | fdput(f); | ||
1164 | return ret; | 1161 | return ret; |
1165 | } | 1162 | } |
1166 | 1163 | ||
@@ -1168,19 +1165,18 @@ asmlinkage ssize_t | |||
1168 | compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, | 1165 | compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, |
1169 | unsigned long vlen, loff_t pos) | 1166 | unsigned long vlen, loff_t pos) |
1170 | { | 1167 | { |
1171 | struct file *file; | 1168 | struct fd f; |
1172 | int fput_needed; | ||
1173 | ssize_t ret; | 1169 | ssize_t ret; |
1174 | 1170 | ||
1175 | if (pos < 0) | 1171 | if (pos < 0) |
1176 | return -EINVAL; | 1172 | return -EINVAL; |
1177 | file = fget_light(fd, &fput_needed); | 1173 | f = fdget(fd); |
1178 | if (!file) | 1174 | if (!f.file) |
1179 | return -EBADF; | 1175 | return -EBADF; |
1180 | ret = -ESPIPE; | 1176 | ret = -ESPIPE; |
1181 | if (file->f_mode & FMODE_PREAD) | 1177 | if (f.file->f_mode & FMODE_PREAD) |
1182 | ret = compat_readv(file, vec, vlen, &pos); | 1178 | ret = compat_readv(f.file, vec, vlen, &pos); |
1183 | fput_light(file, fput_needed); | 1179 | fdput(f); |
1184 | return ret; | 1180 | return ret; |
1185 | } | 1181 | } |
1186 | 1182 | ||
@@ -1218,15 +1214,16 @@ asmlinkage ssize_t | |||
1218 | compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, | 1214 | compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, |
1219 | unsigned long vlen) | 1215 | unsigned long vlen) |
1220 | { | 1216 | { |
1221 | struct file *file; | 1217 | struct fd f = fdget(fd); |
1222 | int fput_needed; | ||
1223 | ssize_t ret; | 1218 | ssize_t ret; |
1219 | loff_t pos; | ||
1224 | 1220 | ||
1225 | file = fget_light(fd, &fput_needed); | 1221 | if (!f.file) |
1226 | if (!file) | ||
1227 | return -EBADF; | 1222 | return -EBADF; |
1228 | ret = compat_writev(file, vec, vlen, &file->f_pos); | 1223 | pos = f.file->f_pos; |
1229 | fput_light(file, fput_needed); | 1224 | ret = compat_writev(f.file, vec, vlen, &pos); |
1225 | f.file->f_pos = pos; | ||
1226 | fdput(f); | ||
1230 | return ret; | 1227 | return ret; |
1231 | } | 1228 | } |
1232 | 1229 | ||
@@ -1234,19 +1231,18 @@ asmlinkage ssize_t | |||
1234 | compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, | 1231 | compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, |
1235 | unsigned long vlen, loff_t pos) | 1232 | unsigned long vlen, loff_t pos) |
1236 | { | 1233 | { |
1237 | struct file *file; | 1234 | struct fd f; |
1238 | int fput_needed; | ||
1239 | ssize_t ret; | 1235 | ssize_t ret; |
1240 | 1236 | ||
1241 | if (pos < 0) | 1237 | if (pos < 0) |
1242 | return -EINVAL; | 1238 | return -EINVAL; |
1243 | file = fget_light(fd, &fput_needed); | 1239 | f = fdget(fd); |
1244 | if (!file) | 1240 | if (!f.file) |
1245 | return -EBADF; | 1241 | return -EBADF; |
1246 | ret = -ESPIPE; | 1242 | ret = -ESPIPE; |
1247 | if (file->f_mode & FMODE_PWRITE) | 1243 | if (f.file->f_mode & FMODE_PWRITE) |
1248 | ret = compat_writev(file, vec, vlen, &pos); | 1244 | ret = compat_writev(f.file, vec, vlen, &pos); |
1249 | fput_light(file, fput_needed); | 1245 | fdput(f); |
1250 | return ret; | 1246 | return ret; |
1251 | } | 1247 | } |
1252 | 1248 | ||
@@ -1796,3 +1792,25 @@ compat_sys_open_by_handle_at(int mountdirfd, | |||
1796 | return do_handle_open(mountdirfd, handle, flags); | 1792 | return do_handle_open(mountdirfd, handle, flags); |
1797 | } | 1793 | } |
1798 | #endif | 1794 | #endif |
1795 | |||
1796 | #ifdef __ARCH_WANT_COMPAT_SYS_SENDFILE | ||
1797 | asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, | ||
1798 | compat_off_t __user *offset, compat_size_t count) | ||
1799 | { | ||
1800 | loff_t pos; | ||
1801 | off_t off; | ||
1802 | ssize_t ret; | ||
1803 | |||
1804 | if (offset) { | ||
1805 | if (unlikely(get_user(off, offset))) | ||
1806 | return -EFAULT; | ||
1807 | pos = off; | ||
1808 | ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); | ||
1809 | if (unlikely(put_user(pos, offset))) | ||
1810 | return -EFAULT; | ||
1811 | return ret; | ||
1812 | } | ||
1813 | |||
1814 | return do_sendfile(out_fd, in_fd, NULL, count, 0); | ||
1815 | } | ||
1816 | #endif /* __ARCH_WANT_COMPAT_SYS_SENDFILE */ | ||
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index 112e45a17e99..a81147e2e4ef 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c | |||
@@ -38,6 +38,13 @@ | |||
38 | #define elf_addr_t Elf32_Addr | 38 | #define elf_addr_t Elf32_Addr |
39 | 39 | ||
40 | /* | 40 | /* |
41 | * Some data types as stored in coredump. | ||
42 | */ | ||
43 | #define user_long_t compat_long_t | ||
44 | #define user_siginfo_t compat_siginfo_t | ||
45 | #define copy_siginfo_to_user copy_siginfo_to_user32 | ||
46 | |||
47 | /* | ||
41 | * The machine-dependent core note format types are defined in elfcore-compat.h, | 48 | * The machine-dependent core note format types are defined in elfcore-compat.h, |
42 | * which requires asm/elf.h to define compat_elf_gregset_t et al. | 49 | * which requires asm/elf.h to define compat_elf_gregset_t et al. |
43 | */ | 50 | */ |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index debdfe0fc809..f5054025f9da 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -866,6 +866,12 @@ COMPATIBLE_IOCTL(TIOCGPTN) | |||
866 | COMPATIBLE_IOCTL(TIOCSPTLCK) | 866 | COMPATIBLE_IOCTL(TIOCSPTLCK) |
867 | COMPATIBLE_IOCTL(TIOCSERGETLSR) | 867 | COMPATIBLE_IOCTL(TIOCSERGETLSR) |
868 | COMPATIBLE_IOCTL(TIOCSIG) | 868 | COMPATIBLE_IOCTL(TIOCSIG) |
869 | #ifdef TIOCSRS485 | ||
870 | COMPATIBLE_IOCTL(TIOCSRS485) | ||
871 | #endif | ||
872 | #ifdef TIOCGRS485 | ||
873 | COMPATIBLE_IOCTL(TIOCGRS485) | ||
874 | #endif | ||
869 | #ifdef TCGETS2 | 875 | #ifdef TCGETS2 |
870 | COMPATIBLE_IOCTL(TCGETS2) | 876 | COMPATIBLE_IOCTL(TCGETS2) |
871 | COMPATIBLE_IOCTL(TCSETS2) | 877 | COMPATIBLE_IOCTL(TCSETS2) |
@@ -897,6 +903,8 @@ COMPATIBLE_IOCTL(KDGKBSENT) | |||
897 | COMPATIBLE_IOCTL(KDSKBSENT) | 903 | COMPATIBLE_IOCTL(KDSKBSENT) |
898 | COMPATIBLE_IOCTL(KDGKBDIACR) | 904 | COMPATIBLE_IOCTL(KDGKBDIACR) |
899 | COMPATIBLE_IOCTL(KDSKBDIACR) | 905 | COMPATIBLE_IOCTL(KDSKBDIACR) |
906 | COMPATIBLE_IOCTL(KDGKBDIACRUC) | ||
907 | COMPATIBLE_IOCTL(KDSKBDIACRUC) | ||
900 | COMPATIBLE_IOCTL(KDKBDREP) | 908 | COMPATIBLE_IOCTL(KDKBDREP) |
901 | COMPATIBLE_IOCTL(KDGKBLED) | 909 | COMPATIBLE_IOCTL(KDGKBLED) |
902 | COMPATIBLE_IOCTL(KDGETLED) | 910 | COMPATIBLE_IOCTL(KDGETLED) |
@@ -1531,16 +1539,13 @@ static int compat_ioctl_check_table(unsigned int xcmd) | |||
1531 | asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, | 1539 | asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, |
1532 | unsigned long arg) | 1540 | unsigned long arg) |
1533 | { | 1541 | { |
1534 | struct file *filp; | 1542 | struct fd f = fdget(fd); |
1535 | int error = -EBADF; | 1543 | int error = -EBADF; |
1536 | int fput_needed; | 1544 | if (!f.file) |
1537 | |||
1538 | filp = fget_light(fd, &fput_needed); | ||
1539 | if (!filp) | ||
1540 | goto out; | 1545 | goto out; |
1541 | 1546 | ||
1542 | /* RED-PEN how should LSM module know it's handling 32bit? */ | 1547 | /* RED-PEN how should LSM module know it's handling 32bit? */ |
1543 | error = security_file_ioctl(filp, cmd, arg); | 1548 | error = security_file_ioctl(f.file, cmd, arg); |
1544 | if (error) | 1549 | if (error) |
1545 | goto out_fput; | 1550 | goto out_fput; |
1546 | 1551 | ||
@@ -1560,30 +1565,30 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, | |||
1560 | #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) | 1565 | #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) |
1561 | case FS_IOC_RESVSP_32: | 1566 | case FS_IOC_RESVSP_32: |
1562 | case FS_IOC_RESVSP64_32: | 1567 | case FS_IOC_RESVSP64_32: |
1563 | error = compat_ioctl_preallocate(filp, compat_ptr(arg)); | 1568 | error = compat_ioctl_preallocate(f.file, compat_ptr(arg)); |
1564 | goto out_fput; | 1569 | goto out_fput; |
1565 | #else | 1570 | #else |
1566 | case FS_IOC_RESVSP: | 1571 | case FS_IOC_RESVSP: |
1567 | case FS_IOC_RESVSP64: | 1572 | case FS_IOC_RESVSP64: |
1568 | error = ioctl_preallocate(filp, compat_ptr(arg)); | 1573 | error = ioctl_preallocate(f.file, compat_ptr(arg)); |
1569 | goto out_fput; | 1574 | goto out_fput; |
1570 | #endif | 1575 | #endif |
1571 | 1576 | ||
1572 | case FIBMAP: | 1577 | case FIBMAP: |
1573 | case FIGETBSZ: | 1578 | case FIGETBSZ: |
1574 | case FIONREAD: | 1579 | case FIONREAD: |
1575 | if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) | 1580 | if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode)) |
1576 | break; | 1581 | break; |
1577 | /*FALL THROUGH*/ | 1582 | /*FALL THROUGH*/ |
1578 | 1583 | ||
1579 | default: | 1584 | default: |
1580 | if (filp->f_op && filp->f_op->compat_ioctl) { | 1585 | if (f.file->f_op && f.file->f_op->compat_ioctl) { |
1581 | error = filp->f_op->compat_ioctl(filp, cmd, arg); | 1586 | error = f.file->f_op->compat_ioctl(f.file, cmd, arg); |
1582 | if (error != -ENOIOCTLCMD) | 1587 | if (error != -ENOIOCTLCMD) |
1583 | goto out_fput; | 1588 | goto out_fput; |
1584 | } | 1589 | } |
1585 | 1590 | ||
1586 | if (!filp->f_op || !filp->f_op->unlocked_ioctl) | 1591 | if (!f.file->f_op || !f.file->f_op->unlocked_ioctl) |
1587 | goto do_ioctl; | 1592 | goto do_ioctl; |
1588 | break; | 1593 | break; |
1589 | } | 1594 | } |
@@ -1591,7 +1596,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, | |||
1591 | if (compat_ioctl_check_table(XFORM(cmd))) | 1596 | if (compat_ioctl_check_table(XFORM(cmd))) |
1592 | goto found_handler; | 1597 | goto found_handler; |
1593 | 1598 | ||
1594 | error = do_ioctl_trans(fd, cmd, arg, filp); | 1599 | error = do_ioctl_trans(fd, cmd, arg, f.file); |
1595 | if (error == -ENOIOCTLCMD) | 1600 | if (error == -ENOIOCTLCMD) |
1596 | error = -ENOTTY; | 1601 | error = -ENOTTY; |
1597 | 1602 | ||
@@ -1600,9 +1605,9 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, | |||
1600 | found_handler: | 1605 | found_handler: |
1601 | arg = (unsigned long)compat_ptr(arg); | 1606 | arg = (unsigned long)compat_ptr(arg); |
1602 | do_ioctl: | 1607 | do_ioctl: |
1603 | error = do_vfs_ioctl(filp, fd, cmd, arg); | 1608 | error = do_vfs_ioctl(f.file, fd, cmd, arg); |
1604 | out_fput: | 1609 | out_fput: |
1605 | fput_light(filp, fput_needed); | 1610 | fdput(f); |
1606 | out: | 1611 | out: |
1607 | return error; | 1612 | return error; |
1608 | } | 1613 | } |
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 0074362d9f7f..a9d35b0e06cf 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c | |||
@@ -79,8 +79,8 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr) | |||
79 | return -ENOMEM; | 79 | return -ENOMEM; |
80 | /* assign default attributes */ | 80 | /* assign default attributes */ |
81 | sd_iattr->ia_mode = sd->s_mode; | 81 | sd_iattr->ia_mode = sd->s_mode; |
82 | sd_iattr->ia_uid = 0; | 82 | sd_iattr->ia_uid = GLOBAL_ROOT_UID; |
83 | sd_iattr->ia_gid = 0; | 83 | sd_iattr->ia_gid = GLOBAL_ROOT_GID; |
84 | sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; | 84 | sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; |
85 | sd->s_iattr = sd_iattr; | 85 | sd->s_iattr = sd_iattr; |
86 | } | 86 | } |
diff --git a/fs/coredump.c b/fs/coredump.c new file mode 100644 index 000000000000..fd37facac8dc --- /dev/null +++ b/fs/coredump.c | |||
@@ -0,0 +1,692 @@ | |||
1 | #include <linux/slab.h> | ||
2 | #include <linux/file.h> | ||
3 | #include <linux/fdtable.h> | ||
4 | #include <linux/mm.h> | ||
5 | #include <linux/stat.h> | ||
6 | #include <linux/fcntl.h> | ||
7 | #include <linux/swap.h> | ||
8 | #include <linux/string.h> | ||
9 | #include <linux/init.h> | ||
10 | #include <linux/pagemap.h> | ||
11 | #include <linux/perf_event.h> | ||
12 | #include <linux/highmem.h> | ||
13 | #include <linux/spinlock.h> | ||
14 | #include <linux/key.h> | ||
15 | #include <linux/personality.h> | ||
16 | #include <linux/binfmts.h> | ||
17 | #include <linux/coredump.h> | ||
18 | #include <linux/utsname.h> | ||
19 | #include <linux/pid_namespace.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/namei.h> | ||
22 | #include <linux/mount.h> | ||
23 | #include <linux/security.h> | ||
24 | #include <linux/syscalls.h> | ||
25 | #include <linux/tsacct_kern.h> | ||
26 | #include <linux/cn_proc.h> | ||
27 | #include <linux/audit.h> | ||
28 | #include <linux/tracehook.h> | ||
29 | #include <linux/kmod.h> | ||
30 | #include <linux/fsnotify.h> | ||
31 | #include <linux/fs_struct.h> | ||
32 | #include <linux/pipe_fs_i.h> | ||
33 | #include <linux/oom.h> | ||
34 | #include <linux/compat.h> | ||
35 | |||
36 | #include <asm/uaccess.h> | ||
37 | #include <asm/mmu_context.h> | ||
38 | #include <asm/tlb.h> | ||
39 | #include <asm/exec.h> | ||
40 | |||
41 | #include <trace/events/task.h> | ||
42 | #include "internal.h" | ||
43 | #include "coredump.h" | ||
44 | |||
45 | #include <trace/events/sched.h> | ||
46 | |||
/* Core-naming knobs, exposed via sysctl (see sysctl.c). */
int core_uses_pid;
char core_pattern[CORENAME_MAX_SIZE] = "core";
unsigned int core_pipe_limit;

/* Growable buffer state used while expanding core_pattern into a name. */
struct core_name {
	char *corename;		/* heap buffer holding the (partial) name */
	int used, size;		/* bytes written so far / allocated size */
};
/* Monotonic growth factor consumed by expand_corename(); never reset. */
static atomic_t call_count = ATOMIC_INIT(1);

/* The maximal length of core_pattern is also specified in sysctl.c */
58 | |||
59 | static int expand_corename(struct core_name *cn) | ||
60 | { | ||
61 | char *old_corename = cn->corename; | ||
62 | |||
63 | cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); | ||
64 | cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); | ||
65 | |||
66 | if (!cn->corename) { | ||
67 | kfree(old_corename); | ||
68 | return -ENOMEM; | ||
69 | } | ||
70 | |||
71 | return 0; | ||
72 | } | ||
73 | |||
74 | static int cn_printf(struct core_name *cn, const char *fmt, ...) | ||
75 | { | ||
76 | char *cur; | ||
77 | int need; | ||
78 | int ret; | ||
79 | va_list arg; | ||
80 | |||
81 | va_start(arg, fmt); | ||
82 | need = vsnprintf(NULL, 0, fmt, arg); | ||
83 | va_end(arg); | ||
84 | |||
85 | if (likely(need < cn->size - cn->used - 1)) | ||
86 | goto out_printf; | ||
87 | |||
88 | ret = expand_corename(cn); | ||
89 | if (ret) | ||
90 | goto expand_fail; | ||
91 | |||
92 | out_printf: | ||
93 | cur = cn->corename + cn->used; | ||
94 | va_start(arg, fmt); | ||
95 | vsnprintf(cur, need + 1, fmt, arg); | ||
96 | va_end(arg); | ||
97 | cn->used += need; | ||
98 | return 0; | ||
99 | |||
100 | expand_fail: | ||
101 | return ret; | ||
102 | } | ||
103 | |||
/*
 * Replace every '/' in @str with '!' so the string can be used as a
 * single path component of the core file name.
 */
static void cn_escape(char *str)
{
	char *p;

	for (p = str; *p != '\0'; p++) {
		if (*p == '/')
			*p = '!';
	}
}
110 | |||
/*
 * Append the path of the dumping task's executable (or a fallback built
 * from current->comm when no exe reference exists) to the core name,
 * with '/' escaped to '!'.  Returns 0 or a negative errno.
 */
static int cn_print_exe_file(struct core_name *cn)
{
	struct file *exe_file;
	char *pathbuf, *path;
	int ret;

	exe_file = get_mm_exe_file(current->mm);
	if (!exe_file) {
		/* No executable file reference: fall back to the comm name. */
		char *commstart = cn->corename + cn->used;
		ret = cn_printf(cn, "%s (path unknown)", current->comm);
		cn_escape(commstart);
		return ret;
	}

	pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
	if (!pathbuf) {
		ret = -ENOMEM;
		goto put_exe_file;
	}

	path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
	if (IS_ERR(path)) {
		ret = PTR_ERR(path);
		goto free_buf;
	}

	/* Escape in place before copying into the core name. */
	cn_escape(path);

	ret = cn_printf(cn, "%s", path);

free_buf:
	kfree(pathbuf);
put_exe_file:
	fput(exe_file);
	return ret;
}
147 | |||
148 | /* format_corename will inspect the pattern parameter, and output a | ||
149 | * name into corename, which must have space for at least | ||
150 | * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. | ||
151 | */ | ||
152 | static int format_corename(struct core_name *cn, struct coredump_params *cprm) | ||
153 | { | ||
154 | const struct cred *cred = current_cred(); | ||
155 | const char *pat_ptr = core_pattern; | ||
156 | int ispipe = (*pat_ptr == '|'); | ||
157 | int pid_in_pattern = 0; | ||
158 | int err = 0; | ||
159 | |||
160 | cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); | ||
161 | cn->corename = kmalloc(cn->size, GFP_KERNEL); | ||
162 | cn->used = 0; | ||
163 | |||
164 | if (!cn->corename) | ||
165 | return -ENOMEM; | ||
166 | |||
167 | /* Repeat as long as we have more pattern to process and more output | ||
168 | space */ | ||
169 | while (*pat_ptr) { | ||
170 | if (*pat_ptr != '%') { | ||
171 | if (*pat_ptr == 0) | ||
172 | goto out; | ||
173 | err = cn_printf(cn, "%c", *pat_ptr++); | ||
174 | } else { | ||
175 | switch (*++pat_ptr) { | ||
176 | /* single % at the end, drop that */ | ||
177 | case 0: | ||
178 | goto out; | ||
179 | /* Double percent, output one percent */ | ||
180 | case '%': | ||
181 | err = cn_printf(cn, "%c", '%'); | ||
182 | break; | ||
183 | /* pid */ | ||
184 | case 'p': | ||
185 | pid_in_pattern = 1; | ||
186 | err = cn_printf(cn, "%d", | ||
187 | task_tgid_vnr(current)); | ||
188 | break; | ||
189 | /* uid */ | ||
190 | case 'u': | ||
191 | err = cn_printf(cn, "%d", cred->uid); | ||
192 | break; | ||
193 | /* gid */ | ||
194 | case 'g': | ||
195 | err = cn_printf(cn, "%d", cred->gid); | ||
196 | break; | ||
197 | case 'd': | ||
198 | err = cn_printf(cn, "%d", | ||
199 | __get_dumpable(cprm->mm_flags)); | ||
200 | break; | ||
201 | /* signal that caused the coredump */ | ||
202 | case 's': | ||
203 | err = cn_printf(cn, "%ld", cprm->siginfo->si_signo); | ||
204 | break; | ||
205 | /* UNIX time of coredump */ | ||
206 | case 't': { | ||
207 | struct timeval tv; | ||
208 | do_gettimeofday(&tv); | ||
209 | err = cn_printf(cn, "%lu", tv.tv_sec); | ||
210 | break; | ||
211 | } | ||
212 | /* hostname */ | ||
213 | case 'h': { | ||
214 | char *namestart = cn->corename + cn->used; | ||
215 | down_read(&uts_sem); | ||
216 | err = cn_printf(cn, "%s", | ||
217 | utsname()->nodename); | ||
218 | up_read(&uts_sem); | ||
219 | cn_escape(namestart); | ||
220 | break; | ||
221 | } | ||
222 | /* executable */ | ||
223 | case 'e': { | ||
224 | char *commstart = cn->corename + cn->used; | ||
225 | err = cn_printf(cn, "%s", current->comm); | ||
226 | cn_escape(commstart); | ||
227 | break; | ||
228 | } | ||
229 | case 'E': | ||
230 | err = cn_print_exe_file(cn); | ||
231 | break; | ||
232 | /* core limit size */ | ||
233 | case 'c': | ||
234 | err = cn_printf(cn, "%lu", | ||
235 | rlimit(RLIMIT_CORE)); | ||
236 | break; | ||
237 | default: | ||
238 | break; | ||
239 | } | ||
240 | ++pat_ptr; | ||
241 | } | ||
242 | |||
243 | if (err) | ||
244 | return err; | ||
245 | } | ||
246 | |||
247 | /* Backward compatibility with core_uses_pid: | ||
248 | * | ||
249 | * If core_pattern does not include a %p (as is the default) | ||
250 | * and core_uses_pid is set, then .%pid will be appended to | ||
251 | * the filename. Do not do this for piped commands. */ | ||
252 | if (!ispipe && !pid_in_pattern && core_uses_pid) { | ||
253 | err = cn_printf(cn, ".%d", task_tgid_vnr(current)); | ||
254 | if (err) | ||
255 | return err; | ||
256 | } | ||
257 | out: | ||
258 | return ispipe; | ||
259 | } | ||
260 | |||
/*
 * Mark @start's signal group as exiting with @exit_code and queue
 * SIGKILL for every other thread in the group that still owns an mm.
 * Callers hold the group's siglock (see zap_threads()).  Returns the
 * number of threads woken, i.e. how many the dumper must wait for.
 */
static int zap_process(struct task_struct *start, int exit_code)
{
	struct task_struct *t;
	int nr = 0;

	start->signal->flags = SIGNAL_GROUP_EXIT;
	start->signal->group_exit_code = exit_code;
	start->signal->group_stop_count = 0;

	t = start;
	do {
		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
		if (t != current && t->mm) {
			sigaddset(&t->pending.signal, SIGKILL);
			signal_wake_up(t, 1);
			nr++;
		}
	} while_each_thread(start, t);

	return nr;
}
282 | |||
/*
 * Kill every user of @mm other than the dumping thread and publish
 * @core_state so exiting threads can park themselves on it.  Returns
 * the number of threads the dumper must wait for, or -EAGAIN if
 * another group exit/coredump already won the race.  Called with
 * mm->mmap_sem held for writing (see coredump_wait()).
 */
static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
				struct core_state *core_state, int exit_code)
{
	struct task_struct *g, *p;
	unsigned long flags;
	int nr = -EAGAIN;

	spin_lock_irq(&tsk->sighand->siglock);
	if (!signal_group_exit(tsk->signal)) {
		mm->core_state = core_state;
		nr = zap_process(tsk, exit_code);
	}
	spin_unlock_irq(&tsk->sighand->siglock);
	if (unlikely(nr < 0))
		return nr;

	/* Fast path: only our own thread group uses this mm. */
	if (atomic_read(&mm->mm_users) == nr + 1)
		goto done;
	/*
	 * We should find and kill all tasks which use this mm, and we should
	 * count them correctly into ->nr_threads. We don't take tasklist
	 * lock, but this is safe wrt:
	 *
	 * fork:
	 *	None of sub-threads can fork after zap_process(leader). All
	 *	processes which were created before this point should be
	 *	visible to zap_threads() because copy_process() adds the new
	 *	process to the tail of init_task.tasks list, and lock/unlock
	 *	of ->siglock provides a memory barrier.
	 *
	 * do_exit:
	 *	The caller holds mm->mmap_sem. This means that the task which
	 *	uses this mm can't pass exit_mm(), so it can't exit or clear
	 *	its ->mm.
	 *
	 * de_thread:
	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
	 *	we must see either old or new leader, this does not matter.
	 *	However, it can change p->sighand, so lock_task_sighand(p)
	 *	must be used. Since p->mm != NULL and we hold ->mmap_sem
	 *	it can't fail.
	 *
	 *	Note also that "g" can be the old leader with ->mm == NULL
	 *	and already unhashed and thus removed from ->thread_group.
	 *	This is OK, __unhash_process()->list_del_rcu() does not
	 *	clear the ->next pointer, we will find the new leader via
	 *	next_thread().
	 */
	rcu_read_lock();
	for_each_process(g) {
		if (g == tsk->group_leader)
			continue;
		if (g->flags & PF_KTHREAD)
			continue;
		p = g;
		do {
			if (p->mm) {
				if (unlikely(p->mm == mm)) {
					lock_task_sighand(p, &flags);
					nr += zap_process(p, exit_code);
					unlock_task_sighand(p, &flags);
				}
				/* One thread with ->mm decides for the group. */
				break;
			}
		} while_each_thread(g, p);
	}
	rcu_read_unlock();
done:
	atomic_set(&core_state->nr_threads, nr);
	return nr;
}
354 | |||
/*
 * Quiesce all other users of current->mm before dumping: zap them via
 * zap_threads() and wait until each has checked in on the core_state
 * list and become inactive, so their register/FPU state is stable in
 * memory.  Returns the number of waiting threads, or a negative errno
 * (-EBUSY if a coredump is already in progress on this mm).
 */
static int coredump_wait(int exit_code, struct core_state *core_state)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	int core_waiters = -EBUSY;

	init_completion(&core_state->startup);
	core_state->dumper.task = tsk;
	core_state->dumper.next = NULL;

	down_write(&mm->mmap_sem);
	if (!mm->core_state)
		core_waiters = zap_threads(tsk, mm, core_state, exit_code);
	up_write(&mm->mmap_sem);

	if (core_waiters > 0) {
		struct core_thread *ptr;

		/* All zapped threads have parked themselves. */
		wait_for_completion(&core_state->startup);
		/*
		 * Wait for all the threads to become inactive, so that
		 * all the thread context (extended register state, like
		 * fpu etc) gets copied to the memory.
		 */
		ptr = core_state->dumper.next;
		while (ptr != NULL) {
			wait_task_inactive(ptr->task, 0);
			ptr = ptr->next;
		}
	}

	return core_waiters;
}
388 | |||
/*
 * End the coredump phase for @mm: release every thread parked on
 * mm->core_state (they block in exit_mm()) and clear the core_state
 * pointer so future dumps can proceed.
 */
static void coredump_finish(struct mm_struct *mm)
{
	struct core_thread *curr, *next;
	struct task_struct *task;

	next = mm->core_state->dumper.next;
	while ((curr = next) != NULL) {
		next = curr->next;
		task = curr->task;
		/*
		 * see exit_mm(), curr->task must not see
		 * ->task == NULL before we read ->next.
		 */
		smp_mb();
		curr->task = NULL;
		wake_up_process(task);
	}

	mm->core_state = NULL;
}
409 | |||
/*
 * Pose as an extra reader on the pipe handed to the usermode helper and
 * wait (interruptibly) until the helper side closes, so the pipe isn't
 * torn down while the helper is still consuming the dump.  Reader and
 * writer counts are restored before returning.
 */
static void wait_for_dump_helpers(struct file *file)
{
	struct pipe_inode_info *pipe;

	pipe = file->f_path.dentry->d_inode->i_pipe;

	pipe_lock(pipe);
	pipe->readers++;
	pipe->writers--;

	/* readers drops to 1 (just us) once the helper exits. */
	while ((pipe->readers > 1) && (!signal_pending(current))) {
		wake_up_interruptible_sync(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		pipe_wait(pipe);
	}

	pipe->readers--;
	pipe->writers++;
	pipe_unlock(pipe);

}
431 | |||
/*
 * umh_pipe_setup
 * helper function to customize the process used
 * to collect the core in userspace. Specifically
 * it sets up a pipe and installs it as fd 0 (stdin)
 * for the process. Returns 0 on success, or
 * PTR_ERR on failure.
 * Note that it also sets the core limit to 1. This
 * is a special value that we use to trap recursive
 * core dumps
 */
static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
{
	struct file *files[2];
	struct coredump_params *cp = (struct coredump_params *)info->data;
	int err = create_pipe_files(files, 0);
	if (err)
		return err;

	/* The kernel keeps the write end; dump data flows through it. */
	cp->file = files[1];

	/* The read end becomes the helper's stdin. */
	replace_fd(0, files[0], 0);
	/* and disallow core files too */
	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};

	return 0;
}
459 | |||
/*
 * Perform a core dump for the current task in response to fatal signal
 * @siginfo.  Expands core_pattern into a target ("|helper" pipe or a
 * file), quiesces all other users of the mm via coredump_wait(), and
 * hands the actual dump format to the binfmt's ->core_dump() handler.
 */
void do_coredump(siginfo_t *siginfo, struct pt_regs *regs)
{
	struct core_state core_state;
	struct core_name cn;
	struct mm_struct *mm = current->mm;
	struct linux_binfmt * binfmt;
	const struct cred *old_cred;
	struct cred *cred;
	int retval = 0;
	int flag = 0;
	int ispipe;
	struct files_struct *displaced;
	bool need_nonrelative = false;
	static atomic_t core_dump_count = ATOMIC_INIT(0);
	struct coredump_params cprm = {
		.siginfo = siginfo,
		.regs = regs,
		.limit = rlimit(RLIMIT_CORE),
		/*
		 * We must use the same mm->flags while dumping core to avoid
		 * inconsistency of bit flags, since this flag is not protected
		 * by any locks.
		 */
		.mm_flags = mm->flags,
	};

	audit_core_dumps(siginfo->si_signo);

	binfmt = mm->binfmt;
	if (!binfmt || !binfmt->core_dump)
		goto fail;
	if (!__get_dumpable(cprm.mm_flags))
		goto fail;

	cred = prepare_creds();
	if (!cred)
		goto fail;
	/*
	 * We cannot trust fsuid as being the "true" uid of the process
	 * nor do we know its entire history. We only know it was tainted
	 * so we dump it as root in mode 2, and only into a controlled
	 * environment (pipe handler or fully qualified path).
	 */
	if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
		/* Setuid core dump mode */
		flag = O_EXCL;		/* Stop rewrite attacks */
		cred->fsuid = GLOBAL_ROOT_UID;	/* Dump root private */
		need_nonrelative = true;
	}

	retval = coredump_wait(siginfo->si_signo, &core_state);
	if (retval < 0)
		goto fail_creds;

	old_cred = override_creds(cred);

	/*
	 * Clear any false indication of pending signals that might
	 * be seen by the filesystem code called to write the core file.
	 */
	clear_thread_flag(TIF_SIGPENDING);

	ispipe = format_corename(&cn, &cprm);

	if (ispipe) {
		int dump_count;
		char **helper_argv;

		if (ispipe < 0) {
			printk(KERN_WARNING "format_corename failed\n");
			printk(KERN_WARNING "Aborting core\n");
			goto fail_corename;
		}

		if (cprm.limit == 1) {
			/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
			 *
			 * Normally core limits are irrelevant to pipes, since
			 * we're not writing to the file system, but we use
			 * cprm.limit of 1 here as a special value, this is a
			 * consistent way to catch recursive crashes.
			 * We can still crash if the core_pattern binary sets
			 * RLIM_CORE = !1, but it runs as root, and can do
			 * lots of stupid things.
			 *
			 * Note that we use task_tgid_vnr here to grab the pid
			 * of the process group leader. That way we get the
			 * right pid if a thread in a multi-threaded
			 * core_pattern process dies.
			 */
			printk(KERN_WARNING
				"Process %d(%s) has RLIMIT_CORE set to 1\n",
				task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Aborting core\n");
			goto fail_unlock;
		}
		cprm.limit = RLIM_INFINITY;

		dump_count = atomic_inc_return(&core_dump_count);
		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
			       task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Skipping core dump\n");
			goto fail_dropcount;
		}

		/* Skip the leading '|' when building the helper's argv. */
		helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
		if (!helper_argv) {
			printk(KERN_WARNING "%s failed to allocate memory\n",
			       __func__);
			goto fail_dropcount;
		}

		retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
					NULL, UMH_WAIT_EXEC, umh_pipe_setup,
					NULL, &cprm);
		argv_free(helper_argv);
		if (retval) {
			printk(KERN_INFO "Core dump to %s pipe failed\n",
			       cn.corename);
			goto close_fail;
		}
	} else {
		struct inode *inode;

		if (cprm.limit < binfmt->min_coredump)
			goto fail_unlock;

		if (need_nonrelative && cn.corename[0] != '/') {
			printk(KERN_WARNING "Pid %d(%s) can only dump core "\
				"to fully qualified path!\n",
				task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Skipping core dump\n");
			goto fail_unlock;
		}

		/* 2 == O_RDWR: dump file opened read-write, never a symlink. */
		cprm.file = filp_open(cn.corename,
				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
				 0600);
		if (IS_ERR(cprm.file))
			goto fail_unlock;

		inode = cprm.file->f_path.dentry->d_inode;
		if (inode->i_nlink > 1)
			goto close_fail;
		if (d_unhashed(cprm.file->f_path.dentry))
			goto close_fail;
		/*
		 * AK: actually i see no reason to not allow this for named
		 * pipes etc, but keep the previous behaviour for now.
		 */
		if (!S_ISREG(inode->i_mode))
			goto close_fail;
		/*
		 * Dont allow local users get cute and trick others to coredump
		 * into their pre-created files.
		 */
		if (!uid_eq(inode->i_uid, current_fsuid()))
			goto close_fail;
		if (!cprm.file->f_op || !cprm.file->f_op->write)
			goto close_fail;
		if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
			goto close_fail;
	}

	/* get us an unshared descriptor table; almost always a no-op */
	retval = unshare_files(&displaced);
	if (retval)
		goto close_fail;
	if (displaced)
		put_files_struct(displaced);
	retval = binfmt->core_dump(&cprm);
	if (retval)
		current->signal->group_exit_code |= 0x80;

	if (ispipe && core_pipe_limit)
		wait_for_dump_helpers(cprm.file);
close_fail:
	if (cprm.file)
		filp_close(cprm.file, NULL);
fail_dropcount:
	if (ispipe)
		atomic_dec(&core_dump_count);
fail_unlock:
	kfree(cn.corename);
fail_corename:
	coredump_finish(mm);
	revert_creds(old_cred);
fail_creds:
	put_cred(cred);
fail:
	return;
}
653 | |||
/*
 * Core dumping helper functions. These are the only things you should
 * do on a core-file: use only these functions to write out all the
 * necessary info.
 */
/*
 * Write @nr bytes at @addr to the core file.  Returns 1 only when the
 * address range is readable and the write completed in full; 0 on any
 * failure or short write.
 */
int dump_write(struct file *file, const void *addr, int nr)
{
	return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}
EXPORT_SYMBOL(dump_write);
664 | |||
665 | int dump_seek(struct file *file, loff_t off) | ||
666 | { | ||
667 | int ret = 1; | ||
668 | |||
669 | if (file->f_op->llseek && file->f_op->llseek != no_llseek) { | ||
670 | if (file->f_op->llseek(file, off, SEEK_CUR) < 0) | ||
671 | return 0; | ||
672 | } else { | ||
673 | char *buf = (char *)get_zeroed_page(GFP_KERNEL); | ||
674 | |||
675 | if (!buf) | ||
676 | return 0; | ||
677 | while (off > 0) { | ||
678 | unsigned long n = off; | ||
679 | |||
680 | if (n > PAGE_SIZE) | ||
681 | n = PAGE_SIZE; | ||
682 | if (!dump_write(file, buf, n)) { | ||
683 | ret = 0; | ||
684 | break; | ||
685 | } | ||
686 | off -= n; | ||
687 | } | ||
688 | free_page((unsigned long)buf); | ||
689 | } | ||
690 | return ret; | ||
691 | } | ||
692 | EXPORT_SYMBOL(dump_seek); | ||
diff --git a/fs/coredump.h b/fs/coredump.h new file mode 100644 index 000000000000..e39ff072110d --- /dev/null +++ b/fs/coredump.h | |||
@@ -0,0 +1,6 @@ | |||
#ifndef _FS_COREDUMP_H
#define _FS_COREDUMP_H

/*
 * Extract the dumpability setting from a snapshot of mm->flags.
 * NOTE(review): defined outside this file (presumably fs/exec.c) —
 * confirm the defining translation unit.
 */
extern int __get_dumpable(unsigned long mm_flags);

#endif
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 28cca01ca9c9..c6c3f91ecf06 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c | |||
@@ -90,8 +90,8 @@ static struct inode *get_cramfs_inode(struct super_block *sb, | |||
90 | } | 90 | } |
91 | 91 | ||
92 | inode->i_mode = cramfs_inode->mode; | 92 | inode->i_mode = cramfs_inode->mode; |
93 | inode->i_uid = cramfs_inode->uid; | 93 | i_uid_write(inode, cramfs_inode->uid); |
94 | inode->i_gid = cramfs_inode->gid; | 94 | i_gid_write(inode, cramfs_inode->gid); |
95 | 95 | ||
96 | /* if the lower 2 bits are zero, the inode contains data */ | 96 | /* if the lower 2 bits are zero, the inode contains data */ |
97 | if (!(inode->i_ino & 3)) { | 97 | if (!(inode->i_ino & 3)) { |
diff --git a/fs/dcache.c b/fs/dcache.c index 8086636bf796..3a463d0c4fe8 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -389,7 +389,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) | |||
389 | * Inform try_to_ascend() that we are no longer attached to the | 389 | * Inform try_to_ascend() that we are no longer attached to the |
390 | * dentry tree | 390 | * dentry tree |
391 | */ | 391 | */ |
392 | dentry->d_flags |= DCACHE_DISCONNECTED; | 392 | dentry->d_flags |= DCACHE_DENTRY_KILLED; |
393 | if (parent) | 393 | if (parent) |
394 | spin_unlock(&parent->d_lock); | 394 | spin_unlock(&parent->d_lock); |
395 | dentry_iput(dentry); | 395 | dentry_iput(dentry); |
@@ -1048,7 +1048,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq | |||
1048 | * or deletion | 1048 | * or deletion |
1049 | */ | 1049 | */ |
1050 | if (new != old->d_parent || | 1050 | if (new != old->d_parent || |
1051 | (old->d_flags & DCACHE_DISCONNECTED) || | 1051 | (old->d_flags & DCACHE_DENTRY_KILLED) || |
1052 | (!locked && read_seqretry(&rename_lock, seq))) { | 1052 | (!locked && read_seqretry(&rename_lock, seq))) { |
1053 | spin_unlock(&new->d_lock); | 1053 | spin_unlock(&new->d_lock); |
1054 | new = NULL; | 1054 | new = NULL; |
@@ -1134,6 +1134,8 @@ positive: | |||
1134 | return 1; | 1134 | return 1; |
1135 | 1135 | ||
1136 | rename_retry: | 1136 | rename_retry: |
1137 | if (locked) | ||
1138 | goto again; | ||
1137 | locked = 1; | 1139 | locked = 1; |
1138 | write_seqlock(&rename_lock); | 1140 | write_seqlock(&rename_lock); |
1139 | goto again; | 1141 | goto again; |
@@ -1141,7 +1143,7 @@ rename_retry: | |||
1141 | EXPORT_SYMBOL(have_submounts); | 1143 | EXPORT_SYMBOL(have_submounts); |
1142 | 1144 | ||
1143 | /* | 1145 | /* |
1144 | * Search the dentry child list for the specified parent, | 1146 | * Search the dentry child list of the specified parent, |
1145 | * and move any unused dentries to the end of the unused | 1147 | * and move any unused dentries to the end of the unused |
1146 | * list for prune_dcache(). We descend to the next level | 1148 | * list for prune_dcache(). We descend to the next level |
1147 | * whenever the d_subdirs list is non-empty and continue | 1149 | * whenever the d_subdirs list is non-empty and continue |
@@ -1236,6 +1238,8 @@ out: | |||
1236 | rename_retry: | 1238 | rename_retry: |
1237 | if (found) | 1239 | if (found) |
1238 | return found; | 1240 | return found; |
1241 | if (locked) | ||
1242 | goto again; | ||
1239 | locked = 1; | 1243 | locked = 1; |
1240 | write_seqlock(&rename_lock); | 1244 | write_seqlock(&rename_lock); |
1241 | goto again; | 1245 | goto again; |
@@ -2109,7 +2113,7 @@ again: | |||
2109 | inode = dentry->d_inode; | 2113 | inode = dentry->d_inode; |
2110 | isdir = S_ISDIR(inode->i_mode); | 2114 | isdir = S_ISDIR(inode->i_mode); |
2111 | if (dentry->d_count == 1) { | 2115 | if (dentry->d_count == 1) { |
2112 | if (inode && !spin_trylock(&inode->i_lock)) { | 2116 | if (!spin_trylock(&inode->i_lock)) { |
2113 | spin_unlock(&dentry->d_lock); | 2117 | spin_unlock(&dentry->d_lock); |
2114 | cpu_relax(); | 2118 | cpu_relax(); |
2115 | goto again; | 2119 | goto again; |
@@ -3035,6 +3039,8 @@ resume: | |||
3035 | return; | 3039 | return; |
3036 | 3040 | ||
3037 | rename_retry: | 3041 | rename_retry: |
3042 | if (locked) | ||
3043 | goto again; | ||
3038 | locked = 1; | 3044 | locked = 1; |
3039 | write_seqlock(&rename_lock); | 3045 | write_seqlock(&rename_lock); |
3040 | goto again; | 3046 | goto again; |
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 2340f6978d6e..c5ca6ae5a30c 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c | |||
@@ -526,73 +526,51 @@ struct array_data { | |||
526 | u32 elements; | 526 | u32 elements; |
527 | }; | 527 | }; |
528 | 528 | ||
529 | static int u32_array_open(struct inode *inode, struct file *file) | 529 | static size_t u32_format_array(char *buf, size_t bufsize, |
530 | { | 530 | u32 *array, int array_size) |
531 | file->private_data = NULL; | ||
532 | return nonseekable_open(inode, file); | ||
533 | } | ||
534 | |||
535 | static size_t format_array(char *buf, size_t bufsize, const char *fmt, | ||
536 | u32 *array, u32 array_size) | ||
537 | { | 531 | { |
538 | size_t ret = 0; | 532 | size_t ret = 0; |
539 | u32 i; | ||
540 | 533 | ||
541 | for (i = 0; i < array_size; i++) { | 534 | while (--array_size >= 0) { |
542 | size_t len; | 535 | size_t len; |
536 | char term = array_size ? ' ' : '\n'; | ||
543 | 537 | ||
544 | len = snprintf(buf, bufsize, fmt, array[i]); | 538 | len = snprintf(buf, bufsize, "%u%c", *array++, term); |
545 | len++; /* ' ' or '\n' */ | ||
546 | ret += len; | 539 | ret += len; |
547 | 540 | ||
548 | if (buf) { | 541 | buf += len; |
549 | buf += len; | 542 | bufsize -= len; |
550 | bufsize -= len; | ||
551 | buf[-1] = (i == array_size-1) ? '\n' : ' '; | ||
552 | } | ||
553 | } | 543 | } |
554 | |||
555 | ret++; /* \0 */ | ||
556 | if (buf) | ||
557 | *buf = '\0'; | ||
558 | |||
559 | return ret; | 544 | return ret; |
560 | } | 545 | } |
561 | 546 | ||
562 | static char *format_array_alloc(const char *fmt, u32 *array, | 547 | static int u32_array_open(struct inode *inode, struct file *file) |
563 | u32 array_size) | ||
564 | { | 548 | { |
565 | size_t len = format_array(NULL, 0, fmt, array, array_size); | 549 | struct array_data *data = inode->i_private; |
566 | char *ret; | 550 | int size, elements = data->elements; |
567 | 551 | char *buf; | |
568 | ret = kmalloc(len, GFP_KERNEL); | 552 | |
569 | if (ret == NULL) | 553 | /* |
570 | return NULL; | 554 | * Max size: |
555 | * - 10 digits + ' '/'\n' = 11 bytes per number | ||
556 | * - terminating NUL character | ||
557 | */ | ||
558 | size = elements*11; | ||
559 | buf = kmalloc(size+1, GFP_KERNEL); | ||
560 | if (!buf) | ||
561 | return -ENOMEM; | ||
562 | buf[size] = 0; | ||
563 | |||
564 | file->private_data = buf; | ||
565 | u32_format_array(buf, size, data->array, data->elements); | ||
571 | 566 | ||
572 | format_array(ret, len, fmt, array, array_size); | 567 | return nonseekable_open(inode, file); |
573 | return ret; | ||
574 | } | 568 | } |
575 | 569 | ||
576 | static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, | 570 | static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, |
577 | loff_t *ppos) | 571 | loff_t *ppos) |
578 | { | 572 | { |
579 | struct inode *inode = file->f_path.dentry->d_inode; | 573 | size_t size = strlen(file->private_data); |
580 | struct array_data *data = inode->i_private; | ||
581 | size_t size; | ||
582 | |||
583 | if (*ppos == 0) { | ||
584 | if (file->private_data) { | ||
585 | kfree(file->private_data); | ||
586 | file->private_data = NULL; | ||
587 | } | ||
588 | |||
589 | file->private_data = format_array_alloc("%u", data->array, | ||
590 | data->elements); | ||
591 | } | ||
592 | |||
593 | size = 0; | ||
594 | if (file->private_data) | ||
595 | size = strlen(file->private_data); | ||
596 | 574 | ||
597 | return simple_read_from_buffer(buf, len, ppos, | 575 | return simple_read_from_buffer(buf, len, ppos, |
598 | file->private_data, size); | 576 | file->private_data, size); |
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 4733eab34a23..b607d92cdf24 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
@@ -28,7 +28,7 @@ | |||
28 | #include <linux/magic.h> | 28 | #include <linux/magic.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | 30 | ||
31 | #define DEBUGFS_DEFAULT_MODE 0755 | 31 | #define DEBUGFS_DEFAULT_MODE 0700 |
32 | 32 | ||
33 | static struct vfsmount *debugfs_mount; | 33 | static struct vfsmount *debugfs_mount; |
34 | static int debugfs_mount_count; | 34 | static int debugfs_mount_count; |
@@ -128,8 +128,8 @@ static inline int debugfs_positive(struct dentry *dentry) | |||
128 | } | 128 | } |
129 | 129 | ||
130 | struct debugfs_mount_opts { | 130 | struct debugfs_mount_opts { |
131 | uid_t uid; | 131 | kuid_t uid; |
132 | gid_t gid; | 132 | kgid_t gid; |
133 | umode_t mode; | 133 | umode_t mode; |
134 | }; | 134 | }; |
135 | 135 | ||
@@ -156,6 +156,8 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts) | |||
156 | substring_t args[MAX_OPT_ARGS]; | 156 | substring_t args[MAX_OPT_ARGS]; |
157 | int option; | 157 | int option; |
158 | int token; | 158 | int token; |
159 | kuid_t uid; | ||
160 | kgid_t gid; | ||
159 | char *p; | 161 | char *p; |
160 | 162 | ||
161 | opts->mode = DEBUGFS_DEFAULT_MODE; | 163 | opts->mode = DEBUGFS_DEFAULT_MODE; |
@@ -169,12 +171,18 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts) | |||
169 | case Opt_uid: | 171 | case Opt_uid: |
170 | if (match_int(&args[0], &option)) | 172 | if (match_int(&args[0], &option)) |
171 | return -EINVAL; | 173 | return -EINVAL; |
172 | opts->uid = option; | 174 | uid = make_kuid(current_user_ns(), option); |
175 | if (!uid_valid(uid)) | ||
176 | return -EINVAL; | ||
177 | opts->uid = uid; | ||
173 | break; | 178 | break; |
174 | case Opt_gid: | 179 | case Opt_gid: |
175 | if (match_octal(&args[0], &option)) | 180 | if (match_octal(&args[0], &option)) |
176 | return -EINVAL; | 181 | return -EINVAL; |
177 | opts->gid = option; | 182 | gid = make_kgid(current_user_ns(), option); |
183 | if (!gid_valid(gid)) | ||
184 | return -EINVAL; | ||
185 | opts->gid = gid; | ||
178 | break; | 186 | break; |
179 | case Opt_mode: | 187 | case Opt_mode: |
180 | if (match_octal(&args[0], &option)) | 188 | if (match_octal(&args[0], &option)) |
@@ -226,10 +234,12 @@ static int debugfs_show_options(struct seq_file *m, struct dentry *root) | |||
226 | struct debugfs_fs_info *fsi = root->d_sb->s_fs_info; | 234 | struct debugfs_fs_info *fsi = root->d_sb->s_fs_info; |
227 | struct debugfs_mount_opts *opts = &fsi->mount_opts; | 235 | struct debugfs_mount_opts *opts = &fsi->mount_opts; |
228 | 236 | ||
229 | if (opts->uid != 0) | 237 | if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) |
230 | seq_printf(m, ",uid=%u", opts->uid); | 238 | seq_printf(m, ",uid=%u", |
231 | if (opts->gid != 0) | 239 | from_kuid_munged(&init_user_ns, opts->uid)); |
232 | seq_printf(m, ",gid=%u", opts->gid); | 240 | if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) |
241 | seq_printf(m, ",gid=%u", | ||
242 | from_kgid_munged(&init_user_ns, opts->gid)); | ||
233 | if (opts->mode != DEBUGFS_DEFAULT_MODE) | 243 | if (opts->mode != DEBUGFS_DEFAULT_MODE) |
234 | seq_printf(m, ",mode=%o", opts->mode); | 244 | seq_printf(m, ",mode=%o", opts->mode); |
235 | 245 | ||
@@ -291,9 +301,9 @@ static struct file_system_type debug_fs_type = { | |||
291 | .kill_sb = kill_litter_super, | 301 | .kill_sb = kill_litter_super, |
292 | }; | 302 | }; |
293 | 303 | ||
294 | struct dentry *__create_file(const char *name, umode_t mode, | 304 | static struct dentry *__create_file(const char *name, umode_t mode, |
295 | struct dentry *parent, void *data, | 305 | struct dentry *parent, void *data, |
296 | const struct file_operations *fops) | 306 | const struct file_operations *fops) |
297 | { | 307 | { |
298 | struct dentry *dentry = NULL; | 308 | struct dentry *dentry = NULL; |
299 | int error; | 309 | int error; |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 1faf4cb56f39..f86c720dba0e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -1062,6 +1062,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1062 | unsigned long user_addr; | 1062 | unsigned long user_addr; |
1063 | size_t bytes; | 1063 | size_t bytes; |
1064 | struct buffer_head map_bh = { 0, }; | 1064 | struct buffer_head map_bh = { 0, }; |
1065 | struct blk_plug plug; | ||
1065 | 1066 | ||
1066 | if (rw & WRITE) | 1067 | if (rw & WRITE) |
1067 | rw = WRITE_ODIRECT; | 1068 | rw = WRITE_ODIRECT; |
@@ -1177,6 +1178,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1177 | PAGE_SIZE - user_addr / PAGE_SIZE); | 1178 | PAGE_SIZE - user_addr / PAGE_SIZE); |
1178 | } | 1179 | } |
1179 | 1180 | ||
1181 | blk_start_plug(&plug); | ||
1182 | |||
1180 | for (seg = 0; seg < nr_segs; seg++) { | 1183 | for (seg = 0; seg < nr_segs; seg++) { |
1181 | user_addr = (unsigned long)iov[seg].iov_base; | 1184 | user_addr = (unsigned long)iov[seg].iov_base; |
1182 | sdio.size += bytes = iov[seg].iov_len; | 1185 | sdio.size += bytes = iov[seg].iov_len; |
@@ -1235,6 +1238,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1235 | if (sdio.bio) | 1238 | if (sdio.bio) |
1236 | dio_bio_submit(dio, &sdio); | 1239 | dio_bio_submit(dio, &sdio); |
1237 | 1240 | ||
1241 | blk_finish_plug(&plug); | ||
1242 | |||
1238 | /* | 1243 | /* |
1239 | * It is possible that, we return short IO due to end of file. | 1244 | * It is possible that, we return short IO due to end of file. |
1240 | * In that case, we need to release all the pages we got hold on. | 1245 | * In that case, we need to release all the pages we got hold on. |
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 63dc19c54d5a..27a6ba9aaeec 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c | |||
@@ -15,8 +15,8 @@ | |||
15 | #include "lock.h" | 15 | #include "lock.h" |
16 | #include "user.h" | 16 | #include "user.h" |
17 | 17 | ||
18 | static uint64_t dlm_cb_seq; | 18 | static uint64_t dlm_cb_seq; |
19 | static spinlock_t dlm_cb_seq_spin; | 19 | static DEFINE_SPINLOCK(dlm_cb_seq_spin); |
20 | 20 | ||
21 | static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) | 21 | static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) |
22 | { | 22 | { |
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 9ccf7346834a..a0387dd8b1f0 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
@@ -750,6 +750,7 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, | |||
750 | static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) | 750 | static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) |
751 | { | 751 | { |
752 | struct sockaddr_storage *addr; | 752 | struct sockaddr_storage *addr; |
753 | int rv; | ||
753 | 754 | ||
754 | if (len != sizeof(struct sockaddr_storage)) | 755 | if (len != sizeof(struct sockaddr_storage)) |
755 | return -EINVAL; | 756 | return -EINVAL; |
@@ -762,6 +763,13 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) | |||
762 | return -ENOMEM; | 763 | return -ENOMEM; |
763 | 764 | ||
764 | memcpy(addr, buf, len); | 765 | memcpy(addr, buf, len); |
766 | |||
767 | rv = dlm_lowcomms_addr(cm->nodeid, addr, len); | ||
768 | if (rv) { | ||
769 | kfree(addr); | ||
770 | return rv; | ||
771 | } | ||
772 | |||
765 | cm->addr[cm->addr_count++] = addr; | 773 | cm->addr[cm->addr_count++] = addr; |
766 | return len; | 774 | return len; |
767 | } | 775 | } |
@@ -878,34 +886,7 @@ static void put_space(struct dlm_space *sp) | |||
878 | config_item_put(&sp->group.cg_item); | 886 | config_item_put(&sp->group.cg_item); |
879 | } | 887 | } |
880 | 888 | ||
881 | static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) | 889 | static struct dlm_comm *get_comm(int nodeid) |
882 | { | ||
883 | switch (x->ss_family) { | ||
884 | case AF_INET: { | ||
885 | struct sockaddr_in *sinx = (struct sockaddr_in *)x; | ||
886 | struct sockaddr_in *siny = (struct sockaddr_in *)y; | ||
887 | if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) | ||
888 | return 0; | ||
889 | if (sinx->sin_port != siny->sin_port) | ||
890 | return 0; | ||
891 | break; | ||
892 | } | ||
893 | case AF_INET6: { | ||
894 | struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; | ||
895 | struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; | ||
896 | if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) | ||
897 | return 0; | ||
898 | if (sinx->sin6_port != siny->sin6_port) | ||
899 | return 0; | ||
900 | break; | ||
901 | } | ||
902 | default: | ||
903 | return 0; | ||
904 | } | ||
905 | return 1; | ||
906 | } | ||
907 | |||
908 | static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) | ||
909 | { | 890 | { |
910 | struct config_item *i; | 891 | struct config_item *i; |
911 | struct dlm_comm *cm = NULL; | 892 | struct dlm_comm *cm = NULL; |
@@ -919,19 +900,11 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
919 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { | 900 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { |
920 | cm = config_item_to_comm(i); | 901 | cm = config_item_to_comm(i); |
921 | 902 | ||
922 | if (nodeid) { | 903 | if (cm->nodeid != nodeid) |
923 | if (cm->nodeid != nodeid) | 904 | continue; |
924 | continue; | 905 | found = 1; |
925 | found = 1; | 906 | config_item_get(i); |
926 | config_item_get(i); | 907 | break; |
927 | break; | ||
928 | } else { | ||
929 | if (!cm->addr_count || !addr_compare(cm->addr[0], addr)) | ||
930 | continue; | ||
931 | found = 1; | ||
932 | config_item_get(i); | ||
933 | break; | ||
934 | } | ||
935 | } | 908 | } |
936 | mutex_unlock(&clusters_root.subsys.su_mutex); | 909 | mutex_unlock(&clusters_root.subsys.su_mutex); |
937 | 910 | ||
@@ -995,7 +968,7 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, | |||
995 | 968 | ||
996 | int dlm_comm_seq(int nodeid, uint32_t *seq) | 969 | int dlm_comm_seq(int nodeid, uint32_t *seq) |
997 | { | 970 | { |
998 | struct dlm_comm *cm = get_comm(nodeid, NULL); | 971 | struct dlm_comm *cm = get_comm(nodeid); |
999 | if (!cm) | 972 | if (!cm) |
1000 | return -EEXIST; | 973 | return -EEXIST; |
1001 | *seq = cm->seq; | 974 | *seq = cm->seq; |
@@ -1003,28 +976,6 @@ int dlm_comm_seq(int nodeid, uint32_t *seq) | |||
1003 | return 0; | 976 | return 0; |
1004 | } | 977 | } |
1005 | 978 | ||
1006 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) | ||
1007 | { | ||
1008 | struct dlm_comm *cm = get_comm(nodeid, NULL); | ||
1009 | if (!cm) | ||
1010 | return -EEXIST; | ||
1011 | if (!cm->addr_count) | ||
1012 | return -ENOENT; | ||
1013 | memcpy(addr, cm->addr[0], sizeof(*addr)); | ||
1014 | put_comm(cm); | ||
1015 | return 0; | ||
1016 | } | ||
1017 | |||
1018 | int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) | ||
1019 | { | ||
1020 | struct dlm_comm *cm = get_comm(0, addr); | ||
1021 | if (!cm) | ||
1022 | return -EEXIST; | ||
1023 | *nodeid = cm->nodeid; | ||
1024 | put_comm(cm); | ||
1025 | return 0; | ||
1026 | } | ||
1027 | |||
1028 | int dlm_our_nodeid(void) | 979 | int dlm_our_nodeid(void) |
1029 | { | 980 | { |
1030 | return local_comm ? local_comm->nodeid : 0; | 981 | return local_comm ? local_comm->nodeid : 0; |
diff --git a/fs/dlm/config.h b/fs/dlm/config.h index dbd35a08f3a5..f30697bc2780 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h | |||
@@ -46,8 +46,6 @@ void dlm_config_exit(void); | |||
46 | int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, | 46 | int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, |
47 | int *count_out); | 47 | int *count_out); |
48 | int dlm_comm_seq(int nodeid, uint32_t *seq); | 48 | int dlm_comm_seq(int nodeid, uint32_t *seq); |
49 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); | ||
50 | int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); | ||
51 | int dlm_our_nodeid(void); | 49 | int dlm_our_nodeid(void); |
52 | int dlm_our_addr(struct sockaddr_storage *addr, int num); | 50 | int dlm_our_addr(struct sockaddr_storage *addr, int num); |
53 | 51 | ||
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 9d3e485f88c8..871c1abf6029 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -604,6 +604,7 @@ struct dlm_ls { | |||
604 | struct idr ls_recover_idr; | 604 | struct idr ls_recover_idr; |
605 | spinlock_t ls_recover_idr_lock; | 605 | spinlock_t ls_recover_idr_lock; |
606 | wait_queue_head_t ls_wait_general; | 606 | wait_queue_head_t ls_wait_general; |
607 | wait_queue_head_t ls_recover_lock_wait; | ||
607 | struct mutex ls_clear_proc_locks; | 608 | struct mutex ls_clear_proc_locks; |
608 | 609 | ||
609 | struct list_head ls_root_list; /* root resources */ | 610 | struct list_head ls_root_list; /* root resources */ |
@@ -616,15 +617,40 @@ struct dlm_ls { | |||
616 | char ls_name[1]; | 617 | char ls_name[1]; |
617 | }; | 618 | }; |
618 | 619 | ||
619 | #define LSFL_WORK 0 | 620 | /* |
620 | #define LSFL_RUNNING 1 | 621 | * LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines |
621 | #define LSFL_RECOVERY_STOP 2 | 622 | * that they should abort what they're doing so new recovery can be started. |
622 | #define LSFL_RCOM_READY 3 | 623 | * |
623 | #define LSFL_RCOM_WAIT 4 | 624 | * LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it |
624 | #define LSFL_UEVENT_WAIT 5 | 625 | * should do down_write() on the in_recovery rw_semaphore. (doing down_write |
625 | #define LSFL_TIMEWARN 6 | 626 | * within dlm_ls_stop causes complaints about the lock acquired/released |
626 | #define LSFL_CB_DELAY 7 | 627 | * in different contexts.) |
627 | #define LSFL_NODIR 8 | 628 | * |
629 | * LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore. | ||
630 | * It sets this after it is done with down_write() on the in_recovery | ||
631 | * rw_semaphore and clears it after it has released the rw_semaphore. | ||
632 | * | ||
633 | * LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it | ||
634 | * should begin recovery of the lockspace. | ||
635 | * | ||
636 | * LSFL_RUNNING - set when normal locking activity is enabled. | ||
637 | * dlm_ls_stop() clears this to tell dlm locking routines that they should | ||
638 | * quit what they are doing so recovery can run. dlm_recoverd sets | ||
639 | * this after recovery is finished. | ||
640 | */ | ||
641 | |||
642 | #define LSFL_RECOVER_STOP 0 | ||
643 | #define LSFL_RECOVER_DOWN 1 | ||
644 | #define LSFL_RECOVER_LOCK 2 | ||
645 | #define LSFL_RECOVER_WORK 3 | ||
646 | #define LSFL_RUNNING 4 | ||
647 | |||
648 | #define LSFL_RCOM_READY 5 | ||
649 | #define LSFL_RCOM_WAIT 6 | ||
650 | #define LSFL_UEVENT_WAIT 7 | ||
651 | #define LSFL_TIMEWARN 8 | ||
652 | #define LSFL_CB_DELAY 9 | ||
653 | #define LSFL_NODIR 10 | ||
628 | 654 | ||
629 | /* much of this is just saving user space pointers associated with the | 655 | /* much of this is just saving user space pointers associated with the |
630 | lock that we pass back to the user lib with an ast */ | 656 | lock that we pass back to the user lib with an ast */ |
@@ -667,7 +693,7 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls) | |||
667 | 693 | ||
668 | static inline int dlm_recovery_stopped(struct dlm_ls *ls) | 694 | static inline int dlm_recovery_stopped(struct dlm_ls *ls) |
669 | { | 695 | { |
670 | return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | 696 | return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); |
671 | } | 697 | } |
672 | 698 | ||
673 | static inline int dlm_no_directory(struct dlm_ls *ls) | 699 | static inline int dlm_no_directory(struct dlm_ls *ls) |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 952557d00ccd..2e99fb0c9737 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -582,8 +582,6 @@ static int new_lockspace(const char *name, const char *cluster, | |||
582 | INIT_LIST_HEAD(&ls->ls_root_list); | 582 | INIT_LIST_HEAD(&ls->ls_root_list); |
583 | init_rwsem(&ls->ls_root_sem); | 583 | init_rwsem(&ls->ls_root_sem); |
584 | 584 | ||
585 | down_write(&ls->ls_in_recovery); | ||
586 | |||
587 | spin_lock(&lslist_lock); | 585 | spin_lock(&lslist_lock); |
588 | ls->ls_create_count = 1; | 586 | ls->ls_create_count = 1; |
589 | list_add(&ls->ls_list, &lslist); | 587 | list_add(&ls->ls_list, &lslist); |
@@ -597,13 +595,24 @@ static int new_lockspace(const char *name, const char *cluster, | |||
597 | } | 595 | } |
598 | } | 596 | } |
599 | 597 | ||
600 | /* needs to find ls in lslist */ | 598 | init_waitqueue_head(&ls->ls_recover_lock_wait); |
599 | |||
600 | /* | ||
601 | * Once started, dlm_recoverd first looks for ls in lslist, then | ||
602 | * initializes ls_in_recovery as locked in "down" mode. We need | ||
603 | * to wait for the wakeup from dlm_recoverd because in_recovery | ||
604 | * has to start out in down mode. | ||
605 | */ | ||
606 | |||
601 | error = dlm_recoverd_start(ls); | 607 | error = dlm_recoverd_start(ls); |
602 | if (error) { | 608 | if (error) { |
603 | log_error(ls, "can't start dlm_recoverd %d", error); | 609 | log_error(ls, "can't start dlm_recoverd %d", error); |
604 | goto out_callback; | 610 | goto out_callback; |
605 | } | 611 | } |
606 | 612 | ||
613 | wait_event(ls->ls_recover_lock_wait, | ||
614 | test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)); | ||
615 | |||
607 | ls->ls_kobj.kset = dlm_kset; | 616 | ls->ls_kobj.kset = dlm_kset; |
608 | error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, | 617 | error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, |
609 | "%s", ls->ls_name); | 618 | "%s", ls->ls_name); |
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 5c1b0e38c7a4..331ea4f94efd 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -140,6 +140,16 @@ struct writequeue_entry { | |||
140 | struct connection *con; | 140 | struct connection *con; |
141 | }; | 141 | }; |
142 | 142 | ||
143 | struct dlm_node_addr { | ||
144 | struct list_head list; | ||
145 | int nodeid; | ||
146 | int addr_count; | ||
147 | struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; | ||
148 | }; | ||
149 | |||
150 | static LIST_HEAD(dlm_node_addrs); | ||
151 | static DEFINE_SPINLOCK(dlm_node_addrs_spin); | ||
152 | |||
143 | static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; | 153 | static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; |
144 | static int dlm_local_count; | 154 | static int dlm_local_count; |
145 | static int dlm_allow_conn; | 155 | static int dlm_allow_conn; |
@@ -264,31 +274,146 @@ static struct connection *assoc2con(int assoc_id) | |||
264 | return NULL; | 274 | return NULL; |
265 | } | 275 | } |
266 | 276 | ||
267 | static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) | 277 | static struct dlm_node_addr *find_node_addr(int nodeid) |
278 | { | ||
279 | struct dlm_node_addr *na; | ||
280 | |||
281 | list_for_each_entry(na, &dlm_node_addrs, list) { | ||
282 | if (na->nodeid == nodeid) | ||
283 | return na; | ||
284 | } | ||
285 | return NULL; | ||
286 | } | ||
287 | |||
288 | static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) | ||
289 | { | ||
290 | switch (x->ss_family) { | ||
291 | case AF_INET: { | ||
292 | struct sockaddr_in *sinx = (struct sockaddr_in *)x; | ||
293 | struct sockaddr_in *siny = (struct sockaddr_in *)y; | ||
294 | if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) | ||
295 | return 0; | ||
296 | if (sinx->sin_port != siny->sin_port) | ||
297 | return 0; | ||
298 | break; | ||
299 | } | ||
300 | case AF_INET6: { | ||
301 | struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; | ||
302 | struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; | ||
303 | if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) | ||
304 | return 0; | ||
305 | if (sinx->sin6_port != siny->sin6_port) | ||
306 | return 0; | ||
307 | break; | ||
308 | } | ||
309 | default: | ||
310 | return 0; | ||
311 | } | ||
312 | return 1; | ||
313 | } | ||
314 | |||
315 | static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, | ||
316 | struct sockaddr *sa_out) | ||
268 | { | 317 | { |
269 | struct sockaddr_storage addr; | 318 | struct sockaddr_storage sas; |
270 | int error; | 319 | struct dlm_node_addr *na; |
271 | 320 | ||
272 | if (!dlm_local_count) | 321 | if (!dlm_local_count) |
273 | return -1; | 322 | return -1; |
274 | 323 | ||
275 | error = dlm_nodeid_to_addr(nodeid, &addr); | 324 | spin_lock(&dlm_node_addrs_spin); |
276 | if (error) | 325 | na = find_node_addr(nodeid); |
277 | return error; | 326 | if (na && na->addr_count) |
327 | memcpy(&sas, na->addr[0], sizeof(struct sockaddr_storage)); | ||
328 | spin_unlock(&dlm_node_addrs_spin); | ||
329 | |||
330 | if (!na) | ||
331 | return -EEXIST; | ||
332 | |||
333 | if (!na->addr_count) | ||
334 | return -ENOENT; | ||
335 | |||
336 | if (sas_out) | ||
337 | memcpy(sas_out, &sas, sizeof(struct sockaddr_storage)); | ||
338 | |||
339 | if (!sa_out) | ||
340 | return 0; | ||
278 | 341 | ||
279 | if (dlm_local_addr[0]->ss_family == AF_INET) { | 342 | if (dlm_local_addr[0]->ss_family == AF_INET) { |
280 | struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; | 343 | struct sockaddr_in *in4 = (struct sockaddr_in *) &sas; |
281 | struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr; | 344 | struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out; |
282 | ret4->sin_addr.s_addr = in4->sin_addr.s_addr; | 345 | ret4->sin_addr.s_addr = in4->sin_addr.s_addr; |
283 | } else { | 346 | } else { |
284 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; | 347 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas; |
285 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; | 348 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out; |
286 | ret6->sin6_addr = in6->sin6_addr; | 349 | ret6->sin6_addr = in6->sin6_addr; |
287 | } | 350 | } |
288 | 351 | ||
289 | return 0; | 352 | return 0; |
290 | } | 353 | } |
291 | 354 | ||
355 | static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) | ||
356 | { | ||
357 | struct dlm_node_addr *na; | ||
358 | int rv = -EEXIST; | ||
359 | |||
360 | spin_lock(&dlm_node_addrs_spin); | ||
361 | list_for_each_entry(na, &dlm_node_addrs, list) { | ||
362 | if (!na->addr_count) | ||
363 | continue; | ||
364 | |||
365 | if (!addr_compare(na->addr[0], addr)) | ||
366 | continue; | ||
367 | |||
368 | *nodeid = na->nodeid; | ||
369 | rv = 0; | ||
370 | break; | ||
371 | } | ||
372 | spin_unlock(&dlm_node_addrs_spin); | ||
373 | return rv; | ||
374 | } | ||
375 | |||
376 | int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len) | ||
377 | { | ||
378 | struct sockaddr_storage *new_addr; | ||
379 | struct dlm_node_addr *new_node, *na; | ||
380 | |||
381 | new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS); | ||
382 | if (!new_node) | ||
383 | return -ENOMEM; | ||
384 | |||
385 | new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS); | ||
386 | if (!new_addr) { | ||
387 | kfree(new_node); | ||
388 | return -ENOMEM; | ||
389 | } | ||
390 | |||
391 | memcpy(new_addr, addr, len); | ||
392 | |||
393 | spin_lock(&dlm_node_addrs_spin); | ||
394 | na = find_node_addr(nodeid); | ||
395 | if (!na) { | ||
396 | new_node->nodeid = nodeid; | ||
397 | new_node->addr[0] = new_addr; | ||
398 | new_node->addr_count = 1; | ||
399 | list_add(&new_node->list, &dlm_node_addrs); | ||
400 | spin_unlock(&dlm_node_addrs_spin); | ||
401 | return 0; | ||
402 | } | ||
403 | |||
404 | if (na->addr_count >= DLM_MAX_ADDR_COUNT) { | ||
405 | spin_unlock(&dlm_node_addrs_spin); | ||
406 | kfree(new_addr); | ||
407 | kfree(new_node); | ||
408 | return -ENOSPC; | ||
409 | } | ||
410 | |||
411 | na->addr[na->addr_count++] = new_addr; | ||
412 | spin_unlock(&dlm_node_addrs_spin); | ||
413 | kfree(new_node); | ||
414 | return 0; | ||
415 | } | ||
416 | |||
292 | /* Data available on socket or listen socket received a connect */ | 417 | /* Data available on socket or listen socket received a connect */ |
293 | static void lowcomms_data_ready(struct sock *sk, int count_unused) | 418 | static void lowcomms_data_ready(struct sock *sk, int count_unused) |
294 | { | 419 | { |
@@ -348,7 +473,7 @@ int dlm_lowcomms_connect_node(int nodeid) | |||
348 | } | 473 | } |
349 | 474 | ||
350 | /* Make a socket active */ | 475 | /* Make a socket active */ |
351 | static int add_sock(struct socket *sock, struct connection *con) | 476 | static void add_sock(struct socket *sock, struct connection *con) |
352 | { | 477 | { |
353 | con->sock = sock; | 478 | con->sock = sock; |
354 | 479 | ||
@@ -358,7 +483,6 @@ static int add_sock(struct socket *sock, struct connection *con) | |||
358 | con->sock->sk->sk_state_change = lowcomms_state_change; | 483 | con->sock->sk->sk_state_change = lowcomms_state_change; |
359 | con->sock->sk->sk_user_data = con; | 484 | con->sock->sk->sk_user_data = con; |
360 | con->sock->sk->sk_allocation = GFP_NOFS; | 485 | con->sock->sk->sk_allocation = GFP_NOFS; |
361 | return 0; | ||
362 | } | 486 | } |
363 | 487 | ||
364 | /* Add the port number to an IPv6 or 4 sockaddr and return the address | 488 | /* Add the port number to an IPv6 or 4 sockaddr and return the address |
@@ -510,7 +634,7 @@ static void process_sctp_notification(struct connection *con, | |||
510 | return; | 634 | return; |
511 | } | 635 | } |
512 | make_sockaddr(&prim.ssp_addr, 0, &addr_len); | 636 | make_sockaddr(&prim.ssp_addr, 0, &addr_len); |
513 | if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { | 637 | if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) { |
514 | unsigned char *b=(unsigned char *)&prim.ssp_addr; | 638 | unsigned char *b=(unsigned char *)&prim.ssp_addr; |
515 | log_print("reject connect from unknown addr"); | 639 | log_print("reject connect from unknown addr"); |
516 | print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, | 640 | print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, |
@@ -747,7 +871,7 @@ static int tcp_accept_from_sock(struct connection *con) | |||
747 | 871 | ||
748 | /* Get the new node's NODEID */ | 872 | /* Get the new node's NODEID */ |
749 | make_sockaddr(&peeraddr, 0, &len); | 873 | make_sockaddr(&peeraddr, 0, &len); |
750 | if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { | 874 | if (addr_to_nodeid(&peeraddr, &nodeid)) { |
751 | unsigned char *b=(unsigned char *)&peeraddr; | 875 | unsigned char *b=(unsigned char *)&peeraddr; |
752 | log_print("connect from non cluster node"); | 876 | log_print("connect from non cluster node"); |
753 | print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, | 877 | print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, |
@@ -862,7 +986,7 @@ static void sctp_init_assoc(struct connection *con) | |||
862 | if (con->retries++ > MAX_CONNECT_RETRIES) | 986 | if (con->retries++ > MAX_CONNECT_RETRIES) |
863 | return; | 987 | return; |
864 | 988 | ||
865 | if (nodeid_to_addr(con->nodeid, (struct sockaddr *)&rem_addr)) { | 989 | if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr)) { |
866 | log_print("no address for nodeid %d", con->nodeid); | 990 | log_print("no address for nodeid %d", con->nodeid); |
867 | return; | 991 | return; |
868 | } | 992 | } |
@@ -928,11 +1052,11 @@ static void sctp_init_assoc(struct connection *con) | |||
928 | /* Connect a new socket to its peer */ | 1052 | /* Connect a new socket to its peer */ |
929 | static void tcp_connect_to_sock(struct connection *con) | 1053 | static void tcp_connect_to_sock(struct connection *con) |
930 | { | 1054 | { |
931 | int result = -EHOSTUNREACH; | ||
932 | struct sockaddr_storage saddr, src_addr; | 1055 | struct sockaddr_storage saddr, src_addr; |
933 | int addr_len; | 1056 | int addr_len; |
934 | struct socket *sock = NULL; | 1057 | struct socket *sock = NULL; |
935 | int one = 1; | 1058 | int one = 1; |
1059 | int result; | ||
936 | 1060 | ||
937 | if (con->nodeid == 0) { | 1061 | if (con->nodeid == 0) { |
938 | log_print("attempt to connect sock 0 foiled"); | 1062 | log_print("attempt to connect sock 0 foiled"); |
@@ -944,10 +1068,8 @@ static void tcp_connect_to_sock(struct connection *con) | |||
944 | goto out; | 1068 | goto out; |
945 | 1069 | ||
946 | /* Some odd races can cause double-connects, ignore them */ | 1070 | /* Some odd races can cause double-connects, ignore them */ |
947 | if (con->sock) { | 1071 | if (con->sock) |
948 | result = 0; | ||
949 | goto out; | 1072 | goto out; |
950 | } | ||
951 | 1073 | ||
952 | /* Create a socket to communicate with */ | 1074 | /* Create a socket to communicate with */ |
953 | result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, | 1075 | result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, |
@@ -956,8 +1078,11 @@ static void tcp_connect_to_sock(struct connection *con) | |||
956 | goto out_err; | 1078 | goto out_err; |
957 | 1079 | ||
958 | memset(&saddr, 0, sizeof(saddr)); | 1080 | memset(&saddr, 0, sizeof(saddr)); |
959 | if (dlm_nodeid_to_addr(con->nodeid, &saddr)) | 1081 | result = nodeid_to_addr(con->nodeid, &saddr, NULL); |
1082 | if (result < 0) { | ||
1083 | log_print("no address for nodeid %d", con->nodeid); | ||
960 | goto out_err; | 1084 | goto out_err; |
1085 | } | ||
961 | 1086 | ||
962 | sock->sk->sk_user_data = con; | 1087 | sock->sk->sk_user_data = con; |
963 | con->rx_action = receive_from_sock; | 1088 | con->rx_action = receive_from_sock; |
@@ -983,8 +1108,7 @@ static void tcp_connect_to_sock(struct connection *con) | |||
983 | kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, | 1108 | kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, |
984 | sizeof(one)); | 1109 | sizeof(one)); |
985 | 1110 | ||
986 | result = | 1111 | result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, |
987 | sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, | ||
988 | O_NONBLOCK); | 1112 | O_NONBLOCK); |
989 | if (result == -EINPROGRESS) | 1113 | if (result == -EINPROGRESS) |
990 | result = 0; | 1114 | result = 0; |
@@ -1002,11 +1126,17 @@ out_err: | |||
1002 | * Some errors are fatal and this list might need adjusting. For other | 1126 | * Some errors are fatal and this list might need adjusting. For other |
1003 | * errors we try again until the max number of retries is reached. | 1127 | * errors we try again until the max number of retries is reached. |
1004 | */ | 1128 | */ |
1005 | if (result != -EHOSTUNREACH && result != -ENETUNREACH && | 1129 | if (result != -EHOSTUNREACH && |
1006 | result != -ENETDOWN && result != -EINVAL | 1130 | result != -ENETUNREACH && |
1007 | && result != -EPROTONOSUPPORT) { | 1131 | result != -ENETDOWN && |
1132 | result != -EINVAL && | ||
1133 | result != -EPROTONOSUPPORT) { | ||
1134 | log_print("connect %d try %d error %d", con->nodeid, | ||
1135 | con->retries, result); | ||
1136 | mutex_unlock(&con->sock_mutex); | ||
1137 | msleep(1000); | ||
1008 | lowcomms_connect_sock(con); | 1138 | lowcomms_connect_sock(con); |
1009 | result = 0; | 1139 | return; |
1010 | } | 1140 | } |
1011 | out: | 1141 | out: |
1012 | mutex_unlock(&con->sock_mutex); | 1142 | mutex_unlock(&con->sock_mutex); |
@@ -1044,10 +1174,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con, | |||
1044 | if (result < 0) { | 1174 | if (result < 0) { |
1045 | log_print("Failed to set SO_REUSEADDR on socket: %d", result); | 1175 | log_print("Failed to set SO_REUSEADDR on socket: %d", result); |
1046 | } | 1176 | } |
1047 | sock->sk->sk_user_data = con; | ||
1048 | con->rx_action = tcp_accept_from_sock; | 1177 | con->rx_action = tcp_accept_from_sock; |
1049 | con->connect_action = tcp_connect_to_sock; | 1178 | con->connect_action = tcp_connect_to_sock; |
1050 | con->sock = sock; | ||
1051 | 1179 | ||
1052 | /* Bind to our port */ | 1180 | /* Bind to our port */ |
1053 | make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); | 1181 | make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); |
@@ -1358,8 +1486,7 @@ static void send_to_sock(struct connection *con) | |||
1358 | } | 1486 | } |
1359 | cond_resched(); | 1487 | cond_resched(); |
1360 | goto out; | 1488 | goto out; |
1361 | } | 1489 | } else if (ret < 0) |
1362 | if (ret <= 0) | ||
1363 | goto send_error; | 1490 | goto send_error; |
1364 | } | 1491 | } |
1365 | 1492 | ||
@@ -1376,7 +1503,6 @@ static void send_to_sock(struct connection *con) | |||
1376 | if (e->len == 0 && e->users == 0) { | 1503 | if (e->len == 0 && e->users == 0) { |
1377 | list_del(&e->list); | 1504 | list_del(&e->list); |
1378 | free_entry(e); | 1505 | free_entry(e); |
1379 | continue; | ||
1380 | } | 1506 | } |
1381 | } | 1507 | } |
1382 | spin_unlock(&con->writequeue_lock); | 1508 | spin_unlock(&con->writequeue_lock); |
@@ -1394,7 +1520,6 @@ out_connect: | |||
1394 | mutex_unlock(&con->sock_mutex); | 1520 | mutex_unlock(&con->sock_mutex); |
1395 | if (!test_bit(CF_INIT_PENDING, &con->flags)) | 1521 | if (!test_bit(CF_INIT_PENDING, &con->flags)) |
1396 | lowcomms_connect_sock(con); | 1522 | lowcomms_connect_sock(con); |
1397 | return; | ||
1398 | } | 1523 | } |
1399 | 1524 | ||
1400 | static void clean_one_writequeue(struct connection *con) | 1525 | static void clean_one_writequeue(struct connection *con) |
@@ -1414,6 +1539,7 @@ static void clean_one_writequeue(struct connection *con) | |||
1414 | int dlm_lowcomms_close(int nodeid) | 1539 | int dlm_lowcomms_close(int nodeid) |
1415 | { | 1540 | { |
1416 | struct connection *con; | 1541 | struct connection *con; |
1542 | struct dlm_node_addr *na; | ||
1417 | 1543 | ||
1418 | log_print("closing connection to node %d", nodeid); | 1544 | log_print("closing connection to node %d", nodeid); |
1419 | con = nodeid2con(nodeid, 0); | 1545 | con = nodeid2con(nodeid, 0); |
@@ -1428,6 +1554,17 @@ int dlm_lowcomms_close(int nodeid) | |||
1428 | clean_one_writequeue(con); | 1554 | clean_one_writequeue(con); |
1429 | close_connection(con, true); | 1555 | close_connection(con, true); |
1430 | } | 1556 | } |
1557 | |||
1558 | spin_lock(&dlm_node_addrs_spin); | ||
1559 | na = find_node_addr(nodeid); | ||
1560 | if (na) { | ||
1561 | list_del(&na->list); | ||
1562 | while (na->addr_count--) | ||
1563 | kfree(na->addr[na->addr_count]); | ||
1564 | kfree(na); | ||
1565 | } | ||
1566 | spin_unlock(&dlm_node_addrs_spin); | ||
1567 | |||
1431 | return 0; | 1568 | return 0; |
1432 | } | 1569 | } |
1433 | 1570 | ||
@@ -1577,3 +1714,17 @@ fail_destroy: | |||
1577 | fail: | 1714 | fail: |
1578 | return error; | 1715 | return error; |
1579 | } | 1716 | } |
1717 | |||
1718 | void dlm_lowcomms_exit(void) | ||
1719 | { | ||
1720 | struct dlm_node_addr *na, *safe; | ||
1721 | |||
1722 | spin_lock(&dlm_node_addrs_spin); | ||
1723 | list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) { | ||
1724 | list_del(&na->list); | ||
1725 | while (na->addr_count--) | ||
1726 | kfree(na->addr[na->addr_count]); | ||
1727 | kfree(na); | ||
1728 | } | ||
1729 | spin_unlock(&dlm_node_addrs_spin); | ||
1730 | } | ||
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h index 1311e6426287..67462e54fc2f 100644 --- a/fs/dlm/lowcomms.h +++ b/fs/dlm/lowcomms.h | |||
@@ -16,10 +16,12 @@ | |||
16 | 16 | ||
17 | int dlm_lowcomms_start(void); | 17 | int dlm_lowcomms_start(void); |
18 | void dlm_lowcomms_stop(void); | 18 | void dlm_lowcomms_stop(void); |
19 | void dlm_lowcomms_exit(void); | ||
19 | int dlm_lowcomms_close(int nodeid); | 20 | int dlm_lowcomms_close(int nodeid); |
20 | void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); | 21 | void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); |
21 | void dlm_lowcomms_commit_buffer(void *mh); | 22 | void dlm_lowcomms_commit_buffer(void *mh); |
22 | int dlm_lowcomms_connect_node(int nodeid); | 23 | int dlm_lowcomms_connect_node(int nodeid); |
24 | int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len); | ||
23 | 25 | ||
24 | #endif /* __LOWCOMMS_DOT_H__ */ | 26 | #endif /* __LOWCOMMS_DOT_H__ */ |
25 | 27 | ||
diff --git a/fs/dlm/main.c b/fs/dlm/main.c index 5a59efa0bb46..079c0bd71ab7 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include "user.h" | 17 | #include "user.h" |
18 | #include "memory.h" | 18 | #include "memory.h" |
19 | #include "config.h" | 19 | #include "config.h" |
20 | #include "lowcomms.h" | ||
20 | 21 | ||
21 | static int __init init_dlm(void) | 22 | static int __init init_dlm(void) |
22 | { | 23 | { |
@@ -78,6 +79,7 @@ static void __exit exit_dlm(void) | |||
78 | dlm_config_exit(); | 79 | dlm_config_exit(); |
79 | dlm_memory_exit(); | 80 | dlm_memory_exit(); |
80 | dlm_lockspace_exit(); | 81 | dlm_lockspace_exit(); |
82 | dlm_lowcomms_exit(); | ||
81 | dlm_unregister_debugfs(); | 83 | dlm_unregister_debugfs(); |
82 | } | 84 | } |
83 | 85 | ||
diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 862640a36d5c..476557b54921 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c | |||
@@ -616,13 +616,13 @@ int dlm_ls_stop(struct dlm_ls *ls) | |||
616 | down_write(&ls->ls_recv_active); | 616 | down_write(&ls->ls_recv_active); |
617 | 617 | ||
618 | /* | 618 | /* |
619 | * Abort any recovery that's in progress (see RECOVERY_STOP, | 619 | * Abort any recovery that's in progress (see RECOVER_STOP, |
620 | * dlm_recovery_stopped()) and tell any other threads running in the | 620 | * dlm_recovery_stopped()) and tell any other threads running in the |
621 | * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). | 621 | * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). |
622 | */ | 622 | */ |
623 | 623 | ||
624 | spin_lock(&ls->ls_recover_lock); | 624 | spin_lock(&ls->ls_recover_lock); |
625 | set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | 625 | set_bit(LSFL_RECOVER_STOP, &ls->ls_flags); |
626 | new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags); | 626 | new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags); |
627 | ls->ls_recover_seq++; | 627 | ls->ls_recover_seq++; |
628 | spin_unlock(&ls->ls_recover_lock); | 628 | spin_unlock(&ls->ls_recover_lock); |
@@ -642,12 +642,16 @@ int dlm_ls_stop(struct dlm_ls *ls) | |||
642 | * when recovery is complete. | 642 | * when recovery is complete. |
643 | */ | 643 | */ |
644 | 644 | ||
645 | if (new) | 645 | if (new) { |
646 | down_write(&ls->ls_in_recovery); | 646 | set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags); |
647 | wake_up_process(ls->ls_recoverd_task); | ||
648 | wait_event(ls->ls_recover_lock_wait, | ||
649 | test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)); | ||
650 | } | ||
647 | 651 | ||
648 | /* | 652 | /* |
649 | * The recoverd suspend/resume makes sure that dlm_recoverd (if | 653 | * The recoverd suspend/resume makes sure that dlm_recoverd (if |
650 | * running) has noticed RECOVERY_STOP above and quit processing the | 654 | * running) has noticed RECOVER_STOP above and quit processing the |
651 | * previous recovery. | 655 | * previous recovery. |
652 | */ | 656 | */ |
653 | 657 | ||
@@ -709,7 +713,8 @@ int dlm_ls_start(struct dlm_ls *ls) | |||
709 | kfree(rv_old); | 713 | kfree(rv_old); |
710 | } | 714 | } |
711 | 715 | ||
712 | dlm_recoverd_kick(ls); | 716 | set_bit(LSFL_RECOVER_WORK, &ls->ls_flags); |
717 | wake_up_process(ls->ls_recoverd_task); | ||
713 | return 0; | 718 | return 0; |
714 | 719 | ||
715 | fail: | 720 | fail: |
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index ef17e0169da1..60a327863b11 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c | |||
@@ -14,7 +14,7 @@ | |||
14 | #include "dlm_internal.h" | 14 | #include "dlm_internal.h" |
15 | 15 | ||
16 | static uint32_t dlm_nl_seqnum; | 16 | static uint32_t dlm_nl_seqnum; |
17 | static uint32_t listener_nlpid; | 17 | static uint32_t listener_nlportid; |
18 | 18 | ||
19 | static struct genl_family family = { | 19 | static struct genl_family family = { |
20 | .id = GENL_ID_GENERATE, | 20 | .id = GENL_ID_GENERATE, |
@@ -64,13 +64,13 @@ static int send_data(struct sk_buff *skb) | |||
64 | return rv; | 64 | return rv; |
65 | } | 65 | } |
66 | 66 | ||
67 | return genlmsg_unicast(&init_net, skb, listener_nlpid); | 67 | return genlmsg_unicast(&init_net, skb, listener_nlportid); |
68 | } | 68 | } |
69 | 69 | ||
70 | static int user_cmd(struct sk_buff *skb, struct genl_info *info) | 70 | static int user_cmd(struct sk_buff *skb, struct genl_info *info) |
71 | { | 71 | { |
72 | listener_nlpid = info->snd_pid; | 72 | listener_nlportid = info->snd_portid; |
73 | printk("user_cmd nlpid %u\n", listener_nlpid); | 73 | printk("user_cmd nlpid %u\n", listener_nlportid); |
74 | return 0; | 74 | return 0; |
75 | } | 75 | } |
76 | 76 | ||
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 87f1a56eab32..9d61947d473a 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c | |||
@@ -581,7 +581,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) | |||
581 | 581 | ||
582 | spin_lock(&ls->ls_recover_lock); | 582 | spin_lock(&ls->ls_recover_lock); |
583 | status = ls->ls_recover_status; | 583 | status = ls->ls_recover_status; |
584 | stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | 584 | stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); |
585 | seq = ls->ls_recover_seq; | 585 | seq = ls->ls_recover_seq; |
586 | spin_unlock(&ls->ls_recover_lock); | 586 | spin_unlock(&ls->ls_recover_lock); |
587 | 587 | ||
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 88ce65ff021e..32f9f8926ec3 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c | |||
@@ -41,6 +41,7 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq) | |||
41 | set_bit(LSFL_RUNNING, &ls->ls_flags); | 41 | set_bit(LSFL_RUNNING, &ls->ls_flags); |
42 | /* unblocks processes waiting to enter the dlm */ | 42 | /* unblocks processes waiting to enter the dlm */ |
43 | up_write(&ls->ls_in_recovery); | 43 | up_write(&ls->ls_in_recovery); |
44 | clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); | ||
44 | error = 0; | 45 | error = 0; |
45 | } | 46 | } |
46 | spin_unlock(&ls->ls_recover_lock); | 47 | spin_unlock(&ls->ls_recover_lock); |
@@ -262,7 +263,7 @@ static void do_ls_recovery(struct dlm_ls *ls) | |||
262 | rv = ls->ls_recover_args; | 263 | rv = ls->ls_recover_args; |
263 | ls->ls_recover_args = NULL; | 264 | ls->ls_recover_args = NULL; |
264 | if (rv && ls->ls_recover_seq == rv->seq) | 265 | if (rv && ls->ls_recover_seq == rv->seq) |
265 | clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | 266 | clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags); |
266 | spin_unlock(&ls->ls_recover_lock); | 267 | spin_unlock(&ls->ls_recover_lock); |
267 | 268 | ||
268 | if (rv) { | 269 | if (rv) { |
@@ -282,26 +283,34 @@ static int dlm_recoverd(void *arg) | |||
282 | return -1; | 283 | return -1; |
283 | } | 284 | } |
284 | 285 | ||
286 | down_write(&ls->ls_in_recovery); | ||
287 | set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); | ||
288 | wake_up(&ls->ls_recover_lock_wait); | ||
289 | |||
285 | while (!kthread_should_stop()) { | 290 | while (!kthread_should_stop()) { |
286 | set_current_state(TASK_INTERRUPTIBLE); | 291 | set_current_state(TASK_INTERRUPTIBLE); |
287 | if (!test_bit(LSFL_WORK, &ls->ls_flags)) | 292 | if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) && |
293 | !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) | ||
288 | schedule(); | 294 | schedule(); |
289 | set_current_state(TASK_RUNNING); | 295 | set_current_state(TASK_RUNNING); |
290 | 296 | ||
291 | if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) | 297 | if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) { |
298 | down_write(&ls->ls_in_recovery); | ||
299 | set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); | ||
300 | wake_up(&ls->ls_recover_lock_wait); | ||
301 | } | ||
302 | |||
303 | if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags)) | ||
292 | do_ls_recovery(ls); | 304 | do_ls_recovery(ls); |
293 | } | 305 | } |
294 | 306 | ||
307 | if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)) | ||
308 | up_write(&ls->ls_in_recovery); | ||
309 | |||
295 | dlm_put_lockspace(ls); | 310 | dlm_put_lockspace(ls); |
296 | return 0; | 311 | return 0; |
297 | } | 312 | } |
298 | 313 | ||
299 | void dlm_recoverd_kick(struct dlm_ls *ls) | ||
300 | { | ||
301 | set_bit(LSFL_WORK, &ls->ls_flags); | ||
302 | wake_up_process(ls->ls_recoverd_task); | ||
303 | } | ||
304 | |||
305 | int dlm_recoverd_start(struct dlm_ls *ls) | 314 | int dlm_recoverd_start(struct dlm_ls *ls) |
306 | { | 315 | { |
307 | struct task_struct *p; | 316 | struct task_struct *p; |
diff --git a/fs/dlm/recoverd.h b/fs/dlm/recoverd.h index 866657c5d69d..8856079733fa 100644 --- a/fs/dlm/recoverd.h +++ b/fs/dlm/recoverd.h | |||
@@ -14,7 +14,6 @@ | |||
14 | #ifndef __RECOVERD_DOT_H__ | 14 | #ifndef __RECOVERD_DOT_H__ |
15 | #define __RECOVERD_DOT_H__ | 15 | #define __RECOVERD_DOT_H__ |
16 | 16 | ||
17 | void dlm_recoverd_kick(struct dlm_ls *ls); | ||
18 | void dlm_recoverd_stop(struct dlm_ls *ls); | 17 | void dlm_recoverd_stop(struct dlm_ls *ls); |
19 | int dlm_recoverd_start(struct dlm_ls *ls); | 18 | int dlm_recoverd_start(struct dlm_ls *ls); |
20 | void dlm_recoverd_suspend(struct dlm_ls *ls); | 19 | void dlm_recoverd_suspend(struct dlm_ls *ls); |
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index eb4ed9ba3098..7ff49852b0cb 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -503,6 +503,13 @@ static ssize_t device_write(struct file *file, const char __user *buf, | |||
503 | #endif | 503 | #endif |
504 | return -EINVAL; | 504 | return -EINVAL; |
505 | 505 | ||
506 | #ifdef CONFIG_COMPAT | ||
507 | if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN) | ||
508 | #else | ||
509 | if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN) | ||
510 | #endif | ||
511 | return -EINVAL; | ||
512 | |||
506 | kbuf = kzalloc(count + 1, GFP_NOFS); | 513 | kbuf = kzalloc(count + 1, GFP_NOFS); |
507 | if (!kbuf) | 514 | if (!kbuf) |
508 | return -ENOMEM; | 515 | return -ENOMEM; |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 44ce5c6a541d..d45ba4568128 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
@@ -275,8 +275,14 @@ out: | |||
275 | 275 | ||
276 | static int ecryptfs_flush(struct file *file, fl_owner_t td) | 276 | static int ecryptfs_flush(struct file *file, fl_owner_t td) |
277 | { | 277 | { |
278 | return file->f_mode & FMODE_WRITE | 278 | struct file *lower_file = ecryptfs_file_to_lower(file); |
279 | ? filemap_write_and_wait(file->f_mapping) : 0; | 279 | |
280 | if (lower_file->f_op && lower_file->f_op->flush) { | ||
281 | filemap_write_and_wait(file->f_mapping); | ||
282 | return lower_file->f_op->flush(lower_file, td); | ||
283 | } | ||
284 | |||
285 | return 0; | ||
280 | } | 286 | } |
281 | 287 | ||
282 | static int ecryptfs_release(struct inode *inode, struct file *file) | 288 | static int ecryptfs_release(struct inode *inode, struct file *file) |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 534b129ea676..cc7709e7c508 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -619,6 +619,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
619 | struct dentry *lower_old_dir_dentry; | 619 | struct dentry *lower_old_dir_dentry; |
620 | struct dentry *lower_new_dir_dentry; | 620 | struct dentry *lower_new_dir_dentry; |
621 | struct dentry *trap = NULL; | 621 | struct dentry *trap = NULL; |
622 | struct inode *target_inode; | ||
622 | 623 | ||
623 | lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); | 624 | lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); |
624 | lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); | 625 | lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); |
@@ -626,6 +627,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
626 | dget(lower_new_dentry); | 627 | dget(lower_new_dentry); |
627 | lower_old_dir_dentry = dget_parent(lower_old_dentry); | 628 | lower_old_dir_dentry = dget_parent(lower_old_dentry); |
628 | lower_new_dir_dentry = dget_parent(lower_new_dentry); | 629 | lower_new_dir_dentry = dget_parent(lower_new_dentry); |
630 | target_inode = new_dentry->d_inode; | ||
629 | trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); | 631 | trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); |
630 | /* source should not be ancestor of target */ | 632 | /* source should not be ancestor of target */ |
631 | if (trap == lower_old_dentry) { | 633 | if (trap == lower_old_dentry) { |
@@ -641,6 +643,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
641 | lower_new_dir_dentry->d_inode, lower_new_dentry); | 643 | lower_new_dir_dentry->d_inode, lower_new_dentry); |
642 | if (rc) | 644 | if (rc) |
643 | goto out_lock; | 645 | goto out_lock; |
646 | if (target_inode) | ||
647 | fsstack_copy_attr_all(target_inode, | ||
648 | ecryptfs_inode_to_lower(target_inode)); | ||
644 | fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); | 649 | fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); |
645 | if (new_dir != old_dir) | 650 | if (new_dir != old_dir) |
646 | fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); | 651 | fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 2768138eefee..4e0886c9e5c4 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -162,6 +162,7 @@ void ecryptfs_put_lower_file(struct inode *inode) | |||
162 | inode_info = ecryptfs_inode_to_private(inode); | 162 | inode_info = ecryptfs_inode_to_private(inode); |
163 | if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count, | 163 | if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count, |
164 | &inode_info->lower_file_mutex)) { | 164 | &inode_info->lower_file_mutex)) { |
165 | filemap_write_and_wait(inode->i_mapping); | ||
165 | fput(inode_info->lower_file); | 166 | fput(inode_info->lower_file); |
166 | inode_info->lower_file = NULL; | 167 | inode_info->lower_file = NULL; |
167 | mutex_unlock(&inode_info->lower_file_mutex); | 168 | mutex_unlock(&inode_info->lower_file_mutex); |
@@ -544,11 +545,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags | |||
544 | goto out_free; | 545 | goto out_free; |
545 | } | 546 | } |
546 | 547 | ||
547 | if (check_ruid && path.dentry->d_inode->i_uid != current_uid()) { | 548 | if (check_ruid && !uid_eq(path.dentry->d_inode->i_uid, current_uid())) { |
548 | rc = -EPERM; | 549 | rc = -EPERM; |
549 | printk(KERN_ERR "Mount of device (uid: %d) not owned by " | 550 | printk(KERN_ERR "Mount of device (uid: %d) not owned by " |
550 | "requested user (uid: %d)\n", | 551 | "requested user (uid: %d)\n", |
551 | path.dentry->d_inode->i_uid, current_uid()); | 552 | i_uid_read(path.dentry->d_inode), |
553 | from_kuid(&init_user_ns, current_uid())); | ||
552 | goto out_free; | 554 | goto out_free; |
553 | } | 555 | } |
554 | 556 | ||
@@ -709,6 +711,12 @@ static void ecryptfs_free_kmem_caches(void) | |||
709 | { | 711 | { |
710 | int i; | 712 | int i; |
711 | 713 | ||
714 | /* | ||
715 | * Make sure all delayed rcu free inodes are flushed before we | ||
716 | * destroy cache. | ||
717 | */ | ||
718 | rcu_barrier(); | ||
719 | |||
712 | for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { | 720 | for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { |
713 | struct ecryptfs_cache_info *info; | 721 | struct ecryptfs_cache_info *info; |
714 | 722 | ||
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index b29bb8bfa8d9..5fa2471796c2 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
@@ -33,7 +33,7 @@ static struct hlist_head *ecryptfs_daemon_hash; | |||
33 | struct mutex ecryptfs_daemon_hash_mux; | 33 | struct mutex ecryptfs_daemon_hash_mux; |
34 | static int ecryptfs_hash_bits; | 34 | static int ecryptfs_hash_bits; |
35 | #define ecryptfs_current_euid_hash(uid) \ | 35 | #define ecryptfs_current_euid_hash(uid) \ |
36 | hash_long((unsigned long)current_euid(), ecryptfs_hash_bits) | 36 | hash_long((unsigned long)from_kuid(&init_user_ns, current_euid()), ecryptfs_hash_bits) |
37 | 37 | ||
38 | static u32 ecryptfs_msg_counter; | 38 | static u32 ecryptfs_msg_counter; |
39 | static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; | 39 | static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; |
@@ -121,8 +121,7 @@ int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon) | |||
121 | hlist_for_each_entry(*daemon, elem, | 121 | hlist_for_each_entry(*daemon, elem, |
122 | &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()], | 122 | &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()], |
123 | euid_chain) { | 123 | euid_chain) { |
124 | if ((*daemon)->file->f_cred->euid == current_euid() && | 124 | if (uid_eq((*daemon)->file->f_cred->euid, current_euid())) { |
125 | (*daemon)->file->f_cred->user_ns == current_user_ns()) { | ||
126 | rc = 0; | 125 | rc = 0; |
127 | goto out; | 126 | goto out; |
128 | } | 127 | } |
diff --git a/fs/efs/inode.c b/fs/efs/inode.c index bc84f365d75c..f3913eb2c474 100644 --- a/fs/efs/inode.c +++ b/fs/efs/inode.c | |||
@@ -97,8 +97,8 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino) | |||
97 | 97 | ||
98 | inode->i_mode = be16_to_cpu(efs_inode->di_mode); | 98 | inode->i_mode = be16_to_cpu(efs_inode->di_mode); |
99 | set_nlink(inode, be16_to_cpu(efs_inode->di_nlink)); | 99 | set_nlink(inode, be16_to_cpu(efs_inode->di_nlink)); |
100 | inode->i_uid = (uid_t)be16_to_cpu(efs_inode->di_uid); | 100 | i_uid_write(inode, (uid_t)be16_to_cpu(efs_inode->di_uid)); |
101 | inode->i_gid = (gid_t)be16_to_cpu(efs_inode->di_gid); | 101 | i_gid_write(inode, (gid_t)be16_to_cpu(efs_inode->di_gid)); |
102 | inode->i_size = be32_to_cpu(efs_inode->di_size); | 102 | inode->i_size = be32_to_cpu(efs_inode->di_size); |
103 | inode->i_atime.tv_sec = be32_to_cpu(efs_inode->di_atime); | 103 | inode->i_atime.tv_sec = be32_to_cpu(efs_inode->di_atime); |
104 | inode->i_mtime.tv_sec = be32_to_cpu(efs_inode->di_mtime); | 104 | inode->i_mtime.tv_sec = be32_to_cpu(efs_inode->di_mtime); |
diff --git a/fs/efs/super.c b/fs/efs/super.c index e755ec746c69..2002431ef9a0 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c | |||
@@ -96,6 +96,11 @@ static int init_inodecache(void) | |||
96 | 96 | ||
97 | static void destroy_inodecache(void) | 97 | static void destroy_inodecache(void) |
98 | { | 98 | { |
99 | /* | ||
100 | * Make sure all delayed rcu free inodes are flushed before we | ||
101 | * destroy cache. | ||
102 | */ | ||
103 | rcu_barrier(); | ||
99 | kmem_cache_destroy(efs_inode_cachep); | 104 | kmem_cache_destroy(efs_inode_cachep); |
100 | } | 105 | } |
101 | 106 | ||
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1c8b55670804..da72250ddc1c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p) | |||
346 | /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ | 346 | /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ |
347 | static inline int ep_op_has_event(int op) | 347 | static inline int ep_op_has_event(int op) |
348 | { | 348 | { |
349 | return op != EPOLL_CTL_DEL; | 349 | return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD; |
350 | } | 350 | } |
351 | 351 | ||
352 | /* Initialize the poll safe wake up structure */ | 352 | /* Initialize the poll safe wake up structure */ |
@@ -676,6 +676,34 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) | |||
676 | return 0; | 676 | return 0; |
677 | } | 677 | } |
678 | 678 | ||
679 | /* | ||
680 | * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item | ||
681 | * had no event flags set, indicating that another thread may be currently | ||
682 | * handling that item's events (in the case that EPOLLONESHOT was being | ||
683 | * used). Otherwise a zero result indicates that the item has been disabled | ||
684 | * from receiving events. A disabled item may be re-enabled via | ||
685 | * EPOLL_CTL_MOD. Must be called with "mtx" held. | ||
686 | */ | ||
687 | static int ep_disable(struct eventpoll *ep, struct epitem *epi) | ||
688 | { | ||
689 | int result = 0; | ||
690 | unsigned long flags; | ||
691 | |||
692 | spin_lock_irqsave(&ep->lock, flags); | ||
693 | if (epi->event.events & ~EP_PRIVATE_BITS) { | ||
694 | if (ep_is_linked(&epi->rdllink)) | ||
695 | list_del_init(&epi->rdllink); | ||
696 | /* Ensure ep_poll_callback will not add epi back onto ready | ||
697 | list: */ | ||
698 | epi->event.events &= EP_PRIVATE_BITS; | ||
699 | } | ||
700 | else | ||
701 | result = -EBUSY; | ||
702 | spin_unlock_irqrestore(&ep->lock, flags); | ||
703 | |||
704 | return result; | ||
705 | } | ||
706 | |||
679 | static void ep_free(struct eventpoll *ep) | 707 | static void ep_free(struct eventpoll *ep) |
680 | { | 708 | { |
681 | struct rb_node *rbp; | 709 | struct rb_node *rbp; |
@@ -1020,8 +1048,6 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) | |||
1020 | rb_insert_color(&epi->rbn, &ep->rbr); | 1048 | rb_insert_color(&epi->rbn, &ep->rbr); |
1021 | } | 1049 | } |
1022 | 1050 | ||
1023 | |||
1024 | |||
1025 | #define PATH_ARR_SIZE 5 | 1051 | #define PATH_ARR_SIZE 5 |
1026 | /* | 1052 | /* |
1027 | * These are the number paths of length 1 to 5, that we are allowing to emanate | 1053 | * These are the number paths of length 1 to 5, that we are allowing to emanate |
@@ -1654,8 +1680,8 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) | |||
1654 | error = PTR_ERR(file); | 1680 | error = PTR_ERR(file); |
1655 | goto out_free_fd; | 1681 | goto out_free_fd; |
1656 | } | 1682 | } |
1657 | fd_install(fd, file); | ||
1658 | ep->file = file; | 1683 | ep->file = file; |
1684 | fd_install(fd, file); | ||
1659 | return fd; | 1685 | return fd; |
1660 | 1686 | ||
1661 | out_free_fd: | 1687 | out_free_fd: |
@@ -1787,6 +1813,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1787 | } else | 1813 | } else |
1788 | error = -ENOENT; | 1814 | error = -ENOENT; |
1789 | break; | 1815 | break; |
1816 | case EPOLL_CTL_DISABLE: | ||
1817 | if (epi) | ||
1818 | error = ep_disable(ep, epi); | ||
1819 | else | ||
1820 | error = -ENOENT; | ||
1821 | break; | ||
1790 | } | 1822 | } |
1791 | mutex_unlock(&ep->mtx); | 1823 | mutex_unlock(&ep->mtx); |
1792 | 1824 | ||
@@ -1810,7 +1842,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, | |||
1810 | int, maxevents, int, timeout) | 1842 | int, maxevents, int, timeout) |
1811 | { | 1843 | { |
1812 | int error; | 1844 | int error; |
1813 | struct file *file; | 1845 | struct fd f; |
1814 | struct eventpoll *ep; | 1846 | struct eventpoll *ep; |
1815 | 1847 | ||
1816 | /* The maximum number of event must be greater than zero */ | 1848 | /* The maximum number of event must be greater than zero */ |
@@ -1818,38 +1850,33 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, | |||
1818 | return -EINVAL; | 1850 | return -EINVAL; |
1819 | 1851 | ||
1820 | /* Verify that the area passed by the user is writeable */ | 1852 | /* Verify that the area passed by the user is writeable */ |
1821 | if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) { | 1853 | if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) |
1822 | error = -EFAULT; | 1854 | return -EFAULT; |
1823 | goto error_return; | ||
1824 | } | ||
1825 | 1855 | ||
1826 | /* Get the "struct file *" for the eventpoll file */ | 1856 | /* Get the "struct file *" for the eventpoll file */ |
1827 | error = -EBADF; | 1857 | f = fdget(epfd); |
1828 | file = fget(epfd); | 1858 | if (!f.file) |
1829 | if (!file) | 1859 | return -EBADF; |
1830 | goto error_return; | ||
1831 | 1860 | ||
1832 | /* | 1861 | /* |
1833 | * We have to check that the file structure underneath the fd | 1862 | * We have to check that the file structure underneath the fd |
1834 | * the user passed to us _is_ an eventpoll file. | 1863 | * the user passed to us _is_ an eventpoll file. |
1835 | */ | 1864 | */ |
1836 | error = -EINVAL; | 1865 | error = -EINVAL; |
1837 | if (!is_file_epoll(file)) | 1866 | if (!is_file_epoll(f.file)) |
1838 | goto error_fput; | 1867 | goto error_fput; |
1839 | 1868 | ||
1840 | /* | 1869 | /* |
1841 | * At this point it is safe to assume that the "private_data" contains | 1870 | * At this point it is safe to assume that the "private_data" contains |
1842 | * our own data structure. | 1871 | * our own data structure. |
1843 | */ | 1872 | */ |
1844 | ep = file->private_data; | 1873 | ep = f.file->private_data; |
1845 | 1874 | ||
1846 | /* Time to fish for events ... */ | 1875 | /* Time to fish for events ... */ |
1847 | error = ep_poll(ep, events, maxevents, timeout); | 1876 | error = ep_poll(ep, events, maxevents, timeout); |
1848 | 1877 | ||
1849 | error_fput: | 1878 | error_fput: |
1850 | fput(file); | 1879 | fdput(f); |
1851 | error_return: | ||
1852 | |||
1853 | return error; | 1880 | return error; |
1854 | } | 1881 | } |
1855 | 1882 | ||
@@ -63,22 +63,12 @@ | |||
63 | 63 | ||
64 | #include <trace/events/task.h> | 64 | #include <trace/events/task.h> |
65 | #include "internal.h" | 65 | #include "internal.h" |
66 | #include "coredump.h" | ||
66 | 67 | ||
67 | #include <trace/events/sched.h> | 68 | #include <trace/events/sched.h> |
68 | 69 | ||
69 | int core_uses_pid; | ||
70 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | ||
71 | unsigned int core_pipe_limit; | ||
72 | int suid_dumpable = 0; | 70 | int suid_dumpable = 0; |
73 | 71 | ||
74 | struct core_name { | ||
75 | char *corename; | ||
76 | int used, size; | ||
77 | }; | ||
78 | static atomic_t call_count = ATOMIC_INIT(1); | ||
79 | |||
80 | /* The maximal length of core_pattern is also specified in sysctl.c */ | ||
81 | |||
82 | static LIST_HEAD(formats); | 72 | static LIST_HEAD(formats); |
83 | static DEFINE_RWLOCK(binfmt_lock); | 73 | static DEFINE_RWLOCK(binfmt_lock); |
84 | 74 | ||
@@ -613,7 +603,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) | |||
613 | * process cleanup to remove whatever mess we made. | 603 | * process cleanup to remove whatever mess we made. |
614 | */ | 604 | */ |
615 | if (length != move_page_tables(vma, old_start, | 605 | if (length != move_page_tables(vma, old_start, |
616 | vma, new_start, length)) | 606 | vma, new_start, length, false)) |
617 | return -ENOMEM; | 607 | return -ENOMEM; |
618 | 608 | ||
619 | lru_add_drain(); | 609 | lru_add_drain(); |
@@ -888,9 +878,11 @@ static int de_thread(struct task_struct *tsk) | |||
888 | sig->notify_count--; | 878 | sig->notify_count--; |
889 | 879 | ||
890 | while (sig->notify_count) { | 880 | while (sig->notify_count) { |
891 | __set_current_state(TASK_UNINTERRUPTIBLE); | 881 | __set_current_state(TASK_KILLABLE); |
892 | spin_unlock_irq(lock); | 882 | spin_unlock_irq(lock); |
893 | schedule(); | 883 | schedule(); |
884 | if (unlikely(__fatal_signal_pending(tsk))) | ||
885 | goto killed; | ||
894 | spin_lock_irq(lock); | 886 | spin_lock_irq(lock); |
895 | } | 887 | } |
896 | spin_unlock_irq(lock); | 888 | spin_unlock_irq(lock); |
@@ -908,9 +900,11 @@ static int de_thread(struct task_struct *tsk) | |||
908 | write_lock_irq(&tasklist_lock); | 900 | write_lock_irq(&tasklist_lock); |
909 | if (likely(leader->exit_state)) | 901 | if (likely(leader->exit_state)) |
910 | break; | 902 | break; |
911 | __set_current_state(TASK_UNINTERRUPTIBLE); | 903 | __set_current_state(TASK_KILLABLE); |
912 | write_unlock_irq(&tasklist_lock); | 904 | write_unlock_irq(&tasklist_lock); |
913 | schedule(); | 905 | schedule(); |
906 | if (unlikely(__fatal_signal_pending(tsk))) | ||
907 | goto killed; | ||
914 | } | 908 | } |
915 | 909 | ||
916 | /* | 910 | /* |
@@ -1004,40 +998,14 @@ no_thread_group: | |||
1004 | 998 | ||
1005 | BUG_ON(!thread_group_leader(tsk)); | 999 | BUG_ON(!thread_group_leader(tsk)); |
1006 | return 0; | 1000 | return 0; |
1007 | } | ||
1008 | |||
1009 | /* | ||
1010 | * These functions flushes out all traces of the currently running executable | ||
1011 | * so that a new one can be started | ||
1012 | */ | ||
1013 | static void flush_old_files(struct files_struct * files) | ||
1014 | { | ||
1015 | long j = -1; | ||
1016 | struct fdtable *fdt; | ||
1017 | |||
1018 | spin_lock(&files->file_lock); | ||
1019 | for (;;) { | ||
1020 | unsigned long set, i; | ||
1021 | 1001 | ||
1022 | j++; | 1002 | killed: |
1023 | i = j * BITS_PER_LONG; | 1003 | /* protects against exit_notify() and __exit_signal() */ |
1024 | fdt = files_fdtable(files); | 1004 | read_lock(&tasklist_lock); |
1025 | if (i >= fdt->max_fds) | 1005 | sig->group_exit_task = NULL; |
1026 | break; | 1006 | sig->notify_count = 0; |
1027 | set = fdt->close_on_exec[j]; | 1007 | read_unlock(&tasklist_lock); |
1028 | if (!set) | 1008 | return -EAGAIN; |
1029 | continue; | ||
1030 | fdt->close_on_exec[j] = 0; | ||
1031 | spin_unlock(&files->file_lock); | ||
1032 | for ( ; set ; i++,set >>= 1) { | ||
1033 | if (set & 1) { | ||
1034 | sys_close(i); | ||
1035 | } | ||
1036 | } | ||
1037 | spin_lock(&files->file_lock); | ||
1038 | |||
1039 | } | ||
1040 | spin_unlock(&files->file_lock); | ||
1041 | } | 1009 | } |
1042 | 1010 | ||
1043 | char *get_task_comm(char *buf, struct task_struct *tsk) | 1011 | char *get_task_comm(char *buf, struct task_struct *tsk) |
@@ -1050,6 +1018,11 @@ char *get_task_comm(char *buf, struct task_struct *tsk) | |||
1050 | } | 1018 | } |
1051 | EXPORT_SYMBOL_GPL(get_task_comm); | 1019 | EXPORT_SYMBOL_GPL(get_task_comm); |
1052 | 1020 | ||
1021 | /* | ||
1022 | * These functions flushes out all traces of the currently running executable | ||
1023 | * so that a new one can be started | ||
1024 | */ | ||
1025 | |||
1053 | void set_task_comm(struct task_struct *tsk, char *buf) | 1026 | void set_task_comm(struct task_struct *tsk, char *buf) |
1054 | { | 1027 | { |
1055 | task_lock(tsk); | 1028 | task_lock(tsk); |
@@ -1136,7 +1109,7 @@ void setup_new_exec(struct linux_binprm * bprm) | |||
1136 | current->sas_ss_sp = current->sas_ss_size = 0; | 1109 | current->sas_ss_sp = current->sas_ss_size = 0; |
1137 | 1110 | ||
1138 | if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid())) | 1111 | if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid())) |
1139 | set_dumpable(current->mm, 1); | 1112 | set_dumpable(current->mm, SUID_DUMPABLE_ENABLED); |
1140 | else | 1113 | else |
1141 | set_dumpable(current->mm, suid_dumpable); | 1114 | set_dumpable(current->mm, suid_dumpable); |
1142 | 1115 | ||
@@ -1171,7 +1144,7 @@ void setup_new_exec(struct linux_binprm * bprm) | |||
1171 | current->self_exec_id++; | 1144 | current->self_exec_id++; |
1172 | 1145 | ||
1173 | flush_signal_handlers(current, 0); | 1146 | flush_signal_handlers(current, 0); |
1174 | flush_old_files(current->files); | 1147 | do_close_on_exec(current->files); |
1175 | } | 1148 | } |
1176 | EXPORT_SYMBOL(setup_new_exec); | 1149 | EXPORT_SYMBOL(setup_new_exec); |
1177 | 1150 | ||
@@ -1632,353 +1605,6 @@ void set_binfmt(struct linux_binfmt *new) | |||
1632 | 1605 | ||
1633 | EXPORT_SYMBOL(set_binfmt); | 1606 | EXPORT_SYMBOL(set_binfmt); |
1634 | 1607 | ||
1635 | static int expand_corename(struct core_name *cn) | ||
1636 | { | ||
1637 | char *old_corename = cn->corename; | ||
1638 | |||
1639 | cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); | ||
1640 | cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); | ||
1641 | |||
1642 | if (!cn->corename) { | ||
1643 | kfree(old_corename); | ||
1644 | return -ENOMEM; | ||
1645 | } | ||
1646 | |||
1647 | return 0; | ||
1648 | } | ||
1649 | |||
1650 | static int cn_printf(struct core_name *cn, const char *fmt, ...) | ||
1651 | { | ||
1652 | char *cur; | ||
1653 | int need; | ||
1654 | int ret; | ||
1655 | va_list arg; | ||
1656 | |||
1657 | va_start(arg, fmt); | ||
1658 | need = vsnprintf(NULL, 0, fmt, arg); | ||
1659 | va_end(arg); | ||
1660 | |||
1661 | if (likely(need < cn->size - cn->used - 1)) | ||
1662 | goto out_printf; | ||
1663 | |||
1664 | ret = expand_corename(cn); | ||
1665 | if (ret) | ||
1666 | goto expand_fail; | ||
1667 | |||
1668 | out_printf: | ||
1669 | cur = cn->corename + cn->used; | ||
1670 | va_start(arg, fmt); | ||
1671 | vsnprintf(cur, need + 1, fmt, arg); | ||
1672 | va_end(arg); | ||
1673 | cn->used += need; | ||
1674 | return 0; | ||
1675 | |||
1676 | expand_fail: | ||
1677 | return ret; | ||
1678 | } | ||
1679 | |||
1680 | static void cn_escape(char *str) | ||
1681 | { | ||
1682 | for (; *str; str++) | ||
1683 | if (*str == '/') | ||
1684 | *str = '!'; | ||
1685 | } | ||
1686 | |||
1687 | static int cn_print_exe_file(struct core_name *cn) | ||
1688 | { | ||
1689 | struct file *exe_file; | ||
1690 | char *pathbuf, *path; | ||
1691 | int ret; | ||
1692 | |||
1693 | exe_file = get_mm_exe_file(current->mm); | ||
1694 | if (!exe_file) { | ||
1695 | char *commstart = cn->corename + cn->used; | ||
1696 | ret = cn_printf(cn, "%s (path unknown)", current->comm); | ||
1697 | cn_escape(commstart); | ||
1698 | return ret; | ||
1699 | } | ||
1700 | |||
1701 | pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); | ||
1702 | if (!pathbuf) { | ||
1703 | ret = -ENOMEM; | ||
1704 | goto put_exe_file; | ||
1705 | } | ||
1706 | |||
1707 | path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); | ||
1708 | if (IS_ERR(path)) { | ||
1709 | ret = PTR_ERR(path); | ||
1710 | goto free_buf; | ||
1711 | } | ||
1712 | |||
1713 | cn_escape(path); | ||
1714 | |||
1715 | ret = cn_printf(cn, "%s", path); | ||
1716 | |||
1717 | free_buf: | ||
1718 | kfree(pathbuf); | ||
1719 | put_exe_file: | ||
1720 | fput(exe_file); | ||
1721 | return ret; | ||
1722 | } | ||
1723 | |||
1724 | /* format_corename will inspect the pattern parameter, and output a | ||
1725 | * name into corename, which must have space for at least | ||
1726 | * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. | ||
1727 | */ | ||
1728 | static int format_corename(struct core_name *cn, long signr) | ||
1729 | { | ||
1730 | const struct cred *cred = current_cred(); | ||
1731 | const char *pat_ptr = core_pattern; | ||
1732 | int ispipe = (*pat_ptr == '|'); | ||
1733 | int pid_in_pattern = 0; | ||
1734 | int err = 0; | ||
1735 | |||
1736 | cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); | ||
1737 | cn->corename = kmalloc(cn->size, GFP_KERNEL); | ||
1738 | cn->used = 0; | ||
1739 | |||
1740 | if (!cn->corename) | ||
1741 | return -ENOMEM; | ||
1742 | |||
1743 | /* Repeat as long as we have more pattern to process and more output | ||
1744 | space */ | ||
1745 | while (*pat_ptr) { | ||
1746 | if (*pat_ptr != '%') { | ||
1747 | if (*pat_ptr == 0) | ||
1748 | goto out; | ||
1749 | err = cn_printf(cn, "%c", *pat_ptr++); | ||
1750 | } else { | ||
1751 | switch (*++pat_ptr) { | ||
1752 | /* single % at the end, drop that */ | ||
1753 | case 0: | ||
1754 | goto out; | ||
1755 | /* Double percent, output one percent */ | ||
1756 | case '%': | ||
1757 | err = cn_printf(cn, "%c", '%'); | ||
1758 | break; | ||
1759 | /* pid */ | ||
1760 | case 'p': | ||
1761 | pid_in_pattern = 1; | ||
1762 | err = cn_printf(cn, "%d", | ||
1763 | task_tgid_vnr(current)); | ||
1764 | break; | ||
1765 | /* uid */ | ||
1766 | case 'u': | ||
1767 | err = cn_printf(cn, "%d", cred->uid); | ||
1768 | break; | ||
1769 | /* gid */ | ||
1770 | case 'g': | ||
1771 | err = cn_printf(cn, "%d", cred->gid); | ||
1772 | break; | ||
1773 | /* signal that caused the coredump */ | ||
1774 | case 's': | ||
1775 | err = cn_printf(cn, "%ld", signr); | ||
1776 | break; | ||
1777 | /* UNIX time of coredump */ | ||
1778 | case 't': { | ||
1779 | struct timeval tv; | ||
1780 | do_gettimeofday(&tv); | ||
1781 | err = cn_printf(cn, "%lu", tv.tv_sec); | ||
1782 | break; | ||
1783 | } | ||
1784 | /* hostname */ | ||
1785 | case 'h': { | ||
1786 | char *namestart = cn->corename + cn->used; | ||
1787 | down_read(&uts_sem); | ||
1788 | err = cn_printf(cn, "%s", | ||
1789 | utsname()->nodename); | ||
1790 | up_read(&uts_sem); | ||
1791 | cn_escape(namestart); | ||
1792 | break; | ||
1793 | } | ||
1794 | /* executable */ | ||
1795 | case 'e': { | ||
1796 | char *commstart = cn->corename + cn->used; | ||
1797 | err = cn_printf(cn, "%s", current->comm); | ||
1798 | cn_escape(commstart); | ||
1799 | break; | ||
1800 | } | ||
1801 | case 'E': | ||
1802 | err = cn_print_exe_file(cn); | ||
1803 | break; | ||
1804 | /* core limit size */ | ||
1805 | case 'c': | ||
1806 | err = cn_printf(cn, "%lu", | ||
1807 | rlimit(RLIMIT_CORE)); | ||
1808 | break; | ||
1809 | default: | ||
1810 | break; | ||
1811 | } | ||
1812 | ++pat_ptr; | ||
1813 | } | ||
1814 | |||
1815 | if (err) | ||
1816 | return err; | ||
1817 | } | ||
1818 | |||
1819 | /* Backward compatibility with core_uses_pid: | ||
1820 | * | ||
1821 | * If core_pattern does not include a %p (as is the default) | ||
1822 | * and core_uses_pid is set, then .%pid will be appended to | ||
1823 | * the filename. Do not do this for piped commands. */ | ||
1824 | if (!ispipe && !pid_in_pattern && core_uses_pid) { | ||
1825 | err = cn_printf(cn, ".%d", task_tgid_vnr(current)); | ||
1826 | if (err) | ||
1827 | return err; | ||
1828 | } | ||
1829 | out: | ||
1830 | return ispipe; | ||
1831 | } | ||
1832 | |||
1833 | static int zap_process(struct task_struct *start, int exit_code) | ||
1834 | { | ||
1835 | struct task_struct *t; | ||
1836 | int nr = 0; | ||
1837 | |||
1838 | start->signal->flags = SIGNAL_GROUP_EXIT; | ||
1839 | start->signal->group_exit_code = exit_code; | ||
1840 | start->signal->group_stop_count = 0; | ||
1841 | |||
1842 | t = start; | ||
1843 | do { | ||
1844 | task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); | ||
1845 | if (t != current && t->mm) { | ||
1846 | sigaddset(&t->pending.signal, SIGKILL); | ||
1847 | signal_wake_up(t, 1); | ||
1848 | nr++; | ||
1849 | } | ||
1850 | } while_each_thread(start, t); | ||
1851 | |||
1852 | return nr; | ||
1853 | } | ||
1854 | |||
1855 | static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, | ||
1856 | struct core_state *core_state, int exit_code) | ||
1857 | { | ||
1858 | struct task_struct *g, *p; | ||
1859 | unsigned long flags; | ||
1860 | int nr = -EAGAIN; | ||
1861 | |||
1862 | spin_lock_irq(&tsk->sighand->siglock); | ||
1863 | if (!signal_group_exit(tsk->signal)) { | ||
1864 | mm->core_state = core_state; | ||
1865 | nr = zap_process(tsk, exit_code); | ||
1866 | } | ||
1867 | spin_unlock_irq(&tsk->sighand->siglock); | ||
1868 | if (unlikely(nr < 0)) | ||
1869 | return nr; | ||
1870 | |||
1871 | if (atomic_read(&mm->mm_users) == nr + 1) | ||
1872 | goto done; | ||
1873 | /* | ||
1874 | * We should find and kill all tasks which use this mm, and we should | ||
1875 | * count them correctly into ->nr_threads. We don't take tasklist | ||
1876 | * lock, but this is safe wrt: | ||
1877 | * | ||
1878 | * fork: | ||
1879 | * None of sub-threads can fork after zap_process(leader). All | ||
1880 | * processes which were created before this point should be | ||
1881 | * visible to zap_threads() because copy_process() adds the new | ||
1882 | * process to the tail of init_task.tasks list, and lock/unlock | ||
1883 | * of ->siglock provides a memory barrier. | ||
1884 | * | ||
1885 | * do_exit: | ||
1886 | * The caller holds mm->mmap_sem. This means that the task which | ||
1887 | * uses this mm can't pass exit_mm(), so it can't exit or clear | ||
1888 | * its ->mm. | ||
1889 | * | ||
1890 | * de_thread: | ||
1891 | * It does list_replace_rcu(&leader->tasks, ¤t->tasks), | ||
1892 | * we must see either old or new leader, this does not matter. | ||
1893 | * However, it can change p->sighand, so lock_task_sighand(p) | ||
1894 | * must be used. Since p->mm != NULL and we hold ->mmap_sem | ||
1895 | * it can't fail. | ||
1896 | * | ||
1897 | * Note also that "g" can be the old leader with ->mm == NULL | ||
1898 | * and already unhashed and thus removed from ->thread_group. | ||
1899 | * This is OK, __unhash_process()->list_del_rcu() does not | ||
1900 | * clear the ->next pointer, we will find the new leader via | ||
1901 | * next_thread(). | ||
1902 | */ | ||
1903 | rcu_read_lock(); | ||
1904 | for_each_process(g) { | ||
1905 | if (g == tsk->group_leader) | ||
1906 | continue; | ||
1907 | if (g->flags & PF_KTHREAD) | ||
1908 | continue; | ||
1909 | p = g; | ||
1910 | do { | ||
1911 | if (p->mm) { | ||
1912 | if (unlikely(p->mm == mm)) { | ||
1913 | lock_task_sighand(p, &flags); | ||
1914 | nr += zap_process(p, exit_code); | ||
1915 | unlock_task_sighand(p, &flags); | ||
1916 | } | ||
1917 | break; | ||
1918 | } | ||
1919 | } while_each_thread(g, p); | ||
1920 | } | ||
1921 | rcu_read_unlock(); | ||
1922 | done: | ||
1923 | atomic_set(&core_state->nr_threads, nr); | ||
1924 | return nr; | ||
1925 | } | ||
1926 | |||
1927 | static int coredump_wait(int exit_code, struct core_state *core_state) | ||
1928 | { | ||
1929 | struct task_struct *tsk = current; | ||
1930 | struct mm_struct *mm = tsk->mm; | ||
1931 | int core_waiters = -EBUSY; | ||
1932 | |||
1933 | init_completion(&core_state->startup); | ||
1934 | core_state->dumper.task = tsk; | ||
1935 | core_state->dumper.next = NULL; | ||
1936 | |||
1937 | down_write(&mm->mmap_sem); | ||
1938 | if (!mm->core_state) | ||
1939 | core_waiters = zap_threads(tsk, mm, core_state, exit_code); | ||
1940 | up_write(&mm->mmap_sem); | ||
1941 | |||
1942 | if (core_waiters > 0) { | ||
1943 | struct core_thread *ptr; | ||
1944 | |||
1945 | wait_for_completion(&core_state->startup); | ||
1946 | /* | ||
1947 | * Wait for all the threads to become inactive, so that | ||
1948 | * all the thread context (extended register state, like | ||
1949 | * fpu etc) gets copied to the memory. | ||
1950 | */ | ||
1951 | ptr = core_state->dumper.next; | ||
1952 | while (ptr != NULL) { | ||
1953 | wait_task_inactive(ptr->task, 0); | ||
1954 | ptr = ptr->next; | ||
1955 | } | ||
1956 | } | ||
1957 | |||
1958 | return core_waiters; | ||
1959 | } | ||
1960 | |||
1961 | static void coredump_finish(struct mm_struct *mm) | ||
1962 | { | ||
1963 | struct core_thread *curr, *next; | ||
1964 | struct task_struct *task; | ||
1965 | |||
1966 | next = mm->core_state->dumper.next; | ||
1967 | while ((curr = next) != NULL) { | ||
1968 | next = curr->next; | ||
1969 | task = curr->task; | ||
1970 | /* | ||
1971 | * see exit_mm(), curr->task must not see | ||
1972 | * ->task == NULL before we read ->next. | ||
1973 | */ | ||
1974 | smp_mb(); | ||
1975 | curr->task = NULL; | ||
1976 | wake_up_process(task); | ||
1977 | } | ||
1978 | |||
1979 | mm->core_state = NULL; | ||
1980 | } | ||
1981 | |||
1982 | /* | 1608 | /* |
1983 | * set_dumpable converts traditional three-value dumpable to two flags and | 1609 | * set_dumpable converts traditional three-value dumpable to two flags and |
1984 | * stores them into mm->flags. It modifies lower two bits of mm->flags, but | 1610 | * stores them into mm->flags. It modifies lower two bits of mm->flags, but |
@@ -2020,7 +1646,7 @@ void set_dumpable(struct mm_struct *mm, int value) | |||
2020 | } | 1646 | } |
2021 | } | 1647 | } |
2022 | 1648 | ||
2023 | static int __get_dumpable(unsigned long mm_flags) | 1649 | int __get_dumpable(unsigned long mm_flags) |
2024 | { | 1650 | { |
2025 | int ret; | 1651 | int ret; |
2026 | 1652 | ||
@@ -2032,290 +1658,3 @@ int get_dumpable(struct mm_struct *mm) | |||
2032 | { | 1658 | { |
2033 | return __get_dumpable(mm->flags); | 1659 | return __get_dumpable(mm->flags); |
2034 | } | 1660 | } |
2035 | |||
2036 | static void wait_for_dump_helpers(struct file *file) | ||
2037 | { | ||
2038 | struct pipe_inode_info *pipe; | ||
2039 | |||
2040 | pipe = file->f_path.dentry->d_inode->i_pipe; | ||
2041 | |||
2042 | pipe_lock(pipe); | ||
2043 | pipe->readers++; | ||
2044 | pipe->writers--; | ||
2045 | |||
2046 | while ((pipe->readers > 1) && (!signal_pending(current))) { | ||
2047 | wake_up_interruptible_sync(&pipe->wait); | ||
2048 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
2049 | pipe_wait(pipe); | ||
2050 | } | ||
2051 | |||
2052 | pipe->readers--; | ||
2053 | pipe->writers++; | ||
2054 | pipe_unlock(pipe); | ||
2055 | |||
2056 | } | ||
2057 | |||
2058 | |||
2059 | /* | ||
2060 | * umh_pipe_setup | ||
2061 | * helper function to customize the process used | ||
2062 | * to collect the core in userspace. Specifically | ||
2063 | * it sets up a pipe and installs it as fd 0 (stdin) | ||
2064 | * for the process. Returns 0 on success, or | ||
2065 | * PTR_ERR on failure. | ||
2066 | * Note that it also sets the core limit to 1. This | ||
2067 | * is a special value that we use to trap recursive | ||
2068 | * core dumps | ||
2069 | */ | ||
2070 | static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) | ||
2071 | { | ||
2072 | struct file *files[2]; | ||
2073 | struct fdtable *fdt; | ||
2074 | struct coredump_params *cp = (struct coredump_params *)info->data; | ||
2075 | struct files_struct *cf = current->files; | ||
2076 | int err = create_pipe_files(files, 0); | ||
2077 | if (err) | ||
2078 | return err; | ||
2079 | |||
2080 | cp->file = files[1]; | ||
2081 | |||
2082 | sys_close(0); | ||
2083 | fd_install(0, files[0]); | ||
2084 | spin_lock(&cf->file_lock); | ||
2085 | fdt = files_fdtable(cf); | ||
2086 | __set_open_fd(0, fdt); | ||
2087 | __clear_close_on_exec(0, fdt); | ||
2088 | spin_unlock(&cf->file_lock); | ||
2089 | |||
2090 | /* and disallow core files too */ | ||
2091 | current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; | ||
2092 | |||
2093 | return 0; | ||
2094 | } | ||
2095 | |||
2096 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) | ||
2097 | { | ||
2098 | struct core_state core_state; | ||
2099 | struct core_name cn; | ||
2100 | struct mm_struct *mm = current->mm; | ||
2101 | struct linux_binfmt * binfmt; | ||
2102 | const struct cred *old_cred; | ||
2103 | struct cred *cred; | ||
2104 | int retval = 0; | ||
2105 | int flag = 0; | ||
2106 | int ispipe; | ||
2107 | bool need_nonrelative = false; | ||
2108 | static atomic_t core_dump_count = ATOMIC_INIT(0); | ||
2109 | struct coredump_params cprm = { | ||
2110 | .signr = signr, | ||
2111 | .regs = regs, | ||
2112 | .limit = rlimit(RLIMIT_CORE), | ||
2113 | /* | ||
2114 | * We must use the same mm->flags while dumping core to avoid | ||
2115 | * inconsistency of bit flags, since this flag is not protected | ||
2116 | * by any locks. | ||
2117 | */ | ||
2118 | .mm_flags = mm->flags, | ||
2119 | }; | ||
2120 | |||
2121 | audit_core_dumps(signr); | ||
2122 | |||
2123 | binfmt = mm->binfmt; | ||
2124 | if (!binfmt || !binfmt->core_dump) | ||
2125 | goto fail; | ||
2126 | if (!__get_dumpable(cprm.mm_flags)) | ||
2127 | goto fail; | ||
2128 | |||
2129 | cred = prepare_creds(); | ||
2130 | if (!cred) | ||
2131 | goto fail; | ||
2132 | /* | ||
2133 | * We cannot trust fsuid as being the "true" uid of the process | ||
2134 | * nor do we know its entire history. We only know it was tainted | ||
2135 | * so we dump it as root in mode 2, and only into a controlled | ||
2136 | * environment (pipe handler or fully qualified path). | ||
2137 | */ | ||
2138 | if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { | ||
2139 | /* Setuid core dump mode */ | ||
2140 | flag = O_EXCL; /* Stop rewrite attacks */ | ||
2141 | cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ | ||
2142 | need_nonrelative = true; | ||
2143 | } | ||
2144 | |||
2145 | retval = coredump_wait(exit_code, &core_state); | ||
2146 | if (retval < 0) | ||
2147 | goto fail_creds; | ||
2148 | |||
2149 | old_cred = override_creds(cred); | ||
2150 | |||
2151 | /* | ||
2152 | * Clear any false indication of pending signals that might | ||
2153 | * be seen by the filesystem code called to write the core file. | ||
2154 | */ | ||
2155 | clear_thread_flag(TIF_SIGPENDING); | ||
2156 | |||
2157 | ispipe = format_corename(&cn, signr); | ||
2158 | |||
2159 | if (ispipe) { | ||
2160 | int dump_count; | ||
2161 | char **helper_argv; | ||
2162 | |||
2163 | if (ispipe < 0) { | ||
2164 | printk(KERN_WARNING "format_corename failed\n"); | ||
2165 | printk(KERN_WARNING "Aborting core\n"); | ||
2166 | goto fail_corename; | ||
2167 | } | ||
2168 | |||
2169 | if (cprm.limit == 1) { | ||
2170 | /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. | ||
2171 | * | ||
2172 | * Normally core limits are irrelevant to pipes, since | ||
2173 | * we're not writing to the file system, but we use | ||
2174 | * cprm.limit of 1 here as a speacial value, this is a | ||
2175 | * consistent way to catch recursive crashes. | ||
2176 | * We can still crash if the core_pattern binary sets | ||
2177 | * RLIM_CORE = !1, but it runs as root, and can do | ||
2178 | * lots of stupid things. | ||
2179 | * | ||
2180 | * Note that we use task_tgid_vnr here to grab the pid | ||
2181 | * of the process group leader. That way we get the | ||
2182 | * right pid if a thread in a multi-threaded | ||
2183 | * core_pattern process dies. | ||
2184 | */ | ||
2185 | printk(KERN_WARNING | ||
2186 | "Process %d(%s) has RLIMIT_CORE set to 1\n", | ||
2187 | task_tgid_vnr(current), current->comm); | ||
2188 | printk(KERN_WARNING "Aborting core\n"); | ||
2189 | goto fail_unlock; | ||
2190 | } | ||
2191 | cprm.limit = RLIM_INFINITY; | ||
2192 | |||
2193 | dump_count = atomic_inc_return(&core_dump_count); | ||
2194 | if (core_pipe_limit && (core_pipe_limit < dump_count)) { | ||
2195 | printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", | ||
2196 | task_tgid_vnr(current), current->comm); | ||
2197 | printk(KERN_WARNING "Skipping core dump\n"); | ||
2198 | goto fail_dropcount; | ||
2199 | } | ||
2200 | |||
2201 | helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); | ||
2202 | if (!helper_argv) { | ||
2203 | printk(KERN_WARNING "%s failed to allocate memory\n", | ||
2204 | __func__); | ||
2205 | goto fail_dropcount; | ||
2206 | } | ||
2207 | |||
2208 | retval = call_usermodehelper_fns(helper_argv[0], helper_argv, | ||
2209 | NULL, UMH_WAIT_EXEC, umh_pipe_setup, | ||
2210 | NULL, &cprm); | ||
2211 | argv_free(helper_argv); | ||
2212 | if (retval) { | ||
2213 | printk(KERN_INFO "Core dump to %s pipe failed\n", | ||
2214 | cn.corename); | ||
2215 | goto close_fail; | ||
2216 | } | ||
2217 | } else { | ||
2218 | struct inode *inode; | ||
2219 | |||
2220 | if (cprm.limit < binfmt->min_coredump) | ||
2221 | goto fail_unlock; | ||
2222 | |||
2223 | if (need_nonrelative && cn.corename[0] != '/') { | ||
2224 | printk(KERN_WARNING "Pid %d(%s) can only dump core "\ | ||
2225 | "to fully qualified path!\n", | ||
2226 | task_tgid_vnr(current), current->comm); | ||
2227 | printk(KERN_WARNING "Skipping core dump\n"); | ||
2228 | goto fail_unlock; | ||
2229 | } | ||
2230 | |||
2231 | cprm.file = filp_open(cn.corename, | ||
2232 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, | ||
2233 | 0600); | ||
2234 | if (IS_ERR(cprm.file)) | ||
2235 | goto fail_unlock; | ||
2236 | |||
2237 | inode = cprm.file->f_path.dentry->d_inode; | ||
2238 | if (inode->i_nlink > 1) | ||
2239 | goto close_fail; | ||
2240 | if (d_unhashed(cprm.file->f_path.dentry)) | ||
2241 | goto close_fail; | ||
2242 | /* | ||
2243 | * AK: actually i see no reason to not allow this for named | ||
2244 | * pipes etc, but keep the previous behaviour for now. | ||
2245 | */ | ||
2246 | if (!S_ISREG(inode->i_mode)) | ||
2247 | goto close_fail; | ||
2248 | /* | ||
2249 | * Dont allow local users get cute and trick others to coredump | ||
2250 | * into their pre-created files. | ||
2251 | */ | ||
2252 | if (!uid_eq(inode->i_uid, current_fsuid())) | ||
2253 | goto close_fail; | ||
2254 | if (!cprm.file->f_op || !cprm.file->f_op->write) | ||
2255 | goto close_fail; | ||
2256 | if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) | ||
2257 | goto close_fail; | ||
2258 | } | ||
2259 | |||
2260 | retval = binfmt->core_dump(&cprm); | ||
2261 | if (retval) | ||
2262 | current->signal->group_exit_code |= 0x80; | ||
2263 | |||
2264 | if (ispipe && core_pipe_limit) | ||
2265 | wait_for_dump_helpers(cprm.file); | ||
2266 | close_fail: | ||
2267 | if (cprm.file) | ||
2268 | filp_close(cprm.file, NULL); | ||
2269 | fail_dropcount: | ||
2270 | if (ispipe) | ||
2271 | atomic_dec(&core_dump_count); | ||
2272 | fail_unlock: | ||
2273 | kfree(cn.corename); | ||
2274 | fail_corename: | ||
2275 | coredump_finish(mm); | ||
2276 | revert_creds(old_cred); | ||
2277 | fail_creds: | ||
2278 | put_cred(cred); | ||
2279 | fail: | ||
2280 | return; | ||
2281 | } | ||
2282 | |||
2283 | /* | ||
2284 | * Core dumping helper functions. These are the only things you should | ||
2285 | * do on a core-file: use only these functions to write out all the | ||
2286 | * necessary info. | ||
2287 | */ | ||
2288 | int dump_write(struct file *file, const void *addr, int nr) | ||
2289 | { | ||
2290 | return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; | ||
2291 | } | ||
2292 | EXPORT_SYMBOL(dump_write); | ||
2293 | |||
2294 | int dump_seek(struct file *file, loff_t off) | ||
2295 | { | ||
2296 | int ret = 1; | ||
2297 | |||
2298 | if (file->f_op->llseek && file->f_op->llseek != no_llseek) { | ||
2299 | if (file->f_op->llseek(file, off, SEEK_CUR) < 0) | ||
2300 | return 0; | ||
2301 | } else { | ||
2302 | char *buf = (char *)get_zeroed_page(GFP_KERNEL); | ||
2303 | |||
2304 | if (!buf) | ||
2305 | return 0; | ||
2306 | while (off > 0) { | ||
2307 | unsigned long n = off; | ||
2308 | |||
2309 | if (n > PAGE_SIZE) | ||
2310 | n = PAGE_SIZE; | ||
2311 | if (!dump_write(file, buf, n)) { | ||
2312 | ret = 0; | ||
2313 | break; | ||
2314 | } | ||
2315 | off -= n; | ||
2316 | } | ||
2317 | free_page((unsigned long)buf); | ||
2318 | } | ||
2319 | return ret; | ||
2320 | } | ||
2321 | EXPORT_SYMBOL(dump_seek); | ||
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 1562c27a2fab..b56181047751 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -1172,8 +1172,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
1172 | 1172 | ||
1173 | /* copy stuff from on-disk struct to in-memory struct */ | 1173 | /* copy stuff from on-disk struct to in-memory struct */ |
1174 | inode->i_mode = le16_to_cpu(fcb.i_mode); | 1174 | inode->i_mode = le16_to_cpu(fcb.i_mode); |
1175 | inode->i_uid = le32_to_cpu(fcb.i_uid); | 1175 | i_uid_write(inode, le32_to_cpu(fcb.i_uid)); |
1176 | inode->i_gid = le32_to_cpu(fcb.i_gid); | 1176 | i_gid_write(inode, le32_to_cpu(fcb.i_gid)); |
1177 | set_nlink(inode, le16_to_cpu(fcb.i_links_count)); | 1177 | set_nlink(inode, le16_to_cpu(fcb.i_links_count)); |
1178 | inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime); | 1178 | inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime); |
1179 | inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime); | 1179 | inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime); |
@@ -1385,8 +1385,8 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
1385 | fcb = &args->fcb; | 1385 | fcb = &args->fcb; |
1386 | 1386 | ||
1387 | fcb->i_mode = cpu_to_le16(inode->i_mode); | 1387 | fcb->i_mode = cpu_to_le16(inode->i_mode); |
1388 | fcb->i_uid = cpu_to_le32(inode->i_uid); | 1388 | fcb->i_uid = cpu_to_le32(i_uid_read(inode)); |
1389 | fcb->i_gid = cpu_to_le32(inode->i_gid); | 1389 | fcb->i_gid = cpu_to_le32(i_gid_read(inode)); |
1390 | fcb->i_links_count = cpu_to_le16(inode->i_nlink); | 1390 | fcb->i_links_count = cpu_to_le16(inode->i_nlink); |
1391 | fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); | 1391 | fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); |
1392 | fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec); | 1392 | fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec); |
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index 5f376d14fdcc..b963f38ac298 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c | |||
@@ -203,7 +203,7 @@ static unsigned _sp2d_min_pg(struct __stripe_pages_2d *sp2d) | |||
203 | 203 | ||
204 | static unsigned _sp2d_max_pg(struct __stripe_pages_2d *sp2d) | 204 | static unsigned _sp2d_max_pg(struct __stripe_pages_2d *sp2d) |
205 | { | 205 | { |
206 | unsigned p; | 206 | int p; |
207 | 207 | ||
208 | for (p = sp2d->pages_in_unit - 1; p >= 0; --p) { | 208 | for (p = sp2d->pages_in_unit - 1; p >= 0; --p) { |
209 | struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; | 209 | struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; |
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index dde41a75c7c8..59e3bbfac0b1 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -206,6 +206,11 @@ static int init_inodecache(void) | |||
206 | */ | 206 | */ |
207 | static void destroy_inodecache(void) | 207 | static void destroy_inodecache(void) |
208 | { | 208 | { |
209 | /* | ||
210 | * Make sure all delayed rcu free inodes are flushed before we | ||
211 | * destroy cache. | ||
212 | */ | ||
213 | rcu_barrier(); | ||
209 | kmem_cache_destroy(exofs_inode_cachep); | 214 | kmem_cache_destroy(exofs_inode_cachep); |
210 | } | 215 | } |
211 | 216 | ||
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c index 5a7b691e748b..1b4f2f95fc37 100644 --- a/fs/exofs/sys.c +++ b/fs/exofs/sys.c | |||
@@ -80,8 +80,13 @@ static ssize_t uri_show(struct exofs_dev *edp, char *buf) | |||
80 | 80 | ||
81 | static ssize_t uri_store(struct exofs_dev *edp, const char *buf, size_t len) | 81 | static ssize_t uri_store(struct exofs_dev *edp, const char *buf, size_t len) |
82 | { | 82 | { |
83 | uint8_t *new_uri; | ||
84 | |||
83 | edp->urilen = strlen(buf) + 1; | 85 | edp->urilen = strlen(buf) + 1; |
84 | edp->uri = krealloc(edp->uri, edp->urilen, GFP_KERNEL); | 86 | new_uri = krealloc(edp->uri, edp->urilen, GFP_KERNEL); |
87 | if (new_uri == NULL) | ||
88 | return -ENOMEM; | ||
89 | edp->uri = new_uri; | ||
85 | strncpy(edp->uri, buf, edp->urilen); | 90 | strncpy(edp->uri, buf, edp->urilen); |
86 | return edp->urilen; | 91 | return edp->urilen; |
87 | } | 92 | } |
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 35d6a3cfd9ff..110b6b371a4e 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c | |||
@@ -53,16 +53,23 @@ ext2_acl_from_disk(const void *value, size_t size) | |||
53 | case ACL_OTHER: | 53 | case ACL_OTHER: |
54 | value = (char *)value + | 54 | value = (char *)value + |
55 | sizeof(ext2_acl_entry_short); | 55 | sizeof(ext2_acl_entry_short); |
56 | acl->a_entries[n].e_id = ACL_UNDEFINED_ID; | ||
57 | break; | 56 | break; |
58 | 57 | ||
59 | case ACL_USER: | 58 | case ACL_USER: |
59 | value = (char *)value + sizeof(ext2_acl_entry); | ||
60 | if ((char *)value > end) | ||
61 | goto fail; | ||
62 | acl->a_entries[n].e_uid = | ||
63 | make_kuid(&init_user_ns, | ||
64 | le32_to_cpu(entry->e_id)); | ||
65 | break; | ||
60 | case ACL_GROUP: | 66 | case ACL_GROUP: |
61 | value = (char *)value + sizeof(ext2_acl_entry); | 67 | value = (char *)value + sizeof(ext2_acl_entry); |
62 | if ((char *)value > end) | 68 | if ((char *)value > end) |
63 | goto fail; | 69 | goto fail; |
64 | acl->a_entries[n].e_id = | 70 | acl->a_entries[n].e_gid = |
65 | le32_to_cpu(entry->e_id); | 71 | make_kgid(&init_user_ns, |
72 | le32_to_cpu(entry->e_id)); | ||
66 | break; | 73 | break; |
67 | 74 | ||
68 | default: | 75 | default: |
@@ -96,14 +103,19 @@ ext2_acl_to_disk(const struct posix_acl *acl, size_t *size) | |||
96 | ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION); | 103 | ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION); |
97 | e = (char *)ext_acl + sizeof(ext2_acl_header); | 104 | e = (char *)ext_acl + sizeof(ext2_acl_header); |
98 | for (n=0; n < acl->a_count; n++) { | 105 | for (n=0; n < acl->a_count; n++) { |
106 | const struct posix_acl_entry *acl_e = &acl->a_entries[n]; | ||
99 | ext2_acl_entry *entry = (ext2_acl_entry *)e; | 107 | ext2_acl_entry *entry = (ext2_acl_entry *)e; |
100 | entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); | 108 | entry->e_tag = cpu_to_le16(acl_e->e_tag); |
101 | entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); | 109 | entry->e_perm = cpu_to_le16(acl_e->e_perm); |
102 | switch(acl->a_entries[n].e_tag) { | 110 | switch(acl_e->e_tag) { |
103 | case ACL_USER: | 111 | case ACL_USER: |
112 | entry->e_id = cpu_to_le32( | ||
113 | from_kuid(&init_user_ns, acl_e->e_uid)); | ||
114 | e += sizeof(ext2_acl_entry); | ||
115 | break; | ||
104 | case ACL_GROUP: | 116 | case ACL_GROUP: |
105 | entry->e_id = | 117 | entry->e_id = cpu_to_le32( |
106 | cpu_to_le32(acl->a_entries[n].e_id); | 118 | from_kgid(&init_user_ns, acl_e->e_gid)); |
107 | e += sizeof(ext2_acl_entry); | 119 | e += sizeof(ext2_acl_entry); |
108 | break; | 120 | break; |
109 | 121 | ||
@@ -350,7 +362,7 @@ ext2_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, | |||
350 | return PTR_ERR(acl); | 362 | return PTR_ERR(acl); |
351 | if (acl == NULL) | 363 | if (acl == NULL) |
352 | return -ENODATA; | 364 | return -ENODATA; |
353 | error = posix_acl_to_xattr(acl, buffer, size); | 365 | error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); |
354 | posix_acl_release(acl); | 366 | posix_acl_release(acl); |
355 | 367 | ||
356 | return error; | 368 | return error; |
@@ -371,7 +383,7 @@ ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, | |||
371 | return -EPERM; | 383 | return -EPERM; |
372 | 384 | ||
373 | if (value) { | 385 | if (value) { |
374 | acl = posix_acl_from_xattr(value, size); | 386 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
375 | if (IS_ERR(acl)) | 387 | if (IS_ERR(acl)) |
376 | return PTR_ERR(acl); | 388 | return PTR_ERR(acl); |
377 | else if (acl) { | 389 | else if (acl) { |
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 376aa77f3ca7..2616d0ea5c5c 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c | |||
@@ -479,7 +479,7 @@ void ext2_discard_reservation(struct inode *inode) | |||
479 | /** | 479 | /** |
480 | * ext2_free_blocks() -- Free given blocks and update quota and i_blocks | 480 | * ext2_free_blocks() -- Free given blocks and update quota and i_blocks |
481 | * @inode: inode | 481 | * @inode: inode |
482 | * @block: start physcial block to free | 482 | * @block: start physical block to free |
483 | * @count: number of blocks to free | 483 | * @count: number of blocks to free |
484 | */ | 484 | */ |
485 | void ext2_free_blocks (struct inode * inode, unsigned long block, | 485 | void ext2_free_blocks (struct inode * inode, unsigned long block, |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index af74d9e27b71..6c205d0c565b 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -206,6 +206,11 @@ static int init_inodecache(void) | |||
206 | 206 | ||
207 | static void destroy_inodecache(void) | 207 | static void destroy_inodecache(void) |
208 | { | 208 | { |
209 | /* | ||
210 | * Make sure all delayed rcu free inodes are flushed before we | ||
211 | * destroy cache. | ||
212 | */ | ||
213 | rcu_barrier(); | ||
209 | kmem_cache_destroy(ext2_inode_cachep); | 214 | kmem_cache_destroy(ext2_inode_cachep); |
210 | } | 215 | } |
211 | 216 | ||
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index c76832c8d192..dbb5ad59a7fc 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c | |||
@@ -48,16 +48,23 @@ ext3_acl_from_disk(const void *value, size_t size) | |||
48 | case ACL_OTHER: | 48 | case ACL_OTHER: |
49 | value = (char *)value + | 49 | value = (char *)value + |
50 | sizeof(ext3_acl_entry_short); | 50 | sizeof(ext3_acl_entry_short); |
51 | acl->a_entries[n].e_id = ACL_UNDEFINED_ID; | ||
52 | break; | 51 | break; |
53 | 52 | ||
54 | case ACL_USER: | 53 | case ACL_USER: |
54 | value = (char *)value + sizeof(ext3_acl_entry); | ||
55 | if ((char *)value > end) | ||
56 | goto fail; | ||
57 | acl->a_entries[n].e_uid = | ||
58 | make_kuid(&init_user_ns, | ||
59 | le32_to_cpu(entry->e_id)); | ||
60 | break; | ||
55 | case ACL_GROUP: | 61 | case ACL_GROUP: |
56 | value = (char *)value + sizeof(ext3_acl_entry); | 62 | value = (char *)value + sizeof(ext3_acl_entry); |
57 | if ((char *)value > end) | 63 | if ((char *)value > end) |
58 | goto fail; | 64 | goto fail; |
59 | acl->a_entries[n].e_id = | 65 | acl->a_entries[n].e_gid = |
60 | le32_to_cpu(entry->e_id); | 66 | make_kgid(&init_user_ns, |
67 | le32_to_cpu(entry->e_id)); | ||
61 | break; | 68 | break; |
62 | 69 | ||
63 | default: | 70 | default: |
@@ -91,14 +98,19 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size) | |||
91 | ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION); | 98 | ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION); |
92 | e = (char *)ext_acl + sizeof(ext3_acl_header); | 99 | e = (char *)ext_acl + sizeof(ext3_acl_header); |
93 | for (n=0; n < acl->a_count; n++) { | 100 | for (n=0; n < acl->a_count; n++) { |
101 | const struct posix_acl_entry *acl_e = &acl->a_entries[n]; | ||
94 | ext3_acl_entry *entry = (ext3_acl_entry *)e; | 102 | ext3_acl_entry *entry = (ext3_acl_entry *)e; |
95 | entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); | 103 | entry->e_tag = cpu_to_le16(acl_e->e_tag); |
96 | entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); | 104 | entry->e_perm = cpu_to_le16(acl_e->e_perm); |
97 | switch(acl->a_entries[n].e_tag) { | 105 | switch(acl_e->e_tag) { |
98 | case ACL_USER: | 106 | case ACL_USER: |
107 | entry->e_id = cpu_to_le32( | ||
108 | from_kuid(&init_user_ns, acl_e->e_uid)); | ||
109 | e += sizeof(ext3_acl_entry); | ||
110 | break; | ||
99 | case ACL_GROUP: | 111 | case ACL_GROUP: |
100 | entry->e_id = | 112 | entry->e_id = cpu_to_le32( |
101 | cpu_to_le32(acl->a_entries[n].e_id); | 113 | from_kgid(&init_user_ns, acl_e->e_gid)); |
102 | e += sizeof(ext3_acl_entry); | 114 | e += sizeof(ext3_acl_entry); |
103 | break; | 115 | break; |
104 | 116 | ||
@@ -369,7 +381,7 @@ ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, | |||
369 | return PTR_ERR(acl); | 381 | return PTR_ERR(acl); |
370 | if (acl == NULL) | 382 | if (acl == NULL) |
371 | return -ENODATA; | 383 | return -ENODATA; |
372 | error = posix_acl_to_xattr(acl, buffer, size); | 384 | error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); |
373 | posix_acl_release(acl); | 385 | posix_acl_release(acl); |
374 | 386 | ||
375 | return error; | 387 | return error; |
@@ -392,7 +404,7 @@ ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, | |||
392 | return -EPERM; | 404 | return -EPERM; |
393 | 405 | ||
394 | if (value) { | 406 | if (value) { |
395 | acl = posix_acl_from_xattr(value, size); | 407 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
396 | if (IS_ERR(acl)) | 408 | if (IS_ERR(acl)) |
397 | return PTR_ERR(acl); | 409 | return PTR_ERR(acl); |
398 | else if (acl) { | 410 | else if (acl) { |
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 90d901f0486b..7320a66e958f 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c | |||
@@ -483,7 +483,7 @@ void ext3_discard_reservation(struct inode *inode) | |||
483 | * ext3_free_blocks_sb() -- Free given blocks and update quota | 483 | * ext3_free_blocks_sb() -- Free given blocks and update quota |
484 | * @handle: handle to this transaction | 484 | * @handle: handle to this transaction |
485 | * @sb: super block | 485 | * @sb: super block |
486 | * @block: start physcial block to free | 486 | * @block: start physical block to free |
487 | * @count: number of blocks to free | 487 | * @count: number of blocks to free |
488 | * @pdquot_freed_blocks: pointer to quota | 488 | * @pdquot_freed_blocks: pointer to quota |
489 | */ | 489 | */ |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index a07597307fd1..7e87e37a372a 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -3072,6 +3072,8 @@ static int ext3_do_update_inode(handle_t *handle, | |||
3072 | struct ext3_inode_info *ei = EXT3_I(inode); | 3072 | struct ext3_inode_info *ei = EXT3_I(inode); |
3073 | struct buffer_head *bh = iloc->bh; | 3073 | struct buffer_head *bh = iloc->bh; |
3074 | int err = 0, rc, block; | 3074 | int err = 0, rc, block; |
3075 | int need_datasync = 0; | ||
3076 | __le32 disksize; | ||
3075 | uid_t i_uid; | 3077 | uid_t i_uid; |
3076 | gid_t i_gid; | 3078 | gid_t i_gid; |
3077 | 3079 | ||
@@ -3113,7 +3115,11 @@ again: | |||
3113 | raw_inode->i_gid_high = 0; | 3115 | raw_inode->i_gid_high = 0; |
3114 | } | 3116 | } |
3115 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); | 3117 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); |
3116 | raw_inode->i_size = cpu_to_le32(ei->i_disksize); | 3118 | disksize = cpu_to_le32(ei->i_disksize); |
3119 | if (disksize != raw_inode->i_size) { | ||
3120 | need_datasync = 1; | ||
3121 | raw_inode->i_size = disksize; | ||
3122 | } | ||
3117 | raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); | 3123 | raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); |
3118 | raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); | 3124 | raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); |
3119 | raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); | 3125 | raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); |
@@ -3129,8 +3135,11 @@ again: | |||
3129 | if (!S_ISREG(inode->i_mode)) { | 3135 | if (!S_ISREG(inode->i_mode)) { |
3130 | raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); | 3136 | raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); |
3131 | } else { | 3137 | } else { |
3132 | raw_inode->i_size_high = | 3138 | disksize = cpu_to_le32(ei->i_disksize >> 32); |
3133 | cpu_to_le32(ei->i_disksize >> 32); | 3139 | if (disksize != raw_inode->i_size_high) { |
3140 | raw_inode->i_size_high = disksize; | ||
3141 | need_datasync = 1; | ||
3142 | } | ||
3134 | if (ei->i_disksize > 0x7fffffffULL) { | 3143 | if (ei->i_disksize > 0x7fffffffULL) { |
3135 | struct super_block *sb = inode->i_sb; | 3144 | struct super_block *sb = inode->i_sb; |
3136 | if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, | 3145 | if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, |
@@ -3183,6 +3192,8 @@ again: | |||
3183 | ext3_clear_inode_state(inode, EXT3_STATE_NEW); | 3192 | ext3_clear_inode_state(inode, EXT3_STATE_NEW); |
3184 | 3193 | ||
3185 | atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); | 3194 | atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); |
3195 | if (need_datasync) | ||
3196 | atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); | ||
3186 | out_brelse: | 3197 | out_brelse: |
3187 | brelse (bh); | 3198 | brelse (bh); |
3188 | ext3_std_error(inode->i_sb, err); | 3199 | ext3_std_error(inode->i_sb, err); |
@@ -3196,7 +3207,7 @@ out_brelse: | |||
3196 | * | 3207 | * |
3197 | * - Within generic_file_write() for O_SYNC files. | 3208 | * - Within generic_file_write() for O_SYNC files. |
3198 | * Here, there will be no transaction running. We wait for any running | 3209 | * Here, there will be no transaction running. We wait for any running |
3199 | * trasnaction to commit. | 3210 | * transaction to commit. |
3200 | * | 3211 | * |
3201 | * - Within sys_sync(), kupdate and such. | 3212 | * - Within sys_sync(), kupdate and such. |
3202 | * We wait on commit, if tol to. | 3213 | * We wait on commit, if tol to. |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8c892e93d8e7..17ae5c83d234 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -532,6 +532,11 @@ static int init_inodecache(void) | |||
532 | 532 | ||
533 | static void destroy_inodecache(void) | 533 | static void destroy_inodecache(void) |
534 | { | 534 | { |
535 | /* | ||
536 | * Make sure all delayed rcu free inodes are flushed before we | ||
537 | * destroy cache. | ||
538 | */ | ||
539 | rcu_barrier(); | ||
535 | kmem_cache_destroy(ext3_inode_cachep); | 540 | kmem_cache_destroy(ext3_inode_cachep); |
536 | } | 541 | } |
537 | 542 | ||
@@ -975,7 +980,7 @@ static int parse_options (char *options, struct super_block *sb, | |||
975 | * Initialize args struct so we know whether arg was | 980 | * Initialize args struct so we know whether arg was |
976 | * found; some options take optional arguments. | 981 | * found; some options take optional arguments. |
977 | */ | 982 | */ |
978 | args[0].to = args[0].from = 0; | 983 | args[0].to = args[0].from = NULL; |
979 | token = match_token(p, tokens, args); | 984 | token = match_token(p, tokens, args); |
980 | switch (token) { | 985 | switch (token) { |
981 | case Opt_bsd_df: | 986 | case Opt_bsd_df: |
@@ -1479,10 +1484,12 @@ static void ext3_orphan_cleanup (struct super_block * sb, | |||
1479 | } | 1484 | } |
1480 | 1485 | ||
1481 | if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { | 1486 | if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { |
1482 | if (es->s_last_orphan) | 1487 | /* don't clear list on RO mount w/ errors */ |
1488 | if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { | ||
1483 | jbd_debug(1, "Errors on filesystem, " | 1489 | jbd_debug(1, "Errors on filesystem, " |
1484 | "clearing orphan list.\n"); | 1490 | "clearing orphan list.\n"); |
1485 | es->s_last_orphan = 0; | 1491 | es->s_last_orphan = 0; |
1492 | } | ||
1486 | jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); | 1493 | jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); |
1487 | return; | 1494 | return; |
1488 | } | 1495 | } |
@@ -2803,7 +2810,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
2803 | 2810 | ||
2804 | static inline struct inode *dquot_to_inode(struct dquot *dquot) | 2811 | static inline struct inode *dquot_to_inode(struct dquot *dquot) |
2805 | { | 2812 | { |
2806 | return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; | 2813 | return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type]; |
2807 | } | 2814 | } |
2808 | 2815 | ||
2809 | static int ext3_write_dquot(struct dquot *dquot) | 2816 | static int ext3_write_dquot(struct dquot *dquot) |
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index a5c29bb3b835..d3c5b88fd89f 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c | |||
@@ -55,16 +55,23 @@ ext4_acl_from_disk(const void *value, size_t size) | |||
55 | case ACL_OTHER: | 55 | case ACL_OTHER: |
56 | value = (char *)value + | 56 | value = (char *)value + |
57 | sizeof(ext4_acl_entry_short); | 57 | sizeof(ext4_acl_entry_short); |
58 | acl->a_entries[n].e_id = ACL_UNDEFINED_ID; | ||
59 | break; | 58 | break; |
60 | 59 | ||
61 | case ACL_USER: | 60 | case ACL_USER: |
61 | value = (char *)value + sizeof(ext4_acl_entry); | ||
62 | if ((char *)value > end) | ||
63 | goto fail; | ||
64 | acl->a_entries[n].e_uid = | ||
65 | make_kuid(&init_user_ns, | ||
66 | le32_to_cpu(entry->e_id)); | ||
67 | break; | ||
62 | case ACL_GROUP: | 68 | case ACL_GROUP: |
63 | value = (char *)value + sizeof(ext4_acl_entry); | 69 | value = (char *)value + sizeof(ext4_acl_entry); |
64 | if ((char *)value > end) | 70 | if ((char *)value > end) |
65 | goto fail; | 71 | goto fail; |
66 | acl->a_entries[n].e_id = | 72 | acl->a_entries[n].e_gid = |
67 | le32_to_cpu(entry->e_id); | 73 | make_kgid(&init_user_ns, |
74 | le32_to_cpu(entry->e_id)); | ||
68 | break; | 75 | break; |
69 | 76 | ||
70 | default: | 77 | default: |
@@ -98,13 +105,19 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size) | |||
98 | ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); | 105 | ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); |
99 | e = (char *)ext_acl + sizeof(ext4_acl_header); | 106 | e = (char *)ext_acl + sizeof(ext4_acl_header); |
100 | for (n = 0; n < acl->a_count; n++) { | 107 | for (n = 0; n < acl->a_count; n++) { |
108 | const struct posix_acl_entry *acl_e = &acl->a_entries[n]; | ||
101 | ext4_acl_entry *entry = (ext4_acl_entry *)e; | 109 | ext4_acl_entry *entry = (ext4_acl_entry *)e; |
102 | entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); | 110 | entry->e_tag = cpu_to_le16(acl_e->e_tag); |
103 | entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); | 111 | entry->e_perm = cpu_to_le16(acl_e->e_perm); |
104 | switch (acl->a_entries[n].e_tag) { | 112 | switch (acl_e->e_tag) { |
105 | case ACL_USER: | 113 | case ACL_USER: |
114 | entry->e_id = cpu_to_le32( | ||
115 | from_kuid(&init_user_ns, acl_e->e_uid)); | ||
116 | e += sizeof(ext4_acl_entry); | ||
117 | break; | ||
106 | case ACL_GROUP: | 118 | case ACL_GROUP: |
107 | entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); | 119 | entry->e_id = cpu_to_le32( |
120 | from_kgid(&init_user_ns, acl_e->e_gid)); | ||
108 | e += sizeof(ext4_acl_entry); | 121 | e += sizeof(ext4_acl_entry); |
109 | break; | 122 | break; |
110 | 123 | ||
@@ -374,7 +387,7 @@ ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, | |||
374 | return PTR_ERR(acl); | 387 | return PTR_ERR(acl); |
375 | if (acl == NULL) | 388 | if (acl == NULL) |
376 | return -ENODATA; | 389 | return -ENODATA; |
377 | error = posix_acl_to_xattr(acl, buffer, size); | 390 | error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); |
378 | posix_acl_release(acl); | 391 | posix_acl_release(acl); |
379 | 392 | ||
380 | return error; | 393 | return error; |
@@ -397,7 +410,7 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, | |||
397 | return -EPERM; | 410 | return -EPERM; |
398 | 411 | ||
399 | if (value) { | 412 | if (value) { |
400 | acl = posix_acl_from_xattr(value, size); | 413 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
401 | if (IS_ERR(acl)) | 414 | if (IS_ERR(acl)) |
402 | return PTR_ERR(acl); | 415 | return PTR_ERR(acl); |
403 | else if (acl) { | 416 | else if (acl) { |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d23b31ca9d7a..1b5089067d01 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -280,14 +280,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, | |||
280 | return desc; | 280 | return desc; |
281 | } | 281 | } |
282 | 282 | ||
283 | static int ext4_valid_block_bitmap(struct super_block *sb, | 283 | /* |
284 | struct ext4_group_desc *desc, | 284 | * Return the block number which was discovered to be invalid, or 0 if |
285 | unsigned int block_group, | 285 | * the block bitmap is valid. |
286 | struct buffer_head *bh) | 286 | */ |
287 | static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, | ||
288 | struct ext4_group_desc *desc, | ||
289 | unsigned int block_group, | ||
290 | struct buffer_head *bh) | ||
287 | { | 291 | { |
288 | ext4_grpblk_t offset; | 292 | ext4_grpblk_t offset; |
289 | ext4_grpblk_t next_zero_bit; | 293 | ext4_grpblk_t next_zero_bit; |
290 | ext4_fsblk_t bitmap_blk; | 294 | ext4_fsblk_t blk; |
291 | ext4_fsblk_t group_first_block; | 295 | ext4_fsblk_t group_first_block; |
292 | 296 | ||
293 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { | 297 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { |
@@ -297,37 +301,33 @@ static int ext4_valid_block_bitmap(struct super_block *sb, | |||
297 | * or it has to also read the block group where the bitmaps | 301 | * or it has to also read the block group where the bitmaps |
298 | * are located to verify they are set. | 302 | * are located to verify they are set. |
299 | */ | 303 | */ |
300 | return 1; | 304 | return 0; |
301 | } | 305 | } |
302 | group_first_block = ext4_group_first_block_no(sb, block_group); | 306 | group_first_block = ext4_group_first_block_no(sb, block_group); |
303 | 307 | ||
304 | /* check whether block bitmap block number is set */ | 308 | /* check whether block bitmap block number is set */ |
305 | bitmap_blk = ext4_block_bitmap(sb, desc); | 309 | blk = ext4_block_bitmap(sb, desc); |
306 | offset = bitmap_blk - group_first_block; | 310 | offset = blk - group_first_block; |
307 | if (!ext4_test_bit(offset, bh->b_data)) | 311 | if (!ext4_test_bit(offset, bh->b_data)) |
308 | /* bad block bitmap */ | 312 | /* bad block bitmap */ |
309 | goto err_out; | 313 | return blk; |
310 | 314 | ||
311 | /* check whether the inode bitmap block number is set */ | 315 | /* check whether the inode bitmap block number is set */ |
312 | bitmap_blk = ext4_inode_bitmap(sb, desc); | 316 | blk = ext4_inode_bitmap(sb, desc); |
313 | offset = bitmap_blk - group_first_block; | 317 | offset = blk - group_first_block; |
314 | if (!ext4_test_bit(offset, bh->b_data)) | 318 | if (!ext4_test_bit(offset, bh->b_data)) |
315 | /* bad block bitmap */ | 319 | /* bad block bitmap */ |
316 | goto err_out; | 320 | return blk; |
317 | 321 | ||
318 | /* check whether the inode table block number is set */ | 322 | /* check whether the inode table block number is set */ |
319 | bitmap_blk = ext4_inode_table(sb, desc); | 323 | blk = ext4_inode_table(sb, desc); |
320 | offset = bitmap_blk - group_first_block; | 324 | offset = blk - group_first_block; |
321 | next_zero_bit = ext4_find_next_zero_bit(bh->b_data, | 325 | next_zero_bit = ext4_find_next_zero_bit(bh->b_data, |
322 | offset + EXT4_SB(sb)->s_itb_per_group, | 326 | offset + EXT4_SB(sb)->s_itb_per_group, |
323 | offset); | 327 | offset); |
324 | if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group) | 328 | if (next_zero_bit < offset + EXT4_SB(sb)->s_itb_per_group) |
325 | /* good bitmap for inode tables */ | 329 | /* bad bitmap for inode tables */ |
326 | return 1; | 330 | return blk; |
327 | |||
328 | err_out: | ||
329 | ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu", | ||
330 | block_group, bitmap_blk); | ||
331 | return 0; | 331 | return 0; |
332 | } | 332 | } |
333 | 333 | ||
@@ -336,14 +336,26 @@ void ext4_validate_block_bitmap(struct super_block *sb, | |||
336 | unsigned int block_group, | 336 | unsigned int block_group, |
337 | struct buffer_head *bh) | 337 | struct buffer_head *bh) |
338 | { | 338 | { |
339 | ext4_fsblk_t blk; | ||
340 | |||
339 | if (buffer_verified(bh)) | 341 | if (buffer_verified(bh)) |
340 | return; | 342 | return; |
341 | 343 | ||
342 | ext4_lock_group(sb, block_group); | 344 | ext4_lock_group(sb, block_group); |
343 | if (ext4_valid_block_bitmap(sb, desc, block_group, bh) && | 345 | blk = ext4_valid_block_bitmap(sb, desc, block_group, bh); |
344 | ext4_block_bitmap_csum_verify(sb, block_group, desc, bh, | 346 | if (unlikely(blk != 0)) { |
345 | EXT4_BLOCKS_PER_GROUP(sb) / 8)) | 347 | ext4_unlock_group(sb, block_group); |
346 | set_buffer_verified(bh); | 348 | ext4_error(sb, "bg %u: block %llu: invalid block bitmap", |
349 | block_group, blk); | ||
350 | return; | ||
351 | } | ||
352 | if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, | ||
353 | desc, bh, EXT4_BLOCKS_PER_GROUP(sb) / 8))) { | ||
354 | ext4_unlock_group(sb, block_group); | ||
355 | ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); | ||
356 | return; | ||
357 | } | ||
358 | set_buffer_verified(bh); | ||
347 | ext4_unlock_group(sb, block_group); | 359 | ext4_unlock_group(sb, block_group); |
348 | } | 360 | } |
349 | 361 | ||
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index f8716eab9995..5c2d1813ebe9 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c | |||
@@ -79,7 +79,6 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, | |||
79 | if (provided == calculated) | 79 | if (provided == calculated) |
80 | return 1; | 80 | return 1; |
81 | 81 | ||
82 | ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group); | ||
83 | return 0; | 82 | return 0; |
84 | } | 83 | } |
85 | 84 | ||
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c3411d4ce2da..3ab2539b7b2e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -186,7 +186,6 @@ struct mpage_da_data { | |||
186 | #define EXT4_IO_END_ERROR 0x0002 | 186 | #define EXT4_IO_END_ERROR 0x0002 |
187 | #define EXT4_IO_END_QUEUED 0x0004 | 187 | #define EXT4_IO_END_QUEUED 0x0004 |
188 | #define EXT4_IO_END_DIRECT 0x0008 | 188 | #define EXT4_IO_END_DIRECT 0x0008 |
189 | #define EXT4_IO_END_IN_FSYNC 0x0010 | ||
190 | 189 | ||
191 | struct ext4_io_page { | 190 | struct ext4_io_page { |
192 | struct page *p_page; | 191 | struct page *p_page; |
@@ -912,9 +911,7 @@ struct ext4_inode_info { | |||
912 | struct list_head i_completed_io_list; | 911 | struct list_head i_completed_io_list; |
913 | spinlock_t i_completed_io_lock; | 912 | spinlock_t i_completed_io_lock; |
914 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ | 913 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ |
915 | /* current io_end structure for async DIO write*/ | 914 | atomic_t i_unwritten; /* Nr. of inflight conversions pending */ |
916 | ext4_io_end_t *cur_aio_dio; | ||
917 | atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */ | ||
918 | 915 | ||
919 | spinlock_t i_block_reservation_lock; | 916 | spinlock_t i_block_reservation_lock; |
920 | 917 | ||
@@ -1233,6 +1230,7 @@ struct ext4_sb_info { | |||
1233 | spinlock_t s_md_lock; | 1230 | spinlock_t s_md_lock; |
1234 | unsigned short *s_mb_offsets; | 1231 | unsigned short *s_mb_offsets; |
1235 | unsigned int *s_mb_maxs; | 1232 | unsigned int *s_mb_maxs; |
1233 | unsigned int s_group_info_size; | ||
1236 | 1234 | ||
1237 | /* tunables */ | 1235 | /* tunables */ |
1238 | unsigned long s_stripe; | 1236 | unsigned long s_stripe; |
@@ -1243,6 +1241,7 @@ struct ext4_sb_info { | |||
1243 | unsigned int s_mb_order2_reqs; | 1241 | unsigned int s_mb_order2_reqs; |
1244 | unsigned int s_mb_group_prealloc; | 1242 | unsigned int s_mb_group_prealloc; |
1245 | unsigned int s_max_writeback_mb_bump; | 1243 | unsigned int s_max_writeback_mb_bump; |
1244 | unsigned int s_max_dir_size_kb; | ||
1246 | /* where last allocation was done - for stream allocation */ | 1245 | /* where last allocation was done - for stream allocation */ |
1247 | unsigned long s_mb_last_group; | 1246 | unsigned long s_mb_last_group; |
1248 | unsigned long s_mb_last_start; | 1247 | unsigned long s_mb_last_start; |
@@ -1270,8 +1269,12 @@ struct ext4_sb_info { | |||
1270 | unsigned long s_sectors_written_start; | 1269 | unsigned long s_sectors_written_start; |
1271 | u64 s_kbytes_written; | 1270 | u64 s_kbytes_written; |
1272 | 1271 | ||
1272 | /* the size of zero-out chunk */ | ||
1273 | unsigned int s_extent_max_zeroout_kb; | ||
1274 | |||
1273 | unsigned int s_log_groups_per_flex; | 1275 | unsigned int s_log_groups_per_flex; |
1274 | struct flex_groups *s_flex_groups; | 1276 | struct flex_groups *s_flex_groups; |
1277 | ext4_group_t s_flex_groups_allocated; | ||
1275 | 1278 | ||
1276 | /* workqueue for dio unwritten */ | 1279 | /* workqueue for dio unwritten */ |
1277 | struct workqueue_struct *dio_unwritten_wq; | 1280 | struct workqueue_struct *dio_unwritten_wq; |
@@ -1328,10 +1331,20 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode, | |||
1328 | { | 1331 | { |
1329 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 1332 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
1330 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | 1333 | io_end->flag |= EXT4_IO_END_UNWRITTEN; |
1331 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | 1334 | atomic_inc(&EXT4_I(inode)->i_unwritten); |
1332 | } | 1335 | } |
1333 | } | 1336 | } |
1334 | 1337 | ||
1338 | static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode) | ||
1339 | { | ||
1340 | return inode->i_private; | ||
1341 | } | ||
1342 | |||
1343 | static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io) | ||
1344 | { | ||
1345 | inode->i_private = io; | ||
1346 | } | ||
1347 | |||
1335 | /* | 1348 | /* |
1336 | * Inode dynamic state flags | 1349 | * Inode dynamic state flags |
1337 | */ | 1350 | */ |
@@ -1345,6 +1358,8 @@ enum { | |||
1345 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ | 1358 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ |
1346 | EXT4_STATE_NEWENTRY, /* File just added to dir */ | 1359 | EXT4_STATE_NEWENTRY, /* File just added to dir */ |
1347 | EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */ | 1360 | EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */ |
1361 | EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read | ||
1362 | nolocking */ | ||
1348 | }; | 1363 | }; |
1349 | 1364 | ||
1350 | #define EXT4_INODE_BIT_FNS(name, field, offset) \ | 1365 | #define EXT4_INODE_BIT_FNS(name, field, offset) \ |
@@ -1932,7 +1947,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p); | |||
1932 | 1947 | ||
1933 | /* fsync.c */ | 1948 | /* fsync.c */ |
1934 | extern int ext4_sync_file(struct file *, loff_t, loff_t, int); | 1949 | extern int ext4_sync_file(struct file *, loff_t, loff_t, int); |
1935 | extern int ext4_flush_completed_IO(struct inode *); | 1950 | extern int ext4_flush_unwritten_io(struct inode *); |
1936 | 1951 | ||
1937 | /* hash.c */ | 1952 | /* hash.c */ |
1938 | extern int ext4fs_dirhash(const char *name, int len, struct | 1953 | extern int ext4fs_dirhash(const char *name, int len, struct |
@@ -1966,6 +1981,8 @@ extern void ext4_exit_mballoc(void); | |||
1966 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, | 1981 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, |
1967 | struct buffer_head *bh, ext4_fsblk_t block, | 1982 | struct buffer_head *bh, ext4_fsblk_t block, |
1968 | unsigned long count, int flags); | 1983 | unsigned long count, int flags); |
1984 | extern int ext4_mb_alloc_groupinfo(struct super_block *sb, | ||
1985 | ext4_group_t ngroups); | ||
1969 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 1986 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
1970 | ext4_group_t i, struct ext4_group_desc *desc); | 1987 | ext4_group_t i, struct ext4_group_desc *desc); |
1971 | extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, | 1988 | extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, |
@@ -2051,6 +2068,8 @@ extern void ext4_superblock_csum_set(struct super_block *sb, | |||
2051 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); | 2068 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); |
2052 | extern void *ext4_kvzalloc(size_t size, gfp_t flags); | 2069 | extern void *ext4_kvzalloc(size_t size, gfp_t flags); |
2053 | extern void ext4_kvfree(void *ptr); | 2070 | extern void ext4_kvfree(void *ptr); |
2071 | extern int ext4_alloc_flex_bg_array(struct super_block *sb, | ||
2072 | ext4_group_t ngroup); | ||
2054 | extern __printf(4, 5) | 2073 | extern __printf(4, 5) |
2055 | void __ext4_error(struct super_block *, const char *, unsigned int, | 2074 | void __ext4_error(struct super_block *, const char *, unsigned int, |
2056 | const char *, ...); | 2075 | const char *, ...); |
@@ -2352,6 +2371,7 @@ extern const struct file_operations ext4_dir_operations; | |||
2352 | extern const struct inode_operations ext4_file_inode_operations; | 2371 | extern const struct inode_operations ext4_file_inode_operations; |
2353 | extern const struct file_operations ext4_file_operations; | 2372 | extern const struct file_operations ext4_file_operations; |
2354 | extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); | 2373 | extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); |
2374 | extern void ext4_unwritten_wait(struct inode *inode); | ||
2355 | 2375 | ||
2356 | /* namei.c */ | 2376 | /* namei.c */ |
2357 | extern const struct inode_operations ext4_dir_inode_operations; | 2377 | extern const struct inode_operations ext4_dir_inode_operations; |
@@ -2400,11 +2420,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
2400 | 2420 | ||
2401 | /* page-io.c */ | 2421 | /* page-io.c */ |
2402 | extern int __init ext4_init_pageio(void); | 2422 | extern int __init ext4_init_pageio(void); |
2423 | extern void ext4_add_complete_io(ext4_io_end_t *io_end); | ||
2403 | extern void ext4_exit_pageio(void); | 2424 | extern void ext4_exit_pageio(void); |
2404 | extern void ext4_ioend_wait(struct inode *); | 2425 | extern void ext4_ioend_wait(struct inode *); |
2405 | extern void ext4_free_io_end(ext4_io_end_t *io); | 2426 | extern void ext4_free_io_end(ext4_io_end_t *io); |
2406 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); | 2427 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); |
2407 | extern int ext4_end_io_nolock(ext4_io_end_t *io); | ||
2408 | extern void ext4_io_submit(struct ext4_io_submit *io); | 2428 | extern void ext4_io_submit(struct ext4_io_submit *io); |
2409 | extern int ext4_bio_write_page(struct ext4_io_submit *io, | 2429 | extern int ext4_bio_write_page(struct ext4_io_submit *io, |
2410 | struct page *page, | 2430 | struct page *page, |
@@ -2452,6 +2472,21 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) | |||
2452 | set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); | 2472 | set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); |
2453 | } | 2473 | } |
2454 | 2474 | ||
2475 | /* | ||
2476 | * Disable DIO read nolock optimization, so new dioreaders will be forced | ||
2477 | * to grab i_mutex | ||
2478 | */ | ||
2479 | static inline void ext4_inode_block_unlocked_dio(struct inode *inode) | ||
2480 | { | ||
2481 | ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); | ||
2482 | smp_mb(); | ||
2483 | } | ||
2484 | static inline void ext4_inode_resume_unlocked_dio(struct inode *inode) | ||
2485 | { | ||
2486 | smp_mb(); | ||
2487 | ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); | ||
2488 | } | ||
2489 | |||
2455 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 2490 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
2456 | 2491 | ||
2457 | /* For ioend & aio unwritten conversion wait queues */ | 2492 | /* For ioend & aio unwritten conversion wait queues */ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index cd0c7ed06772..1c94cca35ed1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -1177,7 +1177,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1177 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), | 1177 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), |
1178 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); | 1178 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); |
1179 | 1179 | ||
1180 | neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1); | 1180 | le16_add_cpu(&neh->eh_depth, 1); |
1181 | ext4_mark_inode_dirty(handle, inode); | 1181 | ext4_mark_inode_dirty(handle, inode); |
1182 | out: | 1182 | out: |
1183 | brelse(bh); | 1183 | brelse(bh); |
@@ -1656,16 +1656,60 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, | |||
1656 | } | 1656 | } |
1657 | 1657 | ||
1658 | /* | 1658 | /* |
1659 | * This function does a very simple check to see if we can collapse | ||
1660 | * an extent tree with a single extent tree leaf block into the inode. | ||
1661 | */ | ||
1662 | static void ext4_ext_try_to_merge_up(handle_t *handle, | ||
1663 | struct inode *inode, | ||
1664 | struct ext4_ext_path *path) | ||
1665 | { | ||
1666 | size_t s; | ||
1667 | unsigned max_root = ext4_ext_space_root(inode, 0); | ||
1668 | ext4_fsblk_t blk; | ||
1669 | |||
1670 | if ((path[0].p_depth != 1) || | ||
1671 | (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) || | ||
1672 | (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root)) | ||
1673 | return; | ||
1674 | |||
1675 | /* | ||
1676 | * We need to modify the block allocation bitmap and the block | ||
1677 | * group descriptor to release the extent tree block. If we | ||
1678 | * can't get the journal credits, give up. | ||
1679 | */ | ||
1680 | if (ext4_journal_extend(handle, 2)) | ||
1681 | return; | ||
1682 | |||
1683 | /* | ||
1684 | * Copy the extent data up to the inode | ||
1685 | */ | ||
1686 | blk = ext4_idx_pblock(path[0].p_idx); | ||
1687 | s = le16_to_cpu(path[1].p_hdr->eh_entries) * | ||
1688 | sizeof(struct ext4_extent_idx); | ||
1689 | s += sizeof(struct ext4_extent_header); | ||
1690 | |||
1691 | memcpy(path[0].p_hdr, path[1].p_hdr, s); | ||
1692 | path[0].p_depth = 0; | ||
1693 | path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + | ||
1694 | (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr)); | ||
1695 | path[0].p_hdr->eh_max = cpu_to_le16(max_root); | ||
1696 | |||
1697 | brelse(path[1].p_bh); | ||
1698 | ext4_free_blocks(handle, inode, NULL, blk, 1, | ||
1699 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | ||
1700 | } | ||
1701 | |||
1702 | /* | ||
1659 | * This function tries to merge the @ex extent to neighbours in the tree. | 1703 | * This function tries to merge the @ex extent to neighbours in the tree. |
1660 | * return 1 if merge left else 0. | 1704 | * return 1 if merge left else 0. |
1661 | */ | 1705 | */ |
1662 | static int ext4_ext_try_to_merge(struct inode *inode, | 1706 | static void ext4_ext_try_to_merge(handle_t *handle, |
1707 | struct inode *inode, | ||
1663 | struct ext4_ext_path *path, | 1708 | struct ext4_ext_path *path, |
1664 | struct ext4_extent *ex) { | 1709 | struct ext4_extent *ex) { |
1665 | struct ext4_extent_header *eh; | 1710 | struct ext4_extent_header *eh; |
1666 | unsigned int depth; | 1711 | unsigned int depth; |
1667 | int merge_done = 0; | 1712 | int merge_done = 0; |
1668 | int ret = 0; | ||
1669 | 1713 | ||
1670 | depth = ext_depth(inode); | 1714 | depth = ext_depth(inode); |
1671 | BUG_ON(path[depth].p_hdr == NULL); | 1715 | BUG_ON(path[depth].p_hdr == NULL); |
@@ -1675,9 +1719,9 @@ static int ext4_ext_try_to_merge(struct inode *inode, | |||
1675 | merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); | 1719 | merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); |
1676 | 1720 | ||
1677 | if (!merge_done) | 1721 | if (!merge_done) |
1678 | ret = ext4_ext_try_to_merge_right(inode, path, ex); | 1722 | (void) ext4_ext_try_to_merge_right(inode, path, ex); |
1679 | 1723 | ||
1680 | return ret; | 1724 | ext4_ext_try_to_merge_up(handle, inode, path); |
1681 | } | 1725 | } |
1682 | 1726 | ||
1683 | /* | 1727 | /* |
@@ -1893,7 +1937,7 @@ has_space: | |||
1893 | merge: | 1937 | merge: |
1894 | /* try to merge extents */ | 1938 | /* try to merge extents */ |
1895 | if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) | 1939 | if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) |
1896 | ext4_ext_try_to_merge(inode, path, nearex); | 1940 | ext4_ext_try_to_merge(handle, inode, path, nearex); |
1897 | 1941 | ||
1898 | 1942 | ||
1899 | /* time to correct all indexes above */ | 1943 | /* time to correct all indexes above */ |
@@ -1901,7 +1945,7 @@ merge: | |||
1901 | if (err) | 1945 | if (err) |
1902 | goto cleanup; | 1946 | goto cleanup; |
1903 | 1947 | ||
1904 | err = ext4_ext_dirty(handle, inode, path + depth); | 1948 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
1905 | 1949 | ||
1906 | cleanup: | 1950 | cleanup: |
1907 | if (npath) { | 1951 | if (npath) { |
@@ -2092,13 +2136,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2092 | } | 2136 | } |
2093 | 2137 | ||
2094 | /* | 2138 | /* |
2095 | * ext4_ext_check_cache() | 2139 | * ext4_ext_in_cache() |
2096 | * Checks to see if the given block is in the cache. | 2140 | * Checks to see if the given block is in the cache. |
2097 | * If it is, the cached extent is stored in the given | 2141 | * If it is, the cached extent is stored in the given |
2098 | * cache extent pointer. If the cached extent is a hole, | 2142 | * cache extent pointer. |
2099 | * this routine should be used instead of | ||
2100 | * ext4_ext_in_cache if the calling function needs to | ||
2101 | * know the size of the hole. | ||
2102 | * | 2143 | * |
2103 | * @inode: The files inode | 2144 | * @inode: The files inode |
2104 | * @block: The block to look for in the cache | 2145 | * @block: The block to look for in the cache |
@@ -2107,8 +2148,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2107 | * | 2148 | * |
2108 | * Return 0 if cache is invalid; 1 if the cache is valid | 2149 | * Return 0 if cache is invalid; 1 if the cache is valid |
2109 | */ | 2150 | */ |
2110 | static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, | 2151 | static int |
2111 | struct ext4_ext_cache *ex){ | 2152 | ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, |
2153 | struct ext4_extent *ex) | ||
2154 | { | ||
2112 | struct ext4_ext_cache *cex; | 2155 | struct ext4_ext_cache *cex; |
2113 | struct ext4_sb_info *sbi; | 2156 | struct ext4_sb_info *sbi; |
2114 | int ret = 0; | 2157 | int ret = 0; |
@@ -2125,7 +2168,9 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, | |||
2125 | goto errout; | 2168 | goto errout; |
2126 | 2169 | ||
2127 | if (in_range(block, cex->ec_block, cex->ec_len)) { | 2170 | if (in_range(block, cex->ec_block, cex->ec_len)) { |
2128 | memcpy(ex, cex, sizeof(struct ext4_ext_cache)); | 2171 | ex->ee_block = cpu_to_le32(cex->ec_block); |
2172 | ext4_ext_store_pblock(ex, cex->ec_start); | ||
2173 | ex->ee_len = cpu_to_le16(cex->ec_len); | ||
2129 | ext_debug("%u cached by %u:%u:%llu\n", | 2174 | ext_debug("%u cached by %u:%u:%llu\n", |
2130 | block, | 2175 | block, |
2131 | cex->ec_block, cex->ec_len, cex->ec_start); | 2176 | cex->ec_block, cex->ec_len, cex->ec_start); |
@@ -2138,37 +2183,6 @@ errout: | |||
2138 | } | 2183 | } |
2139 | 2184 | ||
2140 | /* | 2185 | /* |
2141 | * ext4_ext_in_cache() | ||
2142 | * Checks to see if the given block is in the cache. | ||
2143 | * If it is, the cached extent is stored in the given | ||
2144 | * extent pointer. | ||
2145 | * | ||
2146 | * @inode: The files inode | ||
2147 | * @block: The block to look for in the cache | ||
2148 | * @ex: Pointer where the cached extent will be stored | ||
2149 | * if it contains block | ||
2150 | * | ||
2151 | * Return 0 if cache is invalid; 1 if the cache is valid | ||
2152 | */ | ||
2153 | static int | ||
2154 | ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | ||
2155 | struct ext4_extent *ex) | ||
2156 | { | ||
2157 | struct ext4_ext_cache cex; | ||
2158 | int ret = 0; | ||
2159 | |||
2160 | if (ext4_ext_check_cache(inode, block, &cex)) { | ||
2161 | ex->ee_block = cpu_to_le32(cex.ec_block); | ||
2162 | ext4_ext_store_pblock(ex, cex.ec_start); | ||
2163 | ex->ee_len = cpu_to_le16(cex.ec_len); | ||
2164 | ret = 1; | ||
2165 | } | ||
2166 | |||
2167 | return ret; | ||
2168 | } | ||
2169 | |||
2170 | |||
2171 | /* | ||
2172 | * ext4_ext_rm_idx: | 2186 | * ext4_ext_rm_idx: |
2173 | * removes index from the index block. | 2187 | * removes index from the index block. |
2174 | */ | 2188 | */ |
@@ -2274,10 +2288,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2274 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2288 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
2275 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | 2289 | unsigned short ee_len = ext4_ext_get_actual_len(ex); |
2276 | ext4_fsblk_t pblk; | 2290 | ext4_fsblk_t pblk; |
2277 | int flags = EXT4_FREE_BLOCKS_FORGET; | 2291 | int flags = 0; |
2278 | 2292 | ||
2279 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 2293 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
2280 | flags |= EXT4_FREE_BLOCKS_METADATA; | 2294 | flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; |
2295 | else if (ext4_should_journal_data(inode)) | ||
2296 | flags |= EXT4_FREE_BLOCKS_FORGET; | ||
2297 | |||
2281 | /* | 2298 | /* |
2282 | * For bigalloc file systems, we never free a partial cluster | 2299 | * For bigalloc file systems, we never free a partial cluster |
2283 | * at the beginning of the extent. Instead, we make a note | 2300 | * at the beginning of the extent. Instead, we make a note |
@@ -2572,7 +2589,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | |||
2572 | struct ext4_ext_path *path = NULL; | 2589 | struct ext4_ext_path *path = NULL; |
2573 | ext4_fsblk_t partial_cluster = 0; | 2590 | ext4_fsblk_t partial_cluster = 0; |
2574 | handle_t *handle; | 2591 | handle_t *handle; |
2575 | int i = 0, err; | 2592 | int i = 0, err = 0; |
2576 | 2593 | ||
2577 | ext_debug("truncate since %u to %u\n", start, end); | 2594 | ext_debug("truncate since %u to %u\n", start, end); |
2578 | 2595 | ||
@@ -2604,12 +2621,16 @@ again: | |||
2604 | return PTR_ERR(path); | 2621 | return PTR_ERR(path); |
2605 | } | 2622 | } |
2606 | depth = ext_depth(inode); | 2623 | depth = ext_depth(inode); |
2624 | /* Leaf not may not exist only if inode has no blocks at all */ | ||
2607 | ex = path[depth].p_ext; | 2625 | ex = path[depth].p_ext; |
2608 | if (!ex) { | 2626 | if (!ex) { |
2609 | ext4_ext_drop_refs(path); | 2627 | if (depth) { |
2610 | kfree(path); | 2628 | EXT4_ERROR_INODE(inode, |
2611 | path = NULL; | 2629 | "path[%d].p_hdr == NULL", |
2612 | goto cont; | 2630 | depth); |
2631 | err = -EIO; | ||
2632 | } | ||
2633 | goto out; | ||
2613 | } | 2634 | } |
2614 | 2635 | ||
2615 | ee_block = le32_to_cpu(ex->ee_block); | 2636 | ee_block = le32_to_cpu(ex->ee_block); |
@@ -2641,8 +2662,6 @@ again: | |||
2641 | goto out; | 2662 | goto out; |
2642 | } | 2663 | } |
2643 | } | 2664 | } |
2644 | cont: | ||
2645 | |||
2646 | /* | 2665 | /* |
2647 | * We start scanning from right side, freeing all the blocks | 2666 | * We start scanning from right side, freeing all the blocks |
2648 | * after i_size and walking into the tree depth-wise. | 2667 | * after i_size and walking into the tree depth-wise. |
@@ -2662,6 +2681,7 @@ cont: | |||
2662 | } | 2681 | } |
2663 | path[0].p_depth = depth; | 2682 | path[0].p_depth = depth; |
2664 | path[0].p_hdr = ext_inode_hdr(inode); | 2683 | path[0].p_hdr = ext_inode_hdr(inode); |
2684 | i = 0; | ||
2665 | 2685 | ||
2666 | if (ext4_ext_check(inode, path[0].p_hdr, depth)) { | 2686 | if (ext4_ext_check(inode, path[0].p_hdr, depth)) { |
2667 | err = -EIO; | 2687 | err = -EIO; |
@@ -2923,9 +2943,9 @@ static int ext4_split_extent_at(handle_t *handle, | |||
2923 | ext4_ext_mark_initialized(ex); | 2943 | ext4_ext_mark_initialized(ex); |
2924 | 2944 | ||
2925 | if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) | 2945 | if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) |
2926 | ext4_ext_try_to_merge(inode, path, ex); | 2946 | ext4_ext_try_to_merge(handle, inode, path, ex); |
2927 | 2947 | ||
2928 | err = ext4_ext_dirty(handle, inode, path + depth); | 2948 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
2929 | goto out; | 2949 | goto out; |
2930 | } | 2950 | } |
2931 | 2951 | ||
@@ -2957,8 +2977,8 @@ static int ext4_split_extent_at(handle_t *handle, | |||
2957 | goto fix_extent_len; | 2977 | goto fix_extent_len; |
2958 | /* update the extent length and mark as initialized */ | 2978 | /* update the extent length and mark as initialized */ |
2959 | ex->ee_len = cpu_to_le16(ee_len); | 2979 | ex->ee_len = cpu_to_le16(ee_len); |
2960 | ext4_ext_try_to_merge(inode, path, ex); | 2980 | ext4_ext_try_to_merge(handle, inode, path, ex); |
2961 | err = ext4_ext_dirty(handle, inode, path + depth); | 2981 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
2962 | goto out; | 2982 | goto out; |
2963 | } else if (err) | 2983 | } else if (err) |
2964 | goto fix_extent_len; | 2984 | goto fix_extent_len; |
@@ -3040,7 +3060,6 @@ out: | |||
3040 | return err ? err : map->m_len; | 3060 | return err ? err : map->m_len; |
3041 | } | 3061 | } |
3042 | 3062 | ||
3043 | #define EXT4_EXT_ZERO_LEN 7 | ||
3044 | /* | 3063 | /* |
3045 | * This function is called by ext4_ext_map_blocks() if someone tries to write | 3064 | * This function is called by ext4_ext_map_blocks() if someone tries to write |
3046 | * to an uninitialized extent. It may result in splitting the uninitialized | 3065 | * to an uninitialized extent. It may result in splitting the uninitialized |
@@ -3066,13 +3085,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3066 | struct ext4_map_blocks *map, | 3085 | struct ext4_map_blocks *map, |
3067 | struct ext4_ext_path *path) | 3086 | struct ext4_ext_path *path) |
3068 | { | 3087 | { |
3088 | struct ext4_sb_info *sbi; | ||
3069 | struct ext4_extent_header *eh; | 3089 | struct ext4_extent_header *eh; |
3070 | struct ext4_map_blocks split_map; | 3090 | struct ext4_map_blocks split_map; |
3071 | struct ext4_extent zero_ex; | 3091 | struct ext4_extent zero_ex; |
3072 | struct ext4_extent *ex; | 3092 | struct ext4_extent *ex; |
3073 | ext4_lblk_t ee_block, eof_block; | 3093 | ext4_lblk_t ee_block, eof_block; |
3074 | unsigned int ee_len, depth; | 3094 | unsigned int ee_len, depth; |
3075 | int allocated; | 3095 | int allocated, max_zeroout = 0; |
3076 | int err = 0; | 3096 | int err = 0; |
3077 | int split_flag = 0; | 3097 | int split_flag = 0; |
3078 | 3098 | ||
@@ -3080,6 +3100,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3080 | "block %llu, max_blocks %u\n", inode->i_ino, | 3100 | "block %llu, max_blocks %u\n", inode->i_ino, |
3081 | (unsigned long long)map->m_lblk, map->m_len); | 3101 | (unsigned long long)map->m_lblk, map->m_len); |
3082 | 3102 | ||
3103 | sbi = EXT4_SB(inode->i_sb); | ||
3083 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | 3104 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> |
3084 | inode->i_sb->s_blocksize_bits; | 3105 | inode->i_sb->s_blocksize_bits; |
3085 | if (eof_block < map->m_lblk + map->m_len) | 3106 | if (eof_block < map->m_lblk + map->m_len) |
@@ -3179,9 +3200,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3179 | */ | 3200 | */ |
3180 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; | 3201 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; |
3181 | 3202 | ||
3182 | /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ | 3203 | if (EXT4_EXT_MAY_ZEROOUT & split_flag) |
3183 | if (ee_len <= 2*EXT4_EXT_ZERO_LEN && | 3204 | max_zeroout = sbi->s_extent_max_zeroout_kb >> |
3184 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | 3205 | inode->i_sb->s_blocksize_bits; |
3206 | |||
3207 | /* If extent is less than s_max_zeroout_kb, zeroout directly */ | ||
3208 | if (max_zeroout && (ee_len <= max_zeroout)) { | ||
3185 | err = ext4_ext_zeroout(inode, ex); | 3209 | err = ext4_ext_zeroout(inode, ex); |
3186 | if (err) | 3210 | if (err) |
3187 | goto out; | 3211 | goto out; |
@@ -3190,8 +3214,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3190 | if (err) | 3214 | if (err) |
3191 | goto out; | 3215 | goto out; |
3192 | ext4_ext_mark_initialized(ex); | 3216 | ext4_ext_mark_initialized(ex); |
3193 | ext4_ext_try_to_merge(inode, path, ex); | 3217 | ext4_ext_try_to_merge(handle, inode, path, ex); |
3194 | err = ext4_ext_dirty(handle, inode, path + depth); | 3218 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
3195 | goto out; | 3219 | goto out; |
3196 | } | 3220 | } |
3197 | 3221 | ||
@@ -3205,9 +3229,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3205 | split_map.m_lblk = map->m_lblk; | 3229 | split_map.m_lblk = map->m_lblk; |
3206 | split_map.m_len = map->m_len; | 3230 | split_map.m_len = map->m_len; |
3207 | 3231 | ||
3208 | if (allocated > map->m_len) { | 3232 | if (max_zeroout && (allocated > map->m_len)) { |
3209 | if (allocated <= EXT4_EXT_ZERO_LEN && | 3233 | if (allocated <= max_zeroout) { |
3210 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3211 | /* case 3 */ | 3234 | /* case 3 */ |
3212 | zero_ex.ee_block = | 3235 | zero_ex.ee_block = |
3213 | cpu_to_le32(map->m_lblk); | 3236 | cpu_to_le32(map->m_lblk); |
@@ -3219,9 +3242,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3219 | goto out; | 3242 | goto out; |
3220 | split_map.m_lblk = map->m_lblk; | 3243 | split_map.m_lblk = map->m_lblk; |
3221 | split_map.m_len = allocated; | 3244 | split_map.m_len = allocated; |
3222 | } else if ((map->m_lblk - ee_block + map->m_len < | 3245 | } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) { |
3223 | EXT4_EXT_ZERO_LEN) && | ||
3224 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3225 | /* case 2 */ | 3246 | /* case 2 */ |
3226 | if (map->m_lblk != ee_block) { | 3247 | if (map->m_lblk != ee_block) { |
3227 | zero_ex.ee_block = ex->ee_block; | 3248 | zero_ex.ee_block = ex->ee_block; |
@@ -3241,7 +3262,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3241 | } | 3262 | } |
3242 | 3263 | ||
3243 | allocated = ext4_split_extent(handle, inode, path, | 3264 | allocated = ext4_split_extent(handle, inode, path, |
3244 | &split_map, split_flag, 0); | 3265 | &split_map, split_flag, 0); |
3245 | if (allocated < 0) | 3266 | if (allocated < 0) |
3246 | err = allocated; | 3267 | err = allocated; |
3247 | 3268 | ||
@@ -3255,7 +3276,7 @@ out: | |||
3255 | * to an uninitialized extent. | 3276 | * to an uninitialized extent. |
3256 | * | 3277 | * |
3257 | * Writing to an uninitialized extent may result in splitting the uninitialized | 3278 | * Writing to an uninitialized extent may result in splitting the uninitialized |
3258 | * extent into multiple /initialized uninitialized extents (up to three) | 3279 | * extent into multiple initialized/uninitialized extents (up to three) |
3259 | * There are three possibilities: | 3280 | * There are three possibilities: |
3260 | * a> There is no split required: Entire extent should be uninitialized | 3281 | * a> There is no split required: Entire extent should be uninitialized |
3261 | * b> Splits in two extents: Write is happening at either end of the extent | 3282 | * b> Splits in two extents: Write is happening at either end of the extent |
@@ -3332,10 +3353,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
3332 | /* note: ext4_ext_correct_indexes() isn't needed here because | 3353 | /* note: ext4_ext_correct_indexes() isn't needed here because |
3333 | * borders are not changed | 3354 | * borders are not changed |
3334 | */ | 3355 | */ |
3335 | ext4_ext_try_to_merge(inode, path, ex); | 3356 | ext4_ext_try_to_merge(handle, inode, path, ex); |
3336 | 3357 | ||
3337 | /* Mark modified extent as dirty */ | 3358 | /* Mark modified extent as dirty */ |
3338 | err = ext4_ext_dirty(handle, inode, path + depth); | 3359 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
3339 | out: | 3360 | out: |
3340 | ext4_ext_show_leaf(inode, path); | 3361 | ext4_ext_show_leaf(inode, path); |
3341 | return err; | 3362 | return err; |
@@ -3599,7 +3620,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3599 | { | 3620 | { |
3600 | int ret = 0; | 3621 | int ret = 0; |
3601 | int err = 0; | 3622 | int err = 0; |
3602 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3623 | ext4_io_end_t *io = ext4_inode_aio(inode); |
3603 | 3624 | ||
3604 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " | 3625 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " |
3605 | "block %llu, max_blocks %u, flags %x, allocated %u\n", | 3626 | "block %llu, max_blocks %u, flags %x, allocated %u\n", |
@@ -3614,6 +3635,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3614 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 3635 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3615 | ret = ext4_split_unwritten_extents(handle, inode, map, | 3636 | ret = ext4_split_unwritten_extents(handle, inode, map, |
3616 | path, flags); | 3637 | path, flags); |
3638 | if (ret <= 0) | ||
3639 | goto out; | ||
3617 | /* | 3640 | /* |
3618 | * Flag the inode(non aio case) or end_io struct (aio case) | 3641 | * Flag the inode(non aio case) or end_io struct (aio case) |
3619 | * that this IO needs to conversion to written when IO is | 3642 | * that this IO needs to conversion to written when IO is |
@@ -3857,8 +3880,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3857 | unsigned int allocated = 0, offset = 0; | 3880 | unsigned int allocated = 0, offset = 0; |
3858 | unsigned int allocated_clusters = 0; | 3881 | unsigned int allocated_clusters = 0; |
3859 | struct ext4_allocation_request ar; | 3882 | struct ext4_allocation_request ar; |
3860 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3883 | ext4_io_end_t *io = ext4_inode_aio(inode); |
3861 | ext4_lblk_t cluster_offset; | 3884 | ext4_lblk_t cluster_offset; |
3885 | int set_unwritten = 0; | ||
3862 | 3886 | ||
3863 | ext_debug("blocks %u/%u requested for inode %lu\n", | 3887 | ext_debug("blocks %u/%u requested for inode %lu\n", |
3864 | map->m_lblk, map->m_len, inode->i_ino); | 3888 | map->m_lblk, map->m_len, inode->i_ino); |
@@ -4081,13 +4105,8 @@ got_allocated_blocks: | |||
4081 | * For non asycn direct IO case, flag the inode state | 4105 | * For non asycn direct IO case, flag the inode state |
4082 | * that we need to perform conversion when IO is done. | 4106 | * that we need to perform conversion when IO is done. |
4083 | */ | 4107 | */ |
4084 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 4108 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) |
4085 | if (io) | 4109 | set_unwritten = 1; |
4086 | ext4_set_io_unwritten_flag(inode, io); | ||
4087 | else | ||
4088 | ext4_set_inode_state(inode, | ||
4089 | EXT4_STATE_DIO_UNWRITTEN); | ||
4090 | } | ||
4091 | if (ext4_should_dioread_nolock(inode)) | 4110 | if (ext4_should_dioread_nolock(inode)) |
4092 | map->m_flags |= EXT4_MAP_UNINIT; | 4111 | map->m_flags |= EXT4_MAP_UNINIT; |
4093 | } | 4112 | } |
@@ -4099,6 +4118,15 @@ got_allocated_blocks: | |||
4099 | if (!err) | 4118 | if (!err) |
4100 | err = ext4_ext_insert_extent(handle, inode, path, | 4119 | err = ext4_ext_insert_extent(handle, inode, path, |
4101 | &newex, flags); | 4120 | &newex, flags); |
4121 | |||
4122 | if (!err && set_unwritten) { | ||
4123 | if (io) | ||
4124 | ext4_set_io_unwritten_flag(inode, io); | ||
4125 | else | ||
4126 | ext4_set_inode_state(inode, | ||
4127 | EXT4_STATE_DIO_UNWRITTEN); | ||
4128 | } | ||
4129 | |||
4102 | if (err && free_on_err) { | 4130 | if (err && free_on_err) { |
4103 | int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? | 4131 | int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? |
4104 | EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; | 4132 | EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; |
@@ -4240,7 +4268,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
4240 | * finish any pending end_io work so we won't run the risk of | 4268 | * finish any pending end_io work so we won't run the risk of |
4241 | * converting any truncated blocks to initialized later | 4269 | * converting any truncated blocks to initialized later |
4242 | */ | 4270 | */ |
4243 | ext4_flush_completed_IO(inode); | 4271 | ext4_flush_unwritten_io(inode); |
4244 | 4272 | ||
4245 | /* | 4273 | /* |
4246 | * probably first extent we're gonna free will be last in block | 4274 | * probably first extent we're gonna free will be last in block |
@@ -4768,9 +4796,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4768 | loff_t first_page_offset, last_page_offset; | 4796 | loff_t first_page_offset, last_page_offset; |
4769 | int credits, err = 0; | 4797 | int credits, err = 0; |
4770 | 4798 | ||
4799 | /* | ||
4800 | * Write out all dirty pages to avoid race conditions | ||
4801 | * Then release them. | ||
4802 | */ | ||
4803 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4804 | err = filemap_write_and_wait_range(mapping, | ||
4805 | offset, offset + length - 1); | ||
4806 | |||
4807 | if (err) | ||
4808 | return err; | ||
4809 | } | ||
4810 | |||
4811 | mutex_lock(&inode->i_mutex); | ||
4812 | /* It's not possible punch hole on append only file */ | ||
4813 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { | ||
4814 | err = -EPERM; | ||
4815 | goto out_mutex; | ||
4816 | } | ||
4817 | if (IS_SWAPFILE(inode)) { | ||
4818 | err = -ETXTBSY; | ||
4819 | goto out_mutex; | ||
4820 | } | ||
4821 | |||
4771 | /* No need to punch hole beyond i_size */ | 4822 | /* No need to punch hole beyond i_size */ |
4772 | if (offset >= inode->i_size) | 4823 | if (offset >= inode->i_size) |
4773 | return 0; | 4824 | goto out_mutex; |
4774 | 4825 | ||
4775 | /* | 4826 | /* |
4776 | * If the hole extends beyond i_size, set the hole | 4827 | * If the hole extends beyond i_size, set the hole |
@@ -4788,35 +4839,26 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4788 | first_page_offset = first_page << PAGE_CACHE_SHIFT; | 4839 | first_page_offset = first_page << PAGE_CACHE_SHIFT; |
4789 | last_page_offset = last_page << PAGE_CACHE_SHIFT; | 4840 | last_page_offset = last_page << PAGE_CACHE_SHIFT; |
4790 | 4841 | ||
4791 | /* | ||
4792 | * Write out all dirty pages to avoid race conditions | ||
4793 | * Then release them. | ||
4794 | */ | ||
4795 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4796 | err = filemap_write_and_wait_range(mapping, | ||
4797 | offset, offset + length - 1); | ||
4798 | |||
4799 | if (err) | ||
4800 | return err; | ||
4801 | } | ||
4802 | |||
4803 | /* Now release the pages */ | 4842 | /* Now release the pages */ |
4804 | if (last_page_offset > first_page_offset) { | 4843 | if (last_page_offset > first_page_offset) { |
4805 | truncate_pagecache_range(inode, first_page_offset, | 4844 | truncate_pagecache_range(inode, first_page_offset, |
4806 | last_page_offset - 1); | 4845 | last_page_offset - 1); |
4807 | } | 4846 | } |
4808 | 4847 | ||
4809 | /* finish any pending end_io work */ | 4848 | /* Wait all existing dio workers, newcomers will block on i_mutex */ |
4810 | ext4_flush_completed_IO(inode); | 4849 | ext4_inode_block_unlocked_dio(inode); |
4850 | err = ext4_flush_unwritten_io(inode); | ||
4851 | if (err) | ||
4852 | goto out_dio; | ||
4853 | inode_dio_wait(inode); | ||
4811 | 4854 | ||
4812 | credits = ext4_writepage_trans_blocks(inode); | 4855 | credits = ext4_writepage_trans_blocks(inode); |
4813 | handle = ext4_journal_start(inode, credits); | 4856 | handle = ext4_journal_start(inode, credits); |
4814 | if (IS_ERR(handle)) | 4857 | if (IS_ERR(handle)) { |
4815 | return PTR_ERR(handle); | 4858 | err = PTR_ERR(handle); |
4859 | goto out_dio; | ||
4860 | } | ||
4816 | 4861 | ||
4817 | err = ext4_orphan_add(handle, inode); | ||
4818 | if (err) | ||
4819 | goto out; | ||
4820 | 4862 | ||
4821 | /* | 4863 | /* |
4822 | * Now we need to zero out the non-page-aligned data in the | 4864 | * Now we need to zero out the non-page-aligned data in the |
@@ -4902,10 +4944,13 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4902 | up_write(&EXT4_I(inode)->i_data_sem); | 4944 | up_write(&EXT4_I(inode)->i_data_sem); |
4903 | 4945 | ||
4904 | out: | 4946 | out: |
4905 | ext4_orphan_del(handle, inode); | ||
4906 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 4947 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
4907 | ext4_mark_inode_dirty(handle, inode); | 4948 | ext4_mark_inode_dirty(handle, inode); |
4908 | ext4_journal_stop(handle); | 4949 | ext4_journal_stop(handle); |
4950 | out_dio: | ||
4951 | ext4_inode_resume_unlocked_dio(inode); | ||
4952 | out_mutex: | ||
4953 | mutex_unlock(&inode->i_mutex); | ||
4909 | return err; | 4954 | return err; |
4910 | } | 4955 | } |
4911 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 4956 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 3b0e3bdaabfc..bf3966bccd34 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -55,11 +55,11 @@ static int ext4_release_file(struct inode *inode, struct file *filp) | |||
55 | return 0; | 55 | return 0; |
56 | } | 56 | } |
57 | 57 | ||
58 | static void ext4_aiodio_wait(struct inode *inode) | 58 | void ext4_unwritten_wait(struct inode *inode) |
59 | { | 59 | { |
60 | wait_queue_head_t *wq = ext4_ioend_wq(inode); | 60 | wait_queue_head_t *wq = ext4_ioend_wq(inode); |
61 | 61 | ||
62 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0)); | 62 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0)); |
63 | } | 63 | } |
64 | 64 | ||
65 | /* | 65 | /* |
@@ -116,7 +116,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, | |||
116 | "performance will be poor.", | 116 | "performance will be poor.", |
117 | inode->i_ino, current->comm); | 117 | inode->i_ino, current->comm); |
118 | mutex_lock(ext4_aio_mutex(inode)); | 118 | mutex_lock(ext4_aio_mutex(inode)); |
119 | ext4_aiodio_wait(inode); | 119 | ext4_unwritten_wait(inode); |
120 | } | 120 | } |
121 | 121 | ||
122 | BUG_ON(iocb->ki_pos != pos); | 122 | BUG_ON(iocb->ki_pos != pos); |
@@ -207,6 +207,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
207 | static const struct vm_operations_struct ext4_file_vm_ops = { | 207 | static const struct vm_operations_struct ext4_file_vm_ops = { |
208 | .fault = filemap_fault, | 208 | .fault = filemap_fault, |
209 | .page_mkwrite = ext4_page_mkwrite, | 209 | .page_mkwrite = ext4_page_mkwrite, |
210 | .remap_pages = generic_file_remap_pages, | ||
210 | }; | 211 | }; |
211 | 212 | ||
212 | static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | 213 | static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) |
@@ -217,7 +218,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
217 | return -ENOEXEC; | 218 | return -ENOEXEC; |
218 | file_accessed(file); | 219 | file_accessed(file); |
219 | vma->vm_ops = &ext4_file_vm_ops; | 220 | vma->vm_ops = &ext4_file_vm_ops; |
220 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
221 | return 0; | 221 | return 0; |
222 | } | 222 | } |
223 | 223 | ||
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 2a1dcea4f12e..be1d89f385b4 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -34,87 +34,6 @@ | |||
34 | 34 | ||
35 | #include <trace/events/ext4.h> | 35 | #include <trace/events/ext4.h> |
36 | 36 | ||
37 | static void dump_completed_IO(struct inode * inode) | ||
38 | { | ||
39 | #ifdef EXT4FS_DEBUG | ||
40 | struct list_head *cur, *before, *after; | ||
41 | ext4_io_end_t *io, *io0, *io1; | ||
42 | unsigned long flags; | ||
43 | |||
44 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ | ||
45 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); | ||
46 | return; | ||
47 | } | ||
48 | |||
49 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); | ||
50 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
51 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
52 | cur = &io->list; | ||
53 | before = cur->prev; | ||
54 | io0 = container_of(before, ext4_io_end_t, list); | ||
55 | after = cur->next; | ||
56 | io1 = container_of(after, ext4_io_end_t, list); | ||
57 | |||
58 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
59 | io, inode->i_ino, io0, io1); | ||
60 | } | ||
61 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
62 | #endif | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * This function is called from ext4_sync_file(). | ||
67 | * | ||
68 | * When IO is completed, the work to convert unwritten extents to | ||
69 | * written is queued on workqueue but may not get immediately | ||
70 | * scheduled. When fsync is called, we need to ensure the | ||
71 | * conversion is complete before fsync returns. | ||
72 | * The inode keeps track of a list of pending/completed IO that | ||
73 | * might needs to do the conversion. This function walks through | ||
74 | * the list and convert the related unwritten extents for completed IO | ||
75 | * to written. | ||
76 | * The function return the number of pending IOs on success. | ||
77 | */ | ||
78 | int ext4_flush_completed_IO(struct inode *inode) | ||
79 | { | ||
80 | ext4_io_end_t *io; | ||
81 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
82 | unsigned long flags; | ||
83 | int ret = 0; | ||
84 | int ret2 = 0; | ||
85 | |||
86 | dump_completed_IO(inode); | ||
87 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
88 | while (!list_empty(&ei->i_completed_io_list)){ | ||
89 | io = list_entry(ei->i_completed_io_list.next, | ||
90 | ext4_io_end_t, list); | ||
91 | list_del_init(&io->list); | ||
92 | io->flag |= EXT4_IO_END_IN_FSYNC; | ||
93 | /* | ||
94 | * Calling ext4_end_io_nolock() to convert completed | ||
95 | * IO to written. | ||
96 | * | ||
97 | * When ext4_sync_file() is called, run_queue() may already | ||
98 | * about to flush the work corresponding to this io structure. | ||
99 | * It will be upset if it founds the io structure related | ||
100 | * to the work-to-be schedule is freed. | ||
101 | * | ||
102 | * Thus we need to keep the io structure still valid here after | ||
103 | * conversion finished. The io structure has a flag to | ||
104 | * avoid double converting from both fsync and background work | ||
105 | * queue work. | ||
106 | */ | ||
107 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
108 | ret = ext4_end_io_nolock(io); | ||
109 | if (ret < 0) | ||
110 | ret2 = ret; | ||
111 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
112 | io->flag &= ~EXT4_IO_END_IN_FSYNC; | ||
113 | } | ||
114 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
115 | return (ret2 < 0) ? ret2 : 0; | ||
116 | } | ||
117 | |||
118 | /* | 37 | /* |
119 | * If we're not journaling and this is a just-created file, we have to | 38 | * If we're not journaling and this is a just-created file, we have to |
120 | * sync our parent directory (if it was freshly created) since | 39 | * sync our parent directory (if it was freshly created) since |
@@ -203,7 +122,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
203 | struct inode *inode = file->f_mapping->host; | 122 | struct inode *inode = file->f_mapping->host; |
204 | struct ext4_inode_info *ei = EXT4_I(inode); | 123 | struct ext4_inode_info *ei = EXT4_I(inode); |
205 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 124 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
206 | int ret; | 125 | int ret, err; |
207 | tid_t commit_tid; | 126 | tid_t commit_tid; |
208 | bool needs_barrier = false; | 127 | bool needs_barrier = false; |
209 | 128 | ||
@@ -219,7 +138,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
219 | if (inode->i_sb->s_flags & MS_RDONLY) | 138 | if (inode->i_sb->s_flags & MS_RDONLY) |
220 | goto out; | 139 | goto out; |
221 | 140 | ||
222 | ret = ext4_flush_completed_IO(inode); | 141 | ret = ext4_flush_unwritten_io(inode); |
223 | if (ret < 0) | 142 | if (ret < 0) |
224 | goto out; | 143 | goto out; |
225 | 144 | ||
@@ -255,8 +174,11 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
255 | needs_barrier = true; | 174 | needs_barrier = true; |
256 | jbd2_log_start_commit(journal, commit_tid); | 175 | jbd2_log_start_commit(journal, commit_tid); |
257 | ret = jbd2_log_wait_commit(journal, commit_tid); | 176 | ret = jbd2_log_wait_commit(journal, commit_tid); |
258 | if (needs_barrier) | 177 | if (needs_barrier) { |
259 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | 178 | err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
179 | if (!ret) | ||
180 | ret = err; | ||
181 | } | ||
260 | out: | 182 | out: |
261 | mutex_unlock(&inode->i_mutex); | 183 | mutex_unlock(&inode->i_mutex); |
262 | trace_ext4_sync_file_exit(inode, ret); | 184 | trace_ext4_sync_file_exit(inode, ret); |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 26154b81b836..fa36372f3fdf 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -697,6 +697,15 @@ got_group: | |||
697 | if (!gdp) | 697 | if (!gdp) |
698 | goto fail; | 698 | goto fail; |
699 | 699 | ||
700 | /* | ||
701 | * Check free inodes count before loading bitmap. | ||
702 | */ | ||
703 | if (ext4_free_inodes_count(sb, gdp) == 0) { | ||
704 | if (++group == ngroups) | ||
705 | group = 0; | ||
706 | continue; | ||
707 | } | ||
708 | |||
700 | brelse(inode_bitmap_bh); | 709 | brelse(inode_bitmap_bh); |
701 | inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); | 710 | inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); |
702 | if (!inode_bitmap_bh) | 711 | if (!inode_bitmap_bh) |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 830e1b2bf145..792e388e7b44 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -807,16 +807,30 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
807 | 807 | ||
808 | retry: | 808 | retry: |
809 | if (rw == READ && ext4_should_dioread_nolock(inode)) { | 809 | if (rw == READ && ext4_should_dioread_nolock(inode)) { |
810 | if (unlikely(!list_empty(&ei->i_completed_io_list))) { | 810 | if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) { |
811 | mutex_lock(&inode->i_mutex); | 811 | mutex_lock(&inode->i_mutex); |
812 | ext4_flush_completed_IO(inode); | 812 | ext4_flush_unwritten_io(inode); |
813 | mutex_unlock(&inode->i_mutex); | 813 | mutex_unlock(&inode->i_mutex); |
814 | } | 814 | } |
815 | /* | ||
816 | * Nolock dioread optimization may be dynamically disabled | ||
817 | * via ext4_inode_block_unlocked_dio(). Check inode's state | ||
818 | * while holding extra i_dio_count ref. | ||
819 | */ | ||
820 | atomic_inc(&inode->i_dio_count); | ||
821 | smp_mb(); | ||
822 | if (unlikely(ext4_test_inode_state(inode, | ||
823 | EXT4_STATE_DIOREAD_LOCK))) { | ||
824 | inode_dio_done(inode); | ||
825 | goto locked; | ||
826 | } | ||
815 | ret = __blockdev_direct_IO(rw, iocb, inode, | 827 | ret = __blockdev_direct_IO(rw, iocb, inode, |
816 | inode->i_sb->s_bdev, iov, | 828 | inode->i_sb->s_bdev, iov, |
817 | offset, nr_segs, | 829 | offset, nr_segs, |
818 | ext4_get_block, NULL, NULL, 0); | 830 | ext4_get_block, NULL, NULL, 0); |
831 | inode_dio_done(inode); | ||
819 | } else { | 832 | } else { |
833 | locked: | ||
820 | ret = blockdev_direct_IO(rw, iocb, inode, iov, | 834 | ret = blockdev_direct_IO(rw, iocb, inode, iov, |
821 | offset, nr_segs, ext4_get_block); | 835 | offset, nr_segs, ext4_get_block); |
822 | 836 | ||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index dff171c3a123..b3c243b9afa5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -732,11 +732,13 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
732 | err = ext4_map_blocks(handle, inode, &map, | 732 | err = ext4_map_blocks(handle, inode, &map, |
733 | create ? EXT4_GET_BLOCKS_CREATE : 0); | 733 | create ? EXT4_GET_BLOCKS_CREATE : 0); |
734 | 734 | ||
735 | /* ensure we send some value back into *errp */ | ||
736 | *errp = 0; | ||
737 | |||
735 | if (err < 0) | 738 | if (err < 0) |
736 | *errp = err; | 739 | *errp = err; |
737 | if (err <= 0) | 740 | if (err <= 0) |
738 | return NULL; | 741 | return NULL; |
739 | *errp = 0; | ||
740 | 742 | ||
741 | bh = sb_getblk(inode->i_sb, map.m_pblk); | 743 | bh = sb_getblk(inode->i_sb, map.m_pblk); |
742 | if (!bh) { | 744 | if (!bh) { |
@@ -1954,9 +1956,6 @@ out: | |||
1954 | return ret; | 1956 | return ret; |
1955 | } | 1957 | } |
1956 | 1958 | ||
1957 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | ||
1958 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | ||
1959 | |||
1960 | /* | 1959 | /* |
1961 | * Note that we don't need to start a transaction unless we're journaling data | 1960 | * Note that we don't need to start a transaction unless we're journaling data |
1962 | * because we should have holes filled from ext4_page_mkwrite(). We even don't | 1961 | * because we should have holes filled from ext4_page_mkwrite(). We even don't |
@@ -2463,6 +2462,16 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
2463 | free_blocks = EXT4_C2B(sbi, | 2462 | free_blocks = EXT4_C2B(sbi, |
2464 | percpu_counter_read_positive(&sbi->s_freeclusters_counter)); | 2463 | percpu_counter_read_positive(&sbi->s_freeclusters_counter)); |
2465 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); | 2464 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); |
2465 | /* | ||
2466 | * Start pushing delalloc when 1/2 of free blocks are dirty. | ||
2467 | */ | ||
2468 | if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && | ||
2469 | !writeback_in_progress(sb->s_bdi) && | ||
2470 | down_read_trylock(&sb->s_umount)) { | ||
2471 | writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); | ||
2472 | up_read(&sb->s_umount); | ||
2473 | } | ||
2474 | |||
2466 | if (2 * free_blocks < 3 * dirty_blocks || | 2475 | if (2 * free_blocks < 3 * dirty_blocks || |
2467 | free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { | 2476 | free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { |
2468 | /* | 2477 | /* |
@@ -2471,13 +2480,6 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
2471 | */ | 2480 | */ |
2472 | return 1; | 2481 | return 1; |
2473 | } | 2482 | } |
2474 | /* | ||
2475 | * Even if we don't switch but are nearing capacity, | ||
2476 | * start pushing delalloc when 1/2 of free blocks are dirty. | ||
2477 | */ | ||
2478 | if (free_blocks < 2 * dirty_blocks) | ||
2479 | writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE); | ||
2480 | |||
2481 | return 0; | 2483 | return 0; |
2482 | } | 2484 | } |
2483 | 2485 | ||
@@ -2879,9 +2881,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
2879 | { | 2881 | { |
2880 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; | 2882 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; |
2881 | ext4_io_end_t *io_end = iocb->private; | 2883 | ext4_io_end_t *io_end = iocb->private; |
2882 | struct workqueue_struct *wq; | ||
2883 | unsigned long flags; | ||
2884 | struct ext4_inode_info *ei; | ||
2885 | 2884 | ||
2886 | /* if not async direct IO or dio with 0 bytes write, just return */ | 2885 | /* if not async direct IO or dio with 0 bytes write, just return */ |
2887 | if (!io_end || !size) | 2886 | if (!io_end || !size) |
@@ -2910,24 +2909,14 @@ out: | |||
2910 | io_end->iocb = iocb; | 2909 | io_end->iocb = iocb; |
2911 | io_end->result = ret; | 2910 | io_end->result = ret; |
2912 | } | 2911 | } |
2913 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | ||
2914 | 2912 | ||
2915 | /* Add the io_end to per-inode completed aio dio list*/ | 2913 | ext4_add_complete_io(io_end); |
2916 | ei = EXT4_I(io_end->inode); | ||
2917 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
2918 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | ||
2919 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
2920 | |||
2921 | /* queue the work to convert unwritten extents to written */ | ||
2922 | queue_work(wq, &io_end->work); | ||
2923 | } | 2914 | } |
2924 | 2915 | ||
2925 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | 2916 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) |
2926 | { | 2917 | { |
2927 | ext4_io_end_t *io_end = bh->b_private; | 2918 | ext4_io_end_t *io_end = bh->b_private; |
2928 | struct workqueue_struct *wq; | ||
2929 | struct inode *inode; | 2919 | struct inode *inode; |
2930 | unsigned long flags; | ||
2931 | 2920 | ||
2932 | if (!test_clear_buffer_uninit(bh) || !io_end) | 2921 | if (!test_clear_buffer_uninit(bh) || !io_end) |
2933 | goto out; | 2922 | goto out; |
@@ -2946,15 +2935,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
2946 | */ | 2935 | */ |
2947 | inode = io_end->inode; | 2936 | inode = io_end->inode; |
2948 | ext4_set_io_unwritten_flag(inode, io_end); | 2937 | ext4_set_io_unwritten_flag(inode, io_end); |
2949 | 2938 | ext4_add_complete_io(io_end); | |
2950 | /* Add the io_end to per-inode completed io list*/ | ||
2951 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
2952 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | ||
2953 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
2954 | |||
2955 | wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; | ||
2956 | /* queue the work to convert unwritten extents to written */ | ||
2957 | queue_work(wq, &io_end->work); | ||
2958 | out: | 2939 | out: |
2959 | bh->b_private = NULL; | 2940 | bh->b_private = NULL; |
2960 | bh->b_end_io = NULL; | 2941 | bh->b_end_io = NULL; |
@@ -3029,6 +3010,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3029 | overwrite = *((int *)iocb->private); | 3010 | overwrite = *((int *)iocb->private); |
3030 | 3011 | ||
3031 | if (overwrite) { | 3012 | if (overwrite) { |
3013 | atomic_inc(&inode->i_dio_count); | ||
3032 | down_read(&EXT4_I(inode)->i_data_sem); | 3014 | down_read(&EXT4_I(inode)->i_data_sem); |
3033 | mutex_unlock(&inode->i_mutex); | 3015 | mutex_unlock(&inode->i_mutex); |
3034 | } | 3016 | } |
@@ -3054,7 +3036,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3054 | * hook to the iocb. | 3036 | * hook to the iocb. |
3055 | */ | 3037 | */ |
3056 | iocb->private = NULL; | 3038 | iocb->private = NULL; |
3057 | EXT4_I(inode)->cur_aio_dio = NULL; | 3039 | ext4_inode_aio_set(inode, NULL); |
3058 | if (!is_sync_kiocb(iocb)) { | 3040 | if (!is_sync_kiocb(iocb)) { |
3059 | ext4_io_end_t *io_end = | 3041 | ext4_io_end_t *io_end = |
3060 | ext4_init_io_end(inode, GFP_NOFS); | 3042 | ext4_init_io_end(inode, GFP_NOFS); |
@@ -3071,7 +3053,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3071 | * is a unwritten extents needs to be converted | 3053 | * is a unwritten extents needs to be converted |
3072 | * when IO is completed. | 3054 | * when IO is completed. |
3073 | */ | 3055 | */ |
3074 | EXT4_I(inode)->cur_aio_dio = iocb->private; | 3056 | ext4_inode_aio_set(inode, io_end); |
3075 | } | 3057 | } |
3076 | 3058 | ||
3077 | if (overwrite) | 3059 | if (overwrite) |
@@ -3091,7 +3073,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3091 | NULL, | 3073 | NULL, |
3092 | DIO_LOCKING); | 3074 | DIO_LOCKING); |
3093 | if (iocb->private) | 3075 | if (iocb->private) |
3094 | EXT4_I(inode)->cur_aio_dio = NULL; | 3076 | ext4_inode_aio_set(inode, NULL); |
3095 | /* | 3077 | /* |
3096 | * The io_end structure takes a reference to the inode, | 3078 | * The io_end structure takes a reference to the inode, |
3097 | * that structure needs to be destroyed and the | 3079 | * that structure needs to be destroyed and the |
@@ -3126,6 +3108,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3126 | retake_lock: | 3108 | retake_lock: |
3127 | /* take i_mutex locking again if we do a ovewrite dio */ | 3109 | /* take i_mutex locking again if we do a ovewrite dio */ |
3128 | if (overwrite) { | 3110 | if (overwrite) { |
3111 | inode_dio_done(inode); | ||
3129 | up_read(&EXT4_I(inode)->i_data_sem); | 3112 | up_read(&EXT4_I(inode)->i_data_sem); |
3130 | mutex_lock(&inode->i_mutex); | 3113 | mutex_lock(&inode->i_mutex); |
3131 | } | 3114 | } |
@@ -3313,7 +3296,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle, | |||
3313 | * handle: The journal handle | 3296 | * handle: The journal handle |
3314 | * inode: The files inode | 3297 | * inode: The files inode |
3315 | * page: A locked page that contains the offset "from" | 3298 | * page: A locked page that contains the offset "from" |
3316 | * from: The starting byte offset (from the begining of the file) | 3299 | * from: The starting byte offset (from the beginning of the file) |
3317 | * to begin discarding | 3300 | * to begin discarding |
3318 | * len: The length of bytes to discard | 3301 | * len: The length of bytes to discard |
3319 | * flags: Optional flags that may be used: | 3302 | * flags: Optional flags that may be used: |
@@ -3321,11 +3304,11 @@ int ext4_discard_partial_page_buffers(handle_t *handle, | |||
3321 | * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED | 3304 | * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED |
3322 | * Only zero the regions of the page whose buffer heads | 3305 | * Only zero the regions of the page whose buffer heads |
3323 | * have already been unmapped. This flag is appropriate | 3306 | * have already been unmapped. This flag is appropriate |
3324 | * for updateing the contents of a page whose blocks may | 3307 | * for updating the contents of a page whose blocks may |
3325 | * have already been released, and we only want to zero | 3308 | * have already been released, and we only want to zero |
3326 | * out the regions that correspond to those released blocks. | 3309 | * out the regions that correspond to those released blocks. |
3327 | * | 3310 | * |
3328 | * Returns zero on sucess or negative on failure. | 3311 | * Returns zero on success or negative on failure. |
3329 | */ | 3312 | */ |
3330 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | 3313 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, |
3331 | struct inode *inode, struct page *page, loff_t from, | 3314 | struct inode *inode, struct page *page, loff_t from, |
@@ -3486,7 +3469,7 @@ int ext4_can_truncate(struct inode *inode) | |||
3486 | * @offset: The offset where the hole will begin | 3469 | * @offset: The offset where the hole will begin |
3487 | * @len: The length of the hole | 3470 | * @len: The length of the hole |
3488 | * | 3471 | * |
3489 | * Returns: 0 on sucess or negative on failure | 3472 | * Returns: 0 on success or negative on failure |
3490 | */ | 3473 | */ |
3491 | 3474 | ||
3492 | int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | 3475 | int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) |
@@ -4008,7 +3991,7 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
4008 | 3991 | ||
4009 | if (i_blocks <= ~0U) { | 3992 | if (i_blocks <= ~0U) { |
4010 | /* | 3993 | /* |
4011 | * i_blocks can be represnted in a 32 bit variable | 3994 | * i_blocks can be represented in a 32 bit variable |
4012 | * as multiple of 512 bytes | 3995 | * as multiple of 512 bytes |
4013 | */ | 3996 | */ |
4014 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 3997 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
@@ -4052,6 +4035,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4052 | struct ext4_inode_info *ei = EXT4_I(inode); | 4035 | struct ext4_inode_info *ei = EXT4_I(inode); |
4053 | struct buffer_head *bh = iloc->bh; | 4036 | struct buffer_head *bh = iloc->bh; |
4054 | int err = 0, rc, block; | 4037 | int err = 0, rc, block; |
4038 | int need_datasync = 0; | ||
4055 | uid_t i_uid; | 4039 | uid_t i_uid; |
4056 | gid_t i_gid; | 4040 | gid_t i_gid; |
4057 | 4041 | ||
@@ -4102,7 +4086,10 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4102 | raw_inode->i_file_acl_high = | 4086 | raw_inode->i_file_acl_high = |
4103 | cpu_to_le16(ei->i_file_acl >> 32); | 4087 | cpu_to_le16(ei->i_file_acl >> 32); |
4104 | raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); | 4088 | raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); |
4105 | ext4_isize_set(raw_inode, ei->i_disksize); | 4089 | if (ei->i_disksize != ext4_isize(raw_inode)) { |
4090 | ext4_isize_set(raw_inode, ei->i_disksize); | ||
4091 | need_datasync = 1; | ||
4092 | } | ||
4106 | if (ei->i_disksize > 0x7fffffffULL) { | 4093 | if (ei->i_disksize > 0x7fffffffULL) { |
4107 | struct super_block *sb = inode->i_sb; | 4094 | struct super_block *sb = inode->i_sb; |
4108 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 4095 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, |
@@ -4155,7 +4142,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4155 | err = rc; | 4142 | err = rc; |
4156 | ext4_clear_inode_state(inode, EXT4_STATE_NEW); | 4143 | ext4_clear_inode_state(inode, EXT4_STATE_NEW); |
4157 | 4144 | ||
4158 | ext4_update_inode_fsync_trans(handle, inode, 0); | 4145 | ext4_update_inode_fsync_trans(handle, inode, need_datasync); |
4159 | out_brelse: | 4146 | out_brelse: |
4160 | brelse(bh); | 4147 | brelse(bh); |
4161 | ext4_std_error(inode->i_sb, err); | 4148 | ext4_std_error(inode->i_sb, err); |
@@ -4169,7 +4156,7 @@ out_brelse: | |||
4169 | * | 4156 | * |
4170 | * - Within generic_file_write() for O_SYNC files. | 4157 | * - Within generic_file_write() for O_SYNC files. |
4171 | * Here, there will be no transaction running. We wait for any running | 4158 | * Here, there will be no transaction running. We wait for any running |
4172 | * trasnaction to commit. | 4159 | * transaction to commit. |
4173 | * | 4160 | * |
4174 | * - Within sys_sync(), kupdate and such. | 4161 | * - Within sys_sync(), kupdate and such. |
4175 | * We wait on commit, if tol to. | 4162 | * We wait on commit, if tol to. |
@@ -4298,7 +4285,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4298 | } | 4285 | } |
4299 | 4286 | ||
4300 | if (attr->ia_valid & ATTR_SIZE) { | 4287 | if (attr->ia_valid & ATTR_SIZE) { |
4301 | inode_dio_wait(inode); | ||
4302 | 4288 | ||
4303 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { | 4289 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
4304 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 4290 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
@@ -4347,8 +4333,17 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4347 | } | 4333 | } |
4348 | 4334 | ||
4349 | if (attr->ia_valid & ATTR_SIZE) { | 4335 | if (attr->ia_valid & ATTR_SIZE) { |
4350 | if (attr->ia_size != i_size_read(inode)) | 4336 | if (attr->ia_size != i_size_read(inode)) { |
4351 | truncate_setsize(inode, attr->ia_size); | 4337 | truncate_setsize(inode, attr->ia_size); |
4338 | /* Inode size will be reduced, wait for dio in flight. | ||
4339 | * Temporarily disable dioread_nolock to prevent | ||
4340 | * livelock. */ | ||
4341 | if (orphan) { | ||
4342 | ext4_inode_block_unlocked_dio(inode); | ||
4343 | inode_dio_wait(inode); | ||
4344 | ext4_inode_resume_unlocked_dio(inode); | ||
4345 | } | ||
4346 | } | ||
4352 | ext4_truncate(inode); | 4347 | ext4_truncate(inode); |
4353 | } | 4348 | } |
4354 | 4349 | ||
@@ -4413,7 +4408,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4413 | * worse case, the indexs blocks spread over different block groups | 4408 | * worse case, the indexs blocks spread over different block groups |
4414 | * | 4409 | * |
4415 | * If datablocks are discontiguous, they are possible to spread over | 4410 | * If datablocks are discontiguous, they are possible to spread over |
4416 | * different block groups too. If they are contiuguous, with flexbg, | 4411 | * different block groups too. If they are contiguous, with flexbg, |
4417 | * they could still across block group boundary. | 4412 | * they could still across block group boundary. |
4418 | * | 4413 | * |
4419 | * Also account for superblock, inode, quota and xattr blocks | 4414 | * Also account for superblock, inode, quota and xattr blocks |
@@ -4727,6 +4722,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
4727 | return err; | 4722 | return err; |
4728 | } | 4723 | } |
4729 | 4724 | ||
4725 | /* Wait for all existing dio workers */ | ||
4726 | ext4_inode_block_unlocked_dio(inode); | ||
4727 | inode_dio_wait(inode); | ||
4728 | |||
4730 | jbd2_journal_lock_updates(journal); | 4729 | jbd2_journal_lock_updates(journal); |
4731 | 4730 | ||
4732 | /* | 4731 | /* |
@@ -4746,6 +4745,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
4746 | ext4_set_aops(inode); | 4745 | ext4_set_aops(inode); |
4747 | 4746 | ||
4748 | jbd2_journal_unlock_updates(journal); | 4747 | jbd2_journal_unlock_updates(journal); |
4748 | ext4_inode_resume_unlocked_dio(inode); | ||
4749 | 4749 | ||
4750 | /* Finally we can mark the inode as dirty. */ | 4750 | /* Finally we can mark the inode as dirty. */ |
4751 | 4751 | ||
@@ -4780,6 +4780,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
4780 | int retries = 0; | 4780 | int retries = 0; |
4781 | 4781 | ||
4782 | sb_start_pagefault(inode->i_sb); | 4782 | sb_start_pagefault(inode->i_sb); |
4783 | file_update_time(vma->vm_file); | ||
4783 | /* Delalloc case is easy... */ | 4784 | /* Delalloc case is easy... */ |
4784 | if (test_opt(inode->i_sb, DELALLOC) && | 4785 | if (test_opt(inode->i_sb, DELALLOC) && |
4785 | !ext4_should_journal_data(inode) && | 4786 | !ext4_should_journal_data(inode) && |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7f7dad787603..5747f52f7c72 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -233,7 +233,7 @@ group_extend_out: | |||
233 | 233 | ||
234 | case EXT4_IOC_MOVE_EXT: { | 234 | case EXT4_IOC_MOVE_EXT: { |
235 | struct move_extent me; | 235 | struct move_extent me; |
236 | struct file *donor_filp; | 236 | struct fd donor; |
237 | int err; | 237 | int err; |
238 | 238 | ||
239 | if (!(filp->f_mode & FMODE_READ) || | 239 | if (!(filp->f_mode & FMODE_READ) || |
@@ -245,11 +245,11 @@ group_extend_out: | |||
245 | return -EFAULT; | 245 | return -EFAULT; |
246 | me.moved_len = 0; | 246 | me.moved_len = 0; |
247 | 247 | ||
248 | donor_filp = fget(me.donor_fd); | 248 | donor = fdget(me.donor_fd); |
249 | if (!donor_filp) | 249 | if (!donor.file) |
250 | return -EBADF; | 250 | return -EBADF; |
251 | 251 | ||
252 | if (!(donor_filp->f_mode & FMODE_WRITE)) { | 252 | if (!(donor.file->f_mode & FMODE_WRITE)) { |
253 | err = -EBADF; | 253 | err = -EBADF; |
254 | goto mext_out; | 254 | goto mext_out; |
255 | } | 255 | } |
@@ -258,14 +258,15 @@ group_extend_out: | |||
258 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | 258 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { |
259 | ext4_msg(sb, KERN_ERR, | 259 | ext4_msg(sb, KERN_ERR, |
260 | "Online defrag not supported with bigalloc"); | 260 | "Online defrag not supported with bigalloc"); |
261 | return -EOPNOTSUPP; | 261 | err = -EOPNOTSUPP; |
262 | goto mext_out; | ||
262 | } | 263 | } |
263 | 264 | ||
264 | err = mnt_want_write_file(filp); | 265 | err = mnt_want_write_file(filp); |
265 | if (err) | 266 | if (err) |
266 | goto mext_out; | 267 | goto mext_out; |
267 | 268 | ||
268 | err = ext4_move_extents(filp, donor_filp, me.orig_start, | 269 | err = ext4_move_extents(filp, donor.file, me.orig_start, |
269 | me.donor_start, me.len, &me.moved_len); | 270 | me.donor_start, me.len, &me.moved_len); |
270 | mnt_drop_write_file(filp); | 271 | mnt_drop_write_file(filp); |
271 | 272 | ||
@@ -273,7 +274,7 @@ group_extend_out: | |||
273 | &me, sizeof(me))) | 274 | &me, sizeof(me))) |
274 | err = -EFAULT; | 275 | err = -EFAULT; |
275 | mext_out: | 276 | mext_out: |
276 | fput(donor_filp); | 277 | fdput(donor); |
277 | return err; | 278 | return err; |
278 | } | 279 | } |
279 | 280 | ||
@@ -365,26 +366,11 @@ group_add_out: | |||
365 | return -EOPNOTSUPP; | 366 | return -EOPNOTSUPP; |
366 | } | 367 | } |
367 | 368 | ||
368 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
369 | EXT4_FEATURE_INCOMPAT_META_BG)) { | ||
370 | ext4_msg(sb, KERN_ERR, | ||
371 | "Online resizing not (yet) supported with meta_bg"); | ||
372 | return -EOPNOTSUPP; | ||
373 | } | ||
374 | |||
375 | if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, | 369 | if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, |
376 | sizeof(__u64))) { | 370 | sizeof(__u64))) { |
377 | return -EFAULT; | 371 | return -EFAULT; |
378 | } | 372 | } |
379 | 373 | ||
380 | if (n_blocks_count > MAX_32_NUM && | ||
381 | !EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
382 | EXT4_FEATURE_INCOMPAT_64BIT)) { | ||
383 | ext4_msg(sb, KERN_ERR, | ||
384 | "File system only supports 32-bit block numbers"); | ||
385 | return -EOPNOTSUPP; | ||
386 | } | ||
387 | |||
388 | err = ext4_resize_begin(sb); | 374 | err = ext4_resize_begin(sb); |
389 | if (err) | 375 | if (err) |
390 | return err; | 376 | return err; |
@@ -419,13 +405,6 @@ resizefs_out: | |||
419 | if (!blk_queue_discard(q)) | 405 | if (!blk_queue_discard(q)) |
420 | return -EOPNOTSUPP; | 406 | return -EOPNOTSUPP; |
421 | 407 | ||
422 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
423 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
424 | ext4_msg(sb, KERN_ERR, | ||
425 | "FITRIM not supported with bigalloc"); | ||
426 | return -EOPNOTSUPP; | ||
427 | } | ||
428 | |||
429 | if (copy_from_user(&range, (struct fstrim_range __user *)arg, | 408 | if (copy_from_user(&range, (struct fstrim_range __user *)arg, |
430 | sizeof(range))) | 409 | sizeof(range))) |
431 | return -EFAULT; | 410 | return -EFAULT; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8eae94771c45..f8b27bf80aca 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "ext4_jbd2.h" | 24 | #include "ext4_jbd2.h" |
25 | #include "mballoc.h" | 25 | #include "mballoc.h" |
26 | #include <linux/debugfs.h> | 26 | #include <linux/debugfs.h> |
27 | #include <linux/log2.h> | ||
27 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
28 | #include <trace/events/ext4.h> | 29 | #include <trace/events/ext4.h> |
29 | 30 | ||
@@ -1338,17 +1339,17 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1338 | mb_check_buddy(e4b); | 1339 | mb_check_buddy(e4b); |
1339 | } | 1340 | } |
1340 | 1341 | ||
1341 | static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | 1342 | static int mb_find_extent(struct ext4_buddy *e4b, int block, |
1342 | int needed, struct ext4_free_extent *ex) | 1343 | int needed, struct ext4_free_extent *ex) |
1343 | { | 1344 | { |
1344 | int next = block; | 1345 | int next = block; |
1345 | int max; | 1346 | int max, order; |
1346 | void *buddy; | 1347 | void *buddy; |
1347 | 1348 | ||
1348 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); | 1349 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); |
1349 | BUG_ON(ex == NULL); | 1350 | BUG_ON(ex == NULL); |
1350 | 1351 | ||
1351 | buddy = mb_find_buddy(e4b, order, &max); | 1352 | buddy = mb_find_buddy(e4b, 0, &max); |
1352 | BUG_ON(buddy == NULL); | 1353 | BUG_ON(buddy == NULL); |
1353 | BUG_ON(block >= max); | 1354 | BUG_ON(block >= max); |
1354 | if (mb_test_bit(block, buddy)) { | 1355 | if (mb_test_bit(block, buddy)) { |
@@ -1358,12 +1359,9 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | |||
1358 | return 0; | 1359 | return 0; |
1359 | } | 1360 | } |
1360 | 1361 | ||
1361 | /* FIXME dorp order completely ? */ | 1362 | /* find actual order */ |
1362 | if (likely(order == 0)) { | 1363 | order = mb_find_order_for_block(e4b, block); |
1363 | /* find actual order */ | 1364 | block = block >> order; |
1364 | order = mb_find_order_for_block(e4b, block); | ||
1365 | block = block >> order; | ||
1366 | } | ||
1367 | 1365 | ||
1368 | ex->fe_len = 1 << order; | 1366 | ex->fe_len = 1 << order; |
1369 | ex->fe_start = block << order; | 1367 | ex->fe_start = block << order; |
@@ -1549,7 +1547,7 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac, | |||
1549 | /* recheck chunk's availability - we don't know | 1547 | /* recheck chunk's availability - we don't know |
1550 | * when it was found (within this lock-unlock | 1548 | * when it was found (within this lock-unlock |
1551 | * period or not) */ | 1549 | * period or not) */ |
1552 | max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex); | 1550 | max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex); |
1553 | if (max >= gex->fe_len) { | 1551 | if (max >= gex->fe_len) { |
1554 | ext4_mb_use_best_found(ac, e4b); | 1552 | ext4_mb_use_best_found(ac, e4b); |
1555 | return; | 1553 | return; |
@@ -1641,7 +1639,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, | |||
1641 | return err; | 1639 | return err; |
1642 | 1640 | ||
1643 | ext4_lock_group(ac->ac_sb, group); | 1641 | ext4_lock_group(ac->ac_sb, group); |
1644 | max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex); | 1642 | max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); |
1645 | 1643 | ||
1646 | if (max > 0) { | 1644 | if (max > 0) { |
1647 | ac->ac_b_ex = ex; | 1645 | ac->ac_b_ex = ex; |
@@ -1662,17 +1660,20 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, | |||
1662 | int max; | 1660 | int max; |
1663 | int err; | 1661 | int err; |
1664 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | 1662 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
1663 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); | ||
1665 | struct ext4_free_extent ex; | 1664 | struct ext4_free_extent ex; |
1666 | 1665 | ||
1667 | if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) | 1666 | if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) |
1668 | return 0; | 1667 | return 0; |
1668 | if (grp->bb_free == 0) | ||
1669 | return 0; | ||
1669 | 1670 | ||
1670 | err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); | 1671 | err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); |
1671 | if (err) | 1672 | if (err) |
1672 | return err; | 1673 | return err; |
1673 | 1674 | ||
1674 | ext4_lock_group(ac->ac_sb, group); | 1675 | ext4_lock_group(ac->ac_sb, group); |
1675 | max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start, | 1676 | max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, |
1676 | ac->ac_g_ex.fe_len, &ex); | 1677 | ac->ac_g_ex.fe_len, &ex); |
1677 | 1678 | ||
1678 | if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { | 1679 | if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { |
@@ -1788,7 +1789,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1788 | break; | 1789 | break; |
1789 | } | 1790 | } |
1790 | 1791 | ||
1791 | mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); | 1792 | mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex); |
1792 | BUG_ON(ex.fe_len <= 0); | 1793 | BUG_ON(ex.fe_len <= 0); |
1793 | if (free < ex.fe_len) { | 1794 | if (free < ex.fe_len) { |
1794 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, | 1795 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, |
@@ -1840,7 +1841,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | |||
1840 | 1841 | ||
1841 | while (i < EXT4_CLUSTERS_PER_GROUP(sb)) { | 1842 | while (i < EXT4_CLUSTERS_PER_GROUP(sb)) { |
1842 | if (!mb_test_bit(i, bitmap)) { | 1843 | if (!mb_test_bit(i, bitmap)) { |
1843 | max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); | 1844 | max = mb_find_extent(e4b, i, sbi->s_stripe, &ex); |
1844 | if (max >= sbi->s_stripe) { | 1845 | if (max >= sbi->s_stripe) { |
1845 | ac->ac_found++; | 1846 | ac->ac_found++; |
1846 | ac->ac_b_ex = ex; | 1847 | ac->ac_b_ex = ex; |
@@ -1862,6 +1863,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1862 | 1863 | ||
1863 | BUG_ON(cr < 0 || cr >= 4); | 1864 | BUG_ON(cr < 0 || cr >= 4); |
1864 | 1865 | ||
1866 | free = grp->bb_free; | ||
1867 | if (free == 0) | ||
1868 | return 0; | ||
1869 | if (cr <= 2 && free < ac->ac_g_ex.fe_len) | ||
1870 | return 0; | ||
1871 | |||
1865 | /* We only do this if the grp has never been initialized */ | 1872 | /* We only do this if the grp has never been initialized */ |
1866 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | 1873 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
1867 | int ret = ext4_mb_init_group(ac->ac_sb, group); | 1874 | int ret = ext4_mb_init_group(ac->ac_sb, group); |
@@ -1869,10 +1876,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1869 | return 0; | 1876 | return 0; |
1870 | } | 1877 | } |
1871 | 1878 | ||
1872 | free = grp->bb_free; | ||
1873 | fragments = grp->bb_fragments; | 1879 | fragments = grp->bb_fragments; |
1874 | if (free == 0) | ||
1875 | return 0; | ||
1876 | if (fragments == 0) | 1880 | if (fragments == 0) |
1877 | return 0; | 1881 | return 0; |
1878 | 1882 | ||
@@ -2163,6 +2167,39 @@ static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) | |||
2163 | return cachep; | 2167 | return cachep; |
2164 | } | 2168 | } |
2165 | 2169 | ||
2170 | /* | ||
2171 | * Allocate the top-level s_group_info array for the specified number | ||
2172 | * of groups | ||
2173 | */ | ||
2174 | int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) | ||
2175 | { | ||
2176 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2177 | unsigned size; | ||
2178 | struct ext4_group_info ***new_groupinfo; | ||
2179 | |||
2180 | size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> | ||
2181 | EXT4_DESC_PER_BLOCK_BITS(sb); | ||
2182 | if (size <= sbi->s_group_info_size) | ||
2183 | return 0; | ||
2184 | |||
2185 | size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size); | ||
2186 | new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL); | ||
2187 | if (!new_groupinfo) { | ||
2188 | ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); | ||
2189 | return -ENOMEM; | ||
2190 | } | ||
2191 | if (sbi->s_group_info) { | ||
2192 | memcpy(new_groupinfo, sbi->s_group_info, | ||
2193 | sbi->s_group_info_size * sizeof(*sbi->s_group_info)); | ||
2194 | ext4_kvfree(sbi->s_group_info); | ||
2195 | } | ||
2196 | sbi->s_group_info = new_groupinfo; | ||
2197 | sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); | ||
2198 | ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", | ||
2199 | sbi->s_group_info_size); | ||
2200 | return 0; | ||
2201 | } | ||
2202 | |||
2166 | /* Create and initialize ext4_group_info data for the given group. */ | 2203 | /* Create and initialize ext4_group_info data for the given group. */ |
2167 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | 2204 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, |
2168 | struct ext4_group_desc *desc) | 2205 | struct ext4_group_desc *desc) |
@@ -2195,12 +2232,11 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2195 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | 2232 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; |
2196 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 2233 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
2197 | 2234 | ||
2198 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); | 2235 | meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL); |
2199 | if (meta_group_info[i] == NULL) { | 2236 | if (meta_group_info[i] == NULL) { |
2200 | ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); | 2237 | ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); |
2201 | goto exit_group_info; | 2238 | goto exit_group_info; |
2202 | } | 2239 | } |
2203 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); | ||
2204 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | 2240 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, |
2205 | &(meta_group_info[i]->bb_state)); | 2241 | &(meta_group_info[i]->bb_state)); |
2206 | 2242 | ||
@@ -2252,49 +2288,14 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2252 | ext4_group_t ngroups = ext4_get_groups_count(sb); | 2288 | ext4_group_t ngroups = ext4_get_groups_count(sb); |
2253 | ext4_group_t i; | 2289 | ext4_group_t i; |
2254 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2290 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2255 | struct ext4_super_block *es = sbi->s_es; | 2291 | int err; |
2256 | int num_meta_group_infos; | ||
2257 | int num_meta_group_infos_max; | ||
2258 | int array_size; | ||
2259 | struct ext4_group_desc *desc; | 2292 | struct ext4_group_desc *desc; |
2260 | struct kmem_cache *cachep; | 2293 | struct kmem_cache *cachep; |
2261 | 2294 | ||
2262 | /* This is the number of blocks used by GDT */ | 2295 | err = ext4_mb_alloc_groupinfo(sb, ngroups); |
2263 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - | 2296 | if (err) |
2264 | 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); | 2297 | return err; |
2265 | |||
2266 | /* | ||
2267 | * This is the total number of blocks used by GDT including | ||
2268 | * the number of reserved blocks for GDT. | ||
2269 | * The s_group_info array is allocated with this value | ||
2270 | * to allow a clean online resize without a complex | ||
2271 | * manipulation of pointer. | ||
2272 | * The drawback is the unused memory when no resize | ||
2273 | * occurs but it's very low in terms of pages | ||
2274 | * (see comments below) | ||
2275 | * Need to handle this properly when META_BG resizing is allowed | ||
2276 | */ | ||
2277 | num_meta_group_infos_max = num_meta_group_infos + | ||
2278 | le16_to_cpu(es->s_reserved_gdt_blocks); | ||
2279 | 2298 | ||
2280 | /* | ||
2281 | * array_size is the size of s_group_info array. We round it | ||
2282 | * to the next power of two because this approximation is done | ||
2283 | * internally by kmalloc so we can have some more memory | ||
2284 | * for free here (e.g. may be used for META_BG resize). | ||
2285 | */ | ||
2286 | array_size = 1; | ||
2287 | while (array_size < sizeof(*sbi->s_group_info) * | ||
2288 | num_meta_group_infos_max) | ||
2289 | array_size = array_size << 1; | ||
2290 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte | ||
2291 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. | ||
2292 | * So a two level scheme suffices for now. */ | ||
2293 | sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL); | ||
2294 | if (sbi->s_group_info == NULL) { | ||
2295 | ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); | ||
2296 | return -ENOMEM; | ||
2297 | } | ||
2298 | sbi->s_buddy_cache = new_inode(sb); | 2299 | sbi->s_buddy_cache = new_inode(sb); |
2299 | if (sbi->s_buddy_cache == NULL) { | 2300 | if (sbi->s_buddy_cache == NULL) { |
2300 | ext4_msg(sb, KERN_ERR, "can't get new inode"); | 2301 | ext4_msg(sb, KERN_ERR, "can't get new inode"); |
@@ -2322,7 +2323,7 @@ err_freebuddy: | |||
2322 | cachep = get_groupinfo_cache(sb->s_blocksize_bits); | 2323 | cachep = get_groupinfo_cache(sb->s_blocksize_bits); |
2323 | while (i-- > 0) | 2324 | while (i-- > 0) |
2324 | kmem_cache_free(cachep, ext4_get_group_info(sb, i)); | 2325 | kmem_cache_free(cachep, ext4_get_group_info(sb, i)); |
2325 | i = num_meta_group_infos; | 2326 | i = sbi->s_group_info_size; |
2326 | while (i-- > 0) | 2327 | while (i-- > 0) |
2327 | kfree(sbi->s_group_info[i]); | 2328 | kfree(sbi->s_group_info[i]); |
2328 | iput(sbi->s_buddy_cache); | 2329 | iput(sbi->s_buddy_cache); |
@@ -4008,7 +4009,6 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
4008 | ext4_get_group_no_and_offset(sb, goal, &group, &block); | 4009 | ext4_get_group_no_and_offset(sb, goal, &group, &block); |
4009 | 4010 | ||
4010 | /* set up allocation goals */ | 4011 | /* set up allocation goals */ |
4011 | memset(ac, 0, sizeof(struct ext4_allocation_context)); | ||
4012 | ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); | 4012 | ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); |
4013 | ac->ac_status = AC_STATUS_CONTINUE; | 4013 | ac->ac_status = AC_STATUS_CONTINUE; |
4014 | ac->ac_sb = sb; | 4014 | ac->ac_sb = sb; |
@@ -4291,7 +4291,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4291 | } | 4291 | } |
4292 | } | 4292 | } |
4293 | 4293 | ||
4294 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4294 | ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS); |
4295 | if (!ac) { | 4295 | if (!ac) { |
4296 | ar->len = 0; | 4296 | ar->len = 0; |
4297 | *errp = -ENOMEM; | 4297 | *errp = -ENOMEM; |
@@ -4657,6 +4657,8 @@ do_more: | |||
4657 | * with group lock held. generate_buddy look at | 4657 | * with group lock held. generate_buddy look at |
4658 | * them with group lock_held | 4658 | * them with group lock_held |
4659 | */ | 4659 | */ |
4660 | if (test_opt(sb, DISCARD)) | ||
4661 | ext4_issue_discard(sb, block_group, bit, count); | ||
4660 | ext4_lock_group(sb, block_group); | 4662 | ext4_lock_group(sb, block_group); |
4661 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); | 4663 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); |
4662 | mb_free_blocks(inode, &e4b, bit, count_clusters); | 4664 | mb_free_blocks(inode, &e4b, bit, count_clusters); |
@@ -4709,7 +4711,7 @@ error_return: | |||
4709 | * ext4_group_add_blocks() -- Add given blocks to an existing group | 4711 | * ext4_group_add_blocks() -- Add given blocks to an existing group |
4710 | * @handle: handle to this transaction | 4712 | * @handle: handle to this transaction |
4711 | * @sb: super block | 4713 | * @sb: super block |
4712 | * @block: start physcial block to add to the block group | 4714 | * @block: start physical block to add to the block group |
4713 | * @count: number of blocks to free | 4715 | * @count: number of blocks to free |
4714 | * | 4716 | * |
4715 | * This marks the blocks as free in the bitmap and buddy. | 4717 | * This marks the blocks as free in the bitmap and buddy. |
@@ -4988,7 +4990,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4988 | 4990 | ||
4989 | start = range->start >> sb->s_blocksize_bits; | 4991 | start = range->start >> sb->s_blocksize_bits; |
4990 | end = start + (range->len >> sb->s_blocksize_bits) - 1; | 4992 | end = start + (range->len >> sb->s_blocksize_bits) - 1; |
4991 | minlen = range->minlen >> sb->s_blocksize_bits; | 4993 | minlen = EXT4_NUM_B2C(EXT4_SB(sb), |
4994 | range->minlen >> sb->s_blocksize_bits); | ||
4992 | 4995 | ||
4993 | if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || | 4996 | if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || |
4994 | unlikely(start >= max_blks)) | 4997 | unlikely(start >= max_blks)) |
@@ -5048,6 +5051,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
5048 | atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); | 5051 | atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); |
5049 | 5052 | ||
5050 | out: | 5053 | out: |
5051 | range->len = trimmed * sb->s_blocksize; | 5054 | range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; |
5052 | return ret; | 5055 | return ret; |
5053 | } | 5056 | } |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c070618c21ce..3ccd889ba953 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -65,11 +65,6 @@ extern u8 mb_enable_debug; | |||
65 | #define MB_DEFAULT_MIN_TO_SCAN 10 | 65 | #define MB_DEFAULT_MIN_TO_SCAN 10 |
66 | 66 | ||
67 | /* | 67 | /* |
68 | * How many groups mballoc will scan looking for the best chunk | ||
69 | */ | ||
70 | #define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5 | ||
71 | |||
72 | /* | ||
73 | * with 'ext4_mb_stats' allocator will collect stats that will be | 68 | * with 'ext4_mb_stats' allocator will collect stats that will be |
74 | * shown at umount. The collecting costs though! | 69 | * shown at umount. The collecting costs though! |
75 | */ | 70 | */ |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index c5826c623e7a..292daeeed455 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -141,55 +141,21 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
141 | } | 141 | } |
142 | 142 | ||
143 | /** | 143 | /** |
144 | * mext_check_null_inode - NULL check for two inodes | ||
145 | * | ||
146 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. | ||
147 | */ | ||
148 | static int | ||
149 | mext_check_null_inode(struct inode *inode1, struct inode *inode2, | ||
150 | const char *function, unsigned int line) | ||
151 | { | ||
152 | int ret = 0; | ||
153 | |||
154 | if (inode1 == NULL) { | ||
155 | __ext4_error(inode2->i_sb, function, line, | ||
156 | "Both inodes should not be NULL: " | ||
157 | "inode1 NULL inode2 %lu", inode2->i_ino); | ||
158 | ret = -EIO; | ||
159 | } else if (inode2 == NULL) { | ||
160 | __ext4_error(inode1->i_sb, function, line, | ||
161 | "Both inodes should not be NULL: " | ||
162 | "inode1 %lu inode2 NULL", inode1->i_ino); | ||
163 | ret = -EIO; | ||
164 | } | ||
165 | return ret; | ||
166 | } | ||
167 | |||
168 | /** | ||
169 | * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem | 144 | * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem |
170 | * | 145 | * |
171 | * @orig_inode: original inode structure | 146 | * Acquire write lock of i_data_sem of the two inodes |
172 | * @donor_inode: donor inode structure | ||
173 | * Acquire write lock of i_data_sem of the two inodes (orig and donor) by | ||
174 | * i_ino order. | ||
175 | */ | 147 | */ |
176 | static void | 148 | static void |
177 | double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) | 149 | double_down_write_data_sem(struct inode *first, struct inode *second) |
178 | { | 150 | { |
179 | struct inode *first = orig_inode, *second = donor_inode; | 151 | if (first < second) { |
152 | down_write(&EXT4_I(first)->i_data_sem); | ||
153 | down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); | ||
154 | } else { | ||
155 | down_write(&EXT4_I(second)->i_data_sem); | ||
156 | down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING); | ||
180 | 157 | ||
181 | /* | ||
182 | * Use the inode number to provide the stable locking order instead | ||
183 | * of its address, because the C language doesn't guarantee you can | ||
184 | * compare pointers that don't come from the same array. | ||
185 | */ | ||
186 | if (donor_inode->i_ino < orig_inode->i_ino) { | ||
187 | first = donor_inode; | ||
188 | second = orig_inode; | ||
189 | } | 158 | } |
190 | |||
191 | down_write(&EXT4_I(first)->i_data_sem); | ||
192 | down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); | ||
193 | } | 159 | } |
194 | 160 | ||
195 | /** | 161 | /** |
@@ -604,9 +570,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
604 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); | 570 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); |
605 | 571 | ||
606 | ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); | 572 | ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); |
607 | tmp_dext->ee_block = | 573 | le32_add_cpu(&tmp_dext->ee_block, diff); |
608 | cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); | 574 | le16_add_cpu(&tmp_dext->ee_len, -diff); |
609 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); | ||
610 | 575 | ||
611 | if (max_count < ext4_ext_get_actual_len(tmp_dext)) | 576 | if (max_count < ext4_ext_get_actual_len(tmp_dext)) |
612 | tmp_dext->ee_len = cpu_to_le16(max_count); | 577 | tmp_dext->ee_len = cpu_to_le16(max_count); |
@@ -629,6 +594,43 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
629 | } | 594 | } |
630 | 595 | ||
631 | /** | 596 | /** |
597 | * mext_check_coverage - Check that all extents in range has the same type | ||
598 | * | ||
599 | * @inode: inode in question | ||
600 | * @from: block offset of inode | ||
601 | * @count: block count to be checked | ||
602 | * @uninit: extents expected to be uninitialized | ||
603 | * @err: pointer to save error value | ||
604 | * | ||
605 | * Return 1 if all extents in range has expected type, and zero otherwise. | ||
606 | */ | ||
607 | static int | ||
608 | mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, | ||
609 | int uninit, int *err) | ||
610 | { | ||
611 | struct ext4_ext_path *path = NULL; | ||
612 | struct ext4_extent *ext; | ||
613 | ext4_lblk_t last = from + count; | ||
614 | while (from < last) { | ||
615 | *err = get_ext_path(inode, from, &path); | ||
616 | if (*err) | ||
617 | return 0; | ||
618 | ext = path[ext_depth(inode)].p_ext; | ||
619 | if (!ext) { | ||
620 | ext4_ext_drop_refs(path); | ||
621 | return 0; | ||
622 | } | ||
623 | if (uninit != ext4_ext_is_uninitialized(ext)) { | ||
624 | ext4_ext_drop_refs(path); | ||
625 | return 0; | ||
626 | } | ||
627 | from += ext4_ext_get_actual_len(ext); | ||
628 | ext4_ext_drop_refs(path); | ||
629 | } | ||
630 | return 1; | ||
631 | } | ||
632 | |||
633 | /** | ||
632 | * mext_replace_branches - Replace original extents with new extents | 634 | * mext_replace_branches - Replace original extents with new extents |
633 | * | 635 | * |
634 | * @handle: journal handle | 636 | * @handle: journal handle |
@@ -663,9 +665,6 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
663 | int replaced_count = 0; | 665 | int replaced_count = 0; |
664 | int dext_alen; | 666 | int dext_alen; |
665 | 667 | ||
666 | /* Protect extent trees against block allocations via delalloc */ | ||
667 | double_down_write_data_sem(orig_inode, donor_inode); | ||
668 | |||
669 | /* Get the original extent for the block "orig_off" */ | 668 | /* Get the original extent for the block "orig_off" */ |
670 | *err = get_ext_path(orig_inode, orig_off, &orig_path); | 669 | *err = get_ext_path(orig_inode, orig_off, &orig_path); |
671 | if (*err) | 670 | if (*err) |
@@ -764,12 +763,122 @@ out: | |||
764 | ext4_ext_invalidate_cache(orig_inode); | 763 | ext4_ext_invalidate_cache(orig_inode); |
765 | ext4_ext_invalidate_cache(donor_inode); | 764 | ext4_ext_invalidate_cache(donor_inode); |
766 | 765 | ||
767 | double_up_write_data_sem(orig_inode, donor_inode); | ||
768 | |||
769 | return replaced_count; | 766 | return replaced_count; |
770 | } | 767 | } |
771 | 768 | ||
772 | /** | 769 | /** |
770 | * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2 | ||
771 | * | ||
772 | * @inode1: the inode structure | ||
773 | * @inode2: the inode structure | ||
774 | * @index: page index | ||
775 | * @page: result page vector | ||
776 | * | ||
777 | * Grab two locked pages for inode's by inode order | ||
778 | */ | ||
779 | static int | ||
780 | mext_page_double_lock(struct inode *inode1, struct inode *inode2, | ||
781 | pgoff_t index, struct page *page[2]) | ||
782 | { | ||
783 | struct address_space *mapping[2]; | ||
784 | unsigned fl = AOP_FLAG_NOFS; | ||
785 | |||
786 | BUG_ON(!inode1 || !inode2); | ||
787 | if (inode1 < inode2) { | ||
788 | mapping[0] = inode1->i_mapping; | ||
789 | mapping[1] = inode2->i_mapping; | ||
790 | } else { | ||
791 | mapping[0] = inode2->i_mapping; | ||
792 | mapping[1] = inode1->i_mapping; | ||
793 | } | ||
794 | |||
795 | page[0] = grab_cache_page_write_begin(mapping[0], index, fl); | ||
796 | if (!page[0]) | ||
797 | return -ENOMEM; | ||
798 | |||
799 | page[1] = grab_cache_page_write_begin(mapping[1], index, fl); | ||
800 | if (!page[1]) { | ||
801 | unlock_page(page[0]); | ||
802 | page_cache_release(page[0]); | ||
803 | return -ENOMEM; | ||
804 | } | ||
805 | |||
806 | if (inode1 > inode2) { | ||
807 | struct page *tmp; | ||
808 | tmp = page[0]; | ||
809 | page[0] = page[1]; | ||
810 | page[1] = tmp; | ||
811 | } | ||
812 | return 0; | ||
813 | } | ||
814 | |||
815 | /* Force page buffers uptodate w/o dropping page's lock */ | ||
816 | static int | ||
817 | mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) | ||
818 | { | ||
819 | struct inode *inode = page->mapping->host; | ||
820 | sector_t block; | ||
821 | struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; | ||
822 | unsigned int blocksize, block_start, block_end; | ||
823 | int i, err, nr = 0, partial = 0; | ||
824 | BUG_ON(!PageLocked(page)); | ||
825 | BUG_ON(PageWriteback(page)); | ||
826 | |||
827 | if (PageUptodate(page)) | ||
828 | return 0; | ||
829 | |||
830 | blocksize = 1 << inode->i_blkbits; | ||
831 | if (!page_has_buffers(page)) | ||
832 | create_empty_buffers(page, blocksize, 0); | ||
833 | |||
834 | head = page_buffers(page); | ||
835 | block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
836 | for (bh = head, block_start = 0; bh != head || !block_start; | ||
837 | block++, block_start = block_end, bh = bh->b_this_page) { | ||
838 | block_end = block_start + blocksize; | ||
839 | if (block_end <= from || block_start >= to) { | ||
840 | if (!buffer_uptodate(bh)) | ||
841 | partial = 1; | ||
842 | continue; | ||
843 | } | ||
844 | if (buffer_uptodate(bh)) | ||
845 | continue; | ||
846 | if (!buffer_mapped(bh)) { | ||
847 | int err = 0; | ||
848 | err = ext4_get_block(inode, block, bh, 0); | ||
849 | if (err) { | ||
850 | SetPageError(page); | ||
851 | return err; | ||
852 | } | ||
853 | if (!buffer_mapped(bh)) { | ||
854 | zero_user(page, block_start, blocksize); | ||
855 | if (!err) | ||
856 | set_buffer_uptodate(bh); | ||
857 | continue; | ||
858 | } | ||
859 | } | ||
860 | BUG_ON(nr >= MAX_BUF_PER_PAGE); | ||
861 | arr[nr++] = bh; | ||
862 | } | ||
863 | /* No io required */ | ||
864 | if (!nr) | ||
865 | goto out; | ||
866 | |||
867 | for (i = 0; i < nr; i++) { | ||
868 | bh = arr[i]; | ||
869 | if (!bh_uptodate_or_lock(bh)) { | ||
870 | err = bh_submit_read(bh); | ||
871 | if (err) | ||
872 | return err; | ||
873 | } | ||
874 | } | ||
875 | out: | ||
876 | if (!partial) | ||
877 | SetPageUptodate(page); | ||
878 | return 0; | ||
879 | } | ||
880 | |||
881 | /** | ||
773 | * move_extent_per_page - Move extent data per page | 882 | * move_extent_per_page - Move extent data per page |
774 | * | 883 | * |
775 | * @o_filp: file structure of original file | 884 | * @o_filp: file structure of original file |
@@ -791,26 +900,24 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
791 | int block_len_in_page, int uninit, int *err) | 900 | int block_len_in_page, int uninit, int *err) |
792 | { | 901 | { |
793 | struct inode *orig_inode = o_filp->f_dentry->d_inode; | 902 | struct inode *orig_inode = o_filp->f_dentry->d_inode; |
794 | struct address_space *mapping = orig_inode->i_mapping; | 903 | struct page *pagep[2] = {NULL, NULL}; |
795 | struct buffer_head *bh; | ||
796 | struct page *page = NULL; | ||
797 | const struct address_space_operations *a_ops = mapping->a_ops; | ||
798 | handle_t *handle; | 904 | handle_t *handle; |
799 | ext4_lblk_t orig_blk_offset; | 905 | ext4_lblk_t orig_blk_offset; |
800 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; | 906 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; |
801 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; | 907 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; |
802 | unsigned int w_flags = 0; | 908 | unsigned int w_flags = 0; |
803 | unsigned int tmp_data_size, data_size, replaced_size; | 909 | unsigned int tmp_data_size, data_size, replaced_size; |
804 | void *fsdata; | 910 | int err2, jblocks, retries = 0; |
805 | int i, jblocks; | ||
806 | int err2 = 0; | ||
807 | int replaced_count = 0; | 911 | int replaced_count = 0; |
912 | int from = data_offset_in_page << orig_inode->i_blkbits; | ||
808 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 913 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
809 | 914 | ||
810 | /* | 915 | /* |
811 | * It needs twice the amount of ordinary journal buffers because | 916 | * It needs twice the amount of ordinary journal buffers because |
812 | * inode and donor_inode may change each different metadata blocks. | 917 | * inode and donor_inode may change each different metadata blocks. |
813 | */ | 918 | */ |
919 | again: | ||
920 | *err = 0; | ||
814 | jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; | 921 | jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; |
815 | handle = ext4_journal_start(orig_inode, jblocks); | 922 | handle = ext4_journal_start(orig_inode, jblocks); |
816 | if (IS_ERR(handle)) { | 923 | if (IS_ERR(handle)) { |
@@ -824,19 +931,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
824 | orig_blk_offset = orig_page_offset * blocks_per_page + | 931 | orig_blk_offset = orig_page_offset * blocks_per_page + |
825 | data_offset_in_page; | 932 | data_offset_in_page; |
826 | 933 | ||
827 | /* | ||
828 | * If orig extent is uninitialized one, | ||
829 | * it's not necessary force the page into memory | ||
830 | * and then force it to be written out again. | ||
831 | * Just swap data blocks between orig and donor. | ||
832 | */ | ||
833 | if (uninit) { | ||
834 | replaced_count = mext_replace_branches(handle, orig_inode, | ||
835 | donor_inode, orig_blk_offset, | ||
836 | block_len_in_page, err); | ||
837 | goto out2; | ||
838 | } | ||
839 | |||
840 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; | 934 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; |
841 | 935 | ||
842 | /* Calculate data_size */ | 936 | /* Calculate data_size */ |
@@ -858,75 +952,120 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
858 | 952 | ||
859 | replaced_size = data_size; | 953 | replaced_size = data_size; |
860 | 954 | ||
861 | *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, | 955 | *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset, |
862 | &page, &fsdata); | 956 | pagep); |
863 | if (unlikely(*err < 0)) | 957 | if (unlikely(*err < 0)) |
864 | goto out; | 958 | goto stop_journal; |
865 | |||
866 | if (!PageUptodate(page)) { | ||
867 | mapping->a_ops->readpage(o_filp, page); | ||
868 | lock_page(page); | ||
869 | } | ||
870 | |||
871 | /* | 959 | /* |
872 | * try_to_release_page() doesn't call releasepage in writeback mode. | 960 | * If orig extent was uninitialized it can become initialized |
873 | * We should care about the order of writing to the same file | 961 | * at any time after i_data_sem was dropped, in order to |
874 | * by multiple move extent processes. | 962 | * serialize with delalloc we have recheck extent while we |
875 | * It needs to call wait_on_page_writeback() to wait for the | 963 | * hold page's lock, if it is still the case data copy is not |
876 | * writeback of the page. | 964 | * necessary, just swap data blocks between orig and donor. |
877 | */ | 965 | */ |
878 | wait_on_page_writeback(page); | 966 | if (uninit) { |
967 | double_down_write_data_sem(orig_inode, donor_inode); | ||
968 | /* If any of extents in range became initialized we have to | ||
969 | * fallback to data copying */ | ||
970 | uninit = mext_check_coverage(orig_inode, orig_blk_offset, | ||
971 | block_len_in_page, 1, err); | ||
972 | if (*err) | ||
973 | goto drop_data_sem; | ||
879 | 974 | ||
880 | /* Release old bh and drop refs */ | 975 | uninit &= mext_check_coverage(donor_inode, orig_blk_offset, |
881 | try_to_release_page(page, 0); | 976 | block_len_in_page, 1, err); |
977 | if (*err) | ||
978 | goto drop_data_sem; | ||
979 | |||
980 | if (!uninit) { | ||
981 | double_up_write_data_sem(orig_inode, donor_inode); | ||
982 | goto data_copy; | ||
983 | } | ||
984 | if ((page_has_private(pagep[0]) && | ||
985 | !try_to_release_page(pagep[0], 0)) || | ||
986 | (page_has_private(pagep[1]) && | ||
987 | !try_to_release_page(pagep[1], 0))) { | ||
988 | *err = -EBUSY; | ||
989 | goto drop_data_sem; | ||
990 | } | ||
991 | replaced_count = mext_replace_branches(handle, orig_inode, | ||
992 | donor_inode, orig_blk_offset, | ||
993 | block_len_in_page, err); | ||
994 | drop_data_sem: | ||
995 | double_up_write_data_sem(orig_inode, donor_inode); | ||
996 | goto unlock_pages; | ||
997 | } | ||
998 | data_copy: | ||
999 | *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size); | ||
1000 | if (*err) | ||
1001 | goto unlock_pages; | ||
1002 | |||
1003 | /* At this point all buffers in range are uptodate, old mapping layout | ||
1004 | * is no longer required, try to drop it now. */ | ||
1005 | if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) || | ||
1006 | (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) { | ||
1007 | *err = -EBUSY; | ||
1008 | goto unlock_pages; | ||
1009 | } | ||
882 | 1010 | ||
883 | replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, | 1011 | replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, |
884 | orig_blk_offset, block_len_in_page, | 1012 | orig_blk_offset, |
885 | &err2); | 1013 | block_len_in_page, err); |
886 | if (err2) { | 1014 | if (*err) { |
887 | if (replaced_count) { | 1015 | if (replaced_count) { |
888 | block_len_in_page = replaced_count; | 1016 | block_len_in_page = replaced_count; |
889 | replaced_size = | 1017 | replaced_size = |
890 | block_len_in_page << orig_inode->i_blkbits; | 1018 | block_len_in_page << orig_inode->i_blkbits; |
891 | } else | 1019 | } else |
892 | goto out; | 1020 | goto unlock_pages; |
893 | } | 1021 | } |
1022 | /* Perform all necessary steps similar write_begin()/write_end() | ||
1023 | * but keeping in mind that i_size will not change */ | ||
1024 | *err = __block_write_begin(pagep[0], from, from + replaced_size, | ||
1025 | ext4_get_block); | ||
1026 | if (!*err) | ||
1027 | *err = block_commit_write(pagep[0], from, from + replaced_size); | ||
894 | 1028 | ||
895 | if (!page_has_buffers(page)) | 1029 | if (unlikely(*err < 0)) |
896 | create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); | 1030 | goto repair_branches; |
897 | 1031 | ||
898 | bh = page_buffers(page); | 1032 | /* Even in case of data=writeback it is reasonable to pin |
899 | for (i = 0; i < data_offset_in_page; i++) | 1033 | * inode to transaction, to prevent unexpected data loss */ |
900 | bh = bh->b_this_page; | 1034 | *err = ext4_jbd2_file_inode(handle, orig_inode); |
901 | 1035 | ||
902 | for (i = 0; i < block_len_in_page; i++) { | 1036 | unlock_pages: |
903 | *err = ext4_get_block(orig_inode, | 1037 | unlock_page(pagep[0]); |
904 | (sector_t)(orig_blk_offset + i), bh, 0); | 1038 | page_cache_release(pagep[0]); |
905 | if (*err < 0) | 1039 | unlock_page(pagep[1]); |
906 | goto out; | 1040 | page_cache_release(pagep[1]); |
907 | 1041 | stop_journal: | |
908 | if (bh->b_this_page != NULL) | ||
909 | bh = bh->b_this_page; | ||
910 | } | ||
911 | |||
912 | *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size, | ||
913 | page, fsdata); | ||
914 | page = NULL; | ||
915 | |||
916 | out: | ||
917 | if (unlikely(page)) { | ||
918 | if (PageLocked(page)) | ||
919 | unlock_page(page); | ||
920 | page_cache_release(page); | ||
921 | ext4_journal_stop(handle); | ||
922 | } | ||
923 | out2: | ||
924 | ext4_journal_stop(handle); | 1042 | ext4_journal_stop(handle); |
925 | 1043 | /* Buffer was busy because probably is pinned to journal transaction, | |
926 | if (err2) | 1044 | * force transaction commit may help to free it. */ |
927 | *err = err2; | 1045 | if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb, |
928 | 1046 | &retries)) | |
1047 | goto again; | ||
929 | return replaced_count; | 1048 | return replaced_count; |
1049 | |||
1050 | repair_branches: | ||
1051 | /* | ||
1052 | * This should never ever happen! | ||
1053 | * Extents are swapped already, but we are not able to copy data. | ||
1054 | * Try to swap extents to it's original places | ||
1055 | */ | ||
1056 | double_down_write_data_sem(orig_inode, donor_inode); | ||
1057 | replaced_count = mext_replace_branches(handle, donor_inode, orig_inode, | ||
1058 | orig_blk_offset, | ||
1059 | block_len_in_page, &err2); | ||
1060 | double_up_write_data_sem(orig_inode, donor_inode); | ||
1061 | if (replaced_count != block_len_in_page) { | ||
1062 | EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), | ||
1063 | "Unable to copy data block," | ||
1064 | " data will be lost."); | ||
1065 | *err = -EIO; | ||
1066 | } | ||
1067 | replaced_count = 0; | ||
1068 | goto unlock_pages; | ||
930 | } | 1069 | } |
931 | 1070 | ||
932 | /** | 1071 | /** |
@@ -969,14 +1108,6 @@ mext_check_arguments(struct inode *orig_inode, | |||
969 | return -EINVAL; | 1108 | return -EINVAL; |
970 | } | 1109 | } |
971 | 1110 | ||
972 | /* Files should be in the same ext4 FS */ | ||
973 | if (orig_inode->i_sb != donor_inode->i_sb) { | ||
974 | ext4_debug("ext4 move extent: The argument files " | ||
975 | "should be in same FS [ino:orig %lu, donor %lu]\n", | ||
976 | orig_inode->i_ino, donor_inode->i_ino); | ||
977 | return -EINVAL; | ||
978 | } | ||
979 | |||
980 | /* Ext4 move extent supports only extent based file */ | 1111 | /* Ext4 move extent supports only extent based file */ |
981 | if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { | 1112 | if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { |
982 | ext4_debug("ext4 move extent: orig file is not extents " | 1113 | ext4_debug("ext4 move extent: orig file is not extents " |
@@ -1002,7 +1133,6 @@ mext_check_arguments(struct inode *orig_inode, | |||
1002 | } | 1133 | } |
1003 | 1134 | ||
1004 | if ((orig_start >= EXT_MAX_BLOCKS) || | 1135 | if ((orig_start >= EXT_MAX_BLOCKS) || |
1005 | (donor_start >= EXT_MAX_BLOCKS) || | ||
1006 | (*len > EXT_MAX_BLOCKS) || | 1136 | (*len > EXT_MAX_BLOCKS) || |
1007 | (orig_start + *len >= EXT_MAX_BLOCKS)) { | 1137 | (orig_start + *len >= EXT_MAX_BLOCKS)) { |
1008 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " | 1138 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " |
@@ -1072,35 +1202,19 @@ mext_check_arguments(struct inode *orig_inode, | |||
1072 | * @inode1: the inode structure | 1202 | * @inode1: the inode structure |
1073 | * @inode2: the inode structure | 1203 | * @inode2: the inode structure |
1074 | * | 1204 | * |
1075 | * Lock two inodes' i_mutex by i_ino order. | 1205 | * Lock two inodes' i_mutex |
1076 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. | ||
1077 | */ | 1206 | */ |
1078 | static int | 1207 | static void |
1079 | mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | 1208 | mext_inode_double_lock(struct inode *inode1, struct inode *inode2) |
1080 | { | 1209 | { |
1081 | int ret = 0; | 1210 | BUG_ON(inode1 == inode2); |
1082 | 1211 | if (inode1 < inode2) { | |
1083 | BUG_ON(inode1 == NULL && inode2 == NULL); | ||
1084 | |||
1085 | ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); | ||
1086 | if (ret < 0) | ||
1087 | goto out; | ||
1088 | |||
1089 | if (inode1 == inode2) { | ||
1090 | mutex_lock(&inode1->i_mutex); | ||
1091 | goto out; | ||
1092 | } | ||
1093 | |||
1094 | if (inode1->i_ino < inode2->i_ino) { | ||
1095 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); | 1212 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); |
1096 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); | 1213 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); |
1097 | } else { | 1214 | } else { |
1098 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); | 1215 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); |
1099 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); | 1216 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); |
1100 | } | 1217 | } |
1101 | |||
1102 | out: | ||
1103 | return ret; | ||
1104 | } | 1218 | } |
1105 | 1219 | ||
1106 | /** | 1220 | /** |
@@ -1109,28 +1223,13 @@ out: | |||
1109 | * @inode1: the inode that is released first | 1223 | * @inode1: the inode that is released first |
1110 | * @inode2: the inode that is released second | 1224 | * @inode2: the inode that is released second |
1111 | * | 1225 | * |
1112 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. | ||
1113 | */ | 1226 | */ |
1114 | 1227 | ||
1115 | static int | 1228 | static void |
1116 | mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) | 1229 | mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) |
1117 | { | 1230 | { |
1118 | int ret = 0; | 1231 | mutex_unlock(&inode1->i_mutex); |
1119 | 1232 | mutex_unlock(&inode2->i_mutex); | |
1120 | BUG_ON(inode1 == NULL && inode2 == NULL); | ||
1121 | |||
1122 | ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); | ||
1123 | if (ret < 0) | ||
1124 | goto out; | ||
1125 | |||
1126 | if (inode1) | ||
1127 | mutex_unlock(&inode1->i_mutex); | ||
1128 | |||
1129 | if (inode2 && inode2 != inode1) | ||
1130 | mutex_unlock(&inode2->i_mutex); | ||
1131 | |||
1132 | out: | ||
1133 | return ret; | ||
1134 | } | 1233 | } |
1135 | 1234 | ||
1136 | /** | 1235 | /** |
@@ -1187,16 +1286,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1187 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; | 1286 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; |
1188 | ext4_lblk_t rest_blocks; | 1287 | ext4_lblk_t rest_blocks; |
1189 | pgoff_t orig_page_offset = 0, seq_end_page; | 1288 | pgoff_t orig_page_offset = 0, seq_end_page; |
1190 | int ret1, ret2, depth, last_extent = 0; | 1289 | int ret, depth, last_extent = 0; |
1191 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 1290 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
1192 | int data_offset_in_page; | 1291 | int data_offset_in_page; |
1193 | int block_len_in_page; | 1292 | int block_len_in_page; |
1194 | int uninit; | 1293 | int uninit; |
1195 | 1294 | ||
1196 | /* orig and donor should be different file */ | 1295 | if (orig_inode->i_sb != donor_inode->i_sb) { |
1197 | if (orig_inode->i_ino == donor_inode->i_ino) { | 1296 | ext4_debug("ext4 move extent: The argument files " |
1297 | "should be in same FS [ino:orig %lu, donor %lu]\n", | ||
1298 | orig_inode->i_ino, donor_inode->i_ino); | ||
1299 | return -EINVAL; | ||
1300 | } | ||
1301 | |||
1302 | /* orig and donor should be different inodes */ | ||
1303 | if (orig_inode == donor_inode) { | ||
1198 | ext4_debug("ext4 move extent: The argument files should not " | 1304 | ext4_debug("ext4 move extent: The argument files should not " |
1199 | "be same file [ino:orig %lu, donor %lu]\n", | 1305 | "be same inode [ino:orig %lu, donor %lu]\n", |
1200 | orig_inode->i_ino, donor_inode->i_ino); | 1306 | orig_inode->i_ino, donor_inode->i_ino); |
1201 | return -EINVAL; | 1307 | return -EINVAL; |
1202 | } | 1308 | } |
@@ -1208,18 +1314,27 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1208 | orig_inode->i_ino, donor_inode->i_ino); | 1314 | orig_inode->i_ino, donor_inode->i_ino); |
1209 | return -EINVAL; | 1315 | return -EINVAL; |
1210 | } | 1316 | } |
1211 | 1317 | /* TODO: This is non obvious task to swap blocks for inodes with full | |
1318 | jornaling enabled */ | ||
1319 | if (ext4_should_journal_data(orig_inode) || | ||
1320 | ext4_should_journal_data(donor_inode)) { | ||
1321 | return -EINVAL; | ||
1322 | } | ||
1212 | /* Protect orig and donor inodes against a truncate */ | 1323 | /* Protect orig and donor inodes against a truncate */ |
1213 | ret1 = mext_inode_double_lock(orig_inode, donor_inode); | 1324 | mext_inode_double_lock(orig_inode, donor_inode); |
1214 | if (ret1 < 0) | 1325 | |
1215 | return ret1; | 1326 | /* Wait for all existing dio workers */ |
1327 | ext4_inode_block_unlocked_dio(orig_inode); | ||
1328 | ext4_inode_block_unlocked_dio(donor_inode); | ||
1329 | inode_dio_wait(orig_inode); | ||
1330 | inode_dio_wait(donor_inode); | ||
1216 | 1331 | ||
1217 | /* Protect extent tree against block allocations via delalloc */ | 1332 | /* Protect extent tree against block allocations via delalloc */ |
1218 | double_down_write_data_sem(orig_inode, donor_inode); | 1333 | double_down_write_data_sem(orig_inode, donor_inode); |
1219 | /* Check the filesystem environment whether move_extent can be done */ | 1334 | /* Check the filesystem environment whether move_extent can be done */ |
1220 | ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, | 1335 | ret = mext_check_arguments(orig_inode, donor_inode, orig_start, |
1221 | donor_start, &len); | 1336 | donor_start, &len); |
1222 | if (ret1) | 1337 | if (ret) |
1223 | goto out; | 1338 | goto out; |
1224 | 1339 | ||
1225 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; | 1340 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; |
@@ -1227,13 +1342,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1227 | if (file_end < block_end) | 1342 | if (file_end < block_end) |
1228 | len -= block_end - file_end; | 1343 | len -= block_end - file_end; |
1229 | 1344 | ||
1230 | ret1 = get_ext_path(orig_inode, block_start, &orig_path); | 1345 | ret = get_ext_path(orig_inode, block_start, &orig_path); |
1231 | if (ret1) | 1346 | if (ret) |
1232 | goto out; | 1347 | goto out; |
1233 | 1348 | ||
1234 | /* Get path structure to check the hole */ | 1349 | /* Get path structure to check the hole */ |
1235 | ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); | 1350 | ret = get_ext_path(orig_inode, block_start, &holecheck_path); |
1236 | if (ret1) | 1351 | if (ret) |
1237 | goto out; | 1352 | goto out; |
1238 | 1353 | ||
1239 | depth = ext_depth(orig_inode); | 1354 | depth = ext_depth(orig_inode); |
@@ -1252,13 +1367,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1252 | last_extent = mext_next_extent(orig_inode, | 1367 | last_extent = mext_next_extent(orig_inode, |
1253 | holecheck_path, &ext_cur); | 1368 | holecheck_path, &ext_cur); |
1254 | if (last_extent < 0) { | 1369 | if (last_extent < 0) { |
1255 | ret1 = last_extent; | 1370 | ret = last_extent; |
1256 | goto out; | 1371 | goto out; |
1257 | } | 1372 | } |
1258 | last_extent = mext_next_extent(orig_inode, orig_path, | 1373 | last_extent = mext_next_extent(orig_inode, orig_path, |
1259 | &ext_dummy); | 1374 | &ext_dummy); |
1260 | if (last_extent < 0) { | 1375 | if (last_extent < 0) { |
1261 | ret1 = last_extent; | 1376 | ret = last_extent; |
1262 | goto out; | 1377 | goto out; |
1263 | } | 1378 | } |
1264 | seq_start = le32_to_cpu(ext_cur->ee_block); | 1379 | seq_start = le32_to_cpu(ext_cur->ee_block); |
@@ -1272,7 +1387,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1272 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { | 1387 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { |
1273 | ext4_debug("ext4 move extent: The specified range of file " | 1388 | ext4_debug("ext4 move extent: The specified range of file " |
1274 | "may be the hole\n"); | 1389 | "may be the hole\n"); |
1275 | ret1 = -EINVAL; | 1390 | ret = -EINVAL; |
1276 | goto out; | 1391 | goto out; |
1277 | } | 1392 | } |
1278 | 1393 | ||
@@ -1292,7 +1407,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1292 | last_extent = mext_next_extent(orig_inode, holecheck_path, | 1407 | last_extent = mext_next_extent(orig_inode, holecheck_path, |
1293 | &ext_cur); | 1408 | &ext_cur); |
1294 | if (last_extent < 0) { | 1409 | if (last_extent < 0) { |
1295 | ret1 = last_extent; | 1410 | ret = last_extent; |
1296 | break; | 1411 | break; |
1297 | } | 1412 | } |
1298 | add_blocks = ext4_ext_get_actual_len(ext_cur); | 1413 | add_blocks = ext4_ext_get_actual_len(ext_cur); |
@@ -1349,18 +1464,18 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1349 | orig_page_offset, | 1464 | orig_page_offset, |
1350 | data_offset_in_page, | 1465 | data_offset_in_page, |
1351 | block_len_in_page, uninit, | 1466 | block_len_in_page, uninit, |
1352 | &ret1); | 1467 | &ret); |
1353 | 1468 | ||
1354 | /* Count how many blocks we have exchanged */ | 1469 | /* Count how many blocks we have exchanged */ |
1355 | *moved_len += block_len_in_page; | 1470 | *moved_len += block_len_in_page; |
1356 | if (ret1 < 0) | 1471 | if (ret < 0) |
1357 | break; | 1472 | break; |
1358 | if (*moved_len > len) { | 1473 | if (*moved_len > len) { |
1359 | EXT4_ERROR_INODE(orig_inode, | 1474 | EXT4_ERROR_INODE(orig_inode, |
1360 | "We replaced blocks too much! " | 1475 | "We replaced blocks too much! " |
1361 | "sum of replaced: %llu requested: %llu", | 1476 | "sum of replaced: %llu requested: %llu", |
1362 | *moved_len, len); | 1477 | *moved_len, len); |
1363 | ret1 = -EIO; | 1478 | ret = -EIO; |
1364 | break; | 1479 | break; |
1365 | } | 1480 | } |
1366 | 1481 | ||
@@ -1374,22 +1489,22 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1374 | } | 1489 | } |
1375 | 1490 | ||
1376 | double_down_write_data_sem(orig_inode, donor_inode); | 1491 | double_down_write_data_sem(orig_inode, donor_inode); |
1377 | if (ret1 < 0) | 1492 | if (ret < 0) |
1378 | break; | 1493 | break; |
1379 | 1494 | ||
1380 | /* Decrease buffer counter */ | 1495 | /* Decrease buffer counter */ |
1381 | if (holecheck_path) | 1496 | if (holecheck_path) |
1382 | ext4_ext_drop_refs(holecheck_path); | 1497 | ext4_ext_drop_refs(holecheck_path); |
1383 | ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); | 1498 | ret = get_ext_path(orig_inode, seq_start, &holecheck_path); |
1384 | if (ret1) | 1499 | if (ret) |
1385 | break; | 1500 | break; |
1386 | depth = holecheck_path->p_depth; | 1501 | depth = holecheck_path->p_depth; |
1387 | 1502 | ||
1388 | /* Decrease buffer counter */ | 1503 | /* Decrease buffer counter */ |
1389 | if (orig_path) | 1504 | if (orig_path) |
1390 | ext4_ext_drop_refs(orig_path); | 1505 | ext4_ext_drop_refs(orig_path); |
1391 | ret1 = get_ext_path(orig_inode, seq_start, &orig_path); | 1506 | ret = get_ext_path(orig_inode, seq_start, &orig_path); |
1392 | if (ret1) | 1507 | if (ret) |
1393 | break; | 1508 | break; |
1394 | 1509 | ||
1395 | ext_cur = holecheck_path[depth].p_ext; | 1510 | ext_cur = holecheck_path[depth].p_ext; |
@@ -1412,12 +1527,9 @@ out: | |||
1412 | kfree(holecheck_path); | 1527 | kfree(holecheck_path); |
1413 | } | 1528 | } |
1414 | double_up_write_data_sem(orig_inode, donor_inode); | 1529 | double_up_write_data_sem(orig_inode, donor_inode); |
1415 | ret2 = mext_inode_double_unlock(orig_inode, donor_inode); | 1530 | ext4_inode_resume_unlocked_dio(orig_inode); |
1416 | 1531 | ext4_inode_resume_unlocked_dio(donor_inode); | |
1417 | if (ret1) | 1532 | mext_inode_double_unlock(orig_inode, donor_inode); |
1418 | return ret1; | ||
1419 | else if (ret2) | ||
1420 | return ret2; | ||
1421 | 1533 | ||
1422 | return 0; | 1534 | return ret; |
1423 | } | 1535 | } |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2a42cc04466f..6d600a69fc9d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -55,6 +55,13 @@ static struct buffer_head *ext4_append(handle_t *handle, | |||
55 | { | 55 | { |
56 | struct buffer_head *bh; | 56 | struct buffer_head *bh; |
57 | 57 | ||
58 | if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && | ||
59 | ((inode->i_size >> 10) >= | ||
60 | EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) { | ||
61 | *err = -ENOSPC; | ||
62 | return NULL; | ||
63 | } | ||
64 | |||
58 | *block = inode->i_size >> inode->i_sb->s_blocksize_bits; | 65 | *block = inode->i_size >> inode->i_sb->s_blocksize_bits; |
59 | 66 | ||
60 | bh = ext4_bread(handle, inode, *block, 1, err); | 67 | bh = ext4_bread(handle, inode, *block, 1, err); |
@@ -67,6 +74,12 @@ static struct buffer_head *ext4_append(handle_t *handle, | |||
67 | bh = NULL; | 74 | bh = NULL; |
68 | } | 75 | } |
69 | } | 76 | } |
77 | if (!bh && !(*err)) { | ||
78 | *err = -EIO; | ||
79 | ext4_error(inode->i_sb, | ||
80 | "Directory hole detected on inode %lu\n", | ||
81 | inode->i_ino); | ||
82 | } | ||
70 | return bh; | 83 | return bh; |
71 | } | 84 | } |
72 | 85 | ||
@@ -594,8 +607,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
594 | u32 hash; | 607 | u32 hash; |
595 | 608 | ||
596 | frame->bh = NULL; | 609 | frame->bh = NULL; |
597 | if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) | 610 | if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) { |
611 | if (*err == 0) | ||
612 | *err = ERR_BAD_DX_DIR; | ||
598 | goto fail; | 613 | goto fail; |
614 | } | ||
599 | root = (struct dx_root *) bh->b_data; | 615 | root = (struct dx_root *) bh->b_data; |
600 | if (root->info.hash_version != DX_HASH_TEA && | 616 | if (root->info.hash_version != DX_HASH_TEA && |
601 | root->info.hash_version != DX_HASH_HALF_MD4 && | 617 | root->info.hash_version != DX_HASH_HALF_MD4 && |
@@ -696,8 +712,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
696 | frame->entries = entries; | 712 | frame->entries = entries; |
697 | frame->at = at; | 713 | frame->at = at; |
698 | if (!indirect--) return frame; | 714 | if (!indirect--) return frame; |
699 | if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) | 715 | if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) { |
716 | if (!(*err)) | ||
717 | *err = ERR_BAD_DX_DIR; | ||
700 | goto fail2; | 718 | goto fail2; |
719 | } | ||
701 | at = entries = ((struct dx_node *) bh->b_data)->entries; | 720 | at = entries = ((struct dx_node *) bh->b_data)->entries; |
702 | 721 | ||
703 | if (!buffer_verified(bh) && | 722 | if (!buffer_verified(bh) && |
@@ -807,8 +826,15 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, | |||
807 | */ | 826 | */ |
808 | while (num_frames--) { | 827 | while (num_frames--) { |
809 | if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), | 828 | if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), |
810 | 0, &err))) | 829 | 0, &err))) { |
830 | if (!err) { | ||
831 | ext4_error(dir->i_sb, | ||
832 | "Directory hole detected on inode %lu\n", | ||
833 | dir->i_ino); | ||
834 | return -EIO; | ||
835 | } | ||
811 | return err; /* Failure */ | 836 | return err; /* Failure */ |
837 | } | ||
812 | 838 | ||
813 | if (!buffer_verified(bh) && | 839 | if (!buffer_verified(bh) && |
814 | !ext4_dx_csum_verify(dir, | 840 | !ext4_dx_csum_verify(dir, |
@@ -839,12 +865,19 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
839 | { | 865 | { |
840 | struct buffer_head *bh; | 866 | struct buffer_head *bh; |
841 | struct ext4_dir_entry_2 *de, *top; | 867 | struct ext4_dir_entry_2 *de, *top; |
842 | int err, count = 0; | 868 | int err = 0, count = 0; |
843 | 869 | ||
844 | dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", | 870 | dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", |
845 | (unsigned long)block)); | 871 | (unsigned long)block)); |
846 | if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) | 872 | if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) { |
873 | if (!err) { | ||
874 | err = -EIO; | ||
875 | ext4_error(dir->i_sb, | ||
876 | "Directory hole detected on inode %lu\n", | ||
877 | dir->i_ino); | ||
878 | } | ||
847 | return err; | 879 | return err; |
880 | } | ||
848 | 881 | ||
849 | if (!buffer_verified(bh) && | 882 | if (!buffer_verified(bh) && |
850 | !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) | 883 | !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) |
@@ -1267,8 +1300,15 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q | |||
1267 | return NULL; | 1300 | return NULL; |
1268 | do { | 1301 | do { |
1269 | block = dx_get_block(frame->at); | 1302 | block = dx_get_block(frame->at); |
1270 | if (!(bh = ext4_bread(NULL, dir, block, 0, err))) | 1303 | if (!(bh = ext4_bread(NULL, dir, block, 0, err))) { |
1304 | if (!(*err)) { | ||
1305 | *err = -EIO; | ||
1306 | ext4_error(dir->i_sb, | ||
1307 | "Directory hole detected on inode %lu\n", | ||
1308 | dir->i_ino); | ||
1309 | } | ||
1271 | goto errout; | 1310 | goto errout; |
1311 | } | ||
1272 | 1312 | ||
1273 | if (!buffer_verified(bh) && | 1313 | if (!buffer_verified(bh) && |
1274 | !ext4_dirent_csum_verify(dir, | 1314 | !ext4_dirent_csum_verify(dir, |
@@ -1801,9 +1841,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1801 | } | 1841 | } |
1802 | blocks = dir->i_size >> sb->s_blocksize_bits; | 1842 | blocks = dir->i_size >> sb->s_blocksize_bits; |
1803 | for (block = 0; block < blocks; block++) { | 1843 | for (block = 0; block < blocks; block++) { |
1804 | bh = ext4_bread(handle, dir, block, 0, &retval); | 1844 | if (!(bh = ext4_bread(handle, dir, block, 0, &retval))) { |
1805 | if(!bh) | 1845 | if (!retval) { |
1846 | retval = -EIO; | ||
1847 | ext4_error(inode->i_sb, | ||
1848 | "Directory hole detected on inode %lu\n", | ||
1849 | inode->i_ino); | ||
1850 | } | ||
1806 | return retval; | 1851 | return retval; |
1852 | } | ||
1807 | if (!buffer_verified(bh) && | 1853 | if (!buffer_verified(bh) && |
1808 | !ext4_dirent_csum_verify(dir, | 1854 | !ext4_dirent_csum_verify(dir, |
1809 | (struct ext4_dir_entry *)bh->b_data)) | 1855 | (struct ext4_dir_entry *)bh->b_data)) |
@@ -1860,8 +1906,15 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1860 | entries = frame->entries; | 1906 | entries = frame->entries; |
1861 | at = frame->at; | 1907 | at = frame->at; |
1862 | 1908 | ||
1863 | if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err))) | 1909 | if (!(bh = ext4_bread(handle, dir, dx_get_block(frame->at), 0, &err))) { |
1910 | if (!err) { | ||
1911 | err = -EIO; | ||
1912 | ext4_error(dir->i_sb, | ||
1913 | "Directory hole detected on inode %lu\n", | ||
1914 | dir->i_ino); | ||
1915 | } | ||
1864 | goto cleanup; | 1916 | goto cleanup; |
1917 | } | ||
1865 | 1918 | ||
1866 | if (!buffer_verified(bh) && | 1919 | if (!buffer_verified(bh) && |
1867 | !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) | 1920 | !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) |
@@ -2149,9 +2202,7 @@ retry: | |||
2149 | err = PTR_ERR(inode); | 2202 | err = PTR_ERR(inode); |
2150 | if (!IS_ERR(inode)) { | 2203 | if (!IS_ERR(inode)) { |
2151 | init_special_inode(inode, inode->i_mode, rdev); | 2204 | init_special_inode(inode, inode->i_mode, rdev); |
2152 | #ifdef CONFIG_EXT4_FS_XATTR | ||
2153 | inode->i_op = &ext4_special_inode_operations; | 2205 | inode->i_op = &ext4_special_inode_operations; |
2154 | #endif | ||
2155 | err = ext4_add_nondir(handle, dentry, inode); | 2206 | err = ext4_add_nondir(handle, dentry, inode); |
2156 | } | 2207 | } |
2157 | ext4_journal_stop(handle); | 2208 | ext4_journal_stop(handle); |
@@ -2199,9 +2250,15 @@ retry: | |||
2199 | inode->i_op = &ext4_dir_inode_operations; | 2250 | inode->i_op = &ext4_dir_inode_operations; |
2200 | inode->i_fop = &ext4_dir_operations; | 2251 | inode->i_fop = &ext4_dir_operations; |
2201 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; | 2252 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; |
2202 | dir_block = ext4_bread(handle, inode, 0, 1, &err); | 2253 | if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) { |
2203 | if (!dir_block) | 2254 | if (!err) { |
2255 | err = -EIO; | ||
2256 | ext4_error(inode->i_sb, | ||
2257 | "Directory hole detected on inode %lu\n", | ||
2258 | inode->i_ino); | ||
2259 | } | ||
2204 | goto out_clear_inode; | 2260 | goto out_clear_inode; |
2261 | } | ||
2205 | BUFFER_TRACE(dir_block, "get_write_access"); | 2262 | BUFFER_TRACE(dir_block, "get_write_access"); |
2206 | err = ext4_journal_get_write_access(handle, dir_block); | 2263 | err = ext4_journal_get_write_access(handle, dir_block); |
2207 | if (err) | 2264 | if (err) |
@@ -2318,6 +2375,11 @@ static int empty_dir(struct inode *inode) | |||
2318 | EXT4_ERROR_INODE(inode, | 2375 | EXT4_ERROR_INODE(inode, |
2319 | "error %d reading directory " | 2376 | "error %d reading directory " |
2320 | "lblock %u", err, lblock); | 2377 | "lblock %u", err, lblock); |
2378 | else | ||
2379 | ext4_warning(inode->i_sb, | ||
2380 | "bad directory (dir #%lu) - no data block", | ||
2381 | inode->i_ino); | ||
2382 | |||
2321 | offset += sb->s_blocksize; | 2383 | offset += sb->s_blocksize; |
2322 | continue; | 2384 | continue; |
2323 | } | 2385 | } |
@@ -2362,7 +2424,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
2362 | struct ext4_iloc iloc; | 2424 | struct ext4_iloc iloc; |
2363 | int err = 0, rc; | 2425 | int err = 0, rc; |
2364 | 2426 | ||
2365 | if (!ext4_handle_valid(handle)) | 2427 | if (!EXT4_SB(sb)->s_journal) |
2366 | return 0; | 2428 | return 0; |
2367 | 2429 | ||
2368 | mutex_lock(&EXT4_SB(sb)->s_orphan_lock); | 2430 | mutex_lock(&EXT4_SB(sb)->s_orphan_lock); |
@@ -2436,8 +2498,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) | |||
2436 | struct ext4_iloc iloc; | 2498 | struct ext4_iloc iloc; |
2437 | int err = 0; | 2499 | int err = 0; |
2438 | 2500 | ||
2439 | /* ext4_handle_valid() assumes a valid handle_t pointer */ | 2501 | if (!EXT4_SB(inode->i_sb)->s_journal) |
2440 | if (handle && !ext4_handle_valid(handle)) | ||
2441 | return 0; | 2502 | return 0; |
2442 | 2503 | ||
2443 | mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); | 2504 | mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); |
@@ -2456,7 +2517,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) | |||
2456 | * transaction handle with which to update the orphan list on | 2517 | * transaction handle with which to update the orphan list on |
2457 | * disk, but we still need to remove the inode from the linked | 2518 | * disk, but we still need to remove the inode from the linked |
2458 | * list in memory. */ | 2519 | * list in memory. */ |
2459 | if (sbi->s_journal && !handle) | 2520 | if (!handle) |
2460 | goto out; | 2521 | goto out; |
2461 | 2522 | ||
2462 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 2523 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
@@ -2826,9 +2887,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2826 | goto end_rename; | 2887 | goto end_rename; |
2827 | } | 2888 | } |
2828 | retval = -EIO; | 2889 | retval = -EIO; |
2829 | dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); | 2890 | if (!(dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval))) { |
2830 | if (!dir_bh) | 2891 | if (!retval) { |
2892 | retval = -EIO; | ||
2893 | ext4_error(old_inode->i_sb, | ||
2894 | "Directory hole detected on inode %lu\n", | ||
2895 | old_inode->i_ino); | ||
2896 | } | ||
2831 | goto end_rename; | 2897 | goto end_rename; |
2898 | } | ||
2832 | if (!buffer_verified(dir_bh) && | 2899 | if (!buffer_verified(dir_bh) && |
2833 | !ext4_dirent_csum_verify(old_inode, | 2900 | !ext4_dirent_csum_verify(old_inode, |
2834 | (struct ext4_dir_entry *)dir_bh->b_data)) | 2901 | (struct ext4_dir_entry *)dir_bh->b_data)) |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index dcdeef169a69..68e896e12a67 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -71,6 +71,9 @@ void ext4_free_io_end(ext4_io_end_t *io) | |||
71 | int i; | 71 | int i; |
72 | 72 | ||
73 | BUG_ON(!io); | 73 | BUG_ON(!io); |
74 | BUG_ON(!list_empty(&io->list)); | ||
75 | BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); | ||
76 | |||
74 | if (io->page) | 77 | if (io->page) |
75 | put_page(io->page); | 78 | put_page(io->page); |
76 | for (i = 0; i < io->num_io_pages; i++) | 79 | for (i = 0; i < io->num_io_pages; i++) |
@@ -81,13 +84,8 @@ void ext4_free_io_end(ext4_io_end_t *io) | |||
81 | kmem_cache_free(io_end_cachep, io); | 84 | kmem_cache_free(io_end_cachep, io); |
82 | } | 85 | } |
83 | 86 | ||
84 | /* | 87 | /* check a range of space and convert unwritten extents to written. */ |
85 | * check a range of space and convert unwritten extents to written. | 88 | static int ext4_end_io(ext4_io_end_t *io) |
86 | * | ||
87 | * Called with inode->i_mutex; we depend on this when we manipulate | ||
88 | * io->flag, since we could otherwise race with ext4_flush_completed_IO() | ||
89 | */ | ||
90 | int ext4_end_io_nolock(ext4_io_end_t *io) | ||
91 | { | 89 | { |
92 | struct inode *inode = io->inode; | 90 | struct inode *inode = io->inode; |
93 | loff_t offset = io->offset; | 91 | loff_t offset = io->offset; |
@@ -106,63 +104,136 @@ int ext4_end_io_nolock(ext4_io_end_t *io) | |||
106 | "(inode %lu, offset %llu, size %zd, error %d)", | 104 | "(inode %lu, offset %llu, size %zd, error %d)", |
107 | inode->i_ino, offset, size, ret); | 105 | inode->i_ino, offset, size, ret); |
108 | } | 106 | } |
109 | |||
110 | if (io->iocb) | 107 | if (io->iocb) |
111 | aio_complete(io->iocb, io->result, 0); | 108 | aio_complete(io->iocb, io->result, 0); |
112 | 109 | ||
113 | if (io->flag & EXT4_IO_END_DIRECT) | 110 | if (io->flag & EXT4_IO_END_DIRECT) |
114 | inode_dio_done(inode); | 111 | inode_dio_done(inode); |
115 | /* Wake up anyone waiting on unwritten extent conversion */ | 112 | /* Wake up anyone waiting on unwritten extent conversion */ |
116 | if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) | 113 | if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) |
117 | wake_up_all(ext4_ioend_wq(io->inode)); | 114 | wake_up_all(ext4_ioend_wq(io->inode)); |
118 | return ret; | 115 | return ret; |
119 | } | 116 | } |
120 | 117 | ||
121 | /* | 118 | static void dump_completed_IO(struct inode *inode) |
122 | * work on completed aio dio IO, to convert unwritten extents to extents | 119 | { |
123 | */ | 120 | #ifdef EXT4FS_DEBUG |
124 | static void ext4_end_io_work(struct work_struct *work) | 121 | struct list_head *cur, *before, *after; |
122 | ext4_io_end_t *io, *io0, *io1; | ||
123 | unsigned long flags; | ||
124 | |||
125 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)) { | ||
126 | ext4_debug("inode %lu completed_io list is empty\n", | ||
127 | inode->i_ino); | ||
128 | return; | ||
129 | } | ||
130 | |||
131 | ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino); | ||
132 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) { | ||
133 | cur = &io->list; | ||
134 | before = cur->prev; | ||
135 | io0 = container_of(before, ext4_io_end_t, list); | ||
136 | after = cur->next; | ||
137 | io1 = container_of(after, ext4_io_end_t, list); | ||
138 | |||
139 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
140 | io, inode->i_ino, io0, io1); | ||
141 | } | ||
142 | #endif | ||
143 | } | ||
144 | |||
145 | /* Add the io_end to per-inode completed end_io list. */ | ||
146 | void ext4_add_complete_io(ext4_io_end_t *io_end) | ||
125 | { | 147 | { |
126 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | 148 | struct ext4_inode_info *ei = EXT4_I(io_end->inode); |
127 | struct inode *inode = io->inode; | 149 | struct workqueue_struct *wq; |
128 | struct ext4_inode_info *ei = EXT4_I(inode); | 150 | unsigned long flags; |
129 | unsigned long flags; | 151 | |
152 | BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); | ||
153 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | ||
130 | 154 | ||
131 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 155 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
132 | if (io->flag & EXT4_IO_END_IN_FSYNC) | 156 | if (list_empty(&ei->i_completed_io_list)) { |
133 | goto requeue; | 157 | io_end->flag |= EXT4_IO_END_QUEUED; |
134 | if (list_empty(&io->list)) { | 158 | queue_work(wq, &io_end->work); |
135 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
136 | goto free; | ||
137 | } | 159 | } |
160 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | ||
161 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
162 | } | ||
138 | 163 | ||
139 | if (!mutex_trylock(&inode->i_mutex)) { | 164 | static int ext4_do_flush_completed_IO(struct inode *inode, |
140 | bool was_queued; | 165 | ext4_io_end_t *work_io) |
141 | requeue: | 166 | { |
142 | was_queued = !!(io->flag & EXT4_IO_END_QUEUED); | 167 | ext4_io_end_t *io; |
143 | io->flag |= EXT4_IO_END_QUEUED; | 168 | struct list_head unwritten, complete, to_free; |
144 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 169 | unsigned long flags; |
145 | /* | 170 | struct ext4_inode_info *ei = EXT4_I(inode); |
146 | * Requeue the work instead of waiting so that the work | 171 | int err, ret = 0; |
147 | * items queued after this can be processed. | 172 | |
148 | */ | 173 | INIT_LIST_HEAD(&complete); |
149 | queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work); | 174 | INIT_LIST_HEAD(&to_free); |
150 | /* | 175 | |
151 | * To prevent the ext4-dio-unwritten thread from keeping | 176 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
152 | * requeueing end_io requests and occupying cpu for too long, | 177 | dump_completed_IO(inode); |
153 | * yield the cpu if it sees an end_io request that has already | 178 | list_replace_init(&ei->i_completed_io_list, &unwritten); |
154 | * been requeued. | 179 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
155 | */ | 180 | |
156 | if (was_queued) | 181 | while (!list_empty(&unwritten)) { |
157 | yield(); | 182 | io = list_entry(unwritten.next, ext4_io_end_t, list); |
158 | return; | 183 | BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN)); |
184 | list_del_init(&io->list); | ||
185 | |||
186 | err = ext4_end_io(io); | ||
187 | if (unlikely(!ret && err)) | ||
188 | ret = err; | ||
189 | |||
190 | list_add_tail(&io->list, &complete); | ||
191 | } | ||
192 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
193 | while (!list_empty(&complete)) { | ||
194 | io = list_entry(complete.next, ext4_io_end_t, list); | ||
195 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
196 | /* end_io context can not be destroyed now because it still | ||
197 | * used by queued worker. Worker thread will destroy it later */ | ||
198 | if (io->flag & EXT4_IO_END_QUEUED) | ||
199 | list_del_init(&io->list); | ||
200 | else | ||
201 | list_move(&io->list, &to_free); | ||
202 | } | ||
203 | /* If we are called from worker context, it is time to clear queued | ||
204 | * flag, and destroy it's end_io if it was converted already */ | ||
205 | if (work_io) { | ||
206 | work_io->flag &= ~EXT4_IO_END_QUEUED; | ||
207 | if (!(work_io->flag & EXT4_IO_END_UNWRITTEN)) | ||
208 | list_add_tail(&work_io->list, &to_free); | ||
159 | } | 209 | } |
160 | list_del_init(&io->list); | ||
161 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 210 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
162 | (void) ext4_end_io_nolock(io); | 211 | |
163 | mutex_unlock(&inode->i_mutex); | 212 | while (!list_empty(&to_free)) { |
164 | free: | 213 | io = list_entry(to_free.next, ext4_io_end_t, list); |
165 | ext4_free_io_end(io); | 214 | list_del_init(&io->list); |
215 | ext4_free_io_end(io); | ||
216 | } | ||
217 | return ret; | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * work on completed aio dio IO, to convert unwritten extents to extents | ||
222 | */ | ||
223 | static void ext4_end_io_work(struct work_struct *work) | ||
224 | { | ||
225 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
226 | ext4_do_flush_completed_IO(io->inode, io); | ||
227 | } | ||
228 | |||
229 | int ext4_flush_unwritten_io(struct inode *inode) | ||
230 | { | ||
231 | int ret; | ||
232 | WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) && | ||
233 | !(inode->i_state & I_FREEING)); | ||
234 | ret = ext4_do_flush_completed_IO(inode, NULL); | ||
235 | ext4_unwritten_wait(inode); | ||
236 | return ret; | ||
166 | } | 237 | } |
167 | 238 | ||
168 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | 239 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) |
@@ -195,9 +266,7 @@ static void buffer_io_error(struct buffer_head *bh) | |||
195 | static void ext4_end_bio(struct bio *bio, int error) | 266 | static void ext4_end_bio(struct bio *bio, int error) |
196 | { | 267 | { |
197 | ext4_io_end_t *io_end = bio->bi_private; | 268 | ext4_io_end_t *io_end = bio->bi_private; |
198 | struct workqueue_struct *wq; | ||
199 | struct inode *inode; | 269 | struct inode *inode; |
200 | unsigned long flags; | ||
201 | int i; | 270 | int i; |
202 | sector_t bi_sector = bio->bi_sector; | 271 | sector_t bi_sector = bio->bi_sector; |
203 | 272 | ||
@@ -255,14 +324,7 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
255 | return; | 324 | return; |
256 | } | 325 | } |
257 | 326 | ||
258 | /* Add the io_end to per-inode completed io list*/ | 327 | ext4_add_complete_io(io_end); |
259 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
260 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | ||
261 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
262 | |||
263 | wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; | ||
264 | /* queue the work to convert unwritten extents to written */ | ||
265 | queue_work(wq, &io_end->work); | ||
266 | } | 328 | } |
267 | 329 | ||
268 | void ext4_io_submit(struct ext4_io_submit *io) | 330 | void ext4_io_submit(struct ext4_io_submit *io) |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 41f6ef68e2e1..7a75e1086961 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -45,6 +45,28 @@ void ext4_resize_end(struct super_block *sb) | |||
45 | smp_mb__after_clear_bit(); | 45 | smp_mb__after_clear_bit(); |
46 | } | 46 | } |
47 | 47 | ||
48 | static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb, | ||
49 | ext4_group_t group) { | ||
50 | return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) << | ||
51 | EXT4_DESC_PER_BLOCK_BITS(sb); | ||
52 | } | ||
53 | |||
54 | static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb, | ||
55 | ext4_group_t group) { | ||
56 | group = ext4_meta_bg_first_group(sb, group); | ||
57 | return ext4_group_first_block_no(sb, group); | ||
58 | } | ||
59 | |||
60 | static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb, | ||
61 | ext4_group_t group) { | ||
62 | ext4_grpblk_t overhead; | ||
63 | overhead = ext4_bg_num_gdb(sb, group); | ||
64 | if (ext4_bg_has_super(sb, group)) | ||
65 | overhead += 1 + | ||
66 | le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); | ||
67 | return overhead; | ||
68 | } | ||
69 | |||
48 | #define outside(b, first, last) ((b) < (first) || (b) >= (last)) | 70 | #define outside(b, first, last) ((b) < (first) || (b) >= (last)) |
49 | #define inside(b, first, last) ((b) >= (first) && (b) < (last)) | 71 | #define inside(b, first, last) ((b) >= (first) && (b) < (last)) |
50 | 72 | ||
@@ -57,9 +79,7 @@ static int verify_group_input(struct super_block *sb, | |||
57 | ext4_fsblk_t end = start + input->blocks_count; | 79 | ext4_fsblk_t end = start + input->blocks_count; |
58 | ext4_group_t group = input->group; | 80 | ext4_group_t group = input->group; |
59 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; | 81 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; |
60 | unsigned overhead = ext4_bg_has_super(sb, group) ? | 82 | unsigned overhead = ext4_group_overhead_blocks(sb, group); |
61 | (1 + ext4_bg_num_gdb(sb, group) + | ||
62 | le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; | ||
63 | ext4_fsblk_t metaend = start + overhead; | 83 | ext4_fsblk_t metaend = start + overhead; |
64 | struct buffer_head *bh = NULL; | 84 | struct buffer_head *bh = NULL; |
65 | ext4_grpblk_t free_blocks_count, offset; | 85 | ext4_grpblk_t free_blocks_count, offset; |
@@ -200,13 +220,15 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) | |||
200 | * be a partial of a flex group. | 220 | * be a partial of a flex group. |
201 | * | 221 | * |
202 | * @sb: super block of fs to which the groups belongs | 222 | * @sb: super block of fs to which the groups belongs |
223 | * | ||
224 | * Returns 0 on a successful allocation of the metadata blocks in the | ||
225 | * block group. | ||
203 | */ | 226 | */ |
204 | static void ext4_alloc_group_tables(struct super_block *sb, | 227 | static int ext4_alloc_group_tables(struct super_block *sb, |
205 | struct ext4_new_flex_group_data *flex_gd, | 228 | struct ext4_new_flex_group_data *flex_gd, |
206 | int flexbg_size) | 229 | int flexbg_size) |
207 | { | 230 | { |
208 | struct ext4_new_group_data *group_data = flex_gd->groups; | 231 | struct ext4_new_group_data *group_data = flex_gd->groups; |
209 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
210 | ext4_fsblk_t start_blk; | 232 | ext4_fsblk_t start_blk; |
211 | ext4_fsblk_t last_blk; | 233 | ext4_fsblk_t last_blk; |
212 | ext4_group_t src_group; | 234 | ext4_group_t src_group; |
@@ -226,23 +248,24 @@ static void ext4_alloc_group_tables(struct super_block *sb, | |||
226 | (last_group & ~(flexbg_size - 1)))); | 248 | (last_group & ~(flexbg_size - 1)))); |
227 | next_group: | 249 | next_group: |
228 | group = group_data[0].group; | 250 | group = group_data[0].group; |
251 | if (src_group >= group_data[0].group + flex_gd->count) | ||
252 | return -ENOSPC; | ||
229 | start_blk = ext4_group_first_block_no(sb, src_group); | 253 | start_blk = ext4_group_first_block_no(sb, src_group); |
230 | last_blk = start_blk + group_data[src_group - group].blocks_count; | 254 | last_blk = start_blk + group_data[src_group - group].blocks_count; |
231 | 255 | ||
232 | overhead = ext4_bg_has_super(sb, src_group) ? | 256 | overhead = ext4_group_overhead_blocks(sb, src_group); |
233 | (1 + ext4_bg_num_gdb(sb, src_group) + | ||
234 | le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; | ||
235 | 257 | ||
236 | start_blk += overhead; | 258 | start_blk += overhead; |
237 | 259 | ||
238 | BUG_ON(src_group >= group_data[0].group + flex_gd->count); | ||
239 | /* We collect contiguous blocks as much as possible. */ | 260 | /* We collect contiguous blocks as much as possible. */ |
240 | src_group++; | 261 | src_group++; |
241 | for (; src_group <= last_group; src_group++) | 262 | for (; src_group <= last_group; src_group++) { |
242 | if (!ext4_bg_has_super(sb, src_group)) | 263 | overhead = ext4_group_overhead_blocks(sb, src_group); |
264 | if (overhead != 0) | ||
243 | last_blk += group_data[src_group - group].blocks_count; | 265 | last_blk += group_data[src_group - group].blocks_count; |
244 | else | 266 | else |
245 | break; | 267 | break; |
268 | } | ||
246 | 269 | ||
247 | /* Allocate block bitmaps */ | 270 | /* Allocate block bitmaps */ |
248 | for (; bb_index < flex_gd->count; bb_index++) { | 271 | for (; bb_index < flex_gd->count; bb_index++) { |
@@ -300,6 +323,7 @@ next_group: | |||
300 | group_data[i].free_blocks_count); | 323 | group_data[i].free_blocks_count); |
301 | } | 324 | } |
302 | } | 325 | } |
326 | return 0; | ||
303 | } | 327 | } |
304 | 328 | ||
305 | static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, | 329 | static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, |
@@ -433,11 +457,13 @@ static int setup_new_flex_group_blocks(struct super_block *sb, | |||
433 | ext4_group_t group, count; | 457 | ext4_group_t group, count; |
434 | struct buffer_head *bh = NULL; | 458 | struct buffer_head *bh = NULL; |
435 | int reserved_gdb, i, j, err = 0, err2; | 459 | int reserved_gdb, i, j, err = 0, err2; |
460 | int meta_bg; | ||
436 | 461 | ||
437 | BUG_ON(!flex_gd->count || !group_data || | 462 | BUG_ON(!flex_gd->count || !group_data || |
438 | group_data[0].group != sbi->s_groups_count); | 463 | group_data[0].group != sbi->s_groups_count); |
439 | 464 | ||
440 | reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); | 465 | reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); |
466 | meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); | ||
441 | 467 | ||
442 | /* This transaction may be extended/restarted along the way */ | 468 | /* This transaction may be extended/restarted along the way */ |
443 | handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); | 469 | handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); |
@@ -447,12 +473,25 @@ static int setup_new_flex_group_blocks(struct super_block *sb, | |||
447 | group = group_data[0].group; | 473 | group = group_data[0].group; |
448 | for (i = 0; i < flex_gd->count; i++, group++) { | 474 | for (i = 0; i < flex_gd->count; i++, group++) { |
449 | unsigned long gdblocks; | 475 | unsigned long gdblocks; |
476 | ext4_grpblk_t overhead; | ||
450 | 477 | ||
451 | gdblocks = ext4_bg_num_gdb(sb, group); | 478 | gdblocks = ext4_bg_num_gdb(sb, group); |
452 | start = ext4_group_first_block_no(sb, group); | 479 | start = ext4_group_first_block_no(sb, group); |
453 | 480 | ||
481 | if (meta_bg == 0 && !ext4_bg_has_super(sb, group)) | ||
482 | goto handle_itb; | ||
483 | |||
484 | if (meta_bg == 1) { | ||
485 | ext4_group_t first_group; | ||
486 | first_group = ext4_meta_bg_first_group(sb, group); | ||
487 | if (first_group != group + 1 && | ||
488 | first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1) | ||
489 | goto handle_itb; | ||
490 | } | ||
491 | |||
492 | block = start + ext4_bg_has_super(sb, group); | ||
454 | /* Copy all of the GDT blocks into the backup in this group */ | 493 | /* Copy all of the GDT blocks into the backup in this group */ |
455 | for (j = 0, block = start + 1; j < gdblocks; j++, block++) { | 494 | for (j = 0; j < gdblocks; j++, block++) { |
456 | struct buffer_head *gdb; | 495 | struct buffer_head *gdb; |
457 | 496 | ||
458 | ext4_debug("update backup group %#04llx\n", block); | 497 | ext4_debug("update backup group %#04llx\n", block); |
@@ -493,6 +532,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb, | |||
493 | goto out; | 532 | goto out; |
494 | } | 533 | } |
495 | 534 | ||
535 | handle_itb: | ||
496 | /* Initialize group tables of the grop @group */ | 536 | /* Initialize group tables of the grop @group */ |
497 | if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) | 537 | if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) |
498 | goto handle_bb; | 538 | goto handle_bb; |
@@ -521,11 +561,11 @@ handle_bb: | |||
521 | err = PTR_ERR(bh); | 561 | err = PTR_ERR(bh); |
522 | goto out; | 562 | goto out; |
523 | } | 563 | } |
524 | if (ext4_bg_has_super(sb, group)) { | 564 | overhead = ext4_group_overhead_blocks(sb, group); |
565 | if (overhead != 0) { | ||
525 | ext4_debug("mark backup superblock %#04llx (+0)\n", | 566 | ext4_debug("mark backup superblock %#04llx (+0)\n", |
526 | start); | 567 | start); |
527 | ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + | 568 | ext4_set_bits(bh->b_data, 0, overhead); |
528 | 1); | ||
529 | } | 569 | } |
530 | ext4_mark_bitmap_end(group_data[i].blocks_count, | 570 | ext4_mark_bitmap_end(group_data[i].blocks_count, |
531 | sb->s_blocksize * 8, bh->b_data); | 571 | sb->s_blocksize * 8, bh->b_data); |
@@ -822,6 +862,45 @@ exit_bh: | |||
822 | } | 862 | } |
823 | 863 | ||
824 | /* | 864 | /* |
865 | * add_new_gdb_meta_bg is the sister of add_new_gdb. | ||
866 | */ | ||
867 | static int add_new_gdb_meta_bg(struct super_block *sb, | ||
868 | handle_t *handle, ext4_group_t group) { | ||
869 | ext4_fsblk_t gdblock; | ||
870 | struct buffer_head *gdb_bh; | ||
871 | struct buffer_head **o_group_desc, **n_group_desc; | ||
872 | unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); | ||
873 | int err; | ||
874 | |||
875 | gdblock = ext4_meta_bg_first_block_no(sb, group) + | ||
876 | ext4_bg_has_super(sb, group); | ||
877 | gdb_bh = sb_bread(sb, gdblock); | ||
878 | if (!gdb_bh) | ||
879 | return -EIO; | ||
880 | n_group_desc = ext4_kvmalloc((gdb_num + 1) * | ||
881 | sizeof(struct buffer_head *), | ||
882 | GFP_NOFS); | ||
883 | if (!n_group_desc) { | ||
884 | err = -ENOMEM; | ||
885 | ext4_warning(sb, "not enough memory for %lu groups", | ||
886 | gdb_num + 1); | ||
887 | return err; | ||
888 | } | ||
889 | |||
890 | o_group_desc = EXT4_SB(sb)->s_group_desc; | ||
891 | memcpy(n_group_desc, o_group_desc, | ||
892 | EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); | ||
893 | n_group_desc[gdb_num] = gdb_bh; | ||
894 | EXT4_SB(sb)->s_group_desc = n_group_desc; | ||
895 | EXT4_SB(sb)->s_gdb_count++; | ||
896 | ext4_kvfree(o_group_desc); | ||
897 | err = ext4_journal_get_write_access(handle, gdb_bh); | ||
898 | if (unlikely(err)) | ||
899 | brelse(gdb_bh); | ||
900 | return err; | ||
901 | } | ||
902 | |||
903 | /* | ||
825 | * Called when we are adding a new group which has a backup copy of each of | 904 | * Called when we are adding a new group which has a backup copy of each of |
826 | * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. | 905 | * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. |
827 | * We need to add these reserved backup GDT blocks to the resize inode, so | 906 | * We need to add these reserved backup GDT blocks to the resize inode, so |
@@ -949,16 +1028,16 @@ exit_free: | |||
949 | * do not copy the full number of backups at this time. The resize | 1028 | * do not copy the full number of backups at this time. The resize |
950 | * which changed s_groups_count will backup again. | 1029 | * which changed s_groups_count will backup again. |
951 | */ | 1030 | */ |
952 | static void update_backups(struct super_block *sb, | 1031 | static void update_backups(struct super_block *sb, int blk_off, char *data, |
953 | int blk_off, char *data, int size) | 1032 | int size, int meta_bg) |
954 | { | 1033 | { |
955 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1034 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
956 | const ext4_group_t last = sbi->s_groups_count; | 1035 | ext4_group_t last; |
957 | const int bpg = EXT4_BLOCKS_PER_GROUP(sb); | 1036 | const int bpg = EXT4_BLOCKS_PER_GROUP(sb); |
958 | unsigned three = 1; | 1037 | unsigned three = 1; |
959 | unsigned five = 5; | 1038 | unsigned five = 5; |
960 | unsigned seven = 7; | 1039 | unsigned seven = 7; |
961 | ext4_group_t group; | 1040 | ext4_group_t group = 0; |
962 | int rest = sb->s_blocksize - size; | 1041 | int rest = sb->s_blocksize - size; |
963 | handle_t *handle; | 1042 | handle_t *handle; |
964 | int err = 0, err2; | 1043 | int err = 0, err2; |
@@ -970,10 +1049,17 @@ static void update_backups(struct super_block *sb, | |||
970 | goto exit_err; | 1049 | goto exit_err; |
971 | } | 1050 | } |
972 | 1051 | ||
973 | ext4_superblock_csum_set(sb, (struct ext4_super_block *)data); | 1052 | if (meta_bg == 0) { |
1053 | group = ext4_list_backups(sb, &three, &five, &seven); | ||
1054 | last = sbi->s_groups_count; | ||
1055 | } else { | ||
1056 | group = ext4_meta_bg_first_group(sb, group) + 1; | ||
1057 | last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2); | ||
1058 | } | ||
974 | 1059 | ||
975 | while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { | 1060 | while (group < sbi->s_groups_count) { |
976 | struct buffer_head *bh; | 1061 | struct buffer_head *bh; |
1062 | ext4_fsblk_t backup_block; | ||
977 | 1063 | ||
978 | /* Out of journal space, and can't get more - abort - so sad */ | 1064 | /* Out of journal space, and can't get more - abort - so sad */ |
979 | if (ext4_handle_valid(handle) && | 1065 | if (ext4_handle_valid(handle) && |
@@ -982,13 +1068,20 @@ static void update_backups(struct super_block *sb, | |||
982 | (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) | 1068 | (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) |
983 | break; | 1069 | break; |
984 | 1070 | ||
985 | bh = sb_getblk(sb, group * bpg + blk_off); | 1071 | if (meta_bg == 0) |
1072 | backup_block = group * bpg + blk_off; | ||
1073 | else | ||
1074 | backup_block = (ext4_group_first_block_no(sb, group) + | ||
1075 | ext4_bg_has_super(sb, group)); | ||
1076 | |||
1077 | bh = sb_getblk(sb, backup_block); | ||
986 | if (!bh) { | 1078 | if (!bh) { |
987 | err = -EIO; | 1079 | err = -EIO; |
988 | break; | 1080 | break; |
989 | } | 1081 | } |
990 | ext4_debug("update metadata backup %#04lx\n", | 1082 | ext4_debug("update metadata backup %llu(+%llu)\n", |
991 | (unsigned long)bh->b_blocknr); | 1083 | backup_block, backup_block - |
1084 | ext4_group_first_block_no(sb, group)); | ||
992 | if ((err = ext4_journal_get_write_access(handle, bh))) | 1085 | if ((err = ext4_journal_get_write_access(handle, bh))) |
993 | break; | 1086 | break; |
994 | lock_buffer(bh); | 1087 | lock_buffer(bh); |
@@ -1001,6 +1094,13 @@ static void update_backups(struct super_block *sb, | |||
1001 | if (unlikely(err)) | 1094 | if (unlikely(err)) |
1002 | ext4_std_error(sb, err); | 1095 | ext4_std_error(sb, err); |
1003 | brelse(bh); | 1096 | brelse(bh); |
1097 | |||
1098 | if (meta_bg == 0) | ||
1099 | group = ext4_list_backups(sb, &three, &five, &seven); | ||
1100 | else if (group == last) | ||
1101 | break; | ||
1102 | else | ||
1103 | group = last; | ||
1004 | } | 1104 | } |
1005 | if ((err2 = ext4_journal_stop(handle)) && !err) | 1105 | if ((err2 = ext4_journal_stop(handle)) && !err) |
1006 | err = err2; | 1106 | err = err2; |
@@ -1043,7 +1143,9 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, | |||
1043 | struct ext4_super_block *es = sbi->s_es; | 1143 | struct ext4_super_block *es = sbi->s_es; |
1044 | struct buffer_head *gdb_bh; | 1144 | struct buffer_head *gdb_bh; |
1045 | int i, gdb_off, gdb_num, err = 0; | 1145 | int i, gdb_off, gdb_num, err = 0; |
1146 | int meta_bg; | ||
1046 | 1147 | ||
1148 | meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); | ||
1047 | for (i = 0; i < count; i++, group++) { | 1149 | for (i = 0; i < count; i++, group++) { |
1048 | int reserved_gdb = ext4_bg_has_super(sb, group) ? | 1150 | int reserved_gdb = ext4_bg_has_super(sb, group) ? |
1049 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; | 1151 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; |
@@ -1063,8 +1165,11 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, | |||
1063 | 1165 | ||
1064 | if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) | 1166 | if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) |
1065 | err = reserve_backup_gdb(handle, resize_inode, group); | 1167 | err = reserve_backup_gdb(handle, resize_inode, group); |
1066 | } else | 1168 | } else if (meta_bg != 0) { |
1169 | err = add_new_gdb_meta_bg(sb, handle, group); | ||
1170 | } else { | ||
1067 | err = add_new_gdb(handle, resize_inode, group); | 1171 | err = add_new_gdb(handle, resize_inode, group); |
1172 | } | ||
1068 | if (err) | 1173 | if (err) |
1069 | break; | 1174 | break; |
1070 | } | 1175 | } |
@@ -1076,17 +1181,12 @@ static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) | |||
1076 | struct buffer_head *bh = sb_getblk(sb, block); | 1181 | struct buffer_head *bh = sb_getblk(sb, block); |
1077 | if (!bh) | 1182 | if (!bh) |
1078 | return NULL; | 1183 | return NULL; |
1079 | 1184 | if (!bh_uptodate_or_lock(bh)) { | |
1080 | if (bitmap_uptodate(bh)) | 1185 | if (bh_submit_read(bh) < 0) { |
1081 | return bh; | 1186 | brelse(bh); |
1082 | 1187 | return NULL; | |
1083 | lock_buffer(bh); | 1188 | } |
1084 | if (bh_submit_read(bh) < 0) { | ||
1085 | unlock_buffer(bh); | ||
1086 | brelse(bh); | ||
1087 | return NULL; | ||
1088 | } | 1189 | } |
1089 | unlock_buffer(bh); | ||
1090 | 1190 | ||
1091 | return bh; | 1191 | return bh; |
1092 | } | 1192 | } |
@@ -1161,6 +1261,9 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, | |||
1161 | ext4_free_group_clusters_set(sb, gdp, | 1261 | ext4_free_group_clusters_set(sb, gdp, |
1162 | EXT4_B2C(sbi, group_data->free_blocks_count)); | 1262 | EXT4_B2C(sbi, group_data->free_blocks_count)); |
1163 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); | 1263 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); |
1264 | if (ext4_has_group_desc_csum(sb)) | ||
1265 | ext4_itable_unused_set(sb, gdp, | ||
1266 | EXT4_INODES_PER_GROUP(sb)); | ||
1164 | gdp->bg_flags = cpu_to_le16(*bg_flags); | 1267 | gdp->bg_flags = cpu_to_le16(*bg_flags); |
1165 | ext4_group_desc_csum_set(sb, group, gdp); | 1268 | ext4_group_desc_csum_set(sb, group, gdp); |
1166 | 1269 | ||
@@ -1216,7 +1319,7 @@ static void ext4_update_super(struct super_block *sb, | |||
1216 | } | 1319 | } |
1217 | 1320 | ||
1218 | reserved_blocks = ext4_r_blocks_count(es) * 100; | 1321 | reserved_blocks = ext4_r_blocks_count(es) * 100; |
1219 | do_div(reserved_blocks, ext4_blocks_count(es)); | 1322 | reserved_blocks = div64_u64(reserved_blocks, ext4_blocks_count(es)); |
1220 | reserved_blocks *= blocks_count; | 1323 | reserved_blocks *= blocks_count; |
1221 | do_div(reserved_blocks, 100); | 1324 | do_div(reserved_blocks, 100); |
1222 | 1325 | ||
@@ -1227,6 +1330,7 @@ static void ext4_update_super(struct super_block *sb, | |||
1227 | le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * | 1330 | le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * |
1228 | flex_gd->count); | 1331 | flex_gd->count); |
1229 | 1332 | ||
1333 | ext4_debug("free blocks count %llu", ext4_free_blocks_count(es)); | ||
1230 | /* | 1334 | /* |
1231 | * We need to protect s_groups_count against other CPUs seeing | 1335 | * We need to protect s_groups_count against other CPUs seeing |
1232 | * inconsistent state in the superblock. | 1336 | * inconsistent state in the superblock. |
@@ -1261,6 +1365,8 @@ static void ext4_update_super(struct super_block *sb, | |||
1261 | percpu_counter_add(&sbi->s_freeinodes_counter, | 1365 | percpu_counter_add(&sbi->s_freeinodes_counter, |
1262 | EXT4_INODES_PER_GROUP(sb) * flex_gd->count); | 1366 | EXT4_INODES_PER_GROUP(sb) * flex_gd->count); |
1263 | 1367 | ||
1368 | ext4_debug("free blocks count %llu", | ||
1369 | percpu_counter_read(&sbi->s_freeclusters_counter)); | ||
1264 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | 1370 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, |
1265 | EXT4_FEATURE_INCOMPAT_FLEX_BG) && | 1371 | EXT4_FEATURE_INCOMPAT_FLEX_BG) && |
1266 | sbi->s_log_groups_per_flex) { | 1372 | sbi->s_log_groups_per_flex) { |
@@ -1349,16 +1455,24 @@ exit_journal: | |||
1349 | err = err2; | 1455 | err = err2; |
1350 | 1456 | ||
1351 | if (!err) { | 1457 | if (!err) { |
1352 | int i; | 1458 | int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); |
1459 | int gdb_num_end = ((group + flex_gd->count - 1) / | ||
1460 | EXT4_DESC_PER_BLOCK(sb)); | ||
1461 | int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
1462 | EXT4_FEATURE_INCOMPAT_META_BG); | ||
1463 | sector_t old_gdb = 0; | ||
1464 | |||
1353 | update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, | 1465 | update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, |
1354 | sizeof(struct ext4_super_block)); | 1466 | sizeof(struct ext4_super_block), 0); |
1355 | for (i = 0; i < flex_gd->count; i++, group++) { | 1467 | for (; gdb_num <= gdb_num_end; gdb_num++) { |
1356 | struct buffer_head *gdb_bh; | 1468 | struct buffer_head *gdb_bh; |
1357 | int gdb_num; | 1469 | |
1358 | gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb); | ||
1359 | gdb_bh = sbi->s_group_desc[gdb_num]; | 1470 | gdb_bh = sbi->s_group_desc[gdb_num]; |
1471 | if (old_gdb == gdb_bh->b_blocknr) | ||
1472 | continue; | ||
1360 | update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, | 1473 | update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, |
1361 | gdb_bh->b_size); | 1474 | gdb_bh->b_size, meta_bg); |
1475 | old_gdb = gdb_bh->b_blocknr; | ||
1362 | } | 1476 | } |
1363 | } | 1477 | } |
1364 | exit: | 1478 | exit: |
@@ -1402,9 +1516,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, | |||
1402 | 1516 | ||
1403 | group_data[i].group = group + i; | 1517 | group_data[i].group = group + i; |
1404 | group_data[i].blocks_count = blocks_per_group; | 1518 | group_data[i].blocks_count = blocks_per_group; |
1405 | overhead = ext4_bg_has_super(sb, group + i) ? | 1519 | overhead = ext4_group_overhead_blocks(sb, group + i); |
1406 | (1 + ext4_bg_num_gdb(sb, group + i) + | ||
1407 | le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; | ||
1408 | group_data[i].free_blocks_count = blocks_per_group - overhead; | 1520 | group_data[i].free_blocks_count = blocks_per_group - overhead; |
1409 | if (ext4_has_group_desc_csum(sb)) | 1521 | if (ext4_has_group_desc_csum(sb)) |
1410 | flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | | 1522 | flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | |
@@ -1492,6 +1604,14 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
1492 | if (err) | 1604 | if (err) |
1493 | goto out; | 1605 | goto out; |
1494 | 1606 | ||
1607 | err = ext4_alloc_flex_bg_array(sb, input->group + 1); | ||
1608 | if (err) | ||
1609 | return err; | ||
1610 | |||
1611 | err = ext4_mb_alloc_groupinfo(sb, input->group + 1); | ||
1612 | if (err) | ||
1613 | goto out; | ||
1614 | |||
1495 | flex_gd.count = 1; | 1615 | flex_gd.count = 1; |
1496 | flex_gd.groups = input; | 1616 | flex_gd.groups = input; |
1497 | flex_gd.bg_flags = &bg_flags; | 1617 | flex_gd.bg_flags = &bg_flags; |
@@ -1544,11 +1664,13 @@ errout: | |||
1544 | err = err2; | 1664 | err = err2; |
1545 | 1665 | ||
1546 | if (!err) { | 1666 | if (!err) { |
1667 | ext4_fsblk_t first_block; | ||
1668 | first_block = ext4_group_first_block_no(sb, 0); | ||
1547 | if (test_opt(sb, DEBUG)) | 1669 | if (test_opt(sb, DEBUG)) |
1548 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " | 1670 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " |
1549 | "blocks\n", ext4_blocks_count(es)); | 1671 | "blocks\n", ext4_blocks_count(es)); |
1550 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, | 1672 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block, |
1551 | sizeof(struct ext4_super_block)); | 1673 | (char *)es, sizeof(struct ext4_super_block), 0); |
1552 | } | 1674 | } |
1553 | return err; | 1675 | return err; |
1554 | } | 1676 | } |
@@ -1631,6 +1753,94 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1631 | return err; | 1753 | return err; |
1632 | } /* ext4_group_extend */ | 1754 | } /* ext4_group_extend */ |
1633 | 1755 | ||
1756 | |||
1757 | static int num_desc_blocks(struct super_block *sb, ext4_group_t groups) | ||
1758 | { | ||
1759 | return (groups + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); | ||
1760 | } | ||
1761 | |||
1762 | /* | ||
1763 | * Release the resize inode and drop the resize_inode feature if there | ||
1764 | * are no more reserved gdt blocks, and then convert the file system | ||
1765 | * to enable meta_bg | ||
1766 | */ | ||
1767 | static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) | ||
1768 | { | ||
1769 | handle_t *handle; | ||
1770 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1771 | struct ext4_super_block *es = sbi->s_es; | ||
1772 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
1773 | ext4_fsblk_t nr; | ||
1774 | int i, ret, err = 0; | ||
1775 | int credits = 1; | ||
1776 | |||
1777 | ext4_msg(sb, KERN_INFO, "Converting file system to meta_bg"); | ||
1778 | if (inode) { | ||
1779 | if (es->s_reserved_gdt_blocks) { | ||
1780 | ext4_error(sb, "Unexpected non-zero " | ||
1781 | "s_reserved_gdt_blocks"); | ||
1782 | return -EPERM; | ||
1783 | } | ||
1784 | |||
1785 | /* Do a quick sanity check of the resize inode */ | ||
1786 | if (inode->i_blocks != 1 << (inode->i_blkbits - 9)) | ||
1787 | goto invalid_resize_inode; | ||
1788 | for (i = 0; i < EXT4_N_BLOCKS; i++) { | ||
1789 | if (i == EXT4_DIND_BLOCK) { | ||
1790 | if (ei->i_data[i]) | ||
1791 | continue; | ||
1792 | else | ||
1793 | goto invalid_resize_inode; | ||
1794 | } | ||
1795 | if (ei->i_data[i]) | ||
1796 | goto invalid_resize_inode; | ||
1797 | } | ||
1798 | credits += 3; /* block bitmap, bg descriptor, resize inode */ | ||
1799 | } | ||
1800 | |||
1801 | handle = ext4_journal_start_sb(sb, credits); | ||
1802 | if (IS_ERR(handle)) | ||
1803 | return PTR_ERR(handle); | ||
1804 | |||
1805 | err = ext4_journal_get_write_access(handle, sbi->s_sbh); | ||
1806 | if (err) | ||
1807 | goto errout; | ||
1808 | |||
1809 | EXT4_CLEAR_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE); | ||
1810 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); | ||
1811 | sbi->s_es->s_first_meta_bg = | ||
1812 | cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count)); | ||
1813 | |||
1814 | err = ext4_handle_dirty_super(handle, sb); | ||
1815 | if (err) { | ||
1816 | ext4_std_error(sb, err); | ||
1817 | goto errout; | ||
1818 | } | ||
1819 | |||
1820 | if (inode) { | ||
1821 | nr = le32_to_cpu(ei->i_data[EXT4_DIND_BLOCK]); | ||
1822 | ext4_free_blocks(handle, inode, NULL, nr, 1, | ||
1823 | EXT4_FREE_BLOCKS_METADATA | | ||
1824 | EXT4_FREE_BLOCKS_FORGET); | ||
1825 | ei->i_data[EXT4_DIND_BLOCK] = 0; | ||
1826 | inode->i_blocks = 0; | ||
1827 | |||
1828 | err = ext4_mark_inode_dirty(handle, inode); | ||
1829 | if (err) | ||
1830 | ext4_std_error(sb, err); | ||
1831 | } | ||
1832 | |||
1833 | errout: | ||
1834 | ret = ext4_journal_stop(handle); | ||
1835 | if (!err) | ||
1836 | err = ret; | ||
1837 | return ret; | ||
1838 | |||
1839 | invalid_resize_inode: | ||
1840 | ext4_error(sb, "corrupted/inconsistent resize inode"); | ||
1841 | return -EINVAL; | ||
1842 | } | ||
1843 | |||
1634 | /* | 1844 | /* |
1635 | * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count | 1845 | * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count |
1636 | * | 1846 | * |
@@ -1643,21 +1853,31 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1643 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1853 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1644 | struct ext4_super_block *es = sbi->s_es; | 1854 | struct ext4_super_block *es = sbi->s_es; |
1645 | struct buffer_head *bh; | 1855 | struct buffer_head *bh; |
1646 | struct inode *resize_inode; | 1856 | struct inode *resize_inode = NULL; |
1647 | ext4_fsblk_t o_blocks_count; | 1857 | ext4_grpblk_t add, offset; |
1648 | ext4_group_t o_group; | ||
1649 | ext4_group_t n_group; | ||
1650 | ext4_grpblk_t offset, add; | ||
1651 | unsigned long n_desc_blocks; | 1858 | unsigned long n_desc_blocks; |
1652 | unsigned long o_desc_blocks; | 1859 | unsigned long o_desc_blocks; |
1653 | unsigned long desc_blocks; | 1860 | ext4_group_t o_group; |
1654 | int err = 0, flexbg_size = 1; | 1861 | ext4_group_t n_group; |
1862 | ext4_fsblk_t o_blocks_count; | ||
1863 | ext4_fsblk_t n_blocks_count_retry = 0; | ||
1864 | unsigned long last_update_time = 0; | ||
1865 | int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; | ||
1866 | int meta_bg; | ||
1655 | 1867 | ||
1868 | /* See if the device is actually as big as what was requested */ | ||
1869 | bh = sb_bread(sb, n_blocks_count - 1); | ||
1870 | if (!bh) { | ||
1871 | ext4_warning(sb, "can't read last block, resize aborted"); | ||
1872 | return -ENOSPC; | ||
1873 | } | ||
1874 | brelse(bh); | ||
1875 | |||
1876 | retry: | ||
1656 | o_blocks_count = ext4_blocks_count(es); | 1877 | o_blocks_count = ext4_blocks_count(es); |
1657 | 1878 | ||
1658 | if (test_opt(sb, DEBUG)) | 1879 | ext4_msg(sb, KERN_INFO, "resizing filesystem from %llu " |
1659 | ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu " | 1880 | "to %llu blocks", o_blocks_count, n_blocks_count); |
1660 | "to %llu blocks", o_blocks_count, n_blocks_count); | ||
1661 | 1881 | ||
1662 | if (n_blocks_count < o_blocks_count) { | 1882 | if (n_blocks_count < o_blocks_count) { |
1663 | /* On-line shrinking not supported */ | 1883 | /* On-line shrinking not supported */ |
@@ -1672,32 +1892,49 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1672 | ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); | 1892 | ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); |
1673 | ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); | 1893 | ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); |
1674 | 1894 | ||
1675 | n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / | 1895 | n_desc_blocks = num_desc_blocks(sb, n_group + 1); |
1676 | EXT4_DESC_PER_BLOCK(sb); | 1896 | o_desc_blocks = num_desc_blocks(sb, sbi->s_groups_count); |
1677 | o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / | ||
1678 | EXT4_DESC_PER_BLOCK(sb); | ||
1679 | desc_blocks = n_desc_blocks - o_desc_blocks; | ||
1680 | 1897 | ||
1681 | if (desc_blocks && | 1898 | meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); |
1682 | (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) || | ||
1683 | le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) { | ||
1684 | ext4_warning(sb, "No reserved GDT blocks, can't resize"); | ||
1685 | return -EPERM; | ||
1686 | } | ||
1687 | 1899 | ||
1688 | resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); | 1900 | if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) { |
1689 | if (IS_ERR(resize_inode)) { | 1901 | if (meta_bg) { |
1690 | ext4_warning(sb, "Error opening resize inode"); | 1902 | ext4_error(sb, "resize_inode and meta_bg enabled " |
1691 | return PTR_ERR(resize_inode); | 1903 | "simultaneously"); |
1904 | return -EINVAL; | ||
1905 | } | ||
1906 | if (n_desc_blocks > o_desc_blocks + | ||
1907 | le16_to_cpu(es->s_reserved_gdt_blocks)) { | ||
1908 | n_blocks_count_retry = n_blocks_count; | ||
1909 | n_desc_blocks = o_desc_blocks + | ||
1910 | le16_to_cpu(es->s_reserved_gdt_blocks); | ||
1911 | n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); | ||
1912 | n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb); | ||
1913 | n_group--; /* set to last group number */ | ||
1914 | } | ||
1915 | |||
1916 | if (!resize_inode) | ||
1917 | resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); | ||
1918 | if (IS_ERR(resize_inode)) { | ||
1919 | ext4_warning(sb, "Error opening resize inode"); | ||
1920 | return PTR_ERR(resize_inode); | ||
1921 | } | ||
1692 | } | 1922 | } |
1693 | 1923 | ||
1694 | /* See if the device is actually as big as what was requested */ | 1924 | if ((!resize_inode && !meta_bg) || n_blocks_count == o_blocks_count) { |
1695 | bh = sb_bread(sb, n_blocks_count - 1); | 1925 | err = ext4_convert_meta_bg(sb, resize_inode); |
1696 | if (!bh) { | 1926 | if (err) |
1697 | ext4_warning(sb, "can't read last block, resize aborted"); | 1927 | goto out; |
1698 | return -ENOSPC; | 1928 | if (resize_inode) { |
1929 | iput(resize_inode); | ||
1930 | resize_inode = NULL; | ||
1931 | } | ||
1932 | if (n_blocks_count_retry) { | ||
1933 | n_blocks_count = n_blocks_count_retry; | ||
1934 | n_blocks_count_retry = 0; | ||
1935 | goto retry; | ||
1936 | } | ||
1699 | } | 1937 | } |
1700 | brelse(bh); | ||
1701 | 1938 | ||
1702 | /* extend the last group */ | 1939 | /* extend the last group */ |
1703 | if (n_group == o_group) | 1940 | if (n_group == o_group) |
@@ -1710,12 +1947,15 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1710 | goto out; | 1947 | goto out; |
1711 | } | 1948 | } |
1712 | 1949 | ||
1713 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && | 1950 | if (ext4_blocks_count(es) == n_blocks_count) |
1714 | es->s_log_groups_per_flex) | 1951 | goto out; |
1715 | flexbg_size = 1 << es->s_log_groups_per_flex; | ||
1716 | 1952 | ||
1717 | o_blocks_count = ext4_blocks_count(es); | 1953 | err = ext4_alloc_flex_bg_array(sb, n_group + 1); |
1718 | if (o_blocks_count == n_blocks_count) | 1954 | if (err) |
1955 | return err; | ||
1956 | |||
1957 | err = ext4_mb_alloc_groupinfo(sb, n_group + 1); | ||
1958 | if (err) | ||
1719 | goto out; | 1959 | goto out; |
1720 | 1960 | ||
1721 | flex_gd = alloc_flex_gd(flexbg_size); | 1961 | flex_gd = alloc_flex_gd(flexbg_size); |
@@ -1729,19 +1969,33 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1729 | */ | 1969 | */ |
1730 | while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, | 1970 | while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, |
1731 | flexbg_size)) { | 1971 | flexbg_size)) { |
1732 | ext4_alloc_group_tables(sb, flex_gd, flexbg_size); | 1972 | if (jiffies - last_update_time > HZ * 10) { |
1973 | if (last_update_time) | ||
1974 | ext4_msg(sb, KERN_INFO, | ||
1975 | "resized to %llu blocks", | ||
1976 | ext4_blocks_count(es)); | ||
1977 | last_update_time = jiffies; | ||
1978 | } | ||
1979 | if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0) | ||
1980 | break; | ||
1733 | err = ext4_flex_group_add(sb, resize_inode, flex_gd); | 1981 | err = ext4_flex_group_add(sb, resize_inode, flex_gd); |
1734 | if (unlikely(err)) | 1982 | if (unlikely(err)) |
1735 | break; | 1983 | break; |
1736 | } | 1984 | } |
1737 | 1985 | ||
1986 | if (!err && n_blocks_count_retry) { | ||
1987 | n_blocks_count = n_blocks_count_retry; | ||
1988 | n_blocks_count_retry = 0; | ||
1989 | free_flex_gd(flex_gd); | ||
1990 | flex_gd = NULL; | ||
1991 | goto retry; | ||
1992 | } | ||
1993 | |||
1738 | out: | 1994 | out: |
1739 | if (flex_gd) | 1995 | if (flex_gd) |
1740 | free_flex_gd(flex_gd); | 1996 | free_flex_gd(flex_gd); |
1741 | 1997 | if (resize_inode != NULL) | |
1742 | iput(resize_inode); | 1998 | iput(resize_inode); |
1743 | if (test_opt(sb, DEBUG)) | 1999 | ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count); |
1744 | ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu " | ||
1745 | "upto %llu blocks", o_blocks_count, n_blocks_count); | ||
1746 | return err; | 2000 | return err; |
1747 | } | 2001 | } |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3e0851e4f468..7265a0367476 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -420,7 +420,7 @@ static void __save_error_info(struct super_block *sb, const char *func, | |||
420 | */ | 420 | */ |
421 | if (!es->s_error_count) | 421 | if (!es->s_error_count) |
422 | mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); | 422 | mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); |
423 | es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1); | 423 | le32_add_cpu(&es->s_error_count, 1); |
424 | } | 424 | } |
425 | 425 | ||
426 | static void save_error_info(struct super_block *sb, const char *func, | 426 | static void save_error_info(struct super_block *sb, const char *func, |
@@ -850,7 +850,6 @@ static void ext4_put_super(struct super_block *sb) | |||
850 | flush_workqueue(sbi->dio_unwritten_wq); | 850 | flush_workqueue(sbi->dio_unwritten_wq); |
851 | destroy_workqueue(sbi->dio_unwritten_wq); | 851 | destroy_workqueue(sbi->dio_unwritten_wq); |
852 | 852 | ||
853 | lock_super(sb); | ||
854 | if (sbi->s_journal) { | 853 | if (sbi->s_journal) { |
855 | err = jbd2_journal_destroy(sbi->s_journal); | 854 | err = jbd2_journal_destroy(sbi->s_journal); |
856 | sbi->s_journal = NULL; | 855 | sbi->s_journal = NULL; |
@@ -917,7 +916,6 @@ static void ext4_put_super(struct super_block *sb) | |||
917 | * Now that we are completely done shutting down the | 916 | * Now that we are completely done shutting down the |
918 | * superblock, we need to actually destroy the kobject. | 917 | * superblock, we need to actually destroy the kobject. |
919 | */ | 918 | */ |
920 | unlock_super(sb); | ||
921 | kobject_put(&sbi->s_kobj); | 919 | kobject_put(&sbi->s_kobj); |
922 | wait_for_completion(&sbi->s_kobj_unregister); | 920 | wait_for_completion(&sbi->s_kobj_unregister); |
923 | if (sbi->s_chksum_driver) | 921 | if (sbi->s_chksum_driver) |
@@ -948,6 +946,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
948 | ei->i_reserved_meta_blocks = 0; | 946 | ei->i_reserved_meta_blocks = 0; |
949 | ei->i_allocated_meta_blocks = 0; | 947 | ei->i_allocated_meta_blocks = 0; |
950 | ei->i_da_metadata_calc_len = 0; | 948 | ei->i_da_metadata_calc_len = 0; |
949 | ei->i_da_metadata_calc_last_lblock = 0; | ||
951 | spin_lock_init(&(ei->i_block_reservation_lock)); | 950 | spin_lock_init(&(ei->i_block_reservation_lock)); |
952 | #ifdef CONFIG_QUOTA | 951 | #ifdef CONFIG_QUOTA |
953 | ei->i_reserved_quota = 0; | 952 | ei->i_reserved_quota = 0; |
@@ -955,11 +954,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
955 | ei->jinode = NULL; | 954 | ei->jinode = NULL; |
956 | INIT_LIST_HEAD(&ei->i_completed_io_list); | 955 | INIT_LIST_HEAD(&ei->i_completed_io_list); |
957 | spin_lock_init(&ei->i_completed_io_lock); | 956 | spin_lock_init(&ei->i_completed_io_lock); |
958 | ei->cur_aio_dio = NULL; | ||
959 | ei->i_sync_tid = 0; | 957 | ei->i_sync_tid = 0; |
960 | ei->i_datasync_tid = 0; | 958 | ei->i_datasync_tid = 0; |
961 | atomic_set(&ei->i_ioend_count, 0); | 959 | atomic_set(&ei->i_ioend_count, 0); |
962 | atomic_set(&ei->i_aiodio_unwritten, 0); | 960 | atomic_set(&ei->i_unwritten, 0); |
963 | 961 | ||
964 | return &ei->vfs_inode; | 962 | return &ei->vfs_inode; |
965 | } | 963 | } |
@@ -1018,6 +1016,11 @@ static int init_inodecache(void) | |||
1018 | 1016 | ||
1019 | static void destroy_inodecache(void) | 1017 | static void destroy_inodecache(void) |
1020 | { | 1018 | { |
1019 | /* | ||
1020 | * Make sure all delayed rcu free inodes are flushed before we | ||
1021 | * destroy cache. | ||
1022 | */ | ||
1023 | rcu_barrier(); | ||
1021 | kmem_cache_destroy(ext4_inode_cachep); | 1024 | kmem_cache_destroy(ext4_inode_cachep); |
1022 | } | 1025 | } |
1023 | 1026 | ||
@@ -1218,6 +1221,7 @@ enum { | |||
1218 | Opt_inode_readahead_blks, Opt_journal_ioprio, | 1221 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
1219 | Opt_dioread_nolock, Opt_dioread_lock, | 1222 | Opt_dioread_nolock, Opt_dioread_lock, |
1220 | Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, | 1223 | Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, |
1224 | Opt_max_dir_size_kb, | ||
1221 | }; | 1225 | }; |
1222 | 1226 | ||
1223 | static const match_table_t tokens = { | 1227 | static const match_table_t tokens = { |
@@ -1291,6 +1295,7 @@ static const match_table_t tokens = { | |||
1291 | {Opt_init_itable, "init_itable=%u"}, | 1295 | {Opt_init_itable, "init_itable=%u"}, |
1292 | {Opt_init_itable, "init_itable"}, | 1296 | {Opt_init_itable, "init_itable"}, |
1293 | {Opt_noinit_itable, "noinit_itable"}, | 1297 | {Opt_noinit_itable, "noinit_itable"}, |
1298 | {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, | ||
1294 | {Opt_removed, "check=none"}, /* mount option from ext2/3 */ | 1299 | {Opt_removed, "check=none"}, /* mount option from ext2/3 */ |
1295 | {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ | 1300 | {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ |
1296 | {Opt_removed, "reservation"}, /* mount option from ext2/3 */ | 1301 | {Opt_removed, "reservation"}, /* mount option from ext2/3 */ |
@@ -1471,6 +1476,7 @@ static const struct mount_opts { | |||
1471 | {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, | 1476 | {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, |
1472 | {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, | 1477 | {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, |
1473 | {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, | 1478 | {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, |
1479 | {Opt_max_dir_size_kb, 0, MOPT_GTE0}, | ||
1474 | {Opt_err, 0, 0} | 1480 | {Opt_err, 0, 0} |
1475 | }; | 1481 | }; |
1476 | 1482 | ||
@@ -1586,6 +1592,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, | |||
1586 | if (!args->from) | 1592 | if (!args->from) |
1587 | arg = EXT4_DEF_LI_WAIT_MULT; | 1593 | arg = EXT4_DEF_LI_WAIT_MULT; |
1588 | sbi->s_li_wait_mult = arg; | 1594 | sbi->s_li_wait_mult = arg; |
1595 | } else if (token == Opt_max_dir_size_kb) { | ||
1596 | sbi->s_max_dir_size_kb = arg; | ||
1589 | } else if (token == Opt_stripe) { | 1597 | } else if (token == Opt_stripe) { |
1590 | sbi->s_stripe = arg; | 1598 | sbi->s_stripe = arg; |
1591 | } else if (m->flags & MOPT_DATAJ) { | 1599 | } else if (m->flags & MOPT_DATAJ) { |
@@ -1658,7 +1666,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1658 | * Initialize args struct so we know whether arg was | 1666 | * Initialize args struct so we know whether arg was |
1659 | * found; some options take optional arguments. | 1667 | * found; some options take optional arguments. |
1660 | */ | 1668 | */ |
1661 | args[0].to = args[0].from = 0; | 1669 | args[0].to = args[0].from = NULL; |
1662 | token = match_token(p, tokens, args); | 1670 | token = match_token(p, tokens, args); |
1663 | if (handle_mount_opt(sb, p, token, args, journal_devnum, | 1671 | if (handle_mount_opt(sb, p, token, args, journal_devnum, |
1664 | journal_ioprio, is_remount) < 0) | 1672 | journal_ioprio, is_remount) < 0) |
@@ -1734,7 +1742,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq, | |||
1734 | 1742 | ||
1735 | static const char *token2str(int token) | 1743 | static const char *token2str(int token) |
1736 | { | 1744 | { |
1737 | static const struct match_token *t; | 1745 | const struct match_token *t; |
1738 | 1746 | ||
1739 | for (t = tokens; t->token != Opt_err; t++) | 1747 | for (t = tokens; t->token != Opt_err; t++) |
1740 | if (t->token == token && !strchr(t->pattern, '=')) | 1748 | if (t->token == token && !strchr(t->pattern, '=')) |
@@ -1817,6 +1825,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, | |||
1817 | if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && | 1825 | if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && |
1818 | (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) | 1826 | (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) |
1819 | SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); | 1827 | SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); |
1828 | if (nodefs || sbi->s_max_dir_size_kb) | ||
1829 | SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb); | ||
1820 | 1830 | ||
1821 | ext4_show_quota_options(seq, sb); | 1831 | ext4_show_quota_options(seq, sb); |
1822 | return 0; | 1832 | return 0; |
@@ -1908,15 +1918,45 @@ done: | |||
1908 | return res; | 1918 | return res; |
1909 | } | 1919 | } |
1910 | 1920 | ||
1921 | int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) | ||
1922 | { | ||
1923 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1924 | struct flex_groups *new_groups; | ||
1925 | int size; | ||
1926 | |||
1927 | if (!sbi->s_log_groups_per_flex) | ||
1928 | return 0; | ||
1929 | |||
1930 | size = ext4_flex_group(sbi, ngroup - 1) + 1; | ||
1931 | if (size <= sbi->s_flex_groups_allocated) | ||
1932 | return 0; | ||
1933 | |||
1934 | size = roundup_pow_of_two(size * sizeof(struct flex_groups)); | ||
1935 | new_groups = ext4_kvzalloc(size, GFP_KERNEL); | ||
1936 | if (!new_groups) { | ||
1937 | ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", | ||
1938 | size / (int) sizeof(struct flex_groups)); | ||
1939 | return -ENOMEM; | ||
1940 | } | ||
1941 | |||
1942 | if (sbi->s_flex_groups) { | ||
1943 | memcpy(new_groups, sbi->s_flex_groups, | ||
1944 | (sbi->s_flex_groups_allocated * | ||
1945 | sizeof(struct flex_groups))); | ||
1946 | ext4_kvfree(sbi->s_flex_groups); | ||
1947 | } | ||
1948 | sbi->s_flex_groups = new_groups; | ||
1949 | sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); | ||
1950 | return 0; | ||
1951 | } | ||
1952 | |||
1911 | static int ext4_fill_flex_info(struct super_block *sb) | 1953 | static int ext4_fill_flex_info(struct super_block *sb) |
1912 | { | 1954 | { |
1913 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1955 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1914 | struct ext4_group_desc *gdp = NULL; | 1956 | struct ext4_group_desc *gdp = NULL; |
1915 | ext4_group_t flex_group_count; | ||
1916 | ext4_group_t flex_group; | 1957 | ext4_group_t flex_group; |
1917 | unsigned int groups_per_flex = 0; | 1958 | unsigned int groups_per_flex = 0; |
1918 | size_t size; | 1959 | int i, err; |
1919 | int i; | ||
1920 | 1960 | ||
1921 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 1961 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
1922 | if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { | 1962 | if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { |
@@ -1925,17 +1965,9 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1925 | } | 1965 | } |
1926 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | 1966 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; |
1927 | 1967 | ||
1928 | /* We allocate both existing and potentially added groups */ | 1968 | err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); |
1929 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + | 1969 | if (err) |
1930 | ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << | ||
1931 | EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; | ||
1932 | size = flex_group_count * sizeof(struct flex_groups); | ||
1933 | sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL); | ||
1934 | if (sbi->s_flex_groups == NULL) { | ||
1935 | ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups", | ||
1936 | flex_group_count); | ||
1937 | goto failed; | 1970 | goto failed; |
1938 | } | ||
1939 | 1971 | ||
1940 | for (i = 0; i < sbi->s_groups_count; i++) { | 1972 | for (i = 0; i < sbi->s_groups_count; i++) { |
1941 | gdp = ext4_get_group_desc(sb, i, NULL); | 1973 | gdp = ext4_get_group_desc(sb, i, NULL); |
@@ -2138,10 +2170,12 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2138 | } | 2170 | } |
2139 | 2171 | ||
2140 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { | 2172 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { |
2141 | if (es->s_last_orphan) | 2173 | /* don't clear list on RO mount w/ errors */ |
2174 | if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { | ||
2142 | jbd_debug(1, "Errors on filesystem, " | 2175 | jbd_debug(1, "Errors on filesystem, " |
2143 | "clearing orphan list.\n"); | 2176 | "clearing orphan list.\n"); |
2144 | es->s_last_orphan = 0; | 2177 | es->s_last_orphan = 0; |
2178 | } | ||
2145 | jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); | 2179 | jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); |
2146 | return; | 2180 | return; |
2147 | } | 2181 | } |
@@ -2522,6 +2556,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); | |||
2522 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); | 2556 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); |
2523 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); | 2557 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); |
2524 | EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); | 2558 | EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); |
2559 | EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); | ||
2525 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); | 2560 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); |
2526 | 2561 | ||
2527 | static struct attribute *ext4_attrs[] = { | 2562 | static struct attribute *ext4_attrs[] = { |
@@ -2537,6 +2572,7 @@ static struct attribute *ext4_attrs[] = { | |||
2537 | ATTR_LIST(mb_stream_req), | 2572 | ATTR_LIST(mb_stream_req), |
2538 | ATTR_LIST(mb_group_prealloc), | 2573 | ATTR_LIST(mb_group_prealloc), |
2539 | ATTR_LIST(max_writeback_mb_bump), | 2574 | ATTR_LIST(max_writeback_mb_bump), |
2575 | ATTR_LIST(extent_max_zeroout_kb), | ||
2540 | ATTR_LIST(trigger_fs_error), | 2576 | ATTR_LIST(trigger_fs_error), |
2541 | NULL, | 2577 | NULL, |
2542 | }; | 2578 | }; |
@@ -2544,10 +2580,12 @@ static struct attribute *ext4_attrs[] = { | |||
2544 | /* Features this copy of ext4 supports */ | 2580 | /* Features this copy of ext4 supports */ |
2545 | EXT4_INFO_ATTR(lazy_itable_init); | 2581 | EXT4_INFO_ATTR(lazy_itable_init); |
2546 | EXT4_INFO_ATTR(batched_discard); | 2582 | EXT4_INFO_ATTR(batched_discard); |
2583 | EXT4_INFO_ATTR(meta_bg_resize); | ||
2547 | 2584 | ||
2548 | static struct attribute *ext4_feat_attrs[] = { | 2585 | static struct attribute *ext4_feat_attrs[] = { |
2549 | ATTR_LIST(lazy_itable_init), | 2586 | ATTR_LIST(lazy_itable_init), |
2550 | ATTR_LIST(batched_discard), | 2587 | ATTR_LIST(batched_discard), |
2588 | ATTR_LIST(meta_bg_resize), | ||
2551 | NULL, | 2589 | NULL, |
2552 | }; | 2590 | }; |
2553 | 2591 | ||
@@ -3108,6 +3146,10 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, | |||
3108 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); | 3146 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
3109 | int s, j, count = 0; | 3147 | int s, j, count = 0; |
3110 | 3148 | ||
3149 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) | ||
3150 | return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + | ||
3151 | sbi->s_itb_per_group + 2); | ||
3152 | |||
3111 | first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + | 3153 | first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + |
3112 | (grp * EXT4_BLOCKS_PER_GROUP(sb)); | 3154 | (grp * EXT4_BLOCKS_PER_GROUP(sb)); |
3113 | last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; | 3155 | last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; |
@@ -3364,7 +3406,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3364 | * enable delayed allocation by default | 3406 | * enable delayed allocation by default |
3365 | * Use -o nodelalloc to turn it off | 3407 | * Use -o nodelalloc to turn it off |
3366 | */ | 3408 | */ |
3367 | if (!IS_EXT3_SB(sb) && | 3409 | if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) && |
3368 | ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) | 3410 | ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) |
3369 | set_opt(sb, DELALLOC); | 3411 | set_opt(sb, DELALLOC); |
3370 | 3412 | ||
@@ -3733,6 +3775,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3733 | 3775 | ||
3734 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 3776 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
3735 | sbi->s_max_writeback_mb_bump = 128; | 3777 | sbi->s_max_writeback_mb_bump = 128; |
3778 | sbi->s_extent_max_zeroout_kb = 32; | ||
3736 | 3779 | ||
3737 | /* | 3780 | /* |
3738 | * set up enough so that it can read an inode | 3781 | * set up enough so that it can read an inode |
@@ -4419,6 +4462,7 @@ static void ext4_clear_journal_err(struct super_block *sb, | |||
4419 | ext4_commit_super(sb, 1); | 4462 | ext4_commit_super(sb, 1); |
4420 | 4463 | ||
4421 | jbd2_journal_clear_err(journal); | 4464 | jbd2_journal_clear_err(journal); |
4465 | jbd2_journal_update_sb_errno(journal); | ||
4422 | } | 4466 | } |
4423 | } | 4467 | } |
4424 | 4468 | ||
@@ -4508,11 +4552,9 @@ static int ext4_unfreeze(struct super_block *sb) | |||
4508 | if (sb->s_flags & MS_RDONLY) | 4552 | if (sb->s_flags & MS_RDONLY) |
4509 | return 0; | 4553 | return 0; |
4510 | 4554 | ||
4511 | lock_super(sb); | ||
4512 | /* Reset the needs_recovery flag before the fs is unlocked. */ | 4555 | /* Reset the needs_recovery flag before the fs is unlocked. */ |
4513 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 4556 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
4514 | ext4_commit_super(sb, 1); | 4557 | ext4_commit_super(sb, 1); |
4515 | unlock_super(sb); | ||
4516 | return 0; | 4558 | return 0; |
4517 | } | 4559 | } |
4518 | 4560 | ||
@@ -4548,7 +4590,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4548 | char *orig_data = kstrdup(data, GFP_KERNEL); | 4590 | char *orig_data = kstrdup(data, GFP_KERNEL); |
4549 | 4591 | ||
4550 | /* Store the original options */ | 4592 | /* Store the original options */ |
4551 | lock_super(sb); | ||
4552 | old_sb_flags = sb->s_flags; | 4593 | old_sb_flags = sb->s_flags; |
4553 | old_opts.s_mount_opt = sbi->s_mount_opt; | 4594 | old_opts.s_mount_opt = sbi->s_mount_opt; |
4554 | old_opts.s_mount_opt2 = sbi->s_mount_opt2; | 4595 | old_opts.s_mount_opt2 = sbi->s_mount_opt2; |
@@ -4690,7 +4731,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4690 | if (sbi->s_journal == NULL) | 4731 | if (sbi->s_journal == NULL) |
4691 | ext4_commit_super(sb, 1); | 4732 | ext4_commit_super(sb, 1); |
4692 | 4733 | ||
4693 | unlock_super(sb); | ||
4694 | #ifdef CONFIG_QUOTA | 4734 | #ifdef CONFIG_QUOTA |
4695 | /* Release old quota file names */ | 4735 | /* Release old quota file names */ |
4696 | for (i = 0; i < MAXQUOTAS; i++) | 4736 | for (i = 0; i < MAXQUOTAS; i++) |
@@ -4703,10 +4743,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4703 | else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 4743 | else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
4704 | EXT4_FEATURE_RO_COMPAT_QUOTA)) { | 4744 | EXT4_FEATURE_RO_COMPAT_QUOTA)) { |
4705 | err = ext4_enable_quotas(sb); | 4745 | err = ext4_enable_quotas(sb); |
4706 | if (err) { | 4746 | if (err) |
4707 | lock_super(sb); | ||
4708 | goto restore_opts; | 4747 | goto restore_opts; |
4709 | } | ||
4710 | } | 4748 | } |
4711 | } | 4749 | } |
4712 | #endif | 4750 | #endif |
@@ -4733,7 +4771,6 @@ restore_opts: | |||
4733 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; | 4771 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; |
4734 | } | 4772 | } |
4735 | #endif | 4773 | #endif |
4736 | unlock_super(sb); | ||
4737 | kfree(orig_data); | 4774 | kfree(orig_data); |
4738 | return err; | 4775 | return err; |
4739 | } | 4776 | } |
@@ -4785,7 +4822,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
4785 | 4822 | ||
4786 | static inline struct inode *dquot_to_inode(struct dquot *dquot) | 4823 | static inline struct inode *dquot_to_inode(struct dquot *dquot) |
4787 | { | 4824 | { |
4788 | return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; | 4825 | return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type]; |
4789 | } | 4826 | } |
4790 | 4827 | ||
4791 | static int ext4_write_dquot(struct dquot *dquot) | 4828 | static int ext4_write_dquot(struct dquot *dquot) |
@@ -5258,8 +5295,10 @@ static int __init ext4_init_fs(void) | |||
5258 | if (err) | 5295 | if (err) |
5259 | goto out6; | 5296 | goto out6; |
5260 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); | 5297 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); |
5261 | if (!ext4_kset) | 5298 | if (!ext4_kset) { |
5299 | err = -ENOMEM; | ||
5262 | goto out5; | 5300 | goto out5; |
5301 | } | ||
5263 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | 5302 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); |
5264 | 5303 | ||
5265 | err = ext4_init_feat_adverts(); | 5304 | err = ext4_init_feat_adverts(); |
diff --git a/fs/fat/Makefile b/fs/fat/Makefile index e06190322c1c..964b634f6667 100644 --- a/fs/fat/Makefile +++ b/fs/fat/Makefile | |||
@@ -6,6 +6,6 @@ obj-$(CONFIG_FAT_FS) += fat.o | |||
6 | obj-$(CONFIG_VFAT_FS) += vfat.o | 6 | obj-$(CONFIG_VFAT_FS) += vfat.o |
7 | obj-$(CONFIG_MSDOS_FS) += msdos.o | 7 | obj-$(CONFIG_MSDOS_FS) += msdos.o |
8 | 8 | ||
9 | fat-y := cache.o dir.o fatent.o file.o inode.o misc.o | 9 | fat-y := cache.o dir.o fatent.o file.o inode.o misc.o nfs.o |
10 | vfat-y := namei_vfat.o | 10 | vfat-y := namei_vfat.o |
11 | msdos-y := namei_msdos.o | 11 | msdos-y := namei_msdos.o |
diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 1cc7038e273d..91ad9e1c9441 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c | |||
@@ -190,7 +190,8 @@ static void __fat_cache_inval_inode(struct inode *inode) | |||
190 | struct fat_cache *cache; | 190 | struct fat_cache *cache; |
191 | 191 | ||
192 | while (!list_empty(&i->cache_lru)) { | 192 | while (!list_empty(&i->cache_lru)) { |
193 | cache = list_entry(i->cache_lru.next, struct fat_cache, cache_list); | 193 | cache = list_entry(i->cache_lru.next, |
194 | struct fat_cache, cache_list); | ||
194 | list_del_init(&cache->cache_list); | 195 | list_del_init(&cache->cache_list); |
195 | i->nr_caches--; | 196 | i->nr_caches--; |
196 | fat_cache_free(cache); | 197 | fat_cache_free(cache); |
@@ -261,9 +262,10 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) | |||
261 | if (nr < 0) | 262 | if (nr < 0) |
262 | goto out; | 263 | goto out; |
263 | else if (nr == FAT_ENT_FREE) { | 264 | else if (nr == FAT_ENT_FREE) { |
264 | fat_fs_error_ratelimit(sb, "%s: invalid cluster chain" | 265 | fat_fs_error_ratelimit(sb, |
265 | " (i_pos %lld)", __func__, | 266 | "%s: invalid cluster chain (i_pos %lld)", |
266 | MSDOS_I(inode)->i_pos); | 267 | __func__, |
268 | MSDOS_I(inode)->i_pos); | ||
267 | nr = -EIO; | 269 | nr = -EIO; |
268 | goto out; | 270 | goto out; |
269 | } else if (nr == FAT_ENT_EOF) { | 271 | } else if (nr == FAT_ENT_EOF) { |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index dc49ed2cbffa..bca6d0a1255e 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
@@ -18,7 +18,7 @@ | |||
18 | #include <linux/time.h> | 18 | #include <linux/time.h> |
19 | #include <linux/buffer_head.h> | 19 | #include <linux/buffer_head.h> |
20 | #include <linux/compat.h> | 20 | #include <linux/compat.h> |
21 | #include <asm/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/kernel.h> | 22 | #include <linux/kernel.h> |
23 | #include "fat.h" | 23 | #include "fat.h" |
24 | 24 | ||
@@ -123,7 +123,8 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos, | |||
123 | { | 123 | { |
124 | /* Fast stuff first */ | 124 | /* Fast stuff first */ |
125 | if (*bh && *de && | 125 | if (*bh && *de && |
126 | (*de - (struct msdos_dir_entry *)(*bh)->b_data) < MSDOS_SB(dir->i_sb)->dir_per_block - 1) { | 126 | (*de - (struct msdos_dir_entry *)(*bh)->b_data) < |
127 | MSDOS_SB(dir->i_sb)->dir_per_block - 1) { | ||
127 | *pos += sizeof(struct msdos_dir_entry); | 128 | *pos += sizeof(struct msdos_dir_entry); |
128 | (*de)++; | 129 | (*de)++; |
129 | return 0; | 130 | return 0; |
@@ -155,7 +156,8 @@ static int uni16_to_x8(struct super_block *sb, unsigned char *ascii, | |||
155 | 156 | ||
156 | while (*ip && ((len - NLS_MAX_CHARSET_SIZE) > 0)) { | 157 | while (*ip && ((len - NLS_MAX_CHARSET_SIZE) > 0)) { |
157 | ec = *ip++; | 158 | ec = *ip++; |
158 | if ((charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE)) > 0) { | 159 | charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE); |
160 | if (charlen > 0) { | ||
159 | op += charlen; | 161 | op += charlen; |
160 | len -= charlen; | 162 | len -= charlen; |
161 | } else { | 163 | } else { |
@@ -172,12 +174,12 @@ static int uni16_to_x8(struct super_block *sb, unsigned char *ascii, | |||
172 | } | 174 | } |
173 | 175 | ||
174 | if (unlikely(*ip)) { | 176 | if (unlikely(*ip)) { |
175 | fat_msg(sb, KERN_WARNING, "filename was truncated while " | 177 | fat_msg(sb, KERN_WARNING, |
176 | "converting."); | 178 | "filename was truncated while converting."); |
177 | } | 179 | } |
178 | 180 | ||
179 | *op = 0; | 181 | *op = 0; |
180 | return (op - ascii); | 182 | return op - ascii; |
181 | } | 183 | } |
182 | 184 | ||
183 | static inline int fat_uni_to_x8(struct super_block *sb, const wchar_t *uni, | 185 | static inline int fat_uni_to_x8(struct super_block *sb, const wchar_t *uni, |
@@ -205,7 +207,8 @@ fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni) | |||
205 | } | 207 | } |
206 | 208 | ||
207 | static inline int | 209 | static inline int |
208 | fat_short2lower_uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni) | 210 | fat_short2lower_uni(struct nls_table *t, unsigned char *c, |
211 | int clen, wchar_t *uni) | ||
209 | { | 212 | { |
210 | int charlen; | 213 | int charlen; |
211 | wchar_t wc; | 214 | wchar_t wc; |
@@ -220,7 +223,8 @@ fat_short2lower_uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *un | |||
220 | if (!nc) | 223 | if (!nc) |
221 | nc = *c; | 224 | nc = *c; |
222 | 225 | ||
223 | if ( (charlen = t->char2uni(&nc, 1, uni)) < 0) { | 226 | charlen = t->char2uni(&nc, 1, uni); |
227 | if (charlen < 0) { | ||
224 | *uni = 0x003f; /* a question mark */ | 228 | *uni = 0x003f; /* a question mark */ |
225 | charlen = 1; | 229 | charlen = 1; |
226 | } | 230 | } |
@@ -537,7 +541,6 @@ end_of_dir: | |||
537 | 541 | ||
538 | return err; | 542 | return err; |
539 | } | 543 | } |
540 | |||
541 | EXPORT_SYMBOL_GPL(fat_search_long); | 544 | EXPORT_SYMBOL_GPL(fat_search_long); |
542 | 545 | ||
543 | struct fat_ioctl_filldir_callback { | 546 | struct fat_ioctl_filldir_callback { |
@@ -574,7 +577,8 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, | |||
574 | /* Fake . and .. for the root directory. */ | 577 | /* Fake . and .. for the root directory. */ |
575 | if (inode->i_ino == MSDOS_ROOT_INO) { | 578 | if (inode->i_ino == MSDOS_ROOT_INO) { |
576 | while (cpos < 2) { | 579 | while (cpos < 2) { |
577 | if (filldir(dirent, "..", cpos+1, cpos, MSDOS_ROOT_INO, DT_DIR) < 0) | 580 | if (filldir(dirent, "..", cpos+1, cpos, |
581 | MSDOS_ROOT_INO, DT_DIR) < 0) | ||
578 | goto out; | 582 | goto out; |
579 | cpos++; | 583 | cpos++; |
580 | filp->f_pos++; | 584 | filp->f_pos++; |
@@ -872,25 +876,26 @@ static int fat_get_short_entry(struct inode *dir, loff_t *pos, | |||
872 | } | 876 | } |
873 | 877 | ||
874 | /* | 878 | /* |
875 | * The ".." entry can not provide the "struct fat_slot_info" informations | 879 | * The ".." entry can not provide the "struct fat_slot_info" information |
876 | * for inode. So, this function provide the some informations only. | 880 | * for inode, nor a usable i_pos. So, this function provides some information |
881 | * only. | ||
882 | * | ||
883 | * Since this function walks through the on-disk inodes within a directory, | ||
884 | * callers are responsible for taking any locks necessary to prevent the | ||
885 | * directory from changing. | ||
877 | */ | 886 | */ |
878 | int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, | 887 | int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, |
879 | struct msdos_dir_entry **de, loff_t *i_pos) | 888 | struct msdos_dir_entry **de) |
880 | { | 889 | { |
881 | loff_t offset; | 890 | loff_t offset = 0; |
882 | 891 | ||
883 | offset = 0; | 892 | *de = NULL; |
884 | *bh = NULL; | ||
885 | while (fat_get_short_entry(dir, &offset, bh, de) >= 0) { | 893 | while (fat_get_short_entry(dir, &offset, bh, de) >= 0) { |
886 | if (!strncmp((*de)->name, MSDOS_DOTDOT, MSDOS_NAME)) { | 894 | if (!strncmp((*de)->name, MSDOS_DOTDOT, MSDOS_NAME)) |
887 | *i_pos = fat_make_i_pos(dir->i_sb, *bh, *de); | ||
888 | return 0; | 895 | return 0; |
889 | } | ||
890 | } | 896 | } |
891 | return -ENOENT; | 897 | return -ENOENT; |
892 | } | 898 | } |
893 | |||
894 | EXPORT_SYMBOL_GPL(fat_get_dotdot_entry); | 899 | EXPORT_SYMBOL_GPL(fat_get_dotdot_entry); |
895 | 900 | ||
896 | /* See if directory is empty */ | 901 | /* See if directory is empty */ |
@@ -913,7 +918,6 @@ int fat_dir_empty(struct inode *dir) | |||
913 | brelse(bh); | 918 | brelse(bh); |
914 | return result; | 919 | return result; |
915 | } | 920 | } |
916 | |||
917 | EXPORT_SYMBOL_GPL(fat_dir_empty); | 921 | EXPORT_SYMBOL_GPL(fat_dir_empty); |
918 | 922 | ||
919 | /* | 923 | /* |
@@ -959,7 +963,6 @@ int fat_scan(struct inode *dir, const unsigned char *name, | |||
959 | } | 963 | } |
960 | return -ENOENT; | 964 | return -ENOENT; |
961 | } | 965 | } |
962 | |||
963 | EXPORT_SYMBOL_GPL(fat_scan); | 966 | EXPORT_SYMBOL_GPL(fat_scan); |
964 | 967 | ||
965 | static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) | 968 | static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) |
@@ -1047,7 +1050,6 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) | |||
1047 | 1050 | ||
1048 | return 0; | 1051 | return 0; |
1049 | } | 1052 | } |
1050 | |||
1051 | EXPORT_SYMBOL_GPL(fat_remove_entries); | 1053 | EXPORT_SYMBOL_GPL(fat_remove_entries); |
1052 | 1054 | ||
1053 | static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used, | 1055 | static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used, |
@@ -1141,10 +1143,8 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) | |||
1141 | de[0].ctime_cs = de[1].ctime_cs = 0; | 1143 | de[0].ctime_cs = de[1].ctime_cs = 0; |
1142 | de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0; | 1144 | de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0; |
1143 | } | 1145 | } |
1144 | de[0].start = cpu_to_le16(cluster); | 1146 | fat_set_start(&de[0], cluster); |
1145 | de[0].starthi = cpu_to_le16(cluster >> 16); | 1147 | fat_set_start(&de[1], MSDOS_I(dir)->i_logstart); |
1146 | de[1].start = cpu_to_le16(MSDOS_I(dir)->i_logstart); | ||
1147 | de[1].starthi = cpu_to_le16(MSDOS_I(dir)->i_logstart >> 16); | ||
1148 | de[0].size = de[1].size = 0; | 1148 | de[0].size = de[1].size = 0; |
1149 | memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de)); | 1149 | memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de)); |
1150 | set_buffer_uptodate(bhs[0]); | 1150 | set_buffer_uptodate(bhs[0]); |
@@ -1161,7 +1161,6 @@ error_free: | |||
1161 | error: | 1161 | error: |
1162 | return err; | 1162 | return err; |
1163 | } | 1163 | } |
1164 | |||
1165 | EXPORT_SYMBOL_GPL(fat_alloc_new_dir); | 1164 | EXPORT_SYMBOL_GPL(fat_alloc_new_dir); |
1166 | 1165 | ||
1167 | static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots, | 1166 | static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots, |
@@ -1377,5 +1376,4 @@ error_remove: | |||
1377 | __fat_remove_entries(dir, pos, free_slots); | 1376 | __fat_remove_entries(dir, pos, free_slots); |
1378 | return err; | 1377 | return err; |
1379 | } | 1378 | } |
1380 | |||
1381 | EXPORT_SYMBOL_GPL(fat_add_entries); | 1379 | EXPORT_SYMBOL_GPL(fat_add_entries); |
diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 2deeeb86f331..ca7e8f8bad7c 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/string.h> | 5 | #include <linux/string.h> |
6 | #include <linux/nls.h> | 6 | #include <linux/nls.h> |
7 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
8 | #include <linux/hash.h> | ||
8 | #include <linux/mutex.h> | 9 | #include <linux/mutex.h> |
9 | #include <linux/ratelimit.h> | 10 | #include <linux/ratelimit.h> |
10 | #include <linux/msdos_fs.h> | 11 | #include <linux/msdos_fs.h> |
@@ -23,30 +24,31 @@ | |||
23 | #define FAT_ERRORS_RO 3 /* remount r/o on error */ | 24 | #define FAT_ERRORS_RO 3 /* remount r/o on error */ |
24 | 25 | ||
25 | struct fat_mount_options { | 26 | struct fat_mount_options { |
26 | uid_t fs_uid; | 27 | kuid_t fs_uid; |
27 | gid_t fs_gid; | 28 | kgid_t fs_gid; |
28 | unsigned short fs_fmask; | 29 | unsigned short fs_fmask; |
29 | unsigned short fs_dmask; | 30 | unsigned short fs_dmask; |
30 | unsigned short codepage; /* Codepage for shortname conversions */ | 31 | unsigned short codepage; /* Codepage for shortname conversions */ |
31 | char *iocharset; /* Charset used for filename input/display */ | 32 | char *iocharset; /* Charset used for filename input/display */ |
32 | unsigned short shortname; /* flags for shortname display/create rule */ | 33 | unsigned short shortname; /* flags for shortname display/create rule */ |
33 | unsigned char name_check; /* r = relaxed, n = normal, s = strict */ | 34 | unsigned char name_check; /* r = relaxed, n = normal, s = strict */ |
34 | unsigned char errors; /* On error: continue, panic, remount-ro */ | 35 | unsigned char errors; /* On error: continue, panic, remount-ro */ |
35 | unsigned short allow_utime;/* permission for setting the [am]time */ | 36 | unsigned short allow_utime;/* permission for setting the [am]time */ |
36 | unsigned quiet:1, /* set = fake successful chmods and chowns */ | 37 | unsigned quiet:1, /* set = fake successful chmods and chowns */ |
37 | showexec:1, /* set = only set x bit for com/exe/bat */ | 38 | showexec:1, /* set = only set x bit for com/exe/bat */ |
38 | sys_immutable:1, /* set = system files are immutable */ | 39 | sys_immutable:1, /* set = system files are immutable */ |
39 | dotsOK:1, /* set = hidden and system files are named '.filename' */ | 40 | dotsOK:1, /* set = hidden and system files are named '.filename' */ |
40 | isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ | 41 | isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ |
41 | utf8:1, /* Use of UTF-8 character set (Default) */ | 42 | utf8:1, /* Use of UTF-8 character set (Default) */ |
42 | unicode_xlate:1, /* create escape sequences for unhandled Unicode */ | 43 | unicode_xlate:1, /* create escape sequences for unhandled Unicode */ |
43 | numtail:1, /* Does first alias have a numeric '~1' type tail? */ | 44 | numtail:1, /* Does first alias have a numeric '~1' type tail? */ |
44 | flush:1, /* write things quickly */ | 45 | flush:1, /* write things quickly */ |
45 | nocase:1, /* Does this need case conversion? 0=need case conversion*/ | 46 | nocase:1, /* Does this need case conversion? 0=need case conversion*/ |
46 | usefree:1, /* Use free_clusters for FAT32 */ | 47 | usefree:1, /* Use free_clusters for FAT32 */ |
47 | tz_utc:1, /* Filesystem timestamps are in UTC */ | 48 | tz_utc:1, /* Filesystem timestamps are in UTC */ |
48 | rodir:1, /* allow ATTR_RO for directory */ | 49 | rodir:1, /* allow ATTR_RO for directory */ |
49 | discard:1; /* Issue discard requests on deletions */ | 50 | discard:1, /* Issue discard requests on deletions */ |
51 | nfs:1; /* Do extra work needed for NFS export */ | ||
50 | }; | 52 | }; |
51 | 53 | ||
52 | #define FAT_HASH_BITS 8 | 54 | #define FAT_HASH_BITS 8 |
@@ -56,28 +58,28 @@ struct fat_mount_options { | |||
56 | * MS-DOS file system in-core superblock data | 58 | * MS-DOS file system in-core superblock data |
57 | */ | 59 | */ |
58 | struct msdos_sb_info { | 60 | struct msdos_sb_info { |
59 | unsigned short sec_per_clus; /* sectors/cluster */ | 61 | unsigned short sec_per_clus; /* sectors/cluster */ |
60 | unsigned short cluster_bits; /* log2(cluster_size) */ | 62 | unsigned short cluster_bits; /* log2(cluster_size) */ |
61 | unsigned int cluster_size; /* cluster size */ | 63 | unsigned int cluster_size; /* cluster size */ |
62 | unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ | 64 | unsigned char fats, fat_bits; /* number of FATs, FAT bits (12 or 16) */ |
63 | unsigned short fat_start; | 65 | unsigned short fat_start; |
64 | unsigned long fat_length; /* FAT start & length (sec.) */ | 66 | unsigned long fat_length; /* FAT start & length (sec.) */ |
65 | unsigned long dir_start; | 67 | unsigned long dir_start; |
66 | unsigned short dir_entries; /* root dir start & entries */ | 68 | unsigned short dir_entries; /* root dir start & entries */ |
67 | unsigned long data_start; /* first data sector */ | 69 | unsigned long data_start; /* first data sector */ |
68 | unsigned long max_cluster; /* maximum cluster number */ | 70 | unsigned long max_cluster; /* maximum cluster number */ |
69 | unsigned long root_cluster; /* first cluster of the root directory */ | 71 | unsigned long root_cluster; /* first cluster of the root directory */ |
70 | unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ | 72 | unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ |
71 | struct mutex fat_lock; | 73 | struct mutex fat_lock; |
72 | unsigned int prev_free; /* previously allocated cluster number */ | 74 | unsigned int prev_free; /* previously allocated cluster number */ |
73 | unsigned int free_clusters; /* -1 if undefined */ | 75 | unsigned int free_clusters; /* -1 if undefined */ |
74 | unsigned int free_clus_valid; /* is free_clusters valid? */ | 76 | unsigned int free_clus_valid; /* is free_clusters valid? */ |
75 | struct fat_mount_options options; | 77 | struct fat_mount_options options; |
76 | struct nls_table *nls_disk; /* Codepage used on disk */ | 78 | struct nls_table *nls_disk; /* Codepage used on disk */ |
77 | struct nls_table *nls_io; /* Charset used for input and display */ | 79 | struct nls_table *nls_io; /* Charset used for input and display */ |
78 | const void *dir_ops; /* Opaque; default directory operations */ | 80 | const void *dir_ops; /* Opaque; default directory operations */ |
79 | int dir_per_block; /* dir entries per block */ | 81 | int dir_per_block; /* dir entries per block */ |
80 | int dir_per_block_bits; /* log2(dir_per_block) */ | 82 | int dir_per_block_bits; /* log2(dir_per_block) */ |
81 | 83 | ||
82 | int fatent_shift; | 84 | int fatent_shift; |
83 | struct fatent_operations *fatent_ops; | 85 | struct fatent_operations *fatent_ops; |
@@ -88,6 +90,9 @@ struct msdos_sb_info { | |||
88 | 90 | ||
89 | spinlock_t inode_hash_lock; | 91 | spinlock_t inode_hash_lock; |
90 | struct hlist_head inode_hashtable[FAT_HASH_SIZE]; | 92 | struct hlist_head inode_hashtable[FAT_HASH_SIZE]; |
93 | |||
94 | spinlock_t dir_hash_lock; | ||
95 | struct hlist_head dir_hashtable[FAT_HASH_SIZE]; | ||
91 | }; | 96 | }; |
92 | 97 | ||
93 | #define FAT_CACHE_VALID 0 /* special case for valid cache */ | 98 | #define FAT_CACHE_VALID 0 /* special case for valid cache */ |
@@ -110,6 +115,7 @@ struct msdos_inode_info { | |||
110 | int i_attrs; /* unused attribute bits */ | 115 | int i_attrs; /* unused attribute bits */ |
111 | loff_t i_pos; /* on-disk position of directory entry or 0 */ | 116 | loff_t i_pos; /* on-disk position of directory entry or 0 */ |
112 | struct hlist_node i_fat_hash; /* hash by i_location */ | 117 | struct hlist_node i_fat_hash; /* hash by i_location */ |
118 | struct hlist_node i_dir_hash; /* hash by i_logstart */ | ||
113 | struct rw_semaphore truncate_lock; /* protect bmap against truncate */ | 119 | struct rw_semaphore truncate_lock; /* protect bmap against truncate */ |
114 | struct inode vfs_inode; | 120 | struct inode vfs_inode; |
115 | }; | 121 | }; |
@@ -262,7 +268,7 @@ extern int fat_subdirs(struct inode *dir); | |||
262 | extern int fat_scan(struct inode *dir, const unsigned char *name, | 268 | extern int fat_scan(struct inode *dir, const unsigned char *name, |
263 | struct fat_slot_info *sinfo); | 269 | struct fat_slot_info *sinfo); |
264 | extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, | 270 | extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, |
265 | struct msdos_dir_entry **de, loff_t *i_pos); | 271 | struct msdos_dir_entry **de); |
266 | extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); | 272 | extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); |
267 | extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, | 273 | extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, |
268 | struct fat_slot_info *sinfo); | 274 | struct fat_slot_info *sinfo); |
@@ -322,7 +328,7 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd, | |||
322 | unsigned long arg); | 328 | unsigned long arg); |
323 | extern const struct file_operations fat_file_operations; | 329 | extern const struct file_operations fat_file_operations; |
324 | extern const struct inode_operations fat_file_inode_operations; | 330 | extern const struct inode_operations fat_file_inode_operations; |
325 | extern int fat_setattr(struct dentry * dentry, struct iattr * attr); | 331 | extern int fat_setattr(struct dentry *dentry, struct iattr *attr); |
326 | extern void fat_truncate_blocks(struct inode *inode, loff_t offset); | 332 | extern void fat_truncate_blocks(struct inode *inode, loff_t offset); |
327 | extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, | 333 | extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, |
328 | struct kstat *stat); | 334 | struct kstat *stat); |
@@ -340,7 +346,12 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
340 | int isvfat, void (*setup)(struct super_block *)); | 346 | int isvfat, void (*setup)(struct super_block *)); |
341 | 347 | ||
342 | extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, | 348 | extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, |
343 | struct inode *i2); | 349 | struct inode *i2); |
350 | static inline unsigned long fat_dir_hash(int logstart) | ||
351 | { | ||
352 | return hash_32(logstart, FAT_HASH_BITS); | ||
353 | } | ||
354 | |||
344 | /* fat/misc.c */ | 355 | /* fat/misc.c */ |
345 | extern __printf(3, 4) __cold | 356 | extern __printf(3, 4) __cold |
346 | void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...); | 357 | void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...); |
@@ -366,6 +377,14 @@ extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); | |||
366 | int fat_cache_init(void); | 377 | int fat_cache_init(void); |
367 | void fat_cache_destroy(void); | 378 | void fat_cache_destroy(void); |
368 | 379 | ||
380 | /* fat/nfs.c */ | ||
381 | struct fid; | ||
382 | extern struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid, | ||
383 | int fh_len, int fh_type); | ||
384 | extern struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
385 | int fh_len, int fh_type); | ||
386 | extern struct dentry *fat_get_parent(struct dentry *child_dir); | ||
387 | |||
369 | /* helper for printk */ | 388 | /* helper for printk */ |
370 | typedef unsigned long long llu; | 389 | typedef unsigned long long llu; |
371 | 390 | ||
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 31f08ab62c56..260705c58062 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c | |||
@@ -186,9 +186,6 @@ static void fat16_ent_put(struct fat_entry *fatent, int new) | |||
186 | 186 | ||
187 | static void fat32_ent_put(struct fat_entry *fatent, int new) | 187 | static void fat32_ent_put(struct fat_entry *fatent, int new) |
188 | { | 188 | { |
189 | if (new == FAT_ENT_EOF) | ||
190 | new = EOF_FAT32; | ||
191 | |||
192 | WARN_ON(new & 0xf0000000); | 189 | WARN_ON(new & 0xf0000000); |
193 | new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff; | 190 | new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff; |
194 | *fatent->u.ent32_p = cpu_to_le32(new); | 191 | *fatent->u.ent32_p = cpu_to_le32(new); |
@@ -203,15 +200,18 @@ static int fat12_ent_next(struct fat_entry *fatent) | |||
203 | 200 | ||
204 | fatent->entry++; | 201 | fatent->entry++; |
205 | if (fatent->nr_bhs == 1) { | 202 | if (fatent->nr_bhs == 1) { |
206 | WARN_ON(ent12_p[0] > (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 2))); | 203 | WARN_ON(ent12_p[0] > (u8 *)(bhs[0]->b_data + |
207 | WARN_ON(ent12_p[1] > (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))); | 204 | (bhs[0]->b_size - 2))); |
205 | WARN_ON(ent12_p[1] > (u8 *)(bhs[0]->b_data + | ||
206 | (bhs[0]->b_size - 1))); | ||
208 | if (nextp < (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))) { | 207 | if (nextp < (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))) { |
209 | ent12_p[0] = nextp - 1; | 208 | ent12_p[0] = nextp - 1; |
210 | ent12_p[1] = nextp; | 209 | ent12_p[1] = nextp; |
211 | return 1; | 210 | return 1; |
212 | } | 211 | } |
213 | } else { | 212 | } else { |
214 | WARN_ON(ent12_p[0] != (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))); | 213 | WARN_ON(ent12_p[0] != (u8 *)(bhs[0]->b_data + |
214 | (bhs[0]->b_size - 1))); | ||
215 | WARN_ON(ent12_p[1] != (u8 *)bhs[1]->b_data); | 215 | WARN_ON(ent12_p[1] != (u8 *)bhs[1]->b_data); |
216 | ent12_p[0] = nextp - 1; | 216 | ent12_p[0] = nextp - 1; |
217 | ent12_p[1] = nextp; | 217 | ent12_p[1] = nextp; |
@@ -631,7 +631,6 @@ error: | |||
631 | 631 | ||
632 | return err; | 632 | return err; |
633 | } | 633 | } |
634 | |||
635 | EXPORT_SYMBOL_GPL(fat_free_clusters); | 634 | EXPORT_SYMBOL_GPL(fat_free_clusters); |
636 | 635 | ||
637 | /* 128kb is the whole sectors for FAT12 and FAT16 */ | 636 | /* 128kb is the whole sectors for FAT12 and FAT16 */ |
diff --git a/fs/fat/file.c b/fs/fat/file.c index e007b8bd8e5e..a62e0ecbe2db 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
@@ -352,7 +352,7 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) | |||
352 | { | 352 | { |
353 | umode_t allow_utime = sbi->options.allow_utime; | 353 | umode_t allow_utime = sbi->options.allow_utime; |
354 | 354 | ||
355 | if (current_fsuid() != inode->i_uid) { | 355 | if (!uid_eq(current_fsuid(), inode->i_uid)) { |
356 | if (in_group_p(inode->i_gid)) | 356 | if (in_group_p(inode->i_gid)) |
357 | allow_utime >>= 3; | 357 | allow_utime >>= 3; |
358 | if (allow_utime & MAY_WRITE) | 358 | if (allow_utime & MAY_WRITE) |
@@ -407,9 +407,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) | |||
407 | } | 407 | } |
408 | 408 | ||
409 | if (((attr->ia_valid & ATTR_UID) && | 409 | if (((attr->ia_valid & ATTR_UID) && |
410 | (attr->ia_uid != sbi->options.fs_uid)) || | 410 | (!uid_eq(attr->ia_uid, sbi->options.fs_uid))) || |
411 | ((attr->ia_valid & ATTR_GID) && | 411 | ((attr->ia_valid & ATTR_GID) && |
412 | (attr->ia_gid != sbi->options.fs_gid)) || | 412 | (!gid_eq(attr->ia_gid, sbi->options.fs_gid))) || |
413 | ((attr->ia_valid & ATTR_MODE) && | 413 | ((attr->ia_valid & ATTR_MODE) && |
414 | (attr->ia_mode & ~FAT_VALID_MODE))) | 414 | (attr->ia_mode & ~FAT_VALID_MODE))) |
415 | error = -EPERM; | 415 | error = -EPERM; |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 05e897fe9866..76f60c642c06 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -281,15 +281,42 @@ static inline unsigned long fat_hash(loff_t i_pos) | |||
281 | return hash_32(i_pos, FAT_HASH_BITS); | 281 | return hash_32(i_pos, FAT_HASH_BITS); |
282 | } | 282 | } |
283 | 283 | ||
284 | static void dir_hash_init(struct super_block *sb) | ||
285 | { | ||
286 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | ||
287 | int i; | ||
288 | |||
289 | spin_lock_init(&sbi->dir_hash_lock); | ||
290 | for (i = 0; i < FAT_HASH_SIZE; i++) | ||
291 | INIT_HLIST_HEAD(&sbi->dir_hashtable[i]); | ||
292 | } | ||
293 | |||
284 | void fat_attach(struct inode *inode, loff_t i_pos) | 294 | void fat_attach(struct inode *inode, loff_t i_pos) |
285 | { | 295 | { |
286 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); | 296 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); |
287 | struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); | ||
288 | 297 | ||
289 | spin_lock(&sbi->inode_hash_lock); | 298 | if (inode->i_ino != MSDOS_ROOT_INO) { |
290 | MSDOS_I(inode)->i_pos = i_pos; | 299 | struct hlist_head *head = sbi->inode_hashtable |
291 | hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head); | 300 | + fat_hash(i_pos); |
292 | spin_unlock(&sbi->inode_hash_lock); | 301 | |
302 | spin_lock(&sbi->inode_hash_lock); | ||
303 | MSDOS_I(inode)->i_pos = i_pos; | ||
304 | hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head); | ||
305 | spin_unlock(&sbi->inode_hash_lock); | ||
306 | } | ||
307 | |||
308 | /* If NFS support is enabled, cache the mapping of start cluster | ||
309 | * to directory inode. This is used during reconnection of | ||
310 | * dentries to the filesystem root. | ||
311 | */ | ||
312 | if (S_ISDIR(inode->i_mode) && sbi->options.nfs) { | ||
313 | struct hlist_head *d_head = sbi->dir_hashtable; | ||
314 | d_head += fat_dir_hash(MSDOS_I(inode)->i_logstart); | ||
315 | |||
316 | spin_lock(&sbi->dir_hash_lock); | ||
317 | hlist_add_head(&MSDOS_I(inode)->i_dir_hash, d_head); | ||
318 | spin_unlock(&sbi->dir_hash_lock); | ||
319 | } | ||
293 | } | 320 | } |
294 | EXPORT_SYMBOL_GPL(fat_attach); | 321 | EXPORT_SYMBOL_GPL(fat_attach); |
295 | 322 | ||
@@ -300,6 +327,12 @@ void fat_detach(struct inode *inode) | |||
300 | MSDOS_I(inode)->i_pos = 0; | 327 | MSDOS_I(inode)->i_pos = 0; |
301 | hlist_del_init(&MSDOS_I(inode)->i_fat_hash); | 328 | hlist_del_init(&MSDOS_I(inode)->i_fat_hash); |
302 | spin_unlock(&sbi->inode_hash_lock); | 329 | spin_unlock(&sbi->inode_hash_lock); |
330 | |||
331 | if (S_ISDIR(inode->i_mode) && sbi->options.nfs) { | ||
332 | spin_lock(&sbi->dir_hash_lock); | ||
333 | hlist_del_init(&MSDOS_I(inode)->i_dir_hash); | ||
334 | spin_unlock(&sbi->dir_hash_lock); | ||
335 | } | ||
303 | } | 336 | } |
304 | EXPORT_SYMBOL_GPL(fat_detach); | 337 | EXPORT_SYMBOL_GPL(fat_detach); |
305 | 338 | ||
@@ -504,6 +537,7 @@ static void init_once(void *foo) | |||
504 | ei->cache_valid_id = FAT_CACHE_VALID + 1; | 537 | ei->cache_valid_id = FAT_CACHE_VALID + 1; |
505 | INIT_LIST_HEAD(&ei->cache_lru); | 538 | INIT_LIST_HEAD(&ei->cache_lru); |
506 | INIT_HLIST_NODE(&ei->i_fat_hash); | 539 | INIT_HLIST_NODE(&ei->i_fat_hash); |
540 | INIT_HLIST_NODE(&ei->i_dir_hash); | ||
507 | inode_init_once(&ei->vfs_inode); | 541 | inode_init_once(&ei->vfs_inode); |
508 | } | 542 | } |
509 | 543 | ||
@@ -521,6 +555,11 @@ static int __init fat_init_inodecache(void) | |||
521 | 555 | ||
522 | static void __exit fat_destroy_inodecache(void) | 556 | static void __exit fat_destroy_inodecache(void) |
523 | { | 557 | { |
558 | /* | ||
559 | * Make sure all delayed rcu free inodes are flushed before we | ||
560 | * destroy cache. | ||
561 | */ | ||
562 | rcu_barrier(); | ||
524 | kmem_cache_destroy(fat_inode_cachep); | 563 | kmem_cache_destroy(fat_inode_cachep); |
525 | } | 564 | } |
526 | 565 | ||
@@ -663,125 +702,9 @@ static const struct super_operations fat_sops = { | |||
663 | .show_options = fat_show_options, | 702 | .show_options = fat_show_options, |
664 | }; | 703 | }; |
665 | 704 | ||
666 | /* | ||
667 | * a FAT file handle with fhtype 3 is | ||
668 | * 0/ i_ino - for fast, reliable lookup if still in the cache | ||
669 | * 1/ i_generation - to see if i_ino is still valid | ||
670 | * bit 0 == 0 iff directory | ||
671 | * 2/ i_pos(8-39) - if ino has changed, but still in cache | ||
672 | * 3/ i_pos(4-7)|i_logstart - to semi-verify inode found at i_pos | ||
673 | * 4/ i_pos(0-3)|parent->i_logstart - maybe used to hunt for the file on disc | ||
674 | * | ||
675 | * Hack for NFSv2: Maximum FAT entry number is 28bits and maximum | ||
676 | * i_pos is 40bits (blocknr(32) + dir offset(8)), so two 4bits | ||
677 | * of i_logstart is used to store the directory entry offset. | ||
678 | */ | ||
679 | |||
680 | static struct dentry *fat_fh_to_dentry(struct super_block *sb, | ||
681 | struct fid *fid, int fh_len, int fh_type) | ||
682 | { | ||
683 | struct inode *inode = NULL; | ||
684 | u32 *fh = fid->raw; | ||
685 | |||
686 | if (fh_len < 5 || fh_type != 3) | ||
687 | return NULL; | ||
688 | |||
689 | inode = ilookup(sb, fh[0]); | ||
690 | if (!inode || inode->i_generation != fh[1]) { | ||
691 | if (inode) | ||
692 | iput(inode); | ||
693 | inode = NULL; | ||
694 | } | ||
695 | if (!inode) { | ||
696 | loff_t i_pos; | ||
697 | int i_logstart = fh[3] & 0x0fffffff; | ||
698 | |||
699 | i_pos = (loff_t)fh[2] << 8; | ||
700 | i_pos |= ((fh[3] >> 24) & 0xf0) | (fh[4] >> 28); | ||
701 | |||
702 | /* try 2 - see if i_pos is in F-d-c | ||
703 | * require i_logstart to be the same | ||
704 | * Will fail if you truncate and then re-write | ||
705 | */ | ||
706 | |||
707 | inode = fat_iget(sb, i_pos); | ||
708 | if (inode && MSDOS_I(inode)->i_logstart != i_logstart) { | ||
709 | iput(inode); | ||
710 | inode = NULL; | ||
711 | } | ||
712 | } | ||
713 | |||
714 | /* | ||
715 | * For now, do nothing if the inode is not found. | ||
716 | * | ||
717 | * What we could do is: | ||
718 | * | ||
719 | * - follow the file starting at fh[4], and record the ".." entry, | ||
720 | * and the name of the fh[2] entry. | ||
721 | * - then follow the ".." file finding the next step up. | ||
722 | * | ||
723 | * This way we build a path to the root of the tree. If this works, we | ||
724 | * lookup the path and so get this inode into the cache. Finally try | ||
725 | * the fat_iget lookup again. If that fails, then we are totally out | ||
726 | * of luck. But all that is for another day | ||
727 | */ | ||
728 | return d_obtain_alias(inode); | ||
729 | } | ||
730 | |||
731 | static int | ||
732 | fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) | ||
733 | { | ||
734 | int len = *lenp; | ||
735 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); | ||
736 | loff_t i_pos; | ||
737 | |||
738 | if (len < 5) { | ||
739 | *lenp = 5; | ||
740 | return 255; /* no room */ | ||
741 | } | ||
742 | |||
743 | i_pos = fat_i_pos_read(sbi, inode); | ||
744 | *lenp = 5; | ||
745 | fh[0] = inode->i_ino; | ||
746 | fh[1] = inode->i_generation; | ||
747 | fh[2] = i_pos >> 8; | ||
748 | fh[3] = ((i_pos & 0xf0) << 24) | MSDOS_I(inode)->i_logstart; | ||
749 | fh[4] = (i_pos & 0x0f) << 28; | ||
750 | if (parent) | ||
751 | fh[4] |= MSDOS_I(parent)->i_logstart; | ||
752 | return 3; | ||
753 | } | ||
754 | |||
755 | static struct dentry *fat_get_parent(struct dentry *child) | ||
756 | { | ||
757 | struct super_block *sb = child->d_sb; | ||
758 | struct buffer_head *bh; | ||
759 | struct msdos_dir_entry *de; | ||
760 | loff_t i_pos; | ||
761 | struct dentry *parent; | ||
762 | struct inode *inode; | ||
763 | int err; | ||
764 | |||
765 | lock_super(sb); | ||
766 | |||
767 | err = fat_get_dotdot_entry(child->d_inode, &bh, &de, &i_pos); | ||
768 | if (err) { | ||
769 | parent = ERR_PTR(err); | ||
770 | goto out; | ||
771 | } | ||
772 | inode = fat_build_inode(sb, de, i_pos); | ||
773 | brelse(bh); | ||
774 | |||
775 | parent = d_obtain_alias(inode); | ||
776 | out: | ||
777 | unlock_super(sb); | ||
778 | |||
779 | return parent; | ||
780 | } | ||
781 | |||
782 | static const struct export_operations fat_export_ops = { | 705 | static const struct export_operations fat_export_ops = { |
783 | .encode_fh = fat_encode_fh, | ||
784 | .fh_to_dentry = fat_fh_to_dentry, | 706 | .fh_to_dentry = fat_fh_to_dentry, |
707 | .fh_to_parent = fat_fh_to_parent, | ||
785 | .get_parent = fat_get_parent, | 708 | .get_parent = fat_get_parent, |
786 | }; | 709 | }; |
787 | 710 | ||
@@ -791,10 +714,12 @@ static int fat_show_options(struct seq_file *m, struct dentry *root) | |||
791 | struct fat_mount_options *opts = &sbi->options; | 714 | struct fat_mount_options *opts = &sbi->options; |
792 | int isvfat = opts->isvfat; | 715 | int isvfat = opts->isvfat; |
793 | 716 | ||
794 | if (opts->fs_uid != 0) | 717 | if (!uid_eq(opts->fs_uid, GLOBAL_ROOT_UID)) |
795 | seq_printf(m, ",uid=%u", opts->fs_uid); | 718 | seq_printf(m, ",uid=%u", |
796 | if (opts->fs_gid != 0) | 719 | from_kuid_munged(&init_user_ns, opts->fs_uid)); |
797 | seq_printf(m, ",gid=%u", opts->fs_gid); | 720 | if (!gid_eq(opts->fs_gid, GLOBAL_ROOT_GID)) |
721 | seq_printf(m, ",gid=%u", | ||
722 | from_kgid_munged(&init_user_ns, opts->fs_gid)); | ||
798 | seq_printf(m, ",fmask=%04o", opts->fs_fmask); | 723 | seq_printf(m, ",fmask=%04o", opts->fs_fmask); |
799 | seq_printf(m, ",dmask=%04o", opts->fs_dmask); | 724 | seq_printf(m, ",dmask=%04o", opts->fs_dmask); |
800 | if (opts->allow_utime) | 725 | if (opts->allow_utime) |
@@ -829,6 +754,8 @@ static int fat_show_options(struct seq_file *m, struct dentry *root) | |||
829 | seq_puts(m, ",usefree"); | 754 | seq_puts(m, ",usefree"); |
830 | if (opts->quiet) | 755 | if (opts->quiet) |
831 | seq_puts(m, ",quiet"); | 756 | seq_puts(m, ",quiet"); |
757 | if (opts->nfs) | ||
758 | seq_puts(m, ",nfs"); | ||
832 | if (opts->showexec) | 759 | if (opts->showexec) |
833 | seq_puts(m, ",showexec"); | 760 | seq_puts(m, ",showexec"); |
834 | if (opts->sys_immutable) | 761 | if (opts->sys_immutable) |
@@ -873,7 +800,7 @@ enum { | |||
873 | Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, | 800 | Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, |
874 | Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, | 801 | Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, |
875 | Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, | 802 | Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, |
876 | Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err, | 803 | Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_err, |
877 | }; | 804 | }; |
878 | 805 | ||
879 | static const match_table_t fat_tokens = { | 806 | static const match_table_t fat_tokens = { |
@@ -902,6 +829,7 @@ static const match_table_t fat_tokens = { | |||
902 | {Opt_err_panic, "errors=panic"}, | 829 | {Opt_err_panic, "errors=panic"}, |
903 | {Opt_err_ro, "errors=remount-ro"}, | 830 | {Opt_err_ro, "errors=remount-ro"}, |
904 | {Opt_discard, "discard"}, | 831 | {Opt_discard, "discard"}, |
832 | {Opt_nfs, "nfs"}, | ||
905 | {Opt_obsolete, "conv=binary"}, | 833 | {Opt_obsolete, "conv=binary"}, |
906 | {Opt_obsolete, "conv=text"}, | 834 | {Opt_obsolete, "conv=text"}, |
907 | {Opt_obsolete, "conv=auto"}, | 835 | {Opt_obsolete, "conv=auto"}, |
@@ -982,6 +910,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, | |||
982 | opts->numtail = 1; | 910 | opts->numtail = 1; |
983 | opts->usefree = opts->nocase = 0; | 911 | opts->usefree = opts->nocase = 0; |
984 | opts->tz_utc = 0; | 912 | opts->tz_utc = 0; |
913 | opts->nfs = 0; | ||
985 | opts->errors = FAT_ERRORS_RO; | 914 | opts->errors = FAT_ERRORS_RO; |
986 | *debug = 0; | 915 | *debug = 0; |
987 | 916 | ||
@@ -1037,12 +966,16 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, | |||
1037 | case Opt_uid: | 966 | case Opt_uid: |
1038 | if (match_int(&args[0], &option)) | 967 | if (match_int(&args[0], &option)) |
1039 | return 0; | 968 | return 0; |
1040 | opts->fs_uid = option; | 969 | opts->fs_uid = make_kuid(current_user_ns(), option); |
970 | if (!uid_valid(opts->fs_uid)) | ||
971 | return 0; | ||
1041 | break; | 972 | break; |
1042 | case Opt_gid: | 973 | case Opt_gid: |
1043 | if (match_int(&args[0], &option)) | 974 | if (match_int(&args[0], &option)) |
1044 | return 0; | 975 | return 0; |
1045 | opts->fs_gid = option; | 976 | opts->fs_gid = make_kgid(current_user_ns(), option); |
977 | if (!gid_valid(opts->fs_gid)) | ||
978 | return 0; | ||
1046 | break; | 979 | break; |
1047 | case Opt_umask: | 980 | case Opt_umask: |
1048 | if (match_octal(&args[0], &option)) | 981 | if (match_octal(&args[0], &option)) |
@@ -1142,6 +1075,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, | |||
1142 | case Opt_discard: | 1075 | case Opt_discard: |
1143 | opts->discard = 1; | 1076 | opts->discard = 1; |
1144 | break; | 1077 | break; |
1078 | case Opt_nfs: | ||
1079 | opts->nfs = 1; | ||
1080 | break; | ||
1145 | 1081 | ||
1146 | /* obsolete mount options */ | 1082 | /* obsolete mount options */ |
1147 | case Opt_obsolete: | 1083 | case Opt_obsolete: |
@@ -1432,6 +1368,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, | |||
1432 | 1368 | ||
1433 | /* set up enough so that it can read an inode */ | 1369 | /* set up enough so that it can read an inode */ |
1434 | fat_hash_init(sb); | 1370 | fat_hash_init(sb); |
1371 | dir_hash_init(sb); | ||
1435 | fat_ent_access_init(sb); | 1372 | fat_ent_access_init(sb); |
1436 | 1373 | ||
1437 | /* | 1374 | /* |
@@ -1486,6 +1423,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, | |||
1486 | } | 1423 | } |
1487 | error = -ENOMEM; | 1424 | error = -ENOMEM; |
1488 | insert_inode_hash(root_inode); | 1425 | insert_inode_hash(root_inode); |
1426 | fat_attach(root_inode, 0); | ||
1489 | sb->s_root = d_make_root(root_inode); | 1427 | sb->s_root = d_make_root(root_inode); |
1490 | if (!sb->s_root) { | 1428 | if (!sb->s_root) { |
1491 | fat_msg(sb, KERN_ERR, "get root inode failed"); | 1429 | fat_msg(sb, KERN_ERR, "get root inode failed"); |
@@ -1525,18 +1463,14 @@ static int writeback_inode(struct inode *inode) | |||
1525 | { | 1463 | { |
1526 | 1464 | ||
1527 | int ret; | 1465 | int ret; |
1528 | struct address_space *mapping = inode->i_mapping; | 1466 | |
1529 | struct writeback_control wbc = { | 1467 | /* if we used wait=1, sync_inode_metadata waits for the io for the |
1530 | .sync_mode = WB_SYNC_NONE, | 1468 | * inode to finish. So wait=0 is sent down to sync_inode_metadata |
1531 | .nr_to_write = 0, | ||
1532 | }; | ||
1533 | /* if we used WB_SYNC_ALL, sync_inode waits for the io for the | ||
1534 | * inode to finish. So WB_SYNC_NONE is sent down to sync_inode | ||
1535 | * and filemap_fdatawrite is used for the data blocks | 1469 | * and filemap_fdatawrite is used for the data blocks |
1536 | */ | 1470 | */ |
1537 | ret = sync_inode(inode, &wbc); | 1471 | ret = sync_inode_metadata(inode, 0); |
1538 | if (!ret) | 1472 | if (!ret) |
1539 | ret = filemap_fdatawrite(mapping); | 1473 | ret = filemap_fdatawrite(inode->i_mapping); |
1540 | return ret; | 1474 | return ret; |
1541 | } | 1475 | } |
1542 | 1476 | ||
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index b0e12bf9f4a1..c1055e778fff 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c | |||
@@ -407,7 +407,7 @@ out: | |||
407 | static int msdos_unlink(struct inode *dir, struct dentry *dentry) | 407 | static int msdos_unlink(struct inode *dir, struct dentry *dentry) |
408 | { | 408 | { |
409 | struct inode *inode = dentry->d_inode; | 409 | struct inode *inode = dentry->d_inode; |
410 | struct super_block *sb= inode->i_sb; | 410 | struct super_block *sb = inode->i_sb; |
411 | struct fat_slot_info sinfo; | 411 | struct fat_slot_info sinfo; |
412 | int err; | 412 | int err; |
413 | 413 | ||
@@ -440,7 +440,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, | |||
440 | struct inode *old_inode, *new_inode; | 440 | struct inode *old_inode, *new_inode; |
441 | struct fat_slot_info old_sinfo, sinfo; | 441 | struct fat_slot_info old_sinfo, sinfo; |
442 | struct timespec ts; | 442 | struct timespec ts; |
443 | loff_t dotdot_i_pos, new_i_pos; | 443 | loff_t new_i_pos; |
444 | int err, old_attrs, is_dir, update_dotdot, corrupt = 0; | 444 | int err, old_attrs, is_dir, update_dotdot, corrupt = 0; |
445 | 445 | ||
446 | old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; | 446 | old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; |
@@ -456,8 +456,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, | |||
456 | is_dir = S_ISDIR(old_inode->i_mode); | 456 | is_dir = S_ISDIR(old_inode->i_mode); |
457 | update_dotdot = (is_dir && old_dir != new_dir); | 457 | update_dotdot = (is_dir && old_dir != new_dir); |
458 | if (update_dotdot) { | 458 | if (update_dotdot) { |
459 | if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de, | 459 | if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de)) { |
460 | &dotdot_i_pos) < 0) { | ||
461 | err = -EIO; | 460 | err = -EIO; |
462 | goto out; | 461 | goto out; |
463 | } | 462 | } |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6a6d8c0715a1..e535dd75b986 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
@@ -914,7 +914,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
914 | struct inode *old_inode, *new_inode; | 914 | struct inode *old_inode, *new_inode; |
915 | struct fat_slot_info old_sinfo, sinfo; | 915 | struct fat_slot_info old_sinfo, sinfo; |
916 | struct timespec ts; | 916 | struct timespec ts; |
917 | loff_t dotdot_i_pos, new_i_pos; | 917 | loff_t new_i_pos; |
918 | int err, is_dir, update_dotdot, corrupt = 0; | 918 | int err, is_dir, update_dotdot, corrupt = 0; |
919 | struct super_block *sb = old_dir->i_sb; | 919 | struct super_block *sb = old_dir->i_sb; |
920 | 920 | ||
@@ -929,8 +929,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
929 | is_dir = S_ISDIR(old_inode->i_mode); | 929 | is_dir = S_ISDIR(old_inode->i_mode); |
930 | update_dotdot = (is_dir && old_dir != new_dir); | 930 | update_dotdot = (is_dir && old_dir != new_dir); |
931 | if (update_dotdot) { | 931 | if (update_dotdot) { |
932 | if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de, | 932 | if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de)) { |
933 | &dotdot_i_pos) < 0) { | ||
934 | err = -EIO; | 933 | err = -EIO; |
935 | goto out; | 934 | goto out; |
936 | } | 935 | } |
diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c new file mode 100644 index 000000000000..ef4b5faba87b --- /dev/null +++ b/fs/fat/nfs.c | |||
@@ -0,0 +1,101 @@ | |||
1 | /* fs/fat/nfs.c | ||
2 | * | ||
3 | * This software is licensed under the terms of the GNU General Public | ||
4 | * License version 2, as published by the Free Software Foundation, and | ||
5 | * may be copied, distributed, and modified under those terms. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | #include <linux/exportfs.h> | ||
15 | #include "fat.h" | ||
16 | |||
17 | /** | ||
18 | * Look up a directory inode given its starting cluster. | ||
19 | */ | ||
20 | static struct inode *fat_dget(struct super_block *sb, int i_logstart) | ||
21 | { | ||
22 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | ||
23 | struct hlist_head *head; | ||
24 | struct hlist_node *_p; | ||
25 | struct msdos_inode_info *i; | ||
26 | struct inode *inode = NULL; | ||
27 | |||
28 | head = sbi->dir_hashtable + fat_dir_hash(i_logstart); | ||
29 | spin_lock(&sbi->dir_hash_lock); | ||
30 | hlist_for_each_entry(i, _p, head, i_dir_hash) { | ||
31 | BUG_ON(i->vfs_inode.i_sb != sb); | ||
32 | if (i->i_logstart != i_logstart) | ||
33 | continue; | ||
34 | inode = igrab(&i->vfs_inode); | ||
35 | if (inode) | ||
36 | break; | ||
37 | } | ||
38 | spin_unlock(&sbi->dir_hash_lock); | ||
39 | return inode; | ||
40 | } | ||
41 | |||
42 | static struct inode *fat_nfs_get_inode(struct super_block *sb, | ||
43 | u64 ino, u32 generation) | ||
44 | { | ||
45 | struct inode *inode; | ||
46 | |||
47 | if ((ino < MSDOS_ROOT_INO) || (ino == MSDOS_FSINFO_INO)) | ||
48 | return NULL; | ||
49 | |||
50 | inode = ilookup(sb, ino); | ||
51 | if (inode && generation && (inode->i_generation != generation)) { | ||
52 | iput(inode); | ||
53 | inode = NULL; | ||
54 | } | ||
55 | |||
56 | return inode; | ||
57 | } | ||
58 | |||
59 | /** | ||
60 | * Map a NFS file handle to a corresponding dentry. | ||
61 | * The dentry may or may not be connected to the filesystem root. | ||
62 | */ | ||
63 | struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid, | ||
64 | int fh_len, int fh_type) | ||
65 | { | ||
66 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | ||
67 | fat_nfs_get_inode); | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * Find the parent for a file specified by NFS handle. | ||
72 | * This requires that the handle contain the i_ino of the parent. | ||
73 | */ | ||
74 | struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
75 | int fh_len, int fh_type) | ||
76 | { | ||
77 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
78 | fat_nfs_get_inode); | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | * Find the parent for a directory that is not currently connected to | ||
83 | * the filesystem root. | ||
84 | * | ||
85 | * On entry, the caller holds child_dir->d_inode->i_mutex. | ||
86 | */ | ||
87 | struct dentry *fat_get_parent(struct dentry *child_dir) | ||
88 | { | ||
89 | struct super_block *sb = child_dir->d_sb; | ||
90 | struct buffer_head *bh = NULL; | ||
91 | struct msdos_dir_entry *de; | ||
92 | struct inode *parent_inode = NULL; | ||
93 | |||
94 | if (!fat_get_dotdot_entry(child_dir->d_inode, &bh, &de)) { | ||
95 | int parent_logstart = fat_get_start(MSDOS_SB(sb), de); | ||
96 | parent_inode = fat_dget(sb, parent_logstart); | ||
97 | } | ||
98 | brelse(bh); | ||
99 | |||
100 | return d_obtain_alias(parent_inode); | ||
101 | } | ||
diff --git a/fs/fcntl.c b/fs/fcntl.c index 887b5ba8c9b5..71a600a19f06 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -26,124 +26,6 @@ | |||
26 | #include <asm/siginfo.h> | 26 | #include <asm/siginfo.h> |
27 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
28 | 28 | ||
29 | void set_close_on_exec(unsigned int fd, int flag) | ||
30 | { | ||
31 | struct files_struct *files = current->files; | ||
32 | struct fdtable *fdt; | ||
33 | spin_lock(&files->file_lock); | ||
34 | fdt = files_fdtable(files); | ||
35 | if (flag) | ||
36 | __set_close_on_exec(fd, fdt); | ||
37 | else | ||
38 | __clear_close_on_exec(fd, fdt); | ||
39 | spin_unlock(&files->file_lock); | ||
40 | } | ||
41 | |||
42 | static bool get_close_on_exec(unsigned int fd) | ||
43 | { | ||
44 | struct files_struct *files = current->files; | ||
45 | struct fdtable *fdt; | ||
46 | bool res; | ||
47 | rcu_read_lock(); | ||
48 | fdt = files_fdtable(files); | ||
49 | res = close_on_exec(fd, fdt); | ||
50 | rcu_read_unlock(); | ||
51 | return res; | ||
52 | } | ||
53 | |||
54 | SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) | ||
55 | { | ||
56 | int err = -EBADF; | ||
57 | struct file * file, *tofree; | ||
58 | struct files_struct * files = current->files; | ||
59 | struct fdtable *fdt; | ||
60 | |||
61 | if ((flags & ~O_CLOEXEC) != 0) | ||
62 | return -EINVAL; | ||
63 | |||
64 | if (unlikely(oldfd == newfd)) | ||
65 | return -EINVAL; | ||
66 | |||
67 | spin_lock(&files->file_lock); | ||
68 | err = expand_files(files, newfd); | ||
69 | file = fcheck(oldfd); | ||
70 | if (unlikely(!file)) | ||
71 | goto Ebadf; | ||
72 | if (unlikely(err < 0)) { | ||
73 | if (err == -EMFILE) | ||
74 | goto Ebadf; | ||
75 | goto out_unlock; | ||
76 | } | ||
77 | /* | ||
78 | * We need to detect attempts to do dup2() over allocated but still | ||
79 | * not finished descriptor. NB: OpenBSD avoids that at the price of | ||
80 | * extra work in their equivalent of fget() - they insert struct | ||
81 | * file immediately after grabbing descriptor, mark it larval if | ||
82 | * more work (e.g. actual opening) is needed and make sure that | ||
83 | * fget() treats larval files as absent. Potentially interesting, | ||
84 | * but while extra work in fget() is trivial, locking implications | ||
85 | * and amount of surgery on open()-related paths in VFS are not. | ||
86 | * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" | ||
87 | * deadlocks in rather amusing ways, AFAICS. All of that is out of | ||
88 | * scope of POSIX or SUS, since neither considers shared descriptor | ||
89 | * tables and this condition does not arise without those. | ||
90 | */ | ||
91 | err = -EBUSY; | ||
92 | fdt = files_fdtable(files); | ||
93 | tofree = fdt->fd[newfd]; | ||
94 | if (!tofree && fd_is_open(newfd, fdt)) | ||
95 | goto out_unlock; | ||
96 | get_file(file); | ||
97 | rcu_assign_pointer(fdt->fd[newfd], file); | ||
98 | __set_open_fd(newfd, fdt); | ||
99 | if (flags & O_CLOEXEC) | ||
100 | __set_close_on_exec(newfd, fdt); | ||
101 | else | ||
102 | __clear_close_on_exec(newfd, fdt); | ||
103 | spin_unlock(&files->file_lock); | ||
104 | |||
105 | if (tofree) | ||
106 | filp_close(tofree, files); | ||
107 | |||
108 | return newfd; | ||
109 | |||
110 | Ebadf: | ||
111 | err = -EBADF; | ||
112 | out_unlock: | ||
113 | spin_unlock(&files->file_lock); | ||
114 | return err; | ||
115 | } | ||
116 | |||
117 | SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) | ||
118 | { | ||
119 | if (unlikely(newfd == oldfd)) { /* corner case */ | ||
120 | struct files_struct *files = current->files; | ||
121 | int retval = oldfd; | ||
122 | |||
123 | rcu_read_lock(); | ||
124 | if (!fcheck_files(files, oldfd)) | ||
125 | retval = -EBADF; | ||
126 | rcu_read_unlock(); | ||
127 | return retval; | ||
128 | } | ||
129 | return sys_dup3(oldfd, newfd, 0); | ||
130 | } | ||
131 | |||
132 | SYSCALL_DEFINE1(dup, unsigned int, fildes) | ||
133 | { | ||
134 | int ret = -EBADF; | ||
135 | struct file *file = fget_raw(fildes); | ||
136 | |||
137 | if (file) { | ||
138 | ret = get_unused_fd(); | ||
139 | if (ret >= 0) | ||
140 | fd_install(ret, file); | ||
141 | else | ||
142 | fput(file); | ||
143 | } | ||
144 | return ret; | ||
145 | } | ||
146 | |||
147 | #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) | 29 | #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) |
148 | 30 | ||
149 | static int setfl(int fd, struct file * filp, unsigned long arg) | 31 | static int setfl(int fd, struct file * filp, unsigned long arg) |
@@ -267,7 +149,7 @@ pid_t f_getown(struct file *filp) | |||
267 | 149 | ||
268 | static int f_setown_ex(struct file *filp, unsigned long arg) | 150 | static int f_setown_ex(struct file *filp, unsigned long arg) |
269 | { | 151 | { |
270 | struct f_owner_ex * __user owner_p = (void * __user)arg; | 152 | struct f_owner_ex __user *owner_p = (void __user *)arg; |
271 | struct f_owner_ex owner; | 153 | struct f_owner_ex owner; |
272 | struct pid *pid; | 154 | struct pid *pid; |
273 | int type; | 155 | int type; |
@@ -307,7 +189,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg) | |||
307 | 189 | ||
308 | static int f_getown_ex(struct file *filp, unsigned long arg) | 190 | static int f_getown_ex(struct file *filp, unsigned long arg) |
309 | { | 191 | { |
310 | struct f_owner_ex * __user owner_p = (void * __user)arg; | 192 | struct f_owner_ex __user *owner_p = (void __user *)arg; |
311 | struct f_owner_ex owner; | 193 | struct f_owner_ex owner; |
312 | int ret = 0; | 194 | int ret = 0; |
313 | 195 | ||
@@ -345,7 +227,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg) | |||
345 | static int f_getowner_uids(struct file *filp, unsigned long arg) | 227 | static int f_getowner_uids(struct file *filp, unsigned long arg) |
346 | { | 228 | { |
347 | struct user_namespace *user_ns = current_user_ns(); | 229 | struct user_namespace *user_ns = current_user_ns(); |
348 | uid_t * __user dst = (void * __user)arg; | 230 | uid_t __user *dst = (void __user *)arg; |
349 | uid_t src[2]; | 231 | uid_t src[2]; |
350 | int err; | 232 | int err; |
351 | 233 | ||
@@ -373,14 +255,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | |||
373 | 255 | ||
374 | switch (cmd) { | 256 | switch (cmd) { |
375 | case F_DUPFD: | 257 | case F_DUPFD: |
258 | err = f_dupfd(arg, filp, 0); | ||
259 | break; | ||
376 | case F_DUPFD_CLOEXEC: | 260 | case F_DUPFD_CLOEXEC: |
377 | if (arg >= rlimit(RLIMIT_NOFILE)) | 261 | err = f_dupfd(arg, filp, O_CLOEXEC); |
378 | break; | ||
379 | err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0); | ||
380 | if (err >= 0) { | ||
381 | get_file(filp); | ||
382 | fd_install(err, filp); | ||
383 | } | ||
384 | break; | 262 | break; |
385 | case F_GETFD: | 263 | case F_GETFD: |
386 | err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; | 264 | err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; |
@@ -470,25 +348,23 @@ static int check_fcntl_cmd(unsigned cmd) | |||
470 | 348 | ||
471 | SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) | 349 | SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) |
472 | { | 350 | { |
473 | struct file *filp; | 351 | struct fd f = fdget_raw(fd); |
474 | int fput_needed; | ||
475 | long err = -EBADF; | 352 | long err = -EBADF; |
476 | 353 | ||
477 | filp = fget_raw_light(fd, &fput_needed); | 354 | if (!f.file) |
478 | if (!filp) | ||
479 | goto out; | 355 | goto out; |
480 | 356 | ||
481 | if (unlikely(filp->f_mode & FMODE_PATH)) { | 357 | if (unlikely(f.file->f_mode & FMODE_PATH)) { |
482 | if (!check_fcntl_cmd(cmd)) | 358 | if (!check_fcntl_cmd(cmd)) |
483 | goto out1; | 359 | goto out1; |
484 | } | 360 | } |
485 | 361 | ||
486 | err = security_file_fcntl(filp, cmd, arg); | 362 | err = security_file_fcntl(f.file, cmd, arg); |
487 | if (!err) | 363 | if (!err) |
488 | err = do_fcntl(fd, cmd, arg, filp); | 364 | err = do_fcntl(fd, cmd, arg, f.file); |
489 | 365 | ||
490 | out1: | 366 | out1: |
491 | fput_light(filp, fput_needed); | 367 | fdput(f); |
492 | out: | 368 | out: |
493 | return err; | 369 | return err; |
494 | } | 370 | } |
@@ -497,38 +373,36 @@ out: | |||
497 | SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, | 373 | SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, |
498 | unsigned long, arg) | 374 | unsigned long, arg) |
499 | { | 375 | { |
500 | struct file * filp; | 376 | struct fd f = fdget_raw(fd); |
501 | long err = -EBADF; | 377 | long err = -EBADF; |
502 | int fput_needed; | ||
503 | 378 | ||
504 | filp = fget_raw_light(fd, &fput_needed); | 379 | if (!f.file) |
505 | if (!filp) | ||
506 | goto out; | 380 | goto out; |
507 | 381 | ||
508 | if (unlikely(filp->f_mode & FMODE_PATH)) { | 382 | if (unlikely(f.file->f_mode & FMODE_PATH)) { |
509 | if (!check_fcntl_cmd(cmd)) | 383 | if (!check_fcntl_cmd(cmd)) |
510 | goto out1; | 384 | goto out1; |
511 | } | 385 | } |
512 | 386 | ||
513 | err = security_file_fcntl(filp, cmd, arg); | 387 | err = security_file_fcntl(f.file, cmd, arg); |
514 | if (err) | 388 | if (err) |
515 | goto out1; | 389 | goto out1; |
516 | 390 | ||
517 | switch (cmd) { | 391 | switch (cmd) { |
518 | case F_GETLK64: | 392 | case F_GETLK64: |
519 | err = fcntl_getlk64(filp, (struct flock64 __user *) arg); | 393 | err = fcntl_getlk64(f.file, (struct flock64 __user *) arg); |
520 | break; | 394 | break; |
521 | case F_SETLK64: | 395 | case F_SETLK64: |
522 | case F_SETLKW64: | 396 | case F_SETLKW64: |
523 | err = fcntl_setlk64(fd, filp, cmd, | 397 | err = fcntl_setlk64(fd, f.file, cmd, |
524 | (struct flock64 __user *) arg); | 398 | (struct flock64 __user *) arg); |
525 | break; | 399 | break; |
526 | default: | 400 | default: |
527 | err = do_fcntl(fd, cmd, arg, filp); | 401 | err = do_fcntl(fd, cmd, arg, f.file); |
528 | break; | 402 | break; |
529 | } | 403 | } |
530 | out1: | 404 | out1: |
531 | fput_light(filp, fput_needed); | 405 | fdput(f); |
532 | out: | 406 | out: |
533 | return err; | 407 | return err; |
534 | } | 408 | } |
diff --git a/fs/fhandle.c b/fs/fhandle.c index a48e4a139be1..f775bfdd6e4a 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c | |||
@@ -113,24 +113,21 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, | |||
113 | 113 | ||
114 | static struct vfsmount *get_vfsmount_from_fd(int fd) | 114 | static struct vfsmount *get_vfsmount_from_fd(int fd) |
115 | { | 115 | { |
116 | struct path path; | 116 | struct vfsmount *mnt; |
117 | 117 | ||
118 | if (fd == AT_FDCWD) { | 118 | if (fd == AT_FDCWD) { |
119 | struct fs_struct *fs = current->fs; | 119 | struct fs_struct *fs = current->fs; |
120 | spin_lock(&fs->lock); | 120 | spin_lock(&fs->lock); |
121 | path = fs->pwd; | 121 | mnt = mntget(fs->pwd.mnt); |
122 | mntget(path.mnt); | ||
123 | spin_unlock(&fs->lock); | 122 | spin_unlock(&fs->lock); |
124 | } else { | 123 | } else { |
125 | int fput_needed; | 124 | struct fd f = fdget(fd); |
126 | struct file *file = fget_light(fd, &fput_needed); | 125 | if (!f.file) |
127 | if (!file) | ||
128 | return ERR_PTR(-EBADF); | 126 | return ERR_PTR(-EBADF); |
129 | path = file->f_path; | 127 | mnt = mntget(f.file->f_path.mnt); |
130 | mntget(path.mnt); | 128 | fdput(f); |
131 | fput_light(file, fput_needed); | ||
132 | } | 129 | } |
133 | return path.mnt; | 130 | return mnt; |
134 | } | 131 | } |
135 | 132 | ||
136 | static int vfs_dentry_acceptable(void *context, struct dentry *dentry) | 133 | static int vfs_dentry_acceptable(void *context, struct dentry *dentry) |
@@ -6,6 +6,7 @@ | |||
6 | * Manage the dynamic fd arrays in the process files_struct. | 6 | * Manage the dynamic fd arrays in the process files_struct. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/syscalls.h> | ||
9 | #include <linux/export.h> | 10 | #include <linux/export.h> |
10 | #include <linux/fs.h> | 11 | #include <linux/fs.h> |
11 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
@@ -84,22 +85,14 @@ static void free_fdtable_work(struct work_struct *work) | |||
84 | } | 85 | } |
85 | } | 86 | } |
86 | 87 | ||
87 | void free_fdtable_rcu(struct rcu_head *rcu) | 88 | static void free_fdtable_rcu(struct rcu_head *rcu) |
88 | { | 89 | { |
89 | struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); | 90 | struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); |
90 | struct fdtable_defer *fddef; | 91 | struct fdtable_defer *fddef; |
91 | 92 | ||
92 | BUG_ON(!fdt); | 93 | BUG_ON(!fdt); |
94 | BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT); | ||
93 | 95 | ||
94 | if (fdt->max_fds <= NR_OPEN_DEFAULT) { | ||
95 | /* | ||
96 | * This fdtable is embedded in the files structure and that | ||
97 | * structure itself is getting destroyed. | ||
98 | */ | ||
99 | kmem_cache_free(files_cachep, | ||
100 | container_of(fdt, struct files_struct, fdtab)); | ||
101 | return; | ||
102 | } | ||
103 | if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { | 96 | if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { |
104 | kfree(fdt->fd); | 97 | kfree(fdt->fd); |
105 | kfree(fdt->open_fds); | 98 | kfree(fdt->open_fds); |
@@ -229,7 +222,7 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
229 | copy_fdtable(new_fdt, cur_fdt); | 222 | copy_fdtable(new_fdt, cur_fdt); |
230 | rcu_assign_pointer(files->fdt, new_fdt); | 223 | rcu_assign_pointer(files->fdt, new_fdt); |
231 | if (cur_fdt->max_fds > NR_OPEN_DEFAULT) | 224 | if (cur_fdt->max_fds > NR_OPEN_DEFAULT) |
232 | free_fdtable(cur_fdt); | 225 | call_rcu(&cur_fdt->rcu, free_fdtable_rcu); |
233 | } else { | 226 | } else { |
234 | /* Somebody else expanded, so undo our attempt */ | 227 | /* Somebody else expanded, so undo our attempt */ |
235 | __free_fdtable(new_fdt); | 228 | __free_fdtable(new_fdt); |
@@ -245,19 +238,12 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
245 | * expanded and execution may have blocked. | 238 | * expanded and execution may have blocked. |
246 | * The files->file_lock should be held on entry, and will be held on exit. | 239 | * The files->file_lock should be held on entry, and will be held on exit. |
247 | */ | 240 | */ |
248 | int expand_files(struct files_struct *files, int nr) | 241 | static int expand_files(struct files_struct *files, int nr) |
249 | { | 242 | { |
250 | struct fdtable *fdt; | 243 | struct fdtable *fdt; |
251 | 244 | ||
252 | fdt = files_fdtable(files); | 245 | fdt = files_fdtable(files); |
253 | 246 | ||
254 | /* | ||
255 | * N.B. For clone tasks sharing a files structure, this test | ||
256 | * will limit the total number of files that can be opened. | ||
257 | */ | ||
258 | if (nr >= rlimit(RLIMIT_NOFILE)) | ||
259 | return -EMFILE; | ||
260 | |||
261 | /* Do we need to expand? */ | 247 | /* Do we need to expand? */ |
262 | if (nr < fdt->max_fds) | 248 | if (nr < fdt->max_fds) |
263 | return 0; | 249 | return 0; |
@@ -270,6 +256,26 @@ int expand_files(struct files_struct *files, int nr) | |||
270 | return expand_fdtable(files, nr); | 256 | return expand_fdtable(files, nr); |
271 | } | 257 | } |
272 | 258 | ||
259 | static inline void __set_close_on_exec(int fd, struct fdtable *fdt) | ||
260 | { | ||
261 | __set_bit(fd, fdt->close_on_exec); | ||
262 | } | ||
263 | |||
264 | static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) | ||
265 | { | ||
266 | __clear_bit(fd, fdt->close_on_exec); | ||
267 | } | ||
268 | |||
269 | static inline void __set_open_fd(int fd, struct fdtable *fdt) | ||
270 | { | ||
271 | __set_bit(fd, fdt->open_fds); | ||
272 | } | ||
273 | |||
274 | static inline void __clear_open_fd(int fd, struct fdtable *fdt) | ||
275 | { | ||
276 | __clear_bit(fd, fdt->open_fds); | ||
277 | } | ||
278 | |||
273 | static int count_open_files(struct fdtable *fdt) | 279 | static int count_open_files(struct fdtable *fdt) |
274 | { | 280 | { |
275 | int size = fdt->max_fds; | 281 | int size = fdt->max_fds; |
@@ -395,6 +401,95 @@ out: | |||
395 | return NULL; | 401 | return NULL; |
396 | } | 402 | } |
397 | 403 | ||
404 | static void close_files(struct files_struct * files) | ||
405 | { | ||
406 | int i, j; | ||
407 | struct fdtable *fdt; | ||
408 | |||
409 | j = 0; | ||
410 | |||
411 | /* | ||
412 | * It is safe to dereference the fd table without RCU or | ||
413 | * ->file_lock because this is the last reference to the | ||
414 | * files structure. But use RCU to shut RCU-lockdep up. | ||
415 | */ | ||
416 | rcu_read_lock(); | ||
417 | fdt = files_fdtable(files); | ||
418 | rcu_read_unlock(); | ||
419 | for (;;) { | ||
420 | unsigned long set; | ||
421 | i = j * BITS_PER_LONG; | ||
422 | if (i >= fdt->max_fds) | ||
423 | break; | ||
424 | set = fdt->open_fds[j++]; | ||
425 | while (set) { | ||
426 | if (set & 1) { | ||
427 | struct file * file = xchg(&fdt->fd[i], NULL); | ||
428 | if (file) { | ||
429 | filp_close(file, files); | ||
430 | cond_resched(); | ||
431 | } | ||
432 | } | ||
433 | i++; | ||
434 | set >>= 1; | ||
435 | } | ||
436 | } | ||
437 | } | ||
438 | |||
439 | struct files_struct *get_files_struct(struct task_struct *task) | ||
440 | { | ||
441 | struct files_struct *files; | ||
442 | |||
443 | task_lock(task); | ||
444 | files = task->files; | ||
445 | if (files) | ||
446 | atomic_inc(&files->count); | ||
447 | task_unlock(task); | ||
448 | |||
449 | return files; | ||
450 | } | ||
451 | |||
452 | void put_files_struct(struct files_struct *files) | ||
453 | { | ||
454 | struct fdtable *fdt; | ||
455 | |||
456 | if (atomic_dec_and_test(&files->count)) { | ||
457 | close_files(files); | ||
458 | /* not really needed, since nobody can see us */ | ||
459 | rcu_read_lock(); | ||
460 | fdt = files_fdtable(files); | ||
461 | rcu_read_unlock(); | ||
462 | /* free the arrays if they are not embedded */ | ||
463 | if (fdt != &files->fdtab) | ||
464 | __free_fdtable(fdt); | ||
465 | kmem_cache_free(files_cachep, files); | ||
466 | } | ||
467 | } | ||
468 | |||
469 | void reset_files_struct(struct files_struct *files) | ||
470 | { | ||
471 | struct task_struct *tsk = current; | ||
472 | struct files_struct *old; | ||
473 | |||
474 | old = tsk->files; | ||
475 | task_lock(tsk); | ||
476 | tsk->files = files; | ||
477 | task_unlock(tsk); | ||
478 | put_files_struct(old); | ||
479 | } | ||
480 | |||
481 | void exit_files(struct task_struct *tsk) | ||
482 | { | ||
483 | struct files_struct * files = tsk->files; | ||
484 | |||
485 | if (files) { | ||
486 | task_lock(tsk); | ||
487 | tsk->files = NULL; | ||
488 | task_unlock(tsk); | ||
489 | put_files_struct(files); | ||
490 | } | ||
491 | } | ||
492 | |||
398 | static void __devinit fdtable_defer_list_init(int cpu) | 493 | static void __devinit fdtable_defer_list_init(int cpu) |
399 | { | 494 | { |
400 | struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); | 495 | struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); |
@@ -424,12 +519,18 @@ struct files_struct init_files = { | |||
424 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), | 519 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), |
425 | }; | 520 | }; |
426 | 521 | ||
522 | void daemonize_descriptors(void) | ||
523 | { | ||
524 | atomic_inc(&init_files.count); | ||
525 | reset_files_struct(&init_files); | ||
526 | } | ||
527 | |||
427 | /* | 528 | /* |
428 | * allocate a file descriptor, mark it busy. | 529 | * allocate a file descriptor, mark it busy. |
429 | */ | 530 | */ |
430 | int alloc_fd(unsigned start, unsigned flags) | 531 | int __alloc_fd(struct files_struct *files, |
532 | unsigned start, unsigned end, unsigned flags) | ||
431 | { | 533 | { |
432 | struct files_struct *files = current->files; | ||
433 | unsigned int fd; | 534 | unsigned int fd; |
434 | int error; | 535 | int error; |
435 | struct fdtable *fdt; | 536 | struct fdtable *fdt; |
@@ -444,6 +545,14 @@ repeat: | |||
444 | if (fd < fdt->max_fds) | 545 | if (fd < fdt->max_fds) |
445 | fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); | 546 | fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); |
446 | 547 | ||
548 | /* | ||
549 | * N.B. For clone tasks sharing a files structure, this test | ||
550 | * will limit the total number of files that can be opened. | ||
551 | */ | ||
552 | error = -EMFILE; | ||
553 | if (fd >= end) | ||
554 | goto out; | ||
555 | |||
447 | error = expand_files(files, fd); | 556 | error = expand_files(files, fd); |
448 | if (error < 0) | 557 | if (error < 0) |
449 | goto out; | 558 | goto out; |
@@ -477,8 +586,424 @@ out: | |||
477 | return error; | 586 | return error; |
478 | } | 587 | } |
479 | 588 | ||
480 | int get_unused_fd(void) | 589 | static int alloc_fd(unsigned start, unsigned flags) |
590 | { | ||
591 | return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); | ||
592 | } | ||
593 | |||
594 | int get_unused_fd_flags(unsigned flags) | ||
595 | { | ||
596 | return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags); | ||
597 | } | ||
598 | EXPORT_SYMBOL(get_unused_fd_flags); | ||
599 | |||
600 | static void __put_unused_fd(struct files_struct *files, unsigned int fd) | ||
601 | { | ||
602 | struct fdtable *fdt = files_fdtable(files); | ||
603 | __clear_open_fd(fd, fdt); | ||
604 | if (fd < files->next_fd) | ||
605 | files->next_fd = fd; | ||
606 | } | ||
607 | |||
608 | void put_unused_fd(unsigned int fd) | ||
609 | { | ||
610 | struct files_struct *files = current->files; | ||
611 | spin_lock(&files->file_lock); | ||
612 | __put_unused_fd(files, fd); | ||
613 | spin_unlock(&files->file_lock); | ||
614 | } | ||
615 | |||
616 | EXPORT_SYMBOL(put_unused_fd); | ||
617 | |||
618 | /* | ||
619 | * Install a file pointer in the fd array. | ||
620 | * | ||
621 | * The VFS is full of places where we drop the files lock between | ||
622 | * setting the open_fds bitmap and installing the file in the file | ||
623 | * array. At any such point, we are vulnerable to a dup2() race | ||
624 | * installing a file in the array before us. We need to detect this and | ||
625 | * fput() the struct file we are about to overwrite in this case. | ||
626 | * | ||
627 | * It should never happen - if we allow dup2() do it, _really_ bad things | ||
628 | * will follow. | ||
629 | * | ||
630 | * NOTE: __fd_install() variant is really, really low-level; don't | ||
631 | * use it unless you are forced to by truly lousy API shoved down | ||
632 | * your throat. 'files' *MUST* be either current->files or obtained | ||
633 | * by get_files_struct(current) done by whoever had given it to you, | ||
634 | * or really bad things will happen. Normally you want to use | ||
635 | * fd_install() instead. | ||
636 | */ | ||
637 | |||
638 | void __fd_install(struct files_struct *files, unsigned int fd, | ||
639 | struct file *file) | ||
640 | { | ||
641 | struct fdtable *fdt; | ||
642 | spin_lock(&files->file_lock); | ||
643 | fdt = files_fdtable(files); | ||
644 | BUG_ON(fdt->fd[fd] != NULL); | ||
645 | rcu_assign_pointer(fdt->fd[fd], file); | ||
646 | spin_unlock(&files->file_lock); | ||
647 | } | ||
648 | |||
649 | void fd_install(unsigned int fd, struct file *file) | ||
481 | { | 650 | { |
482 | return alloc_fd(0, 0); | 651 | __fd_install(current->files, fd, file); |
652 | } | ||
653 | |||
654 | EXPORT_SYMBOL(fd_install); | ||
655 | |||
656 | /* | ||
657 | * The same warnings as for __alloc_fd()/__fd_install() apply here... | ||
658 | */ | ||
659 | int __close_fd(struct files_struct *files, unsigned fd) | ||
660 | { | ||
661 | struct file *file; | ||
662 | struct fdtable *fdt; | ||
663 | |||
664 | spin_lock(&files->file_lock); | ||
665 | fdt = files_fdtable(files); | ||
666 | if (fd >= fdt->max_fds) | ||
667 | goto out_unlock; | ||
668 | file = fdt->fd[fd]; | ||
669 | if (!file) | ||
670 | goto out_unlock; | ||
671 | rcu_assign_pointer(fdt->fd[fd], NULL); | ||
672 | __clear_close_on_exec(fd, fdt); | ||
673 | __put_unused_fd(files, fd); | ||
674 | spin_unlock(&files->file_lock); | ||
675 | return filp_close(file, files); | ||
676 | |||
677 | out_unlock: | ||
678 | spin_unlock(&files->file_lock); | ||
679 | return -EBADF; | ||
680 | } | ||
681 | |||
682 | void do_close_on_exec(struct files_struct *files) | ||
683 | { | ||
684 | unsigned i; | ||
685 | struct fdtable *fdt; | ||
686 | |||
687 | /* exec unshares first */ | ||
688 | BUG_ON(atomic_read(&files->count) != 1); | ||
689 | spin_lock(&files->file_lock); | ||
690 | for (i = 0; ; i++) { | ||
691 | unsigned long set; | ||
692 | unsigned fd = i * BITS_PER_LONG; | ||
693 | fdt = files_fdtable(files); | ||
694 | if (fd >= fdt->max_fds) | ||
695 | break; | ||
696 | set = fdt->close_on_exec[i]; | ||
697 | if (!set) | ||
698 | continue; | ||
699 | fdt->close_on_exec[i] = 0; | ||
700 | for ( ; set ; fd++, set >>= 1) { | ||
701 | struct file *file; | ||
702 | if (!(set & 1)) | ||
703 | continue; | ||
704 | file = fdt->fd[fd]; | ||
705 | if (!file) | ||
706 | continue; | ||
707 | rcu_assign_pointer(fdt->fd[fd], NULL); | ||
708 | __put_unused_fd(files, fd); | ||
709 | spin_unlock(&files->file_lock); | ||
710 | filp_close(file, files); | ||
711 | cond_resched(); | ||
712 | spin_lock(&files->file_lock); | ||
713 | } | ||
714 | |||
715 | } | ||
716 | spin_unlock(&files->file_lock); | ||
717 | } | ||
718 | |||
719 | struct file *fget(unsigned int fd) | ||
720 | { | ||
721 | struct file *file; | ||
722 | struct files_struct *files = current->files; | ||
723 | |||
724 | rcu_read_lock(); | ||
725 | file = fcheck_files(files, fd); | ||
726 | if (file) { | ||
727 | /* File object ref couldn't be taken */ | ||
728 | if (file->f_mode & FMODE_PATH || | ||
729 | !atomic_long_inc_not_zero(&file->f_count)) | ||
730 | file = NULL; | ||
731 | } | ||
732 | rcu_read_unlock(); | ||
733 | |||
734 | return file; | ||
735 | } | ||
736 | |||
737 | EXPORT_SYMBOL(fget); | ||
738 | |||
739 | struct file *fget_raw(unsigned int fd) | ||
740 | { | ||
741 | struct file *file; | ||
742 | struct files_struct *files = current->files; | ||
743 | |||
744 | rcu_read_lock(); | ||
745 | file = fcheck_files(files, fd); | ||
746 | if (file) { | ||
747 | /* File object ref couldn't be taken */ | ||
748 | if (!atomic_long_inc_not_zero(&file->f_count)) | ||
749 | file = NULL; | ||
750 | } | ||
751 | rcu_read_unlock(); | ||
752 | |||
753 | return file; | ||
754 | } | ||
755 | |||
756 | EXPORT_SYMBOL(fget_raw); | ||
757 | |||
758 | /* | ||
759 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. | ||
760 | * | ||
761 | * You can use this instead of fget if you satisfy all of the following | ||
762 | * conditions: | ||
763 | * 1) You must call fput_light before exiting the syscall and returning control | ||
764 | * to userspace (i.e. you cannot remember the returned struct file * after | ||
765 | * returning to userspace). | ||
766 | * 2) You must not call filp_close on the returned struct file * in between | ||
767 | * calls to fget_light and fput_light. | ||
768 | * 3) You must not clone the current task in between the calls to fget_light | ||
769 | * and fput_light. | ||
770 | * | ||
771 | * The fput_needed flag returned by fget_light should be passed to the | ||
772 | * corresponding fput_light. | ||
773 | */ | ||
774 | struct file *fget_light(unsigned int fd, int *fput_needed) | ||
775 | { | ||
776 | struct file *file; | ||
777 | struct files_struct *files = current->files; | ||
778 | |||
779 | *fput_needed = 0; | ||
780 | if (atomic_read(&files->count) == 1) { | ||
781 | file = fcheck_files(files, fd); | ||
782 | if (file && (file->f_mode & FMODE_PATH)) | ||
783 | file = NULL; | ||
784 | } else { | ||
785 | rcu_read_lock(); | ||
786 | file = fcheck_files(files, fd); | ||
787 | if (file) { | ||
788 | if (!(file->f_mode & FMODE_PATH) && | ||
789 | atomic_long_inc_not_zero(&file->f_count)) | ||
790 | *fput_needed = 1; | ||
791 | else | ||
792 | /* Didn't get the reference, someone's freed */ | ||
793 | file = NULL; | ||
794 | } | ||
795 | rcu_read_unlock(); | ||
796 | } | ||
797 | |||
798 | return file; | ||
799 | } | ||
800 | EXPORT_SYMBOL(fget_light); | ||
801 | |||
802 | struct file *fget_raw_light(unsigned int fd, int *fput_needed) | ||
803 | { | ||
804 | struct file *file; | ||
805 | struct files_struct *files = current->files; | ||
806 | |||
807 | *fput_needed = 0; | ||
808 | if (atomic_read(&files->count) == 1) { | ||
809 | file = fcheck_files(files, fd); | ||
810 | } else { | ||
811 | rcu_read_lock(); | ||
812 | file = fcheck_files(files, fd); | ||
813 | if (file) { | ||
814 | if (atomic_long_inc_not_zero(&file->f_count)) | ||
815 | *fput_needed = 1; | ||
816 | else | ||
817 | /* Didn't get the reference, someone's freed */ | ||
818 | file = NULL; | ||
819 | } | ||
820 | rcu_read_unlock(); | ||
821 | } | ||
822 | |||
823 | return file; | ||
824 | } | ||
825 | |||
826 | void set_close_on_exec(unsigned int fd, int flag) | ||
827 | { | ||
828 | struct files_struct *files = current->files; | ||
829 | struct fdtable *fdt; | ||
830 | spin_lock(&files->file_lock); | ||
831 | fdt = files_fdtable(files); | ||
832 | if (flag) | ||
833 | __set_close_on_exec(fd, fdt); | ||
834 | else | ||
835 | __clear_close_on_exec(fd, fdt); | ||
836 | spin_unlock(&files->file_lock); | ||
837 | } | ||
838 | |||
839 | bool get_close_on_exec(unsigned int fd) | ||
840 | { | ||
841 | struct files_struct *files = current->files; | ||
842 | struct fdtable *fdt; | ||
843 | bool res; | ||
844 | rcu_read_lock(); | ||
845 | fdt = files_fdtable(files); | ||
846 | res = close_on_exec(fd, fdt); | ||
847 | rcu_read_unlock(); | ||
848 | return res; | ||
849 | } | ||
850 | |||
851 | static int do_dup2(struct files_struct *files, | ||
852 | struct file *file, unsigned fd, unsigned flags) | ||
853 | { | ||
854 | struct file *tofree; | ||
855 | struct fdtable *fdt; | ||
856 | |||
857 | /* | ||
858 | * We need to detect attempts to do dup2() over allocated but still | ||
859 | * not finished descriptor. NB: OpenBSD avoids that at the price of | ||
860 | * extra work in their equivalent of fget() - they insert struct | ||
861 | * file immediately after grabbing descriptor, mark it larval if | ||
862 | * more work (e.g. actual opening) is needed and make sure that | ||
863 | * fget() treats larval files as absent. Potentially interesting, | ||
864 | * but while extra work in fget() is trivial, locking implications | ||
865 | * and amount of surgery on open()-related paths in VFS are not. | ||
866 | * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" | ||
867 | * deadlocks in rather amusing ways, AFAICS. All of that is out of | ||
868 | * scope of POSIX or SUS, since neither considers shared descriptor | ||
869 | * tables and this condition does not arise without those. | ||
870 | */ | ||
871 | fdt = files_fdtable(files); | ||
872 | tofree = fdt->fd[fd]; | ||
873 | if (!tofree && fd_is_open(fd, fdt)) | ||
874 | goto Ebusy; | ||
875 | get_file(file); | ||
876 | rcu_assign_pointer(fdt->fd[fd], file); | ||
877 | __set_open_fd(fd, fdt); | ||
878 | if (flags & O_CLOEXEC) | ||
879 | __set_close_on_exec(fd, fdt); | ||
880 | else | ||
881 | __clear_close_on_exec(fd, fdt); | ||
882 | spin_unlock(&files->file_lock); | ||
883 | |||
884 | if (tofree) | ||
885 | filp_close(tofree, files); | ||
886 | |||
887 | return fd; | ||
888 | |||
889 | Ebusy: | ||
890 | spin_unlock(&files->file_lock); | ||
891 | return -EBUSY; | ||
892 | } | ||
893 | |||
894 | int replace_fd(unsigned fd, struct file *file, unsigned flags) | ||
895 | { | ||
896 | int err; | ||
897 | struct files_struct *files = current->files; | ||
898 | |||
899 | if (!file) | ||
900 | return __close_fd(files, fd); | ||
901 | |||
902 | if (fd >= rlimit(RLIMIT_NOFILE)) | ||
903 | return -EMFILE; | ||
904 | |||
905 | spin_lock(&files->file_lock); | ||
906 | err = expand_files(files, fd); | ||
907 | if (unlikely(err < 0)) | ||
908 | goto out_unlock; | ||
909 | return do_dup2(files, file, fd, flags); | ||
910 | |||
911 | out_unlock: | ||
912 | spin_unlock(&files->file_lock); | ||
913 | return err; | ||
914 | } | ||
915 | |||
916 | SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) | ||
917 | { | ||
918 | int err = -EBADF; | ||
919 | struct file *file; | ||
920 | struct files_struct *files = current->files; | ||
921 | |||
922 | if ((flags & ~O_CLOEXEC) != 0) | ||
923 | return -EINVAL; | ||
924 | |||
925 | if (newfd >= rlimit(RLIMIT_NOFILE)) | ||
926 | return -EMFILE; | ||
927 | |||
928 | spin_lock(&files->file_lock); | ||
929 | err = expand_files(files, newfd); | ||
930 | file = fcheck(oldfd); | ||
931 | if (unlikely(!file)) | ||
932 | goto Ebadf; | ||
933 | if (unlikely(err < 0)) { | ||
934 | if (err == -EMFILE) | ||
935 | goto Ebadf; | ||
936 | goto out_unlock; | ||
937 | } | ||
938 | return do_dup2(files, file, newfd, flags); | ||
939 | |||
940 | Ebadf: | ||
941 | err = -EBADF; | ||
942 | out_unlock: | ||
943 | spin_unlock(&files->file_lock); | ||
944 | return err; | ||
945 | } | ||
946 | |||
947 | SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) | ||
948 | { | ||
949 | if (unlikely(newfd == oldfd)) { /* corner case */ | ||
950 | struct files_struct *files = current->files; | ||
951 | int retval = oldfd; | ||
952 | |||
953 | rcu_read_lock(); | ||
954 | if (!fcheck_files(files, oldfd)) | ||
955 | retval = -EBADF; | ||
956 | rcu_read_unlock(); | ||
957 | return retval; | ||
958 | } | ||
959 | return sys_dup3(oldfd, newfd, 0); | ||
960 | } | ||
961 | |||
962 | SYSCALL_DEFINE1(dup, unsigned int, fildes) | ||
963 | { | ||
964 | int ret = -EBADF; | ||
965 | struct file *file = fget_raw(fildes); | ||
966 | |||
967 | if (file) { | ||
968 | ret = get_unused_fd(); | ||
969 | if (ret >= 0) | ||
970 | fd_install(ret, file); | ||
971 | else | ||
972 | fput(file); | ||
973 | } | ||
974 | return ret; | ||
975 | } | ||
976 | |||
977 | int f_dupfd(unsigned int from, struct file *file, unsigned flags) | ||
978 | { | ||
979 | int err; | ||
980 | if (from >= rlimit(RLIMIT_NOFILE)) | ||
981 | return -EINVAL; | ||
982 | err = alloc_fd(from, flags); | ||
983 | if (err >= 0) { | ||
984 | get_file(file); | ||
985 | fd_install(err, file); | ||
986 | } | ||
987 | return err; | ||
988 | } | ||
989 | |||
990 | int iterate_fd(struct files_struct *files, unsigned n, | ||
991 | int (*f)(const void *, struct file *, unsigned), | ||
992 | const void *p) | ||
993 | { | ||
994 | struct fdtable *fdt; | ||
995 | struct file *file; | ||
996 | int res = 0; | ||
997 | if (!files) | ||
998 | return 0; | ||
999 | spin_lock(&files->file_lock); | ||
1000 | fdt = files_fdtable(files); | ||
1001 | while (!res && n < fdt->max_fds) { | ||
1002 | file = rcu_dereference_check_fdtable(files, fdt->fd[n++]); | ||
1003 | if (file) | ||
1004 | res = f(p, file, n); | ||
1005 | } | ||
1006 | spin_unlock(&files->file_lock); | ||
1007 | return res; | ||
483 | } | 1008 | } |
484 | EXPORT_SYMBOL(get_unused_fd); | 1009 | EXPORT_SYMBOL(iterate_fd); |
diff --git a/fs/file_table.c b/fs/file_table.c index 701985e4ccda..dac67923330f 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -243,10 +243,10 @@ static void __fput(struct file *file) | |||
243 | if (file->f_op && file->f_op->fasync) | 243 | if (file->f_op && file->f_op->fasync) |
244 | file->f_op->fasync(-1, file, 0); | 244 | file->f_op->fasync(-1, file, 0); |
245 | } | 245 | } |
246 | ima_file_free(file); | ||
246 | if (file->f_op && file->f_op->release) | 247 | if (file->f_op && file->f_op->release) |
247 | file->f_op->release(inode, file); | 248 | file->f_op->release(inode, file); |
248 | security_file_free(file); | 249 | security_file_free(file); |
249 | ima_file_free(file); | ||
250 | if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && | 250 | if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && |
251 | !(file->f_mode & FMODE_PATH))) { | 251 | !(file->f_mode & FMODE_PATH))) { |
252 | cdev_put(inode->i_cdev); | 252 | cdev_put(inode->i_cdev); |
@@ -339,112 +339,6 @@ void __fput_sync(struct file *file) | |||
339 | 339 | ||
340 | EXPORT_SYMBOL(fput); | 340 | EXPORT_SYMBOL(fput); |
341 | 341 | ||
342 | struct file *fget(unsigned int fd) | ||
343 | { | ||
344 | struct file *file; | ||
345 | struct files_struct *files = current->files; | ||
346 | |||
347 | rcu_read_lock(); | ||
348 | file = fcheck_files(files, fd); | ||
349 | if (file) { | ||
350 | /* File object ref couldn't be taken */ | ||
351 | if (file->f_mode & FMODE_PATH || | ||
352 | !atomic_long_inc_not_zero(&file->f_count)) | ||
353 | file = NULL; | ||
354 | } | ||
355 | rcu_read_unlock(); | ||
356 | |||
357 | return file; | ||
358 | } | ||
359 | |||
360 | EXPORT_SYMBOL(fget); | ||
361 | |||
362 | struct file *fget_raw(unsigned int fd) | ||
363 | { | ||
364 | struct file *file; | ||
365 | struct files_struct *files = current->files; | ||
366 | |||
367 | rcu_read_lock(); | ||
368 | file = fcheck_files(files, fd); | ||
369 | if (file) { | ||
370 | /* File object ref couldn't be taken */ | ||
371 | if (!atomic_long_inc_not_zero(&file->f_count)) | ||
372 | file = NULL; | ||
373 | } | ||
374 | rcu_read_unlock(); | ||
375 | |||
376 | return file; | ||
377 | } | ||
378 | |||
379 | EXPORT_SYMBOL(fget_raw); | ||
380 | |||
381 | /* | ||
382 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. | ||
383 | * | ||
384 | * You can use this instead of fget if you satisfy all of the following | ||
385 | * conditions: | ||
386 | * 1) You must call fput_light before exiting the syscall and returning control | ||
387 | * to userspace (i.e. you cannot remember the returned struct file * after | ||
388 | * returning to userspace). | ||
389 | * 2) You must not call filp_close on the returned struct file * in between | ||
390 | * calls to fget_light and fput_light. | ||
391 | * 3) You must not clone the current task in between the calls to fget_light | ||
392 | * and fput_light. | ||
393 | * | ||
394 | * The fput_needed flag returned by fget_light should be passed to the | ||
395 | * corresponding fput_light. | ||
396 | */ | ||
397 | struct file *fget_light(unsigned int fd, int *fput_needed) | ||
398 | { | ||
399 | struct file *file; | ||
400 | struct files_struct *files = current->files; | ||
401 | |||
402 | *fput_needed = 0; | ||
403 | if (atomic_read(&files->count) == 1) { | ||
404 | file = fcheck_files(files, fd); | ||
405 | if (file && (file->f_mode & FMODE_PATH)) | ||
406 | file = NULL; | ||
407 | } else { | ||
408 | rcu_read_lock(); | ||
409 | file = fcheck_files(files, fd); | ||
410 | if (file) { | ||
411 | if (!(file->f_mode & FMODE_PATH) && | ||
412 | atomic_long_inc_not_zero(&file->f_count)) | ||
413 | *fput_needed = 1; | ||
414 | else | ||
415 | /* Didn't get the reference, someone's freed */ | ||
416 | file = NULL; | ||
417 | } | ||
418 | rcu_read_unlock(); | ||
419 | } | ||
420 | |||
421 | return file; | ||
422 | } | ||
423 | |||
424 | struct file *fget_raw_light(unsigned int fd, int *fput_needed) | ||
425 | { | ||
426 | struct file *file; | ||
427 | struct files_struct *files = current->files; | ||
428 | |||
429 | *fput_needed = 0; | ||
430 | if (atomic_read(&files->count) == 1) { | ||
431 | file = fcheck_files(files, fd); | ||
432 | } else { | ||
433 | rcu_read_lock(); | ||
434 | file = fcheck_files(files, fd); | ||
435 | if (file) { | ||
436 | if (atomic_long_inc_not_zero(&file->f_count)) | ||
437 | *fput_needed = 1; | ||
438 | else | ||
439 | /* Didn't get the reference, someone's freed */ | ||
440 | file = NULL; | ||
441 | } | ||
442 | rcu_read_unlock(); | ||
443 | } | ||
444 | |||
445 | return file; | ||
446 | } | ||
447 | |||
448 | void put_filp(struct file *file) | 342 | void put_filp(struct file *file) |
449 | { | 343 | { |
450 | if (atomic_long_dec_and_test(&file->f_count)) { | 344 | if (atomic_long_dec_and_test(&file->f_count)) { |
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index ef67c95f12d4..f47df72cef17 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c | |||
@@ -224,8 +224,8 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip) | |||
224 | { | 224 | { |
225 | 225 | ||
226 | ip->i_mode = vxfs_transmod(vip); | 226 | ip->i_mode = vxfs_transmod(vip); |
227 | ip->i_uid = (uid_t)vip->vii_uid; | 227 | i_uid_write(ip, (uid_t)vip->vii_uid); |
228 | ip->i_gid = (gid_t)vip->vii_gid; | 228 | i_gid_write(ip, (gid_t)vip->vii_gid); |
229 | 229 | ||
230 | set_nlink(ip, vip->vii_nlink); | 230 | set_nlink(ip, vip->vii_nlink); |
231 | ip->i_size = vip->vii_size; | 231 | ip->i_size = vip->vii_size; |
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index d4fabd26084e..fed2c8afb3a9 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c | |||
@@ -279,6 +279,11 @@ static void __exit | |||
279 | vxfs_cleanup(void) | 279 | vxfs_cleanup(void) |
280 | { | 280 | { |
281 | unregister_filesystem(&vxfs_fs_type); | 281 | unregister_filesystem(&vxfs_fs_type); |
282 | /* | ||
283 | * Make sure all delayed rcu free inodes are flushed before we | ||
284 | * destroy cache. | ||
285 | */ | ||
286 | rcu_barrier(); | ||
282 | kmem_cache_destroy(vxfs_inode_cachep); | 287 | kmem_cache_destroy(vxfs_inode_cachep); |
283 | } | 288 | } |
284 | 289 | ||
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index be3efc4f64f4..401b6c6248ae 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -63,6 +63,7 @@ int writeback_in_progress(struct backing_dev_info *bdi) | |||
63 | { | 63 | { |
64 | return test_bit(BDI_writeback_running, &bdi->state); | 64 | return test_bit(BDI_writeback_running, &bdi->state); |
65 | } | 65 | } |
66 | EXPORT_SYMBOL(writeback_in_progress); | ||
66 | 67 | ||
67 | static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) | 68 | static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) |
68 | { | 69 | { |
@@ -438,8 +439,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, | |||
438 | * setting I_SYNC flag and calling inode_sync_complete() to clear it. | 439 | * setting I_SYNC flag and calling inode_sync_complete() to clear it. |
439 | */ | 440 | */ |
440 | static int | 441 | static int |
441 | __writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, | 442 | __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) |
442 | struct writeback_control *wbc) | ||
443 | { | 443 | { |
444 | struct address_space *mapping = inode->i_mapping; | 444 | struct address_space *mapping = inode->i_mapping; |
445 | long nr_to_write = wbc->nr_to_write; | 445 | long nr_to_write = wbc->nr_to_write; |
@@ -526,7 +526,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, | |||
526 | inode->i_state |= I_SYNC; | 526 | inode->i_state |= I_SYNC; |
527 | spin_unlock(&inode->i_lock); | 527 | spin_unlock(&inode->i_lock); |
528 | 528 | ||
529 | ret = __writeback_single_inode(inode, wb, wbc); | 529 | ret = __writeback_single_inode(inode, wbc); |
530 | 530 | ||
531 | spin_lock(&wb->list_lock); | 531 | spin_lock(&wb->list_lock); |
532 | spin_lock(&inode->i_lock); | 532 | spin_lock(&inode->i_lock); |
@@ -577,10 +577,6 @@ static long writeback_chunk_size(struct backing_dev_info *bdi, | |||
577 | /* | 577 | /* |
578 | * Write a portion of b_io inodes which belong to @sb. | 578 | * Write a portion of b_io inodes which belong to @sb. |
579 | * | 579 | * |
580 | * If @only_this_sb is true, then find and write all such | ||
581 | * inodes. Otherwise write only ones which go sequentially | ||
582 | * in reverse order. | ||
583 | * | ||
584 | * Return the number of pages and/or inodes written. | 580 | * Return the number of pages and/or inodes written. |
585 | */ | 581 | */ |
586 | static long writeback_sb_inodes(struct super_block *sb, | 582 | static long writeback_sb_inodes(struct super_block *sb, |
@@ -673,7 +669,7 @@ static long writeback_sb_inodes(struct super_block *sb, | |||
673 | * We use I_SYNC to pin the inode in memory. While it is set | 669 | * We use I_SYNC to pin the inode in memory. While it is set |
674 | * evict_inode() will wait so the inode cannot be freed. | 670 | * evict_inode() will wait so the inode cannot be freed. |
675 | */ | 671 | */ |
676 | __writeback_single_inode(inode, wb, &wbc); | 672 | __writeback_single_inode(inode, &wbc); |
677 | 673 | ||
678 | work->nr_pages -= write_chunk - wbc.nr_to_write; | 674 | work->nr_pages -= write_chunk - wbc.nr_to_write; |
679 | wrote += write_chunk - wbc.nr_to_write; | 675 | wrote += write_chunk - wbc.nr_to_write; |
diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 03ff5b1eba93..75a20c092dd4 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c | |||
@@ -117,7 +117,7 @@ static ssize_t fuse_conn_max_background_write(struct file *file, | |||
117 | const char __user *buf, | 117 | const char __user *buf, |
118 | size_t count, loff_t *ppos) | 118 | size_t count, loff_t *ppos) |
119 | { | 119 | { |
120 | unsigned val; | 120 | unsigned uninitialized_var(val); |
121 | ssize_t ret; | 121 | ssize_t ret; |
122 | 122 | ||
123 | ret = fuse_conn_limit_write(file, buf, count, ppos, &val, | 123 | ret = fuse_conn_limit_write(file, buf, count, ppos, &val, |
@@ -154,7 +154,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file, | |||
154 | const char __user *buf, | 154 | const char __user *buf, |
155 | size_t count, loff_t *ppos) | 155 | size_t count, loff_t *ppos) |
156 | { | 156 | { |
157 | unsigned val; | 157 | unsigned uninitialized_var(val); |
158 | ssize_t ret; | 158 | ssize_t ret; |
159 | 159 | ||
160 | ret = fuse_conn_limit_write(file, buf, count, ppos, &val, | 160 | ret = fuse_conn_limit_write(file, buf, count, ppos, &val, |
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 3426521f3205..ee8d55042298 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c | |||
@@ -396,7 +396,7 @@ err_device: | |||
396 | err_region: | 396 | err_region: |
397 | unregister_chrdev_region(devt, 1); | 397 | unregister_chrdev_region(devt, 1); |
398 | err: | 398 | err: |
399 | fc->conn_error = 1; | 399 | fuse_conn_kill(fc); |
400 | goto out; | 400 | goto out; |
401 | } | 401 | } |
402 | 402 | ||
@@ -532,8 +532,6 @@ static int cuse_channel_release(struct inode *inode, struct file *file) | |||
532 | cdev_del(cc->cdev); | 532 | cdev_del(cc->cdev); |
533 | } | 533 | } |
534 | 534 | ||
535 | /* kill connection and shutdown channel */ | ||
536 | fuse_conn_kill(&cc->fc); | ||
537 | rc = fuse_dev_release(inode, file); /* puts the base reference */ | 535 | rc = fuse_dev_release(inode, file); /* puts the base reference */ |
538 | 536 | ||
539 | return rc; | 537 | return rc; |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 7df2b5e8fbe1..8c23fa7a91e6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -148,8 +148,7 @@ static struct fuse_req *get_reserved_req(struct fuse_conn *fc, | |||
148 | if (ff->reserved_req) { | 148 | if (ff->reserved_req) { |
149 | req = ff->reserved_req; | 149 | req = ff->reserved_req; |
150 | ff->reserved_req = NULL; | 150 | ff->reserved_req = NULL; |
151 | get_file(file); | 151 | req->stolen_file = get_file(file); |
152 | req->stolen_file = file; | ||
153 | } | 152 | } |
154 | spin_unlock(&fc->lock); | 153 | spin_unlock(&fc->lock); |
155 | } while (!req); | 154 | } while (!req); |
@@ -1576,6 +1575,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, | |||
1576 | req->pages[req->num_pages] = page; | 1575 | req->pages[req->num_pages] = page; |
1577 | req->num_pages++; | 1576 | req->num_pages++; |
1578 | 1577 | ||
1578 | offset = 0; | ||
1579 | num -= this_num; | 1579 | num -= this_num; |
1580 | total_len += this_num; | 1580 | total_len += this_num; |
1581 | index++; | 1581 | index++; |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8964cf3999b2..324bc0850534 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -383,6 +383,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, | |||
383 | struct fuse_entry_out outentry; | 383 | struct fuse_entry_out outentry; |
384 | struct fuse_file *ff; | 384 | struct fuse_file *ff; |
385 | 385 | ||
386 | /* Userspace expects S_IFREG in create mode */ | ||
387 | BUG_ON((mode & S_IFMT) != S_IFREG); | ||
388 | |||
386 | forget = fuse_alloc_forget(); | 389 | forget = fuse_alloc_forget(); |
387 | err = -ENOMEM; | 390 | err = -ENOMEM; |
388 | if (!forget) | 391 | if (!forget) |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index aba15f1b7ad2..78d2837bc940 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -1379,6 +1379,7 @@ static const struct vm_operations_struct fuse_file_vm_ops = { | |||
1379 | .close = fuse_vma_close, | 1379 | .close = fuse_vma_close, |
1380 | .fault = filemap_fault, | 1380 | .fault = filemap_fault, |
1381 | .page_mkwrite = fuse_page_mkwrite, | 1381 | .page_mkwrite = fuse_page_mkwrite, |
1382 | .remap_pages = generic_file_remap_pages, | ||
1382 | }; | 1383 | }; |
1383 | 1384 | ||
1384 | static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) | 1385 | static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ce0a2838ccd0..f0eda124cffb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -367,11 +367,6 @@ void fuse_conn_kill(struct fuse_conn *fc) | |||
367 | wake_up_all(&fc->waitq); | 367 | wake_up_all(&fc->waitq); |
368 | wake_up_all(&fc->blocked_waitq); | 368 | wake_up_all(&fc->blocked_waitq); |
369 | wake_up_all(&fc->reserved_req_waitq); | 369 | wake_up_all(&fc->reserved_req_waitq); |
370 | mutex_lock(&fuse_mutex); | ||
371 | list_del(&fc->entry); | ||
372 | fuse_ctl_remove_conn(fc); | ||
373 | mutex_unlock(&fuse_mutex); | ||
374 | fuse_bdi_destroy(fc); | ||
375 | } | 370 | } |
376 | EXPORT_SYMBOL_GPL(fuse_conn_kill); | 371 | EXPORT_SYMBOL_GPL(fuse_conn_kill); |
377 | 372 | ||
@@ -380,7 +375,14 @@ static void fuse_put_super(struct super_block *sb) | |||
380 | struct fuse_conn *fc = get_fuse_conn_super(sb); | 375 | struct fuse_conn *fc = get_fuse_conn_super(sb); |
381 | 376 | ||
382 | fuse_send_destroy(fc); | 377 | fuse_send_destroy(fc); |
378 | |||
383 | fuse_conn_kill(fc); | 379 | fuse_conn_kill(fc); |
380 | mutex_lock(&fuse_mutex); | ||
381 | list_del(&fc->entry); | ||
382 | fuse_ctl_remove_conn(fc); | ||
383 | mutex_unlock(&fuse_mutex); | ||
384 | fuse_bdi_destroy(fc); | ||
385 | |||
384 | fuse_conn_put(fc); | 386 | fuse_conn_put(fc); |
385 | } | 387 | } |
386 | 388 | ||
@@ -1195,6 +1197,12 @@ static void fuse_fs_cleanup(void) | |||
1195 | { | 1197 | { |
1196 | unregister_filesystem(&fuse_fs_type); | 1198 | unregister_filesystem(&fuse_fs_type); |
1197 | unregister_fuseblk(); | 1199 | unregister_fuseblk(); |
1200 | |||
1201 | /* | ||
1202 | * Make sure all delayed rcu free inodes are flushed before we | ||
1203 | * destroy cache. | ||
1204 | */ | ||
1205 | rcu_barrier(); | ||
1198 | kmem_cache_destroy(fuse_inode_cachep); | 1206 | kmem_cache_destroy(fuse_inode_cachep); |
1199 | } | 1207 | } |
1200 | 1208 | ||
diff --git a/fs/generic_acl.c b/fs/generic_acl.c index d0dddaceac59..b3f3676796d3 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c | |||
@@ -56,7 +56,7 @@ generic_acl_get(struct dentry *dentry, const char *name, void *buffer, | |||
56 | acl = get_cached_acl(dentry->d_inode, type); | 56 | acl = get_cached_acl(dentry->d_inode, type); |
57 | if (!acl) | 57 | if (!acl) |
58 | return -ENODATA; | 58 | return -ENODATA; |
59 | error = posix_acl_to_xattr(acl, buffer, size); | 59 | error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); |
60 | posix_acl_release(acl); | 60 | posix_acl_release(acl); |
61 | 61 | ||
62 | return error; | 62 | return error; |
@@ -77,7 +77,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value, | |||
77 | if (!inode_owner_or_capable(inode)) | 77 | if (!inode_owner_or_capable(inode)) |
78 | return -EPERM; | 78 | return -EPERM; |
79 | if (value) { | 79 | if (value) { |
80 | acl = posix_acl_from_xattr(value, size); | 80 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
81 | if (IS_ERR(acl)) | 81 | if (IS_ERR(acl)) |
82 | return PTR_ERR(acl); | 82 | return PTR_ERR(acl); |
83 | } | 83 | } |
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index bd4a5892c93c..f850020ad906 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
@@ -63,7 +63,7 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) | |||
63 | if (len == 0) | 63 | if (len == 0) |
64 | return NULL; | 64 | return NULL; |
65 | 65 | ||
66 | acl = posix_acl_from_xattr(data, len); | 66 | acl = posix_acl_from_xattr(&init_user_ns, data, len); |
67 | kfree(data); | 67 | kfree(data); |
68 | return acl; | 68 | return acl; |
69 | } | 69 | } |
@@ -88,13 +88,13 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl) | |||
88 | const char *name = gfs2_acl_name(type); | 88 | const char *name = gfs2_acl_name(type); |
89 | 89 | ||
90 | BUG_ON(name == NULL); | 90 | BUG_ON(name == NULL); |
91 | len = posix_acl_to_xattr(acl, NULL, 0); | 91 | len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); |
92 | if (len == 0) | 92 | if (len == 0) |
93 | return 0; | 93 | return 0; |
94 | data = kmalloc(len, GFP_NOFS); | 94 | data = kmalloc(len, GFP_NOFS); |
95 | if (data == NULL) | 95 | if (data == NULL) |
96 | return -ENOMEM; | 96 | return -ENOMEM; |
97 | error = posix_acl_to_xattr(acl, data, len); | 97 | error = posix_acl_to_xattr(&init_user_ns, acl, data, len); |
98 | if (error < 0) | 98 | if (error < 0) |
99 | goto out; | 99 | goto out; |
100 | error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); | 100 | error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); |
@@ -166,12 +166,12 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) | |||
166 | if (error) | 166 | if (error) |
167 | return error; | 167 | return error; |
168 | 168 | ||
169 | len = posix_acl_to_xattr(acl, NULL, 0); | 169 | len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); |
170 | data = kmalloc(len, GFP_NOFS); | 170 | data = kmalloc(len, GFP_NOFS); |
171 | error = -ENOMEM; | 171 | error = -ENOMEM; |
172 | if (data == NULL) | 172 | if (data == NULL) |
173 | goto out; | 173 | goto out; |
174 | posix_acl_to_xattr(acl, data, len); | 174 | posix_acl_to_xattr(&init_user_ns, acl, data, len); |
175 | error = gfs2_xattr_acl_chmod(ip, attr, data); | 175 | error = gfs2_xattr_acl_chmod(ip, attr, data); |
176 | kfree(data); | 176 | kfree(data); |
177 | set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl); | 177 | set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl); |
@@ -212,7 +212,7 @@ static int gfs2_xattr_system_get(struct dentry *dentry, const char *name, | |||
212 | if (acl == NULL) | 212 | if (acl == NULL) |
213 | return -ENODATA; | 213 | return -ENODATA; |
214 | 214 | ||
215 | error = posix_acl_to_xattr(acl, buffer, size); | 215 | error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); |
216 | posix_acl_release(acl); | 216 | posix_acl_release(acl); |
217 | 217 | ||
218 | return error; | 218 | return error; |
@@ -245,7 +245,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name, | |||
245 | if (!value) | 245 | if (!value) |
246 | goto set_acl; | 246 | goto set_acl; |
247 | 247 | ||
248 | acl = posix_acl_from_xattr(value, size); | 248 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
249 | if (!acl) { | 249 | if (!acl) { |
250 | /* | 250 | /* |
251 | * acl_set_file(3) may request that we set default ACLs with | 251 | * acl_set_file(3) may request that we set default ACLs with |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index d6526347d386..01c4975da4bc 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -612,6 +612,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
612 | struct gfs2_sbd *sdp = GFS2_SB(mapping->host); | 612 | struct gfs2_sbd *sdp = GFS2_SB(mapping->host); |
613 | struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); | 613 | struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); |
614 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | 614 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; |
615 | unsigned requested = 0; | ||
615 | int alloc_required; | 616 | int alloc_required; |
616 | int error = 0; | 617 | int error = 0; |
617 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 618 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
@@ -641,7 +642,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
641 | if (error) | 642 | if (error) |
642 | goto out_unlock; | 643 | goto out_unlock; |
643 | 644 | ||
644 | error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); | 645 | requested = data_blocks + ind_blocks; |
646 | error = gfs2_inplace_reserve(ip, requested); | ||
645 | if (error) | 647 | if (error) |
646 | goto out_qunlock; | 648 | goto out_qunlock; |
647 | } | 649 | } |
@@ -654,7 +656,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
654 | if (&ip->i_inode == sdp->sd_rindex) | 656 | if (&ip->i_inode == sdp->sd_rindex) |
655 | rblocks += 2 * RES_STATFS; | 657 | rblocks += 2 * RES_STATFS; |
656 | if (alloc_required) | 658 | if (alloc_required) |
657 | rblocks += gfs2_rg_blocks(ip); | 659 | rblocks += gfs2_rg_blocks(ip, requested); |
658 | 660 | ||
659 | error = gfs2_trans_begin(sdp, rblocks, | 661 | error = gfs2_trans_begin(sdp, rblocks, |
660 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | 662 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); |
@@ -868,8 +870,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
868 | brelse(dibh); | 870 | brelse(dibh); |
869 | failed: | 871 | failed: |
870 | gfs2_trans_end(sdp); | 872 | gfs2_trans_end(sdp); |
871 | if (gfs2_mb_reserved(ip)) | 873 | gfs2_inplace_release(ip); |
872 | gfs2_inplace_release(ip); | ||
873 | if (ip->i_res->rs_qa_qd_num) | 874 | if (ip->i_res->rs_qa_qd_num) |
874 | gfs2_quota_unlock(ip); | 875 | gfs2_quota_unlock(ip); |
875 | if (inode == sdp->sd_rindex) { | 876 | if (inode == sdp->sd_rindex) { |
@@ -1023,7 +1024,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
1023 | offset, nr_segs, gfs2_get_block_direct, | 1024 | offset, nr_segs, gfs2_get_block_direct, |
1024 | NULL, NULL, 0); | 1025 | NULL, NULL, 0); |
1025 | out: | 1026 | out: |
1026 | gfs2_glock_dq_m(1, &gh); | 1027 | gfs2_glock_dq(&gh); |
1027 | gfs2_holder_uninit(&gh); | 1028 | gfs2_holder_uninit(&gh); |
1028 | return rv; | 1029 | return rv; |
1029 | } | 1030 | } |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 49cd7dd4a9fa..1fd3ae237bdd 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -786,7 +786,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
786 | goto out_rlist; | 786 | goto out_rlist; |
787 | 787 | ||
788 | if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ | 788 | if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ |
789 | gfs2_rs_deltree(ip->i_res); | 789 | gfs2_rs_deltree(ip, ip->i_res); |
790 | 790 | ||
791 | error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + | 791 | error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + |
792 | RES_INDIRECT + RES_STATFS + RES_QUOTA, | 792 | RES_INDIRECT + RES_STATFS + RES_QUOTA, |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index d1d791ef38de..0def0504afc1 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -323,6 +323,29 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
323 | } | 323 | } |
324 | 324 | ||
325 | /** | 325 | /** |
326 | * gfs2_size_hint - Give a hint to the size of a write request | ||
327 | * @file: The struct file | ||
328 | * @offset: The file offset of the write | ||
329 | * @size: The length of the write | ||
330 | * | ||
331 | * When we are about to do a write, this function records the total | ||
332 | * write size in order to provide a suitable hint to the lower layers | ||
333 | * about how many blocks will be required. | ||
334 | * | ||
335 | */ | ||
336 | |||
337 | static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) | ||
338 | { | ||
339 | struct inode *inode = filep->f_dentry->d_inode; | ||
340 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
341 | struct gfs2_inode *ip = GFS2_I(inode); | ||
342 | size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift; | ||
343 | int hint = min_t(size_t, INT_MAX, blks); | ||
344 | |||
345 | atomic_set(&ip->i_res->rs_sizehint, hint); | ||
346 | } | ||
347 | |||
348 | /** | ||
326 | * gfs2_allocate_page_backing - Use bmap to allocate blocks | 349 | * gfs2_allocate_page_backing - Use bmap to allocate blocks |
327 | * @page: The (locked) page to allocate backing for | 350 | * @page: The (locked) page to allocate backing for |
328 | * | 351 | * |
@@ -382,8 +405,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
382 | if (ret) | 405 | if (ret) |
383 | return ret; | 406 | return ret; |
384 | 407 | ||
385 | atomic_set(&ip->i_res->rs_sizehint, | 408 | gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE); |
386 | PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift); | ||
387 | 409 | ||
388 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | 410 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
389 | ret = gfs2_glock_nq(&gh); | 411 | ret = gfs2_glock_nq(&gh); |
@@ -419,7 +441,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
419 | rblocks += data_blocks ? data_blocks : 1; | 441 | rblocks += data_blocks ? data_blocks : 1; |
420 | if (ind_blocks || data_blocks) { | 442 | if (ind_blocks || data_blocks) { |
421 | rblocks += RES_STATFS + RES_QUOTA; | 443 | rblocks += RES_STATFS + RES_QUOTA; |
422 | rblocks += gfs2_rg_blocks(ip); | 444 | rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); |
423 | } | 445 | } |
424 | ret = gfs2_trans_begin(sdp, rblocks, 0); | 446 | ret = gfs2_trans_begin(sdp, rblocks, 0); |
425 | if (ret) | 447 | if (ret) |
@@ -470,6 +492,7 @@ out: | |||
470 | static const struct vm_operations_struct gfs2_vm_ops = { | 492 | static const struct vm_operations_struct gfs2_vm_ops = { |
471 | .fault = filemap_fault, | 493 | .fault = filemap_fault, |
472 | .page_mkwrite = gfs2_page_mkwrite, | 494 | .page_mkwrite = gfs2_page_mkwrite, |
495 | .remap_pages = generic_file_remap_pages, | ||
473 | }; | 496 | }; |
474 | 497 | ||
475 | /** | 498 | /** |
@@ -504,7 +527,6 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
504 | return error; | 527 | return error; |
505 | } | 528 | } |
506 | vma->vm_ops = &gfs2_vm_ops; | 529 | vma->vm_ops = &gfs2_vm_ops; |
507 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
508 | 530 | ||
509 | return 0; | 531 | return 0; |
510 | } | 532 | } |
@@ -663,7 +685,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
663 | if (ret) | 685 | if (ret) |
664 | return ret; | 686 | return ret; |
665 | 687 | ||
666 | atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift); | 688 | gfs2_size_hint(file, pos, writesize); |
689 | |||
667 | if (file->f_flags & O_APPEND) { | 690 | if (file->f_flags & O_APPEND) { |
668 | struct gfs2_holder gh; | 691 | struct gfs2_holder gh; |
669 | 692 | ||
@@ -789,7 +812,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, | |||
789 | if (unlikely(error)) | 812 | if (unlikely(error)) |
790 | goto out_uninit; | 813 | goto out_uninit; |
791 | 814 | ||
792 | atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift); | 815 | gfs2_size_hint(file, offset, len); |
793 | 816 | ||
794 | while (len > 0) { | 817 | while (len > 0) { |
795 | if (len < bytes) | 818 | if (len < bytes) |
@@ -822,7 +845,7 @@ retry: | |||
822 | &max_bytes, &data_blocks, &ind_blocks); | 845 | &max_bytes, &data_blocks, &ind_blocks); |
823 | 846 | ||
824 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + | 847 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + |
825 | RES_RG_HDR + gfs2_rg_blocks(ip); | 848 | RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); |
826 | if (gfs2_is_jdata(ip)) | 849 | if (gfs2_is_jdata(ip)) |
827 | rblocks += data_blocks ? data_blocks : 1; | 850 | rblocks += data_blocks ? data_blocks : 1; |
828 | 851 | ||
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1ed81f40da0d..e6c2fd53cab2 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -186,20 +186,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) | |||
186 | } | 186 | } |
187 | 187 | ||
188 | /** | 188 | /** |
189 | * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list | ||
190 | * @gl: the glock | ||
191 | * | ||
192 | * If the glock is demotable, then we add it (or move it) to the end | ||
193 | * of the glock LRU list. | ||
194 | */ | ||
195 | |||
196 | static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) | ||
197 | { | ||
198 | if (demote_ok(gl)) | ||
199 | gfs2_glock_add_to_lru(gl); | ||
200 | } | ||
201 | |||
202 | /** | ||
203 | * gfs2_glock_put_nolock() - Decrement reference count on glock | 189 | * gfs2_glock_put_nolock() - Decrement reference count on glock |
204 | * @gl: The glock to put | 190 | * @gl: The glock to put |
205 | * | 191 | * |
@@ -883,7 +869,14 @@ static int gfs2_glock_demote_wait(void *word) | |||
883 | return 0; | 869 | return 0; |
884 | } | 870 | } |
885 | 871 | ||
886 | static void wait_on_holder(struct gfs2_holder *gh) | 872 | /** |
873 | * gfs2_glock_wait - wait on a glock acquisition | ||
874 | * @gh: the glock holder | ||
875 | * | ||
876 | * Returns: 0 on success | ||
877 | */ | ||
878 | |||
879 | int gfs2_glock_wait(struct gfs2_holder *gh) | ||
887 | { | 880 | { |
888 | unsigned long time1 = jiffies; | 881 | unsigned long time1 = jiffies; |
889 | 882 | ||
@@ -894,12 +887,7 @@ static void wait_on_holder(struct gfs2_holder *gh) | |||
894 | gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + | 887 | gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + |
895 | GL_GLOCK_HOLD_INCR, | 888 | GL_GLOCK_HOLD_INCR, |
896 | GL_GLOCK_MAX_HOLD); | 889 | GL_GLOCK_MAX_HOLD); |
897 | } | 890 | return gh->gh_error; |
898 | |||
899 | static void wait_on_demote(struct gfs2_glock *gl) | ||
900 | { | ||
901 | might_sleep(); | ||
902 | wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); | ||
903 | } | 891 | } |
904 | 892 | ||
905 | /** | 893 | /** |
@@ -929,19 +917,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, | |||
929 | trace_gfs2_demote_rq(gl); | 917 | trace_gfs2_demote_rq(gl); |
930 | } | 918 | } |
931 | 919 | ||
932 | /** | ||
933 | * gfs2_glock_wait - wait on a glock acquisition | ||
934 | * @gh: the glock holder | ||
935 | * | ||
936 | * Returns: 0 on success | ||
937 | */ | ||
938 | |||
939 | int gfs2_glock_wait(struct gfs2_holder *gh) | ||
940 | { | ||
941 | wait_on_holder(gh); | ||
942 | return gh->gh_error; | ||
943 | } | ||
944 | |||
945 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) | 920 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) |
946 | { | 921 | { |
947 | struct va_format vaf; | 922 | struct va_format vaf; |
@@ -979,7 +954,7 @@ __acquires(&gl->gl_spin) | |||
979 | struct gfs2_sbd *sdp = gl->gl_sbd; | 954 | struct gfs2_sbd *sdp = gl->gl_sbd; |
980 | struct list_head *insert_pt = NULL; | 955 | struct list_head *insert_pt = NULL; |
981 | struct gfs2_holder *gh2; | 956 | struct gfs2_holder *gh2; |
982 | int try_lock = 0; | 957 | int try_futile = 0; |
983 | 958 | ||
984 | BUG_ON(gh->gh_owner_pid == NULL); | 959 | BUG_ON(gh->gh_owner_pid == NULL); |
985 | if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) | 960 | if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) |
@@ -987,7 +962,7 @@ __acquires(&gl->gl_spin) | |||
987 | 962 | ||
988 | if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { | 963 | if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { |
989 | if (test_bit(GLF_LOCK, &gl->gl_flags)) | 964 | if (test_bit(GLF_LOCK, &gl->gl_flags)) |
990 | try_lock = 1; | 965 | try_futile = !may_grant(gl, gh); |
991 | if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) | 966 | if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) |
992 | goto fail; | 967 | goto fail; |
993 | } | 968 | } |
@@ -996,9 +971,8 @@ __acquires(&gl->gl_spin) | |||
996 | if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && | 971 | if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && |
997 | (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) | 972 | (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) |
998 | goto trap_recursive; | 973 | goto trap_recursive; |
999 | if (try_lock && | 974 | if (try_futile && |
1000 | !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) && | 975 | !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { |
1001 | !may_grant(gl, gh)) { | ||
1002 | fail: | 976 | fail: |
1003 | gh->gh_error = GLR_TRYFAILED; | 977 | gh->gh_error = GLR_TRYFAILED; |
1004 | gfs2_holder_wake(gh); | 978 | gfs2_holder_wake(gh); |
@@ -1121,8 +1095,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh) | |||
1121 | !test_bit(GLF_DEMOTE, &gl->gl_flags)) | 1095 | !test_bit(GLF_DEMOTE, &gl->gl_flags)) |
1122 | fast_path = 1; | 1096 | fast_path = 1; |
1123 | } | 1097 | } |
1124 | if (!test_bit(GLF_LFLUSH, &gl->gl_flags)) | 1098 | if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) |
1125 | __gfs2_glock_schedule_for_reclaim(gl); | 1099 | gfs2_glock_add_to_lru(gl); |
1100 | |||
1126 | trace_gfs2_glock_queue(gh, 0); | 1101 | trace_gfs2_glock_queue(gh, 0); |
1127 | spin_unlock(&gl->gl_spin); | 1102 | spin_unlock(&gl->gl_spin); |
1128 | if (likely(fast_path)) | 1103 | if (likely(fast_path)) |
@@ -1141,7 +1116,8 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) | |||
1141 | { | 1116 | { |
1142 | struct gfs2_glock *gl = gh->gh_gl; | 1117 | struct gfs2_glock *gl = gh->gh_gl; |
1143 | gfs2_glock_dq(gh); | 1118 | gfs2_glock_dq(gh); |
1144 | wait_on_demote(gl); | 1119 | might_sleep(); |
1120 | wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); | ||
1145 | } | 1121 | } |
1146 | 1122 | ||
1147 | /** | 1123 | /** |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 4bdcf3784187..32cc4fde975c 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -94,6 +94,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) | |||
94 | /* A shortened, inline version of gfs2_trans_begin() */ | 94 | /* A shortened, inline version of gfs2_trans_begin() */ |
95 | tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); | 95 | tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); |
96 | tr.tr_ip = (unsigned long)__builtin_return_address(0); | 96 | tr.tr_ip = (unsigned long)__builtin_return_address(0); |
97 | sb_start_intwrite(sdp->sd_vfs); | ||
97 | gfs2_log_reserve(sdp, tr.tr_reserved); | 98 | gfs2_log_reserve(sdp, tr.tr_reserved); |
98 | BUG_ON(current->journal_info); | 99 | BUG_ON(current->journal_info); |
99 | current->journal_info = &tr; | 100 | current->journal_info = &tr; |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index aaecc8085fc5..3d469d37345e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -99,9 +99,26 @@ struct gfs2_rgrpd { | |||
99 | #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ | 99 | #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ |
100 | spinlock_t rd_rsspin; /* protects reservation related vars */ | 100 | spinlock_t rd_rsspin; /* protects reservation related vars */ |
101 | struct rb_root rd_rstree; /* multi-block reservation tree */ | 101 | struct rb_root rd_rstree; /* multi-block reservation tree */ |
102 | u32 rd_rs_cnt; /* count of current reservations */ | ||
103 | }; | 102 | }; |
104 | 103 | ||
104 | struct gfs2_rbm { | ||
105 | struct gfs2_rgrpd *rgd; | ||
106 | struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */ | ||
107 | u32 offset; /* The offset is bitmap relative */ | ||
108 | }; | ||
109 | |||
110 | static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) | ||
111 | { | ||
112 | return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset; | ||
113 | } | ||
114 | |||
115 | static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1, | ||
116 | const struct gfs2_rbm *rbm2) | ||
117 | { | ||
118 | return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) && | ||
119 | (rbm1->offset == rbm2->offset); | ||
120 | } | ||
121 | |||
105 | enum gfs2_state_bits { | 122 | enum gfs2_state_bits { |
106 | BH_Pinned = BH_PrivateStart, | 123 | BH_Pinned = BH_PrivateStart, |
107 | BH_Escaped = BH_PrivateStart + 1, | 124 | BH_Escaped = BH_PrivateStart + 1, |
@@ -250,18 +267,11 @@ struct gfs2_blkreserv { | |||
250 | /* components used during write (step 1): */ | 267 | /* components used during write (step 1): */ |
251 | atomic_t rs_sizehint; /* hint of the write size */ | 268 | atomic_t rs_sizehint; /* hint of the write size */ |
252 | 269 | ||
253 | /* components used during inplace_reserve (step 2): */ | ||
254 | u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ | ||
255 | |||
256 | /* components used during get_local_rgrp (step 3): */ | ||
257 | struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */ | ||
258 | struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ | 270 | struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ |
259 | struct rb_node rs_node; /* link to other block reservations */ | 271 | struct rb_node rs_node; /* link to other block reservations */ |
260 | 272 | struct gfs2_rbm rs_rbm; /* Start of reservation */ | |
261 | /* components used during block searches and assignments (step 4): */ | ||
262 | struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */ | ||
263 | u32 rs_biblk; /* start block relative to the bi */ | ||
264 | u32 rs_free; /* how many blocks are still free */ | 273 | u32 rs_free; /* how many blocks are still free */ |
274 | u64 rs_inum; /* Inode number for reservation */ | ||
265 | 275 | ||
266 | /* ancillary quota stuff */ | 276 | /* ancillary quota stuff */ |
267 | struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; | 277 | struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 4ce22e547308..381893ceefa4 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -712,14 +712,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
712 | if (error) | 712 | if (error) |
713 | goto fail_gunlock2; | 713 | goto fail_gunlock2; |
714 | 714 | ||
715 | /* The newly created inode needs a reservation so it can allocate | 715 | error = gfs2_rs_alloc(ip); |
716 | xattrs. At the same time, we want new blocks allocated to the new | 716 | if (error) |
717 | dinode to be as contiguous as possible. Since we allocated the | 717 | goto fail_gunlock2; |
718 | dinode block under the directory's reservation, we transfer | ||
719 | ownership of that reservation to the new inode. The directory | ||
720 | doesn't need a reservation unless it needs a new allocation. */ | ||
721 | ip->i_res = dip->i_res; | ||
722 | dip->i_res = NULL; | ||
723 | 718 | ||
724 | error = gfs2_acl_create(dip, inode); | 719 | error = gfs2_acl_create(dip, inode); |
725 | if (error) | 720 | if (error) |
@@ -737,10 +732,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
737 | brelse(bh); | 732 | brelse(bh); |
738 | 733 | ||
739 | gfs2_trans_end(sdp); | 734 | gfs2_trans_end(sdp); |
740 | /* Check if we reserved space in the rgrp. Function link_dinode may | 735 | gfs2_inplace_release(dip); |
741 | not, depending on whether alloc is required. */ | ||
742 | if (gfs2_mb_reserved(dip)) | ||
743 | gfs2_inplace_release(dip); | ||
744 | gfs2_quota_unlock(dip); | 736 | gfs2_quota_unlock(dip); |
745 | mark_inode_dirty(inode); | 737 | mark_inode_dirty(inode); |
746 | gfs2_glock_dq_uninit_m(2, ghs); | 738 | gfs2_glock_dq_uninit_m(2, ghs); |
@@ -897,7 +889,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
897 | goto out_gunlock_q; | 889 | goto out_gunlock_q; |
898 | 890 | ||
899 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 891 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
900 | gfs2_rg_blocks(dip) + | 892 | gfs2_rg_blocks(dip, sdp->sd_max_dirres) + |
901 | 2 * RES_DINODE + RES_STATFS + | 893 | 2 * RES_DINODE + RES_STATFS + |
902 | RES_QUOTA, 0); | 894 | RES_QUOTA, 0); |
903 | if (error) | 895 | if (error) |
@@ -1378,7 +1370,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1378 | goto out_gunlock_q; | 1370 | goto out_gunlock_q; |
1379 | 1371 | ||
1380 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 1372 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
1381 | gfs2_rg_blocks(ndip) + | 1373 | gfs2_rg_blocks(ndip, sdp->sd_max_dirres) + |
1382 | 4 * RES_DINODE + 4 * RES_LEAF + | 1374 | 4 * RES_DINODE + 4 * RES_LEAF + |
1383 | RES_STATFS + RES_QUOTA + 4, 0); | 1375 | RES_STATFS + RES_QUOTA + 4, 0); |
1384 | if (error) | 1376 | if (error) |
@@ -1722,7 +1714,9 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name, | |||
1722 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | 1714 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
1723 | ret = gfs2_glock_nq(&gh); | 1715 | ret = gfs2_glock_nq(&gh); |
1724 | if (ret == 0) { | 1716 | if (ret == 0) { |
1725 | ret = generic_setxattr(dentry, name, data, size, flags); | 1717 | ret = gfs2_rs_alloc(ip); |
1718 | if (ret == 0) | ||
1719 | ret = generic_setxattr(dentry, name, data, size, flags); | ||
1726 | gfs2_glock_dq(&gh); | 1720 | gfs2_glock_dq(&gh); |
1727 | } | 1721 | } |
1728 | gfs2_holder_uninit(&gh); | 1722 | gfs2_holder_uninit(&gh); |
@@ -1757,7 +1751,9 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) | |||
1757 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | 1751 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
1758 | ret = gfs2_glock_nq(&gh); | 1752 | ret = gfs2_glock_nq(&gh); |
1759 | if (ret == 0) { | 1753 | if (ret == 0) { |
1760 | ret = generic_removexattr(dentry, name); | 1754 | ret = gfs2_rs_alloc(ip); |
1755 | if (ret == 0) | ||
1756 | ret = generic_removexattr(dentry, name); | ||
1761 | gfs2_glock_dq(&gh); | 1757 | gfs2_glock_dq(&gh); |
1762 | } | 1758 | } |
1763 | gfs2_holder_uninit(&gh); | 1759 | gfs2_holder_uninit(&gh); |
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 4a38db739ca0..0fb6539b0c8c 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
@@ -1289,7 +1289,7 @@ static void gdlm_unmount(struct gfs2_sbd *sdp) | |||
1289 | spin_lock(&ls->ls_recover_spin); | 1289 | spin_lock(&ls->ls_recover_spin); |
1290 | set_bit(DFL_UNMOUNT, &ls->ls_recover_flags); | 1290 | set_bit(DFL_UNMOUNT, &ls->ls_recover_flags); |
1291 | spin_unlock(&ls->ls_recover_spin); | 1291 | spin_unlock(&ls->ls_recover_spin); |
1292 | flush_delayed_work_sync(&sdp->sd_control_work); | 1292 | flush_delayed_work(&sdp->sd_control_work); |
1293 | 1293 | ||
1294 | /* mounted_lock and control_lock will be purged in dlm recovery */ | 1294 | /* mounted_lock and control_lock will be purged in dlm recovery */ |
1295 | release: | 1295 | release: |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e5af9dc420ef..e443966c8106 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/mount.h> | 19 | #include <linux/mount.h> |
20 | #include <linux/gfs2_ondisk.h> | 20 | #include <linux/gfs2_ondisk.h> |
21 | #include <linux/quotaops.h> | 21 | #include <linux/quotaops.h> |
22 | #include <linux/lockdep.h> | ||
22 | 23 | ||
23 | #include "gfs2.h" | 24 | #include "gfs2.h" |
24 | #include "incore.h" | 25 | #include "incore.h" |
@@ -766,6 +767,7 @@ fail: | |||
766 | return error; | 767 | return error; |
767 | } | 768 | } |
768 | 769 | ||
770 | static struct lock_class_key gfs2_quota_imutex_key; | ||
769 | 771 | ||
770 | static int init_inodes(struct gfs2_sbd *sdp, int undo) | 772 | static int init_inodes(struct gfs2_sbd *sdp, int undo) |
771 | { | 773 | { |
@@ -803,6 +805,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
803 | fs_err(sdp, "can't get quota file inode: %d\n", error); | 805 | fs_err(sdp, "can't get quota file inode: %d\n", error); |
804 | goto fail_rindex; | 806 | goto fail_rindex; |
805 | } | 807 | } |
808 | /* | ||
809 | * i_mutex on quota files is special. Since this inode is hidden system | ||
810 | * file, we are safe to define locking ourselves. | ||
811 | */ | ||
812 | lockdep_set_class(&sdp->sd_quota_inode->i_mutex, | ||
813 | &gfs2_quota_imutex_key); | ||
806 | 814 | ||
807 | error = gfs2_rindex_update(sdp); | 815 | error = gfs2_rindex_update(sdp); |
808 | if (error) | 816 | if (error) |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a3bde91645c2..40c4b0d42fa8 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -765,6 +765,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
765 | struct gfs2_holder *ghs, i_gh; | 765 | struct gfs2_holder *ghs, i_gh; |
766 | unsigned int qx, x; | 766 | unsigned int qx, x; |
767 | struct gfs2_quota_data *qd; | 767 | struct gfs2_quota_data *qd; |
768 | unsigned reserved; | ||
768 | loff_t offset; | 769 | loff_t offset; |
769 | unsigned int nalloc = 0, blocks; | 770 | unsigned int nalloc = 0, blocks; |
770 | int error; | 771 | int error; |
@@ -781,7 +782,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
781 | return -ENOMEM; | 782 | return -ENOMEM; |
782 | 783 | ||
783 | sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); | 784 | sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); |
784 | mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA); | 785 | mutex_lock(&ip->i_inode.i_mutex); |
785 | for (qx = 0; qx < num_qd; qx++) { | 786 | for (qx = 0; qx < num_qd; qx++) { |
786 | error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, | 787 | error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, |
787 | GL_NOCACHE, &ghs[qx]); | 788 | GL_NOCACHE, &ghs[qx]); |
@@ -811,13 +812,13 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
811 | * two blocks need to be updated instead of 1 */ | 812 | * two blocks need to be updated instead of 1 */ |
812 | blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; | 813 | blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; |
813 | 814 | ||
814 | error = gfs2_inplace_reserve(ip, 1 + | 815 | reserved = 1 + (nalloc * (data_blocks + ind_blocks)); |
815 | (nalloc * (data_blocks + ind_blocks))); | 816 | error = gfs2_inplace_reserve(ip, reserved); |
816 | if (error) | 817 | if (error) |
817 | goto out_alloc; | 818 | goto out_alloc; |
818 | 819 | ||
819 | if (nalloc) | 820 | if (nalloc) |
820 | blocks += gfs2_rg_blocks(ip) + nalloc * ind_blocks + RES_STATFS; | 821 | blocks += gfs2_rg_blocks(ip, reserved) + nalloc * ind_blocks + RES_STATFS; |
821 | 822 | ||
822 | error = gfs2_trans_begin(sdp, blocks, 0); | 823 | error = gfs2_trans_begin(sdp, blocks, 0); |
823 | if (error) | 824 | if (error) |
@@ -1070,8 +1071,10 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) | |||
1070 | 1071 | ||
1071 | if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { | 1072 | if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { |
1072 | print_message(qd, "exceeded"); | 1073 | print_message(qd, "exceeded"); |
1073 | quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ? | 1074 | quota_send_warning(make_kqid(&init_user_ns, |
1074 | USRQUOTA : GRPQUOTA, qd->qd_id, | 1075 | test_bit(QDF_USER, &qd->qd_flags) ? |
1076 | USRQUOTA : GRPQUOTA, | ||
1077 | qd->qd_id), | ||
1075 | sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); | 1078 | sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); |
1076 | 1079 | ||
1077 | error = -EDQUOT; | 1080 | error = -EDQUOT; |
@@ -1081,8 +1084,10 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) | |||
1081 | time_after_eq(jiffies, qd->qd_last_warn + | 1084 | time_after_eq(jiffies, qd->qd_last_warn + |
1082 | gfs2_tune_get(sdp, | 1085 | gfs2_tune_get(sdp, |
1083 | gt_quota_warn_period) * HZ)) { | 1086 | gt_quota_warn_period) * HZ)) { |
1084 | quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ? | 1087 | quota_send_warning(make_kqid(&init_user_ns, |
1085 | USRQUOTA : GRPQUOTA, qd->qd_id, | 1088 | test_bit(QDF_USER, &qd->qd_flags) ? |
1089 | USRQUOTA : GRPQUOTA, | ||
1090 | qd->qd_id), | ||
1086 | sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); | 1091 | sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); |
1087 | error = print_message(qd, "warning"); | 1092 | error = print_message(qd, "warning"); |
1088 | qd->qd_last_warn = jiffies; | 1093 | qd->qd_last_warn = jiffies; |
@@ -1469,7 +1474,7 @@ static int gfs2_quota_get_xstate(struct super_block *sb, | |||
1469 | return 0; | 1474 | return 0; |
1470 | } | 1475 | } |
1471 | 1476 | ||
1472 | static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, | 1477 | static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid, |
1473 | struct fs_disk_quota *fdq) | 1478 | struct fs_disk_quota *fdq) |
1474 | { | 1479 | { |
1475 | struct gfs2_sbd *sdp = sb->s_fs_info; | 1480 | struct gfs2_sbd *sdp = sb->s_fs_info; |
@@ -1477,20 +1482,21 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, | |||
1477 | struct gfs2_quota_data *qd; | 1482 | struct gfs2_quota_data *qd; |
1478 | struct gfs2_holder q_gh; | 1483 | struct gfs2_holder q_gh; |
1479 | int error; | 1484 | int error; |
1485 | int type; | ||
1480 | 1486 | ||
1481 | memset(fdq, 0, sizeof(struct fs_disk_quota)); | 1487 | memset(fdq, 0, sizeof(struct fs_disk_quota)); |
1482 | 1488 | ||
1483 | if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) | 1489 | if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) |
1484 | return -ESRCH; /* Crazy XFS error code */ | 1490 | return -ESRCH; /* Crazy XFS error code */ |
1485 | 1491 | ||
1486 | if (type == USRQUOTA) | 1492 | if (qid.type == USRQUOTA) |
1487 | type = QUOTA_USER; | 1493 | type = QUOTA_USER; |
1488 | else if (type == GRPQUOTA) | 1494 | else if (qid.type == GRPQUOTA) |
1489 | type = QUOTA_GROUP; | 1495 | type = QUOTA_GROUP; |
1490 | else | 1496 | else |
1491 | return -EINVAL; | 1497 | return -EINVAL; |
1492 | 1498 | ||
1493 | error = qd_get(sdp, type, id, &qd); | 1499 | error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd); |
1494 | if (error) | 1500 | if (error) |
1495 | return error; | 1501 | return error; |
1496 | error = do_glock(qd, FORCE, &q_gh); | 1502 | error = do_glock(qd, FORCE, &q_gh); |
@@ -1500,7 +1506,7 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, | |||
1500 | qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; | 1506 | qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; |
1501 | fdq->d_version = FS_DQUOT_VERSION; | 1507 | fdq->d_version = FS_DQUOT_VERSION; |
1502 | fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; | 1508 | fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; |
1503 | fdq->d_id = id; | 1509 | fdq->d_id = from_kqid(&init_user_ns, qid); |
1504 | fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; | 1510 | fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; |
1505 | fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; | 1511 | fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; |
1506 | fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; | 1512 | fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; |
@@ -1514,7 +1520,7 @@ out: | |||
1514 | /* GFS2 only supports a subset of the XFS fields */ | 1520 | /* GFS2 only supports a subset of the XFS fields */ |
1515 | #define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT) | 1521 | #define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT) |
1516 | 1522 | ||
1517 | static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | 1523 | static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, |
1518 | struct fs_disk_quota *fdq) | 1524 | struct fs_disk_quota *fdq) |
1519 | { | 1525 | { |
1520 | struct gfs2_sbd *sdp = sb->s_fs_info; | 1526 | struct gfs2_sbd *sdp = sb->s_fs_info; |
@@ -1526,11 +1532,12 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
1526 | int alloc_required; | 1532 | int alloc_required; |
1527 | loff_t offset; | 1533 | loff_t offset; |
1528 | int error; | 1534 | int error; |
1535 | int type; | ||
1529 | 1536 | ||
1530 | if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) | 1537 | if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) |
1531 | return -ESRCH; /* Crazy XFS error code */ | 1538 | return -ESRCH; /* Crazy XFS error code */ |
1532 | 1539 | ||
1533 | switch(type) { | 1540 | switch(qid.type) { |
1534 | case USRQUOTA: | 1541 | case USRQUOTA: |
1535 | type = QUOTA_USER; | 1542 | type = QUOTA_USER; |
1536 | if (fdq->d_flags != FS_USER_QUOTA) | 1543 | if (fdq->d_flags != FS_USER_QUOTA) |
@@ -1547,10 +1554,10 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
1547 | 1554 | ||
1548 | if (fdq->d_fieldmask & ~GFS2_FIELDMASK) | 1555 | if (fdq->d_fieldmask & ~GFS2_FIELDMASK) |
1549 | return -EINVAL; | 1556 | return -EINVAL; |
1550 | if (fdq->d_id != id) | 1557 | if (fdq->d_id != from_kqid(&init_user_ns, qid)) |
1551 | return -EINVAL; | 1558 | return -EINVAL; |
1552 | 1559 | ||
1553 | error = qd_get(sdp, type, id, &qd); | 1560 | error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd); |
1554 | if (error) | 1561 | if (error) |
1555 | return error; | 1562 | return error; |
1556 | 1563 | ||
@@ -1598,7 +1605,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
1598 | error = gfs2_inplace_reserve(ip, blocks); | 1605 | error = gfs2_inplace_reserve(ip, blocks); |
1599 | if (error) | 1606 | if (error) |
1600 | goto out_i; | 1607 | goto out_i; |
1601 | blocks += gfs2_rg_blocks(ip); | 1608 | blocks += gfs2_rg_blocks(ip, blocks); |
1602 | } | 1609 | } |
1603 | 1610 | ||
1604 | /* Some quotas span block boundaries and can update two blocks, | 1611 | /* Some quotas span block boundaries and can update two blocks, |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 4d34887a601d..3cc402ce6fea 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -35,9 +35,6 @@ | |||
35 | #define BFITNOENT ((u32)~0) | 35 | #define BFITNOENT ((u32)~0) |
36 | #define NO_BLOCK ((u64)~0) | 36 | #define NO_BLOCK ((u64)~0) |
37 | 37 | ||
38 | #define RSRV_CONTENTION_FACTOR 4 | ||
39 | #define RGRP_RSRV_MAX_CONTENDERS 2 | ||
40 | |||
41 | #if BITS_PER_LONG == 32 | 38 | #if BITS_PER_LONG == 32 |
42 | #define LBITMASK (0x55555555UL) | 39 | #define LBITMASK (0x55555555UL) |
43 | #define LBITSKIP55 (0x55555555UL) | 40 | #define LBITSKIP55 (0x55555555UL) |
@@ -67,53 +64,48 @@ static const char valid_change[16] = { | |||
67 | 1, 0, 0, 0 | 64 | 1, 0, 0, 0 |
68 | }; | 65 | }; |
69 | 66 | ||
70 | static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | 67 | static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, |
71 | unsigned char old_state, | 68 | const struct gfs2_inode *ip, bool nowrap); |
72 | struct gfs2_bitmap **rbi); | 69 | |
73 | 70 | ||
74 | /** | 71 | /** |
75 | * gfs2_setbit - Set a bit in the bitmaps | 72 | * gfs2_setbit - Set a bit in the bitmaps |
76 | * @rgd: the resource group descriptor | 73 | * @rbm: The position of the bit to set |
77 | * @buf2: the clone buffer that holds the bitmaps | 74 | * @do_clone: Also set the clone bitmap, if it exists |
78 | * @bi: the bitmap structure | ||
79 | * @block: the block to set | ||
80 | * @new_state: the new state of the block | 75 | * @new_state: the new state of the block |
81 | * | 76 | * |
82 | */ | 77 | */ |
83 | 78 | ||
84 | static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, | 79 | static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, |
85 | struct gfs2_bitmap *bi, u32 block, | ||
86 | unsigned char new_state) | 80 | unsigned char new_state) |
87 | { | 81 | { |
88 | unsigned char *byte1, *byte2, *end, cur_state; | 82 | unsigned char *byte1, *byte2, *end, cur_state; |
89 | unsigned int buflen = bi->bi_len; | 83 | unsigned int buflen = rbm->bi->bi_len; |
90 | const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; | 84 | const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; |
91 | 85 | ||
92 | byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY); | 86 | byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); |
93 | end = bi->bi_bh->b_data + bi->bi_offset + buflen; | 87 | end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; |
94 | 88 | ||
95 | BUG_ON(byte1 >= end); | 89 | BUG_ON(byte1 >= end); |
96 | 90 | ||
97 | cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; | 91 | cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; |
98 | 92 | ||
99 | if (unlikely(!valid_change[new_state * 4 + cur_state])) { | 93 | if (unlikely(!valid_change[new_state * 4 + cur_state])) { |
100 | printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, " | 94 | printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " |
101 | "new_state=%d\n", | 95 | "new_state=%d\n", rbm->offset, cur_state, new_state); |
102 | (unsigned long long)block, cur_state, new_state); | 96 | printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", |
103 | printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n", | 97 | (unsigned long long)rbm->rgd->rd_addr, |
104 | (unsigned long long)rgd->rd_addr, | 98 | rbm->bi->bi_start); |
105 | (unsigned long)bi->bi_start); | 99 | printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", |
106 | printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n", | 100 | rbm->bi->bi_offset, rbm->bi->bi_len); |
107 | (unsigned long)bi->bi_offset, | ||
108 | (unsigned long)bi->bi_len); | ||
109 | dump_stack(); | 101 | dump_stack(); |
110 | gfs2_consist_rgrpd(rgd); | 102 | gfs2_consist_rgrpd(rbm->rgd); |
111 | return; | 103 | return; |
112 | } | 104 | } |
113 | *byte1 ^= (cur_state ^ new_state) << bit; | 105 | *byte1 ^= (cur_state ^ new_state) << bit; |
114 | 106 | ||
115 | if (buf2) { | 107 | if (do_clone && rbm->bi->bi_clone) { |
116 | byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY); | 108 | byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); |
117 | cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; | 109 | cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; |
118 | *byte2 ^= (cur_state ^ new_state) << bit; | 110 | *byte2 ^= (cur_state ^ new_state) << bit; |
119 | } | 111 | } |
@@ -121,30 +113,21 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, | |||
121 | 113 | ||
122 | /** | 114 | /** |
123 | * gfs2_testbit - test a bit in the bitmaps | 115 | * gfs2_testbit - test a bit in the bitmaps |
124 | * @rgd: the resource group descriptor | 116 | * @rbm: The bit to test |
125 | * @buffer: the buffer that holds the bitmaps | ||
126 | * @buflen: the length (in bytes) of the buffer | ||
127 | * @block: the block to read | ||
128 | * | 117 | * |
118 | * Returns: The two bit block state of the requested bit | ||
129 | */ | 119 | */ |
130 | 120 | ||
131 | static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, | 121 | static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) |
132 | const unsigned char *buffer, | ||
133 | unsigned int buflen, u32 block) | ||
134 | { | 122 | { |
135 | const unsigned char *byte, *end; | 123 | const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; |
136 | unsigned char cur_state; | 124 | const u8 *byte; |
137 | unsigned int bit; | 125 | unsigned int bit; |
138 | 126 | ||
139 | byte = buffer + (block / GFS2_NBBY); | 127 | byte = buffer + (rbm->offset / GFS2_NBBY); |
140 | bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; | 128 | bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; |
141 | end = buffer + buflen; | ||
142 | |||
143 | gfs2_assert(rgd->rd_sbd, byte < end); | ||
144 | 129 | ||
145 | cur_state = (*byte >> bit) & GFS2_BIT_MASK; | 130 | return (*byte >> bit) & GFS2_BIT_MASK; |
146 | |||
147 | return cur_state; | ||
148 | } | 131 | } |
149 | 132 | ||
150 | /** | 133 | /** |
@@ -192,7 +175,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) | |||
192 | */ | 175 | */ |
193 | static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) | 176 | static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) |
194 | { | 177 | { |
195 | u64 startblk = gfs2_rs_startblk(rs); | 178 | u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); |
196 | 179 | ||
197 | if (blk >= startblk + rs->rs_free) | 180 | if (blk >= startblk + rs->rs_free) |
198 | return 1; | 181 | return 1; |
@@ -202,36 +185,6 @@ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) | |||
202 | } | 185 | } |
203 | 186 | ||
204 | /** | 187 | /** |
205 | * rs_find - Find a rgrp multi-block reservation that contains a given block | ||
206 | * @rgd: The rgrp | ||
207 | * @rgblk: The block we're looking for, relative to the rgrp | ||
208 | */ | ||
209 | static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) | ||
210 | { | ||
211 | struct rb_node **newn; | ||
212 | int rc; | ||
213 | u64 fsblk = rgblk + rgd->rd_data0; | ||
214 | |||
215 | spin_lock(&rgd->rd_rsspin); | ||
216 | newn = &rgd->rd_rstree.rb_node; | ||
217 | while (*newn) { | ||
218 | struct gfs2_blkreserv *cur = | ||
219 | rb_entry(*newn, struct gfs2_blkreserv, rs_node); | ||
220 | rc = rs_cmp(fsblk, 1, cur); | ||
221 | if (rc < 0) | ||
222 | newn = &((*newn)->rb_left); | ||
223 | else if (rc > 0) | ||
224 | newn = &((*newn)->rb_right); | ||
225 | else { | ||
226 | spin_unlock(&rgd->rd_rsspin); | ||
227 | return cur; | ||
228 | } | ||
229 | } | ||
230 | spin_unlock(&rgd->rd_rsspin); | ||
231 | return NULL; | ||
232 | } | ||
233 | |||
234 | /** | ||
235 | * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing | 188 | * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing |
236 | * a block in a given allocation state. | 189 | * a block in a given allocation state. |
237 | * @buf: the buffer that holds the bitmaps | 190 | * @buf: the buffer that holds the bitmaps |
@@ -262,8 +215,6 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, | |||
262 | u64 mask = 0x5555555555555555ULL; | 215 | u64 mask = 0x5555555555555555ULL; |
263 | u32 bit; | 216 | u32 bit; |
264 | 217 | ||
265 | BUG_ON(state > 3); | ||
266 | |||
267 | /* Mask off bits we don't care about at the start of the search */ | 218 | /* Mask off bits we don't care about at the start of the search */ |
268 | mask <<= spoint; | 219 | mask <<= spoint; |
269 | tmp = gfs2_bit_search(ptr, mask, state); | 220 | tmp = gfs2_bit_search(ptr, mask, state); |
@@ -285,6 +236,131 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, | |||
285 | } | 236 | } |
286 | 237 | ||
287 | /** | 238 | /** |
239 | * gfs2_rbm_from_block - Set the rbm based upon rgd and block number | ||
240 | * @rbm: The rbm with rgd already set correctly | ||
241 | * @block: The block number (filesystem relative) | ||
242 | * | ||
243 | * This sets the bi and offset members of an rbm based on a | ||
244 | * resource group and a filesystem relative block number. The | ||
245 | * resource group must be set in the rbm on entry, the bi and | ||
246 | * offset members will be set by this function. | ||
247 | * | ||
248 | * Returns: 0 on success, or an error code | ||
249 | */ | ||
250 | |||
251 | static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) | ||
252 | { | ||
253 | u64 rblock = block - rbm->rgd->rd_data0; | ||
254 | u32 goal = (u32)rblock; | ||
255 | int x; | ||
256 | |||
257 | if (WARN_ON_ONCE(rblock > UINT_MAX)) | ||
258 | return -EINVAL; | ||
259 | if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) | ||
260 | return -E2BIG; | ||
261 | |||
262 | for (x = 0; x < rbm->rgd->rd_length; x++) { | ||
263 | rbm->bi = rbm->rgd->rd_bits + x; | ||
264 | if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { | ||
265 | rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); | ||
266 | break; | ||
267 | } | ||
268 | } | ||
269 | |||
270 | return 0; | ||
271 | } | ||
272 | |||
273 | /** | ||
274 | * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned | ||
275 | * @rbm: Position to search (value/result) | ||
276 | * @n_unaligned: Number of unaligned blocks to check | ||
277 | * @len: Decremented for each block found (terminate on zero) | ||
278 | * | ||
279 | * Returns: true if a non-free block is encountered | ||
280 | */ | ||
281 | |||
282 | static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) | ||
283 | { | ||
284 | u64 block; | ||
285 | u32 n; | ||
286 | u8 res; | ||
287 | |||
288 | for (n = 0; n < n_unaligned; n++) { | ||
289 | res = gfs2_testbit(rbm); | ||
290 | if (res != GFS2_BLKST_FREE) | ||
291 | return true; | ||
292 | (*len)--; | ||
293 | if (*len == 0) | ||
294 | return true; | ||
295 | block = gfs2_rbm_to_block(rbm); | ||
296 | if (gfs2_rbm_from_block(rbm, block + 1)) | ||
297 | return true; | ||
298 | } | ||
299 | |||
300 | return false; | ||
301 | } | ||
302 | |||
303 | /** | ||
304 | * gfs2_free_extlen - Return extent length of free blocks | ||
305 | * @rbm: Starting position | ||
306 | * @len: Max length to check | ||
307 | * | ||
308 | * Starting at the block specified by the rbm, see how many free blocks | ||
309 | * there are, not reading more than len blocks ahead. This can be done | ||
310 | * using memchr_inv when the blocks are byte aligned, but has to be done | ||
311 | * on a block by block basis in case of unaligned blocks. Also this | ||
312 | * function can cope with bitmap boundaries (although it must stop on | ||
313 | * a resource group boundary) | ||
314 | * | ||
315 | * Returns: Number of free blocks in the extent | ||
316 | */ | ||
317 | |||
318 | static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) | ||
319 | { | ||
320 | struct gfs2_rbm rbm = *rrbm; | ||
321 | u32 n_unaligned = rbm.offset & 3; | ||
322 | u32 size = len; | ||
323 | u32 bytes; | ||
324 | u32 chunk_size; | ||
325 | u8 *ptr, *start, *end; | ||
326 | u64 block; | ||
327 | |||
328 | if (n_unaligned && | ||
329 | gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) | ||
330 | goto out; | ||
331 | |||
332 | n_unaligned = len & 3; | ||
333 | /* Start is now byte aligned */ | ||
334 | while (len > 3) { | ||
335 | start = rbm.bi->bi_bh->b_data; | ||
336 | if (rbm.bi->bi_clone) | ||
337 | start = rbm.bi->bi_clone; | ||
338 | end = start + rbm.bi->bi_bh->b_size; | ||
339 | start += rbm.bi->bi_offset; | ||
340 | BUG_ON(rbm.offset & 3); | ||
341 | start += (rbm.offset / GFS2_NBBY); | ||
342 | bytes = min_t(u32, len / GFS2_NBBY, (end - start)); | ||
343 | ptr = memchr_inv(start, 0, bytes); | ||
344 | chunk_size = ((ptr == NULL) ? bytes : (ptr - start)); | ||
345 | chunk_size *= GFS2_NBBY; | ||
346 | BUG_ON(len < chunk_size); | ||
347 | len -= chunk_size; | ||
348 | block = gfs2_rbm_to_block(&rbm); | ||
349 | gfs2_rbm_from_block(&rbm, block + chunk_size); | ||
350 | n_unaligned = 3; | ||
351 | if (ptr) | ||
352 | break; | ||
353 | n_unaligned = len & 3; | ||
354 | } | ||
355 | |||
356 | /* Deal with any bits left over at the end */ | ||
357 | if (n_unaligned) | ||
358 | gfs2_unaligned_extlen(&rbm, n_unaligned, &len); | ||
359 | out: | ||
360 | return size - len; | ||
361 | } | ||
362 | |||
363 | /** | ||
288 | * gfs2_bitcount - count the number of bits in a certain state | 364 | * gfs2_bitcount - count the number of bits in a certain state |
289 | * @rgd: the resource group descriptor | 365 | * @rgd: the resource group descriptor |
290 | * @buffer: the buffer that holds the bitmaps | 366 | * @buffer: the buffer that holds the bitmaps |
@@ -487,6 +563,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) | |||
487 | if (!res) | 563 | if (!res) |
488 | error = -ENOMEM; | 564 | error = -ENOMEM; |
489 | 565 | ||
566 | RB_CLEAR_NODE(&res->rs_node); | ||
567 | |||
490 | down_write(&ip->i_rw_mutex); | 568 | down_write(&ip->i_rw_mutex); |
491 | if (ip->i_res) | 569 | if (ip->i_res) |
492 | kmem_cache_free(gfs2_rsrv_cachep, res); | 570 | kmem_cache_free(gfs2_rsrv_cachep, res); |
@@ -496,11 +574,12 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) | |||
496 | return error; | 574 | return error; |
497 | } | 575 | } |
498 | 576 | ||
499 | static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) | 577 | static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) |
500 | { | 578 | { |
501 | gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", | 579 | gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", |
502 | rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, | 580 | (unsigned long long)rs->rs_inum, |
503 | rs->rs_free); | 581 | (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), |
582 | rs->rs_rbm.offset, rs->rs_free); | ||
504 | } | 583 | } |
505 | 584 | ||
506 | /** | 585 | /** |
@@ -508,41 +587,26 @@ static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) | |||
508 | * @rs: The reservation to remove | 587 | * @rs: The reservation to remove |
509 | * | 588 | * |
510 | */ | 589 | */ |
511 | static void __rs_deltree(struct gfs2_blkreserv *rs) | 590 | static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) |
512 | { | 591 | { |
513 | struct gfs2_rgrpd *rgd; | 592 | struct gfs2_rgrpd *rgd; |
514 | 593 | ||
515 | if (!gfs2_rs_active(rs)) | 594 | if (!gfs2_rs_active(rs)) |
516 | return; | 595 | return; |
517 | 596 | ||
518 | rgd = rs->rs_rgd; | 597 | rgd = rs->rs_rbm.rgd; |
519 | /* We can't do this: The reason is that when the rgrp is invalidated, | 598 | trace_gfs2_rs(rs, TRACE_RS_TREEDEL); |
520 | it's in the "middle" of acquiring the glock, but the HOLDER bit | 599 | rb_erase(&rs->rs_node, &rgd->rd_rstree); |
521 | isn't set yet: | 600 | RB_CLEAR_NODE(&rs->rs_node); |
522 | BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ | ||
523 | trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); | ||
524 | |||
525 | if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) | ||
526 | rb_erase(&rs->rs_node, &rgd->rd_rstree); | ||
527 | BUG_ON(!rgd->rd_rs_cnt); | ||
528 | rgd->rd_rs_cnt--; | ||
529 | 601 | ||
530 | if (rs->rs_free) { | 602 | if (rs->rs_free) { |
531 | /* return reserved blocks to the rgrp and the ip */ | 603 | /* return reserved blocks to the rgrp and the ip */ |
532 | BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); | 604 | BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); |
533 | rs->rs_rgd->rd_reserved -= rs->rs_free; | 605 | rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; |
534 | rs->rs_free = 0; | 606 | rs->rs_free = 0; |
535 | clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); | 607 | clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags); |
536 | smp_mb__after_clear_bit(); | 608 | smp_mb__after_clear_bit(); |
537 | } | 609 | } |
538 | /* We can't change any of the step 1 or step 2 components of the rs. | ||
539 | E.g. We can't set rs_rgd to NULL because the rgd glock is held and | ||
540 | dequeued through this pointer. | ||
541 | Can't: atomic_set(&rs->rs_sizehint, 0); | ||
542 | Can't: rs->rs_requested = 0; | ||
543 | Can't: rs->rs_rgd = NULL;*/ | ||
544 | rs->rs_bi = NULL; | ||
545 | rs->rs_biblk = 0; | ||
546 | } | 610 | } |
547 | 611 | ||
548 | /** | 612 | /** |
@@ -550,17 +614,16 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) | |||
550 | * @rs: The reservation to remove | 614 | * @rs: The reservation to remove |
551 | * | 615 | * |
552 | */ | 616 | */ |
553 | void gfs2_rs_deltree(struct gfs2_blkreserv *rs) | 617 | void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) |
554 | { | 618 | { |
555 | struct gfs2_rgrpd *rgd; | 619 | struct gfs2_rgrpd *rgd; |
556 | 620 | ||
557 | if (!gfs2_rs_active(rs)) | 621 | rgd = rs->rs_rbm.rgd; |
558 | return; | 622 | if (rgd) { |
559 | 623 | spin_lock(&rgd->rd_rsspin); | |
560 | rgd = rs->rs_rgd; | 624 | __rs_deltree(ip, rs); |
561 | spin_lock(&rgd->rd_rsspin); | 625 | spin_unlock(&rgd->rd_rsspin); |
562 | __rs_deltree(rs); | 626 | } |
563 | spin_unlock(&rgd->rd_rsspin); | ||
564 | } | 627 | } |
565 | 628 | ||
566 | /** | 629 | /** |
@@ -572,8 +635,7 @@ void gfs2_rs_delete(struct gfs2_inode *ip) | |||
572 | { | 635 | { |
573 | down_write(&ip->i_rw_mutex); | 636 | down_write(&ip->i_rw_mutex); |
574 | if (ip->i_res) { | 637 | if (ip->i_res) { |
575 | gfs2_rs_deltree(ip->i_res); | 638 | gfs2_rs_deltree(ip, ip->i_res); |
576 | trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); | ||
577 | BUG_ON(ip->i_res->rs_free); | 639 | BUG_ON(ip->i_res->rs_free); |
578 | kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); | 640 | kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); |
579 | ip->i_res = NULL; | 641 | ip->i_res = NULL; |
@@ -597,7 +659,7 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd) | |||
597 | spin_lock(&rgd->rd_rsspin); | 659 | spin_lock(&rgd->rd_rsspin); |
598 | while ((n = rb_first(&rgd->rd_rstree))) { | 660 | while ((n = rb_first(&rgd->rd_rstree))) { |
599 | rs = rb_entry(n, struct gfs2_blkreserv, rs_node); | 661 | rs = rb_entry(n, struct gfs2_blkreserv, rs_node); |
600 | __rs_deltree(rs); | 662 | __rs_deltree(NULL, rs); |
601 | } | 663 | } |
602 | spin_unlock(&rgd->rd_rsspin); | 664 | spin_unlock(&rgd->rd_rsspin); |
603 | } | 665 | } |
@@ -1270,211 +1332,276 @@ out: | |||
1270 | 1332 | ||
1271 | /** | 1333 | /** |
1272 | * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree | 1334 | * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree |
1273 | * @bi: the bitmap with the blocks | ||
1274 | * @ip: the inode structure | 1335 | * @ip: the inode structure |
1275 | * @biblk: the 32-bit block number relative to the start of the bitmap | ||
1276 | * @amount: the number of blocks to reserve | ||
1277 | * | 1336 | * |
1278 | * Returns: NULL - reservation was already taken, so not inserted | ||
1279 | * pointer to the inserted reservation | ||
1280 | */ | 1337 | */ |
1281 | static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, | 1338 | static void rs_insert(struct gfs2_inode *ip) |
1282 | struct gfs2_inode *ip, u32 biblk, | ||
1283 | int amount) | ||
1284 | { | 1339 | { |
1285 | struct rb_node **newn, *parent = NULL; | 1340 | struct rb_node **newn, *parent = NULL; |
1286 | int rc; | 1341 | int rc; |
1287 | struct gfs2_blkreserv *rs = ip->i_res; | 1342 | struct gfs2_blkreserv *rs = ip->i_res; |
1288 | struct gfs2_rgrpd *rgd = rs->rs_rgd; | 1343 | struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; |
1289 | u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; | 1344 | u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); |
1345 | |||
1346 | BUG_ON(gfs2_rs_active(rs)); | ||
1290 | 1347 | ||
1291 | spin_lock(&rgd->rd_rsspin); | 1348 | spin_lock(&rgd->rd_rsspin); |
1292 | newn = &rgd->rd_rstree.rb_node; | 1349 | newn = &rgd->rd_rstree.rb_node; |
1293 | BUG_ON(!ip->i_res); | ||
1294 | BUG_ON(gfs2_rs_active(rs)); | ||
1295 | /* Figure out where to put new node */ | ||
1296 | /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ | ||
1297 | while (*newn) { | 1350 | while (*newn) { |
1298 | struct gfs2_blkreserv *cur = | 1351 | struct gfs2_blkreserv *cur = |
1299 | rb_entry(*newn, struct gfs2_blkreserv, rs_node); | 1352 | rb_entry(*newn, struct gfs2_blkreserv, rs_node); |
1300 | 1353 | ||
1301 | parent = *newn; | 1354 | parent = *newn; |
1302 | rc = rs_cmp(fsblock, amount, cur); | 1355 | rc = rs_cmp(fsblock, rs->rs_free, cur); |
1303 | if (rc > 0) | 1356 | if (rc > 0) |
1304 | newn = &((*newn)->rb_right); | 1357 | newn = &((*newn)->rb_right); |
1305 | else if (rc < 0) | 1358 | else if (rc < 0) |
1306 | newn = &((*newn)->rb_left); | 1359 | newn = &((*newn)->rb_left); |
1307 | else { | 1360 | else { |
1308 | spin_unlock(&rgd->rd_rsspin); | 1361 | spin_unlock(&rgd->rd_rsspin); |
1309 | return NULL; /* reservation already in use */ | 1362 | WARN_ON(1); |
1363 | return; | ||
1310 | } | 1364 | } |
1311 | } | 1365 | } |
1312 | 1366 | ||
1313 | /* Do our reservation work */ | ||
1314 | rs = ip->i_res; | ||
1315 | rs->rs_free = amount; | ||
1316 | rs->rs_biblk = biblk; | ||
1317 | rs->rs_bi = bi; | ||
1318 | rb_link_node(&rs->rs_node, parent, newn); | 1367 | rb_link_node(&rs->rs_node, parent, newn); |
1319 | rb_insert_color(&rs->rs_node, &rgd->rd_rstree); | 1368 | rb_insert_color(&rs->rs_node, &rgd->rd_rstree); |
1320 | 1369 | ||
1321 | /* Do our inode accounting for the reservation */ | ||
1322 | /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ | ||
1323 | |||
1324 | /* Do our rgrp accounting for the reservation */ | 1370 | /* Do our rgrp accounting for the reservation */ |
1325 | rgd->rd_reserved += amount; /* blocks reserved */ | 1371 | rgd->rd_reserved += rs->rs_free; /* blocks reserved */ |
1326 | rgd->rd_rs_cnt++; /* number of in-tree reservations */ | ||
1327 | spin_unlock(&rgd->rd_rsspin); | 1372 | spin_unlock(&rgd->rd_rsspin); |
1328 | trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); | 1373 | trace_gfs2_rs(rs, TRACE_RS_INSERT); |
1329 | return rs; | ||
1330 | } | 1374 | } |
1331 | 1375 | ||
1332 | /** | 1376 | /** |
1333 | * unclaimed_blocks - return number of blocks that aren't spoken for | 1377 | * rg_mblk_search - find a group of multiple free blocks to form a reservation |
1334 | */ | ||
1335 | static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) | ||
1336 | { | ||
1337 | return rgd->rd_free_clone - rgd->rd_reserved; | ||
1338 | } | ||
1339 | |||
1340 | /** | ||
1341 | * rg_mblk_search - find a group of multiple free blocks | ||
1342 | * @rgd: the resource group descriptor | 1378 | * @rgd: the resource group descriptor |
1343 | * @rs: the block reservation | ||
1344 | * @ip: pointer to the inode for which we're reserving blocks | 1379 | * @ip: pointer to the inode for which we're reserving blocks |
1380 | * @requested: number of blocks required for this allocation | ||
1345 | * | 1381 | * |
1346 | * This is very similar to rgblk_search, except we're looking for whole | ||
1347 | * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing | ||
1348 | * on aligned dwords for speed's sake. | ||
1349 | * | ||
1350 | * Returns: 0 if successful or BFITNOENT if there isn't enough free space | ||
1351 | */ | 1382 | */ |
1352 | 1383 | ||
1353 | static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) | 1384 | static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, |
1385 | unsigned requested) | ||
1354 | { | 1386 | { |
1355 | struct gfs2_bitmap *bi = rgd->rd_bits; | 1387 | struct gfs2_rbm rbm = { .rgd = rgd, }; |
1356 | const u32 length = rgd->rd_length; | 1388 | u64 goal; |
1357 | u32 blk; | 1389 | struct gfs2_blkreserv *rs = ip->i_res; |
1358 | unsigned int buf, x, search_bytes; | 1390 | u32 extlen; |
1359 | u8 *buffer = NULL; | 1391 | u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; |
1360 | u8 *ptr, *end, *nonzero; | 1392 | int ret; |
1361 | u32 goal, rsv_bytes; | 1393 | |
1362 | struct gfs2_blkreserv *rs; | 1394 | extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); |
1363 | u32 best_rs_bytes, unclaimed; | 1395 | extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); |
1364 | int best_rs_blocks; | 1396 | if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) |
1397 | return; | ||
1365 | 1398 | ||
1366 | /* Find bitmap block that contains bits for goal block */ | 1399 | /* Find bitmap block that contains bits for goal block */ |
1367 | if (rgrp_contains_block(rgd, ip->i_goal)) | 1400 | if (rgrp_contains_block(rgd, ip->i_goal)) |
1368 | goal = ip->i_goal - rgd->rd_data0; | 1401 | goal = ip->i_goal; |
1369 | else | 1402 | else |
1370 | goal = rgd->rd_last_alloc; | 1403 | goal = rgd->rd_last_alloc + rgd->rd_data0; |
1371 | for (buf = 0; buf < length; buf++) { | 1404 | |
1372 | bi = rgd->rd_bits + buf; | 1405 | if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) |
1373 | /* Convert scope of "goal" from rgrp-wide to within | 1406 | return; |
1374 | found bit block */ | 1407 | |
1375 | if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { | 1408 | ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); |
1376 | goal -= bi->bi_start * GFS2_NBBY; | 1409 | if (ret == 0) { |
1377 | goto do_search; | 1410 | rs->rs_rbm = rbm; |
1378 | } | 1411 | rs->rs_free = extlen; |
1412 | rs->rs_inum = ip->i_no_addr; | ||
1413 | rs_insert(ip); | ||
1379 | } | 1414 | } |
1380 | buf = 0; | 1415 | } |
1381 | goal = 0; | ||
1382 | |||
1383 | do_search: | ||
1384 | best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint), | ||
1385 | (RGRP_RSRV_MINBLKS * rgd->rd_length)); | ||
1386 | best_rs_bytes = (best_rs_blocks * | ||
1387 | (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) / | ||
1388 | GFS2_NBBY; /* 1 + is for our not-yet-created reservation */ | ||
1389 | best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64)); | ||
1390 | unclaimed = unclaimed_blocks(rgd); | ||
1391 | if (best_rs_bytes * GFS2_NBBY > unclaimed) | ||
1392 | best_rs_bytes = unclaimed >> GFS2_BIT_SIZE; | ||
1393 | |||
1394 | for (x = 0; x <= length; x++) { | ||
1395 | bi = rgd->rd_bits + buf; | ||
1396 | 1416 | ||
1397 | if (test_bit(GBF_FULL, &bi->bi_flags)) | 1417 | /** |
1398 | goto skip; | 1418 | * gfs2_next_unreserved_block - Return next block that is not reserved |
1419 | * @rgd: The resource group | ||
1420 | * @block: The starting block | ||
1421 | * @length: The required length | ||
1422 | * @ip: Ignore any reservations for this inode | ||
1423 | * | ||
1424 | * If the block does not appear in any reservation, then return the | ||
1425 | * block number unchanged. If it does appear in the reservation, then | ||
1426 | * keep looking through the tree of reservations in order to find the | ||
1427 | * first block number which is not reserved. | ||
1428 | */ | ||
1399 | 1429 | ||
1400 | WARN_ON(!buffer_uptodate(bi->bi_bh)); | 1430 | static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, |
1401 | if (bi->bi_clone) | 1431 | u32 length, |
1402 | buffer = bi->bi_clone + bi->bi_offset; | 1432 | const struct gfs2_inode *ip) |
1433 | { | ||
1434 | struct gfs2_blkreserv *rs; | ||
1435 | struct rb_node *n; | ||
1436 | int rc; | ||
1437 | |||
1438 | spin_lock(&rgd->rd_rsspin); | ||
1439 | n = rgd->rd_rstree.rb_node; | ||
1440 | while (n) { | ||
1441 | rs = rb_entry(n, struct gfs2_blkreserv, rs_node); | ||
1442 | rc = rs_cmp(block, length, rs); | ||
1443 | if (rc < 0) | ||
1444 | n = n->rb_left; | ||
1445 | else if (rc > 0) | ||
1446 | n = n->rb_right; | ||
1403 | else | 1447 | else |
1404 | buffer = bi->bi_bh->b_data + bi->bi_offset; | 1448 | break; |
1405 | 1449 | } | |
1406 | /* We have to keep the reservations aligned on u64 boundaries | 1450 | |
1407 | otherwise we could get situations where a byte can't be | 1451 | if (n) { |
1408 | used because it's after a reservation, but a free bit still | 1452 | while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) { |
1409 | is within the reservation's area. */ | 1453 | block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; |
1410 | ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); | 1454 | n = n->rb_right; |
1411 | end = (buffer + bi->bi_len); | 1455 | if (n == NULL) |
1412 | while (ptr < end) { | 1456 | break; |
1413 | rsv_bytes = 0; | 1457 | rs = rb_entry(n, struct gfs2_blkreserv, rs_node); |
1414 | if ((ptr + best_rs_bytes) <= end) | ||
1415 | search_bytes = best_rs_bytes; | ||
1416 | else | ||
1417 | search_bytes = end - ptr; | ||
1418 | BUG_ON(!search_bytes); | ||
1419 | nonzero = memchr_inv(ptr, 0, search_bytes); | ||
1420 | /* If the lot is all zeroes, reserve the whole size. If | ||
1421 | there's enough zeroes to satisfy the request, use | ||
1422 | what we can. If there's not enough, keep looking. */ | ||
1423 | if (nonzero == NULL) | ||
1424 | rsv_bytes = search_bytes; | ||
1425 | else if ((nonzero - ptr) * GFS2_NBBY >= | ||
1426 | ip->i_res->rs_requested) | ||
1427 | rsv_bytes = (nonzero - ptr); | ||
1428 | |||
1429 | if (rsv_bytes) { | ||
1430 | blk = ((ptr - buffer) * GFS2_NBBY); | ||
1431 | BUG_ON(blk >= bi->bi_len * GFS2_NBBY); | ||
1432 | rs = rs_insert(bi, ip, blk, | ||
1433 | rsv_bytes * GFS2_NBBY); | ||
1434 | if (IS_ERR(rs)) | ||
1435 | return PTR_ERR(rs); | ||
1436 | if (rs) | ||
1437 | return 0; | ||
1438 | } | ||
1439 | ptr += ALIGN(search_bytes, sizeof(u64)); | ||
1440 | } | 1458 | } |
1441 | skip: | ||
1442 | /* Try next bitmap block (wrap back to rgrp header | ||
1443 | if at end) */ | ||
1444 | buf++; | ||
1445 | buf %= length; | ||
1446 | goal = 0; | ||
1447 | } | 1459 | } |
1448 | 1460 | ||
1449 | return BFITNOENT; | 1461 | spin_unlock(&rgd->rd_rsspin); |
1462 | return block; | ||
1450 | } | 1463 | } |
1451 | 1464 | ||
1452 | /** | 1465 | /** |
1453 | * try_rgrp_fit - See if a given reservation will fit in a given RG | 1466 | * gfs2_reservation_check_and_update - Check for reservations during block alloc |
1454 | * @rgd: the RG data | 1467 | * @rbm: The current position in the resource group |
1455 | * @ip: the inode | 1468 | * @ip: The inode for which we are searching for blocks |
1469 | * @minext: The minimum extent length | ||
1456 | * | 1470 | * |
1457 | * If there's room for the requested blocks to be allocated from the RG: | 1471 | * This checks the current position in the rgrp to see whether there is |
1458 | * This will try to get a multi-block reservation first, and if that doesn't | 1472 | * a reservation covering this block. If not then this function is a |
1459 | * fit, it will take what it can. | 1473 | * no-op. If there is, then the position is moved to the end of the |
1474 | * contiguous reservation(s) so that we are pointing at the first | ||
1475 | * non-reserved block. | ||
1460 | * | 1476 | * |
1461 | * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) | 1477 | * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error |
1462 | */ | 1478 | */ |
1463 | 1479 | ||
1464 | static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) | 1480 | static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, |
1481 | const struct gfs2_inode *ip, | ||
1482 | u32 minext) | ||
1465 | { | 1483 | { |
1466 | struct gfs2_blkreserv *rs = ip->i_res; | 1484 | u64 block = gfs2_rbm_to_block(rbm); |
1485 | u32 extlen = 1; | ||
1486 | u64 nblock; | ||
1487 | int ret; | ||
1467 | 1488 | ||
1468 | if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) | 1489 | /* |
1490 | * If we have a minimum extent length, then skip over any extent | ||
1491 | * which is less than the min extent length in size. | ||
1492 | */ | ||
1493 | if (minext) { | ||
1494 | extlen = gfs2_free_extlen(rbm, minext); | ||
1495 | nblock = block + extlen; | ||
1496 | if (extlen < minext) | ||
1497 | goto fail; | ||
1498 | } | ||
1499 | |||
1500 | /* | ||
1501 | * Check the extent which has been found against the reservations | ||
1502 | * and skip if parts of it are already reserved | ||
1503 | */ | ||
1504 | nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); | ||
1505 | if (nblock == block) | ||
1469 | return 0; | 1506 | return 0; |
1470 | /* Look for a multi-block reservation. */ | 1507 | fail: |
1471 | if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && | 1508 | ret = gfs2_rbm_from_block(rbm, nblock); |
1472 | rg_mblk_search(rgd, ip) != BFITNOENT) | 1509 | if (ret < 0) |
1473 | return 1; | 1510 | return ret; |
1474 | if (unclaimed_blocks(rgd) >= rs->rs_requested) | 1511 | return 1; |
1475 | return 1; | 1512 | } |
1476 | 1513 | ||
1477 | return 0; | 1514 | /** |
1515 | * gfs2_rbm_find - Look for blocks of a particular state | ||
1516 | * @rbm: Value/result starting position and final position | ||
1517 | * @state: The state which we want to find | ||
1518 | * @minext: The requested extent length (0 for a single block) | ||
1519 | * @ip: If set, check for reservations | ||
1520 | * @nowrap: Stop looking at the end of the rgrp, rather than wrapping | ||
1521 | * around until we've reached the starting point. | ||
1522 | * | ||
1523 | * Side effects: | ||
1524 | * - If looking for free blocks, we set GBF_FULL on each bitmap which | ||
1525 | * has no free blocks in it. | ||
1526 | * | ||
1527 | * Returns: 0 on success, -ENOSPC if there is no block of the requested state | ||
1528 | */ | ||
1529 | |||
1530 | static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, | ||
1531 | const struct gfs2_inode *ip, bool nowrap) | ||
1532 | { | ||
1533 | struct buffer_head *bh; | ||
1534 | struct gfs2_bitmap *initial_bi; | ||
1535 | u32 initial_offset; | ||
1536 | u32 offset; | ||
1537 | u8 *buffer; | ||
1538 | int index; | ||
1539 | int n = 0; | ||
1540 | int iters = rbm->rgd->rd_length; | ||
1541 | int ret; | ||
1542 | |||
1543 | /* If we are not starting at the beginning of a bitmap, then we | ||
1544 | * need to add one to the bitmap count to ensure that we search | ||
1545 | * the starting bitmap twice. | ||
1546 | */ | ||
1547 | if (rbm->offset != 0) | ||
1548 | iters++; | ||
1549 | |||
1550 | while(1) { | ||
1551 | if (test_bit(GBF_FULL, &rbm->bi->bi_flags) && | ||
1552 | (state == GFS2_BLKST_FREE)) | ||
1553 | goto next_bitmap; | ||
1554 | |||
1555 | bh = rbm->bi->bi_bh; | ||
1556 | buffer = bh->b_data + rbm->bi->bi_offset; | ||
1557 | WARN_ON(!buffer_uptodate(bh)); | ||
1558 | if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) | ||
1559 | buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; | ||
1560 | initial_offset = rbm->offset; | ||
1561 | offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); | ||
1562 | if (offset == BFITNOENT) | ||
1563 | goto bitmap_full; | ||
1564 | rbm->offset = offset; | ||
1565 | if (ip == NULL) | ||
1566 | return 0; | ||
1567 | |||
1568 | initial_bi = rbm->bi; | ||
1569 | ret = gfs2_reservation_check_and_update(rbm, ip, minext); | ||
1570 | if (ret == 0) | ||
1571 | return 0; | ||
1572 | if (ret > 0) { | ||
1573 | n += (rbm->bi - initial_bi); | ||
1574 | goto next_iter; | ||
1575 | } | ||
1576 | if (ret == -E2BIG) { | ||
1577 | index = 0; | ||
1578 | rbm->offset = 0; | ||
1579 | n += (rbm->bi - initial_bi); | ||
1580 | goto res_covered_end_of_rgrp; | ||
1581 | } | ||
1582 | return ret; | ||
1583 | |||
1584 | bitmap_full: /* Mark bitmap as full and fall through */ | ||
1585 | if ((state == GFS2_BLKST_FREE) && initial_offset == 0) | ||
1586 | set_bit(GBF_FULL, &rbm->bi->bi_flags); | ||
1587 | |||
1588 | next_bitmap: /* Find next bitmap in the rgrp */ | ||
1589 | rbm->offset = 0; | ||
1590 | index = rbm->bi - rbm->rgd->rd_bits; | ||
1591 | index++; | ||
1592 | if (index == rbm->rgd->rd_length) | ||
1593 | index = 0; | ||
1594 | res_covered_end_of_rgrp: | ||
1595 | rbm->bi = &rbm->rgd->rd_bits[index]; | ||
1596 | if ((index == 0) && nowrap) | ||
1597 | break; | ||
1598 | n++; | ||
1599 | next_iter: | ||
1600 | if (n >= iters) | ||
1601 | break; | ||
1602 | } | ||
1603 | |||
1604 | return -ENOSPC; | ||
1478 | } | 1605 | } |
1479 | 1606 | ||
1480 | /** | 1607 | /** |
@@ -1489,34 +1616,33 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) | |||
1489 | 1616 | ||
1490 | static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) | 1617 | static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) |
1491 | { | 1618 | { |
1492 | u32 goal = 0, block; | 1619 | u64 block; |
1493 | u64 no_addr; | ||
1494 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 1620 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
1495 | struct gfs2_glock *gl; | 1621 | struct gfs2_glock *gl; |
1496 | struct gfs2_inode *ip; | 1622 | struct gfs2_inode *ip; |
1497 | int error; | 1623 | int error; |
1498 | int found = 0; | 1624 | int found = 0; |
1499 | struct gfs2_bitmap *bi; | 1625 | struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 }; |
1500 | 1626 | ||
1501 | while (goal < rgd->rd_data) { | 1627 | while (1) { |
1502 | down_write(&sdp->sd_log_flush_lock); | 1628 | down_write(&sdp->sd_log_flush_lock); |
1503 | block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi); | 1629 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); |
1504 | up_write(&sdp->sd_log_flush_lock); | 1630 | up_write(&sdp->sd_log_flush_lock); |
1505 | if (block == BFITNOENT) | 1631 | if (error == -ENOSPC) |
1632 | break; | ||
1633 | if (WARN_ON_ONCE(error)) | ||
1506 | break; | 1634 | break; |
1507 | 1635 | ||
1508 | block = gfs2_bi2rgd_blk(bi, block); | 1636 | block = gfs2_rbm_to_block(&rbm); |
1509 | /* rgblk_search can return a block < goal, so we need to | 1637 | if (gfs2_rbm_from_block(&rbm, block + 1)) |
1510 | keep it marching forward. */ | 1638 | break; |
1511 | no_addr = block + rgd->rd_data0; | 1639 | if (*last_unlinked != NO_BLOCK && block <= *last_unlinked) |
1512 | goal = max(block + 1, goal + 1); | ||
1513 | if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) | ||
1514 | continue; | 1640 | continue; |
1515 | if (no_addr == skip) | 1641 | if (block == skip) |
1516 | continue; | 1642 | continue; |
1517 | *last_unlinked = no_addr; | 1643 | *last_unlinked = block; |
1518 | 1644 | ||
1519 | error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); | 1645 | error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl); |
1520 | if (error) | 1646 | if (error) |
1521 | continue; | 1647 | continue; |
1522 | 1648 | ||
@@ -1543,6 +1669,19 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip | |||
1543 | return; | 1669 | return; |
1544 | } | 1670 | } |
1545 | 1671 | ||
1672 | static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) | ||
1673 | { | ||
1674 | struct gfs2_rgrpd *rgd = *pos; | ||
1675 | |||
1676 | rgd = gfs2_rgrpd_get_next(rgd); | ||
1677 | if (rgd == NULL) | ||
1678 | rgd = gfs2_rgrpd_get_next(NULL); | ||
1679 | *pos = rgd; | ||
1680 | if (rgd != begin) /* If we didn't wrap */ | ||
1681 | return true; | ||
1682 | return false; | ||
1683 | } | ||
1684 | |||
1546 | /** | 1685 | /** |
1547 | * gfs2_inplace_reserve - Reserve space in the filesystem | 1686 | * gfs2_inplace_reserve - Reserve space in the filesystem |
1548 | * @ip: the inode to reserve space for | 1687 | * @ip: the inode to reserve space for |
@@ -1562,103 +1701,96 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | |||
1562 | 1701 | ||
1563 | if (sdp->sd_args.ar_rgrplvb) | 1702 | if (sdp->sd_args.ar_rgrplvb) |
1564 | flags |= GL_SKIP; | 1703 | flags |= GL_SKIP; |
1565 | rs->rs_requested = requested; | 1704 | if (gfs2_assert_warn(sdp, requested)) |
1566 | if (gfs2_assert_warn(sdp, requested)) { | 1705 | return -EINVAL; |
1567 | error = -EINVAL; | ||
1568 | goto out; | ||
1569 | } | ||
1570 | if (gfs2_rs_active(rs)) { | 1706 | if (gfs2_rs_active(rs)) { |
1571 | begin = rs->rs_rgd; | 1707 | begin = rs->rs_rbm.rgd; |
1572 | flags = 0; /* Yoda: Do or do not. There is no try */ | 1708 | flags = 0; /* Yoda: Do or do not. There is no try */ |
1573 | } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { | 1709 | } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { |
1574 | rs->rs_rgd = begin = ip->i_rgd; | 1710 | rs->rs_rbm.rgd = begin = ip->i_rgd; |
1575 | } else { | 1711 | } else { |
1576 | rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); | 1712 | rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); |
1577 | } | 1713 | } |
1578 | if (rs->rs_rgd == NULL) | 1714 | if (rs->rs_rbm.rgd == NULL) |
1579 | return -EBADSLT; | 1715 | return -EBADSLT; |
1580 | 1716 | ||
1581 | while (loops < 3) { | 1717 | while (loops < 3) { |
1582 | rg_locked = 0; | 1718 | rg_locked = 1; |
1583 | 1719 | ||
1584 | if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { | 1720 | if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { |
1585 | rg_locked = 1; | 1721 | rg_locked = 0; |
1586 | error = 0; | 1722 | error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, |
1587 | } else if (!loops && !gfs2_rs_active(rs) && | ||
1588 | rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { | ||
1589 | /* If the rgrp already is maxed out for contenders, | ||
1590 | we can eliminate it as a "first pass" without even | ||
1591 | requesting the rgrp glock. */ | ||
1592 | error = GLR_TRYFAILED; | ||
1593 | } else { | ||
1594 | error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, | ||
1595 | LM_ST_EXCLUSIVE, flags, | 1723 | LM_ST_EXCLUSIVE, flags, |
1596 | &rs->rs_rgd_gh); | 1724 | &rs->rs_rgd_gh); |
1597 | if (!error && sdp->sd_args.ar_rgrplvb) { | 1725 | if (error == GLR_TRYFAILED) |
1598 | error = update_rgrp_lvb(rs->rs_rgd); | 1726 | goto next_rgrp; |
1599 | if (error) { | 1727 | if (unlikely(error)) |
1728 | return error; | ||
1729 | if (sdp->sd_args.ar_rgrplvb) { | ||
1730 | error = update_rgrp_lvb(rs->rs_rbm.rgd); | ||
1731 | if (unlikely(error)) { | ||
1600 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); | 1732 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); |
1601 | return error; | 1733 | return error; |
1602 | } | 1734 | } |
1603 | } | 1735 | } |
1604 | } | 1736 | } |
1605 | switch (error) { | ||
1606 | case 0: | ||
1607 | if (gfs2_rs_active(rs)) { | ||
1608 | if (unclaimed_blocks(rs->rs_rgd) + | ||
1609 | rs->rs_free >= rs->rs_requested) { | ||
1610 | ip->i_rgd = rs->rs_rgd; | ||
1611 | return 0; | ||
1612 | } | ||
1613 | /* We have a multi-block reservation, but the | ||
1614 | rgrp doesn't have enough free blocks to | ||
1615 | satisfy the request. Free the reservation | ||
1616 | and look for a suitable rgrp. */ | ||
1617 | gfs2_rs_deltree(rs); | ||
1618 | } | ||
1619 | if (try_rgrp_fit(rs->rs_rgd, ip)) { | ||
1620 | if (sdp->sd_args.ar_rgrplvb) | ||
1621 | gfs2_rgrp_bh_get(rs->rs_rgd); | ||
1622 | ip->i_rgd = rs->rs_rgd; | ||
1623 | return 0; | ||
1624 | } | ||
1625 | if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) { | ||
1626 | if (sdp->sd_args.ar_rgrplvb) | ||
1627 | gfs2_rgrp_bh_get(rs->rs_rgd); | ||
1628 | try_rgrp_unlink(rs->rs_rgd, &last_unlinked, | ||
1629 | ip->i_no_addr); | ||
1630 | } | ||
1631 | if (!rg_locked) | ||
1632 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); | ||
1633 | /* fall through */ | ||
1634 | case GLR_TRYFAILED: | ||
1635 | rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd); | ||
1636 | rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */ | ||
1637 | if (rs->rs_rgd != begin) /* If we didn't wrap */ | ||
1638 | break; | ||
1639 | 1737 | ||
1640 | flags &= ~LM_FLAG_TRY; | 1738 | /* Skip unuseable resource groups */ |
1641 | loops++; | 1739 | if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) |
1642 | /* Check that fs hasn't grown if writing to rindex */ | 1740 | goto skip_rgrp; |
1643 | if (ip == GFS2_I(sdp->sd_rindex) && | 1741 | |
1644 | !sdp->sd_rindex_uptodate) { | 1742 | if (sdp->sd_args.ar_rgrplvb) |
1645 | error = gfs2_ri_update(ip); | 1743 | gfs2_rgrp_bh_get(rs->rs_rbm.rgd); |
1646 | if (error) | 1744 | |
1647 | goto out; | 1745 | /* Get a reservation if we don't already have one */ |
1648 | } else if (loops == 2) | 1746 | if (!gfs2_rs_active(rs)) |
1649 | /* Flushing the log may release space */ | 1747 | rg_mblk_search(rs->rs_rbm.rgd, ip, requested); |
1650 | gfs2_log_flush(sdp, NULL); | 1748 | |
1651 | break; | 1749 | /* Skip rgrps when we can't get a reservation on first pass */ |
1652 | default: | 1750 | if (!gfs2_rs_active(rs) && (loops < 1)) |
1653 | goto out; | 1751 | goto check_rgrp; |
1752 | |||
1753 | /* If rgrp has enough free space, use it */ | ||
1754 | if (rs->rs_rbm.rgd->rd_free_clone >= requested) { | ||
1755 | ip->i_rgd = rs->rs_rbm.rgd; | ||
1756 | return 0; | ||
1757 | } | ||
1758 | |||
1759 | /* Drop reservation, if we couldn't use reserved rgrp */ | ||
1760 | if (gfs2_rs_active(rs)) | ||
1761 | gfs2_rs_deltree(ip, rs); | ||
1762 | check_rgrp: | ||
1763 | /* Check for unlinked inodes which can be reclaimed */ | ||
1764 | if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) | ||
1765 | try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, | ||
1766 | ip->i_no_addr); | ||
1767 | skip_rgrp: | ||
1768 | /* Unlock rgrp if required */ | ||
1769 | if (!rg_locked) | ||
1770 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); | ||
1771 | next_rgrp: | ||
1772 | /* Find the next rgrp, and continue looking */ | ||
1773 | if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) | ||
1774 | continue; | ||
1775 | |||
1776 | /* If we've scanned all the rgrps, but found no free blocks | ||
1777 | * then this checks for some less likely conditions before | ||
1778 | * trying again. | ||
1779 | */ | ||
1780 | flags &= ~LM_FLAG_TRY; | ||
1781 | loops++; | ||
1782 | /* Check that fs hasn't grown if writing to rindex */ | ||
1783 | if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { | ||
1784 | error = gfs2_ri_update(ip); | ||
1785 | if (error) | ||
1786 | return error; | ||
1654 | } | 1787 | } |
1788 | /* Flushing the log may release space */ | ||
1789 | if (loops == 2) | ||
1790 | gfs2_log_flush(sdp, NULL); | ||
1655 | } | 1791 | } |
1656 | error = -ENOSPC; | ||
1657 | 1792 | ||
1658 | out: | 1793 | return -ENOSPC; |
1659 | if (error) | ||
1660 | rs->rs_requested = 0; | ||
1661 | return error; | ||
1662 | } | 1794 | } |
1663 | 1795 | ||
1664 | /** | 1796 | /** |
@@ -1672,15 +1804,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip) | |||
1672 | { | 1804 | { |
1673 | struct gfs2_blkreserv *rs = ip->i_res; | 1805 | struct gfs2_blkreserv *rs = ip->i_res; |
1674 | 1806 | ||
1675 | if (!rs) | ||
1676 | return; | ||
1677 | |||
1678 | if (!rs->rs_free) | ||
1679 | gfs2_rs_deltree(rs); | ||
1680 | |||
1681 | if (rs->rs_rgd_gh.gh_gl) | 1807 | if (rs->rs_rgd_gh.gh_gl) |
1682 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); | 1808 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); |
1683 | rs->rs_requested = 0; | ||
1684 | } | 1809 | } |
1685 | 1810 | ||
1686 | /** | 1811 | /** |
@@ -1693,173 +1818,47 @@ void gfs2_inplace_release(struct gfs2_inode *ip) | |||
1693 | 1818 | ||
1694 | static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) | 1819 | static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) |
1695 | { | 1820 | { |
1696 | struct gfs2_bitmap *bi = NULL; | 1821 | struct gfs2_rbm rbm = { .rgd = rgd, }; |
1697 | u32 length, rgrp_block, buf_block; | 1822 | int ret; |
1698 | unsigned int buf; | ||
1699 | unsigned char type; | ||
1700 | |||
1701 | length = rgd->rd_length; | ||
1702 | rgrp_block = block - rgd->rd_data0; | ||
1703 | |||
1704 | for (buf = 0; buf < length; buf++) { | ||
1705 | bi = rgd->rd_bits + buf; | ||
1706 | if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY) | ||
1707 | break; | ||
1708 | } | ||
1709 | 1823 | ||
1710 | gfs2_assert(rgd->rd_sbd, buf < length); | 1824 | ret = gfs2_rbm_from_block(&rbm, block); |
1711 | buf_block = rgrp_block - bi->bi_start * GFS2_NBBY; | 1825 | WARN_ON_ONCE(ret != 0); |
1712 | 1826 | ||
1713 | type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, | 1827 | return gfs2_testbit(&rbm); |
1714 | bi->bi_len, buf_block); | ||
1715 | |||
1716 | return type; | ||
1717 | } | 1828 | } |
1718 | 1829 | ||
1719 | /** | ||
1720 | * rgblk_search - find a block in @state | ||
1721 | * @rgd: the resource group descriptor | ||
1722 | * @goal: the goal block within the RG (start here to search for avail block) | ||
1723 | * @state: GFS2_BLKST_XXX the before-allocation state to find | ||
1724 | * @rbi: address of the pointer to the bitmap containing the block found | ||
1725 | * | ||
1726 | * Walk rgrp's bitmap to find bits that represent a block in @state. | ||
1727 | * | ||
1728 | * This function never fails, because we wouldn't call it unless we | ||
1729 | * know (from reservation results, etc.) that a block is available. | ||
1730 | * | ||
1731 | * Scope of @goal is just within rgrp, not the whole filesystem. | ||
1732 | * Scope of @returned block is just within bitmap, not the whole filesystem. | ||
1733 | * | ||
1734 | * Returns: the block number found relative to the bitmap rbi | ||
1735 | */ | ||
1736 | |||
1737 | static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state, | ||
1738 | struct gfs2_bitmap **rbi) | ||
1739 | { | ||
1740 | struct gfs2_bitmap *bi = NULL; | ||
1741 | const u32 length = rgd->rd_length; | ||
1742 | u32 biblk = BFITNOENT; | ||
1743 | unsigned int buf, x; | ||
1744 | const u8 *buffer = NULL; | ||
1745 | |||
1746 | *rbi = NULL; | ||
1747 | /* Find bitmap block that contains bits for goal block */ | ||
1748 | for (buf = 0; buf < length; buf++) { | ||
1749 | bi = rgd->rd_bits + buf; | ||
1750 | /* Convert scope of "goal" from rgrp-wide to within found bit block */ | ||
1751 | if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { | ||
1752 | goal -= bi->bi_start * GFS2_NBBY; | ||
1753 | goto do_search; | ||
1754 | } | ||
1755 | } | ||
1756 | buf = 0; | ||
1757 | goal = 0; | ||
1758 | |||
1759 | do_search: | ||
1760 | /* Search (up to entire) bitmap in this rgrp for allocatable block. | ||
1761 | "x <= length", instead of "x < length", because we typically start | ||
1762 | the search in the middle of a bit block, but if we can't find an | ||
1763 | allocatable block anywhere else, we want to be able wrap around and | ||
1764 | search in the first part of our first-searched bit block. */ | ||
1765 | for (x = 0; x <= length; x++) { | ||
1766 | bi = rgd->rd_bits + buf; | ||
1767 | |||
1768 | if (test_bit(GBF_FULL, &bi->bi_flags) && | ||
1769 | (state == GFS2_BLKST_FREE)) | ||
1770 | goto skip; | ||
1771 | |||
1772 | /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone | ||
1773 | bitmaps, so we must search the originals for that. */ | ||
1774 | buffer = bi->bi_bh->b_data + bi->bi_offset; | ||
1775 | WARN_ON(!buffer_uptodate(bi->bi_bh)); | ||
1776 | if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) | ||
1777 | buffer = bi->bi_clone + bi->bi_offset; | ||
1778 | |||
1779 | while (1) { | ||
1780 | struct gfs2_blkreserv *rs; | ||
1781 | u32 rgblk; | ||
1782 | |||
1783 | biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); | ||
1784 | if (biblk == BFITNOENT) | ||
1785 | break; | ||
1786 | /* Check if this block is reserved() */ | ||
1787 | rgblk = gfs2_bi2rgd_blk(bi, biblk); | ||
1788 | rs = rs_find(rgd, rgblk); | ||
1789 | if (rs == NULL) | ||
1790 | break; | ||
1791 | |||
1792 | BUG_ON(rs->rs_bi != bi); | ||
1793 | biblk = BFITNOENT; | ||
1794 | /* This should jump to the first block after the | ||
1795 | reservation. */ | ||
1796 | goal = rs->rs_biblk + rs->rs_free; | ||
1797 | if (goal >= bi->bi_len * GFS2_NBBY) | ||
1798 | break; | ||
1799 | } | ||
1800 | if (biblk != BFITNOENT) | ||
1801 | break; | ||
1802 | |||
1803 | if ((goal == 0) && (state == GFS2_BLKST_FREE)) | ||
1804 | set_bit(GBF_FULL, &bi->bi_flags); | ||
1805 | |||
1806 | /* Try next bitmap block (wrap back to rgrp header if at end) */ | ||
1807 | skip: | ||
1808 | buf++; | ||
1809 | buf %= length; | ||
1810 | goal = 0; | ||
1811 | } | ||
1812 | |||
1813 | if (biblk != BFITNOENT) | ||
1814 | *rbi = bi; | ||
1815 | |||
1816 | return biblk; | ||
1817 | } | ||
1818 | 1830 | ||
1819 | /** | 1831 | /** |
1820 | * gfs2_alloc_extent - allocate an extent from a given bitmap | 1832 | * gfs2_alloc_extent - allocate an extent from a given bitmap |
1821 | * @rgd: the resource group descriptor | 1833 | * @rbm: the resource group information |
1822 | * @bi: the bitmap within the rgrp | ||
1823 | * @blk: the block within the bitmap | ||
1824 | * @dinode: TRUE if the first block we allocate is for a dinode | 1834 | * @dinode: TRUE if the first block we allocate is for a dinode |
1825 | * @n: The extent length | 1835 | * @n: The extent length (value/result) |
1826 | * | 1836 | * |
1827 | * Add the found bitmap buffer to the transaction. | 1837 | * Add the bitmap buffer to the transaction. |
1828 | * Set the found bits to @new_state to change block's allocation state. | 1838 | * Set the found bits to @new_state to change block's allocation state. |
1829 | * Returns: starting block number of the extent (fs scope) | ||
1830 | */ | 1839 | */ |
1831 | static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, | 1840 | static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, |
1832 | u32 blk, bool dinode, unsigned int *n) | 1841 | unsigned int *n) |
1833 | { | 1842 | { |
1843 | struct gfs2_rbm pos = { .rgd = rbm->rgd, }; | ||
1834 | const unsigned int elen = *n; | 1844 | const unsigned int elen = *n; |
1835 | u32 goal, rgblk; | 1845 | u64 block; |
1836 | const u8 *buffer = NULL; | 1846 | int ret; |
1837 | struct gfs2_blkreserv *rs; | 1847 | |
1838 | 1848 | *n = 1; | |
1839 | *n = 0; | 1849 | block = gfs2_rbm_to_block(rbm); |
1840 | buffer = bi->bi_bh->b_data + bi->bi_offset; | 1850 | gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); |
1841 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); | 1851 | gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); |
1842 | gfs2_setbit(rgd, bi->bi_clone, bi, blk, | 1852 | block++; |
1843 | dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); | ||
1844 | (*n)++; | ||
1845 | goal = blk; | ||
1846 | while (*n < elen) { | 1853 | while (*n < elen) { |
1847 | goal++; | 1854 | ret = gfs2_rbm_from_block(&pos, block); |
1848 | if (goal >= (bi->bi_len * GFS2_NBBY)) | 1855 | if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) |
1849 | break; | ||
1850 | rgblk = gfs2_bi2rgd_blk(bi, goal); | ||
1851 | rs = rs_find(rgd, rgblk); | ||
1852 | if (rs) /* Oops, we bumped into someone's reservation */ | ||
1853 | break; | ||
1854 | if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != | ||
1855 | GFS2_BLKST_FREE) | ||
1856 | break; | 1856 | break; |
1857 | gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED); | 1857 | gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); |
1858 | gfs2_setbit(&pos, true, GFS2_BLKST_USED); | ||
1858 | (*n)++; | 1859 | (*n)++; |
1860 | block++; | ||
1859 | } | 1861 | } |
1860 | blk = gfs2_bi2rgd_blk(bi, blk); | ||
1861 | rgd->rd_last_alloc = blk + *n - 1; | ||
1862 | return rgd->rd_data0 + blk; | ||
1863 | } | 1862 | } |
1864 | 1863 | ||
1865 | /** | 1864 | /** |
@@ -1875,46 +1874,30 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, | |||
1875 | static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | 1874 | static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, |
1876 | u32 blen, unsigned char new_state) | 1875 | u32 blen, unsigned char new_state) |
1877 | { | 1876 | { |
1878 | struct gfs2_rgrpd *rgd; | 1877 | struct gfs2_rbm rbm; |
1879 | struct gfs2_bitmap *bi = NULL; | ||
1880 | u32 length, rgrp_blk, buf_blk; | ||
1881 | unsigned int buf; | ||
1882 | 1878 | ||
1883 | rgd = gfs2_blk2rgrpd(sdp, bstart, 1); | 1879 | rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); |
1884 | if (!rgd) { | 1880 | if (!rbm.rgd) { |
1885 | if (gfs2_consist(sdp)) | 1881 | if (gfs2_consist(sdp)) |
1886 | fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); | 1882 | fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); |
1887 | return NULL; | 1883 | return NULL; |
1888 | } | 1884 | } |
1889 | 1885 | ||
1890 | length = rgd->rd_length; | ||
1891 | |||
1892 | rgrp_blk = bstart - rgd->rd_data0; | ||
1893 | |||
1894 | while (blen--) { | 1886 | while (blen--) { |
1895 | for (buf = 0; buf < length; buf++) { | 1887 | gfs2_rbm_from_block(&rbm, bstart); |
1896 | bi = rgd->rd_bits + buf; | 1888 | bstart++; |
1897 | if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY) | 1889 | if (!rbm.bi->bi_clone) { |
1898 | break; | 1890 | rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size, |
1899 | } | 1891 | GFP_NOFS | __GFP_NOFAIL); |
1900 | 1892 | memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset, | |
1901 | gfs2_assert(rgd->rd_sbd, buf < length); | 1893 | rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, |
1902 | 1894 | rbm.bi->bi_len); | |
1903 | buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY; | ||
1904 | rgrp_blk++; | ||
1905 | |||
1906 | if (!bi->bi_clone) { | ||
1907 | bi->bi_clone = kmalloc(bi->bi_bh->b_size, | ||
1908 | GFP_NOFS | __GFP_NOFAIL); | ||
1909 | memcpy(bi->bi_clone + bi->bi_offset, | ||
1910 | bi->bi_bh->b_data + bi->bi_offset, | ||
1911 | bi->bi_len); | ||
1912 | } | 1895 | } |
1913 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); | 1896 | gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); |
1914 | gfs2_setbit(rgd, NULL, bi, buf_blk, new_state); | 1897 | gfs2_setbit(&rbm, false, new_state); |
1915 | } | 1898 | } |
1916 | 1899 | ||
1917 | return rgd; | 1900 | return rbm.rgd; |
1918 | } | 1901 | } |
1919 | 1902 | ||
1920 | /** | 1903 | /** |
@@ -1956,56 +1939,41 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) | |||
1956 | } | 1939 | } |
1957 | 1940 | ||
1958 | /** | 1941 | /** |
1959 | * claim_reserved_blks - Claim previously reserved blocks | 1942 | * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation |
1960 | * @ip: the inode that's claiming the reservation | 1943 | * @ip: The inode we have just allocated blocks for |
1961 | * @dinode: 1 if this block is a dinode block, otherwise data block | 1944 | * @rbm: The start of the allocated blocks |
1962 | * @nblocks: desired extent length | 1945 | * @len: The extent length |
1963 | * | 1946 | * |
1964 | * Lay claim to previously allocated block reservation blocks. | 1947 | * Adjusts a reservation after an allocation has taken place. If the |
1965 | * Returns: Starting block number of the blocks claimed. | 1948 | * reservation does not match the allocation, or if it is now empty |
1966 | * Sets *nblocks to the actual extent length allocated. | 1949 | * then it is removed. |
1967 | */ | 1950 | */ |
1968 | static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, | 1951 | |
1969 | unsigned int *nblocks) | 1952 | static void gfs2_adjust_reservation(struct gfs2_inode *ip, |
1953 | const struct gfs2_rbm *rbm, unsigned len) | ||
1970 | { | 1954 | { |
1971 | struct gfs2_blkreserv *rs = ip->i_res; | 1955 | struct gfs2_blkreserv *rs = ip->i_res; |
1972 | struct gfs2_rgrpd *rgd = rs->rs_rgd; | 1956 | struct gfs2_rgrpd *rgd = rbm->rgd; |
1973 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1957 | unsigned rlen; |
1974 | struct gfs2_bitmap *bi; | 1958 | u64 block; |
1975 | u64 start_block = gfs2_rs_startblk(rs); | 1959 | int ret; |
1976 | const unsigned int elen = *nblocks; | ||
1977 | |||
1978 | /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ | ||
1979 | gfs2_assert_withdraw(sdp, rgd); | ||
1980 | /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ | ||
1981 | bi = rs->rs_bi; | ||
1982 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); | ||
1983 | |||
1984 | for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { | ||
1985 | /* Make sure the bitmap hasn't changed */ | ||
1986 | gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, | ||
1987 | dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); | ||
1988 | rs->rs_biblk++; | ||
1989 | rs->rs_free--; | ||
1990 | |||
1991 | BUG_ON(!rgd->rd_reserved); | ||
1992 | rgd->rd_reserved--; | ||
1993 | dinode = false; | ||
1994 | trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); | ||
1995 | } | ||
1996 | |||
1997 | if (!rs->rs_free) { | ||
1998 | struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd; | ||
1999 | 1960 | ||
2000 | gfs2_rs_deltree(rs); | 1961 | spin_lock(&rgd->rd_rsspin); |
2001 | /* -nblocks because we haven't returned to do the math yet. | 1962 | if (gfs2_rs_active(rs)) { |
2002 | I'm doing the math backwards to prevent negative numbers, | 1963 | if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { |
2003 | but think of it as: | 1964 | block = gfs2_rbm_to_block(rbm); |
2004 | if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */ | 1965 | ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); |
2005 | if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks) | 1966 | rlen = min(rs->rs_free, len); |
2006 | rg_mblk_search(rgd, ip); | 1967 | rs->rs_free -= rlen; |
1968 | rgd->rd_reserved -= rlen; | ||
1969 | trace_gfs2_rs(rs, TRACE_RS_CLAIM); | ||
1970 | if (rs->rs_free && !ret) | ||
1971 | goto out; | ||
1972 | } | ||
1973 | __rs_deltree(ip, rs); | ||
2007 | } | 1974 | } |
2008 | return start_block; | 1975 | out: |
1976 | spin_unlock(&rgd->rd_rsspin); | ||
2009 | } | 1977 | } |
2010 | 1978 | ||
2011 | /** | 1979 | /** |
@@ -2024,47 +1992,40 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, | |||
2024 | { | 1992 | { |
2025 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1993 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
2026 | struct buffer_head *dibh; | 1994 | struct buffer_head *dibh; |
2027 | struct gfs2_rgrpd *rgd; | 1995 | struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; |
2028 | unsigned int ndata; | 1996 | unsigned int ndata; |
2029 | u32 goal, blk; /* block, within the rgrp scope */ | 1997 | u64 goal; |
2030 | u64 block; /* block, within the file system scope */ | 1998 | u64 block; /* block, within the file system scope */ |
2031 | int error; | 1999 | int error; |
2032 | struct gfs2_bitmap *bi; | ||
2033 | 2000 | ||
2034 | /* Only happens if there is a bug in gfs2, return something distinctive | 2001 | if (gfs2_rs_active(ip->i_res)) |
2035 | * to ensure that it is noticed. | 2002 | goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm); |
2036 | */ | 2003 | else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) |
2037 | if (ip->i_res->rs_requested == 0) | 2004 | goal = ip->i_goal; |
2038 | return -ECANCELED; | 2005 | else |
2039 | 2006 | goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; | |
2040 | /* Check if we have a multi-block reservation, and if so, claim the | ||
2041 | next free block from it. */ | ||
2042 | if (gfs2_rs_active(ip->i_res)) { | ||
2043 | BUG_ON(!ip->i_res->rs_free); | ||
2044 | rgd = ip->i_res->rs_rgd; | ||
2045 | block = claim_reserved_blks(ip, dinode, nblocks); | ||
2046 | } else { | ||
2047 | rgd = ip->i_rgd; | ||
2048 | 2007 | ||
2049 | if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) | 2008 | gfs2_rbm_from_block(&rbm, goal); |
2050 | goal = ip->i_goal - rgd->rd_data0; | 2009 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); |
2051 | else | ||
2052 | goal = rgd->rd_last_alloc; | ||
2053 | |||
2054 | blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); | ||
2055 | |||
2056 | /* Since all blocks are reserved in advance, this shouldn't | ||
2057 | happen */ | ||
2058 | if (blk == BFITNOENT) { | ||
2059 | printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", | ||
2060 | *nblocks); | ||
2061 | printk(KERN_WARNING "FULL=%d\n", | ||
2062 | test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); | ||
2063 | goto rgrp_error; | ||
2064 | } | ||
2065 | 2010 | ||
2066 | block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); | 2011 | if (error == -ENOSPC) { |
2012 | gfs2_rbm_from_block(&rbm, goal); | ||
2013 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); | ||
2014 | } | ||
2015 | |||
2016 | /* Since all blocks are reserved in advance, this shouldn't happen */ | ||
2017 | if (error) { | ||
2018 | fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n", | ||
2019 | (unsigned long long)ip->i_no_addr, error, *nblocks, | ||
2020 | test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); | ||
2021 | goto rgrp_error; | ||
2067 | } | 2022 | } |
2023 | |||
2024 | gfs2_alloc_extent(&rbm, dinode, nblocks); | ||
2025 | block = gfs2_rbm_to_block(&rbm); | ||
2026 | rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; | ||
2027 | if (gfs2_rs_active(ip->i_res)) | ||
2028 | gfs2_adjust_reservation(ip, &rbm, *nblocks); | ||
2068 | ndata = *nblocks; | 2029 | ndata = *nblocks; |
2069 | if (dinode) | 2030 | if (dinode) |
2070 | ndata--; | 2031 | ndata--; |
@@ -2081,22 +2042,22 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, | |||
2081 | brelse(dibh); | 2042 | brelse(dibh); |
2082 | } | 2043 | } |
2083 | } | 2044 | } |
2084 | if (rgd->rd_free < *nblocks) { | 2045 | if (rbm.rgd->rd_free < *nblocks) { |
2085 | printk(KERN_WARNING "nblocks=%u\n", *nblocks); | 2046 | printk(KERN_WARNING "nblocks=%u\n", *nblocks); |
2086 | goto rgrp_error; | 2047 | goto rgrp_error; |
2087 | } | 2048 | } |
2088 | 2049 | ||
2089 | rgd->rd_free -= *nblocks; | 2050 | rbm.rgd->rd_free -= *nblocks; |
2090 | if (dinode) { | 2051 | if (dinode) { |
2091 | rgd->rd_dinodes++; | 2052 | rbm.rgd->rd_dinodes++; |
2092 | *generation = rgd->rd_igeneration++; | 2053 | *generation = rbm.rgd->rd_igeneration++; |
2093 | if (*generation == 0) | 2054 | if (*generation == 0) |
2094 | *generation = rgd->rd_igeneration++; | 2055 | *generation = rbm.rgd->rd_igeneration++; |
2095 | } | 2056 | } |
2096 | 2057 | ||
2097 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 2058 | gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); |
2098 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); | 2059 | gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); |
2099 | gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); | 2060 | gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); |
2100 | 2061 | ||
2101 | gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); | 2062 | gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); |
2102 | if (dinode) | 2063 | if (dinode) |
@@ -2110,14 +2071,14 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, | |||
2110 | gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, | 2071 | gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, |
2111 | ip->i_inode.i_gid); | 2072 | ip->i_inode.i_gid); |
2112 | 2073 | ||
2113 | rgd->rd_free_clone -= *nblocks; | 2074 | rbm.rgd->rd_free_clone -= *nblocks; |
2114 | trace_gfs2_block_alloc(ip, rgd, block, *nblocks, | 2075 | trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, |
2115 | dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); | 2076 | dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); |
2116 | *bn = block; | 2077 | *bn = block; |
2117 | return 0; | 2078 | return 0; |
2118 | 2079 | ||
2119 | rgrp_error: | 2080 | rgrp_error: |
2120 | gfs2_rgrp_error(rgd); | 2081 | gfs2_rgrp_error(rbm.rgd); |
2121 | return -EIO; | 2082 | return -EIO; |
2122 | } | 2083 | } |
2123 | 2084 | ||
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index ca6e26729b86..24077958dcf6 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -46,7 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, | |||
46 | bool dinode, u64 *generation); | 46 | bool dinode, u64 *generation); |
47 | 47 | ||
48 | extern int gfs2_rs_alloc(struct gfs2_inode *ip); | 48 | extern int gfs2_rs_alloc(struct gfs2_inode *ip); |
49 | extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); | 49 | extern void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs); |
50 | extern void gfs2_rs_delete(struct gfs2_inode *ip); | 50 | extern void gfs2_rs_delete(struct gfs2_inode *ip); |
51 | extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); | 51 | extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); |
52 | extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); | 52 | extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); |
@@ -73,30 +73,10 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | |||
73 | const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); | 73 | const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); |
74 | extern int gfs2_fitrim(struct file *filp, void __user *argp); | 74 | extern int gfs2_fitrim(struct file *filp, void __user *argp); |
75 | 75 | ||
76 | /* This is how to tell if a multi-block reservation is "inplace" reserved: */ | 76 | /* This is how to tell if a reservation is in the rgrp tree: */ |
77 | static inline int gfs2_mb_reserved(struct gfs2_inode *ip) | 77 | static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs) |
78 | { | 78 | { |
79 | if (ip->i_res && ip->i_res->rs_requested) | 79 | return rs && !RB_EMPTY_NODE(&rs->rs_node); |
80 | return 1; | ||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | /* This is how to tell if a multi-block reservation is in the rgrp tree: */ | ||
85 | static inline int gfs2_rs_active(struct gfs2_blkreserv *rs) | ||
86 | { | ||
87 | if (rs && rs->rs_bi) | ||
88 | return 1; | ||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk) | ||
93 | { | ||
94 | return (bi->bi_start * GFS2_NBBY) + blk; | ||
95 | } | ||
96 | |||
97 | static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs) | ||
98 | { | ||
99 | return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0; | ||
100 | } | 80 | } |
101 | 81 | ||
102 | #endif /* __RGRP_DOT_H__ */ | 82 | #endif /* __RGRP_DOT_H__ */ |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index fc3168f47a14..bc737261f234 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -1366,6 +1366,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) | |||
1366 | val = sdp->sd_tune.gt_statfs_quantum; | 1366 | val = sdp->sd_tune.gt_statfs_quantum; |
1367 | if (val != 30) | 1367 | if (val != 30) |
1368 | seq_printf(s, ",statfs_quantum=%d", val); | 1368 | seq_printf(s, ",statfs_quantum=%d", val); |
1369 | else if (sdp->sd_tune.gt_statfs_slow) | ||
1370 | seq_puts(s, ",statfs_quantum=0"); | ||
1369 | val = sdp->sd_tune.gt_quota_quantum; | 1371 | val = sdp->sd_tune.gt_quota_quantum; |
1370 | if (val != 60) | 1372 | if (val != 60) |
1371 | seq_printf(s, ",quota_quantum=%d", val); | 1373 | seq_printf(s, ",quota_quantum=%d", val); |
@@ -1543,6 +1545,11 @@ static void gfs2_evict_inode(struct inode *inode) | |||
1543 | 1545 | ||
1544 | out_truncate: | 1546 | out_truncate: |
1545 | gfs2_log_flush(sdp, ip->i_gl); | 1547 | gfs2_log_flush(sdp, ip->i_gl); |
1548 | if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { | ||
1549 | struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); | ||
1550 | filemap_fdatawrite(metamapping); | ||
1551 | filemap_fdatawait(metamapping); | ||
1552 | } | ||
1546 | write_inode_now(inode, 1); | 1553 | write_inode_now(inode, 1); |
1547 | gfs2_ail_flush(ip->i_gl, 0); | 1554 | gfs2_ail_flush(ip->i_gl, 0); |
1548 | 1555 | ||
@@ -1557,7 +1564,7 @@ out_truncate: | |||
1557 | out_unlock: | 1564 | out_unlock: |
1558 | /* Error path for case 1 */ | 1565 | /* Error path for case 1 */ |
1559 | if (gfs2_rs_active(ip->i_res)) | 1566 | if (gfs2_rs_active(ip->i_res)) |
1560 | gfs2_rs_deltree(ip->i_res); | 1567 | gfs2_rs_deltree(ip, ip->i_res); |
1561 | 1568 | ||
1562 | if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) | 1569 | if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) |
1563 | gfs2_glock_dq(&ip->i_iopen_gh); | 1570 | gfs2_glock_dq(&ip->i_iopen_gh); |
@@ -1572,7 +1579,7 @@ out: | |||
1572 | clear_inode(inode); | 1579 | clear_inode(inode); |
1573 | gfs2_dir_hash_inval(ip); | 1580 | gfs2_dir_hash_inval(ip); |
1574 | ip->i_gl->gl_object = NULL; | 1581 | ip->i_gl->gl_object = NULL; |
1575 | flush_delayed_work_sync(&ip->i_gl->gl_work); | 1582 | flush_delayed_work(&ip->i_gl->gl_work); |
1576 | gfs2_glock_add_to_lru(ip->i_gl); | 1583 | gfs2_glock_add_to_lru(ip->i_gl); |
1577 | gfs2_glock_put(ip->i_gl); | 1584 | gfs2_glock_put(ip->i_gl); |
1578 | ip->i_gl = NULL; | 1585 | ip->i_gl = NULL; |
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index a25c252fe412..bbdc78af60ca 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h | |||
@@ -509,10 +509,9 @@ TRACE_EVENT(gfs2_block_alloc, | |||
509 | /* Keep track of multi-block reservations as they are allocated/freed */ | 509 | /* Keep track of multi-block reservations as they are allocated/freed */ |
510 | TRACE_EVENT(gfs2_rs, | 510 | TRACE_EVENT(gfs2_rs, |
511 | 511 | ||
512 | TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs, | 512 | TP_PROTO(const struct gfs2_blkreserv *rs, u8 func), |
513 | u8 func), | ||
514 | 513 | ||
515 | TP_ARGS(ip, rs, func), | 514 | TP_ARGS(rs, func), |
516 | 515 | ||
517 | TP_STRUCT__entry( | 516 | TP_STRUCT__entry( |
518 | __field( dev_t, dev ) | 517 | __field( dev_t, dev ) |
@@ -526,18 +525,17 @@ TRACE_EVENT(gfs2_rs, | |||
526 | ), | 525 | ), |
527 | 526 | ||
528 | TP_fast_assign( | 527 | TP_fast_assign( |
529 | __entry->dev = rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0; | 528 | __entry->dev = rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev; |
530 | __entry->rd_addr = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0; | 529 | __entry->rd_addr = rs->rs_rbm.rgd->rd_addr; |
531 | __entry->rd_free_clone = rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0; | 530 | __entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone; |
532 | __entry->rd_reserved = rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0; | 531 | __entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved; |
533 | __entry->inum = ip ? ip->i_no_addr : 0; | 532 | __entry->inum = rs->rs_inum; |
534 | __entry->start = gfs2_rs_startblk(rs); | 533 | __entry->start = gfs2_rbm_to_block(&rs->rs_rbm); |
535 | __entry->free = rs->rs_free; | 534 | __entry->free = rs->rs_free; |
536 | __entry->func = func; | 535 | __entry->func = func; |
537 | ), | 536 | ), |
538 | 537 | ||
539 | TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s " | 538 | TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu", |
540 | "f:%lu", | ||
541 | MAJOR(__entry->dev), MINOR(__entry->dev), | 539 | MAJOR(__entry->dev), MINOR(__entry->dev), |
542 | (unsigned long long)__entry->inum, | 540 | (unsigned long long)__entry->inum, |
543 | (unsigned long long)__entry->start, | 541 | (unsigned long long)__entry->start, |
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 41f42cdccbb8..bf2ae9aeee7a 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h | |||
@@ -28,11 +28,10 @@ struct gfs2_glock; | |||
28 | 28 | ||
29 | /* reserve either the number of blocks to be allocated plus the rg header | 29 | /* reserve either the number of blocks to be allocated plus the rg header |
30 | * block, or all of the blocks in the rg, whichever is smaller */ | 30 | * block, or all of the blocks in the rg, whichever is smaller */ |
31 | static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) | 31 | static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested) |
32 | { | 32 | { |
33 | const struct gfs2_blkreserv *rs = ip->i_res; | 33 | if (requested < ip->i_rgd->rd_length) |
34 | if (rs && rs->rs_requested < ip->i_rgd->rd_length) | 34 | return requested + 1; |
35 | return rs->rs_requested + 1; | ||
36 | return ip->i_rgd->rd_length; | 35 | return ip->i_rgd->rd_length; |
37 | } | 36 | } |
38 | 37 | ||
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 27a0b4a901f5..db330e5518cd 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c | |||
@@ -448,17 +448,18 @@ ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
448 | } | 448 | } |
449 | 449 | ||
450 | /** | 450 | /** |
451 | * ea_get_unstuffed - actually copies the unstuffed data into the | 451 | * ea_iter_unstuffed - copies the unstuffed xattr data to/from the |
452 | * request buffer | 452 | * request buffer |
453 | * @ip: The GFS2 inode | 453 | * @ip: The GFS2 inode |
454 | * @ea: The extended attribute header structure | 454 | * @ea: The extended attribute header structure |
455 | * @data: The data to be copied | 455 | * @din: The data to be copied in |
456 | * @dout: The data to be copied out (one of din,dout will be NULL) | ||
456 | * | 457 | * |
457 | * Returns: errno | 458 | * Returns: errno |
458 | */ | 459 | */ |
459 | 460 | ||
460 | static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, | 461 | static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, |
461 | char *data) | 462 | const char *din, char *dout) |
462 | { | 463 | { |
463 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 464 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
464 | struct buffer_head **bh; | 465 | struct buffer_head **bh; |
@@ -467,6 +468,8 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, | |||
467 | __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); | 468 | __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); |
468 | unsigned int x; | 469 | unsigned int x; |
469 | int error = 0; | 470 | int error = 0; |
471 | unsigned char *pos; | ||
472 | unsigned cp_size; | ||
470 | 473 | ||
471 | bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); | 474 | bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); |
472 | if (!bh) | 475 | if (!bh) |
@@ -497,12 +500,21 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, | |||
497 | goto out; | 500 | goto out; |
498 | } | 501 | } |
499 | 502 | ||
500 | memcpy(data, bh[x]->b_data + sizeof(struct gfs2_meta_header), | 503 | pos = bh[x]->b_data + sizeof(struct gfs2_meta_header); |
501 | (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize); | 504 | cp_size = (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize; |
502 | 505 | ||
503 | amount -= sdp->sd_jbsize; | 506 | if (dout) { |
504 | data += sdp->sd_jbsize; | 507 | memcpy(dout, pos, cp_size); |
508 | dout += sdp->sd_jbsize; | ||
509 | } | ||
510 | |||
511 | if (din) { | ||
512 | gfs2_trans_add_bh(ip->i_gl, bh[x], 1); | ||
513 | memcpy(pos, din, cp_size); | ||
514 | din += sdp->sd_jbsize; | ||
515 | } | ||
505 | 516 | ||
517 | amount -= sdp->sd_jbsize; | ||
506 | brelse(bh[x]); | 518 | brelse(bh[x]); |
507 | } | 519 | } |
508 | 520 | ||
@@ -523,7 +535,7 @@ static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, | |||
523 | memcpy(data, GFS2_EA2DATA(el->el_ea), len); | 535 | memcpy(data, GFS2_EA2DATA(el->el_ea), len); |
524 | return len; | 536 | return len; |
525 | } | 537 | } |
526 | ret = ea_get_unstuffed(ip, el->el_ea, data); | 538 | ret = gfs2_iter_unstuffed(ip, el->el_ea, NULL, data); |
527 | if (ret < 0) | 539 | if (ret < 0) |
528 | return ret; | 540 | return ret; |
529 | return len; | 541 | return len; |
@@ -727,7 +739,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
727 | goto out_gunlock_q; | 739 | goto out_gunlock_q; |
728 | 740 | ||
729 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), | 741 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), |
730 | blks + gfs2_rg_blocks(ip) + | 742 | blks + gfs2_rg_blocks(ip, blks) + |
731 | RES_DINODE + RES_STATFS + RES_QUOTA, 0); | 743 | RES_DINODE + RES_STATFS + RES_QUOTA, 0); |
732 | if (error) | 744 | if (error) |
733 | goto out_ipres; | 745 | goto out_ipres; |
@@ -1220,69 +1232,23 @@ static int gfs2_xattr_set(struct dentry *dentry, const char *name, | |||
1220 | size, flags, type); | 1232 | size, flags, type); |
1221 | } | 1233 | } |
1222 | 1234 | ||
1235 | |||
1223 | static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, | 1236 | static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, |
1224 | struct gfs2_ea_header *ea, char *data) | 1237 | struct gfs2_ea_header *ea, char *data) |
1225 | { | 1238 | { |
1226 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1239 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1227 | struct buffer_head **bh; | ||
1228 | unsigned int amount = GFS2_EA_DATA_LEN(ea); | 1240 | unsigned int amount = GFS2_EA_DATA_LEN(ea); |
1229 | unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize); | 1241 | unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize); |
1230 | __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); | 1242 | int ret; |
1231 | unsigned int x; | ||
1232 | int error; | ||
1233 | |||
1234 | bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); | ||
1235 | if (!bh) | ||
1236 | return -ENOMEM; | ||
1237 | |||
1238 | error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0); | ||
1239 | if (error) | ||
1240 | goto out; | ||
1241 | |||
1242 | for (x = 0; x < nptrs; x++) { | ||
1243 | error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0, | ||
1244 | bh + x); | ||
1245 | if (error) { | ||
1246 | while (x--) | ||
1247 | brelse(bh[x]); | ||
1248 | goto fail; | ||
1249 | } | ||
1250 | dataptrs++; | ||
1251 | } | ||
1252 | |||
1253 | for (x = 0; x < nptrs; x++) { | ||
1254 | error = gfs2_meta_wait(sdp, bh[x]); | ||
1255 | if (error) { | ||
1256 | for (; x < nptrs; x++) | ||
1257 | brelse(bh[x]); | ||
1258 | goto fail; | ||
1259 | } | ||
1260 | if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) { | ||
1261 | for (; x < nptrs; x++) | ||
1262 | brelse(bh[x]); | ||
1263 | error = -EIO; | ||
1264 | goto fail; | ||
1265 | } | ||
1266 | |||
1267 | gfs2_trans_add_bh(ip->i_gl, bh[x], 1); | ||
1268 | |||
1269 | memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header), data, | ||
1270 | (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize); | ||
1271 | |||
1272 | amount -= sdp->sd_jbsize; | ||
1273 | data += sdp->sd_jbsize; | ||
1274 | |||
1275 | brelse(bh[x]); | ||
1276 | } | ||
1277 | 1243 | ||
1278 | out: | 1244 | ret = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0); |
1279 | kfree(bh); | 1245 | if (ret) |
1280 | return error; | 1246 | return ret; |
1281 | 1247 | ||
1282 | fail: | 1248 | ret = gfs2_iter_unstuffed(ip, ea, data, NULL); |
1283 | gfs2_trans_end(sdp); | 1249 | gfs2_trans_end(sdp); |
1284 | kfree(bh); | 1250 | |
1285 | return error; | 1251 | return ret; |
1286 | } | 1252 | } |
1287 | 1253 | ||
1288 | int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) | 1254 | int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) |
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 8275175acf6e..693df9fe52b2 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h | |||
@@ -134,8 +134,8 @@ struct hfs_sb_info { | |||
134 | permissions on all files */ | 134 | permissions on all files */ |
135 | umode_t s_dir_umask; /* The umask applied to the | 135 | umode_t s_dir_umask; /* The umask applied to the |
136 | permissions on all dirs */ | 136 | permissions on all dirs */ |
137 | uid_t s_uid; /* The uid of all files */ | 137 | kuid_t s_uid; /* The uid of all files */ |
138 | gid_t s_gid; /* The gid of all files */ | 138 | kgid_t s_gid; /* The gid of all files */ |
139 | 139 | ||
140 | int session, part; | 140 | int session, part; |
141 | struct nls_table *nls_io, *nls_disk; | 141 | struct nls_table *nls_io, *nls_disk; |
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index ee1bc55677f1..0b35903219bc 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c | |||
@@ -594,9 +594,9 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) | |||
594 | 594 | ||
595 | /* no uig/gid changes and limit which mode bits can be set */ | 595 | /* no uig/gid changes and limit which mode bits can be set */ |
596 | if (((attr->ia_valid & ATTR_UID) && | 596 | if (((attr->ia_valid & ATTR_UID) && |
597 | (attr->ia_uid != hsb->s_uid)) || | 597 | (!uid_eq(attr->ia_uid, hsb->s_uid))) || |
598 | ((attr->ia_valid & ATTR_GID) && | 598 | ((attr->ia_valid & ATTR_GID) && |
599 | (attr->ia_gid != hsb->s_gid)) || | 599 | (!gid_eq(attr->ia_gid, hsb->s_gid))) || |
600 | ((attr->ia_valid & ATTR_MODE) && | 600 | ((attr->ia_valid & ATTR_MODE) && |
601 | ((S_ISDIR(inode->i_mode) && | 601 | ((S_ISDIR(inode->i_mode) && |
602 | (attr->ia_mode != inode->i_mode)) || | 602 | (attr->ia_mode != inode->i_mode)) || |
@@ -644,7 +644,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, | |||
644 | 644 | ||
645 | /* sync the superblock to buffers */ | 645 | /* sync the superblock to buffers */ |
646 | sb = inode->i_sb; | 646 | sb = inode->i_sb; |
647 | flush_delayed_work_sync(&HFS_SB(sb)->mdb_work); | 647 | flush_delayed_work(&HFS_SB(sb)->mdb_work); |
648 | /* .. finally sync the buffers to disk */ | 648 | /* .. finally sync the buffers to disk */ |
649 | err = sync_blockdev(sb->s_bdev); | 649 | err = sync_blockdev(sb->s_bdev); |
650 | if (!ret) | 650 | if (!ret) |
diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4eb873e0c07b..e93ddaadfd1e 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c | |||
@@ -138,7 +138,9 @@ static int hfs_show_options(struct seq_file *seq, struct dentry *root) | |||
138 | seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator); | 138 | seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator); |
139 | if (sbi->s_type != cpu_to_be32(0x3f3f3f3f)) | 139 | if (sbi->s_type != cpu_to_be32(0x3f3f3f3f)) |
140 | seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type); | 140 | seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type); |
141 | seq_printf(seq, ",uid=%u,gid=%u", sbi->s_uid, sbi->s_gid); | 141 | seq_printf(seq, ",uid=%u,gid=%u", |
142 | from_kuid_munged(&init_user_ns, sbi->s_uid), | ||
143 | from_kgid_munged(&init_user_ns, sbi->s_gid)); | ||
142 | if (sbi->s_file_umask != 0133) | 144 | if (sbi->s_file_umask != 0133) |
143 | seq_printf(seq, ",file_umask=%o", sbi->s_file_umask); | 145 | seq_printf(seq, ",file_umask=%o", sbi->s_file_umask); |
144 | if (sbi->s_dir_umask != 0022) | 146 | if (sbi->s_dir_umask != 0022) |
@@ -254,14 +256,22 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) | |||
254 | printk(KERN_ERR "hfs: uid requires an argument\n"); | 256 | printk(KERN_ERR "hfs: uid requires an argument\n"); |
255 | return 0; | 257 | return 0; |
256 | } | 258 | } |
257 | hsb->s_uid = (uid_t)tmp; | 259 | hsb->s_uid = make_kuid(current_user_ns(), (uid_t)tmp); |
260 | if (!uid_valid(hsb->s_uid)) { | ||
261 | printk(KERN_ERR "hfs: invalid uid %d\n", tmp); | ||
262 | return 0; | ||
263 | } | ||
258 | break; | 264 | break; |
259 | case opt_gid: | 265 | case opt_gid: |
260 | if (match_int(&args[0], &tmp)) { | 266 | if (match_int(&args[0], &tmp)) { |
261 | printk(KERN_ERR "hfs: gid requires an argument\n"); | 267 | printk(KERN_ERR "hfs: gid requires an argument\n"); |
262 | return 0; | 268 | return 0; |
263 | } | 269 | } |
264 | hsb->s_gid = (gid_t)tmp; | 270 | hsb->s_gid = make_kgid(current_user_ns(), (gid_t)tmp); |
271 | if (!gid_valid(hsb->s_gid)) { | ||
272 | printk(KERN_ERR "hfs: invalid gid %d\n", tmp); | ||
273 | return 0; | ||
274 | } | ||
265 | break; | 275 | break; |
266 | case opt_umask: | 276 | case opt_umask: |
267 | if (match_octal(&args[0], &tmp)) { | 277 | if (match_octal(&args[0], &tmp)) { |
@@ -482,6 +492,12 @@ static int __init init_hfs_fs(void) | |||
482 | static void __exit exit_hfs_fs(void) | 492 | static void __exit exit_hfs_fs(void) |
483 | { | 493 | { |
484 | unregister_filesystem(&hfs_fs_type); | 494 | unregister_filesystem(&hfs_fs_type); |
495 | |||
496 | /* | ||
497 | * Make sure all delayed rcu free inodes are flushed before we | ||
498 | * destroy cache. | ||
499 | */ | ||
500 | rcu_barrier(); | ||
485 | kmem_cache_destroy(hfs_inode_cachep); | 501 | kmem_cache_destroy(hfs_inode_cachep); |
486 | } | 502 | } |
487 | 503 | ||
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index ec2a9c23f0c9..798d9c4c5e71 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c | |||
@@ -80,8 +80,8 @@ void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms) | |||
80 | 80 | ||
81 | perms->userflags = HFSPLUS_I(inode)->userflags; | 81 | perms->userflags = HFSPLUS_I(inode)->userflags; |
82 | perms->mode = cpu_to_be16(inode->i_mode); | 82 | perms->mode = cpu_to_be16(inode->i_mode); |
83 | perms->owner = cpu_to_be32(inode->i_uid); | 83 | perms->owner = cpu_to_be32(i_uid_read(inode)); |
84 | perms->group = cpu_to_be32(inode->i_gid); | 84 | perms->group = cpu_to_be32(i_gid_read(inode)); |
85 | 85 | ||
86 | if (S_ISREG(inode->i_mode)) | 86 | if (S_ISREG(inode->i_mode)) |
87 | perms->dev = cpu_to_be32(inode->i_nlink); | 87 | perms->dev = cpu_to_be32(inode->i_nlink); |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 558dbb463a4e..c571de224b15 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
@@ -149,8 +149,8 @@ struct hfsplus_sb_info { | |||
149 | u32 type; | 149 | u32 type; |
150 | 150 | ||
151 | umode_t umask; | 151 | umode_t umask; |
152 | uid_t uid; | 152 | kuid_t uid; |
153 | gid_t gid; | 153 | kgid_t gid; |
154 | 154 | ||
155 | int part, session; | 155 | int part, session; |
156 | unsigned long flags; | 156 | unsigned long flags; |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 3d8b4a675ba0..2172aa5976f5 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
@@ -233,12 +233,12 @@ static void hfsplus_get_perms(struct inode *inode, | |||
233 | 233 | ||
234 | mode = be16_to_cpu(perms->mode); | 234 | mode = be16_to_cpu(perms->mode); |
235 | 235 | ||
236 | inode->i_uid = be32_to_cpu(perms->owner); | 236 | i_uid_write(inode, be32_to_cpu(perms->owner)); |
237 | if (!inode->i_uid && !mode) | 237 | if (!i_uid_read(inode) && !mode) |
238 | inode->i_uid = sbi->uid; | 238 | inode->i_uid = sbi->uid; |
239 | 239 | ||
240 | inode->i_gid = be32_to_cpu(perms->group); | 240 | i_gid_write(inode, be32_to_cpu(perms->group)); |
241 | if (!inode->i_gid && !mode) | 241 | if (!i_gid_read(inode) && !mode) |
242 | inode->i_gid = sbi->gid; | 242 | inode->i_gid = sbi->gid; |
243 | 243 | ||
244 | if (dir) { | 244 | if (dir) { |
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 06fa5618600c..ed257c671615 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c | |||
@@ -135,14 +135,22 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) | |||
135 | printk(KERN_ERR "hfs: uid requires an argument\n"); | 135 | printk(KERN_ERR "hfs: uid requires an argument\n"); |
136 | return 0; | 136 | return 0; |
137 | } | 137 | } |
138 | sbi->uid = (uid_t)tmp; | 138 | sbi->uid = make_kuid(current_user_ns(), (uid_t)tmp); |
139 | if (!uid_valid(sbi->uid)) { | ||
140 | printk(KERN_ERR "hfs: invalid uid specified\n"); | ||
141 | return 0; | ||
142 | } | ||
139 | break; | 143 | break; |
140 | case opt_gid: | 144 | case opt_gid: |
141 | if (match_int(&args[0], &tmp)) { | 145 | if (match_int(&args[0], &tmp)) { |
142 | printk(KERN_ERR "hfs: gid requires an argument\n"); | 146 | printk(KERN_ERR "hfs: gid requires an argument\n"); |
143 | return 0; | 147 | return 0; |
144 | } | 148 | } |
145 | sbi->gid = (gid_t)tmp; | 149 | sbi->gid = make_kgid(current_user_ns(), (gid_t)tmp); |
150 | if (!gid_valid(sbi->gid)) { | ||
151 | printk(KERN_ERR "hfs: invalid gid specified\n"); | ||
152 | return 0; | ||
153 | } | ||
146 | break; | 154 | break; |
147 | case opt_part: | 155 | case opt_part: |
148 | if (match_int(&args[0], &sbi->part)) { | 156 | if (match_int(&args[0], &sbi->part)) { |
@@ -215,7 +223,8 @@ int hfsplus_show_options(struct seq_file *seq, struct dentry *root) | |||
215 | if (sbi->type != HFSPLUS_DEF_CR_TYPE) | 223 | if (sbi->type != HFSPLUS_DEF_CR_TYPE) |
216 | seq_printf(seq, ",type=%.4s", (char *)&sbi->type); | 224 | seq_printf(seq, ",type=%.4s", (char *)&sbi->type); |
217 | seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, | 225 | seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, |
218 | sbi->uid, sbi->gid); | 226 | from_kuid_munged(&init_user_ns, sbi->uid), |
227 | from_kgid_munged(&init_user_ns, sbi->gid)); | ||
219 | if (sbi->part >= 0) | 228 | if (sbi->part >= 0) |
220 | seq_printf(seq, ",part=%u", sbi->part); | 229 | seq_printf(seq, ",part=%u", sbi->part); |
221 | if (sbi->session >= 0) | 230 | if (sbi->session >= 0) |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index fdafb2d71654..811a84d2d964 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -635,6 +635,12 @@ static int __init init_hfsplus_fs(void) | |||
635 | static void __exit exit_hfsplus_fs(void) | 635 | static void __exit exit_hfsplus_fs(void) |
636 | { | 636 | { |
637 | unregister_filesystem(&hfsplus_fs_type); | 637 | unregister_filesystem(&hfsplus_fs_type); |
638 | |||
639 | /* | ||
640 | * Make sure all delayed rcu free inodes are flushed before we | ||
641 | * destroy cache. | ||
642 | */ | ||
643 | rcu_barrier(); | ||
638 | kmem_cache_destroy(hfsplus_inode_cachep); | 644 | kmem_cache_destroy(hfsplus_inode_cachep); |
639 | } | 645 | } |
640 | 646 | ||
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 124146543aa7..6c9f3a9d5e21 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -542,8 +542,8 @@ static int read_name(struct inode *ino, char *name) | |||
542 | ino->i_ino = st.ino; | 542 | ino->i_ino = st.ino; |
543 | ino->i_mode = st.mode; | 543 | ino->i_mode = st.mode; |
544 | set_nlink(ino, st.nlink); | 544 | set_nlink(ino, st.nlink); |
545 | ino->i_uid = st.uid; | 545 | i_uid_write(ino, st.uid); |
546 | ino->i_gid = st.gid; | 546 | i_gid_write(ino, st.gid); |
547 | ino->i_atime = st.atime; | 547 | ino->i_atime = st.atime; |
548 | ino->i_mtime = st.mtime; | 548 | ino->i_mtime = st.mtime; |
549 | ino->i_ctime = st.ctime; | 549 | ino->i_ctime = st.ctime; |
@@ -808,11 +808,11 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
808 | } | 808 | } |
809 | if (attr->ia_valid & ATTR_UID) { | 809 | if (attr->ia_valid & ATTR_UID) { |
810 | attrs.ia_valid |= HOSTFS_ATTR_UID; | 810 | attrs.ia_valid |= HOSTFS_ATTR_UID; |
811 | attrs.ia_uid = attr->ia_uid; | 811 | attrs.ia_uid = from_kuid(&init_user_ns, attr->ia_uid); |
812 | } | 812 | } |
813 | if (attr->ia_valid & ATTR_GID) { | 813 | if (attr->ia_valid & ATTR_GID) { |
814 | attrs.ia_valid |= HOSTFS_ATTR_GID; | 814 | attrs.ia_valid |= HOSTFS_ATTR_GID; |
815 | attrs.ia_gid = attr->ia_gid; | 815 | attrs.ia_gid = from_kgid(&init_user_ns, attr->ia_gid); |
816 | } | 816 | } |
817 | if (attr->ia_valid & ATTR_SIZE) { | 817 | if (attr->ia_valid & ATTR_SIZE) { |
818 | attrs.ia_valid |= HOSTFS_ATTR_SIZE; | 818 | attrs.ia_valid |= HOSTFS_ATTR_SIZE; |
diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c index 4bae4a4a60b1..2d5b254ad9e2 100644 --- a/fs/hpfs/anode.c +++ b/fs/hpfs/anode.c | |||
@@ -102,7 +102,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi | |||
102 | return -1; | 102 | return -1; |
103 | } | 103 | } |
104 | if (hpfs_alloc_if_possible(s, se = le32_to_cpu(btree->u.external[n].disk_secno) + le32_to_cpu(btree->u.external[n].length))) { | 104 | if (hpfs_alloc_if_possible(s, se = le32_to_cpu(btree->u.external[n].disk_secno) + le32_to_cpu(btree->u.external[n].length))) { |
105 | btree->u.external[n].length = cpu_to_le32(le32_to_cpu(btree->u.external[n].length) + 1); | 105 | le32_add_cpu(&btree->u.external[n].length, 1); |
106 | mark_buffer_dirty(bh); | 106 | mark_buffer_dirty(bh); |
107 | brelse(bh); | 107 | brelse(bh); |
108 | return se; | 108 | return se; |
@@ -153,7 +153,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi | |||
153 | btree = &anode->btree; | 153 | btree = &anode->btree; |
154 | } | 154 | } |
155 | btree->n_free_nodes--; n = btree->n_used_nodes++; | 155 | btree->n_free_nodes--; n = btree->n_used_nodes++; |
156 | btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 12); | 156 | le16_add_cpu(&btree->first_free, 12); |
157 | btree->u.external[n].disk_secno = cpu_to_le32(se); | 157 | btree->u.external[n].disk_secno = cpu_to_le32(se); |
158 | btree->u.external[n].file_secno = cpu_to_le32(fs); | 158 | btree->u.external[n].file_secno = cpu_to_le32(fs); |
159 | btree->u.external[n].length = cpu_to_le32(1); | 159 | btree->u.external[n].length = cpu_to_le32(1); |
@@ -174,7 +174,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi | |||
174 | } | 174 | } |
175 | if (btree->n_free_nodes) { | 175 | if (btree->n_free_nodes) { |
176 | btree->n_free_nodes--; n = btree->n_used_nodes++; | 176 | btree->n_free_nodes--; n = btree->n_used_nodes++; |
177 | btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 8); | 177 | le16_add_cpu(&btree->first_free, 8); |
178 | btree->u.internal[n].file_secno = cpu_to_le32(-1); | 178 | btree->u.internal[n].file_secno = cpu_to_le32(-1); |
179 | btree->u.internal[n].down = cpu_to_le32(na); | 179 | btree->u.internal[n].down = cpu_to_le32(na); |
180 | btree->u.internal[n-1].file_secno = cpu_to_le32(fs); | 180 | btree->u.internal[n-1].file_secno = cpu_to_le32(fs); |
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c index 3228c524ebe5..4364b2a02c5d 100644 --- a/fs/hpfs/dnode.c +++ b/fs/hpfs/dnode.c | |||
@@ -145,10 +145,10 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno | |||
145 | } | 145 | } |
146 | } | 146 | } |
147 | if (ptr) { | 147 | if (ptr) { |
148 | d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + 4); | 148 | le32_add_cpu(&d->first_free, 4); |
149 | if (le32_to_cpu(d->first_free) > 2048) { | 149 | if (le32_to_cpu(d->first_free) > 2048) { |
150 | hpfs_error(s, "set_last_pointer: too long dnode %08x", le32_to_cpu(d->self)); | 150 | hpfs_error(s, "set_last_pointer: too long dnode %08x", le32_to_cpu(d->self)); |
151 | d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - 4); | 151 | le32_add_cpu(&d->first_free, -4); |
152 | return; | 152 | return; |
153 | } | 153 | } |
154 | de->length = cpu_to_le16(36); | 154 | de->length = cpu_to_le16(36); |
@@ -184,7 +184,7 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d, | |||
184 | de->not_8x3 = hpfs_is_name_long(name, namelen); | 184 | de->not_8x3 = hpfs_is_name_long(name, namelen); |
185 | de->namelen = namelen; | 185 | de->namelen = namelen; |
186 | memcpy(de->name, name, namelen); | 186 | memcpy(de->name, name, namelen); |
187 | d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + d_size); | 187 | le32_add_cpu(&d->first_free, d_size); |
188 | return de; | 188 | return de; |
189 | } | 189 | } |
190 | 190 | ||
@@ -314,7 +314,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno, | |||
314 | set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0); | 314 | set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0); |
315 | de = de_next_de(de); | 315 | de = de_next_de(de); |
316 | memmove((char *)nd + 20, de, le32_to_cpu(nd->first_free) + (char *)nd - (char *)de); | 316 | memmove((char *)nd + 20, de, le32_to_cpu(nd->first_free) + (char *)nd - (char *)de); |
317 | nd->first_free = cpu_to_le32(le32_to_cpu(nd->first_free) - ((char *)de - (char *)nd - 20)); | 317 | le32_add_cpu(&nd->first_free, -((char *)de - (char *)nd - 20)); |
318 | memcpy(d, nd, le32_to_cpu(nd->first_free)); | 318 | memcpy(d, nd, le32_to_cpu(nd->first_free)); |
319 | for_all_poss(i, hpfs_pos_del, (loff_t)dno << 4, pos); | 319 | for_all_poss(i, hpfs_pos_del, (loff_t)dno << 4, pos); |
320 | fix_up_ptrs(i->i_sb, ad); | 320 | fix_up_ptrs(i->i_sb, ad); |
@@ -474,8 +474,8 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to) | |||
474 | hpfs_brelse4(&qbh); | 474 | hpfs_brelse4(&qbh); |
475 | return 0; | 475 | return 0; |
476 | } | 476 | } |
477 | dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4); | 477 | le32_add_cpu(&dnode->first_free, -4); |
478 | de->length = cpu_to_le16(le16_to_cpu(de->length) - 4); | 478 | le16_add_cpu(&de->length, -4); |
479 | de->down = 0; | 479 | de->down = 0; |
480 | hpfs_mark_4buffers_dirty(&qbh); | 480 | hpfs_mark_4buffers_dirty(&qbh); |
481 | dno = up; | 481 | dno = up; |
@@ -570,8 +570,8 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) | |||
570 | for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, ((loff_t)up << 4) | p); | 570 | for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, ((loff_t)up << 4) | p); |
571 | if (!down) { | 571 | if (!down) { |
572 | de->down = 0; | 572 | de->down = 0; |
573 | de->length = cpu_to_le16(le16_to_cpu(de->length) - 4); | 573 | le16_add_cpu(&de->length, -4); |
574 | dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4); | 574 | le32_add_cpu(&dnode->first_free, -4); |
575 | memmove(de_next_de(de), (char *)de_next_de(de) + 4, | 575 | memmove(de_next_de(de), (char *)de_next_de(de) + 4, |
576 | (char *)dnode + le32_to_cpu(dnode->first_free) - (char *)de_next_de(de)); | 576 | (char *)dnode + le32_to_cpu(dnode->first_free) - (char *)de_next_de(de)); |
577 | } else { | 577 | } else { |
@@ -647,14 +647,14 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) | |||
647 | printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n"); | 647 | printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n"); |
648 | printk("HPFS: warning: goin'on\n"); | 648 | printk("HPFS: warning: goin'on\n"); |
649 | } | 649 | } |
650 | del->length = cpu_to_le16(le16_to_cpu(del->length) + 4); | 650 | le16_add_cpu(&del->length, 4); |
651 | del->down = 1; | 651 | del->down = 1; |
652 | d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) + 4); | 652 | le32_add_cpu(&d1->first_free, 4); |
653 | } | 653 | } |
654 | if (dlp && !down) { | 654 | if (dlp && !down) { |
655 | del->length = cpu_to_le16(le16_to_cpu(del->length) - 4); | 655 | le16_add_cpu(&del->length, -4); |
656 | del->down = 0; | 656 | del->down = 0; |
657 | d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4); | 657 | le32_add_cpu(&d1->first_free, -4); |
658 | } else if (down) | 658 | } else if (down) |
659 | *(__le32 *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); | 659 | *(__le32 *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); |
660 | } else goto endm; | 660 | } else goto endm; |
@@ -668,9 +668,9 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) | |||
668 | memcpy(de_cp, de_prev, le16_to_cpu(de_prev->length)); | 668 | memcpy(de_cp, de_prev, le16_to_cpu(de_prev->length)); |
669 | hpfs_delete_de(i->i_sb, dnode, de_prev); | 669 | hpfs_delete_de(i->i_sb, dnode, de_prev); |
670 | if (!de_prev->down) { | 670 | if (!de_prev->down) { |
671 | de_prev->length = cpu_to_le16(le16_to_cpu(de_prev->length) + 4); | 671 | le16_add_cpu(&de_prev->length, 4); |
672 | de_prev->down = 1; | 672 | de_prev->down = 1; |
673 | dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4); | 673 | le32_add_cpu(&dnode->first_free, 4); |
674 | } | 674 | } |
675 | *(__le32 *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown); | 675 | *(__le32 *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown); |
676 | hpfs_mark_4buffers_dirty(&qbh); | 676 | hpfs_mark_4buffers_dirty(&qbh); |
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index ac1ead194db5..7102aaecc244 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h | |||
@@ -63,8 +63,8 @@ struct hpfs_sb_info { | |||
63 | unsigned sb_dmap; /* sector number of dnode bit map */ | 63 | unsigned sb_dmap; /* sector number of dnode bit map */ |
64 | unsigned sb_n_free; /* free blocks for statfs, or -1 */ | 64 | unsigned sb_n_free; /* free blocks for statfs, or -1 */ |
65 | unsigned sb_n_free_dnodes; /* free dnodes for statfs, or -1 */ | 65 | unsigned sb_n_free_dnodes; /* free dnodes for statfs, or -1 */ |
66 | uid_t sb_uid; /* uid from mount options */ | 66 | kuid_t sb_uid; /* uid from mount options */ |
67 | gid_t sb_gid; /* gid from mount options */ | 67 | kgid_t sb_gid; /* gid from mount options */ |
68 | umode_t sb_mode; /* mode from mount options */ | 68 | umode_t sb_mode; /* mode from mount options */ |
69 | unsigned sb_eas : 2; /* eas: 0-ignore, 1-ro, 2-rw */ | 69 | unsigned sb_eas : 2; /* eas: 0-ignore, 1-ro, 2-rw */ |
70 | unsigned sb_err : 2; /* on errs: 0-cont, 1-ro, 2-panic */ | 70 | unsigned sb_err : 2; /* on errs: 0-cont, 1-ro, 2-panic */ |
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index ed671e0ea784..804a9a842cbc 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c | |||
@@ -7,6 +7,7 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
10 | #include <linux/user_namespace.h> | ||
10 | #include "hpfs_fn.h" | 11 | #include "hpfs_fn.h" |
11 | 12 | ||
12 | void hpfs_init_inode(struct inode *i) | 13 | void hpfs_init_inode(struct inode *i) |
@@ -60,14 +61,14 @@ void hpfs_read_inode(struct inode *i) | |||
60 | if (hpfs_sb(i->i_sb)->sb_eas) { | 61 | if (hpfs_sb(i->i_sb)->sb_eas) { |
61 | if ((ea = hpfs_get_ea(i->i_sb, fnode, "UID", &ea_size))) { | 62 | if ((ea = hpfs_get_ea(i->i_sb, fnode, "UID", &ea_size))) { |
62 | if (ea_size == 2) { | 63 | if (ea_size == 2) { |
63 | i->i_uid = le16_to_cpu(*(__le16*)ea); | 64 | i_uid_write(i, le16_to_cpu(*(__le16*)ea)); |
64 | hpfs_inode->i_ea_uid = 1; | 65 | hpfs_inode->i_ea_uid = 1; |
65 | } | 66 | } |
66 | kfree(ea); | 67 | kfree(ea); |
67 | } | 68 | } |
68 | if ((ea = hpfs_get_ea(i->i_sb, fnode, "GID", &ea_size))) { | 69 | if ((ea = hpfs_get_ea(i->i_sb, fnode, "GID", &ea_size))) { |
69 | if (ea_size == 2) { | 70 | if (ea_size == 2) { |
70 | i->i_gid = le16_to_cpu(*(__le16*)ea); | 71 | i_gid_write(i, le16_to_cpu(*(__le16*)ea)); |
71 | hpfs_inode->i_ea_gid = 1; | 72 | hpfs_inode->i_ea_gid = 1; |
72 | } | 73 | } |
73 | kfree(ea); | 74 | kfree(ea); |
@@ -149,13 +150,13 @@ static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode) | |||
149 | hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino); | 150 | hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino); |
150 | } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) { | 151 | } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) { |
151 | __le32 ea; | 152 | __le32 ea; |
152 | if ((i->i_uid != hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) { | 153 | if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) { |
153 | ea = cpu_to_le32(i->i_uid); | 154 | ea = cpu_to_le32(i_uid_read(i)); |
154 | hpfs_set_ea(i, fnode, "UID", (char*)&ea, 2); | 155 | hpfs_set_ea(i, fnode, "UID", (char*)&ea, 2); |
155 | hpfs_inode->i_ea_uid = 1; | 156 | hpfs_inode->i_ea_uid = 1; |
156 | } | 157 | } |
157 | if ((i->i_gid != hpfs_sb(i->i_sb)->sb_gid) || hpfs_inode->i_ea_gid) { | 158 | if (!gid_eq(i->i_gid, hpfs_sb(i->i_sb)->sb_gid) || hpfs_inode->i_ea_gid) { |
158 | ea = cpu_to_le32(i->i_gid); | 159 | ea = cpu_to_le32(i_gid_read(i)); |
159 | hpfs_set_ea(i, fnode, "GID", (char *)&ea, 2); | 160 | hpfs_set_ea(i, fnode, "GID", (char *)&ea, 2); |
160 | hpfs_inode->i_ea_gid = 1; | 161 | hpfs_inode->i_ea_gid = 1; |
161 | } | 162 | } |
@@ -261,9 +262,11 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
261 | hpfs_lock(inode->i_sb); | 262 | hpfs_lock(inode->i_sb); |
262 | if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root) | 263 | if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root) |
263 | goto out_unlock; | 264 | goto out_unlock; |
264 | if ((attr->ia_valid & ATTR_UID) && attr->ia_uid >= 0x10000) | 265 | if ((attr->ia_valid & ATTR_UID) && |
266 | from_kuid(&init_user_ns, attr->ia_uid) >= 0x10000) | ||
265 | goto out_unlock; | 267 | goto out_unlock; |
266 | if ((attr->ia_valid & ATTR_GID) && attr->ia_gid >= 0x10000) | 268 | if ((attr->ia_valid & ATTR_GID) && |
269 | from_kgid(&init_user_ns, attr->ia_gid) >= 0x10000) | ||
267 | goto out_unlock; | 270 | goto out_unlock; |
268 | if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) | 271 | if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) |
269 | goto out_unlock; | 272 | goto out_unlock; |
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index bc9082482f68..345713d2f8f3 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c | |||
@@ -91,8 +91,8 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
91 | inc_nlink(dir); | 91 | inc_nlink(dir); |
92 | insert_inode_hash(result); | 92 | insert_inode_hash(result); |
93 | 93 | ||
94 | if (result->i_uid != current_fsuid() || | 94 | if (!uid_eq(result->i_uid, current_fsuid()) || |
95 | result->i_gid != current_fsgid() || | 95 | !gid_eq(result->i_gid, current_fsgid()) || |
96 | result->i_mode != (mode | S_IFDIR)) { | 96 | result->i_mode != (mode | S_IFDIR)) { |
97 | result->i_uid = current_fsuid(); | 97 | result->i_uid = current_fsuid(); |
98 | result->i_gid = current_fsgid(); | 98 | result->i_gid = current_fsgid(); |
@@ -179,8 +179,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, b | |||
179 | 179 | ||
180 | insert_inode_hash(result); | 180 | insert_inode_hash(result); |
181 | 181 | ||
182 | if (result->i_uid != current_fsuid() || | 182 | if (!uid_eq(result->i_uid, current_fsuid()) || |
183 | result->i_gid != current_fsgid() || | 183 | !gid_eq(result->i_gid, current_fsgid()) || |
184 | result->i_mode != (mode | S_IFREG)) { | 184 | result->i_mode != (mode | S_IFREG)) { |
185 | result->i_uid = current_fsuid(); | 185 | result->i_uid = current_fsuid(); |
186 | result->i_gid = current_fsgid(); | 186 | result->i_gid = current_fsgid(); |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 706a12c083ea..bc28bf077a6a 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -210,6 +210,11 @@ static int init_inodecache(void) | |||
210 | 210 | ||
211 | static void destroy_inodecache(void) | 211 | static void destroy_inodecache(void) |
212 | { | 212 | { |
213 | /* | ||
214 | * Make sure all delayed rcu free inodes are flushed before we | ||
215 | * destroy cache. | ||
216 | */ | ||
217 | rcu_barrier(); | ||
213 | kmem_cache_destroy(hpfs_inode_cachep); | 218 | kmem_cache_destroy(hpfs_inode_cachep); |
214 | } | 219 | } |
215 | 220 | ||
@@ -251,7 +256,7 @@ static const match_table_t tokens = { | |||
251 | {Opt_err, NULL}, | 256 | {Opt_err, NULL}, |
252 | }; | 257 | }; |
253 | 258 | ||
254 | static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask, | 259 | static int parse_opts(char *opts, kuid_t *uid, kgid_t *gid, umode_t *umask, |
255 | int *lowercase, int *eas, int *chk, int *errs, | 260 | int *lowercase, int *eas, int *chk, int *errs, |
256 | int *chkdsk, int *timeshift) | 261 | int *chkdsk, int *timeshift) |
257 | { | 262 | { |
@@ -276,12 +281,16 @@ static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask, | |||
276 | case Opt_uid: | 281 | case Opt_uid: |
277 | if (match_int(args, &option)) | 282 | if (match_int(args, &option)) |
278 | return 0; | 283 | return 0; |
279 | *uid = option; | 284 | *uid = make_kuid(current_user_ns(), option); |
285 | if (!uid_valid(*uid)) | ||
286 | return 0; | ||
280 | break; | 287 | break; |
281 | case Opt_gid: | 288 | case Opt_gid: |
282 | if (match_int(args, &option)) | 289 | if (match_int(args, &option)) |
283 | return 0; | 290 | return 0; |
284 | *gid = option; | 291 | *gid = make_kgid(current_user_ns(), option); |
292 | if (!gid_valid(*gid)) | ||
293 | return 0; | ||
285 | break; | 294 | break; |
286 | case Opt_umask: | 295 | case Opt_umask: |
287 | if (match_octal(args, &option)) | 296 | if (match_octal(args, &option)) |
@@ -378,8 +387,8 @@ HPFS filesystem options:\n\ | |||
378 | 387 | ||
379 | static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) | 388 | static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) |
380 | { | 389 | { |
381 | uid_t uid; | 390 | kuid_t uid; |
382 | gid_t gid; | 391 | kgid_t gid; |
383 | umode_t umask; | 392 | umode_t umask; |
384 | int lowercase, eas, chk, errs, chkdsk, timeshift; | 393 | int lowercase, eas, chk, errs, chkdsk, timeshift; |
385 | int o; | 394 | int o; |
@@ -455,8 +464,8 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) | |||
455 | struct hpfs_sb_info *sbi; | 464 | struct hpfs_sb_info *sbi; |
456 | struct inode *root; | 465 | struct inode *root; |
457 | 466 | ||
458 | uid_t uid; | 467 | kuid_t uid; |
459 | gid_t gid; | 468 | kgid_t gid; |
460 | umode_t umask; | 469 | umode_t umask; |
461 | int lowercase, eas, chk, errs, chkdsk, timeshift; | 470 | int lowercase, eas, chk, errs, chkdsk, timeshift; |
462 | 471 | ||
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8349a899912e..c5bc355d8243 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -42,8 +42,8 @@ static const struct inode_operations hugetlbfs_dir_inode_operations; | |||
42 | static const struct inode_operations hugetlbfs_inode_operations; | 42 | static const struct inode_operations hugetlbfs_inode_operations; |
43 | 43 | ||
44 | struct hugetlbfs_config { | 44 | struct hugetlbfs_config { |
45 | uid_t uid; | 45 | kuid_t uid; |
46 | gid_t gid; | 46 | kgid_t gid; |
47 | umode_t mode; | 47 | umode_t mode; |
48 | long nr_blocks; | 48 | long nr_blocks; |
49 | long nr_inodes; | 49 | long nr_inodes; |
@@ -110,7 +110,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
110 | * way when do_mmap_pgoff unwinds (may be important on powerpc | 110 | * way when do_mmap_pgoff unwinds (may be important on powerpc |
111 | * and ia64). | 111 | * and ia64). |
112 | */ | 112 | */ |
113 | vma->vm_flags |= VM_HUGETLB | VM_RESERVED; | 113 | vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND | VM_DONTDUMP; |
114 | vma->vm_ops = &hugetlb_vm_ops; | 114 | vma->vm_ops = &hugetlb_vm_ops; |
115 | 115 | ||
116 | if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) | 116 | if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) |
@@ -397,17 +397,16 @@ static void hugetlbfs_evict_inode(struct inode *inode) | |||
397 | } | 397 | } |
398 | 398 | ||
399 | static inline void | 399 | static inline void |
400 | hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) | 400 | hugetlb_vmtruncate_list(struct rb_root *root, pgoff_t pgoff) |
401 | { | 401 | { |
402 | struct vm_area_struct *vma; | 402 | struct vm_area_struct *vma; |
403 | struct prio_tree_iter iter; | ||
404 | 403 | ||
405 | vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) { | 404 | vma_interval_tree_foreach(vma, root, pgoff, ULONG_MAX) { |
406 | unsigned long v_offset; | 405 | unsigned long v_offset; |
407 | 406 | ||
408 | /* | 407 | /* |
409 | * Can the expression below overflow on 32-bit arches? | 408 | * Can the expression below overflow on 32-bit arches? |
410 | * No, because the prio_tree returns us only those vmas | 409 | * No, because the interval tree returns us only those vmas |
411 | * which overlap the truncated area starting at pgoff, | 410 | * which overlap the truncated area starting at pgoff, |
412 | * and no vma on a 32-bit arch can span beyond the 4GB. | 411 | * and no vma on a 32-bit arch can span beyond the 4GB. |
413 | */ | 412 | */ |
@@ -432,7 +431,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) | |||
432 | 431 | ||
433 | i_size_write(inode, offset); | 432 | i_size_write(inode, offset); |
434 | mutex_lock(&mapping->i_mmap_mutex); | 433 | mutex_lock(&mapping->i_mmap_mutex); |
435 | if (!prio_tree_empty(&mapping->i_mmap)) | 434 | if (!RB_EMPTY_ROOT(&mapping->i_mmap)) |
436 | hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); | 435 | hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); |
437 | mutex_unlock(&mapping->i_mmap_mutex); | 436 | mutex_unlock(&mapping->i_mmap_mutex); |
438 | truncate_hugepages(inode, offset); | 437 | truncate_hugepages(inode, offset); |
@@ -785,13 +784,17 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | |||
785 | case Opt_uid: | 784 | case Opt_uid: |
786 | if (match_int(&args[0], &option)) | 785 | if (match_int(&args[0], &option)) |
787 | goto bad_val; | 786 | goto bad_val; |
788 | pconfig->uid = option; | 787 | pconfig->uid = make_kuid(current_user_ns(), option); |
788 | if (!uid_valid(pconfig->uid)) | ||
789 | goto bad_val; | ||
789 | break; | 790 | break; |
790 | 791 | ||
791 | case Opt_gid: | 792 | case Opt_gid: |
792 | if (match_int(&args[0], &option)) | 793 | if (match_int(&args[0], &option)) |
793 | goto bad_val; | 794 | goto bad_val; |
794 | pconfig->gid = option; | 795 | pconfig->gid = make_kgid(current_user_ns(), option); |
796 | if (!gid_valid(pconfig->gid)) | ||
797 | goto bad_val; | ||
795 | break; | 798 | break; |
796 | 799 | ||
797 | case Opt_mode: | 800 | case Opt_mode: |
@@ -924,7 +927,9 @@ static struct vfsmount *hugetlbfs_vfsmount; | |||
924 | 927 | ||
925 | static int can_do_hugetlb_shm(void) | 928 | static int can_do_hugetlb_shm(void) |
926 | { | 929 | { |
927 | return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); | 930 | kgid_t shm_group; |
931 | shm_group = make_kgid(&init_user_ns, sysctl_hugetlb_shm_group); | ||
932 | return capable(CAP_IPC_LOCK) || in_group_p(shm_group); | ||
928 | } | 933 | } |
929 | 934 | ||
930 | struct file *hugetlb_file_setup(const char *name, unsigned long addr, | 935 | struct file *hugetlb_file_setup(const char *name, unsigned long addr, |
@@ -1042,6 +1047,11 @@ static int __init init_hugetlbfs_fs(void) | |||
1042 | 1047 | ||
1043 | static void __exit exit_hugetlbfs_fs(void) | 1048 | static void __exit exit_hugetlbfs_fs(void) |
1044 | { | 1049 | { |
1050 | /* | ||
1051 | * Make sure all delayed rcu free inodes are flushed before we | ||
1052 | * destroy cache. | ||
1053 | */ | ||
1054 | rcu_barrier(); | ||
1045 | kmem_cache_destroy(hugetlbfs_inode_cachep); | 1055 | kmem_cache_destroy(hugetlbfs_inode_cachep); |
1046 | kern_unmount(hugetlbfs_vfsmount); | 1056 | kern_unmount(hugetlbfs_vfsmount); |
1047 | unregister_filesystem(&hugetlbfs_fs_type); | 1057 | unregister_filesystem(&hugetlbfs_fs_type); |
diff --git a/fs/inode.c b/fs/inode.c index ac8d904b3f16..b03c71957246 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -348,7 +348,7 @@ void address_space_init_once(struct address_space *mapping) | |||
348 | mutex_init(&mapping->i_mmap_mutex); | 348 | mutex_init(&mapping->i_mmap_mutex); |
349 | INIT_LIST_HEAD(&mapping->private_list); | 349 | INIT_LIST_HEAD(&mapping->private_list); |
350 | spin_lock_init(&mapping->private_lock); | 350 | spin_lock_init(&mapping->private_lock); |
351 | INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); | 351 | mapping->i_mmap = RB_ROOT; |
352 | INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); | 352 | INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); |
353 | } | 353 | } |
354 | EXPORT_SYMBOL(address_space_init_once); | 354 | EXPORT_SYMBOL(address_space_init_once); |
diff --git a/fs/ioctl.c b/fs/ioctl.c index 29167bebe874..3bdad6d1f268 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -603,21 +603,14 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, | |||
603 | 603 | ||
604 | SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) | 604 | SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) |
605 | { | 605 | { |
606 | struct file *filp; | 606 | int error; |
607 | int error = -EBADF; | 607 | struct fd f = fdget(fd); |
608 | int fput_needed; | 608 | |
609 | 609 | if (!f.file) | |
610 | filp = fget_light(fd, &fput_needed); | 610 | return -EBADF; |
611 | if (!filp) | 611 | error = security_file_ioctl(f.file, cmd, arg); |
612 | goto out; | 612 | if (!error) |
613 | 613 | error = do_vfs_ioctl(f.file, fd, cmd, arg); | |
614 | error = security_file_ioctl(filp, cmd, arg); | 614 | fdput(f); |
615 | if (error) | ||
616 | goto out_fput; | ||
617 | |||
618 | error = do_vfs_ioctl(filp, fd, cmd, arg); | ||
619 | out_fput: | ||
620 | fput_light(filp, fput_needed); | ||
621 | out: | ||
622 | return error; | 615 | return error; |
623 | } | 616 | } |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 29037c365ba4..67ce52507d7d 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/cdrom.h> | 21 | #include <linux/cdrom.h> |
22 | #include <linux/parser.h> | 22 | #include <linux/parser.h> |
23 | #include <linux/mpage.h> | 23 | #include <linux/mpage.h> |
24 | #include <linux/user_namespace.h> | ||
24 | 25 | ||
25 | #include "isofs.h" | 26 | #include "isofs.h" |
26 | #include "zisofs.h" | 27 | #include "zisofs.h" |
@@ -114,6 +115,11 @@ static int init_inodecache(void) | |||
114 | 115 | ||
115 | static void destroy_inodecache(void) | 116 | static void destroy_inodecache(void) |
116 | { | 117 | { |
118 | /* | ||
119 | * Make sure all delayed rcu free inodes are flushed before we | ||
120 | * destroy cache. | ||
121 | */ | ||
122 | rcu_barrier(); | ||
117 | kmem_cache_destroy(isofs_inode_cachep); | 123 | kmem_cache_destroy(isofs_inode_cachep); |
118 | } | 124 | } |
119 | 125 | ||
@@ -171,8 +177,8 @@ struct iso9660_options{ | |||
171 | unsigned int blocksize; | 177 | unsigned int blocksize; |
172 | umode_t fmode; | 178 | umode_t fmode; |
173 | umode_t dmode; | 179 | umode_t dmode; |
174 | gid_t gid; | 180 | kgid_t gid; |
175 | uid_t uid; | 181 | kuid_t uid; |
176 | char *iocharset; | 182 | char *iocharset; |
177 | /* LVE */ | 183 | /* LVE */ |
178 | s32 session; | 184 | s32 session; |
@@ -383,8 +389,8 @@ static int parse_options(char *options, struct iso9660_options *popt) | |||
383 | popt->fmode = popt->dmode = ISOFS_INVALID_MODE; | 389 | popt->fmode = popt->dmode = ISOFS_INVALID_MODE; |
384 | popt->uid_set = 0; | 390 | popt->uid_set = 0; |
385 | popt->gid_set = 0; | 391 | popt->gid_set = 0; |
386 | popt->gid = 0; | 392 | popt->gid = GLOBAL_ROOT_GID; |
387 | popt->uid = 0; | 393 | popt->uid = GLOBAL_ROOT_UID; |
388 | popt->iocharset = NULL; | 394 | popt->iocharset = NULL; |
389 | popt->utf8 = 0; | 395 | popt->utf8 = 0; |
390 | popt->overriderockperm = 0; | 396 | popt->overriderockperm = 0; |
@@ -460,13 +466,17 @@ static int parse_options(char *options, struct iso9660_options *popt) | |||
460 | case Opt_uid: | 466 | case Opt_uid: |
461 | if (match_int(&args[0], &option)) | 467 | if (match_int(&args[0], &option)) |
462 | return 0; | 468 | return 0; |
463 | popt->uid = option; | 469 | popt->uid = make_kuid(current_user_ns(), option); |
470 | if (!uid_valid(popt->uid)) | ||
471 | return 0; | ||
464 | popt->uid_set = 1; | 472 | popt->uid_set = 1; |
465 | break; | 473 | break; |
466 | case Opt_gid: | 474 | case Opt_gid: |
467 | if (match_int(&args[0], &option)) | 475 | if (match_int(&args[0], &option)) |
468 | return 0; | 476 | return 0; |
469 | popt->gid = option; | 477 | popt->gid = make_kgid(current_user_ns(), option); |
478 | if (!gid_valid(popt->gid)) | ||
479 | return 0; | ||
470 | popt->gid_set = 1; | 480 | popt->gid_set = 1; |
471 | break; | 481 | break; |
472 | case Opt_mode: | 482 | case Opt_mode: |
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 3620ad1ea9bc..99167238518d 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h | |||
@@ -52,8 +52,8 @@ struct isofs_sb_info { | |||
52 | 52 | ||
53 | umode_t s_fmode; | 53 | umode_t s_fmode; |
54 | umode_t s_dmode; | 54 | umode_t s_dmode; |
55 | gid_t s_gid; | 55 | kgid_t s_gid; |
56 | uid_t s_uid; | 56 | kuid_t s_uid; |
57 | struct nls_table *s_nls_iocharset; /* Native language support table */ | 57 | struct nls_table *s_nls_iocharset; /* Native language support table */ |
58 | }; | 58 | }; |
59 | 59 | ||
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 70e79d0c756a..c0bf42472e40 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c | |||
@@ -364,8 +364,8 @@ repeat: | |||
364 | case SIG('P', 'X'): | 364 | case SIG('P', 'X'): |
365 | inode->i_mode = isonum_733(rr->u.PX.mode); | 365 | inode->i_mode = isonum_733(rr->u.PX.mode); |
366 | set_nlink(inode, isonum_733(rr->u.PX.n_links)); | 366 | set_nlink(inode, isonum_733(rr->u.PX.n_links)); |
367 | inode->i_uid = isonum_733(rr->u.PX.uid); | 367 | i_uid_write(inode, isonum_733(rr->u.PX.uid)); |
368 | inode->i_gid = isonum_733(rr->u.PX.gid); | 368 | i_gid_write(inode, isonum_733(rr->u.PX.gid)); |
369 | break; | 369 | break; |
370 | case SIG('P', 'N'): | 370 | case SIG('P', 'N'): |
371 | { | 371 | { |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 52c15c776029..86b39b167c23 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -86,7 +86,12 @@ nope: | |||
86 | static void release_data_buffer(struct buffer_head *bh) | 86 | static void release_data_buffer(struct buffer_head *bh) |
87 | { | 87 | { |
88 | if (buffer_freed(bh)) { | 88 | if (buffer_freed(bh)) { |
89 | WARN_ON_ONCE(buffer_dirty(bh)); | ||
89 | clear_buffer_freed(bh); | 90 | clear_buffer_freed(bh); |
91 | clear_buffer_mapped(bh); | ||
92 | clear_buffer_new(bh); | ||
93 | clear_buffer_req(bh); | ||
94 | bh->b_bdev = NULL; | ||
90 | release_buffer_page(bh); | 95 | release_buffer_page(bh); |
91 | } else | 96 | } else |
92 | put_bh(bh); | 97 | put_bh(bh); |
@@ -866,17 +871,35 @@ restart_loop: | |||
866 | * there's no point in keeping a checkpoint record for | 871 | * there's no point in keeping a checkpoint record for |
867 | * it. */ | 872 | * it. */ |
868 | 873 | ||
869 | /* A buffer which has been freed while still being | 874 | /* |
870 | * journaled by a previous transaction may end up still | 875 | * A buffer which has been freed while still being journaled by |
871 | * being dirty here, but we want to avoid writing back | 876 | * a previous transaction. |
872 | * that buffer in the future after the "add to orphan" | 877 | */ |
873 | * operation been committed, That's not only a performance | 878 | if (buffer_freed(bh)) { |
874 | * gain, it also stops aliasing problems if the buffer is | 879 | /* |
875 | * left behind for writeback and gets reallocated for another | 880 | * If the running transaction is the one containing |
876 | * use in a different page. */ | 881 | * "add to orphan" operation (b_next_transaction != |
877 | if (buffer_freed(bh) && !jh->b_next_transaction) { | 882 | * NULL), we have to wait for that transaction to |
878 | clear_buffer_freed(bh); | 883 | * commit before we can really get rid of the buffer. |
879 | clear_buffer_jbddirty(bh); | 884 | * So just clear b_modified to not confuse transaction |
885 | * credit accounting and refile the buffer to | ||
886 | * BJ_Forget of the running transaction. If the just | ||
887 | * committed transaction contains "add to orphan" | ||
888 | * operation, we can completely invalidate the buffer | ||
889 | * now. We are rather throughout in that since the | ||
890 | * buffer may be still accessible when blocksize < | ||
891 | * pagesize and it is attached to the last partial | ||
892 | * page. | ||
893 | */ | ||
894 | jh->b_modified = 0; | ||
895 | if (!jh->b_next_transaction) { | ||
896 | clear_buffer_freed(bh); | ||
897 | clear_buffer_jbddirty(bh); | ||
898 | clear_buffer_mapped(bh); | ||
899 | clear_buffer_new(bh); | ||
900 | clear_buffer_req(bh); | ||
901 | bh->b_bdev = NULL; | ||
902 | } | ||
880 | } | 903 | } |
881 | 904 | ||
882 | if (buffer_jbddirty(bh)) { | 905 | if (buffer_jbddirty(bh)) { |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 09357508ec9a..a2862339323b 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -1113,6 +1113,11 @@ static void mark_journal_empty(journal_t *journal) | |||
1113 | 1113 | ||
1114 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); | 1114 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); |
1115 | spin_lock(&journal->j_state_lock); | 1115 | spin_lock(&journal->j_state_lock); |
1116 | /* Is it already empty? */ | ||
1117 | if (sb->s_start == 0) { | ||
1118 | spin_unlock(&journal->j_state_lock); | ||
1119 | return; | ||
1120 | } | ||
1116 | jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n", | 1121 | jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n", |
1117 | journal->j_tail_sequence); | 1122 | journal->j_tail_sequence); |
1118 | 1123 | ||
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index febc10db5ced..78b7f84241d4 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -1843,15 +1843,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1843 | * We're outside-transaction here. Either or both of j_running_transaction | 1843 | * We're outside-transaction here. Either or both of j_running_transaction |
1844 | * and j_committing_transaction may be NULL. | 1844 | * and j_committing_transaction may be NULL. |
1845 | */ | 1845 | */ |
1846 | static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | 1846 | static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, |
1847 | int partial_page) | ||
1847 | { | 1848 | { |
1848 | transaction_t *transaction; | 1849 | transaction_t *transaction; |
1849 | struct journal_head *jh; | 1850 | struct journal_head *jh; |
1850 | int may_free = 1; | 1851 | int may_free = 1; |
1851 | int ret; | ||
1852 | 1852 | ||
1853 | BUFFER_TRACE(bh, "entry"); | 1853 | BUFFER_TRACE(bh, "entry"); |
1854 | 1854 | ||
1855 | retry: | ||
1855 | /* | 1856 | /* |
1856 | * It is safe to proceed here without the j_list_lock because the | 1857 | * It is safe to proceed here without the j_list_lock because the |
1857 | * buffers cannot be stolen by try_to_free_buffers as long as we are | 1858 | * buffers cannot be stolen by try_to_free_buffers as long as we are |
@@ -1879,10 +1880,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1879 | * clear the buffer dirty bit at latest at the moment when the | 1880 | * clear the buffer dirty bit at latest at the moment when the |
1880 | * transaction marking the buffer as freed in the filesystem | 1881 | * transaction marking the buffer as freed in the filesystem |
1881 | * structures is committed because from that moment on the | 1882 | * structures is committed because from that moment on the |
1882 | * buffer can be reallocated and used by a different page. | 1883 | * block can be reallocated and used by a different page. |
1883 | * Since the block hasn't been freed yet but the inode has | 1884 | * Since the block hasn't been freed yet but the inode has |
1884 | * already been added to orphan list, it is safe for us to add | 1885 | * already been added to orphan list, it is safe for us to add |
1885 | * the buffer to BJ_Forget list of the newest transaction. | 1886 | * the buffer to BJ_Forget list of the newest transaction. |
1887 | * | ||
1888 | * Also we have to clear buffer_mapped flag of a truncated buffer | ||
1889 | * because the buffer_head may be attached to the page straddling | ||
1890 | * i_size (can happen only when blocksize < pagesize) and thus the | ||
1891 | * buffer_head can be reused when the file is extended again. So we end | ||
1892 | * up keeping around invalidated buffers attached to transactions' | ||
1893 | * BJ_Forget list just to stop checkpointing code from cleaning up | ||
1894 | * the transaction this buffer was modified in. | ||
1886 | */ | 1895 | */ |
1887 | transaction = jh->b_transaction; | 1896 | transaction = jh->b_transaction; |
1888 | if (transaction == NULL) { | 1897 | if (transaction == NULL) { |
@@ -1909,13 +1918,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1909 | * committed, the buffer won't be needed any | 1918 | * committed, the buffer won't be needed any |
1910 | * longer. */ | 1919 | * longer. */ |
1911 | JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); | 1920 | JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); |
1912 | ret = __dispose_buffer(jh, | 1921 | may_free = __dispose_buffer(jh, |
1913 | journal->j_running_transaction); | 1922 | journal->j_running_transaction); |
1914 | journal_put_journal_head(jh); | 1923 | goto zap_buffer; |
1915 | spin_unlock(&journal->j_list_lock); | ||
1916 | jbd_unlock_bh_state(bh); | ||
1917 | spin_unlock(&journal->j_state_lock); | ||
1918 | return ret; | ||
1919 | } else { | 1924 | } else { |
1920 | /* There is no currently-running transaction. So the | 1925 | /* There is no currently-running transaction. So the |
1921 | * orphan record which we wrote for this file must have | 1926 | * orphan record which we wrote for this file must have |
@@ -1923,13 +1928,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1923 | * the committing transaction, if it exists. */ | 1928 | * the committing transaction, if it exists. */ |
1924 | if (journal->j_committing_transaction) { | 1929 | if (journal->j_committing_transaction) { |
1925 | JBUFFER_TRACE(jh, "give to committing trans"); | 1930 | JBUFFER_TRACE(jh, "give to committing trans"); |
1926 | ret = __dispose_buffer(jh, | 1931 | may_free = __dispose_buffer(jh, |
1927 | journal->j_committing_transaction); | 1932 | journal->j_committing_transaction); |
1928 | journal_put_journal_head(jh); | 1933 | goto zap_buffer; |
1929 | spin_unlock(&journal->j_list_lock); | ||
1930 | jbd_unlock_bh_state(bh); | ||
1931 | spin_unlock(&journal->j_state_lock); | ||
1932 | return ret; | ||
1933 | } else { | 1934 | } else { |
1934 | /* The orphan record's transaction has | 1935 | /* The orphan record's transaction has |
1935 | * committed. We can cleanse this buffer */ | 1936 | * committed. We can cleanse this buffer */ |
@@ -1950,10 +1951,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1950 | } | 1951 | } |
1951 | /* | 1952 | /* |
1952 | * The buffer is committing, we simply cannot touch | 1953 | * The buffer is committing, we simply cannot touch |
1953 | * it. So we just set j_next_transaction to the | 1954 | * it. If the page is straddling i_size we have to wait |
1954 | * running transaction (if there is one) and mark | 1955 | * for commit and try again. |
1955 | * buffer as freed so that commit code knows it should | 1956 | */ |
1956 | * clear dirty bits when it is done with the buffer. | 1957 | if (partial_page) { |
1958 | tid_t tid = journal->j_committing_transaction->t_tid; | ||
1959 | |||
1960 | journal_put_journal_head(jh); | ||
1961 | spin_unlock(&journal->j_list_lock); | ||
1962 | jbd_unlock_bh_state(bh); | ||
1963 | spin_unlock(&journal->j_state_lock); | ||
1964 | log_wait_commit(journal, tid); | ||
1965 | goto retry; | ||
1966 | } | ||
1967 | /* | ||
1968 | * OK, buffer won't be reachable after truncate. We just set | ||
1969 | * j_next_transaction to the running transaction (if there is | ||
1970 | * one) and mark buffer as freed so that commit code knows it | ||
1971 | * should clear dirty bits when it is done with the buffer. | ||
1957 | */ | 1972 | */ |
1958 | set_buffer_freed(bh); | 1973 | set_buffer_freed(bh); |
1959 | if (journal->j_running_transaction && buffer_jbddirty(bh)) | 1974 | if (journal->j_running_transaction && buffer_jbddirty(bh)) |
@@ -1976,6 +1991,14 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1976 | } | 1991 | } |
1977 | 1992 | ||
1978 | zap_buffer: | 1993 | zap_buffer: |
1994 | /* | ||
1995 | * This is tricky. Although the buffer is truncated, it may be reused | ||
1996 | * if blocksize < pagesize and it is attached to the page straddling | ||
1997 | * EOF. Since the buffer might have been added to BJ_Forget list of the | ||
1998 | * running transaction, journal_get_write_access() won't clear | ||
1999 | * b_modified and credit accounting gets confused. So clear b_modified | ||
2000 | * here. */ | ||
2001 | jh->b_modified = 0; | ||
1979 | journal_put_journal_head(jh); | 2002 | journal_put_journal_head(jh); |
1980 | zap_buffer_no_jh: | 2003 | zap_buffer_no_jh: |
1981 | spin_unlock(&journal->j_list_lock); | 2004 | spin_unlock(&journal->j_list_lock); |
@@ -2024,7 +2047,8 @@ void journal_invalidatepage(journal_t *journal, | |||
2024 | if (offset <= curr_off) { | 2047 | if (offset <= curr_off) { |
2025 | /* This block is wholly outside the truncation point */ | 2048 | /* This block is wholly outside the truncation point */ |
2026 | lock_buffer(bh); | 2049 | lock_buffer(bh); |
2027 | may_free &= journal_unmap_buffer(journal, bh); | 2050 | may_free &= journal_unmap_buffer(journal, bh, |
2051 | offset > 0); | ||
2028 | unlock_buffer(bh); | 2052 | unlock_buffer(bh); |
2029 | } | 2053 | } |
2030 | curr_off = next_off; | 2054 | curr_off = next_off; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index af5280fb579b..3091d42992f0 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -1014,17 +1014,35 @@ restart_loop: | |||
1014 | * there's no point in keeping a checkpoint record for | 1014 | * there's no point in keeping a checkpoint record for |
1015 | * it. */ | 1015 | * it. */ |
1016 | 1016 | ||
1017 | /* A buffer which has been freed while still being | 1017 | /* |
1018 | * journaled by a previous transaction may end up still | 1018 | * A buffer which has been freed while still being journaled by |
1019 | * being dirty here, but we want to avoid writing back | 1019 | * a previous transaction. |
1020 | * that buffer in the future after the "add to orphan" | 1020 | */ |
1021 | * operation been committed, That's not only a performance | 1021 | if (buffer_freed(bh)) { |
1022 | * gain, it also stops aliasing problems if the buffer is | 1022 | /* |
1023 | * left behind for writeback and gets reallocated for another | 1023 | * If the running transaction is the one containing |
1024 | * use in a different page. */ | 1024 | * "add to orphan" operation (b_next_transaction != |
1025 | if (buffer_freed(bh) && !jh->b_next_transaction) { | 1025 | * NULL), we have to wait for that transaction to |
1026 | clear_buffer_freed(bh); | 1026 | * commit before we can really get rid of the buffer. |
1027 | clear_buffer_jbddirty(bh); | 1027 | * So just clear b_modified to not confuse transaction |
1028 | * credit accounting and refile the buffer to | ||
1029 | * BJ_Forget of the running transaction. If the just | ||
1030 | * committed transaction contains "add to orphan" | ||
1031 | * operation, we can completely invalidate the buffer | ||
1032 | * now. We are rather through in that since the | ||
1033 | * buffer may be still accessible when blocksize < | ||
1034 | * pagesize and it is attached to the last partial | ||
1035 | * page. | ||
1036 | */ | ||
1037 | jh->b_modified = 0; | ||
1038 | if (!jh->b_next_transaction) { | ||
1039 | clear_buffer_freed(bh); | ||
1040 | clear_buffer_jbddirty(bh); | ||
1041 | clear_buffer_mapped(bh); | ||
1042 | clear_buffer_new(bh); | ||
1043 | clear_buffer_req(bh); | ||
1044 | bh->b_bdev = NULL; | ||
1045 | } | ||
1028 | } | 1046 | } |
1029 | 1047 | ||
1030 | if (buffer_jbddirty(bh)) { | 1048 | if (buffer_jbddirty(bh)) { |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8625da27eccf..484b8d1c6cb6 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -1354,6 +1354,11 @@ static void jbd2_mark_journal_empty(journal_t *journal) | |||
1354 | 1354 | ||
1355 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); | 1355 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); |
1356 | read_lock(&journal->j_state_lock); | 1356 | read_lock(&journal->j_state_lock); |
1357 | /* Is it already empty? */ | ||
1358 | if (sb->s_start == 0) { | ||
1359 | read_unlock(&journal->j_state_lock); | ||
1360 | return; | ||
1361 | } | ||
1357 | jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", | 1362 | jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", |
1358 | journal->j_tail_sequence); | 1363 | journal->j_tail_sequence); |
1359 | 1364 | ||
@@ -1377,7 +1382,7 @@ static void jbd2_mark_journal_empty(journal_t *journal) | |||
1377 | * Update a journal's errno. Write updated superblock to disk waiting for IO | 1382 | * Update a journal's errno. Write updated superblock to disk waiting for IO |
1378 | * to complete. | 1383 | * to complete. |
1379 | */ | 1384 | */ |
1380 | static void jbd2_journal_update_sb_errno(journal_t *journal) | 1385 | void jbd2_journal_update_sb_errno(journal_t *journal) |
1381 | { | 1386 | { |
1382 | journal_superblock_t *sb = journal->j_superblock; | 1387 | journal_superblock_t *sb = journal->j_superblock; |
1383 | 1388 | ||
@@ -1390,6 +1395,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal) | |||
1390 | 1395 | ||
1391 | jbd2_write_superblock(journal, WRITE_SYNC); | 1396 | jbd2_write_superblock(journal, WRITE_SYNC); |
1392 | } | 1397 | } |
1398 | EXPORT_SYMBOL(jbd2_journal_update_sb_errno); | ||
1393 | 1399 | ||
1394 | /* | 1400 | /* |
1395 | * Read the superblock for a given journal, performing initial | 1401 | * Read the superblock for a given journal, performing initial |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 0131e4362534..626846bac32f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -289,8 +289,11 @@ int jbd2_journal_recover(journal_t *journal) | |||
289 | if (!err) | 289 | if (!err) |
290 | err = err2; | 290 | err = err2; |
291 | /* Make sure all replayed data is on permanent storage */ | 291 | /* Make sure all replayed data is on permanent storage */ |
292 | if (journal->j_flags & JBD2_BARRIER) | 292 | if (journal->j_flags & JBD2_BARRIER) { |
293 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | 293 | err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); |
294 | if (!err) | ||
295 | err = err2; | ||
296 | } | ||
294 | return err; | 297 | return err; |
295 | } | 298 | } |
296 | 299 | ||
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index fb1ab9533b67..a74ba4659549 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -1841,15 +1841,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1841 | * We're outside-transaction here. Either or both of j_running_transaction | 1841 | * We're outside-transaction here. Either or both of j_running_transaction |
1842 | * and j_committing_transaction may be NULL. | 1842 | * and j_committing_transaction may be NULL. |
1843 | */ | 1843 | */ |
1844 | static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | 1844 | static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, |
1845 | int partial_page) | ||
1845 | { | 1846 | { |
1846 | transaction_t *transaction; | 1847 | transaction_t *transaction; |
1847 | struct journal_head *jh; | 1848 | struct journal_head *jh; |
1848 | int may_free = 1; | 1849 | int may_free = 1; |
1849 | int ret; | ||
1850 | 1850 | ||
1851 | BUFFER_TRACE(bh, "entry"); | 1851 | BUFFER_TRACE(bh, "entry"); |
1852 | 1852 | ||
1853 | retry: | ||
1853 | /* | 1854 | /* |
1854 | * It is safe to proceed here without the j_list_lock because the | 1855 | * It is safe to proceed here without the j_list_lock because the |
1855 | * buffers cannot be stolen by try_to_free_buffers as long as we are | 1856 | * buffers cannot be stolen by try_to_free_buffers as long as we are |
@@ -1878,10 +1879,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1878 | * clear the buffer dirty bit at latest at the moment when the | 1879 | * clear the buffer dirty bit at latest at the moment when the |
1879 | * transaction marking the buffer as freed in the filesystem | 1880 | * transaction marking the buffer as freed in the filesystem |
1880 | * structures is committed because from that moment on the | 1881 | * structures is committed because from that moment on the |
1881 | * buffer can be reallocated and used by a different page. | 1882 | * block can be reallocated and used by a different page. |
1882 | * Since the block hasn't been freed yet but the inode has | 1883 | * Since the block hasn't been freed yet but the inode has |
1883 | * already been added to orphan list, it is safe for us to add | 1884 | * already been added to orphan list, it is safe for us to add |
1884 | * the buffer to BJ_Forget list of the newest transaction. | 1885 | * the buffer to BJ_Forget list of the newest transaction. |
1886 | * | ||
1887 | * Also we have to clear buffer_mapped flag of a truncated buffer | ||
1888 | * because the buffer_head may be attached to the page straddling | ||
1889 | * i_size (can happen only when blocksize < pagesize) and thus the | ||
1890 | * buffer_head can be reused when the file is extended again. So we end | ||
1891 | * up keeping around invalidated buffers attached to transactions' | ||
1892 | * BJ_Forget list just to stop checkpointing code from cleaning up | ||
1893 | * the transaction this buffer was modified in. | ||
1885 | */ | 1894 | */ |
1886 | transaction = jh->b_transaction; | 1895 | transaction = jh->b_transaction; |
1887 | if (transaction == NULL) { | 1896 | if (transaction == NULL) { |
@@ -1908,13 +1917,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1908 | * committed, the buffer won't be needed any | 1917 | * committed, the buffer won't be needed any |
1909 | * longer. */ | 1918 | * longer. */ |
1910 | JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); | 1919 | JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); |
1911 | ret = __dispose_buffer(jh, | 1920 | may_free = __dispose_buffer(jh, |
1912 | journal->j_running_transaction); | 1921 | journal->j_running_transaction); |
1913 | jbd2_journal_put_journal_head(jh); | 1922 | goto zap_buffer; |
1914 | spin_unlock(&journal->j_list_lock); | ||
1915 | jbd_unlock_bh_state(bh); | ||
1916 | write_unlock(&journal->j_state_lock); | ||
1917 | return ret; | ||
1918 | } else { | 1923 | } else { |
1919 | /* There is no currently-running transaction. So the | 1924 | /* There is no currently-running transaction. So the |
1920 | * orphan record which we wrote for this file must have | 1925 | * orphan record which we wrote for this file must have |
@@ -1922,13 +1927,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1922 | * the committing transaction, if it exists. */ | 1927 | * the committing transaction, if it exists. */ |
1923 | if (journal->j_committing_transaction) { | 1928 | if (journal->j_committing_transaction) { |
1924 | JBUFFER_TRACE(jh, "give to committing trans"); | 1929 | JBUFFER_TRACE(jh, "give to committing trans"); |
1925 | ret = __dispose_buffer(jh, | 1930 | may_free = __dispose_buffer(jh, |
1926 | journal->j_committing_transaction); | 1931 | journal->j_committing_transaction); |
1927 | jbd2_journal_put_journal_head(jh); | 1932 | goto zap_buffer; |
1928 | spin_unlock(&journal->j_list_lock); | ||
1929 | jbd_unlock_bh_state(bh); | ||
1930 | write_unlock(&journal->j_state_lock); | ||
1931 | return ret; | ||
1932 | } else { | 1933 | } else { |
1933 | /* The orphan record's transaction has | 1934 | /* The orphan record's transaction has |
1934 | * committed. We can cleanse this buffer */ | 1935 | * committed. We can cleanse this buffer */ |
@@ -1940,10 +1941,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1940 | JBUFFER_TRACE(jh, "on committing transaction"); | 1941 | JBUFFER_TRACE(jh, "on committing transaction"); |
1941 | /* | 1942 | /* |
1942 | * The buffer is committing, we simply cannot touch | 1943 | * The buffer is committing, we simply cannot touch |
1943 | * it. So we just set j_next_transaction to the | 1944 | * it. If the page is straddling i_size we have to wait |
1944 | * running transaction (if there is one) and mark | 1945 | * for commit and try again. |
1945 | * buffer as freed so that commit code knows it should | 1946 | */ |
1946 | * clear dirty bits when it is done with the buffer. | 1947 | if (partial_page) { |
1948 | tid_t tid = journal->j_committing_transaction->t_tid; | ||
1949 | |||
1950 | jbd2_journal_put_journal_head(jh); | ||
1951 | spin_unlock(&journal->j_list_lock); | ||
1952 | jbd_unlock_bh_state(bh); | ||
1953 | write_unlock(&journal->j_state_lock); | ||
1954 | jbd2_log_wait_commit(journal, tid); | ||
1955 | goto retry; | ||
1956 | } | ||
1957 | /* | ||
1958 | * OK, buffer won't be reachable after truncate. We just set | ||
1959 | * j_next_transaction to the running transaction (if there is | ||
1960 | * one) and mark buffer as freed so that commit code knows it | ||
1961 | * should clear dirty bits when it is done with the buffer. | ||
1947 | */ | 1962 | */ |
1948 | set_buffer_freed(bh); | 1963 | set_buffer_freed(bh); |
1949 | if (journal->j_running_transaction && buffer_jbddirty(bh)) | 1964 | if (journal->j_running_transaction && buffer_jbddirty(bh)) |
@@ -1966,6 +1981,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1966 | } | 1981 | } |
1967 | 1982 | ||
1968 | zap_buffer: | 1983 | zap_buffer: |
1984 | /* | ||
1985 | * This is tricky. Although the buffer is truncated, it may be reused | ||
1986 | * if blocksize < pagesize and it is attached to the page straddling | ||
1987 | * EOF. Since the buffer might have been added to BJ_Forget list of the | ||
1988 | * running transaction, journal_get_write_access() won't clear | ||
1989 | * b_modified and credit accounting gets confused. So clear b_modified | ||
1990 | * here. | ||
1991 | */ | ||
1992 | jh->b_modified = 0; | ||
1969 | jbd2_journal_put_journal_head(jh); | 1993 | jbd2_journal_put_journal_head(jh); |
1970 | zap_buffer_no_jh: | 1994 | zap_buffer_no_jh: |
1971 | spin_unlock(&journal->j_list_lock); | 1995 | spin_unlock(&journal->j_list_lock); |
@@ -2017,7 +2041,8 @@ void jbd2_journal_invalidatepage(journal_t *journal, | |||
2017 | if (offset <= curr_off) { | 2041 | if (offset <= curr_off) { |
2018 | /* This block is wholly outside the truncation point */ | 2042 | /* This block is wholly outside the truncation point */ |
2019 | lock_buffer(bh); | 2043 | lock_buffer(bh); |
2020 | may_free &= journal_unmap_buffer(journal, bh); | 2044 | may_free &= journal_unmap_buffer(journal, bh, |
2045 | offset > 0); | ||
2021 | unlock_buffer(bh); | 2046 | unlock_buffer(bh); |
2022 | } | 2047 | } |
2023 | curr_off = next_off; | 2048 | curr_off = next_off; |
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 922f146e4235..223283c30111 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c | |||
@@ -94,15 +94,23 @@ static struct posix_acl *jffs2_acl_from_medium(void *value, size_t size) | |||
94 | case ACL_MASK: | 94 | case ACL_MASK: |
95 | case ACL_OTHER: | 95 | case ACL_OTHER: |
96 | value += sizeof(struct jffs2_acl_entry_short); | 96 | value += sizeof(struct jffs2_acl_entry_short); |
97 | acl->a_entries[i].e_id = ACL_UNDEFINED_ID; | ||
98 | break; | 97 | break; |
99 | 98 | ||
100 | case ACL_USER: | 99 | case ACL_USER: |
100 | value += sizeof(struct jffs2_acl_entry); | ||
101 | if (value > end) | ||
102 | goto fail; | ||
103 | acl->a_entries[i].e_uid = | ||
104 | make_kuid(&init_user_ns, | ||
105 | je32_to_cpu(entry->e_id)); | ||
106 | break; | ||
101 | case ACL_GROUP: | 107 | case ACL_GROUP: |
102 | value += sizeof(struct jffs2_acl_entry); | 108 | value += sizeof(struct jffs2_acl_entry); |
103 | if (value > end) | 109 | if (value > end) |
104 | goto fail; | 110 | goto fail; |
105 | acl->a_entries[i].e_id = je32_to_cpu(entry->e_id); | 111 | acl->a_entries[i].e_gid = |
112 | make_kgid(&init_user_ns, | ||
113 | je32_to_cpu(entry->e_id)); | ||
106 | break; | 114 | break; |
107 | 115 | ||
108 | default: | 116 | default: |
@@ -131,13 +139,19 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size) | |||
131 | header->a_version = cpu_to_je32(JFFS2_ACL_VERSION); | 139 | header->a_version = cpu_to_je32(JFFS2_ACL_VERSION); |
132 | e = header + 1; | 140 | e = header + 1; |
133 | for (i=0; i < acl->a_count; i++) { | 141 | for (i=0; i < acl->a_count; i++) { |
142 | const struct posix_acl_entry *acl_e = &acl->a_entries[i]; | ||
134 | entry = e; | 143 | entry = e; |
135 | entry->e_tag = cpu_to_je16(acl->a_entries[i].e_tag); | 144 | entry->e_tag = cpu_to_je16(acl_e->e_tag); |
136 | entry->e_perm = cpu_to_je16(acl->a_entries[i].e_perm); | 145 | entry->e_perm = cpu_to_je16(acl_e->e_perm); |
137 | switch(acl->a_entries[i].e_tag) { | 146 | switch(acl_e->e_tag) { |
138 | case ACL_USER: | 147 | case ACL_USER: |
148 | entry->e_id = cpu_to_je32( | ||
149 | from_kuid(&init_user_ns, acl_e->e_uid)); | ||
150 | e += sizeof(struct jffs2_acl_entry); | ||
151 | break; | ||
139 | case ACL_GROUP: | 152 | case ACL_GROUP: |
140 | entry->e_id = cpu_to_je32(acl->a_entries[i].e_id); | 153 | entry->e_id = cpu_to_je32( |
154 | from_kgid(&init_user_ns, acl_e->e_gid)); | ||
141 | e += sizeof(struct jffs2_acl_entry); | 155 | e += sizeof(struct jffs2_acl_entry); |
142 | break; | 156 | break; |
143 | 157 | ||
@@ -363,7 +377,7 @@ static int jffs2_acl_getxattr(struct dentry *dentry, const char *name, | |||
363 | return PTR_ERR(acl); | 377 | return PTR_ERR(acl); |
364 | if (!acl) | 378 | if (!acl) |
365 | return -ENODATA; | 379 | return -ENODATA; |
366 | rc = posix_acl_to_xattr(acl, buffer, size); | 380 | rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); |
367 | posix_acl_release(acl); | 381 | posix_acl_release(acl); |
368 | 382 | ||
369 | return rc; | 383 | return rc; |
@@ -381,7 +395,7 @@ static int jffs2_acl_setxattr(struct dentry *dentry, const char *name, | |||
381 | return -EPERM; | 395 | return -EPERM; |
382 | 396 | ||
383 | if (value) { | 397 | if (value) { |
384 | acl = posix_acl_from_xattr(value, size); | 398 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
385 | if (IS_ERR(acl)) | 399 | if (IS_ERR(acl)) |
386 | return PTR_ERR(acl); | 400 | return PTR_ERR(acl); |
387 | if (acl) { | 401 | if (acl) { |
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index db3889ba8818..60ef3fb707ff 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c | |||
@@ -175,8 +175,8 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
175 | ri.ino = cpu_to_je32(f->inocache->ino); | 175 | ri.ino = cpu_to_je32(f->inocache->ino); |
176 | ri.version = cpu_to_je32(++f->highest_version); | 176 | ri.version = cpu_to_je32(++f->highest_version); |
177 | ri.mode = cpu_to_jemode(inode->i_mode); | 177 | ri.mode = cpu_to_jemode(inode->i_mode); |
178 | ri.uid = cpu_to_je16(inode->i_uid); | 178 | ri.uid = cpu_to_je16(i_uid_read(inode)); |
179 | ri.gid = cpu_to_je16(inode->i_gid); | 179 | ri.gid = cpu_to_je16(i_gid_read(inode)); |
180 | ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs)); | 180 | ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs)); |
181 | ri.atime = ri.ctime = ri.mtime = cpu_to_je32(get_seconds()); | 181 | ri.atime = ri.ctime = ri.mtime = cpu_to_je32(get_seconds()); |
182 | ri.offset = cpu_to_je32(inode->i_size); | 182 | ri.offset = cpu_to_je32(inode->i_size); |
@@ -283,8 +283,8 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping, | |||
283 | /* Set the fields that the generic jffs2_write_inode_range() code can't find */ | 283 | /* Set the fields that the generic jffs2_write_inode_range() code can't find */ |
284 | ri->ino = cpu_to_je32(inode->i_ino); | 284 | ri->ino = cpu_to_je32(inode->i_ino); |
285 | ri->mode = cpu_to_jemode(inode->i_mode); | 285 | ri->mode = cpu_to_jemode(inode->i_mode); |
286 | ri->uid = cpu_to_je16(inode->i_uid); | 286 | ri->uid = cpu_to_je16(i_uid_read(inode)); |
287 | ri->gid = cpu_to_je16(inode->i_gid); | 287 | ri->gid = cpu_to_je16(i_gid_read(inode)); |
288 | ri->isize = cpu_to_je32((uint32_t)inode->i_size); | 288 | ri->isize = cpu_to_je32((uint32_t)inode->i_size); |
289 | ri->atime = ri->ctime = ri->mtime = cpu_to_je32(get_seconds()); | 289 | ri->atime = ri->ctime = ri->mtime = cpu_to_je32(get_seconds()); |
290 | 290 | ||
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 3d3092eda811..fe3c0527545f 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c | |||
@@ -99,8 +99,10 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) | |||
99 | ri->ino = cpu_to_je32(inode->i_ino); | 99 | ri->ino = cpu_to_je32(inode->i_ino); |
100 | ri->version = cpu_to_je32(++f->highest_version); | 100 | ri->version = cpu_to_je32(++f->highest_version); |
101 | 101 | ||
102 | ri->uid = cpu_to_je16((ivalid & ATTR_UID)?iattr->ia_uid:inode->i_uid); | 102 | ri->uid = cpu_to_je16((ivalid & ATTR_UID)? |
103 | ri->gid = cpu_to_je16((ivalid & ATTR_GID)?iattr->ia_gid:inode->i_gid); | 103 | from_kuid(&init_user_ns, iattr->ia_uid):i_uid_read(inode)); |
104 | ri->gid = cpu_to_je16((ivalid & ATTR_GID)? | ||
105 | from_kgid(&init_user_ns, iattr->ia_gid):i_gid_read(inode)); | ||
104 | 106 | ||
105 | if (ivalid & ATTR_MODE) | 107 | if (ivalid & ATTR_MODE) |
106 | ri->mode = cpu_to_jemode(iattr->ia_mode); | 108 | ri->mode = cpu_to_jemode(iattr->ia_mode); |
@@ -147,8 +149,8 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) | |||
147 | inode->i_ctime = ITIME(je32_to_cpu(ri->ctime)); | 149 | inode->i_ctime = ITIME(je32_to_cpu(ri->ctime)); |
148 | inode->i_mtime = ITIME(je32_to_cpu(ri->mtime)); | 150 | inode->i_mtime = ITIME(je32_to_cpu(ri->mtime)); |
149 | inode->i_mode = jemode_to_cpu(ri->mode); | 151 | inode->i_mode = jemode_to_cpu(ri->mode); |
150 | inode->i_uid = je16_to_cpu(ri->uid); | 152 | i_uid_write(inode, je16_to_cpu(ri->uid)); |
151 | inode->i_gid = je16_to_cpu(ri->gid); | 153 | i_gid_write(inode, je16_to_cpu(ri->gid)); |
152 | 154 | ||
153 | 155 | ||
154 | old_metadata = f->metadata; | 156 | old_metadata = f->metadata; |
@@ -276,8 +278,8 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino) | |||
276 | return ERR_PTR(ret); | 278 | return ERR_PTR(ret); |
277 | } | 279 | } |
278 | inode->i_mode = jemode_to_cpu(latest_node.mode); | 280 | inode->i_mode = jemode_to_cpu(latest_node.mode); |
279 | inode->i_uid = je16_to_cpu(latest_node.uid); | 281 | i_uid_write(inode, je16_to_cpu(latest_node.uid)); |
280 | inode->i_gid = je16_to_cpu(latest_node.gid); | 282 | i_gid_write(inode, je16_to_cpu(latest_node.gid)); |
281 | inode->i_size = je32_to_cpu(latest_node.isize); | 283 | inode->i_size = je32_to_cpu(latest_node.isize); |
282 | inode->i_atime = ITIME(je32_to_cpu(latest_node.atime)); | 284 | inode->i_atime = ITIME(je32_to_cpu(latest_node.atime)); |
283 | inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); | 285 | inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); |
@@ -440,14 +442,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r | |||
440 | 442 | ||
441 | memset(ri, 0, sizeof(*ri)); | 443 | memset(ri, 0, sizeof(*ri)); |
442 | /* Set OS-specific defaults for new inodes */ | 444 | /* Set OS-specific defaults for new inodes */ |
443 | ri->uid = cpu_to_je16(current_fsuid()); | 445 | ri->uid = cpu_to_je16(from_kuid(&init_user_ns, current_fsuid())); |
444 | 446 | ||
445 | if (dir_i->i_mode & S_ISGID) { | 447 | if (dir_i->i_mode & S_ISGID) { |
446 | ri->gid = cpu_to_je16(dir_i->i_gid); | 448 | ri->gid = cpu_to_je16(i_gid_read(dir_i)); |
447 | if (S_ISDIR(mode)) | 449 | if (S_ISDIR(mode)) |
448 | mode |= S_ISGID; | 450 | mode |= S_ISGID; |
449 | } else { | 451 | } else { |
450 | ri->gid = cpu_to_je16(current_fsgid()); | 452 | ri->gid = cpu_to_je16(from_kgid(&init_user_ns, current_fsgid())); |
451 | } | 453 | } |
452 | 454 | ||
453 | /* POSIX ACLs have to be processed now, at least partly. | 455 | /* POSIX ACLs have to be processed now, at least partly. |
@@ -467,8 +469,8 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r | |||
467 | set_nlink(inode, 1); | 469 | set_nlink(inode, 1); |
468 | inode->i_ino = je32_to_cpu(ri->ino); | 470 | inode->i_ino = je32_to_cpu(ri->ino); |
469 | inode->i_mode = jemode_to_cpu(ri->mode); | 471 | inode->i_mode = jemode_to_cpu(ri->mode); |
470 | inode->i_gid = je16_to_cpu(ri->gid); | 472 | i_gid_write(inode, je16_to_cpu(ri->gid)); |
471 | inode->i_uid = je16_to_cpu(ri->uid); | 473 | i_uid_write(inode, je16_to_cpu(ri->uid)); |
472 | inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; | 474 | inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; |
473 | ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime)); | 475 | ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime)); |
474 | 476 | ||
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index bcd983d7e7f9..d200a9b8fd5e 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h | |||
@@ -27,8 +27,8 @@ struct kvec; | |||
27 | 27 | ||
28 | #define JFFS2_F_I_SIZE(f) (OFNI_EDONI_2SFFJ(f)->i_size) | 28 | #define JFFS2_F_I_SIZE(f) (OFNI_EDONI_2SFFJ(f)->i_size) |
29 | #define JFFS2_F_I_MODE(f) (OFNI_EDONI_2SFFJ(f)->i_mode) | 29 | #define JFFS2_F_I_MODE(f) (OFNI_EDONI_2SFFJ(f)->i_mode) |
30 | #define JFFS2_F_I_UID(f) (OFNI_EDONI_2SFFJ(f)->i_uid) | 30 | #define JFFS2_F_I_UID(f) (i_uid_read(OFNI_EDONI_2SFFJ(f))) |
31 | #define JFFS2_F_I_GID(f) (OFNI_EDONI_2SFFJ(f)->i_gid) | 31 | #define JFFS2_F_I_GID(f) (i_gid_read(OFNI_EDONI_2SFFJ(f))) |
32 | #define JFFS2_F_I_RDEV(f) (OFNI_EDONI_2SFFJ(f)->i_rdev) | 32 | #define JFFS2_F_I_RDEV(f) (OFNI_EDONI_2SFFJ(f)->i_rdev) |
33 | 33 | ||
34 | #define ITIME(sec) ((struct timespec){sec, 0}) | 34 | #define ITIME(sec) ((struct timespec){sec, 0}) |
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 1ea349fff68b..ae81b01e6fd7 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c | |||
@@ -394,8 +394,11 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, | |||
394 | } | 394 | } |
395 | 395 | ||
396 | /* Trivial function to remove the last node in the tree. Which by definition | 396 | /* Trivial function to remove the last node in the tree. Which by definition |
397 | has no right-hand -- so can be removed just by making its only child (if | 397 | has no right-hand child — so can be removed just by making its left-hand |
398 | any) take its place under its parent. */ | 398 | child (if any) take its place under its parent. Since this is only done |
399 | when we're consuming the whole tree, there's no need to use rb_erase() | ||
400 | and let it worry about adjusting colours and balancing the tree. That | ||
401 | would just be a waste of time. */ | ||
399 | static void eat_last(struct rb_root *root, struct rb_node *node) | 402 | static void eat_last(struct rb_root *root, struct rb_node *node) |
400 | { | 403 | { |
401 | struct rb_node *parent = rb_parent(node); | 404 | struct rb_node *parent = rb_parent(node); |
@@ -412,12 +415,12 @@ static void eat_last(struct rb_root *root, struct rb_node *node) | |||
412 | link = &parent->rb_right; | 415 | link = &parent->rb_right; |
413 | 416 | ||
414 | *link = node->rb_left; | 417 | *link = node->rb_left; |
415 | /* Colour doesn't matter now. Only the parent pointer. */ | ||
416 | if (node->rb_left) | 418 | if (node->rb_left) |
417 | node->rb_left->rb_parent_color = node->rb_parent_color; | 419 | node->rb_left->__rb_parent_color = node->__rb_parent_color; |
418 | } | 420 | } |
419 | 421 | ||
420 | /* We put this in reverse order, so we can just use eat_last */ | 422 | /* We put the version tree in reverse order, so we can use the same eat_last() |
423 | function that we use to consume the tmpnode tree (tn_root). */ | ||
421 | static void ver_insert(struct rb_root *ver_root, struct jffs2_tmp_dnode_info *tn) | 424 | static void ver_insert(struct rb_root *ver_root, struct jffs2_tmp_dnode_info *tn) |
422 | { | 425 | { |
423 | struct rb_node **link = &ver_root->rb_node; | 426 | struct rb_node **link = &ver_root->rb_node; |
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 1224d6b48e7e..d3d8799e2187 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
@@ -422,6 +422,12 @@ static void __exit exit_jffs2_fs(void) | |||
422 | unregister_filesystem(&jffs2_fs_type); | 422 | unregister_filesystem(&jffs2_fs_type); |
423 | jffs2_destroy_slab_caches(); | 423 | jffs2_destroy_slab_caches(); |
424 | jffs2_compressors_exit(); | 424 | jffs2_compressors_exit(); |
425 | |||
426 | /* | ||
427 | * Make sure all delayed rcu free inodes are flushed before we | ||
428 | * destroy cache. | ||
429 | */ | ||
430 | rcu_barrier(); | ||
425 | kmem_cache_destroy(jffs2_inode_cachep); | 431 | kmem_cache_destroy(jffs2_inode_cachep); |
426 | } | 432 | } |
427 | 433 | ||
diff --git a/fs/jfs/Makefile b/fs/jfs/Makefile index a58fa72d7e59..d20d4737b3ef 100644 --- a/fs/jfs/Makefile +++ b/fs/jfs/Makefile | |||
@@ -6,7 +6,7 @@ obj-$(CONFIG_JFS_FS) += jfs.o | |||
6 | 6 | ||
7 | jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \ | 7 | jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \ |
8 | jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \ | 8 | jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \ |
9 | jfs_unicode.o jfs_dtree.o jfs_inode.o \ | 9 | jfs_unicode.o jfs_dtree.o jfs_inode.o jfs_discard.o \ |
10 | jfs_extent.o symlink.o jfs_metapage.o \ | 10 | jfs_extent.o symlink.o jfs_metapage.o \ |
11 | jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \ | 11 | jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \ |
12 | resize.o xattr.o ioctl.o | 12 | resize.o xattr.o ioctl.o |
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 45559dc3ea2f..d254d6d35995 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c | |||
@@ -64,7 +64,7 @@ struct posix_acl *jfs_get_acl(struct inode *inode, int type) | |||
64 | else | 64 | else |
65 | acl = ERR_PTR(size); | 65 | acl = ERR_PTR(size); |
66 | } else { | 66 | } else { |
67 | acl = posix_acl_from_xattr(value, size); | 67 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
68 | } | 68 | } |
69 | kfree(value); | 69 | kfree(value); |
70 | if (!IS_ERR(acl)) | 70 | if (!IS_ERR(acl)) |
@@ -100,7 +100,7 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type, | |||
100 | value = kmalloc(size, GFP_KERNEL); | 100 | value = kmalloc(size, GFP_KERNEL); |
101 | if (!value) | 101 | if (!value) |
102 | return -ENOMEM; | 102 | return -ENOMEM; |
103 | rc = posix_acl_to_xattr(acl, value, size); | 103 | rc = posix_acl_to_xattr(&init_user_ns, acl, value, size); |
104 | if (rc < 0) | 104 | if (rc < 0) |
105 | goto out; | 105 | goto out; |
106 | } | 106 | } |
diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 844f9460cb11..9d3afd157f99 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c | |||
@@ -108,8 +108,8 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
108 | 108 | ||
109 | if (is_quota_modification(inode, iattr)) | 109 | if (is_quota_modification(inode, iattr)) |
110 | dquot_initialize(inode); | 110 | dquot_initialize(inode); |
111 | if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || | 111 | if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) || |
112 | (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { | 112 | (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) { |
113 | rc = dquot_transfer(inode, iattr); | 113 | rc = dquot_transfer(inode, iattr); |
114 | if (rc) | 114 | if (rc) |
115 | return rc; | 115 | return rc; |
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index f19d1e04a374..bc555ff417e9 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c | |||
@@ -11,13 +11,17 @@ | |||
11 | #include <linux/mount.h> | 11 | #include <linux/mount.h> |
12 | #include <linux/time.h> | 12 | #include <linux/time.h> |
13 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
14 | #include <linux/blkdev.h> | ||
14 | #include <asm/current.h> | 15 | #include <asm/current.h> |
15 | #include <asm/uaccess.h> | 16 | #include <asm/uaccess.h> |
16 | 17 | ||
18 | #include "jfs_filsys.h" | ||
19 | #include "jfs_debug.h" | ||
17 | #include "jfs_incore.h" | 20 | #include "jfs_incore.h" |
18 | #include "jfs_dinode.h" | 21 | #include "jfs_dinode.h" |
19 | #include "jfs_inode.h" | 22 | #include "jfs_inode.h" |
20 | 23 | #include "jfs_dmap.h" | |
24 | #include "jfs_discard.h" | ||
21 | 25 | ||
22 | static struct { | 26 | static struct { |
23 | long jfs_flag; | 27 | long jfs_flag; |
@@ -123,6 +127,40 @@ setflags_out: | |||
123 | mnt_drop_write_file(filp); | 127 | mnt_drop_write_file(filp); |
124 | return err; | 128 | return err; |
125 | } | 129 | } |
130 | |||
131 | case FITRIM: | ||
132 | { | ||
133 | struct super_block *sb = inode->i_sb; | ||
134 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | ||
135 | struct fstrim_range range; | ||
136 | s64 ret = 0; | ||
137 | |||
138 | if (!capable(CAP_SYS_ADMIN)) | ||
139 | return -EPERM; | ||
140 | |||
141 | if (!blk_queue_discard(q)) { | ||
142 | jfs_warn("FITRIM not supported on device"); | ||
143 | return -EOPNOTSUPP; | ||
144 | } | ||
145 | |||
146 | if (copy_from_user(&range, (struct fstrim_range __user *)arg, | ||
147 | sizeof(range))) | ||
148 | return -EFAULT; | ||
149 | |||
150 | range.minlen = max_t(unsigned int, range.minlen, | ||
151 | q->limits.discard_granularity); | ||
152 | |||
153 | ret = jfs_ioc_trim(inode, &range); | ||
154 | if (ret < 0) | ||
155 | return ret; | ||
156 | |||
157 | if (copy_to_user((struct fstrim_range __user *)arg, &range, | ||
158 | sizeof(range))) | ||
159 | return -EFAULT; | ||
160 | |||
161 | return 0; | ||
162 | } | ||
163 | |||
126 | default: | 164 | default: |
127 | return -ENOTTY; | 165 | return -ENOTTY; |
128 | } | 166 | } |
@@ -142,6 +180,9 @@ long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
142 | case JFS_IOC_SETFLAGS32: | 180 | case JFS_IOC_SETFLAGS32: |
143 | cmd = JFS_IOC_SETFLAGS; | 181 | cmd = JFS_IOC_SETFLAGS; |
144 | break; | 182 | break; |
183 | case FITRIM: | ||
184 | cmd = FITRIM; | ||
185 | break; | ||
145 | } | 186 | } |
146 | return jfs_ioctl(filp, cmd, arg); | 187 | return jfs_ioctl(filp, cmd, arg); |
147 | } | 188 | } |
diff --git a/fs/jfs/jfs_discard.c b/fs/jfs/jfs_discard.c new file mode 100644 index 000000000000..9947563e4175 --- /dev/null +++ b/fs/jfs/jfs_discard.c | |||
@@ -0,0 +1,117 @@ | |||
1 | /* | ||
2 | * Copyright (C) Tino Reichardt, 2012 | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
12 | * the GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | #include <linux/fs.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <linux/blkdev.h> | ||
22 | |||
23 | #include "jfs_incore.h" | ||
24 | #include "jfs_superblock.h" | ||
25 | #include "jfs_discard.h" | ||
26 | #include "jfs_dmap.h" | ||
27 | #include "jfs_debug.h" | ||
28 | |||
29 | |||
30 | /* | ||
31 | * NAME: jfs_issue_discard() | ||
32 | * | ||
33 | * FUNCTION: TRIM the specified block range on device, if supported | ||
34 | * | ||
35 | * PARAMETERS: | ||
36 | * ip - pointer to in-core inode | ||
37 | * blkno - starting block number to be trimmed (0..N) | ||
38 | * nblocks - number of blocks to be trimmed | ||
39 | * | ||
40 | * RETURN VALUES: | ||
41 | * none | ||
42 | * | ||
43 | * serialization: IREAD_LOCK(ipbmap) held on entry/exit; | ||
44 | */ | ||
45 | void jfs_issue_discard(struct inode *ip, u64 blkno, u64 nblocks) | ||
46 | { | ||
47 | struct super_block *sb = ip->i_sb; | ||
48 | int r = 0; | ||
49 | |||
50 | r = sb_issue_discard(sb, blkno, nblocks, GFP_NOFS, 0); | ||
51 | if (unlikely(r != 0)) { | ||
52 | jfs_err("JFS: sb_issue_discard" \ | ||
53 | "(%p, %llu, %llu, GFP_NOFS, 0) = %d => failed!\n", | ||
54 | sb, (unsigned long long)blkno, | ||
55 | (unsigned long long)nblocks, r); | ||
56 | } | ||
57 | |||
58 | jfs_info("JFS: sb_issue_discard" \ | ||
59 | "(%p, %llu, %llu, GFP_NOFS, 0) = %d\n", | ||
60 | sb, (unsigned long long)blkno, | ||
61 | (unsigned long long)nblocks, r); | ||
62 | |||
63 | return; | ||
64 | } | ||
65 | |||
66 | /* | ||
67 | * NAME: jfs_ioc_trim() | ||
68 | * | ||
69 | * FUNCTION: attempt to discard (TRIM) all free blocks from the | ||
70 | * filesystem. | ||
71 | * | ||
72 | * PARAMETERS: | ||
73 | * ip - pointer to in-core inode; | ||
74 | * range - the range, given by user space | ||
75 | * | ||
76 | * RETURN VALUES: | ||
77 | * 0 - success | ||
78 | * -EIO - i/o error | ||
79 | */ | ||
80 | int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range) | ||
81 | { | ||
82 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | ||
83 | struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; | ||
84 | struct super_block *sb = ipbmap->i_sb; | ||
85 | int agno, agno_end; | ||
86 | s64 start, end, minlen; | ||
87 | u64 trimmed = 0; | ||
88 | |||
89 | /** | ||
90 | * convert byte values to block size of filesystem: | ||
91 | * start: First Byte to trim | ||
92 | * len: number of Bytes to trim from start | ||
93 | * minlen: minimum extent length in Bytes | ||
94 | */ | ||
95 | start = range->start >> sb->s_blocksize_bits; | ||
96 | if (start < 0) | ||
97 | start = 0; | ||
98 | end = start + (range->len >> sb->s_blocksize_bits) - 1; | ||
99 | if (end >= bmp->db_mapsize) | ||
100 | end = bmp->db_mapsize - 1; | ||
101 | minlen = range->minlen >> sb->s_blocksize_bits; | ||
102 | if (minlen <= 0) | ||
103 | minlen = 1; | ||
104 | |||
105 | /** | ||
106 | * we trim all ag's within the range | ||
107 | */ | ||
108 | agno = BLKTOAG(start, JFS_SBI(ip->i_sb)); | ||
109 | agno_end = BLKTOAG(end, JFS_SBI(ip->i_sb)); | ||
110 | while (agno <= agno_end) { | ||
111 | trimmed += dbDiscardAG(ip, agno, minlen); | ||
112 | agno++; | ||
113 | } | ||
114 | range->len = trimmed << sb->s_blocksize_bits; | ||
115 | |||
116 | return 0; | ||
117 | } | ||
diff --git a/fs/jfs/jfs_discard.h b/fs/jfs/jfs_discard.h new file mode 100644 index 000000000000..40d1ee6081a0 --- /dev/null +++ b/fs/jfs/jfs_discard.h | |||
@@ -0,0 +1,26 @@ | |||
1 | /* | ||
2 | * Copyright (C) Tino Reichardt, 2012 | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
12 | * the GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | #ifndef _H_JFS_DISCARD | ||
19 | #define _H_JFS_DISCARD | ||
20 | |||
21 | struct fstrim_range; | ||
22 | |||
23 | extern void jfs_issue_discard(struct inode *ip, u64 blkno, u64 nblocks); | ||
24 | extern int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range); | ||
25 | |||
26 | #endif /* _H_JFS_DISCARD */ | ||
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 9cbd11a3f804..9a55f53be5ff 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) International Business Machines Corp., 2000-2004 | 2 | * Copyright (C) International Business Machines Corp., 2000-2004 |
3 | * Portions Copyright (C) Tino Reichardt, 2012 | ||
3 | * | 4 | * |
4 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
@@ -25,6 +26,7 @@ | |||
25 | #include "jfs_lock.h" | 26 | #include "jfs_lock.h" |
26 | #include "jfs_metapage.h" | 27 | #include "jfs_metapage.h" |
27 | #include "jfs_debug.h" | 28 | #include "jfs_debug.h" |
29 | #include "jfs_discard.h" | ||
28 | 30 | ||
29 | /* | 31 | /* |
30 | * SERIALIZATION of the Block Allocation Map. | 32 | * SERIALIZATION of the Block Allocation Map. |
@@ -104,7 +106,6 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
104 | static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | 106 | static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, |
105 | int nblocks); | 107 | int nblocks); |
106 | static int dbMaxBud(u8 * cp); | 108 | static int dbMaxBud(u8 * cp); |
107 | s64 dbMapFileSizeToMapSize(struct inode *ipbmap); | ||
108 | static int blkstol2(s64 nb); | 109 | static int blkstol2(s64 nb); |
109 | 110 | ||
110 | static int cntlz(u32 value); | 111 | static int cntlz(u32 value); |
@@ -145,7 +146,6 @@ static const s8 budtab[256] = { | |||
145 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1 | 146 | 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1 |
146 | }; | 147 | }; |
147 | 148 | ||
148 | |||
149 | /* | 149 | /* |
150 | * NAME: dbMount() | 150 | * NAME: dbMount() |
151 | * | 151 | * |
@@ -310,7 +310,6 @@ int dbSync(struct inode *ipbmap) | |||
310 | return (0); | 310 | return (0); |
311 | } | 311 | } |
312 | 312 | ||
313 | |||
314 | /* | 313 | /* |
315 | * NAME: dbFree() | 314 | * NAME: dbFree() |
316 | * | 315 | * |
@@ -337,6 +336,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) | |||
337 | s64 lblkno, rem; | 336 | s64 lblkno, rem; |
338 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | 337 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; |
339 | struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; | 338 | struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; |
339 | struct super_block *sb = ipbmap->i_sb; | ||
340 | 340 | ||
341 | IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); | 341 | IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); |
342 | 342 | ||
@@ -351,6 +351,13 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) | |||
351 | return -EIO; | 351 | return -EIO; |
352 | } | 352 | } |
353 | 353 | ||
354 | /** | ||
355 | * TRIM the blocks, when mounted with discard option | ||
356 | */ | ||
357 | if (JFS_SBI(sb)->flag & JFS_DISCARD) | ||
358 | if (JFS_SBI(sb)->minblks_trim <= nblocks) | ||
359 | jfs_issue_discard(ipbmap, blkno, nblocks); | ||
360 | |||
354 | /* | 361 | /* |
355 | * free the blocks a dmap at a time. | 362 | * free the blocks a dmap at a time. |
356 | */ | 363 | */ |
@@ -1095,7 +1102,6 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) | |||
1095 | /* we were not successful */ | 1102 | /* we were not successful */ |
1096 | release_metapage(mp); | 1103 | release_metapage(mp); |
1097 | 1104 | ||
1098 | |||
1099 | return (rc); | 1105 | return (rc); |
1100 | } | 1106 | } |
1101 | 1107 | ||
@@ -1590,6 +1596,118 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) | |||
1590 | 1596 | ||
1591 | 1597 | ||
1592 | /* | 1598 | /* |
1599 | * NAME: dbDiscardAG() | ||
1600 | * | ||
1601 | * FUNCTION: attempt to discard (TRIM) all free blocks of specific AG | ||
1602 | * | ||
1603 | * algorithm: | ||
1604 | * 1) allocate blocks, as large as possible and save them | ||
1605 | * while holding IWRITE_LOCK on ipbmap | ||
1606 | * 2) trim all these saved block/length values | ||
1607 | * 3) mark the blocks free again | ||
1608 | * | ||
1609 | * benefit: | ||
1610 | * - we work only on one ag at some time, minimizing how long we | ||
1611 | * need to lock ipbmap | ||
1612 | * - reading / writing the fs is possible most time, even on | ||
1613 | * trimming | ||
1614 | * | ||
1615 | * downside: | ||
1616 | * - we write two times to the dmapctl and dmap pages | ||
1617 | * - but for me, this seems the best way, better ideas? | ||
1618 | * /TR 2012 | ||
1619 | * | ||
1620 | * PARAMETERS: | ||
1621 | * ip - pointer to in-core inode | ||
1622 | * agno - ag to trim | ||
1623 | * minlen - minimum value of contiguous blocks | ||
1624 | * | ||
1625 | * RETURN VALUES: | ||
1626 | * s64 - actual number of blocks trimmed | ||
1627 | */ | ||
1628 | s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) | ||
1629 | { | ||
1630 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | ||
1631 | struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; | ||
1632 | s64 nblocks, blkno; | ||
1633 | u64 trimmed = 0; | ||
1634 | int rc, l2nb; | ||
1635 | struct super_block *sb = ipbmap->i_sb; | ||
1636 | |||
1637 | struct range2trim { | ||
1638 | u64 blkno; | ||
1639 | u64 nblocks; | ||
1640 | } *totrim, *tt; | ||
1641 | |||
1642 | /* max blkno / nblocks pairs to trim */ | ||
1643 | int count = 0, range_cnt; | ||
1644 | u64 max_ranges; | ||
1645 | |||
1646 | /* prevent others from writing new stuff here, while trimming */ | ||
1647 | IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); | ||
1648 | |||
1649 | nblocks = bmp->db_agfree[agno]; | ||
1650 | max_ranges = nblocks; | ||
1651 | do_div(max_ranges, minlen); | ||
1652 | range_cnt = min_t(u64, max_ranges + 1, 32 * 1024); | ||
1653 | totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS); | ||
1654 | if (totrim == NULL) { | ||
1655 | jfs_error(bmp->db_ipbmap->i_sb, | ||
1656 | "dbDiscardAG: no memory for trim array"); | ||
1657 | IWRITE_UNLOCK(ipbmap); | ||
1658 | return 0; | ||
1659 | } | ||
1660 | |||
1661 | tt = totrim; | ||
1662 | while (nblocks >= minlen) { | ||
1663 | l2nb = BLKSTOL2(nblocks); | ||
1664 | |||
1665 | /* 0 = okay, -EIO = fatal, -ENOSPC -> try smaller block */ | ||
1666 | rc = dbAllocAG(bmp, agno, nblocks, l2nb, &blkno); | ||
1667 | if (rc == 0) { | ||
1668 | tt->blkno = blkno; | ||
1669 | tt->nblocks = nblocks; | ||
1670 | tt++; count++; | ||
1671 | |||
1672 | /* the whole ag is free, trim now */ | ||
1673 | if (bmp->db_agfree[agno] == 0) | ||
1674 | break; | ||
1675 | |||
1676 | /* give a hint for the next while */ | ||
1677 | nblocks = bmp->db_agfree[agno]; | ||
1678 | continue; | ||
1679 | } else if (rc == -ENOSPC) { | ||
1680 | /* search for next smaller log2 block */ | ||
1681 | l2nb = BLKSTOL2(nblocks) - 1; | ||
1682 | nblocks = 1 << l2nb; | ||
1683 | } else { | ||
1684 | /* Trim any already allocated blocks */ | ||
1685 | jfs_error(bmp->db_ipbmap->i_sb, | ||
1686 | "dbDiscardAG: -EIO"); | ||
1687 | break; | ||
1688 | } | ||
1689 | |||
1690 | /* check, if our trim array is full */ | ||
1691 | if (unlikely(count >= range_cnt - 1)) | ||
1692 | break; | ||
1693 | } | ||
1694 | IWRITE_UNLOCK(ipbmap); | ||
1695 | |||
1696 | tt->nblocks = 0; /* mark the current end */ | ||
1697 | for (tt = totrim; tt->nblocks != 0; tt++) { | ||
1698 | /* when mounted with online discard, dbFree() will | ||
1699 | * call jfs_issue_discard() itself */ | ||
1700 | if (!(JFS_SBI(sb)->flag & JFS_DISCARD)) | ||
1701 | jfs_issue_discard(ip, tt->blkno, tt->nblocks); | ||
1702 | dbFree(ip, tt->blkno, tt->nblocks); | ||
1703 | trimmed += tt->nblocks; | ||
1704 | } | ||
1705 | kfree(totrim); | ||
1706 | |||
1707 | return trimmed; | ||
1708 | } | ||
1709 | |||
1710 | /* | ||
1593 | * NAME: dbFindCtl() | 1711 | * NAME: dbFindCtl() |
1594 | * | 1712 | * |
1595 | * FUNCTION: starting at a specified dmap control page level and block | 1713 | * FUNCTION: starting at a specified dmap control page level and block |
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 6dcb906c55d8..562b9a7e4311 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h | |||
@@ -311,4 +311,6 @@ extern int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks); | |||
311 | extern int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks); | 311 | extern int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks); |
312 | extern void dbFinalizeBmap(struct inode *ipbmap); | 312 | extern void dbFinalizeBmap(struct inode *ipbmap); |
313 | extern s64 dbMapFileSizeToMapSize(struct inode *ipbmap); | 313 | extern s64 dbMapFileSizeToMapSize(struct inode *ipbmap); |
314 | extern s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen); | ||
315 | |||
314 | #endif /* _H_JFS_DMAP */ | 316 | #endif /* _H_JFS_DMAP */ |
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index b3f5463fbe52..b67d64671bb4 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h | |||
@@ -45,6 +45,9 @@ | |||
45 | /* mount time flag to disable journaling to disk */ | 45 | /* mount time flag to disable journaling to disk */ |
46 | #define JFS_NOINTEGRITY 0x00000040 | 46 | #define JFS_NOINTEGRITY 0x00000040 |
47 | 47 | ||
48 | /* mount time flag to enable TRIM to ssd disks */ | ||
49 | #define JFS_DISCARD 0x00000080 | ||
50 | |||
48 | /* commit option */ | 51 | /* commit option */ |
49 | #define JFS_COMMIT 0x00000f00 /* commit option mask */ | 52 | #define JFS_COMMIT 0x00000f00 /* commit option mask */ |
50 | #define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */ | 53 | #define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */ |
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 1b6f15f191b3..6ba4006e011b 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
@@ -3078,15 +3078,15 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) | |||
3078 | } | 3078 | } |
3079 | set_nlink(ip, le32_to_cpu(dip->di_nlink)); | 3079 | set_nlink(ip, le32_to_cpu(dip->di_nlink)); |
3080 | 3080 | ||
3081 | jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); | 3081 | jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid)); |
3082 | if (sbi->uid == -1) | 3082 | if (!uid_valid(sbi->uid)) |
3083 | ip->i_uid = jfs_ip->saved_uid; | 3083 | ip->i_uid = jfs_ip->saved_uid; |
3084 | else { | 3084 | else { |
3085 | ip->i_uid = sbi->uid; | 3085 | ip->i_uid = sbi->uid; |
3086 | } | 3086 | } |
3087 | 3087 | ||
3088 | jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); | 3088 | jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid)); |
3089 | if (sbi->gid == -1) | 3089 | if (!gid_valid(sbi->gid)) |
3090 | ip->i_gid = jfs_ip->saved_gid; | 3090 | ip->i_gid = jfs_ip->saved_gid; |
3091 | else { | 3091 | else { |
3092 | ip->i_gid = sbi->gid; | 3092 | ip->i_gid = sbi->gid; |
@@ -3150,14 +3150,16 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip) | |||
3150 | dip->di_size = cpu_to_le64(ip->i_size); | 3150 | dip->di_size = cpu_to_le64(ip->i_size); |
3151 | dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); | 3151 | dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); |
3152 | dip->di_nlink = cpu_to_le32(ip->i_nlink); | 3152 | dip->di_nlink = cpu_to_le32(ip->i_nlink); |
3153 | if (sbi->uid == -1) | 3153 | if (!uid_valid(sbi->uid)) |
3154 | dip->di_uid = cpu_to_le32(ip->i_uid); | 3154 | dip->di_uid = cpu_to_le32(i_uid_read(ip)); |
3155 | else | 3155 | else |
3156 | dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); | 3156 | dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns, |
3157 | if (sbi->gid == -1) | 3157 | jfs_ip->saved_uid)); |
3158 | dip->di_gid = cpu_to_le32(ip->i_gid); | 3158 | if (!gid_valid(sbi->gid)) |
3159 | dip->di_gid = cpu_to_le32(i_gid_read(ip)); | ||
3159 | else | 3160 | else |
3160 | dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); | 3161 | dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns, |
3162 | jfs_ip->saved_gid)); | ||
3161 | jfs_get_inode_flags(jfs_ip); | 3163 | jfs_get_inode_flags(jfs_ip); |
3162 | /* | 3164 | /* |
3163 | * mode2 is only needed for storing the higher order bits. | 3165 | * mode2 is only needed for storing the higher order bits. |
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 584a4a1a6e81..cf47f09e8ac8 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h | |||
@@ -38,8 +38,8 @@ | |||
38 | struct jfs_inode_info { | 38 | struct jfs_inode_info { |
39 | int fileset; /* fileset number (always 16)*/ | 39 | int fileset; /* fileset number (always 16)*/ |
40 | uint mode2; /* jfs-specific mode */ | 40 | uint mode2; /* jfs-specific mode */ |
41 | uint saved_uid; /* saved for uid mount option */ | 41 | kuid_t saved_uid; /* saved for uid mount option */ |
42 | uint saved_gid; /* saved for gid mount option */ | 42 | kgid_t saved_gid; /* saved for gid mount option */ |
43 | pxd_t ixpxd; /* inode extent descriptor */ | 43 | pxd_t ixpxd; /* inode extent descriptor */ |
44 | dxd_t acl; /* dxd describing acl */ | 44 | dxd_t acl; /* dxd describing acl */ |
45 | dxd_t ea; /* dxd describing ea */ | 45 | dxd_t ea; /* dxd describing ea */ |
@@ -192,9 +192,10 @@ struct jfs_sb_info { | |||
192 | uint state; /* mount/recovery state */ | 192 | uint state; /* mount/recovery state */ |
193 | unsigned long flag; /* mount time flags */ | 193 | unsigned long flag; /* mount time flags */ |
194 | uint p_state; /* state prior to going no integrity */ | 194 | uint p_state; /* state prior to going no integrity */ |
195 | uint uid; /* uid to override on-disk uid */ | 195 | kuid_t uid; /* uid to override on-disk uid */ |
196 | uint gid; /* gid to override on-disk gid */ | 196 | kgid_t gid; /* gid to override on-disk gid */ |
197 | uint umask; /* umask to override on-disk umask */ | 197 | uint umask; /* umask to override on-disk umask */ |
198 | uint minblks_trim; /* minimum blocks, for online trim */ | ||
198 | }; | 199 | }; |
199 | 200 | ||
200 | /* jfs_sb_info commit_state */ | 201 | /* jfs_sb_info commit_state */ |
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index bb8b661bcc50..5fcc02eaa64c 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c | |||
@@ -2977,12 +2977,9 @@ int jfs_sync(void *arg) | |||
2977 | * put back on the anon_list. | 2977 | * put back on the anon_list. |
2978 | */ | 2978 | */ |
2979 | 2979 | ||
2980 | /* Take off anon_list */ | 2980 | /* Move from anon_list to anon_list2 */ |
2981 | list_del(&jfs_ip->anon_inode_list); | 2981 | list_move(&jfs_ip->anon_inode_list, |
2982 | 2982 | &TxAnchor.anon_list2); | |
2983 | /* Put on anon_list2 */ | ||
2984 | list_add(&jfs_ip->anon_inode_list, | ||
2985 | &TxAnchor.anon_list2); | ||
2986 | 2983 | ||
2987 | TXN_UNLOCK(); | 2984 | TXN_UNLOCK(); |
2988 | iput(ip); | 2985 | iput(ip); |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index c55c7452d285..1a543be09c79 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/slab.h> | 33 | #include <linux/slab.h> |
34 | #include <asm/uaccess.h> | 34 | #include <asm/uaccess.h> |
35 | #include <linux/seq_file.h> | 35 | #include <linux/seq_file.h> |
36 | #include <linux/blkdev.h> | ||
36 | 37 | ||
37 | #include "jfs_incore.h" | 38 | #include "jfs_incore.h" |
38 | #include "jfs_filsys.h" | 39 | #include "jfs_filsys.h" |
@@ -100,7 +101,7 @@ void jfs_error(struct super_block *sb, const char * function, ...) | |||
100 | vsnprintf(error_buf, sizeof(error_buf), function, args); | 101 | vsnprintf(error_buf, sizeof(error_buf), function, args); |
101 | va_end(args); | 102 | va_end(args); |
102 | 103 | ||
103 | printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf); | 104 | pr_err("ERROR: (device %s): %s\n", sb->s_id, error_buf); |
104 | 105 | ||
105 | jfs_handle_error(sb); | 106 | jfs_handle_error(sb); |
106 | } | 107 | } |
@@ -197,7 +198,8 @@ static void jfs_put_super(struct super_block *sb) | |||
197 | enum { | 198 | enum { |
198 | Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, | 199 | Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, |
199 | Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, | 200 | Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, |
200 | Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask | 201 | Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask, |
202 | Opt_discard, Opt_nodiscard, Opt_discard_minblk | ||
201 | }; | 203 | }; |
202 | 204 | ||
203 | static const match_table_t tokens = { | 205 | static const match_table_t tokens = { |
@@ -214,6 +216,9 @@ static const match_table_t tokens = { | |||
214 | {Opt_uid, "uid=%u"}, | 216 | {Opt_uid, "uid=%u"}, |
215 | {Opt_gid, "gid=%u"}, | 217 | {Opt_gid, "gid=%u"}, |
216 | {Opt_umask, "umask=%u"}, | 218 | {Opt_umask, "umask=%u"}, |
219 | {Opt_discard, "discard"}, | ||
220 | {Opt_nodiscard, "nodiscard"}, | ||
221 | {Opt_discard_minblk, "discard=%u"}, | ||
217 | {Opt_err, NULL} | 222 | {Opt_err, NULL} |
218 | }; | 223 | }; |
219 | 224 | ||
@@ -255,8 +260,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, | |||
255 | else { | 260 | else { |
256 | nls_map = load_nls(args[0].from); | 261 | nls_map = load_nls(args[0].from); |
257 | if (!nls_map) { | 262 | if (!nls_map) { |
258 | printk(KERN_ERR | 263 | pr_err("JFS: charset not found\n"); |
259 | "JFS: charset not found\n"); | ||
260 | goto cleanup; | 264 | goto cleanup; |
261 | } | 265 | } |
262 | } | 266 | } |
@@ -272,8 +276,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, | |||
272 | *newLVSize = sb->s_bdev->bd_inode->i_size >> | 276 | *newLVSize = sb->s_bdev->bd_inode->i_size >> |
273 | sb->s_blocksize_bits; | 277 | sb->s_blocksize_bits; |
274 | if (*newLVSize == 0) | 278 | if (*newLVSize == 0) |
275 | printk(KERN_ERR | 279 | pr_err("JFS: Cannot determine volume size\n"); |
276 | "JFS: Cannot determine volume size\n"); | ||
277 | break; | 280 | break; |
278 | } | 281 | } |
279 | case Opt_errors: | 282 | case Opt_errors: |
@@ -294,8 +297,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, | |||
294 | *flag &= ~JFS_ERR_REMOUNT_RO; | 297 | *flag &= ~JFS_ERR_REMOUNT_RO; |
295 | *flag |= JFS_ERR_PANIC; | 298 | *flag |= JFS_ERR_PANIC; |
296 | } else { | 299 | } else { |
297 | printk(KERN_ERR | 300 | pr_err("JFS: %s is an invalid error handler\n", |
298 | "JFS: %s is an invalid error handler\n", | ||
299 | errors); | 301 | errors); |
300 | goto cleanup; | 302 | goto cleanup; |
301 | } | 303 | } |
@@ -314,33 +316,76 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, | |||
314 | case Opt_usrquota: | 316 | case Opt_usrquota: |
315 | case Opt_grpquota: | 317 | case Opt_grpquota: |
316 | case Opt_quota: | 318 | case Opt_quota: |
317 | printk(KERN_ERR | 319 | pr_err("JFS: quota operations not supported\n"); |
318 | "JFS: quota operations not supported\n"); | ||
319 | break; | 320 | break; |
320 | #endif | 321 | #endif |
321 | case Opt_uid: | 322 | case Opt_uid: |
322 | { | 323 | { |
323 | char *uid = args[0].from; | 324 | char *uid = args[0].from; |
324 | sbi->uid = simple_strtoul(uid, &uid, 0); | 325 | uid_t val = simple_strtoul(uid, &uid, 0); |
326 | sbi->uid = make_kuid(current_user_ns(), val); | ||
327 | if (!uid_valid(sbi->uid)) | ||
328 | goto cleanup; | ||
325 | break; | 329 | break; |
326 | } | 330 | } |
331 | |||
327 | case Opt_gid: | 332 | case Opt_gid: |
328 | { | 333 | { |
329 | char *gid = args[0].from; | 334 | char *gid = args[0].from; |
330 | sbi->gid = simple_strtoul(gid, &gid, 0); | 335 | gid_t val = simple_strtoul(gid, &gid, 0); |
336 | sbi->gid = make_kgid(current_user_ns(), val); | ||
337 | if (!gid_valid(sbi->gid)) | ||
338 | goto cleanup; | ||
331 | break; | 339 | break; |
332 | } | 340 | } |
341 | |||
333 | case Opt_umask: | 342 | case Opt_umask: |
334 | { | 343 | { |
335 | char *umask = args[0].from; | 344 | char *umask = args[0].from; |
336 | sbi->umask = simple_strtoul(umask, &umask, 8); | 345 | sbi->umask = simple_strtoul(umask, &umask, 8); |
337 | if (sbi->umask & ~0777) { | 346 | if (sbi->umask & ~0777) { |
338 | printk(KERN_ERR | 347 | pr_err("JFS: Invalid value of umask\n"); |
339 | "JFS: Invalid value of umask\n"); | ||
340 | goto cleanup; | 348 | goto cleanup; |
341 | } | 349 | } |
342 | break; | 350 | break; |
343 | } | 351 | } |
352 | |||
353 | case Opt_discard: | ||
354 | { | ||
355 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | ||
356 | /* if set to 1, even copying files will cause | ||
357 | * trimming :O | ||
358 | * -> user has more control over the online trimming | ||
359 | */ | ||
360 | sbi->minblks_trim = 64; | ||
361 | if (blk_queue_discard(q)) { | ||
362 | *flag |= JFS_DISCARD; | ||
363 | } else { | ||
364 | pr_err("JFS: discard option " \ | ||
365 | "not supported on device\n"); | ||
366 | } | ||
367 | break; | ||
368 | } | ||
369 | |||
370 | case Opt_nodiscard: | ||
371 | *flag &= ~JFS_DISCARD; | ||
372 | break; | ||
373 | |||
374 | case Opt_discard_minblk: | ||
375 | { | ||
376 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | ||
377 | char *minblks_trim = args[0].from; | ||
378 | if (blk_queue_discard(q)) { | ||
379 | *flag |= JFS_DISCARD; | ||
380 | sbi->minblks_trim = simple_strtoull( | ||
381 | minblks_trim, &minblks_trim, 0); | ||
382 | } else { | ||
383 | pr_err("JFS: discard option " \ | ||
384 | "not supported on device\n"); | ||
385 | } | ||
386 | break; | ||
387 | } | ||
388 | |||
344 | default: | 389 | default: |
345 | printk("jfs: Unrecognized mount option \"%s\" " | 390 | printk("jfs: Unrecognized mount option \"%s\" " |
346 | " or missing value\n", p); | 391 | " or missing value\n", p); |
@@ -374,8 +419,8 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) | |||
374 | 419 | ||
375 | if (newLVSize) { | 420 | if (newLVSize) { |
376 | if (sb->s_flags & MS_RDONLY) { | 421 | if (sb->s_flags & MS_RDONLY) { |
377 | printk(KERN_ERR | 422 | pr_err("JFS: resize requires volume" \ |
378 | "JFS: resize requires volume to be mounted read-write\n"); | 423 | " to be mounted read-write\n"); |
379 | return -EROFS; | 424 | return -EROFS; |
380 | } | 425 | } |
381 | rc = jfs_extendfs(sb, newLVSize, 0); | 426 | rc = jfs_extendfs(sb, newLVSize, 0); |
@@ -443,7 +488,9 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) | |||
443 | sb->s_fs_info = sbi; | 488 | sb->s_fs_info = sbi; |
444 | sb->s_max_links = JFS_LINK_MAX; | 489 | sb->s_max_links = JFS_LINK_MAX; |
445 | sbi->sb = sb; | 490 | sbi->sb = sb; |
446 | sbi->uid = sbi->gid = sbi->umask = -1; | 491 | sbi->uid = INVALID_UID; |
492 | sbi->gid = INVALID_GID; | ||
493 | sbi->umask = -1; | ||
447 | 494 | ||
448 | /* initialize the mount flag and determine the default error handler */ | 495 | /* initialize the mount flag and determine the default error handler */ |
449 | flag = JFS_ERR_REMOUNT_RO; | 496 | flag = JFS_ERR_REMOUNT_RO; |
@@ -457,7 +504,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) | |||
457 | #endif | 504 | #endif |
458 | 505 | ||
459 | if (newLVSize) { | 506 | if (newLVSize) { |
460 | printk(KERN_ERR "resize option for remount only\n"); | 507 | pr_err("resize option for remount only\n"); |
461 | goto out_kfree; | 508 | goto out_kfree; |
462 | } | 509 | } |
463 | 510 | ||
@@ -617,14 +664,16 @@ static int jfs_show_options(struct seq_file *seq, struct dentry *root) | |||
617 | { | 664 | { |
618 | struct jfs_sb_info *sbi = JFS_SBI(root->d_sb); | 665 | struct jfs_sb_info *sbi = JFS_SBI(root->d_sb); |
619 | 666 | ||
620 | if (sbi->uid != -1) | 667 | if (uid_valid(sbi->uid)) |
621 | seq_printf(seq, ",uid=%d", sbi->uid); | 668 | seq_printf(seq, ",uid=%d", from_kuid(&init_user_ns, sbi->uid)); |
622 | if (sbi->gid != -1) | 669 | if (gid_valid(sbi->gid)) |
623 | seq_printf(seq, ",gid=%d", sbi->gid); | 670 | seq_printf(seq, ",gid=%d", from_kgid(&init_user_ns, sbi->gid)); |
624 | if (sbi->umask != -1) | 671 | if (sbi->umask != -1) |
625 | seq_printf(seq, ",umask=%03o", sbi->umask); | 672 | seq_printf(seq, ",umask=%03o", sbi->umask); |
626 | if (sbi->flag & JFS_NOINTEGRITY) | 673 | if (sbi->flag & JFS_NOINTEGRITY) |
627 | seq_puts(seq, ",nointegrity"); | 674 | seq_puts(seq, ",nointegrity"); |
675 | if (sbi->flag & JFS_DISCARD) | ||
676 | seq_printf(seq, ",discard=%u", sbi->minblks_trim); | ||
628 | if (sbi->nls_tab) | 677 | if (sbi->nls_tab) |
629 | seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset); | 678 | seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset); |
630 | if (sbi->flag & JFS_ERR_CONTINUE) | 679 | if (sbi->flag & JFS_ERR_CONTINUE) |
@@ -903,6 +952,12 @@ static void __exit exit_jfs_fs(void) | |||
903 | jfs_proc_clean(); | 952 | jfs_proc_clean(); |
904 | #endif | 953 | #endif |
905 | unregister_filesystem(&jfs_fs_type); | 954 | unregister_filesystem(&jfs_fs_type); |
955 | |||
956 | /* | ||
957 | * Make sure all delayed rcu free inodes are flushed before we | ||
958 | * destroy cache. | ||
959 | */ | ||
960 | rcu_barrier(); | ||
906 | kmem_cache_destroy(jfs_inode_cachep); | 961 | kmem_cache_destroy(jfs_inode_cachep); |
907 | } | 962 | } |
908 | 963 | ||
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 26683e15b3ac..42d67f9757bf 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c | |||
@@ -685,7 +685,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, | |||
685 | * POSIX_ACL_XATTR_ACCESS is tied to i_mode | 685 | * POSIX_ACL_XATTR_ACCESS is tied to i_mode |
686 | */ | 686 | */ |
687 | if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) { | 687 | if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) { |
688 | acl = posix_acl_from_xattr(value, value_len); | 688 | acl = posix_acl_from_xattr(&init_user_ns, value, value_len); |
689 | if (IS_ERR(acl)) { | 689 | if (IS_ERR(acl)) { |
690 | rc = PTR_ERR(acl); | 690 | rc = PTR_ERR(acl); |
691 | printk(KERN_ERR "posix_acl_from_xattr returned %d\n", | 691 | printk(KERN_ERR "posix_acl_from_xattr returned %d\n", |
@@ -710,7 +710,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, | |||
710 | 710 | ||
711 | return 0; | 711 | return 0; |
712 | } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { | 712 | } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { |
713 | acl = posix_acl_from_xattr(value, value_len); | 713 | acl = posix_acl_from_xattr(&init_user_ns, value, value_len); |
714 | if (IS_ERR(acl)) { | 714 | if (IS_ERR(acl)) { |
715 | rc = PTR_ERR(acl); | 715 | rc = PTR_ERR(acl); |
716 | printk(KERN_ERR "posix_acl_from_xattr returned %d\n", | 716 | printk(KERN_ERR "posix_acl_from_xattr returned %d\n", |
diff --git a/fs/libfs.c b/fs/libfs.c index a74cb1725ac6..7cc37ca19cd8 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -874,7 +874,7 @@ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, | |||
874 | EXPORT_SYMBOL_GPL(generic_fh_to_dentry); | 874 | EXPORT_SYMBOL_GPL(generic_fh_to_dentry); |
875 | 875 | ||
876 | /** | 876 | /** |
877 | * generic_fh_to_dentry - generic helper for the fh_to_parent export operation | 877 | * generic_fh_to_parent - generic helper for the fh_to_parent export operation |
878 | * @sb: filesystem to do the file handle conversion on | 878 | * @sb: filesystem to do the file handle conversion on |
879 | * @fid: file handle to convert | 879 | * @fid: file handle to convert |
880 | * @fh_len: length of the file handle in bytes | 880 | * @fh_len: length of the file handle in bytes |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index fb1a2bedbe97..8d80c990dffd 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -289,7 +289,6 @@ static void nlmsvc_free_block(struct kref *kref) | |||
289 | dprintk("lockd: freeing block %p...\n", block); | 289 | dprintk("lockd: freeing block %p...\n", block); |
290 | 290 | ||
291 | /* Remove block from file's list of blocks */ | 291 | /* Remove block from file's list of blocks */ |
292 | mutex_lock(&file->f_mutex); | ||
293 | list_del_init(&block->b_flist); | 292 | list_del_init(&block->b_flist); |
294 | mutex_unlock(&file->f_mutex); | 293 | mutex_unlock(&file->f_mutex); |
295 | 294 | ||
@@ -303,7 +302,7 @@ static void nlmsvc_free_block(struct kref *kref) | |||
303 | static void nlmsvc_release_block(struct nlm_block *block) | 302 | static void nlmsvc_release_block(struct nlm_block *block) |
304 | { | 303 | { |
305 | if (block != NULL) | 304 | if (block != NULL) |
306 | kref_put(&block->b_count, nlmsvc_free_block); | 305 | kref_put_mutex(&block->b_count, nlmsvc_free_block, &block->b_file->f_mutex); |
307 | } | 306 | } |
308 | 307 | ||
309 | /* | 308 | /* |
diff --git a/fs/locks.c b/fs/locks.c index 7e81bfc75164..abc7dc6c490b 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -1625,15 +1625,13 @@ EXPORT_SYMBOL(flock_lock_file_wait); | |||
1625 | */ | 1625 | */ |
1626 | SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) | 1626 | SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) |
1627 | { | 1627 | { |
1628 | struct file *filp; | 1628 | struct fd f = fdget(fd); |
1629 | int fput_needed; | ||
1630 | struct file_lock *lock; | 1629 | struct file_lock *lock; |
1631 | int can_sleep, unlock; | 1630 | int can_sleep, unlock; |
1632 | int error; | 1631 | int error; |
1633 | 1632 | ||
1634 | error = -EBADF; | 1633 | error = -EBADF; |
1635 | filp = fget_light(fd, &fput_needed); | 1634 | if (!f.file) |
1636 | if (!filp) | ||
1637 | goto out; | 1635 | goto out; |
1638 | 1636 | ||
1639 | can_sleep = !(cmd & LOCK_NB); | 1637 | can_sleep = !(cmd & LOCK_NB); |
@@ -1641,31 +1639,31 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) | |||
1641 | unlock = (cmd == LOCK_UN); | 1639 | unlock = (cmd == LOCK_UN); |
1642 | 1640 | ||
1643 | if (!unlock && !(cmd & LOCK_MAND) && | 1641 | if (!unlock && !(cmd & LOCK_MAND) && |
1644 | !(filp->f_mode & (FMODE_READ|FMODE_WRITE))) | 1642 | !(f.file->f_mode & (FMODE_READ|FMODE_WRITE))) |
1645 | goto out_putf; | 1643 | goto out_putf; |
1646 | 1644 | ||
1647 | error = flock_make_lock(filp, &lock, cmd); | 1645 | error = flock_make_lock(f.file, &lock, cmd); |
1648 | if (error) | 1646 | if (error) |
1649 | goto out_putf; | 1647 | goto out_putf; |
1650 | if (can_sleep) | 1648 | if (can_sleep) |
1651 | lock->fl_flags |= FL_SLEEP; | 1649 | lock->fl_flags |= FL_SLEEP; |
1652 | 1650 | ||
1653 | error = security_file_lock(filp, lock->fl_type); | 1651 | error = security_file_lock(f.file, lock->fl_type); |
1654 | if (error) | 1652 | if (error) |
1655 | goto out_free; | 1653 | goto out_free; |
1656 | 1654 | ||
1657 | if (filp->f_op && filp->f_op->flock) | 1655 | if (f.file->f_op && f.file->f_op->flock) |
1658 | error = filp->f_op->flock(filp, | 1656 | error = f.file->f_op->flock(f.file, |
1659 | (can_sleep) ? F_SETLKW : F_SETLK, | 1657 | (can_sleep) ? F_SETLKW : F_SETLK, |
1660 | lock); | 1658 | lock); |
1661 | else | 1659 | else |
1662 | error = flock_lock_file_wait(filp, lock); | 1660 | error = flock_lock_file_wait(f.file, lock); |
1663 | 1661 | ||
1664 | out_free: | 1662 | out_free: |
1665 | locks_free_lock(lock); | 1663 | locks_free_lock(lock); |
1666 | 1664 | ||
1667 | out_putf: | 1665 | out_putf: |
1668 | fput_light(filp, fput_needed); | 1666 | fdput(f); |
1669 | out: | 1667 | out: |
1670 | return error; | 1668 | return error; |
1671 | } | 1669 | } |
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index df0de27c2733..e784a217b500 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c | |||
@@ -26,6 +26,7 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw) | |||
26 | struct completion complete; | 26 | struct completion complete; |
27 | 27 | ||
28 | bio_init(&bio); | 28 | bio_init(&bio); |
29 | bio.bi_max_vecs = 1; | ||
29 | bio.bi_io_vec = &bio_vec; | 30 | bio.bi_io_vec = &bio_vec; |
30 | bio_vec.bv_page = page; | 31 | bio_vec.bv_page = page; |
31 | bio_vec.bv_len = PAGE_SIZE; | 32 | bio_vec.bv_len = PAGE_SIZE; |
@@ -95,12 +96,11 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, | |||
95 | struct address_space *mapping = super->s_mapping_inode->i_mapping; | 96 | struct address_space *mapping = super->s_mapping_inode->i_mapping; |
96 | struct bio *bio; | 97 | struct bio *bio; |
97 | struct page *page; | 98 | struct page *page; |
98 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | 99 | unsigned int max_pages; |
99 | unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); | ||
100 | int i; | 100 | int i; |
101 | 101 | ||
102 | if (max_pages > BIO_MAX_PAGES) | 102 | max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev)); |
103 | max_pages = BIO_MAX_PAGES; | 103 | |
104 | bio = bio_alloc(GFP_NOFS, max_pages); | 104 | bio = bio_alloc(GFP_NOFS, max_pages); |
105 | BUG_ON(!bio); | 105 | BUG_ON(!bio); |
106 | 106 | ||
@@ -190,12 +190,11 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, | |||
190 | { | 190 | { |
191 | struct logfs_super *super = logfs_super(sb); | 191 | struct logfs_super *super = logfs_super(sb); |
192 | struct bio *bio; | 192 | struct bio *bio; |
193 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | 193 | unsigned int max_pages; |
194 | unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); | ||
195 | int i; | 194 | int i; |
196 | 195 | ||
197 | if (max_pages > BIO_MAX_PAGES) | 196 | max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev)); |
198 | max_pages = BIO_MAX_PAGES; | 197 | |
199 | bio = bio_alloc(GFP_NOFS, max_pages); | 198 | bio = bio_alloc(GFP_NOFS, max_pages); |
200 | BUG_ON(!bio); | 199 | BUG_ON(!bio); |
201 | 200 | ||
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index a422f42238b2..adb90116d36b 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c | |||
@@ -156,10 +156,26 @@ static void __logfs_destroy_inode(struct inode *inode) | |||
156 | call_rcu(&inode->i_rcu, logfs_i_callback); | 156 | call_rcu(&inode->i_rcu, logfs_i_callback); |
157 | } | 157 | } |
158 | 158 | ||
159 | static void __logfs_destroy_meta_inode(struct inode *inode) | ||
160 | { | ||
161 | struct logfs_inode *li = logfs_inode(inode); | ||
162 | BUG_ON(li->li_block); | ||
163 | call_rcu(&inode->i_rcu, logfs_i_callback); | ||
164 | } | ||
165 | |||
159 | static void logfs_destroy_inode(struct inode *inode) | 166 | static void logfs_destroy_inode(struct inode *inode) |
160 | { | 167 | { |
161 | struct logfs_inode *li = logfs_inode(inode); | 168 | struct logfs_inode *li = logfs_inode(inode); |
162 | 169 | ||
170 | if (inode->i_ino < LOGFS_RESERVED_INOS) { | ||
171 | /* | ||
172 | * The reserved inodes are never destroyed unless we are in | ||
173 | * unmont path. | ||
174 | */ | ||
175 | __logfs_destroy_meta_inode(inode); | ||
176 | return; | ||
177 | } | ||
178 | |||
163 | BUG_ON(list_empty(&li->li_freeing_list)); | 179 | BUG_ON(list_empty(&li->li_freeing_list)); |
164 | spin_lock(&logfs_inode_lock); | 180 | spin_lock(&logfs_inode_lock); |
165 | li->li_refcount--; | 181 | li->li_refcount--; |
@@ -192,8 +208,8 @@ static void logfs_init_inode(struct super_block *sb, struct inode *inode) | |||
192 | li->li_height = 0; | 208 | li->li_height = 0; |
193 | li->li_used_bytes = 0; | 209 | li->li_used_bytes = 0; |
194 | li->li_block = NULL; | 210 | li->li_block = NULL; |
195 | inode->i_uid = 0; | 211 | i_uid_write(inode, 0); |
196 | inode->i_gid = 0; | 212 | i_gid_write(inode, 0); |
197 | inode->i_size = 0; | 213 | inode->i_size = 0; |
198 | inode->i_blocks = 0; | 214 | inode->i_blocks = 0; |
199 | inode->i_ctime = CURRENT_TIME; | 215 | inode->i_ctime = CURRENT_TIME; |
@@ -373,8 +389,8 @@ static void logfs_put_super(struct super_block *sb) | |||
373 | { | 389 | { |
374 | struct logfs_super *super = logfs_super(sb); | 390 | struct logfs_super *super = logfs_super(sb); |
375 | /* kill the meta-inodes */ | 391 | /* kill the meta-inodes */ |
376 | iput(super->s_master_inode); | ||
377 | iput(super->s_segfile_inode); | 392 | iput(super->s_segfile_inode); |
393 | iput(super->s_master_inode); | ||
378 | iput(super->s_mapping_inode); | 394 | iput(super->s_mapping_inode); |
379 | } | 395 | } |
380 | 396 | ||
@@ -401,5 +417,10 @@ int logfs_init_inode_cache(void) | |||
401 | 417 | ||
402 | void logfs_destroy_inode_cache(void) | 418 | void logfs_destroy_inode_cache(void) |
403 | { | 419 | { |
420 | /* | ||
421 | * Make sure all delayed rcu free inodes are flushed before we | ||
422 | * destroy cache. | ||
423 | */ | ||
424 | rcu_barrier(); | ||
404 | kmem_cache_destroy(logfs_inode_cache); | 425 | kmem_cache_destroy(logfs_inode_cache); |
405 | } | 426 | } |
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 1e1c369df22b..2a09b8d73989 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c | |||
@@ -565,7 +565,7 @@ static void write_wbuf(struct super_block *sb, struct logfs_area *area, | |||
565 | index = ofs >> PAGE_SHIFT; | 565 | index = ofs >> PAGE_SHIFT; |
566 | page_ofs = ofs & (PAGE_SIZE - 1); | 566 | page_ofs = ofs & (PAGE_SIZE - 1); |
567 | 567 | ||
568 | page = find_lock_page(mapping, index); | 568 | page = find_or_create_page(mapping, index, GFP_NOFS); |
569 | BUG_ON(!page); | 569 | BUG_ON(!page); |
570 | memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize); | 570 | memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize); |
571 | unlock_page(page); | 571 | unlock_page(page); |
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index f1cb512c5019..e1a3b6bf6324 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c | |||
@@ -119,8 +119,8 @@ static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode*inode) | |||
119 | inode->i_mode = be16_to_cpu(di->di_mode); | 119 | inode->i_mode = be16_to_cpu(di->di_mode); |
120 | li->li_height = di->di_height; | 120 | li->li_height = di->di_height; |
121 | li->li_flags = be32_to_cpu(di->di_flags); | 121 | li->li_flags = be32_to_cpu(di->di_flags); |
122 | inode->i_uid = be32_to_cpu(di->di_uid); | 122 | i_uid_write(inode, be32_to_cpu(di->di_uid)); |
123 | inode->i_gid = be32_to_cpu(di->di_gid); | 123 | i_gid_write(inode, be32_to_cpu(di->di_gid)); |
124 | inode->i_size = be64_to_cpu(di->di_size); | 124 | inode->i_size = be64_to_cpu(di->di_size); |
125 | logfs_set_blocks(inode, be64_to_cpu(di->di_used_bytes)); | 125 | logfs_set_blocks(inode, be64_to_cpu(di->di_used_bytes)); |
126 | inode->i_atime = be64_to_timespec(di->di_atime); | 126 | inode->i_atime = be64_to_timespec(di->di_atime); |
@@ -156,8 +156,8 @@ static void logfs_inode_to_disk(struct inode *inode, struct logfs_disk_inode*di) | |||
156 | di->di_height = li->li_height; | 156 | di->di_height = li->li_height; |
157 | di->di_pad = 0; | 157 | di->di_pad = 0; |
158 | di->di_flags = cpu_to_be32(li->li_flags); | 158 | di->di_flags = cpu_to_be32(li->li_flags); |
159 | di->di_uid = cpu_to_be32(inode->i_uid); | 159 | di->di_uid = cpu_to_be32(i_uid_read(inode)); |
160 | di->di_gid = cpu_to_be32(inode->i_gid); | 160 | di->di_gid = cpu_to_be32(i_gid_read(inode)); |
161 | di->di_size = cpu_to_be64(i_size_read(inode)); | 161 | di->di_size = cpu_to_be64(i_size_read(inode)); |
162 | di->di_used_bytes = cpu_to_be64(li->li_used_bytes); | 162 | di->di_used_bytes = cpu_to_be64(li->li_used_bytes); |
163 | di->di_atime = timespec_to_be64(inode->i_atime); | 163 | di->di_atime = timespec_to_be64(inode->i_atime); |
@@ -2189,7 +2189,6 @@ void logfs_evict_inode(struct inode *inode) | |||
2189 | return; | 2189 | return; |
2190 | } | 2190 | } |
2191 | 2191 | ||
2192 | BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS); | ||
2193 | page = inode_to_page(inode); | 2192 | page = inode_to_page(inode); |
2194 | BUG_ON(!page); /* FIXME: Use emergency page */ | 2193 | BUG_ON(!page); /* FIXME: Use emergency page */ |
2195 | logfs_put_write_page(page); | 2194 | logfs_put_write_page(page); |
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index e28d090c98d6..038da0991794 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c | |||
@@ -886,7 +886,7 @@ static struct logfs_area *alloc_area(struct super_block *sb) | |||
886 | 886 | ||
887 | static void map_invalidatepage(struct page *page, unsigned long l) | 887 | static void map_invalidatepage(struct page *page, unsigned long l) |
888 | { | 888 | { |
889 | BUG(); | 889 | return; |
890 | } | 890 | } |
891 | 891 | ||
892 | static int map_releasepage(struct page *page, gfp_t g) | 892 | static int map_releasepage(struct page *page, gfp_t g) |
diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 2a503ad020d5..4fc5f8ab1c44 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c | |||
@@ -100,6 +100,11 @@ static int init_inodecache(void) | |||
100 | 100 | ||
101 | static void destroy_inodecache(void) | 101 | static void destroy_inodecache(void) |
102 | { | 102 | { |
103 | /* | ||
104 | * Make sure all delayed rcu free inodes are flushed before we | ||
105 | * destroy cache. | ||
106 | */ | ||
107 | rcu_barrier(); | ||
103 | kmem_cache_destroy(minix_inode_cachep); | 108 | kmem_cache_destroy(minix_inode_cachep); |
104 | } | 109 | } |
105 | 110 | ||
@@ -460,8 +465,8 @@ static struct inode *V1_minix_iget(struct inode *inode) | |||
460 | return ERR_PTR(-EIO); | 465 | return ERR_PTR(-EIO); |
461 | } | 466 | } |
462 | inode->i_mode = raw_inode->i_mode; | 467 | inode->i_mode = raw_inode->i_mode; |
463 | inode->i_uid = (uid_t)raw_inode->i_uid; | 468 | i_uid_write(inode, raw_inode->i_uid); |
464 | inode->i_gid = (gid_t)raw_inode->i_gid; | 469 | i_gid_write(inode, raw_inode->i_gid); |
465 | set_nlink(inode, raw_inode->i_nlinks); | 470 | set_nlink(inode, raw_inode->i_nlinks); |
466 | inode->i_size = raw_inode->i_size; | 471 | inode->i_size = raw_inode->i_size; |
467 | inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time; | 472 | inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time; |
@@ -493,8 +498,8 @@ static struct inode *V2_minix_iget(struct inode *inode) | |||
493 | return ERR_PTR(-EIO); | 498 | return ERR_PTR(-EIO); |
494 | } | 499 | } |
495 | inode->i_mode = raw_inode->i_mode; | 500 | inode->i_mode = raw_inode->i_mode; |
496 | inode->i_uid = (uid_t)raw_inode->i_uid; | 501 | i_uid_write(inode, raw_inode->i_uid); |
497 | inode->i_gid = (gid_t)raw_inode->i_gid; | 502 | i_gid_write(inode, raw_inode->i_gid); |
498 | set_nlink(inode, raw_inode->i_nlinks); | 503 | set_nlink(inode, raw_inode->i_nlinks); |
499 | inode->i_size = raw_inode->i_size; | 504 | inode->i_size = raw_inode->i_size; |
500 | inode->i_mtime.tv_sec = raw_inode->i_mtime; | 505 | inode->i_mtime.tv_sec = raw_inode->i_mtime; |
@@ -545,8 +550,8 @@ static struct buffer_head * V1_minix_update_inode(struct inode * inode) | |||
545 | if (!raw_inode) | 550 | if (!raw_inode) |
546 | return NULL; | 551 | return NULL; |
547 | raw_inode->i_mode = inode->i_mode; | 552 | raw_inode->i_mode = inode->i_mode; |
548 | raw_inode->i_uid = fs_high2lowuid(inode->i_uid); | 553 | raw_inode->i_uid = fs_high2lowuid(i_uid_read(inode)); |
549 | raw_inode->i_gid = fs_high2lowgid(inode->i_gid); | 554 | raw_inode->i_gid = fs_high2lowgid(i_gid_read(inode)); |
550 | raw_inode->i_nlinks = inode->i_nlink; | 555 | raw_inode->i_nlinks = inode->i_nlink; |
551 | raw_inode->i_size = inode->i_size; | 556 | raw_inode->i_size = inode->i_size; |
552 | raw_inode->i_time = inode->i_mtime.tv_sec; | 557 | raw_inode->i_time = inode->i_mtime.tv_sec; |
@@ -572,8 +577,8 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode) | |||
572 | if (!raw_inode) | 577 | if (!raw_inode) |
573 | return NULL; | 578 | return NULL; |
574 | raw_inode->i_mode = inode->i_mode; | 579 | raw_inode->i_mode = inode->i_mode; |
575 | raw_inode->i_uid = fs_high2lowuid(inode->i_uid); | 580 | raw_inode->i_uid = fs_high2lowuid(i_uid_read(inode)); |
576 | raw_inode->i_gid = fs_high2lowgid(inode->i_gid); | 581 | raw_inode->i_gid = fs_high2lowgid(i_gid_read(inode)); |
577 | raw_inode->i_nlinks = inode->i_nlink; | 582 | raw_inode->i_nlinks = inode->i_nlink; |
578 | raw_inode->i_size = inode->i_size; | 583 | raw_inode->i_size = inode->i_size; |
579 | raw_inode->i_mtime = inode->i_mtime.tv_sec; | 584 | raw_inode->i_mtime = inode->i_mtime.tv_sec; |
diff --git a/fs/namei.c b/fs/namei.c index 1b464390dde8..aa30d19e9edd 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -352,6 +352,7 @@ int __inode_permission(struct inode *inode, int mask) | |||
352 | /** | 352 | /** |
353 | * sb_permission - Check superblock-level permissions | 353 | * sb_permission - Check superblock-level permissions |
354 | * @sb: Superblock of inode to check permission on | 354 | * @sb: Superblock of inode to check permission on |
355 | * @inode: Inode to check permission on | ||
355 | * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) | 356 | * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) |
356 | * | 357 | * |
357 | * Separate out file-system wide checks from inode-specific permission checks. | 358 | * Separate out file-system wide checks from inode-specific permission checks. |
@@ -656,6 +657,7 @@ int sysctl_protected_hardlinks __read_mostly = 1; | |||
656 | /** | 657 | /** |
657 | * may_follow_link - Check symlink following for unsafe situations | 658 | * may_follow_link - Check symlink following for unsafe situations |
658 | * @link: The path of the symlink | 659 | * @link: The path of the symlink |
660 | * @nd: nameidata pathwalk data | ||
659 | * | 661 | * |
660 | * In the case of the sysctl_protected_symlinks sysctl being enabled, | 662 | * In the case of the sysctl_protected_symlinks sysctl being enabled, |
661 | * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is | 663 | * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is |
@@ -678,7 +680,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd) | |||
678 | 680 | ||
679 | /* Allowed if owner and follower match. */ | 681 | /* Allowed if owner and follower match. */ |
680 | inode = link->dentry->d_inode; | 682 | inode = link->dentry->d_inode; |
681 | if (current_cred()->fsuid == inode->i_uid) | 683 | if (uid_eq(current_cred()->fsuid, inode->i_uid)) |
682 | return 0; | 684 | return 0; |
683 | 685 | ||
684 | /* Allowed if parent directory not sticky and world-writable. */ | 686 | /* Allowed if parent directory not sticky and world-writable. */ |
@@ -687,7 +689,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd) | |||
687 | return 0; | 689 | return 0; |
688 | 690 | ||
689 | /* Allowed if parent directory and link owner match. */ | 691 | /* Allowed if parent directory and link owner match. */ |
690 | if (parent->i_uid == inode->i_uid) | 692 | if (uid_eq(parent->i_uid, inode->i_uid)) |
691 | return 0; | 693 | return 0; |
692 | 694 | ||
693 | path_put_conditional(link, nd); | 695 | path_put_conditional(link, nd); |
@@ -757,7 +759,7 @@ static int may_linkat(struct path *link) | |||
757 | /* Source inode owner (or CAP_FOWNER) can hardlink all they like, | 759 | /* Source inode owner (or CAP_FOWNER) can hardlink all they like, |
758 | * otherwise, it must be a safe source. | 760 | * otherwise, it must be a safe source. |
759 | */ | 761 | */ |
760 | if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) || | 762 | if (uid_eq(cred->fsuid, inode->i_uid) || safe_hardlink_source(inode) || |
761 | capable(CAP_FOWNER)) | 763 | capable(CAP_FOWNER)) |
762 | return 0; | 764 | return 0; |
763 | 765 | ||
@@ -1795,8 +1797,6 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1795 | struct nameidata *nd, struct file **fp) | 1797 | struct nameidata *nd, struct file **fp) |
1796 | { | 1798 | { |
1797 | int retval = 0; | 1799 | int retval = 0; |
1798 | int fput_needed; | ||
1799 | struct file *file; | ||
1800 | 1800 | ||
1801 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ | 1801 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ |
1802 | nd->flags = flags | LOOKUP_JUMPED; | 1802 | nd->flags = flags | LOOKUP_JUMPED; |
@@ -1848,44 +1848,41 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1848 | get_fs_pwd(current->fs, &nd->path); | 1848 | get_fs_pwd(current->fs, &nd->path); |
1849 | } | 1849 | } |
1850 | } else { | 1850 | } else { |
1851 | struct fd f = fdget_raw(dfd); | ||
1851 | struct dentry *dentry; | 1852 | struct dentry *dentry; |
1852 | 1853 | ||
1853 | file = fget_raw_light(dfd, &fput_needed); | 1854 | if (!f.file) |
1854 | retval = -EBADF; | 1855 | return -EBADF; |
1855 | if (!file) | ||
1856 | goto out_fail; | ||
1857 | 1856 | ||
1858 | dentry = file->f_path.dentry; | 1857 | dentry = f.file->f_path.dentry; |
1859 | 1858 | ||
1860 | if (*name) { | 1859 | if (*name) { |
1861 | retval = -ENOTDIR; | 1860 | if (!S_ISDIR(dentry->d_inode->i_mode)) { |
1862 | if (!S_ISDIR(dentry->d_inode->i_mode)) | 1861 | fdput(f); |
1863 | goto fput_fail; | 1862 | return -ENOTDIR; |
1863 | } | ||
1864 | 1864 | ||
1865 | retval = inode_permission(dentry->d_inode, MAY_EXEC); | 1865 | retval = inode_permission(dentry->d_inode, MAY_EXEC); |
1866 | if (retval) | 1866 | if (retval) { |
1867 | goto fput_fail; | 1867 | fdput(f); |
1868 | return retval; | ||
1869 | } | ||
1868 | } | 1870 | } |
1869 | 1871 | ||
1870 | nd->path = file->f_path; | 1872 | nd->path = f.file->f_path; |
1871 | if (flags & LOOKUP_RCU) { | 1873 | if (flags & LOOKUP_RCU) { |
1872 | if (fput_needed) | 1874 | if (f.need_put) |
1873 | *fp = file; | 1875 | *fp = f.file; |
1874 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | 1876 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); |
1875 | lock_rcu_walk(); | 1877 | lock_rcu_walk(); |
1876 | } else { | 1878 | } else { |
1877 | path_get(&file->f_path); | 1879 | path_get(&nd->path); |
1878 | fput_light(file, fput_needed); | 1880 | fdput(f); |
1879 | } | 1881 | } |
1880 | } | 1882 | } |
1881 | 1883 | ||
1882 | nd->inode = nd->path.dentry->d_inode; | 1884 | nd->inode = nd->path.dentry->d_inode; |
1883 | return 0; | 1885 | return 0; |
1884 | |||
1885 | fput_fail: | ||
1886 | fput_light(file, fput_needed); | ||
1887 | out_fail: | ||
1888 | return retval; | ||
1889 | } | 1886 | } |
1890 | 1887 | ||
1891 | static inline int lookup_last(struct nameidata *nd, struct path *path) | 1888 | static inline int lookup_last(struct nameidata *nd, struct path *path) |
@@ -2414,7 +2411,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, | |||
2414 | goto out; | 2411 | goto out; |
2415 | } | 2412 | } |
2416 | 2413 | ||
2417 | mode = op->mode & S_IALLUGO; | 2414 | mode = op->mode; |
2418 | if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) | 2415 | if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) |
2419 | mode &= ~current_umask(); | 2416 | mode &= ~current_umask(); |
2420 | 2417 | ||
@@ -2452,7 +2449,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, | |||
2452 | } | 2449 | } |
2453 | 2450 | ||
2454 | if (open_flag & O_CREAT) { | 2451 | if (open_flag & O_CREAT) { |
2455 | error = may_o_create(&nd->path, dentry, op->mode); | 2452 | error = may_o_create(&nd->path, dentry, mode); |
2456 | if (error) { | 2453 | if (error) { |
2457 | create_error = error; | 2454 | create_error = error; |
2458 | if (open_flag & O_EXCL) | 2455 | if (open_flag & O_EXCL) |
@@ -2489,6 +2486,10 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, | |||
2489 | dput(dentry); | 2486 | dput(dentry); |
2490 | dentry = file->f_path.dentry; | 2487 | dentry = file->f_path.dentry; |
2491 | } | 2488 | } |
2489 | if (create_error && dentry->d_inode == NULL) { | ||
2490 | error = create_error; | ||
2491 | goto out; | ||
2492 | } | ||
2492 | goto looked_up; | 2493 | goto looked_up; |
2493 | } | 2494 | } |
2494 | 2495 | ||
@@ -3965,7 +3966,7 @@ EXPORT_SYMBOL(user_path_at); | |||
3965 | EXPORT_SYMBOL(follow_down_one); | 3966 | EXPORT_SYMBOL(follow_down_one); |
3966 | EXPORT_SYMBOL(follow_down); | 3967 | EXPORT_SYMBOL(follow_down); |
3967 | EXPORT_SYMBOL(follow_up); | 3968 | EXPORT_SYMBOL(follow_up); |
3968 | EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ | 3969 | EXPORT_SYMBOL(get_write_access); /* nfsd */ |
3969 | EXPORT_SYMBOL(getname); | 3970 | EXPORT_SYMBOL(getname); |
3970 | EXPORT_SYMBOL(lock_rename); | 3971 | EXPORT_SYMBOL(lock_rename); |
3971 | EXPORT_SYMBOL(lookup_one_len); | 3972 | EXPORT_SYMBOL(lookup_one_len); |
diff --git a/fs/namespace.c b/fs/namespace.c index 4d31f73e2561..7bdf7907413f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1886,8 +1886,14 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) | |||
1886 | return err; | 1886 | return err; |
1887 | 1887 | ||
1888 | err = -EINVAL; | 1888 | err = -EINVAL; |
1889 | if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt))) | 1889 | if (unlikely(!check_mnt(real_mount(path->mnt)))) { |
1890 | goto unlock; | 1890 | /* that's acceptable only for automounts done in private ns */ |
1891 | if (!(mnt_flags & MNT_SHRINKABLE)) | ||
1892 | goto unlock; | ||
1893 | /* ... and for those we'd better have mountpoint still alive */ | ||
1894 | if (!real_mount(path->mnt)->mnt_ns) | ||
1895 | goto unlock; | ||
1896 | } | ||
1891 | 1897 | ||
1892 | /* Refuse the same filesystem on the same mount point */ | 1898 | /* Refuse the same filesystem on the same mount point */ |
1893 | err = -EBUSY; | 1899 | err = -EBUSY; |
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 333df07ae3bd..d7e9fe77188a 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -89,6 +89,11 @@ static int init_inodecache(void) | |||
89 | 89 | ||
90 | static void destroy_inodecache(void) | 90 | static void destroy_inodecache(void) |
91 | { | 91 | { |
92 | /* | ||
93 | * Make sure all delayed rcu free inodes are flushed before we | ||
94 | * destroy cache. | ||
95 | */ | ||
96 | rcu_barrier(); | ||
92 | kmem_cache_destroy(ncp_inode_cachep); | 97 | kmem_cache_destroy(ncp_inode_cachep); |
93 | } | 98 | } |
94 | 99 | ||
@@ -314,11 +319,11 @@ static void ncp_stop_tasks(struct ncp_server *server) { | |||
314 | release_sock(sk); | 319 | release_sock(sk); |
315 | del_timer_sync(&server->timeout_tm); | 320 | del_timer_sync(&server->timeout_tm); |
316 | 321 | ||
317 | flush_work_sync(&server->rcv.tq); | 322 | flush_work(&server->rcv.tq); |
318 | if (sk->sk_socket->type == SOCK_STREAM) | 323 | if (sk->sk_socket->type == SOCK_STREAM) |
319 | flush_work_sync(&server->tx.tq); | 324 | flush_work(&server->tx.tq); |
320 | else | 325 | else |
321 | flush_work_sync(&server->timeout_tq); | 326 | flush_work(&server->timeout_tq); |
322 | } | 327 | } |
323 | 328 | ||
324 | static int ncp_show_options(struct seq_file *seq, struct dentry *root) | 329 | static int ncp_show_options(struct seq_file *seq, struct dentry *root) |
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 8bf3a3f6925a..b7db60897f91 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
@@ -12,19 +12,19 @@ nfs-$(CONFIG_ROOT_NFS) += nfsroot.o | |||
12 | nfs-$(CONFIG_SYSCTL) += sysctl.o | 12 | nfs-$(CONFIG_SYSCTL) += sysctl.o |
13 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o | 13 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o |
14 | 14 | ||
15 | obj-$(CONFIG_NFS_V2) += nfs2.o | 15 | obj-$(CONFIG_NFS_V2) += nfsv2.o |
16 | nfs2-y := nfs2super.o proc.o nfs2xdr.o | 16 | nfsv2-y := nfs2super.o proc.o nfs2xdr.o |
17 | 17 | ||
18 | obj-$(CONFIG_NFS_V3) += nfs3.o | 18 | obj-$(CONFIG_NFS_V3) += nfsv3.o |
19 | nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o | 19 | nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o |
20 | nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o | 20 | nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o |
21 | 21 | ||
22 | obj-$(CONFIG_NFS_V4) += nfs4.o | 22 | obj-$(CONFIG_NFS_V4) += nfsv4.o |
23 | nfs4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ | 23 | nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ |
24 | delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ | 24 | delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ |
25 | nfs4namespace.o nfs4getroot.o nfs4client.o | 25 | nfs4namespace.o nfs4getroot.o nfs4client.o |
26 | nfs4-$(CONFIG_SYSCTL) += nfs4sysctl.o | 26 | nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o |
27 | nfs4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o | 27 | nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o |
28 | 28 | ||
29 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o | 29 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o |
30 | nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o | 30 | nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 9fc0d9dfc91b..99694442b93f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -105,7 +105,7 @@ struct nfs_subversion *get_nfs_version(unsigned int version) | |||
105 | 105 | ||
106 | if (IS_ERR(nfs)) { | 106 | if (IS_ERR(nfs)) { |
107 | mutex_lock(&nfs_version_mutex); | 107 | mutex_lock(&nfs_version_mutex); |
108 | request_module("nfs%d", version); | 108 | request_module("nfsv%d", version); |
109 | nfs = find_nfs_version(version); | 109 | nfs = find_nfs_version(version); |
110 | mutex_unlock(&nfs_version_mutex); | 110 | mutex_unlock(&nfs_version_mutex); |
111 | } | 111 | } |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 75d6d0a3d32e..f692be97676d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -287,10 +287,12 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
287 | struct inode *inode = file->f_path.dentry->d_inode; | 287 | struct inode *inode = file->f_path.dentry->d_inode; |
288 | 288 | ||
289 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 289 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
290 | if (ret != 0) | ||
291 | goto out; | ||
290 | mutex_lock(&inode->i_mutex); | 292 | mutex_lock(&inode->i_mutex); |
291 | ret = nfs_file_fsync_commit(file, start, end, datasync); | 293 | ret = nfs_file_fsync_commit(file, start, end, datasync); |
292 | mutex_unlock(&inode->i_mutex); | 294 | mutex_unlock(&inode->i_mutex); |
293 | 295 | out: | |
294 | return ret; | 296 | return ret; |
295 | } | 297 | } |
296 | 298 | ||
@@ -576,6 +578,7 @@ out: | |||
576 | static const struct vm_operations_struct nfs_file_vm_ops = { | 578 | static const struct vm_operations_struct nfs_file_vm_ops = { |
577 | .fault = filemap_fault, | 579 | .fault = filemap_fault, |
578 | .page_mkwrite = nfs_vm_page_mkwrite, | 580 | .page_mkwrite = nfs_vm_page_mkwrite, |
581 | .remap_pages = generic_file_remap_pages, | ||
579 | }; | 582 | }; |
580 | 583 | ||
581 | static int nfs_need_sync_write(struct file *filp, struct inode *inode) | 584 | static int nfs_need_sync_write(struct file *filp, struct inode *inode) |
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b701358c39c3..a850079467d8 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
@@ -61,6 +61,12 @@ struct idmap { | |||
61 | struct mutex idmap_mutex; | 61 | struct mutex idmap_mutex; |
62 | }; | 62 | }; |
63 | 63 | ||
64 | struct idmap_legacy_upcalldata { | ||
65 | struct rpc_pipe_msg pipe_msg; | ||
66 | struct idmap_msg idmap_msg; | ||
67 | struct idmap *idmap; | ||
68 | }; | ||
69 | |||
64 | /** | 70 | /** |
65 | * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields | 71 | * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields |
66 | * @fattr: fully initialised struct nfs_fattr | 72 | * @fattr: fully initialised struct nfs_fattr |
@@ -324,6 +330,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, | |||
324 | ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, | 330 | ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, |
325 | name, namelen, type, data, | 331 | name, namelen, type, data, |
326 | data_size, idmap); | 332 | data_size, idmap); |
333 | idmap->idmap_key_cons = NULL; | ||
327 | mutex_unlock(&idmap->idmap_mutex); | 334 | mutex_unlock(&idmap->idmap_mutex); |
328 | } | 335 | } |
329 | return ret; | 336 | return ret; |
@@ -380,11 +387,13 @@ static const match_table_t nfs_idmap_tokens = { | |||
380 | static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); | 387 | static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); |
381 | static ssize_t idmap_pipe_downcall(struct file *, const char __user *, | 388 | static ssize_t idmap_pipe_downcall(struct file *, const char __user *, |
382 | size_t); | 389 | size_t); |
390 | static void idmap_release_pipe(struct inode *); | ||
383 | static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); | 391 | static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); |
384 | 392 | ||
385 | static const struct rpc_pipe_ops idmap_upcall_ops = { | 393 | static const struct rpc_pipe_ops idmap_upcall_ops = { |
386 | .upcall = rpc_pipe_generic_upcall, | 394 | .upcall = rpc_pipe_generic_upcall, |
387 | .downcall = idmap_pipe_downcall, | 395 | .downcall = idmap_pipe_downcall, |
396 | .release_pipe = idmap_release_pipe, | ||
388 | .destroy_msg = idmap_pipe_destroy_msg, | 397 | .destroy_msg = idmap_pipe_destroy_msg, |
389 | }; | 398 | }; |
390 | 399 | ||
@@ -616,7 +625,8 @@ void nfs_idmap_quit(void) | |||
616 | nfs_idmap_quit_keyring(); | 625 | nfs_idmap_quit_keyring(); |
617 | } | 626 | } |
618 | 627 | ||
619 | static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im, | 628 | static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap, |
629 | struct idmap_msg *im, | ||
620 | struct rpc_pipe_msg *msg) | 630 | struct rpc_pipe_msg *msg) |
621 | { | 631 | { |
622 | substring_t substr; | 632 | substring_t substr; |
@@ -659,6 +669,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, | |||
659 | const char *op, | 669 | const char *op, |
660 | void *aux) | 670 | void *aux) |
661 | { | 671 | { |
672 | struct idmap_legacy_upcalldata *data; | ||
662 | struct rpc_pipe_msg *msg; | 673 | struct rpc_pipe_msg *msg; |
663 | struct idmap_msg *im; | 674 | struct idmap_msg *im; |
664 | struct idmap *idmap = (struct idmap *)aux; | 675 | struct idmap *idmap = (struct idmap *)aux; |
@@ -666,15 +677,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, | |||
666 | int ret = -ENOMEM; | 677 | int ret = -ENOMEM; |
667 | 678 | ||
668 | /* msg and im are freed in idmap_pipe_destroy_msg */ | 679 | /* msg and im are freed in idmap_pipe_destroy_msg */ |
669 | msg = kmalloc(sizeof(*msg), GFP_KERNEL); | 680 | data = kmalloc(sizeof(*data), GFP_KERNEL); |
670 | if (!msg) | 681 | if (!data) |
671 | goto out0; | ||
672 | |||
673 | im = kmalloc(sizeof(*im), GFP_KERNEL); | ||
674 | if (!im) | ||
675 | goto out1; | 682 | goto out1; |
676 | 683 | ||
677 | ret = nfs_idmap_prepare_message(key->description, im, msg); | 684 | msg = &data->pipe_msg; |
685 | im = &data->idmap_msg; | ||
686 | data->idmap = idmap; | ||
687 | |||
688 | ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); | ||
678 | if (ret < 0) | 689 | if (ret < 0) |
679 | goto out2; | 690 | goto out2; |
680 | 691 | ||
@@ -683,15 +694,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, | |||
683 | 694 | ||
684 | ret = rpc_queue_upcall(idmap->idmap_pipe, msg); | 695 | ret = rpc_queue_upcall(idmap->idmap_pipe, msg); |
685 | if (ret < 0) | 696 | if (ret < 0) |
686 | goto out2; | 697 | goto out3; |
687 | 698 | ||
688 | return ret; | 699 | return ret; |
689 | 700 | ||
701 | out3: | ||
702 | idmap->idmap_key_cons = NULL; | ||
690 | out2: | 703 | out2: |
691 | kfree(im); | 704 | kfree(data); |
692 | out1: | 705 | out1: |
693 | kfree(msg); | ||
694 | out0: | ||
695 | complete_request_key(cons, ret); | 706 | complete_request_key(cons, ret); |
696 | return ret; | 707 | return ret; |
697 | } | 708 | } |
@@ -749,9 +760,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) | |||
749 | } | 760 | } |
750 | 761 | ||
751 | if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { | 762 | if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { |
752 | ret = mlen; | 763 | ret = -ENOKEY; |
753 | complete_request_key(cons, -ENOKEY); | 764 | goto out; |
754 | goto out_incomplete; | ||
755 | } | 765 | } |
756 | 766 | ||
757 | namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); | 767 | namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); |
@@ -768,16 +778,32 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) | |||
768 | 778 | ||
769 | out: | 779 | out: |
770 | complete_request_key(cons, ret); | 780 | complete_request_key(cons, ret); |
771 | out_incomplete: | ||
772 | return ret; | 781 | return ret; |
773 | } | 782 | } |
774 | 783 | ||
775 | static void | 784 | static void |
776 | idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) | 785 | idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) |
777 | { | 786 | { |
787 | struct idmap_legacy_upcalldata *data = container_of(msg, | ||
788 | struct idmap_legacy_upcalldata, | ||
789 | pipe_msg); | ||
790 | struct idmap *idmap = data->idmap; | ||
791 | struct key_construction *cons; | ||
792 | if (msg->errno) { | ||
793 | cons = ACCESS_ONCE(idmap->idmap_key_cons); | ||
794 | idmap->idmap_key_cons = NULL; | ||
795 | complete_request_key(cons, msg->errno); | ||
796 | } | ||
778 | /* Free memory allocated in nfs_idmap_legacy_upcall() */ | 797 | /* Free memory allocated in nfs_idmap_legacy_upcall() */ |
779 | kfree(msg->data); | 798 | kfree(data); |
780 | kfree(msg); | 799 | } |
800 | |||
801 | static void | ||
802 | idmap_release_pipe(struct inode *inode) | ||
803 | { | ||
804 | struct rpc_inode *rpci = RPC_I(inode); | ||
805 | struct idmap *idmap = (struct idmap *)rpci->private; | ||
806 | idmap->idmap_key_cons = NULL; | ||
781 | } | 807 | } |
782 | 808 | ||
783 | int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) | 809 | int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c6e895f0fbf3..e4c716d374a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -154,7 +154,7 @@ static void nfs_zap_caches_locked(struct inode *inode) | |||
154 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); | 154 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); |
155 | nfsi->attrtimeo_timestamp = jiffies; | 155 | nfsi->attrtimeo_timestamp = jiffies; |
156 | 156 | ||
157 | memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); | 157 | memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); |
158 | if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) | 158 | if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) |
159 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; | 159 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; |
160 | else | 160 | else |
@@ -1571,6 +1571,11 @@ static int __init nfs_init_inodecache(void) | |||
1571 | 1571 | ||
1572 | static void nfs_destroy_inodecache(void) | 1572 | static void nfs_destroy_inodecache(void) |
1573 | { | 1573 | { |
1574 | /* | ||
1575 | * Make sure all delayed rcu free inodes are flushed before we | ||
1576 | * destroy cache. | ||
1577 | */ | ||
1578 | rcu_barrier(); | ||
1574 | kmem_cache_destroy(nfs_inode_cachep); | 1579 | kmem_cache_destroy(nfs_inode_cachep); |
1575 | } | 1580 | } |
1576 | 1581 | ||
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index e4498dc351a8..4a1aafba6a20 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
@@ -70,7 +70,7 @@ ssize_t nfs3_getxattr(struct dentry *dentry, const char *name, | |||
70 | if (type == ACL_TYPE_ACCESS && acl->a_count == 0) | 70 | if (type == ACL_TYPE_ACCESS && acl->a_count == 0) |
71 | error = -ENODATA; | 71 | error = -ENODATA; |
72 | else | 72 | else |
73 | error = posix_acl_to_xattr(acl, buffer, size); | 73 | error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); |
74 | posix_acl_release(acl); | 74 | posix_acl_release(acl); |
75 | } else | 75 | } else |
76 | error = -ENODATA; | 76 | error = -ENODATA; |
@@ -92,7 +92,7 @@ int nfs3_setxattr(struct dentry *dentry, const char *name, | |||
92 | else | 92 | else |
93 | return -EOPNOTSUPP; | 93 | return -EOPNOTSUPP; |
94 | 94 | ||
95 | acl = posix_acl_from_xattr(value, size); | 95 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
96 | if (IS_ERR(acl)) | 96 | if (IS_ERR(acl)) |
97 | return PTR_ERR(acl); | 97 | return PTR_ERR(acl); |
98 | error = nfs3_proc_setacl(inode, type, acl); | 98 | error = nfs3_proc_setacl(inode, type, acl); |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 0952c791df36..69322096c325 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -69,7 +69,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, | |||
69 | nfs_fattr_init(info->fattr); | 69 | nfs_fattr_init(info->fattr); |
70 | status = rpc_call_sync(client, &msg, 0); | 70 | status = rpc_call_sync(client, &msg, 0); |
71 | dprintk("%s: reply fsinfo: %d\n", __func__, status); | 71 | dprintk("%s: reply fsinfo: %d\n", __func__, status); |
72 | if (!(info->fattr->valid & NFS_ATTR_FATTR)) { | 72 | if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) { |
73 | msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; | 73 | msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; |
74 | msg.rpc_resp = info->fattr; | 74 | msg.rpc_resp = info->fattr; |
75 | status = rpc_call_sync(client, &msg, 0); | 75 | status = rpc_call_sync(client, &msg, 0); |
@@ -643,7 +643,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
643 | u64 cookie, struct page **pages, unsigned int count, int plus) | 643 | u64 cookie, struct page **pages, unsigned int count, int plus) |
644 | { | 644 | { |
645 | struct inode *dir = dentry->d_inode; | 645 | struct inode *dir = dentry->d_inode; |
646 | __be32 *verf = NFS_COOKIEVERF(dir); | 646 | __be32 *verf = NFS_I(dir)->cookieverf; |
647 | struct nfs3_readdirargs arg = { | 647 | struct nfs3_readdirargs arg = { |
648 | .fh = NFS_FH(dir), | 648 | .fh = NFS_FH(dir), |
649 | .cookie = cookie, | 649 | .cookie = cookie, |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 3b950dd81e81..da0618aeeadb 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -205,6 +205,9 @@ extern const struct dentry_operations nfs4_dentry_operations; | |||
205 | int nfs_atomic_open(struct inode *, struct dentry *, struct file *, | 205 | int nfs_atomic_open(struct inode *, struct dentry *, struct file *, |
206 | unsigned, umode_t, int *); | 206 | unsigned, umode_t, int *); |
207 | 207 | ||
208 | /* super.c */ | ||
209 | extern struct file_system_type nfs4_fs_type; | ||
210 | |||
208 | /* nfs4namespace.c */ | 211 | /* nfs4namespace.c */ |
209 | rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); | 212 | rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); |
210 | struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); | 213 | struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); |
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index cbcdfaf32505..24eb663f8ed5 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c | |||
@@ -74,7 +74,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) | |||
74 | return clp; | 74 | return clp; |
75 | 75 | ||
76 | error: | 76 | error: |
77 | kfree(clp); | 77 | nfs_free_client(clp); |
78 | return ERR_PTR(err); | 78 | return ERR_PTR(err); |
79 | } | 79 | } |
80 | 80 | ||
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index acb65e7887f8..eb5eb8eef4d3 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c | |||
@@ -96,13 +96,15 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
96 | struct inode *inode = file->f_path.dentry->d_inode; | 96 | struct inode *inode = file->f_path.dentry->d_inode; |
97 | 97 | ||
98 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 98 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
99 | if (ret != 0) | ||
100 | goto out; | ||
99 | mutex_lock(&inode->i_mutex); | 101 | mutex_lock(&inode->i_mutex); |
100 | ret = nfs_file_fsync_commit(file, start, end, datasync); | 102 | ret = nfs_file_fsync_commit(file, start, end, datasync); |
101 | if (!ret && !datasync) | 103 | if (!ret && !datasync) |
102 | /* application has asked for meta-data sync */ | 104 | /* application has asked for meta-data sync */ |
103 | ret = pnfs_layoutcommit_inode(inode, true); | 105 | ret = pnfs_layoutcommit_inode(inode, true); |
104 | mutex_unlock(&inode->i_mutex); | 106 | mutex_unlock(&inode->i_mutex); |
105 | 107 | out: | |
106 | return ret; | 108 | return ret; |
107 | } | 109 | } |
108 | 110 | ||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a99a8d948721..1e50326d00dd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -3215,11 +3215,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
3215 | dentry->d_parent->d_name.name, | 3215 | dentry->d_parent->d_name.name, |
3216 | dentry->d_name.name, | 3216 | dentry->d_name.name, |
3217 | (unsigned long long)cookie); | 3217 | (unsigned long long)cookie); |
3218 | nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); | 3218 | nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); |
3219 | res.pgbase = args.pgbase; | 3219 | res.pgbase = args.pgbase; |
3220 | status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); | 3220 | status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); |
3221 | if (status >= 0) { | 3221 | if (status >= 0) { |
3222 | memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); | 3222 | memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); |
3223 | status += args.pgbase; | 3223 | status += args.pgbase; |
3224 | } | 3224 | } |
3225 | 3225 | ||
@@ -3653,11 +3653,11 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) | |||
3653 | && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); | 3653 | && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); |
3654 | } | 3654 | } |
3655 | 3655 | ||
3656 | /* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that | 3656 | /* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that |
3657 | * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on | 3657 | * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_SIZE) bytes on |
3658 | * the stack. | 3658 | * the stack. |
3659 | */ | 3659 | */ |
3660 | #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) | 3660 | #define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) |
3661 | 3661 | ||
3662 | static int buf_to_pages_noslab(const void *buf, size_t buflen, | 3662 | static int buf_to_pages_noslab(const void *buf, size_t buflen, |
3663 | struct page **pages, unsigned int *pgbase) | 3663 | struct page **pages, unsigned int *pgbase) |
@@ -3668,7 +3668,7 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen, | |||
3668 | spages = pages; | 3668 | spages = pages; |
3669 | 3669 | ||
3670 | do { | 3670 | do { |
3671 | len = min_t(size_t, PAGE_CACHE_SIZE, buflen); | 3671 | len = min_t(size_t, PAGE_SIZE, buflen); |
3672 | newpage = alloc_page(GFP_KERNEL); | 3672 | newpage = alloc_page(GFP_KERNEL); |
3673 | 3673 | ||
3674 | if (newpage == NULL) | 3674 | if (newpage == NULL) |
@@ -3737,9 +3737,10 @@ out: | |||
3737 | static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len) | 3737 | static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len) |
3738 | { | 3738 | { |
3739 | struct nfs4_cached_acl *acl; | 3739 | struct nfs4_cached_acl *acl; |
3740 | size_t buflen = sizeof(*acl) + acl_len; | ||
3740 | 3741 | ||
3741 | if (pages && acl_len <= PAGE_SIZE) { | 3742 | if (buflen <= PAGE_SIZE) { |
3742 | acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); | 3743 | acl = kmalloc(buflen, GFP_KERNEL); |
3743 | if (acl == NULL) | 3744 | if (acl == NULL) |
3744 | goto out; | 3745 | goto out; |
3745 | acl->cached = 1; | 3746 | acl->cached = 1; |
@@ -3781,17 +3782,15 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu | |||
3781 | .rpc_argp = &args, | 3782 | .rpc_argp = &args, |
3782 | .rpc_resp = &res, | 3783 | .rpc_resp = &res, |
3783 | }; | 3784 | }; |
3784 | int ret = -ENOMEM, npages, i; | 3785 | unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); |
3785 | size_t acl_len = 0; | 3786 | int ret = -ENOMEM, i; |
3786 | 3787 | ||
3787 | npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
3788 | /* As long as we're doing a round trip to the server anyway, | 3788 | /* As long as we're doing a round trip to the server anyway, |
3789 | * let's be prepared for a page of acl data. */ | 3789 | * let's be prepared for a page of acl data. */ |
3790 | if (npages == 0) | 3790 | if (npages == 0) |
3791 | npages = 1; | 3791 | npages = 1; |
3792 | 3792 | if (npages > ARRAY_SIZE(pages)) | |
3793 | /* Add an extra page to handle the bitmap returned */ | 3793 | return -ERANGE; |
3794 | npages++; | ||
3795 | 3794 | ||
3796 | for (i = 0; i < npages; i++) { | 3795 | for (i = 0; i < npages; i++) { |
3797 | pages[i] = alloc_page(GFP_KERNEL); | 3796 | pages[i] = alloc_page(GFP_KERNEL); |
@@ -3807,11 +3806,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu | |||
3807 | args.acl_len = npages * PAGE_SIZE; | 3806 | args.acl_len = npages * PAGE_SIZE; |
3808 | args.acl_pgbase = 0; | 3807 | args.acl_pgbase = 0; |
3809 | 3808 | ||
3810 | /* Let decode_getfacl know not to fail if the ACL data is larger than | ||
3811 | * the page we send as a guess */ | ||
3812 | if (buf == NULL) | ||
3813 | res.acl_flags |= NFS4_ACL_LEN_REQUEST; | ||
3814 | |||
3815 | dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", | 3809 | dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", |
3816 | __func__, buf, buflen, npages, args.acl_len); | 3810 | __func__, buf, buflen, npages, args.acl_len); |
3817 | ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), | 3811 | ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), |
@@ -3819,20 +3813,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu | |||
3819 | if (ret) | 3813 | if (ret) |
3820 | goto out_free; | 3814 | goto out_free; |
3821 | 3815 | ||
3822 | acl_len = res.acl_len - res.acl_data_offset; | 3816 | /* Handle the case where the passed-in buffer is too short */ |
3823 | if (acl_len > args.acl_len) | 3817 | if (res.acl_flags & NFS4_ACL_TRUNC) { |
3824 | nfs4_write_cached_acl(inode, NULL, 0, acl_len); | 3818 | /* Did the user only issue a request for the acl length? */ |
3825 | else | 3819 | if (buf == NULL) |
3826 | nfs4_write_cached_acl(inode, pages, res.acl_data_offset, | 3820 | goto out_ok; |
3827 | acl_len); | ||
3828 | if (buf) { | ||
3829 | ret = -ERANGE; | 3821 | ret = -ERANGE; |
3830 | if (acl_len > buflen) | 3822 | goto out_free; |
3831 | goto out_free; | ||
3832 | _copy_from_pages(buf, pages, res.acl_data_offset, | ||
3833 | acl_len); | ||
3834 | } | 3823 | } |
3835 | ret = acl_len; | 3824 | nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len); |
3825 | if (buf) | ||
3826 | _copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len); | ||
3827 | out_ok: | ||
3828 | ret = res.acl_len; | ||
3836 | out_free: | 3829 | out_free: |
3837 | for (i = 0; i < npages; i++) | 3830 | for (i = 0; i < npages; i++) |
3838 | if (pages[i]) | 3831 | if (pages[i]) |
@@ -3890,10 +3883,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl | |||
3890 | .rpc_argp = &arg, | 3883 | .rpc_argp = &arg, |
3891 | .rpc_resp = &res, | 3884 | .rpc_resp = &res, |
3892 | }; | 3885 | }; |
3886 | unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); | ||
3893 | int ret, i; | 3887 | int ret, i; |
3894 | 3888 | ||
3895 | if (!nfs4_server_supports_acls(server)) | 3889 | if (!nfs4_server_supports_acls(server)) |
3896 | return -EOPNOTSUPP; | 3890 | return -EOPNOTSUPP; |
3891 | if (npages > ARRAY_SIZE(pages)) | ||
3892 | return -ERANGE; | ||
3897 | i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); | 3893 | i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); |
3898 | if (i < 0) | 3894 | if (i < 0) |
3899 | return i; | 3895 | return i; |
@@ -6223,11 +6219,58 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | |||
6223 | dprintk("<-- %s\n", __func__); | 6219 | dprintk("<-- %s\n", __func__); |
6224 | } | 6220 | } |
6225 | 6221 | ||
6222 | static size_t max_response_pages(struct nfs_server *server) | ||
6223 | { | ||
6224 | u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
6225 | return nfs_page_array_len(0, max_resp_sz); | ||
6226 | } | ||
6227 | |||
6228 | static void nfs4_free_pages(struct page **pages, size_t size) | ||
6229 | { | ||
6230 | int i; | ||
6231 | |||
6232 | if (!pages) | ||
6233 | return; | ||
6234 | |||
6235 | for (i = 0; i < size; i++) { | ||
6236 | if (!pages[i]) | ||
6237 | break; | ||
6238 | __free_page(pages[i]); | ||
6239 | } | ||
6240 | kfree(pages); | ||
6241 | } | ||
6242 | |||
6243 | static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) | ||
6244 | { | ||
6245 | struct page **pages; | ||
6246 | int i; | ||
6247 | |||
6248 | pages = kcalloc(size, sizeof(struct page *), gfp_flags); | ||
6249 | if (!pages) { | ||
6250 | dprintk("%s: can't alloc array of %zu pages\n", __func__, size); | ||
6251 | return NULL; | ||
6252 | } | ||
6253 | |||
6254 | for (i = 0; i < size; i++) { | ||
6255 | pages[i] = alloc_page(gfp_flags); | ||
6256 | if (!pages[i]) { | ||
6257 | dprintk("%s: failed to allocate page\n", __func__); | ||
6258 | nfs4_free_pages(pages, size); | ||
6259 | return NULL; | ||
6260 | } | ||
6261 | } | ||
6262 | |||
6263 | return pages; | ||
6264 | } | ||
6265 | |||
6226 | static void nfs4_layoutget_release(void *calldata) | 6266 | static void nfs4_layoutget_release(void *calldata) |
6227 | { | 6267 | { |
6228 | struct nfs4_layoutget *lgp = calldata; | 6268 | struct nfs4_layoutget *lgp = calldata; |
6269 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); | ||
6270 | size_t max_pages = max_response_pages(server); | ||
6229 | 6271 | ||
6230 | dprintk("--> %s\n", __func__); | 6272 | dprintk("--> %s\n", __func__); |
6273 | nfs4_free_pages(lgp->args.layout.pages, max_pages); | ||
6231 | put_nfs_open_context(lgp->args.ctx); | 6274 | put_nfs_open_context(lgp->args.ctx); |
6232 | kfree(calldata); | 6275 | kfree(calldata); |
6233 | dprintk("<-- %s\n", __func__); | 6276 | dprintk("<-- %s\n", __func__); |
@@ -6239,9 +6282,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { | |||
6239 | .rpc_release = nfs4_layoutget_release, | 6282 | .rpc_release = nfs4_layoutget_release, |
6240 | }; | 6283 | }; |
6241 | 6284 | ||
6242 | int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) | 6285 | void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) |
6243 | { | 6286 | { |
6244 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); | 6287 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); |
6288 | size_t max_pages = max_response_pages(server); | ||
6245 | struct rpc_task *task; | 6289 | struct rpc_task *task; |
6246 | struct rpc_message msg = { | 6290 | struct rpc_message msg = { |
6247 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], | 6291 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], |
@@ -6259,12 +6303,19 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) | |||
6259 | 6303 | ||
6260 | dprintk("--> %s\n", __func__); | 6304 | dprintk("--> %s\n", __func__); |
6261 | 6305 | ||
6306 | lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); | ||
6307 | if (!lgp->args.layout.pages) { | ||
6308 | nfs4_layoutget_release(lgp); | ||
6309 | return; | ||
6310 | } | ||
6311 | lgp->args.layout.pglen = max_pages * PAGE_SIZE; | ||
6312 | |||
6262 | lgp->res.layoutp = &lgp->args.layout; | 6313 | lgp->res.layoutp = &lgp->args.layout; |
6263 | lgp->res.seq_res.sr_slot = NULL; | 6314 | lgp->res.seq_res.sr_slot = NULL; |
6264 | nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); | 6315 | nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); |
6265 | task = rpc_run_task(&task_setup_data); | 6316 | task = rpc_run_task(&task_setup_data); |
6266 | if (IS_ERR(task)) | 6317 | if (IS_ERR(task)) |
6267 | return PTR_ERR(task); | 6318 | return; |
6268 | status = nfs4_wait_for_completion_rpc_task(task); | 6319 | status = nfs4_wait_for_completion_rpc_task(task); |
6269 | if (status == 0) | 6320 | if (status == 0) |
6270 | status = task->tk_status; | 6321 | status = task->tk_status; |
@@ -6272,7 +6323,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) | |||
6272 | status = pnfs_layout_process(lgp); | 6323 | status = pnfs_layout_process(lgp); |
6273 | rpc_put_task(task); | 6324 | rpc_put_task(task); |
6274 | dprintk("<-- %s status=%d\n", __func__, status); | 6325 | dprintk("<-- %s status=%d\n", __func__, status); |
6275 | return status; | 6326 | return; |
6276 | } | 6327 | } |
6277 | 6328 | ||
6278 | static void | 6329 | static void |
@@ -6304,12 +6355,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
6304 | return; | 6355 | return; |
6305 | } | 6356 | } |
6306 | spin_lock(&lo->plh_inode->i_lock); | 6357 | spin_lock(&lo->plh_inode->i_lock); |
6307 | if (task->tk_status == 0) { | 6358 | if (task->tk_status == 0 && lrp->res.lrs_present) |
6308 | if (lrp->res.lrs_present) { | 6359 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); |
6309 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | ||
6310 | } else | ||
6311 | BUG_ON(!list_empty(&lo->plh_segs)); | ||
6312 | } | ||
6313 | lo->plh_block_lgets--; | 6360 | lo->plh_block_lgets--; |
6314 | spin_unlock(&lo->plh_inode->i_lock); | 6361 | spin_unlock(&lo->plh_inode->i_lock); |
6315 | dprintk("<-- %s\n", __func__); | 6362 | dprintk("<-- %s\n", __func__); |
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 6930bec91bca..1720d32ffa54 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c | |||
@@ -117,8 +117,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp) | |||
117 | timeout = 5 * HZ; | 117 | timeout = 5 * HZ; |
118 | dprintk("%s: requeueing work. Lease period = %ld\n", | 118 | dprintk("%s: requeueing work. Lease period = %ld\n", |
119 | __func__, (timeout + HZ - 1) / HZ); | 119 | __func__, (timeout + HZ - 1) / HZ); |
120 | cancel_delayed_work(&clp->cl_renewd); | 120 | mod_delayed_work(system_wq, &clp->cl_renewd, timeout); |
121 | schedule_delayed_work(&clp->cl_renewd, timeout); | ||
122 | set_bit(NFS_CS_RENEWD, &clp->cl_res_state); | 121 | set_bit(NFS_CS_RENEWD, &clp->cl_res_state); |
123 | spin_unlock(&clp->cl_lock); | 122 | spin_unlock(&clp->cl_lock); |
124 | } | 123 | } |
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 12a31a9dbcdd..bd61221ad2c5 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c | |||
@@ -23,14 +23,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, | |||
23 | static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, | 23 | static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, |
24 | int flags, const char *dev_name, void *raw_data); | 24 | int flags, const char *dev_name, void *raw_data); |
25 | 25 | ||
26 | static struct file_system_type nfs4_fs_type = { | ||
27 | .owner = THIS_MODULE, | ||
28 | .name = "nfs4", | ||
29 | .mount = nfs_fs_mount, | ||
30 | .kill_sb = nfs_kill_super, | ||
31 | .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, | ||
32 | }; | ||
33 | |||
34 | static struct file_system_type nfs4_remote_fs_type = { | 26 | static struct file_system_type nfs4_remote_fs_type = { |
35 | .owner = THIS_MODULE, | 27 | .owner = THIS_MODULE, |
36 | .name = "nfs4", | 28 | .name = "nfs4", |
@@ -344,14 +336,8 @@ static int __init init_nfs_v4(void) | |||
344 | if (err) | 336 | if (err) |
345 | goto out1; | 337 | goto out1; |
346 | 338 | ||
347 | err = register_filesystem(&nfs4_fs_type); | ||
348 | if (err < 0) | ||
349 | goto out2; | ||
350 | |||
351 | register_nfs_version(&nfs_v4); | 339 | register_nfs_version(&nfs_v4); |
352 | return 0; | 340 | return 0; |
353 | out2: | ||
354 | nfs4_unregister_sysctl(); | ||
355 | out1: | 341 | out1: |
356 | nfs_idmap_quit(); | 342 | nfs_idmap_quit(); |
357 | out: | 343 | out: |
@@ -361,7 +347,6 @@ out: | |||
361 | static void __exit exit_nfs_v4(void) | 347 | static void __exit exit_nfs_v4(void) |
362 | { | 348 | { |
363 | unregister_nfs_version(&nfs_v4); | 349 | unregister_nfs_version(&nfs_v4); |
364 | unregister_filesystem(&nfs4_fs_type); | ||
365 | nfs4_unregister_sysctl(); | 350 | nfs4_unregister_sysctl(); |
366 | nfs_idmap_quit(); | 351 | nfs_idmap_quit(); |
367 | } | 352 | } |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ca13483edd60..8dba6bd48557 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -5045,22 +5045,19 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
5045 | struct nfs_getaclres *res) | 5045 | struct nfs_getaclres *res) |
5046 | { | 5046 | { |
5047 | unsigned int savep; | 5047 | unsigned int savep; |
5048 | __be32 *bm_p; | ||
5049 | uint32_t attrlen, | 5048 | uint32_t attrlen, |
5050 | bitmap[3] = {0}; | 5049 | bitmap[3] = {0}; |
5051 | int status; | 5050 | int status; |
5052 | size_t page_len = xdr->buf->page_len; | 5051 | unsigned int pg_offset; |
5053 | 5052 | ||
5054 | res->acl_len = 0; | 5053 | res->acl_len = 0; |
5055 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) | 5054 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) |
5056 | goto out; | 5055 | goto out; |
5057 | 5056 | ||
5058 | bm_p = xdr->p; | 5057 | xdr_enter_page(xdr, xdr->buf->page_len); |
5059 | res->acl_data_offset = be32_to_cpup(bm_p) + 2; | 5058 | |
5060 | res->acl_data_offset <<= 2; | 5059 | /* Calculate the offset of the page data */ |
5061 | /* Check if the acl data starts beyond the allocated buffer */ | 5060 | pg_offset = xdr->buf->head[0].iov_len; |
5062 | if (res->acl_data_offset > page_len) | ||
5063 | return -ERANGE; | ||
5064 | 5061 | ||
5065 | if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) | 5062 | if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) |
5066 | goto out; | 5063 | goto out; |
@@ -5074,23 +5071,16 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
5074 | /* The bitmap (xdr len + bitmaps) and the attr xdr len words | 5071 | /* The bitmap (xdr len + bitmaps) and the attr xdr len words |
5075 | * are stored with the acl data to handle the problem of | 5072 | * are stored with the acl data to handle the problem of |
5076 | * variable length bitmaps.*/ | 5073 | * variable length bitmaps.*/ |
5077 | xdr->p = bm_p; | 5074 | res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset; |
5078 | |||
5079 | /* We ignore &savep and don't do consistency checks on | ||
5080 | * the attr length. Let userspace figure it out.... */ | ||
5081 | attrlen += res->acl_data_offset; | ||
5082 | if (attrlen > page_len) { | ||
5083 | if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { | ||
5084 | /* getxattr interface called with a NULL buf */ | ||
5085 | res->acl_len = attrlen; | ||
5086 | goto out; | ||
5087 | } | ||
5088 | dprintk("NFS: acl reply: attrlen %u > page_len %zu\n", | ||
5089 | attrlen, page_len); | ||
5090 | return -EINVAL; | ||
5091 | } | ||
5092 | xdr_read_pages(xdr, attrlen); | ||
5093 | res->acl_len = attrlen; | 5075 | res->acl_len = attrlen; |
5076 | |||
5077 | /* Check for receive buffer overflow */ | ||
5078 | if (res->acl_len > (xdr->nwords << 2) || | ||
5079 | res->acl_len + res->acl_data_offset > xdr->buf->page_len) { | ||
5080 | res->acl_flags |= NFS4_ACL_TRUNC; | ||
5081 | dprintk("NFS: acl reply: attrlen %u > page_len %u\n", | ||
5082 | attrlen, xdr->nwords << 2); | ||
5083 | } | ||
5094 | } else | 5084 | } else |
5095 | status = -EOPNOTSUPP; | 5085 | status = -EOPNOTSUPP; |
5096 | 5086 | ||
@@ -6235,7 +6225,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6235 | status = decode_open(xdr, res); | 6225 | status = decode_open(xdr, res); |
6236 | if (status) | 6226 | if (status) |
6237 | goto out; | 6227 | goto out; |
6238 | if (decode_getfh(xdr, &res->fh) != 0) | 6228 | status = decode_getfh(xdr, &res->fh); |
6229 | if (status) | ||
6239 | goto out; | 6230 | goto out; |
6240 | decode_getfattr(xdr, res->f_attr, res->server); | 6231 | decode_getfattr(xdr, res->f_attr, res->server); |
6241 | out: | 6232 | out: |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index f50d3e8d6f22..ea6d111b03e9 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -570,17 +570,66 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, | |||
570 | return false; | 570 | return false; |
571 | 571 | ||
572 | return pgio->pg_count + req->wb_bytes <= | 572 | return pgio->pg_count + req->wb_bytes <= |
573 | OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; | 573 | (unsigned long)pgio->pg_layout_private; |
574 | } | ||
575 | |||
576 | void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | ||
577 | { | ||
578 | pnfs_generic_pg_init_read(pgio, req); | ||
579 | if (unlikely(pgio->pg_lseg == NULL)) | ||
580 | return; /* Not pNFS */ | ||
581 | |||
582 | pgio->pg_layout_private = (void *) | ||
583 | OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; | ||
584 | } | ||
585 | |||
586 | static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, | ||
587 | unsigned long *stripe_end) | ||
588 | { | ||
589 | u32 stripe_off; | ||
590 | unsigned stripe_size; | ||
591 | |||
592 | if (layout->raid_algorithm == PNFS_OSD_RAID_0) | ||
593 | return true; | ||
594 | |||
595 | stripe_size = layout->stripe_unit * | ||
596 | (layout->group_width - layout->parity); | ||
597 | |||
598 | div_u64_rem(offset, stripe_size, &stripe_off); | ||
599 | if (!stripe_off) | ||
600 | return true; | ||
601 | |||
602 | *stripe_end = stripe_size - stripe_off; | ||
603 | return false; | ||
604 | } | ||
605 | |||
606 | void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | ||
607 | { | ||
608 | unsigned long stripe_end = 0; | ||
609 | |||
610 | pnfs_generic_pg_init_write(pgio, req); | ||
611 | if (unlikely(pgio->pg_lseg == NULL)) | ||
612 | return; /* Not pNFS */ | ||
613 | |||
614 | if (req->wb_offset || | ||
615 | !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE, | ||
616 | &OBJIO_LSEG(pgio->pg_lseg)->layout, | ||
617 | &stripe_end)) { | ||
618 | pgio->pg_layout_private = (void *)stripe_end; | ||
619 | } else { | ||
620 | pgio->pg_layout_private = (void *) | ||
621 | OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; | ||
622 | } | ||
574 | } | 623 | } |
575 | 624 | ||
576 | static const struct nfs_pageio_ops objio_pg_read_ops = { | 625 | static const struct nfs_pageio_ops objio_pg_read_ops = { |
577 | .pg_init = pnfs_generic_pg_init_read, | 626 | .pg_init = objio_init_read, |
578 | .pg_test = objio_pg_test, | 627 | .pg_test = objio_pg_test, |
579 | .pg_doio = pnfs_generic_pg_readpages, | 628 | .pg_doio = pnfs_generic_pg_readpages, |
580 | }; | 629 | }; |
581 | 630 | ||
582 | static const struct nfs_pageio_ops objio_pg_write_ops = { | 631 | static const struct nfs_pageio_ops objio_pg_write_ops = { |
583 | .pg_init = pnfs_generic_pg_init_write, | 632 | .pg_init = objio_init_write, |
584 | .pg_test = objio_pg_test, | 633 | .pg_test = objio_pg_test, |
585 | .pg_doio = pnfs_generic_pg_writepages, | 634 | .pg_doio = pnfs_generic_pg_writepages, |
586 | }; | 635 | }; |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 1a6732ed04a4..311a79681e2b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, | |||
49 | hdr->io_start = req_offset(hdr->req); | 49 | hdr->io_start = req_offset(hdr->req); |
50 | hdr->good_bytes = desc->pg_count; | 50 | hdr->good_bytes = desc->pg_count; |
51 | hdr->dreq = desc->pg_dreq; | 51 | hdr->dreq = desc->pg_dreq; |
52 | hdr->layout_private = desc->pg_layout_private; | ||
52 | hdr->release = release; | 53 | hdr->release = release; |
53 | hdr->completion_ops = desc->pg_completion_ops; | 54 | hdr->completion_ops = desc->pg_completion_ops; |
54 | if (hdr->completion_ops->init_hdr) | 55 | if (hdr->completion_ops->init_hdr) |
@@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
268 | desc->pg_error = 0; | 269 | desc->pg_error = 0; |
269 | desc->pg_lseg = NULL; | 270 | desc->pg_lseg = NULL; |
270 | desc->pg_dreq = NULL; | 271 | desc->pg_dreq = NULL; |
272 | desc->pg_layout_private = NULL; | ||
271 | } | 273 | } |
272 | EXPORT_SYMBOL_GPL(nfs_pageio_init); | 274 | EXPORT_SYMBOL_GPL(nfs_pageio_init); |
273 | 275 | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 76875bfcf19c..2e00feacd4be 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -583,9 +583,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
583 | struct nfs_server *server = NFS_SERVER(ino); | 583 | struct nfs_server *server = NFS_SERVER(ino); |
584 | struct nfs4_layoutget *lgp; | 584 | struct nfs4_layoutget *lgp; |
585 | struct pnfs_layout_segment *lseg = NULL; | 585 | struct pnfs_layout_segment *lseg = NULL; |
586 | struct page **pages = NULL; | ||
587 | int i; | ||
588 | u32 max_resp_sz, max_pages; | ||
589 | 586 | ||
590 | dprintk("--> %s\n", __func__); | 587 | dprintk("--> %s\n", __func__); |
591 | 588 | ||
@@ -594,20 +591,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
594 | if (lgp == NULL) | 591 | if (lgp == NULL) |
595 | return NULL; | 592 | return NULL; |
596 | 593 | ||
597 | /* allocate pages for xdr post processing */ | ||
598 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
599 | max_pages = nfs_page_array_len(0, max_resp_sz); | ||
600 | |||
601 | pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); | ||
602 | if (!pages) | ||
603 | goto out_err_free; | ||
604 | |||
605 | for (i = 0; i < max_pages; i++) { | ||
606 | pages[i] = alloc_page(gfp_flags); | ||
607 | if (!pages[i]) | ||
608 | goto out_err_free; | ||
609 | } | ||
610 | |||
611 | lgp->args.minlength = PAGE_CACHE_SIZE; | 594 | lgp->args.minlength = PAGE_CACHE_SIZE; |
612 | if (lgp->args.minlength > range->length) | 595 | if (lgp->args.minlength > range->length) |
613 | lgp->args.minlength = range->length; | 596 | lgp->args.minlength = range->length; |
@@ -616,39 +599,19 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
616 | lgp->args.type = server->pnfs_curr_ld->id; | 599 | lgp->args.type = server->pnfs_curr_ld->id; |
617 | lgp->args.inode = ino; | 600 | lgp->args.inode = ino; |
618 | lgp->args.ctx = get_nfs_open_context(ctx); | 601 | lgp->args.ctx = get_nfs_open_context(ctx); |
619 | lgp->args.layout.pages = pages; | ||
620 | lgp->args.layout.pglen = max_pages * PAGE_SIZE; | ||
621 | lgp->lsegpp = &lseg; | 602 | lgp->lsegpp = &lseg; |
622 | lgp->gfp_flags = gfp_flags; | 603 | lgp->gfp_flags = gfp_flags; |
623 | 604 | ||
624 | /* Synchronously retrieve layout information from server and | 605 | /* Synchronously retrieve layout information from server and |
625 | * store in lseg. | 606 | * store in lseg. |
626 | */ | 607 | */ |
627 | nfs4_proc_layoutget(lgp); | 608 | nfs4_proc_layoutget(lgp, gfp_flags); |
628 | if (!lseg) { | 609 | if (!lseg) { |
629 | /* remember that LAYOUTGET failed and suspend trying */ | 610 | /* remember that LAYOUTGET failed and suspend trying */ |
630 | set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); | 611 | set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); |
631 | } | 612 | } |
632 | 613 | ||
633 | /* free xdr pages */ | ||
634 | for (i = 0; i < max_pages; i++) | ||
635 | __free_page(pages[i]); | ||
636 | kfree(pages); | ||
637 | |||
638 | return lseg; | 614 | return lseg; |
639 | |||
640 | out_err_free: | ||
641 | /* free any allocated xdr pages, lgp as it's not used */ | ||
642 | if (pages) { | ||
643 | for (i = 0; i < max_pages; i++) { | ||
644 | if (!pages[i]) | ||
645 | break; | ||
646 | __free_page(pages[i]); | ||
647 | } | ||
648 | kfree(pages); | ||
649 | } | ||
650 | kfree(lgp); | ||
651 | return NULL; | ||
652 | } | 615 | } |
653 | 616 | ||
654 | /* | 617 | /* |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2c6c80503ba4..745aa1b39e7c 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, | |||
172 | struct pnfs_devicelist *devlist); | 172 | struct pnfs_devicelist *devlist); |
173 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | 173 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
174 | struct pnfs_device *dev); | 174 | struct pnfs_device *dev); |
175 | extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); | 175 | extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); |
176 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); | 176 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); |
177 | 177 | ||
178 | /* pnfs.c */ | 178 | /* pnfs.c */ |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ac6a3c55dce4..d2c7f5db0847 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -319,6 +319,34 @@ EXPORT_SYMBOL_GPL(nfs_sops); | |||
319 | static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); | 319 | static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); |
320 | static int nfs4_validate_mount_data(void *options, | 320 | static int nfs4_validate_mount_data(void *options, |
321 | struct nfs_parsed_mount_data *args, const char *dev_name); | 321 | struct nfs_parsed_mount_data *args, const char *dev_name); |
322 | |||
323 | struct file_system_type nfs4_fs_type = { | ||
324 | .owner = THIS_MODULE, | ||
325 | .name = "nfs4", | ||
326 | .mount = nfs_fs_mount, | ||
327 | .kill_sb = nfs_kill_super, | ||
328 | .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, | ||
329 | }; | ||
330 | EXPORT_SYMBOL_GPL(nfs4_fs_type); | ||
331 | |||
332 | static int __init register_nfs4_fs(void) | ||
333 | { | ||
334 | return register_filesystem(&nfs4_fs_type); | ||
335 | } | ||
336 | |||
337 | static void unregister_nfs4_fs(void) | ||
338 | { | ||
339 | unregister_filesystem(&nfs4_fs_type); | ||
340 | } | ||
341 | #else | ||
342 | static int __init register_nfs4_fs(void) | ||
343 | { | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | static void unregister_nfs4_fs(void) | ||
348 | { | ||
349 | } | ||
322 | #endif | 350 | #endif |
323 | 351 | ||
324 | static struct shrinker acl_shrinker = { | 352 | static struct shrinker acl_shrinker = { |
@@ -337,12 +365,18 @@ int __init register_nfs_fs(void) | |||
337 | if (ret < 0) | 365 | if (ret < 0) |
338 | goto error_0; | 366 | goto error_0; |
339 | 367 | ||
340 | ret = nfs_register_sysctl(); | 368 | ret = register_nfs4_fs(); |
341 | if (ret < 0) | 369 | if (ret < 0) |
342 | goto error_1; | 370 | goto error_1; |
371 | |||
372 | ret = nfs_register_sysctl(); | ||
373 | if (ret < 0) | ||
374 | goto error_2; | ||
343 | register_shrinker(&acl_shrinker); | 375 | register_shrinker(&acl_shrinker); |
344 | return 0; | 376 | return 0; |
345 | 377 | ||
378 | error_2: | ||
379 | unregister_nfs4_fs(); | ||
346 | error_1: | 380 | error_1: |
347 | unregister_filesystem(&nfs_fs_type); | 381 | unregister_filesystem(&nfs_fs_type); |
348 | error_0: | 382 | error_0: |
@@ -356,6 +390,7 @@ void __exit unregister_nfs_fs(void) | |||
356 | { | 390 | { |
357 | unregister_shrinker(&acl_shrinker); | 391 | unregister_shrinker(&acl_shrinker); |
358 | nfs_unregister_sysctl(); | 392 | nfs_unregister_sysctl(); |
393 | unregister_nfs4_fs(); | ||
359 | unregister_filesystem(&nfs_fs_type); | 394 | unregister_filesystem(&nfs_fs_type); |
360 | } | 395 | } |
361 | 396 | ||
@@ -1502,7 +1537,7 @@ static int nfs_parse_mount_options(char *raw, | |||
1502 | 1537 | ||
1503 | /* | 1538 | /* |
1504 | * verify that any proto=/mountproto= options match the address | 1539 | * verify that any proto=/mountproto= options match the address |
1505 | * familiies in the addr=/mountaddr= options. | 1540 | * families in the addr=/mountaddr= options. |
1506 | */ | 1541 | */ |
1507 | if (protofamily != AF_UNSPEC && | 1542 | if (protofamily != AF_UNSPEC && |
1508 | protofamily != mnt->nfs_server.address.ss_family) | 1543 | protofamily != mnt->nfs_server.address.ss_family) |
@@ -1832,6 +1867,7 @@ static int nfs23_validate_mount_data(void *options, | |||
1832 | 1867 | ||
1833 | memcpy(sap, &data->addr, sizeof(data->addr)); | 1868 | memcpy(sap, &data->addr, sizeof(data->addr)); |
1834 | args->nfs_server.addrlen = sizeof(data->addr); | 1869 | args->nfs_server.addrlen = sizeof(data->addr); |
1870 | args->nfs_server.port = ntohs(data->addr.sin_port); | ||
1835 | if (!nfs_verify_server_address(sap)) | 1871 | if (!nfs_verify_server_address(sap)) |
1836 | goto out_no_address; | 1872 | goto out_no_address; |
1837 | 1873 | ||
@@ -2529,6 +2565,7 @@ static int nfs4_validate_mount_data(void *options, | |||
2529 | return -EFAULT; | 2565 | return -EFAULT; |
2530 | if (!nfs_verify_server_address(sap)) | 2566 | if (!nfs_verify_server_address(sap)) |
2531 | goto out_no_address; | 2567 | goto out_no_address; |
2568 | args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); | ||
2532 | 2569 | ||
2533 | if (data->auth_flavourlen) { | 2570 | if (data->auth_flavourlen) { |
2534 | if (data->auth_flavourlen > 1) | 2571 | if (data->auth_flavourlen > 1) |
@@ -2645,4 +2682,6 @@ MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " | |||
2645 | module_param(send_implementation_id, ushort, 0644); | 2682 | module_param(send_implementation_id, ushort, 0644); |
2646 | MODULE_PARM_DESC(send_implementation_id, | 2683 | MODULE_PARM_DESC(send_implementation_id, |
2647 | "Send implementation ID with NFSv4.1 exchange_id"); | 2684 | "Send implementation ID with NFSv4.1 exchange_id"); |
2685 | MODULE_ALIAS("nfs4"); | ||
2686 | |||
2648 | #endif /* CONFIG_NFS_V4 */ | 2687 | #endif /* CONFIG_NFS_V4 */ |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5829d0ce7cfb..e3b55372726c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1814,19 +1814,19 @@ int __init nfs_init_writepagecache(void) | |||
1814 | nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, | 1814 | nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, |
1815 | nfs_wdata_cachep); | 1815 | nfs_wdata_cachep); |
1816 | if (nfs_wdata_mempool == NULL) | 1816 | if (nfs_wdata_mempool == NULL) |
1817 | return -ENOMEM; | 1817 | goto out_destroy_write_cache; |
1818 | 1818 | ||
1819 | nfs_cdata_cachep = kmem_cache_create("nfs_commit_data", | 1819 | nfs_cdata_cachep = kmem_cache_create("nfs_commit_data", |
1820 | sizeof(struct nfs_commit_data), | 1820 | sizeof(struct nfs_commit_data), |
1821 | 0, SLAB_HWCACHE_ALIGN, | 1821 | 0, SLAB_HWCACHE_ALIGN, |
1822 | NULL); | 1822 | NULL); |
1823 | if (nfs_cdata_cachep == NULL) | 1823 | if (nfs_cdata_cachep == NULL) |
1824 | return -ENOMEM; | 1824 | goto out_destroy_write_mempool; |
1825 | 1825 | ||
1826 | nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, | 1826 | nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, |
1827 | nfs_wdata_cachep); | 1827 | nfs_wdata_cachep); |
1828 | if (nfs_commit_mempool == NULL) | 1828 | if (nfs_commit_mempool == NULL) |
1829 | return -ENOMEM; | 1829 | goto out_destroy_commit_cache; |
1830 | 1830 | ||
1831 | /* | 1831 | /* |
1832 | * NFS congestion size, scale with available memory. | 1832 | * NFS congestion size, scale with available memory. |
@@ -1849,11 +1849,20 @@ int __init nfs_init_writepagecache(void) | |||
1849 | nfs_congestion_kb = 256*1024; | 1849 | nfs_congestion_kb = 256*1024; |
1850 | 1850 | ||
1851 | return 0; | 1851 | return 0; |
1852 | |||
1853 | out_destroy_commit_cache: | ||
1854 | kmem_cache_destroy(nfs_cdata_cachep); | ||
1855 | out_destroy_write_mempool: | ||
1856 | mempool_destroy(nfs_wdata_mempool); | ||
1857 | out_destroy_write_cache: | ||
1858 | kmem_cache_destroy(nfs_wdata_cachep); | ||
1859 | return -ENOMEM; | ||
1852 | } | 1860 | } |
1853 | 1861 | ||
1854 | void nfs_destroy_writepagecache(void) | 1862 | void nfs_destroy_writepagecache(void) |
1855 | { | 1863 | { |
1856 | mempool_destroy(nfs_commit_mempool); | 1864 | mempool_destroy(nfs_commit_mempool); |
1865 | kmem_cache_destroy(nfs_cdata_cachep); | ||
1857 | mempool_destroy(nfs_wdata_mempool); | 1866 | mempool_destroy(nfs_wdata_mempool); |
1858 | kmem_cache_destroy(nfs_wdata_cachep); | 1867 | kmem_cache_destroy(nfs_wdata_cachep); |
1859 | } | 1868 | } |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index cbaf4f8bb7b7..4c7bd35b1876 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -651,12 +651,12 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c | |||
651 | 651 | ||
652 | if (clp->cl_minorversion == 0) { | 652 | if (clp->cl_minorversion == 0) { |
653 | if (!clp->cl_cred.cr_principal && | 653 | if (!clp->cl_cred.cr_principal && |
654 | (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) | 654 | (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) |
655 | return -EINVAL; | 655 | return -EINVAL; |
656 | args.client_name = clp->cl_cred.cr_principal; | 656 | args.client_name = clp->cl_cred.cr_principal; |
657 | args.prognumber = conn->cb_prog, | 657 | args.prognumber = conn->cb_prog, |
658 | args.protocol = XPRT_TRANSPORT_TCP; | 658 | args.protocol = XPRT_TRANSPORT_TCP; |
659 | args.authflavor = clp->cl_flavor; | 659 | args.authflavor = clp->cl_cred.cr_flavor; |
660 | clp->cl_cb_ident = conn->cb_ident; | 660 | clp->cl_cb_ident = conn->cb_ident; |
661 | } else { | 661 | } else { |
662 | if (!conn->cb_xprt) | 662 | if (!conn->cb_xprt) |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cc894eda385a..48a1bad37334 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -2837,8 +2837,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag) | |||
2837 | return -ENOMEM; | 2837 | return -ENOMEM; |
2838 | } | 2838 | } |
2839 | fp->fi_lease = fl; | 2839 | fp->fi_lease = fl; |
2840 | fp->fi_deleg_file = fl->fl_file; | 2840 | fp->fi_deleg_file = get_file(fl->fl_file); |
2841 | get_file(fp->fi_deleg_file); | ||
2842 | atomic_set(&fp->fi_delegees, 1); | 2841 | atomic_set(&fp->fi_delegees, 1); |
2843 | list_add(&dp->dl_perfile, &fp->fi_delegations); | 2842 | list_add(&dp->dl_perfile, &fp->fi_delegations); |
2844 | return 0; | 2843 | return 0; |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e6173147f982..22bd0a66c356 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -231,7 +231,6 @@ struct nfs4_client { | |||
231 | nfs4_verifier cl_verifier; /* generated by client */ | 231 | nfs4_verifier cl_verifier; /* generated by client */ |
232 | time_t cl_time; /* time of last lease renewal */ | 232 | time_t cl_time; /* time of last lease renewal */ |
233 | struct sockaddr_storage cl_addr; /* client ipaddress */ | 233 | struct sockaddr_storage cl_addr; /* client ipaddress */ |
234 | u32 cl_flavor; /* setclientid pseudoflavor */ | ||
235 | struct svc_cred cl_cred; /* setclientid principal */ | 234 | struct svc_cred cl_cred; /* setclientid principal */ |
236 | clientid_t cl_clientid; /* generated by server */ | 235 | clientid_t cl_clientid; /* generated by server */ |
237 | nfs4_verifier cl_confirm; /* generated by server */ | 236 | nfs4_verifier cl_confirm; /* generated by server */ |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a9269f142cc4..3f67b8e12251 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -480,7 +480,7 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) | |||
480 | if (buf == NULL) | 480 | if (buf == NULL) |
481 | goto out; | 481 | goto out; |
482 | 482 | ||
483 | len = posix_acl_to_xattr(pacl, buf, buflen); | 483 | len = posix_acl_to_xattr(&init_user_ns, pacl, buf, buflen); |
484 | if (len < 0) { | 484 | if (len < 0) { |
485 | error = len; | 485 | error = len; |
486 | goto out; | 486 | goto out; |
@@ -549,7 +549,7 @@ _get_posix_acl(struct dentry *dentry, char *key) | |||
549 | if (buflen <= 0) | 549 | if (buflen <= 0) |
550 | return ERR_PTR(buflen); | 550 | return ERR_PTR(buflen); |
551 | 551 | ||
552 | pacl = posix_acl_from_xattr(buf, buflen); | 552 | pacl = posix_acl_from_xattr(&init_user_ns, buf, buflen); |
553 | kfree(buf); | 553 | kfree(buf); |
554 | return pacl; | 554 | return pacl; |
555 | } | 555 | } |
@@ -2264,7 +2264,7 @@ nfsd_get_posix_acl(struct svc_fh *fhp, int type) | |||
2264 | if (size < 0) | 2264 | if (size < 0) |
2265 | return ERR_PTR(size); | 2265 | return ERR_PTR(size); |
2266 | 2266 | ||
2267 | acl = posix_acl_from_xattr(value, size); | 2267 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
2268 | kfree(value); | 2268 | kfree(value); |
2269 | return acl; | 2269 | return acl; |
2270 | } | 2270 | } |
@@ -2297,7 +2297,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) | |||
2297 | value = kmalloc(size, GFP_KERNEL); | 2297 | value = kmalloc(size, GFP_KERNEL); |
2298 | if (!value) | 2298 | if (!value) |
2299 | return -ENOMEM; | 2299 | return -ENOMEM; |
2300 | error = posix_acl_to_xattr(acl, value, size); | 2300 | error = posix_acl_to_xattr(&init_user_ns, acl, value, size); |
2301 | if (error < 0) | 2301 | if (error < 0) |
2302 | goto getout; | 2302 | goto getout; |
2303 | size = error; | 2303 | size = error; |
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index a4d56ac02e6c..16f35f7423c5 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c | |||
@@ -116,6 +116,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
116 | if (unlikely(ret)) | 116 | if (unlikely(ret)) |
117 | goto out; | 117 | goto out; |
118 | 118 | ||
119 | file_update_time(vma->vm_file); | ||
119 | ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); | 120 | ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); |
120 | if (ret) { | 121 | if (ret) { |
121 | nilfs_transaction_abort(inode->i_sb); | 122 | nilfs_transaction_abort(inode->i_sb); |
@@ -134,13 +135,13 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
134 | static const struct vm_operations_struct nilfs_file_vm_ops = { | 135 | static const struct vm_operations_struct nilfs_file_vm_ops = { |
135 | .fault = filemap_fault, | 136 | .fault = filemap_fault, |
136 | .page_mkwrite = nilfs_page_mkwrite, | 137 | .page_mkwrite = nilfs_page_mkwrite, |
138 | .remap_pages = generic_file_remap_pages, | ||
137 | }; | 139 | }; |
138 | 140 | ||
139 | static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) | 141 | static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) |
140 | { | 142 | { |
141 | file_accessed(file); | 143 | file_accessed(file); |
142 | vma->vm_ops = &nilfs_file_vm_ops; | 144 | vma->vm_ops = &nilfs_file_vm_ops; |
143 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
144 | return 0; | 145 | return 0; |
145 | } | 146 | } |
146 | 147 | ||
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 6e2c3db976b2..4d31d2cca7fd 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
@@ -401,8 +401,8 @@ int nilfs_read_inode_common(struct inode *inode, | |||
401 | int err; | 401 | int err; |
402 | 402 | ||
403 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 403 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
404 | inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid); | 404 | i_uid_write(inode, le32_to_cpu(raw_inode->i_uid)); |
405 | inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid); | 405 | i_gid_write(inode, le32_to_cpu(raw_inode->i_gid)); |
406 | set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); | 406 | set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); |
407 | inode->i_size = le64_to_cpu(raw_inode->i_size); | 407 | inode->i_size = le64_to_cpu(raw_inode->i_size); |
408 | inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); | 408 | inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); |
@@ -590,8 +590,8 @@ void nilfs_write_inode_common(struct inode *inode, | |||
590 | struct nilfs_inode_info *ii = NILFS_I(inode); | 590 | struct nilfs_inode_info *ii = NILFS_I(inode); |
591 | 591 | ||
592 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); | 592 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); |
593 | raw_inode->i_uid = cpu_to_le32(inode->i_uid); | 593 | raw_inode->i_uid = cpu_to_le32(i_uid_read(inode)); |
594 | raw_inode->i_gid = cpu_to_le32(inode->i_gid); | 594 | raw_inode->i_gid = cpu_to_le32(i_gid_read(inode)); |
595 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); | 595 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); |
596 | raw_inode->i_size = cpu_to_le64(inode->i_size); | 596 | raw_inode->i_size = cpu_to_le64(inode->i_size); |
597 | raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | 597 | raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6a10812711c1..3c991dc84f2f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -1382,6 +1382,12 @@ static void nilfs_segbuf_init_once(void *obj) | |||
1382 | 1382 | ||
1383 | static void nilfs_destroy_cachep(void) | 1383 | static void nilfs_destroy_cachep(void) |
1384 | { | 1384 | { |
1385 | /* | ||
1386 | * Make sure all delayed rcu free inodes are flushed before we | ||
1387 | * destroy cache. | ||
1388 | */ | ||
1389 | rcu_barrier(); | ||
1390 | |||
1385 | if (nilfs_inode_cachep) | 1391 | if (nilfs_inode_cachep) |
1386 | kmem_cache_destroy(nilfs_inode_cachep); | 1392 | kmem_cache_destroy(nilfs_inode_cachep); |
1387 | if (nilfs_transaction_cachep) | 1393 | if (nilfs_transaction_cachep) |
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index d43803669739..721d692fa8d4 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -58,7 +58,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, | |||
58 | return fsnotify_remove_notify_event(group); | 58 | return fsnotify_remove_notify_event(group); |
59 | } | 59 | } |
60 | 60 | ||
61 | static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | 61 | static int create_fd(struct fsnotify_group *group, |
62 | struct fsnotify_event *event, | ||
63 | struct file **file) | ||
62 | { | 64 | { |
63 | int client_fd; | 65 | int client_fd; |
64 | struct file *new_file; | 66 | struct file *new_file; |
@@ -98,7 +100,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | |||
98 | put_unused_fd(client_fd); | 100 | put_unused_fd(client_fd); |
99 | client_fd = PTR_ERR(new_file); | 101 | client_fd = PTR_ERR(new_file); |
100 | } else { | 102 | } else { |
101 | fd_install(client_fd, new_file); | 103 | *file = new_file; |
102 | } | 104 | } |
103 | 105 | ||
104 | return client_fd; | 106 | return client_fd; |
@@ -106,13 +108,15 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | |||
106 | 108 | ||
107 | static int fill_event_metadata(struct fsnotify_group *group, | 109 | static int fill_event_metadata(struct fsnotify_group *group, |
108 | struct fanotify_event_metadata *metadata, | 110 | struct fanotify_event_metadata *metadata, |
109 | struct fsnotify_event *event) | 111 | struct fsnotify_event *event, |
112 | struct file **file) | ||
110 | { | 113 | { |
111 | int ret = 0; | 114 | int ret = 0; |
112 | 115 | ||
113 | pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, | 116 | pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, |
114 | group, metadata, event); | 117 | group, metadata, event); |
115 | 118 | ||
119 | *file = NULL; | ||
116 | metadata->event_len = FAN_EVENT_METADATA_LEN; | 120 | metadata->event_len = FAN_EVENT_METADATA_LEN; |
117 | metadata->metadata_len = FAN_EVENT_METADATA_LEN; | 121 | metadata->metadata_len = FAN_EVENT_METADATA_LEN; |
118 | metadata->vers = FANOTIFY_METADATA_VERSION; | 122 | metadata->vers = FANOTIFY_METADATA_VERSION; |
@@ -121,7 +125,7 @@ static int fill_event_metadata(struct fsnotify_group *group, | |||
121 | if (unlikely(event->mask & FAN_Q_OVERFLOW)) | 125 | if (unlikely(event->mask & FAN_Q_OVERFLOW)) |
122 | metadata->fd = FAN_NOFD; | 126 | metadata->fd = FAN_NOFD; |
123 | else { | 127 | else { |
124 | metadata->fd = create_fd(group, event); | 128 | metadata->fd = create_fd(group, event, file); |
125 | if (metadata->fd < 0) | 129 | if (metadata->fd < 0) |
126 | ret = metadata->fd; | 130 | ret = metadata->fd; |
127 | } | 131 | } |
@@ -220,25 +224,6 @@ static int prepare_for_access_response(struct fsnotify_group *group, | |||
220 | return 0; | 224 | return 0; |
221 | } | 225 | } |
222 | 226 | ||
223 | static void remove_access_response(struct fsnotify_group *group, | ||
224 | struct fsnotify_event *event, | ||
225 | __s32 fd) | ||
226 | { | ||
227 | struct fanotify_response_event *re; | ||
228 | |||
229 | if (!(event->mask & FAN_ALL_PERM_EVENTS)) | ||
230 | return; | ||
231 | |||
232 | re = dequeue_re(group, fd); | ||
233 | if (!re) | ||
234 | return; | ||
235 | |||
236 | BUG_ON(re->event != event); | ||
237 | |||
238 | kmem_cache_free(fanotify_response_event_cache, re); | ||
239 | |||
240 | return; | ||
241 | } | ||
242 | #else | 227 | #else |
243 | static int prepare_for_access_response(struct fsnotify_group *group, | 228 | static int prepare_for_access_response(struct fsnotify_group *group, |
244 | struct fsnotify_event *event, | 229 | struct fsnotify_event *event, |
@@ -247,12 +232,6 @@ static int prepare_for_access_response(struct fsnotify_group *group, | |||
247 | return 0; | 232 | return 0; |
248 | } | 233 | } |
249 | 234 | ||
250 | static void remove_access_response(struct fsnotify_group *group, | ||
251 | struct fsnotify_event *event, | ||
252 | __s32 fd) | ||
253 | { | ||
254 | return; | ||
255 | } | ||
256 | #endif | 235 | #endif |
257 | 236 | ||
258 | static ssize_t copy_event_to_user(struct fsnotify_group *group, | 237 | static ssize_t copy_event_to_user(struct fsnotify_group *group, |
@@ -260,31 +239,33 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
260 | char __user *buf) | 239 | char __user *buf) |
261 | { | 240 | { |
262 | struct fanotify_event_metadata fanotify_event_metadata; | 241 | struct fanotify_event_metadata fanotify_event_metadata; |
242 | struct file *f; | ||
263 | int fd, ret; | 243 | int fd, ret; |
264 | 244 | ||
265 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | 245 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); |
266 | 246 | ||
267 | ret = fill_event_metadata(group, &fanotify_event_metadata, event); | 247 | ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f); |
268 | if (ret < 0) | 248 | if (ret < 0) |
269 | goto out; | 249 | goto out; |
270 | 250 | ||
271 | fd = fanotify_event_metadata.fd; | 251 | fd = fanotify_event_metadata.fd; |
272 | ret = prepare_for_access_response(group, event, fd); | ||
273 | if (ret) | ||
274 | goto out_close_fd; | ||
275 | |||
276 | ret = -EFAULT; | 252 | ret = -EFAULT; |
277 | if (copy_to_user(buf, &fanotify_event_metadata, | 253 | if (copy_to_user(buf, &fanotify_event_metadata, |
278 | fanotify_event_metadata.event_len)) | 254 | fanotify_event_metadata.event_len)) |
279 | goto out_kill_access_response; | 255 | goto out_close_fd; |
280 | 256 | ||
257 | ret = prepare_for_access_response(group, event, fd); | ||
258 | if (ret) | ||
259 | goto out_close_fd; | ||
260 | |||
261 | fd_install(fd, f); | ||
281 | return fanotify_event_metadata.event_len; | 262 | return fanotify_event_metadata.event_len; |
282 | 263 | ||
283 | out_kill_access_response: | ||
284 | remove_access_response(group, event, fd); | ||
285 | out_close_fd: | 264 | out_close_fd: |
286 | if (fd != FAN_NOFD) | 265 | if (fd != FAN_NOFD) { |
287 | sys_close(fd); | 266 | put_unused_fd(fd); |
267 | fput(f); | ||
268 | } | ||
288 | out: | 269 | out: |
289 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 270 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
290 | if (event->mask & FAN_ALL_PERM_EVENTS) { | 271 | if (event->mask & FAN_ALL_PERM_EVENTS) { |
@@ -470,24 +451,22 @@ static int fanotify_find_path(int dfd, const char __user *filename, | |||
470 | dfd, filename, flags); | 451 | dfd, filename, flags); |
471 | 452 | ||
472 | if (filename == NULL) { | 453 | if (filename == NULL) { |
473 | struct file *file; | 454 | struct fd f = fdget(dfd); |
474 | int fput_needed; | ||
475 | 455 | ||
476 | ret = -EBADF; | 456 | ret = -EBADF; |
477 | file = fget_light(dfd, &fput_needed); | 457 | if (!f.file) |
478 | if (!file) | ||
479 | goto out; | 458 | goto out; |
480 | 459 | ||
481 | ret = -ENOTDIR; | 460 | ret = -ENOTDIR; |
482 | if ((flags & FAN_MARK_ONLYDIR) && | 461 | if ((flags & FAN_MARK_ONLYDIR) && |
483 | !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) { | 462 | !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) { |
484 | fput_light(file, fput_needed); | 463 | fdput(f); |
485 | goto out; | 464 | goto out; |
486 | } | 465 | } |
487 | 466 | ||
488 | *path = file->f_path; | 467 | *path = f.file->f_path; |
489 | path_get(path); | 468 | path_get(path); |
490 | fput_light(file, fput_needed); | 469 | fdput(f); |
491 | } else { | 470 | } else { |
492 | unsigned int lookup_flags = 0; | 471 | unsigned int lookup_flags = 0; |
493 | 472 | ||
@@ -767,9 +746,9 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, | |||
767 | struct inode *inode = NULL; | 746 | struct inode *inode = NULL; |
768 | struct vfsmount *mnt = NULL; | 747 | struct vfsmount *mnt = NULL; |
769 | struct fsnotify_group *group; | 748 | struct fsnotify_group *group; |
770 | struct file *filp; | 749 | struct fd f; |
771 | struct path path; | 750 | struct path path; |
772 | int ret, fput_needed; | 751 | int ret; |
773 | 752 | ||
774 | pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", | 753 | pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", |
775 | __func__, fanotify_fd, flags, dfd, pathname, mask); | 754 | __func__, fanotify_fd, flags, dfd, pathname, mask); |
@@ -803,15 +782,15 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, | |||
803 | #endif | 782 | #endif |
804 | return -EINVAL; | 783 | return -EINVAL; |
805 | 784 | ||
806 | filp = fget_light(fanotify_fd, &fput_needed); | 785 | f = fdget(fanotify_fd); |
807 | if (unlikely(!filp)) | 786 | if (unlikely(!f.file)) |
808 | return -EBADF; | 787 | return -EBADF; |
809 | 788 | ||
810 | /* verify that this is indeed an fanotify instance */ | 789 | /* verify that this is indeed an fanotify instance */ |
811 | ret = -EINVAL; | 790 | ret = -EINVAL; |
812 | if (unlikely(filp->f_op != &fanotify_fops)) | 791 | if (unlikely(f.file->f_op != &fanotify_fops)) |
813 | goto fput_and_out; | 792 | goto fput_and_out; |
814 | group = filp->private_data; | 793 | group = f.file->private_data; |
815 | 794 | ||
816 | /* | 795 | /* |
817 | * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not | 796 | * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not |
@@ -858,7 +837,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, | |||
858 | 837 | ||
859 | path_put(&path); | 838 | path_put(&path); |
860 | fput_and_out: | 839 | fput_and_out: |
861 | fput_light(filp, fput_needed); | 840 | fdput(f); |
862 | return ret; | 841 | return ret; |
863 | } | 842 | } |
864 | 843 | ||
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 8445fbc8985c..c311dda054a3 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -757,16 +757,16 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, | |||
757 | struct fsnotify_group *group; | 757 | struct fsnotify_group *group; |
758 | struct inode *inode; | 758 | struct inode *inode; |
759 | struct path path; | 759 | struct path path; |
760 | struct file *filp; | 760 | struct fd f; |
761 | int ret, fput_needed; | 761 | int ret; |
762 | unsigned flags = 0; | 762 | unsigned flags = 0; |
763 | 763 | ||
764 | filp = fget_light(fd, &fput_needed); | 764 | f = fdget(fd); |
765 | if (unlikely(!filp)) | 765 | if (unlikely(!f.file)) |
766 | return -EBADF; | 766 | return -EBADF; |
767 | 767 | ||
768 | /* verify that this is indeed an inotify instance */ | 768 | /* verify that this is indeed an inotify instance */ |
769 | if (unlikely(filp->f_op != &inotify_fops)) { | 769 | if (unlikely(f.file->f_op != &inotify_fops)) { |
770 | ret = -EINVAL; | 770 | ret = -EINVAL; |
771 | goto fput_and_out; | 771 | goto fput_and_out; |
772 | } | 772 | } |
@@ -782,13 +782,13 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, | |||
782 | 782 | ||
783 | /* inode held in place by reference to path; group by fget on fd */ | 783 | /* inode held in place by reference to path; group by fget on fd */ |
784 | inode = path.dentry->d_inode; | 784 | inode = path.dentry->d_inode; |
785 | group = filp->private_data; | 785 | group = f.file->private_data; |
786 | 786 | ||
787 | /* create/update an inode mark */ | 787 | /* create/update an inode mark */ |
788 | ret = inotify_update_watch(group, inode, mask); | 788 | ret = inotify_update_watch(group, inode, mask); |
789 | path_put(&path); | 789 | path_put(&path); |
790 | fput_and_out: | 790 | fput_and_out: |
791 | fput_light(filp, fput_needed); | 791 | fdput(f); |
792 | return ret; | 792 | return ret; |
793 | } | 793 | } |
794 | 794 | ||
@@ -796,19 +796,19 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) | |||
796 | { | 796 | { |
797 | struct fsnotify_group *group; | 797 | struct fsnotify_group *group; |
798 | struct inotify_inode_mark *i_mark; | 798 | struct inotify_inode_mark *i_mark; |
799 | struct file *filp; | 799 | struct fd f; |
800 | int ret = 0, fput_needed; | 800 | int ret = 0; |
801 | 801 | ||
802 | filp = fget_light(fd, &fput_needed); | 802 | f = fdget(fd); |
803 | if (unlikely(!filp)) | 803 | if (unlikely(!f.file)) |
804 | return -EBADF; | 804 | return -EBADF; |
805 | 805 | ||
806 | /* verify that this is indeed an inotify instance */ | 806 | /* verify that this is indeed an inotify instance */ |
807 | ret = -EINVAL; | 807 | ret = -EINVAL; |
808 | if (unlikely(filp->f_op != &inotify_fops)) | 808 | if (unlikely(f.file->f_op != &inotify_fops)) |
809 | goto out; | 809 | goto out; |
810 | 810 | ||
811 | group = filp->private_data; | 811 | group = f.file->private_data; |
812 | 812 | ||
813 | ret = -EINVAL; | 813 | ret = -EINVAL; |
814 | i_mark = inotify_idr_find(group, wd); | 814 | i_mark = inotify_idr_find(group, wd); |
@@ -823,7 +823,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) | |||
823 | fsnotify_put_mark(&i_mark->fsn_mark); | 823 | fsnotify_put_mark(&i_mark->fsn_mark); |
824 | 824 | ||
825 | out: | 825 | out: |
826 | fput_light(filp, fput_needed); | 826 | fdput(f); |
827 | return ret; | 827 | return ret; |
828 | } | 828 | } |
829 | 829 | ||
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index c6dbd3db6ca8..1d27331e6fc9 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c | |||
@@ -2124,7 +2124,8 @@ int ntfs_read_inode_mount(struct inode *vi) | |||
2124 | * ntfs_read_inode() will have set up the default ones. | 2124 | * ntfs_read_inode() will have set up the default ones. |
2125 | */ | 2125 | */ |
2126 | /* Set uid and gid to root. */ | 2126 | /* Set uid and gid to root. */ |
2127 | vi->i_uid = vi->i_gid = 0; | 2127 | vi->i_uid = GLOBAL_ROOT_UID; |
2128 | vi->i_gid = GLOBAL_ROOT_GID; | ||
2128 | /* Regular file. No access for anyone. */ | 2129 | /* Regular file. No access for anyone. */ |
2129 | vi->i_mode = S_IFREG; | 2130 | vi->i_mode = S_IFREG; |
2130 | /* No VFS initiated operations allowed for $MFT. */ | 2131 | /* No VFS initiated operations allowed for $MFT. */ |
@@ -2312,8 +2313,8 @@ int ntfs_show_options(struct seq_file *sf, struct dentry *root) | |||
2312 | ntfs_volume *vol = NTFS_SB(root->d_sb); | 2313 | ntfs_volume *vol = NTFS_SB(root->d_sb); |
2313 | int i; | 2314 | int i; |
2314 | 2315 | ||
2315 | seq_printf(sf, ",uid=%i", vol->uid); | 2316 | seq_printf(sf, ",uid=%i", from_kuid_munged(&init_user_ns, vol->uid)); |
2316 | seq_printf(sf, ",gid=%i", vol->gid); | 2317 | seq_printf(sf, ",gid=%i", from_kgid_munged(&init_user_ns, vol->gid)); |
2317 | if (vol->fmask == vol->dmask) | 2318 | if (vol->fmask == vol->dmask) |
2318 | seq_printf(sf, ",umask=0%o", vol->fmask); | 2319 | seq_printf(sf, ",umask=0%o", vol->fmask); |
2319 | else { | 2320 | else { |
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 2bc149d6a784..4a8289f8b16c 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c | |||
@@ -102,8 +102,8 @@ static bool parse_options(ntfs_volume *vol, char *opt) | |||
102 | char *p, *v, *ov; | 102 | char *p, *v, *ov; |
103 | static char *utf8 = "utf8"; | 103 | static char *utf8 = "utf8"; |
104 | int errors = 0, sloppy = 0; | 104 | int errors = 0, sloppy = 0; |
105 | uid_t uid = (uid_t)-1; | 105 | kuid_t uid = INVALID_UID; |
106 | gid_t gid = (gid_t)-1; | 106 | kgid_t gid = INVALID_GID; |
107 | umode_t fmask = (umode_t)-1, dmask = (umode_t)-1; | 107 | umode_t fmask = (umode_t)-1, dmask = (umode_t)-1; |
108 | int mft_zone_multiplier = -1, on_errors = -1; | 108 | int mft_zone_multiplier = -1, on_errors = -1; |
109 | int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1; | 109 | int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1; |
@@ -128,6 +128,30 @@ static bool parse_options(ntfs_volume *vol, char *opt) | |||
128 | if (*v) \ | 128 | if (*v) \ |
129 | goto needs_val; \ | 129 | goto needs_val; \ |
130 | } | 130 | } |
131 | #define NTFS_GETOPT_UID(option, variable) \ | ||
132 | if (!strcmp(p, option)) { \ | ||
133 | uid_t uid_value; \ | ||
134 | if (!v || !*v) \ | ||
135 | goto needs_arg; \ | ||
136 | uid_value = simple_strtoul(ov = v, &v, 0); \ | ||
137 | if (*v) \ | ||
138 | goto needs_val; \ | ||
139 | variable = make_kuid(current_user_ns(), uid_value); \ | ||
140 | if (!uid_valid(variable)) \ | ||
141 | goto needs_val; \ | ||
142 | } | ||
143 | #define NTFS_GETOPT_GID(option, variable) \ | ||
144 | if (!strcmp(p, option)) { \ | ||
145 | gid_t gid_value; \ | ||
146 | if (!v || !*v) \ | ||
147 | goto needs_arg; \ | ||
148 | gid_value = simple_strtoul(ov = v, &v, 0); \ | ||
149 | if (*v) \ | ||
150 | goto needs_val; \ | ||
151 | variable = make_kgid(current_user_ns(), gid_value); \ | ||
152 | if (!gid_valid(variable)) \ | ||
153 | goto needs_val; \ | ||
154 | } | ||
131 | #define NTFS_GETOPT_OCTAL(option, variable) \ | 155 | #define NTFS_GETOPT_OCTAL(option, variable) \ |
132 | if (!strcmp(p, option)) { \ | 156 | if (!strcmp(p, option)) { \ |
133 | if (!v || !*v) \ | 157 | if (!v || !*v) \ |
@@ -165,8 +189,8 @@ static bool parse_options(ntfs_volume *vol, char *opt) | |||
165 | while ((p = strsep(&opt, ","))) { | 189 | while ((p = strsep(&opt, ","))) { |
166 | if ((v = strchr(p, '='))) | 190 | if ((v = strchr(p, '='))) |
167 | *v++ = 0; | 191 | *v++ = 0; |
168 | NTFS_GETOPT("uid", uid) | 192 | NTFS_GETOPT_UID("uid", uid) |
169 | else NTFS_GETOPT("gid", gid) | 193 | else NTFS_GETOPT_GID("gid", gid) |
170 | else NTFS_GETOPT_OCTAL("umask", fmask = dmask) | 194 | else NTFS_GETOPT_OCTAL("umask", fmask = dmask) |
171 | else NTFS_GETOPT_OCTAL("fmask", fmask) | 195 | else NTFS_GETOPT_OCTAL("fmask", fmask) |
172 | else NTFS_GETOPT_OCTAL("dmask", dmask) | 196 | else NTFS_GETOPT_OCTAL("dmask", dmask) |
@@ -283,9 +307,9 @@ no_mount_options: | |||
283 | vol->on_errors = on_errors; | 307 | vol->on_errors = on_errors; |
284 | if (!vol->on_errors || vol->on_errors == ON_ERRORS_RECOVER) | 308 | if (!vol->on_errors || vol->on_errors == ON_ERRORS_RECOVER) |
285 | vol->on_errors |= ON_ERRORS_CONTINUE; | 309 | vol->on_errors |= ON_ERRORS_CONTINUE; |
286 | if (uid != (uid_t)-1) | 310 | if (uid_valid(uid)) |
287 | vol->uid = uid; | 311 | vol->uid = uid; |
288 | if (gid != (gid_t)-1) | 312 | if (gid_valid(gid)) |
289 | vol->gid = gid; | 313 | vol->gid = gid; |
290 | if (fmask != (umode_t)-1) | 314 | if (fmask != (umode_t)-1) |
291 | vol->fmask = fmask; | 315 | vol->fmask = fmask; |
@@ -1023,7 +1047,8 @@ static bool load_and_init_mft_mirror(ntfs_volume *vol) | |||
1023 | * ntfs_read_inode() will have set up the default ones. | 1047 | * ntfs_read_inode() will have set up the default ones. |
1024 | */ | 1048 | */ |
1025 | /* Set uid and gid to root. */ | 1049 | /* Set uid and gid to root. */ |
1026 | tmp_ino->i_uid = tmp_ino->i_gid = 0; | 1050 | tmp_ino->i_uid = GLOBAL_ROOT_UID; |
1051 | tmp_ino->i_gid = GLOBAL_ROOT_GID; | ||
1027 | /* Regular file. No access for anyone. */ | 1052 | /* Regular file. No access for anyone. */ |
1028 | tmp_ino->i_mode = S_IFREG; | 1053 | tmp_ino->i_mode = S_IFREG; |
1029 | /* No VFS initiated operations allowed for $MFTMirr. */ | 1054 | /* No VFS initiated operations allowed for $MFTMirr. */ |
@@ -3168,6 +3193,12 @@ static void __exit exit_ntfs_fs(void) | |||
3168 | ntfs_debug("Unregistering NTFS driver."); | 3193 | ntfs_debug("Unregistering NTFS driver."); |
3169 | 3194 | ||
3170 | unregister_filesystem(&ntfs_fs_type); | 3195 | unregister_filesystem(&ntfs_fs_type); |
3196 | |||
3197 | /* | ||
3198 | * Make sure all delayed rcu free inodes are flushed before we | ||
3199 | * destroy cache. | ||
3200 | */ | ||
3201 | rcu_barrier(); | ||
3171 | kmem_cache_destroy(ntfs_big_inode_cache); | 3202 | kmem_cache_destroy(ntfs_big_inode_cache); |
3172 | kmem_cache_destroy(ntfs_inode_cache); | 3203 | kmem_cache_destroy(ntfs_inode_cache); |
3173 | kmem_cache_destroy(ntfs_name_cache); | 3204 | kmem_cache_destroy(ntfs_name_cache); |
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h index 15e3ba8d521a..4f579b02bc76 100644 --- a/fs/ntfs/volume.h +++ b/fs/ntfs/volume.h | |||
@@ -25,6 +25,7 @@ | |||
25 | #define _LINUX_NTFS_VOLUME_H | 25 | #define _LINUX_NTFS_VOLUME_H |
26 | 26 | ||
27 | #include <linux/rwsem.h> | 27 | #include <linux/rwsem.h> |
28 | #include <linux/uidgid.h> | ||
28 | 29 | ||
29 | #include "types.h" | 30 | #include "types.h" |
30 | #include "layout.h" | 31 | #include "layout.h" |
@@ -46,8 +47,8 @@ typedef struct { | |||
46 | sized blocks on the device. */ | 47 | sized blocks on the device. */ |
47 | /* Configuration provided by user at mount time. */ | 48 | /* Configuration provided by user at mount time. */ |
48 | unsigned long flags; /* Miscellaneous flags, see below. */ | 49 | unsigned long flags; /* Miscellaneous flags, see below. */ |
49 | uid_t uid; /* uid that files will be mounted as. */ | 50 | kuid_t uid; /* uid that files will be mounted as. */ |
50 | gid_t gid; /* gid that files will be mounted as. */ | 51 | kgid_t gid; /* gid that files will be mounted as. */ |
51 | umode_t fmask; /* The mask for file permissions. */ | 52 | umode_t fmask; /* The mask for file permissions. */ |
52 | umode_t dmask; /* The mask for directory | 53 | umode_t dmask; /* The mask for directory |
53 | permissions. */ | 54 | permissions. */ |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index a7219075b4de..260b16281fc3 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -452,7 +452,7 @@ static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name, | |||
452 | return PTR_ERR(acl); | 452 | return PTR_ERR(acl); |
453 | if (acl == NULL) | 453 | if (acl == NULL) |
454 | return -ENODATA; | 454 | return -ENODATA; |
455 | ret = posix_acl_to_xattr(acl, buffer, size); | 455 | ret = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); |
456 | posix_acl_release(acl); | 456 | posix_acl_release(acl); |
457 | 457 | ||
458 | return ret; | 458 | return ret; |
@@ -475,7 +475,7 @@ static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name, | |||
475 | return -EPERM; | 475 | return -EPERM; |
476 | 476 | ||
477 | if (value) { | 477 | if (value) { |
478 | acl = posix_acl_from_xattr(value, size); | 478 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
479 | if (IS_ERR(acl)) | 479 | if (IS_ERR(acl)) |
480 | return PTR_ERR(acl); | 480 | return PTR_ERR(acl); |
481 | else if (acl) { | 481 | else if (acl) { |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index a4e855e3690e..f7c648d7d6bf 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -1746,8 +1746,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1746 | long fd; | 1746 | long fd; |
1747 | int sectsize; | 1747 | int sectsize; |
1748 | char *p = (char *)page; | 1748 | char *p = (char *)page; |
1749 | struct file *filp = NULL; | 1749 | struct fd f; |
1750 | struct inode *inode = NULL; | 1750 | struct inode *inode; |
1751 | ssize_t ret = -EINVAL; | 1751 | ssize_t ret = -EINVAL; |
1752 | int live_threshold; | 1752 | int live_threshold; |
1753 | 1753 | ||
@@ -1766,26 +1766,26 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1766 | if (fd < 0 || fd >= INT_MAX) | 1766 | if (fd < 0 || fd >= INT_MAX) |
1767 | goto out; | 1767 | goto out; |
1768 | 1768 | ||
1769 | filp = fget(fd); | 1769 | f = fdget(fd); |
1770 | if (filp == NULL) | 1770 | if (f.file == NULL) |
1771 | goto out; | 1771 | goto out; |
1772 | 1772 | ||
1773 | if (reg->hr_blocks == 0 || reg->hr_start_block == 0 || | 1773 | if (reg->hr_blocks == 0 || reg->hr_start_block == 0 || |
1774 | reg->hr_block_bytes == 0) | 1774 | reg->hr_block_bytes == 0) |
1775 | goto out; | 1775 | goto out2; |
1776 | 1776 | ||
1777 | inode = igrab(filp->f_mapping->host); | 1777 | inode = igrab(f.file->f_mapping->host); |
1778 | if (inode == NULL) | 1778 | if (inode == NULL) |
1779 | goto out; | 1779 | goto out2; |
1780 | 1780 | ||
1781 | if (!S_ISBLK(inode->i_mode)) | 1781 | if (!S_ISBLK(inode->i_mode)) |
1782 | goto out; | 1782 | goto out3; |
1783 | 1783 | ||
1784 | reg->hr_bdev = I_BDEV(filp->f_mapping->host); | 1784 | reg->hr_bdev = I_BDEV(f.file->f_mapping->host); |
1785 | ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); | 1785 | ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); |
1786 | if (ret) { | 1786 | if (ret) { |
1787 | reg->hr_bdev = NULL; | 1787 | reg->hr_bdev = NULL; |
1788 | goto out; | 1788 | goto out3; |
1789 | } | 1789 | } |
1790 | inode = NULL; | 1790 | inode = NULL; |
1791 | 1791 | ||
@@ -1797,7 +1797,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1797 | "blocksize %u incorrect for device, expected %d", | 1797 | "blocksize %u incorrect for device, expected %d", |
1798 | reg->hr_block_bytes, sectsize); | 1798 | reg->hr_block_bytes, sectsize); |
1799 | ret = -EINVAL; | 1799 | ret = -EINVAL; |
1800 | goto out; | 1800 | goto out3; |
1801 | } | 1801 | } |
1802 | 1802 | ||
1803 | o2hb_init_region_params(reg); | 1803 | o2hb_init_region_params(reg); |
@@ -1811,13 +1811,13 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1811 | ret = o2hb_map_slot_data(reg); | 1811 | ret = o2hb_map_slot_data(reg); |
1812 | if (ret) { | 1812 | if (ret) { |
1813 | mlog_errno(ret); | 1813 | mlog_errno(ret); |
1814 | goto out; | 1814 | goto out3; |
1815 | } | 1815 | } |
1816 | 1816 | ||
1817 | ret = o2hb_populate_slot_data(reg); | 1817 | ret = o2hb_populate_slot_data(reg); |
1818 | if (ret) { | 1818 | if (ret) { |
1819 | mlog_errno(ret); | 1819 | mlog_errno(ret); |
1820 | goto out; | 1820 | goto out3; |
1821 | } | 1821 | } |
1822 | 1822 | ||
1823 | INIT_DELAYED_WORK(®->hr_write_timeout_work, o2hb_write_timeout); | 1823 | INIT_DELAYED_WORK(®->hr_write_timeout_work, o2hb_write_timeout); |
@@ -1847,7 +1847,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1847 | if (IS_ERR(hb_task)) { | 1847 | if (IS_ERR(hb_task)) { |
1848 | ret = PTR_ERR(hb_task); | 1848 | ret = PTR_ERR(hb_task); |
1849 | mlog_errno(ret); | 1849 | mlog_errno(ret); |
1850 | goto out; | 1850 | goto out3; |
1851 | } | 1851 | } |
1852 | 1852 | ||
1853 | spin_lock(&o2hb_live_lock); | 1853 | spin_lock(&o2hb_live_lock); |
@@ -1863,7 +1863,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1863 | 1863 | ||
1864 | if (reg->hr_aborted_start) { | 1864 | if (reg->hr_aborted_start) { |
1865 | ret = -EIO; | 1865 | ret = -EIO; |
1866 | goto out; | 1866 | goto out3; |
1867 | } | 1867 | } |
1868 | 1868 | ||
1869 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ | 1869 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ |
@@ -1882,11 +1882,11 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1882 | printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", | 1882 | printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", |
1883 | config_item_name(®->hr_item), reg->hr_dev_name); | 1883 | config_item_name(®->hr_item), reg->hr_dev_name); |
1884 | 1884 | ||
1885 | out3: | ||
1886 | iput(inode); | ||
1887 | out2: | ||
1888 | fdput(f); | ||
1885 | out: | 1889 | out: |
1886 | if (filp) | ||
1887 | fput(filp); | ||
1888 | if (inode) | ||
1889 | iput(inode); | ||
1890 | if (ret < 0) { | 1890 | if (ret < 0) { |
1891 | if (reg->hr_bdev) { | 1891 | if (reg->hr_bdev) { |
1892 | blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); | 1892 | blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); |
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index 8f9cea1597af..c19897d0fe14 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c | |||
@@ -327,5 +327,5 @@ void o2quo_exit(void) | |||
327 | { | 327 | { |
328 | struct o2quo_state *qs = &o2quo_state; | 328 | struct o2quo_state *qs = &o2quo_state; |
329 | 329 | ||
330 | flush_work_sync(&qs->qs_work); | 330 | flush_work(&qs->qs_work); |
331 | } | 331 | } |
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 83b6f98e0665..16b712d260d4 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
@@ -691,6 +691,11 @@ static void __exit exit_dlmfs_fs(void) | |||
691 | flush_workqueue(user_dlm_worker); | 691 | flush_workqueue(user_dlm_worker); |
692 | destroy_workqueue(user_dlm_worker); | 692 | destroy_workqueue(user_dlm_worker); |
693 | 693 | ||
694 | /* | ||
695 | * Make sure all delayed rcu free inodes are flushed before we | ||
696 | * destroy cache. | ||
697 | */ | ||
698 | rcu_barrier(); | ||
694 | kmem_cache_destroy(dlmfs_inode_cache); | 699 | kmem_cache_destroy(dlmfs_inode_cache); |
695 | 700 | ||
696 | bdi_destroy(&dlmfs_backing_dev_info); | 701 | bdi_destroy(&dlmfs_backing_dev_info); |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 46a1f6d75104..5a4ee77cec51 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1184,8 +1184,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1184 | if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid | 1184 | if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid |
1185 | && OCFS2_HAS_RO_COMPAT_FEATURE(sb, | 1185 | && OCFS2_HAS_RO_COMPAT_FEATURE(sb, |
1186 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { | 1186 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { |
1187 | transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, | 1187 | transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid)); |
1188 | USRQUOTA); | ||
1189 | if (!transfer_to[USRQUOTA]) { | 1188 | if (!transfer_to[USRQUOTA]) { |
1190 | status = -ESRCH; | 1189 | status = -ESRCH; |
1191 | goto bail_unlock; | 1190 | goto bail_unlock; |
@@ -1194,8 +1193,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1194 | if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid | 1193 | if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid |
1195 | && OCFS2_HAS_RO_COMPAT_FEATURE(sb, | 1194 | && OCFS2_HAS_RO_COMPAT_FEATURE(sb, |
1196 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { | 1195 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { |
1197 | transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, | 1196 | transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid)); |
1198 | GRPQUOTA); | ||
1199 | if (!transfer_to[GRPQUOTA]) { | 1197 | if (!transfer_to[GRPQUOTA]) { |
1200 | status = -ESRCH; | 1198 | status = -ESRCH; |
1201 | goto bail_unlock; | 1199 | goto bail_unlock; |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index d150372fd81d..47a87dda54ce 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -173,6 +173,7 @@ out: | |||
173 | static const struct vm_operations_struct ocfs2_file_vm_ops = { | 173 | static const struct vm_operations_struct ocfs2_file_vm_ops = { |
174 | .fault = ocfs2_fault, | 174 | .fault = ocfs2_fault, |
175 | .page_mkwrite = ocfs2_page_mkwrite, | 175 | .page_mkwrite = ocfs2_page_mkwrite, |
176 | .remap_pages = generic_file_remap_pages, | ||
176 | }; | 177 | }; |
177 | 178 | ||
178 | int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | 179 | int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) |
@@ -188,7 +189,6 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
188 | ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level); | 189 | ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level); |
189 | out: | 190 | out: |
190 | vma->vm_ops = &ocfs2_file_vm_ops; | 191 | vma->vm_ops = &ocfs2_file_vm_ops; |
191 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
192 | return 0; | 192 | return 0; |
193 | } | 193 | } |
194 | 194 | ||
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 0a86e302655f..332a281f217e 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c | |||
@@ -95,7 +95,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot) | |||
95 | struct ocfs2_global_disk_dqblk *d = dp; | 95 | struct ocfs2_global_disk_dqblk *d = dp; |
96 | struct mem_dqblk *m = &dquot->dq_dqb; | 96 | struct mem_dqblk *m = &dquot->dq_dqb; |
97 | 97 | ||
98 | d->dqb_id = cpu_to_le32(dquot->dq_id); | 98 | d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id)); |
99 | d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count); | 99 | d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count); |
100 | d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); | 100 | d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); |
101 | d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); | 101 | d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); |
@@ -112,11 +112,14 @@ static int ocfs2_global_is_id(void *dp, struct dquot *dquot) | |||
112 | { | 112 | { |
113 | struct ocfs2_global_disk_dqblk *d = dp; | 113 | struct ocfs2_global_disk_dqblk *d = dp; |
114 | struct ocfs2_mem_dqinfo *oinfo = | 114 | struct ocfs2_mem_dqinfo *oinfo = |
115 | sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; | 115 | sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; |
116 | 116 | ||
117 | if (qtree_entry_unused(&oinfo->dqi_gi, dp)) | 117 | if (qtree_entry_unused(&oinfo->dqi_gi, dp)) |
118 | return 0; | 118 | return 0; |
119 | return le32_to_cpu(d->dqb_id) == dquot->dq_id; | 119 | |
120 | return qid_eq(make_kqid(&init_user_ns, dquot->dq_id.type, | ||
121 | le32_to_cpu(d->dqb_id)), | ||
122 | dquot->dq_id); | ||
120 | } | 123 | } |
121 | 124 | ||
122 | struct qtree_fmt_operations ocfs2_global_ops = { | 125 | struct qtree_fmt_operations ocfs2_global_ops = { |
@@ -475,7 +478,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) | |||
475 | { | 478 | { |
476 | int err, err2; | 479 | int err, err2; |
477 | struct super_block *sb = dquot->dq_sb; | 480 | struct super_block *sb = dquot->dq_sb; |
478 | int type = dquot->dq_type; | 481 | int type = dquot->dq_id.type; |
479 | struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; | 482 | struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; |
480 | struct ocfs2_global_disk_dqblk dqblk; | 483 | struct ocfs2_global_disk_dqblk dqblk; |
481 | s64 spacechange, inodechange; | 484 | s64 spacechange, inodechange; |
@@ -504,7 +507,8 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) | |||
504 | olditime = dquot->dq_dqb.dqb_itime; | 507 | olditime = dquot->dq_dqb.dqb_itime; |
505 | oldbtime = dquot->dq_dqb.dqb_btime; | 508 | oldbtime = dquot->dq_dqb.dqb_btime; |
506 | ocfs2_global_disk2memdqb(dquot, &dqblk); | 509 | ocfs2_global_disk2memdqb(dquot, &dqblk); |
507 | trace_ocfs2_sync_dquot(dquot->dq_id, dquot->dq_dqb.dqb_curspace, | 510 | trace_ocfs2_sync_dquot(from_kqid(&init_user_ns, dquot->dq_id), |
511 | dquot->dq_dqb.dqb_curspace, | ||
508 | (long long)spacechange, | 512 | (long long)spacechange, |
509 | dquot->dq_dqb.dqb_curinodes, | 513 | dquot->dq_dqb.dqb_curinodes, |
510 | (long long)inodechange); | 514 | (long long)inodechange); |
@@ -555,8 +559,8 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) | |||
555 | err = ocfs2_qinfo_lock(info, freeing); | 559 | err = ocfs2_qinfo_lock(info, freeing); |
556 | if (err < 0) { | 560 | if (err < 0) { |
557 | mlog(ML_ERROR, "Failed to lock quota info, losing quota write" | 561 | mlog(ML_ERROR, "Failed to lock quota info, losing quota write" |
558 | " (type=%d, id=%u)\n", dquot->dq_type, | 562 | " (type=%d, id=%u)\n", dquot->dq_id.type, |
559 | (unsigned)dquot->dq_id); | 563 | (unsigned)from_kqid(&init_user_ns, dquot->dq_id)); |
560 | goto out; | 564 | goto out; |
561 | } | 565 | } |
562 | if (freeing) | 566 | if (freeing) |
@@ -591,9 +595,10 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type) | |||
591 | struct ocfs2_super *osb = OCFS2_SB(sb); | 595 | struct ocfs2_super *osb = OCFS2_SB(sb); |
592 | int status = 0; | 596 | int status = 0; |
593 | 597 | ||
594 | trace_ocfs2_sync_dquot_helper(dquot->dq_id, dquot->dq_type, | 598 | trace_ocfs2_sync_dquot_helper(from_kqid(&init_user_ns, dquot->dq_id), |
599 | dquot->dq_id.type, | ||
595 | type, sb->s_id); | 600 | type, sb->s_id); |
596 | if (type != dquot->dq_type) | 601 | if (type != dquot->dq_id.type) |
597 | goto out; | 602 | goto out; |
598 | status = ocfs2_lock_global_qf(oinfo, 1); | 603 | status = ocfs2_lock_global_qf(oinfo, 1); |
599 | if (status < 0) | 604 | if (status < 0) |
@@ -643,7 +648,8 @@ static int ocfs2_write_dquot(struct dquot *dquot) | |||
643 | struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); | 648 | struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); |
644 | int status = 0; | 649 | int status = 0; |
645 | 650 | ||
646 | trace_ocfs2_write_dquot(dquot->dq_id, dquot->dq_type); | 651 | trace_ocfs2_write_dquot(from_kqid(&init_user_ns, dquot->dq_id), |
652 | dquot->dq_id.type); | ||
647 | 653 | ||
648 | handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS); | 654 | handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS); |
649 | if (IS_ERR(handle)) { | 655 | if (IS_ERR(handle)) { |
@@ -677,11 +683,12 @@ static int ocfs2_release_dquot(struct dquot *dquot) | |||
677 | { | 683 | { |
678 | handle_t *handle; | 684 | handle_t *handle; |
679 | struct ocfs2_mem_dqinfo *oinfo = | 685 | struct ocfs2_mem_dqinfo *oinfo = |
680 | sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; | 686 | sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; |
681 | struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); | 687 | struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); |
682 | int status = 0; | 688 | int status = 0; |
683 | 689 | ||
684 | trace_ocfs2_release_dquot(dquot->dq_id, dquot->dq_type); | 690 | trace_ocfs2_release_dquot(from_kqid(&init_user_ns, dquot->dq_id), |
691 | dquot->dq_id.type); | ||
685 | 692 | ||
686 | mutex_lock(&dquot->dq_lock); | 693 | mutex_lock(&dquot->dq_lock); |
687 | /* Check whether we are not racing with some other dqget() */ | 694 | /* Check whether we are not racing with some other dqget() */ |
@@ -691,7 +698,7 @@ static int ocfs2_release_dquot(struct dquot *dquot) | |||
691 | if (status < 0) | 698 | if (status < 0) |
692 | goto out; | 699 | goto out; |
693 | handle = ocfs2_start_trans(osb, | 700 | handle = ocfs2_start_trans(osb, |
694 | ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_type)); | 701 | ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_id.type)); |
695 | if (IS_ERR(handle)) { | 702 | if (IS_ERR(handle)) { |
696 | status = PTR_ERR(handle); | 703 | status = PTR_ERR(handle); |
697 | mlog_errno(status); | 704 | mlog_errno(status); |
@@ -733,13 +740,14 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) | |||
733 | int ex = 0; | 740 | int ex = 0; |
734 | struct super_block *sb = dquot->dq_sb; | 741 | struct super_block *sb = dquot->dq_sb; |
735 | struct ocfs2_super *osb = OCFS2_SB(sb); | 742 | struct ocfs2_super *osb = OCFS2_SB(sb); |
736 | int type = dquot->dq_type; | 743 | int type = dquot->dq_id.type; |
737 | struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; | 744 | struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; |
738 | struct inode *gqinode = info->dqi_gqinode; | 745 | struct inode *gqinode = info->dqi_gqinode; |
739 | int need_alloc = ocfs2_global_qinit_alloc(sb, type); | 746 | int need_alloc = ocfs2_global_qinit_alloc(sb, type); |
740 | handle_t *handle; | 747 | handle_t *handle; |
741 | 748 | ||
742 | trace_ocfs2_acquire_dquot(dquot->dq_id, type); | 749 | trace_ocfs2_acquire_dquot(from_kqid(&init_user_ns, dquot->dq_id), |
750 | type); | ||
743 | mutex_lock(&dquot->dq_lock); | 751 | mutex_lock(&dquot->dq_lock); |
744 | /* | 752 | /* |
745 | * We need an exclusive lock, because we're going to update use count | 753 | * We need an exclusive lock, because we're going to update use count |
@@ -821,12 +829,13 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) | |||
821 | int sync = 0; | 829 | int sync = 0; |
822 | int status; | 830 | int status; |
823 | struct super_block *sb = dquot->dq_sb; | 831 | struct super_block *sb = dquot->dq_sb; |
824 | int type = dquot->dq_type; | 832 | int type = dquot->dq_id.type; |
825 | struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; | 833 | struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; |
826 | handle_t *handle; | 834 | handle_t *handle; |
827 | struct ocfs2_super *osb = OCFS2_SB(sb); | 835 | struct ocfs2_super *osb = OCFS2_SB(sb); |
828 | 836 | ||
829 | trace_ocfs2_mark_dquot_dirty(dquot->dq_id, type); | 837 | trace_ocfs2_mark_dquot_dirty(from_kqid(&init_user_ns, dquot->dq_id), |
838 | type); | ||
830 | 839 | ||
831 | /* In case user set some limits, sync dquot immediately to global | 840 | /* In case user set some limits, sync dquot immediately to global |
832 | * quota file so that information propagates quicker */ | 841 | * quota file so that information propagates quicker */ |
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index f100bf70a906..27fe7ee4874c 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c | |||
@@ -501,7 +501,9 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, | |||
501 | } | 501 | } |
502 | dqblk = (struct ocfs2_local_disk_dqblk *)(qbh->b_data + | 502 | dqblk = (struct ocfs2_local_disk_dqblk *)(qbh->b_data + |
503 | ol_dqblk_block_off(sb, chunk, bit)); | 503 | ol_dqblk_block_off(sb, chunk, bit)); |
504 | dquot = dqget(sb, le64_to_cpu(dqblk->dqb_id), type); | 504 | dquot = dqget(sb, |
505 | make_kqid(&init_user_ns, type, | ||
506 | le64_to_cpu(dqblk->dqb_id))); | ||
505 | if (!dquot) { | 507 | if (!dquot) { |
506 | status = -EIO; | 508 | status = -EIO; |
507 | mlog(ML_ERROR, "Failed to get quota structure " | 509 | mlog(ML_ERROR, "Failed to get quota structure " |
@@ -881,7 +883,8 @@ static void olq_set_dquot(struct buffer_head *bh, void *private) | |||
881 | dqblk = (struct ocfs2_local_disk_dqblk *)(bh->b_data | 883 | dqblk = (struct ocfs2_local_disk_dqblk *)(bh->b_data |
882 | + ol_dqblk_block_offset(sb, od->dq_local_off)); | 884 | + ol_dqblk_block_offset(sb, od->dq_local_off)); |
883 | 885 | ||
884 | dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id); | 886 | dqblk->dqb_id = cpu_to_le64(from_kqid(&init_user_ns, |
887 | od->dq_dquot.dq_id)); | ||
885 | spin_lock(&dq_data_lock); | 888 | spin_lock(&dq_data_lock); |
886 | dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace - | 889 | dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace - |
887 | od->dq_origspace); | 890 | od->dq_origspace); |
@@ -891,7 +894,7 @@ static void olq_set_dquot(struct buffer_head *bh, void *private) | |||
891 | trace_olq_set_dquot( | 894 | trace_olq_set_dquot( |
892 | (unsigned long long)le64_to_cpu(dqblk->dqb_spacemod), | 895 | (unsigned long long)le64_to_cpu(dqblk->dqb_spacemod), |
893 | (unsigned long long)le64_to_cpu(dqblk->dqb_inodemod), | 896 | (unsigned long long)le64_to_cpu(dqblk->dqb_inodemod), |
894 | od->dq_dquot.dq_id); | 897 | from_kqid(&init_user_ns, od->dq_dquot.dq_id)); |
895 | } | 898 | } |
896 | 899 | ||
897 | /* Write dquot to local quota file */ | 900 | /* Write dquot to local quota file */ |
@@ -900,7 +903,7 @@ int ocfs2_local_write_dquot(struct dquot *dquot) | |||
900 | struct super_block *sb = dquot->dq_sb; | 903 | struct super_block *sb = dquot->dq_sb; |
901 | struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); | 904 | struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); |
902 | struct buffer_head *bh; | 905 | struct buffer_head *bh; |
903 | struct inode *lqinode = sb_dqopt(sb)->files[dquot->dq_type]; | 906 | struct inode *lqinode = sb_dqopt(sb)->files[dquot->dq_id.type]; |
904 | int status; | 907 | int status; |
905 | 908 | ||
906 | status = ocfs2_read_quota_phys_block(lqinode, od->dq_local_phys_blk, | 909 | status = ocfs2_read_quota_phys_block(lqinode, od->dq_local_phys_blk, |
@@ -1221,7 +1224,7 @@ static void olq_alloc_dquot(struct buffer_head *bh, void *private) | |||
1221 | int ocfs2_create_local_dquot(struct dquot *dquot) | 1224 | int ocfs2_create_local_dquot(struct dquot *dquot) |
1222 | { | 1225 | { |
1223 | struct super_block *sb = dquot->dq_sb; | 1226 | struct super_block *sb = dquot->dq_sb; |
1224 | int type = dquot->dq_type; | 1227 | int type = dquot->dq_id.type; |
1225 | struct inode *lqinode = sb_dqopt(sb)->files[type]; | 1228 | struct inode *lqinode = sb_dqopt(sb)->files[type]; |
1226 | struct ocfs2_quota_chunk *chunk; | 1229 | struct ocfs2_quota_chunk *chunk; |
1227 | struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); | 1230 | struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); |
@@ -1275,7 +1278,7 @@ out: | |||
1275 | int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) | 1278 | int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) |
1276 | { | 1279 | { |
1277 | int status; | 1280 | int status; |
1278 | int type = dquot->dq_type; | 1281 | int type = dquot->dq_id.type; |
1279 | struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); | 1282 | struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); |
1280 | struct super_block *sb = dquot->dq_sb; | 1283 | struct super_block *sb = dquot->dq_sb; |
1281 | struct ocfs2_local_disk_chunk *dchunk; | 1284 | struct ocfs2_local_disk_chunk *dchunk; |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 68f4541c2db9..0e91ec22a940 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1818,6 +1818,11 @@ static int ocfs2_initialize_mem_caches(void) | |||
1818 | 1818 | ||
1819 | static void ocfs2_free_mem_caches(void) | 1819 | static void ocfs2_free_mem_caches(void) |
1820 | { | 1820 | { |
1821 | /* | ||
1822 | * Make sure all delayed rcu free inodes are flushed before we | ||
1823 | * destroy cache. | ||
1824 | */ | ||
1825 | rcu_barrier(); | ||
1821 | if (ocfs2_inode_cachep) | 1826 | if (ocfs2_inode_cachep) |
1822 | kmem_cache_destroy(ocfs2_inode_cachep); | 1827 | kmem_cache_destroy(ocfs2_inode_cachep); |
1823 | ocfs2_inode_cachep = NULL; | 1828 | ocfs2_inode_cachep = NULL; |
diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 2c6d95257a4d..77e3cb2962b4 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c | |||
@@ -146,8 +146,7 @@ static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe, | |||
146 | be64_to_cpu(entry->e_blocks); | 146 | be64_to_cpu(entry->e_blocks); |
147 | 147 | ||
148 | if (omfs_allocate_block(inode->i_sb, new_block)) { | 148 | if (omfs_allocate_block(inode->i_sb, new_block)) { |
149 | entry->e_blocks = | 149 | be64_add_cpu(&entry->e_blocks, 1); |
150 | cpu_to_be64(be64_to_cpu(entry->e_blocks) + 1); | ||
151 | terminator->e_blocks = ~(cpu_to_be64( | 150 | terminator->e_blocks = ~(cpu_to_be64( |
152 | be64_to_cpu(~terminator->e_blocks) + 1)); | 151 | be64_to_cpu(~terminator->e_blocks) + 1)); |
153 | goto out; | 152 | goto out; |
@@ -177,7 +176,7 @@ static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe, | |||
177 | be64_to_cpu(~terminator->e_blocks) + (u64) new_count)); | 176 | be64_to_cpu(~terminator->e_blocks) + (u64) new_count)); |
178 | 177 | ||
179 | /* write in new entry */ | 178 | /* write in new entry */ |
180 | oe->e_extent_count = cpu_to_be32(1 + be32_to_cpu(oe->e_extent_count)); | 179 | be32_add_cpu(&oe->e_extent_count, 1); |
181 | 180 | ||
182 | out: | 181 | out: |
183 | *ret_block = new_block; | 182 | *ret_block = new_block; |
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index e6213b3725d1..25d715c7c87a 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c | |||
@@ -391,12 +391,16 @@ static int parse_options(char *options, struct omfs_sb_info *sbi) | |||
391 | case Opt_uid: | 391 | case Opt_uid: |
392 | if (match_int(&args[0], &option)) | 392 | if (match_int(&args[0], &option)) |
393 | return 0; | 393 | return 0; |
394 | sbi->s_uid = option; | 394 | sbi->s_uid = make_kuid(current_user_ns(), option); |
395 | if (!uid_valid(sbi->s_uid)) | ||
396 | return 0; | ||
395 | break; | 397 | break; |
396 | case Opt_gid: | 398 | case Opt_gid: |
397 | if (match_int(&args[0], &option)) | 399 | if (match_int(&args[0], &option)) |
398 | return 0; | 400 | return 0; |
399 | sbi->s_gid = option; | 401 | sbi->s_gid = make_kgid(current_user_ns(), option); |
402 | if (!gid_valid(sbi->s_gid)) | ||
403 | return 0; | ||
400 | break; | 404 | break; |
401 | case Opt_umask: | 405 | case Opt_umask: |
402 | if (match_octal(&args[0], &option)) | 406 | if (match_octal(&args[0], &option)) |
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h index 8941f12c6b01..f0f8bc75e609 100644 --- a/fs/omfs/omfs.h +++ b/fs/omfs/omfs.h | |||
@@ -19,8 +19,8 @@ struct omfs_sb_info { | |||
19 | unsigned long **s_imap; | 19 | unsigned long **s_imap; |
20 | int s_imap_size; | 20 | int s_imap_size; |
21 | struct mutex s_bitmap_lock; | 21 | struct mutex s_bitmap_lock; |
22 | int s_uid; | 22 | kuid_t s_uid; |
23 | int s_gid; | 23 | kgid_t s_gid; |
24 | int s_dmask; | 24 | int s_dmask; |
25 | int s_fmask; | 25 | int s_fmask; |
26 | }; | 26 | }; |
@@ -132,27 +132,27 @@ SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) | |||
132 | 132 | ||
133 | static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) | 133 | static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) |
134 | { | 134 | { |
135 | struct inode * inode; | 135 | struct inode *inode; |
136 | struct dentry *dentry; | 136 | struct dentry *dentry; |
137 | struct file * file; | 137 | struct fd f; |
138 | int error; | 138 | int error; |
139 | 139 | ||
140 | error = -EINVAL; | 140 | error = -EINVAL; |
141 | if (length < 0) | 141 | if (length < 0) |
142 | goto out; | 142 | goto out; |
143 | error = -EBADF; | 143 | error = -EBADF; |
144 | file = fget(fd); | 144 | f = fdget(fd); |
145 | if (!file) | 145 | if (!f.file) |
146 | goto out; | 146 | goto out; |
147 | 147 | ||
148 | /* explicitly opened as large or we are on 64-bit box */ | 148 | /* explicitly opened as large or we are on 64-bit box */ |
149 | if (file->f_flags & O_LARGEFILE) | 149 | if (f.file->f_flags & O_LARGEFILE) |
150 | small = 0; | 150 | small = 0; |
151 | 151 | ||
152 | dentry = file->f_path.dentry; | 152 | dentry = f.file->f_path.dentry; |
153 | inode = dentry->d_inode; | 153 | inode = dentry->d_inode; |
154 | error = -EINVAL; | 154 | error = -EINVAL; |
155 | if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) | 155 | if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) |
156 | goto out_putf; | 156 | goto out_putf; |
157 | 157 | ||
158 | error = -EINVAL; | 158 | error = -EINVAL; |
@@ -165,14 +165,14 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) | |||
165 | goto out_putf; | 165 | goto out_putf; |
166 | 166 | ||
167 | sb_start_write(inode->i_sb); | 167 | sb_start_write(inode->i_sb); |
168 | error = locks_verify_truncate(inode, file, length); | 168 | error = locks_verify_truncate(inode, f.file, length); |
169 | if (!error) | 169 | if (!error) |
170 | error = security_path_truncate(&file->f_path); | 170 | error = security_path_truncate(&f.file->f_path); |
171 | if (!error) | 171 | if (!error) |
172 | error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); | 172 | error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); |
173 | sb_end_write(inode->i_sb); | 173 | sb_end_write(inode->i_sb); |
174 | out_putf: | 174 | out_putf: |
175 | fput(file); | 175 | fdput(f); |
176 | out: | 176 | out: |
177 | return error; | 177 | return error; |
178 | } | 178 | } |
@@ -276,15 +276,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
276 | 276 | ||
277 | SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) | 277 | SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) |
278 | { | 278 | { |
279 | struct file *file; | 279 | struct fd f = fdget(fd); |
280 | int error = -EBADF; | 280 | int error = -EBADF; |
281 | 281 | ||
282 | file = fget(fd); | 282 | if (f.file) { |
283 | if (file) { | 283 | error = do_fallocate(f.file, mode, offset, len); |
284 | error = do_fallocate(file, mode, offset, len); | 284 | fdput(f); |
285 | fput(file); | ||
286 | } | 285 | } |
287 | |||
288 | return error; | 286 | return error; |
289 | } | 287 | } |
290 | 288 | ||
@@ -400,16 +398,15 @@ out: | |||
400 | 398 | ||
401 | SYSCALL_DEFINE1(fchdir, unsigned int, fd) | 399 | SYSCALL_DEFINE1(fchdir, unsigned int, fd) |
402 | { | 400 | { |
403 | struct file *file; | 401 | struct fd f = fdget_raw(fd); |
404 | struct inode *inode; | 402 | struct inode *inode; |
405 | int error, fput_needed; | 403 | int error = -EBADF; |
406 | 404 | ||
407 | error = -EBADF; | 405 | error = -EBADF; |
408 | file = fget_raw_light(fd, &fput_needed); | 406 | if (!f.file) |
409 | if (!file) | ||
410 | goto out; | 407 | goto out; |
411 | 408 | ||
412 | inode = file->f_path.dentry->d_inode; | 409 | inode = f.file->f_path.dentry->d_inode; |
413 | 410 | ||
414 | error = -ENOTDIR; | 411 | error = -ENOTDIR; |
415 | if (!S_ISDIR(inode->i_mode)) | 412 | if (!S_ISDIR(inode->i_mode)) |
@@ -417,9 +414,9 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) | |||
417 | 414 | ||
418 | error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); | 415 | error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); |
419 | if (!error) | 416 | if (!error) |
420 | set_fs_pwd(current->fs, &file->f_path); | 417 | set_fs_pwd(current->fs, &f.file->f_path); |
421 | out_putf: | 418 | out_putf: |
422 | fput_light(file, fput_needed); | 419 | fdput(f); |
423 | out: | 420 | out: |
424 | return error; | 421 | return error; |
425 | } | 422 | } |
@@ -534,7 +531,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group) | |||
534 | newattrs.ia_valid |= | 531 | newattrs.ia_valid |= |
535 | ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; | 532 | ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; |
536 | mutex_lock(&inode->i_mutex); | 533 | mutex_lock(&inode->i_mutex); |
537 | error = security_path_chown(path, user, group); | 534 | error = security_path_chown(path, uid, gid); |
538 | if (!error) | 535 | if (!error) |
539 | error = notify_change(path->dentry, &newattrs); | 536 | error = notify_change(path->dentry, &newattrs); |
540 | mutex_unlock(&inode->i_mutex); | 537 | mutex_unlock(&inode->i_mutex); |
@@ -582,23 +579,20 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group | |||
582 | 579 | ||
583 | SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) | 580 | SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) |
584 | { | 581 | { |
585 | struct file * file; | 582 | struct fd f = fdget(fd); |
586 | int error = -EBADF; | 583 | int error = -EBADF; |
587 | struct dentry * dentry; | ||
588 | 584 | ||
589 | file = fget(fd); | 585 | if (!f.file) |
590 | if (!file) | ||
591 | goto out; | 586 | goto out; |
592 | 587 | ||
593 | error = mnt_want_write_file(file); | 588 | error = mnt_want_write_file(f.file); |
594 | if (error) | 589 | if (error) |
595 | goto out_fput; | 590 | goto out_fput; |
596 | dentry = file->f_path.dentry; | 591 | audit_inode(NULL, f.file->f_path.dentry); |
597 | audit_inode(NULL, dentry); | 592 | error = chown_common(&f.file->f_path, user, group); |
598 | error = chown_common(&file->f_path, user, group); | 593 | mnt_drop_write_file(f.file); |
599 | mnt_drop_write_file(file); | ||
600 | out_fput: | 594 | out_fput: |
601 | fput(file); | 595 | fdput(f); |
602 | out: | 596 | out: |
603 | return error; | 597 | return error; |
604 | } | 598 | } |
@@ -803,61 +797,18 @@ struct file *dentry_open(const struct path *path, int flags, | |||
803 | } | 797 | } |
804 | EXPORT_SYMBOL(dentry_open); | 798 | EXPORT_SYMBOL(dentry_open); |
805 | 799 | ||
806 | static void __put_unused_fd(struct files_struct *files, unsigned int fd) | ||
807 | { | ||
808 | struct fdtable *fdt = files_fdtable(files); | ||
809 | __clear_open_fd(fd, fdt); | ||
810 | if (fd < files->next_fd) | ||
811 | files->next_fd = fd; | ||
812 | } | ||
813 | |||
814 | void put_unused_fd(unsigned int fd) | ||
815 | { | ||
816 | struct files_struct *files = current->files; | ||
817 | spin_lock(&files->file_lock); | ||
818 | __put_unused_fd(files, fd); | ||
819 | spin_unlock(&files->file_lock); | ||
820 | } | ||
821 | |||
822 | EXPORT_SYMBOL(put_unused_fd); | ||
823 | |||
824 | /* | ||
825 | * Install a file pointer in the fd array. | ||
826 | * | ||
827 | * The VFS is full of places where we drop the files lock between | ||
828 | * setting the open_fds bitmap and installing the file in the file | ||
829 | * array. At any such point, we are vulnerable to a dup2() race | ||
830 | * installing a file in the array before us. We need to detect this and | ||
831 | * fput() the struct file we are about to overwrite in this case. | ||
832 | * | ||
833 | * It should never happen - if we allow dup2() do it, _really_ bad things | ||
834 | * will follow. | ||
835 | */ | ||
836 | |||
837 | void fd_install(unsigned int fd, struct file *file) | ||
838 | { | ||
839 | struct files_struct *files = current->files; | ||
840 | struct fdtable *fdt; | ||
841 | spin_lock(&files->file_lock); | ||
842 | fdt = files_fdtable(files); | ||
843 | BUG_ON(fdt->fd[fd] != NULL); | ||
844 | rcu_assign_pointer(fdt->fd[fd], file); | ||
845 | spin_unlock(&files->file_lock); | ||
846 | } | ||
847 | |||
848 | EXPORT_SYMBOL(fd_install); | ||
849 | |||
850 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) | 800 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) |
851 | { | 801 | { |
852 | int lookup_flags = 0; | 802 | int lookup_flags = 0; |
853 | int acc_mode; | 803 | int acc_mode; |
854 | 804 | ||
855 | if (!(flags & O_CREAT)) | 805 | if (flags & O_CREAT) |
856 | mode = 0; | 806 | op->mode = (mode & S_IALLUGO) | S_IFREG; |
857 | op->mode = mode; | 807 | else |
808 | op->mode = 0; | ||
858 | 809 | ||
859 | /* Must never be set by userspace */ | 810 | /* Must never be set by userspace */ |
860 | flags &= ~FMODE_NONOTIFY; | 811 | flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC; |
861 | 812 | ||
862 | /* | 813 | /* |
863 | * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only | 814 | * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only |
@@ -1037,23 +988,7 @@ EXPORT_SYMBOL(filp_close); | |||
1037 | */ | 988 | */ |
1038 | SYSCALL_DEFINE1(close, unsigned int, fd) | 989 | SYSCALL_DEFINE1(close, unsigned int, fd) |
1039 | { | 990 | { |
1040 | struct file * filp; | 991 | int retval = __close_fd(current->files, fd); |
1041 | struct files_struct *files = current->files; | ||
1042 | struct fdtable *fdt; | ||
1043 | int retval; | ||
1044 | |||
1045 | spin_lock(&files->file_lock); | ||
1046 | fdt = files_fdtable(files); | ||
1047 | if (fd >= fdt->max_fds) | ||
1048 | goto out_unlock; | ||
1049 | filp = fdt->fd[fd]; | ||
1050 | if (!filp) | ||
1051 | goto out_unlock; | ||
1052 | rcu_assign_pointer(fdt->fd[fd], NULL); | ||
1053 | __clear_close_on_exec(fd, fdt); | ||
1054 | __put_unused_fd(files, fd); | ||
1055 | spin_unlock(&files->file_lock); | ||
1056 | retval = filp_close(filp, files); | ||
1057 | 992 | ||
1058 | /* can't restart close syscall because file table entry was cleared */ | 993 | /* can't restart close syscall because file table entry was cleared */ |
1059 | if (unlikely(retval == -ERESTARTSYS || | 994 | if (unlikely(retval == -ERESTARTSYS || |
@@ -1063,10 +998,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd) | |||
1063 | retval = -EINTR; | 998 | retval = -EINTR; |
1064 | 999 | ||
1065 | return retval; | 1000 | return retval; |
1066 | |||
1067 | out_unlock: | ||
1068 | spin_unlock(&files->file_lock); | ||
1069 | return -EBADF; | ||
1070 | } | 1001 | } |
1071 | EXPORT_SYMBOL(sys_close); | 1002 | EXPORT_SYMBOL(sys_close); |
1072 | 1003 | ||
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 4a3477949bca..2ad080faca34 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c | |||
@@ -463,6 +463,11 @@ static int __init init_openprom_fs(void) | |||
463 | static void __exit exit_openprom_fs(void) | 463 | static void __exit exit_openprom_fs(void) |
464 | { | 464 | { |
465 | unregister_filesystem(&openprom_fs_type); | 465 | unregister_filesystem(&openprom_fs_type); |
466 | /* | ||
467 | * Make sure all delayed rcu free inodes are flushed before we | ||
468 | * destroy cache. | ||
469 | */ | ||
470 | rcu_barrier(); | ||
466 | kmem_cache_destroy(op_inode_cachep); | 471 | kmem_cache_destroy(op_inode_cachep); |
467 | } | 472 | } |
468 | 473 | ||
@@ -1064,9 +1064,8 @@ err_inode: | |||
1064 | return err; | 1064 | return err; |
1065 | } | 1065 | } |
1066 | 1066 | ||
1067 | int do_pipe_flags(int *fd, int flags) | 1067 | static int __do_pipe_flags(int *fd, struct file **files, int flags) |
1068 | { | 1068 | { |
1069 | struct file *files[2]; | ||
1070 | int error; | 1069 | int error; |
1071 | int fdw, fdr; | 1070 | int fdw, fdr; |
1072 | 1071 | ||
@@ -1088,11 +1087,8 @@ int do_pipe_flags(int *fd, int flags) | |||
1088 | fdw = error; | 1087 | fdw = error; |
1089 | 1088 | ||
1090 | audit_fd_pair(fdr, fdw); | 1089 | audit_fd_pair(fdr, fdw); |
1091 | fd_install(fdr, files[0]); | ||
1092 | fd_install(fdw, files[1]); | ||
1093 | fd[0] = fdr; | 1090 | fd[0] = fdr; |
1094 | fd[1] = fdw; | 1091 | fd[1] = fdw; |
1095 | |||
1096 | return 0; | 1092 | return 0; |
1097 | 1093 | ||
1098 | err_fdr: | 1094 | err_fdr: |
@@ -1103,21 +1099,38 @@ int do_pipe_flags(int *fd, int flags) | |||
1103 | return error; | 1099 | return error; |
1104 | } | 1100 | } |
1105 | 1101 | ||
1102 | int do_pipe_flags(int *fd, int flags) | ||
1103 | { | ||
1104 | struct file *files[2]; | ||
1105 | int error = __do_pipe_flags(fd, files, flags); | ||
1106 | if (!error) { | ||
1107 | fd_install(fd[0], files[0]); | ||
1108 | fd_install(fd[1], files[1]); | ||
1109 | } | ||
1110 | return error; | ||
1111 | } | ||
1112 | |||
1106 | /* | 1113 | /* |
1107 | * sys_pipe() is the normal C calling standard for creating | 1114 | * sys_pipe() is the normal C calling standard for creating |
1108 | * a pipe. It's not the way Unix traditionally does this, though. | 1115 | * a pipe. It's not the way Unix traditionally does this, though. |
1109 | */ | 1116 | */ |
1110 | SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) | 1117 | SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) |
1111 | { | 1118 | { |
1119 | struct file *files[2]; | ||
1112 | int fd[2]; | 1120 | int fd[2]; |
1113 | int error; | 1121 | int error; |
1114 | 1122 | ||
1115 | error = do_pipe_flags(fd, flags); | 1123 | error = __do_pipe_flags(fd, files, flags); |
1116 | if (!error) { | 1124 | if (!error) { |
1117 | if (copy_to_user(fildes, fd, sizeof(fd))) { | 1125 | if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) { |
1118 | sys_close(fd[0]); | 1126 | fput(files[0]); |
1119 | sys_close(fd[1]); | 1127 | fput(files[1]); |
1128 | put_unused_fd(fd[0]); | ||
1129 | put_unused_fd(fd[1]); | ||
1120 | error = -EFAULT; | 1130 | error = -EFAULT; |
1131 | } else { | ||
1132 | fd_install(fd[0], files[0]); | ||
1133 | fd_install(fd[1], files[1]); | ||
1121 | } | 1134 | } |
1122 | } | 1135 | } |
1123 | return error; | 1136 | return error; |
diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 5e325a42e33d..8bd2135b7f82 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c | |||
@@ -78,7 +78,8 @@ posix_acl_valid(const struct posix_acl *acl) | |||
78 | { | 78 | { |
79 | const struct posix_acl_entry *pa, *pe; | 79 | const struct posix_acl_entry *pa, *pe; |
80 | int state = ACL_USER_OBJ; | 80 | int state = ACL_USER_OBJ; |
81 | unsigned int id = 0; /* keep gcc happy */ | 81 | kuid_t prev_uid = INVALID_UID; |
82 | kgid_t prev_gid = INVALID_GID; | ||
82 | int needs_mask = 0; | 83 | int needs_mask = 0; |
83 | 84 | ||
84 | FOREACH_ACL_ENTRY(pa, acl, pe) { | 85 | FOREACH_ACL_ENTRY(pa, acl, pe) { |
@@ -87,7 +88,6 @@ posix_acl_valid(const struct posix_acl *acl) | |||
87 | switch (pa->e_tag) { | 88 | switch (pa->e_tag) { |
88 | case ACL_USER_OBJ: | 89 | case ACL_USER_OBJ: |
89 | if (state == ACL_USER_OBJ) { | 90 | if (state == ACL_USER_OBJ) { |
90 | id = 0; | ||
91 | state = ACL_USER; | 91 | state = ACL_USER; |
92 | break; | 92 | break; |
93 | } | 93 | } |
@@ -96,16 +96,17 @@ posix_acl_valid(const struct posix_acl *acl) | |||
96 | case ACL_USER: | 96 | case ACL_USER: |
97 | if (state != ACL_USER) | 97 | if (state != ACL_USER) |
98 | return -EINVAL; | 98 | return -EINVAL; |
99 | if (pa->e_id == ACL_UNDEFINED_ID || | 99 | if (!uid_valid(pa->e_uid)) |
100 | pa->e_id < id) | ||
101 | return -EINVAL; | 100 | return -EINVAL; |
102 | id = pa->e_id + 1; | 101 | if (uid_valid(prev_uid) && |
102 | uid_lte(pa->e_uid, prev_uid)) | ||
103 | return -EINVAL; | ||
104 | prev_uid = pa->e_uid; | ||
103 | needs_mask = 1; | 105 | needs_mask = 1; |
104 | break; | 106 | break; |
105 | 107 | ||
106 | case ACL_GROUP_OBJ: | 108 | case ACL_GROUP_OBJ: |
107 | if (state == ACL_USER) { | 109 | if (state == ACL_USER) { |
108 | id = 0; | ||
109 | state = ACL_GROUP; | 110 | state = ACL_GROUP; |
110 | break; | 111 | break; |
111 | } | 112 | } |
@@ -114,10 +115,12 @@ posix_acl_valid(const struct posix_acl *acl) | |||
114 | case ACL_GROUP: | 115 | case ACL_GROUP: |
115 | if (state != ACL_GROUP) | 116 | if (state != ACL_GROUP) |
116 | return -EINVAL; | 117 | return -EINVAL; |
117 | if (pa->e_id == ACL_UNDEFINED_ID || | 118 | if (!gid_valid(pa->e_gid)) |
118 | pa->e_id < id) | 119 | return -EINVAL; |
120 | if (gid_valid(prev_gid) && | ||
121 | gid_lte(pa->e_gid, prev_gid)) | ||
119 | return -EINVAL; | 122 | return -EINVAL; |
120 | id = pa->e_id + 1; | 123 | prev_gid = pa->e_gid; |
121 | needs_mask = 1; | 124 | needs_mask = 1; |
122 | break; | 125 | break; |
123 | 126 | ||
@@ -195,15 +198,12 @@ posix_acl_from_mode(umode_t mode, gfp_t flags) | |||
195 | return ERR_PTR(-ENOMEM); | 198 | return ERR_PTR(-ENOMEM); |
196 | 199 | ||
197 | acl->a_entries[0].e_tag = ACL_USER_OBJ; | 200 | acl->a_entries[0].e_tag = ACL_USER_OBJ; |
198 | acl->a_entries[0].e_id = ACL_UNDEFINED_ID; | ||
199 | acl->a_entries[0].e_perm = (mode & S_IRWXU) >> 6; | 201 | acl->a_entries[0].e_perm = (mode & S_IRWXU) >> 6; |
200 | 202 | ||
201 | acl->a_entries[1].e_tag = ACL_GROUP_OBJ; | 203 | acl->a_entries[1].e_tag = ACL_GROUP_OBJ; |
202 | acl->a_entries[1].e_id = ACL_UNDEFINED_ID; | ||
203 | acl->a_entries[1].e_perm = (mode & S_IRWXG) >> 3; | 204 | acl->a_entries[1].e_perm = (mode & S_IRWXG) >> 3; |
204 | 205 | ||
205 | acl->a_entries[2].e_tag = ACL_OTHER; | 206 | acl->a_entries[2].e_tag = ACL_OTHER; |
206 | acl->a_entries[2].e_id = ACL_UNDEFINED_ID; | ||
207 | acl->a_entries[2].e_perm = (mode & S_IRWXO); | 207 | acl->a_entries[2].e_perm = (mode & S_IRWXO); |
208 | return acl; | 208 | return acl; |
209 | } | 209 | } |
@@ -224,11 +224,11 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want) | |||
224 | switch(pa->e_tag) { | 224 | switch(pa->e_tag) { |
225 | case ACL_USER_OBJ: | 225 | case ACL_USER_OBJ: |
226 | /* (May have been checked already) */ | 226 | /* (May have been checked already) */ |
227 | if (inode->i_uid == current_fsuid()) | 227 | if (uid_eq(inode->i_uid, current_fsuid())) |
228 | goto check_perm; | 228 | goto check_perm; |
229 | break; | 229 | break; |
230 | case ACL_USER: | 230 | case ACL_USER: |
231 | if (pa->e_id == current_fsuid()) | 231 | if (uid_eq(pa->e_uid, current_fsuid())) |
232 | goto mask; | 232 | goto mask; |
233 | break; | 233 | break; |
234 | case ACL_GROUP_OBJ: | 234 | case ACL_GROUP_OBJ: |
@@ -239,7 +239,7 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want) | |||
239 | } | 239 | } |
240 | break; | 240 | break; |
241 | case ACL_GROUP: | 241 | case ACL_GROUP: |
242 | if (in_group_p(pa->e_id)) { | 242 | if (in_group_p(pa->e_gid)) { |
243 | found = 1; | 243 | found = 1; |
244 | if ((pa->e_perm & want) == want) | 244 | if ((pa->e_perm & want) == want) |
245 | goto mask; | 245 | goto mask; |
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index c1c729335924..99349efbbc2b 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile | |||
@@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o | |||
8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o | 8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o |
9 | 9 | ||
10 | proc-y += inode.o root.o base.o generic.o array.o \ | 10 | proc-y += inode.o root.o base.o generic.o array.o \ |
11 | proc_tty.o | 11 | proc_tty.o fd.o |
12 | proc-y += cmdline.o | 12 | proc-y += cmdline.o |
13 | proc-y += consoles.o | 13 | proc-y += consoles.o |
14 | proc-y += cpuinfo.o | 14 | proc-y += cpuinfo.o |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 1b6c84cbdb73..ef5c84be66f9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -90,6 +90,7 @@ | |||
90 | #endif | 90 | #endif |
91 | #include <trace/events/oom.h> | 91 | #include <trace/events/oom.h> |
92 | #include "internal.h" | 92 | #include "internal.h" |
93 | #include "fd.h" | ||
93 | 94 | ||
94 | /* NOTE: | 95 | /* NOTE: |
95 | * Implementing inode permission operations in /proc is almost | 96 | * Implementing inode permission operations in /proc is almost |
@@ -136,8 +137,6 @@ struct pid_entry { | |||
136 | NULL, &proc_single_file_operations, \ | 137 | NULL, &proc_single_file_operations, \ |
137 | { .proc_show = show } ) | 138 | { .proc_show = show } ) |
138 | 139 | ||
139 | static int proc_fd_permission(struct inode *inode, int mask); | ||
140 | |||
141 | /* | 140 | /* |
142 | * Count the number of hardlinks for the pid_entry table, excluding the . | 141 | * Count the number of hardlinks for the pid_entry table, excluding the . |
143 | * and .. links. | 142 | * and .. links. |
@@ -874,111 +873,6 @@ static const struct file_operations proc_environ_operations = { | |||
874 | .release = mem_release, | 873 | .release = mem_release, |
875 | }; | 874 | }; |
876 | 875 | ||
877 | static ssize_t oom_adjust_read(struct file *file, char __user *buf, | ||
878 | size_t count, loff_t *ppos) | ||
879 | { | ||
880 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | ||
881 | char buffer[PROC_NUMBUF]; | ||
882 | size_t len; | ||
883 | int oom_adjust = OOM_DISABLE; | ||
884 | unsigned long flags; | ||
885 | |||
886 | if (!task) | ||
887 | return -ESRCH; | ||
888 | |||
889 | if (lock_task_sighand(task, &flags)) { | ||
890 | oom_adjust = task->signal->oom_adj; | ||
891 | unlock_task_sighand(task, &flags); | ||
892 | } | ||
893 | |||
894 | put_task_struct(task); | ||
895 | |||
896 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); | ||
897 | |||
898 | return simple_read_from_buffer(buf, count, ppos, buffer, len); | ||
899 | } | ||
900 | |||
901 | static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | ||
902 | size_t count, loff_t *ppos) | ||
903 | { | ||
904 | struct task_struct *task; | ||
905 | char buffer[PROC_NUMBUF]; | ||
906 | int oom_adjust; | ||
907 | unsigned long flags; | ||
908 | int err; | ||
909 | |||
910 | memset(buffer, 0, sizeof(buffer)); | ||
911 | if (count > sizeof(buffer) - 1) | ||
912 | count = sizeof(buffer) - 1; | ||
913 | if (copy_from_user(buffer, buf, count)) { | ||
914 | err = -EFAULT; | ||
915 | goto out; | ||
916 | } | ||
917 | |||
918 | err = kstrtoint(strstrip(buffer), 0, &oom_adjust); | ||
919 | if (err) | ||
920 | goto out; | ||
921 | if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && | ||
922 | oom_adjust != OOM_DISABLE) { | ||
923 | err = -EINVAL; | ||
924 | goto out; | ||
925 | } | ||
926 | |||
927 | task = get_proc_task(file->f_path.dentry->d_inode); | ||
928 | if (!task) { | ||
929 | err = -ESRCH; | ||
930 | goto out; | ||
931 | } | ||
932 | |||
933 | task_lock(task); | ||
934 | if (!task->mm) { | ||
935 | err = -EINVAL; | ||
936 | goto err_task_lock; | ||
937 | } | ||
938 | |||
939 | if (!lock_task_sighand(task, &flags)) { | ||
940 | err = -ESRCH; | ||
941 | goto err_task_lock; | ||
942 | } | ||
943 | |||
944 | if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { | ||
945 | err = -EACCES; | ||
946 | goto err_sighand; | ||
947 | } | ||
948 | |||
949 | /* | ||
950 | * Warn that /proc/pid/oom_adj is deprecated, see | ||
951 | * Documentation/feature-removal-schedule.txt. | ||
952 | */ | ||
953 | printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", | ||
954 | current->comm, task_pid_nr(current), task_pid_nr(task), | ||
955 | task_pid_nr(task)); | ||
956 | task->signal->oom_adj = oom_adjust; | ||
957 | /* | ||
958 | * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum | ||
959 | * value is always attainable. | ||
960 | */ | ||
961 | if (task->signal->oom_adj == OOM_ADJUST_MAX) | ||
962 | task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX; | ||
963 | else | ||
964 | task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / | ||
965 | -OOM_DISABLE; | ||
966 | trace_oom_score_adj_update(task); | ||
967 | err_sighand: | ||
968 | unlock_task_sighand(task, &flags); | ||
969 | err_task_lock: | ||
970 | task_unlock(task); | ||
971 | put_task_struct(task); | ||
972 | out: | ||
973 | return err < 0 ? err : count; | ||
974 | } | ||
975 | |||
976 | static const struct file_operations proc_oom_adjust_operations = { | ||
977 | .read = oom_adjust_read, | ||
978 | .write = oom_adjust_write, | ||
979 | .llseek = generic_file_llseek, | ||
980 | }; | ||
981 | |||
982 | static ssize_t oom_score_adj_read(struct file *file, char __user *buf, | 876 | static ssize_t oom_score_adj_read(struct file *file, char __user *buf, |
983 | size_t count, loff_t *ppos) | 877 | size_t count, loff_t *ppos) |
984 | { | 878 | { |
@@ -1052,15 +946,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, | |||
1052 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) | 946 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) |
1053 | task->signal->oom_score_adj_min = oom_score_adj; | 947 | task->signal->oom_score_adj_min = oom_score_adj; |
1054 | trace_oom_score_adj_update(task); | 948 | trace_oom_score_adj_update(task); |
1055 | /* | 949 | |
1056 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is | ||
1057 | * always attainable. | ||
1058 | */ | ||
1059 | if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
1060 | task->signal->oom_adj = OOM_DISABLE; | ||
1061 | else | ||
1062 | task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / | ||
1063 | OOM_SCORE_ADJ_MAX; | ||
1064 | err_sighand: | 950 | err_sighand: |
1065 | unlock_task_sighand(task, &flags); | 951 | unlock_task_sighand(task, &flags); |
1066 | err_task_lock: | 952 | err_task_lock: |
@@ -1089,7 +975,8 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf, | |||
1089 | if (!task) | 975 | if (!task) |
1090 | return -ESRCH; | 976 | return -ESRCH; |
1091 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", | 977 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", |
1092 | audit_get_loginuid(task)); | 978 | from_kuid(file->f_cred->user_ns, |
979 | audit_get_loginuid(task))); | ||
1093 | put_task_struct(task); | 980 | put_task_struct(task); |
1094 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); | 981 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); |
1095 | } | 982 | } |
@@ -1101,6 +988,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
1101 | char *page, *tmp; | 988 | char *page, *tmp; |
1102 | ssize_t length; | 989 | ssize_t length; |
1103 | uid_t loginuid; | 990 | uid_t loginuid; |
991 | kuid_t kloginuid; | ||
1104 | 992 | ||
1105 | rcu_read_lock(); | 993 | rcu_read_lock(); |
1106 | if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { | 994 | if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { |
@@ -1130,7 +1018,13 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
1130 | goto out_free_page; | 1018 | goto out_free_page; |
1131 | 1019 | ||
1132 | } | 1020 | } |
1133 | length = audit_set_loginuid(loginuid); | 1021 | kloginuid = make_kuid(file->f_cred->user_ns, loginuid); |
1022 | if (!uid_valid(kloginuid)) { | ||
1023 | length = -EINVAL; | ||
1024 | goto out_free_page; | ||
1025 | } | ||
1026 | |||
1027 | length = audit_set_loginuid(kloginuid); | ||
1134 | if (likely(length == 0)) | 1028 | if (likely(length == 0)) |
1135 | length = count; | 1029 | length = count; |
1136 | 1030 | ||
@@ -1492,7 +1386,7 @@ out: | |||
1492 | return error; | 1386 | return error; |
1493 | } | 1387 | } |
1494 | 1388 | ||
1495 | static const struct inode_operations proc_pid_link_inode_operations = { | 1389 | const struct inode_operations proc_pid_link_inode_operations = { |
1496 | .readlink = proc_pid_readlink, | 1390 | .readlink = proc_pid_readlink, |
1497 | .follow_link = proc_pid_follow_link, | 1391 | .follow_link = proc_pid_follow_link, |
1498 | .setattr = proc_setattr, | 1392 | .setattr = proc_setattr, |
@@ -1501,21 +1395,6 @@ static const struct inode_operations proc_pid_link_inode_operations = { | |||
1501 | 1395 | ||
1502 | /* building an inode */ | 1396 | /* building an inode */ |
1503 | 1397 | ||
1504 | static int task_dumpable(struct task_struct *task) | ||
1505 | { | ||
1506 | int dumpable = 0; | ||
1507 | struct mm_struct *mm; | ||
1508 | |||
1509 | task_lock(task); | ||
1510 | mm = task->mm; | ||
1511 | if (mm) | ||
1512 | dumpable = get_dumpable(mm); | ||
1513 | task_unlock(task); | ||
1514 | if(dumpable == 1) | ||
1515 | return 1; | ||
1516 | return 0; | ||
1517 | } | ||
1518 | |||
1519 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) | 1398 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) |
1520 | { | 1399 | { |
1521 | struct inode * inode; | 1400 | struct inode * inode; |
@@ -1641,15 +1520,6 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) | |||
1641 | return 0; | 1520 | return 0; |
1642 | } | 1521 | } |
1643 | 1522 | ||
1644 | static int pid_delete_dentry(const struct dentry * dentry) | ||
1645 | { | ||
1646 | /* Is the task we represent dead? | ||
1647 | * If so, then don't put the dentry on the lru list, | ||
1648 | * kill it immediately. | ||
1649 | */ | ||
1650 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | ||
1651 | } | ||
1652 | |||
1653 | const struct dentry_operations pid_dentry_operations = | 1523 | const struct dentry_operations pid_dentry_operations = |
1654 | { | 1524 | { |
1655 | .d_revalidate = pid_revalidate, | 1525 | .d_revalidate = pid_revalidate, |
@@ -1712,289 +1582,6 @@ end_instantiate: | |||
1712 | return filldir(dirent, name, len, filp->f_pos, ino, type); | 1582 | return filldir(dirent, name, len, filp->f_pos, ino, type); |
1713 | } | 1583 | } |
1714 | 1584 | ||
1715 | static unsigned name_to_int(struct dentry *dentry) | ||
1716 | { | ||
1717 | const char *name = dentry->d_name.name; | ||
1718 | int len = dentry->d_name.len; | ||
1719 | unsigned n = 0; | ||
1720 | |||
1721 | if (len > 1 && *name == '0') | ||
1722 | goto out; | ||
1723 | while (len-- > 0) { | ||
1724 | unsigned c = *name++ - '0'; | ||
1725 | if (c > 9) | ||
1726 | goto out; | ||
1727 | if (n >= (~0U-9)/10) | ||
1728 | goto out; | ||
1729 | n *= 10; | ||
1730 | n += c; | ||
1731 | } | ||
1732 | return n; | ||
1733 | out: | ||
1734 | return ~0U; | ||
1735 | } | ||
1736 | |||
1737 | #define PROC_FDINFO_MAX 64 | ||
1738 | |||
1739 | static int proc_fd_info(struct inode *inode, struct path *path, char *info) | ||
1740 | { | ||
1741 | struct task_struct *task = get_proc_task(inode); | ||
1742 | struct files_struct *files = NULL; | ||
1743 | struct file *file; | ||
1744 | int fd = proc_fd(inode); | ||
1745 | |||
1746 | if (task) { | ||
1747 | files = get_files_struct(task); | ||
1748 | put_task_struct(task); | ||
1749 | } | ||
1750 | if (files) { | ||
1751 | /* | ||
1752 | * We are not taking a ref to the file structure, so we must | ||
1753 | * hold ->file_lock. | ||
1754 | */ | ||
1755 | spin_lock(&files->file_lock); | ||
1756 | file = fcheck_files(files, fd); | ||
1757 | if (file) { | ||
1758 | unsigned int f_flags; | ||
1759 | struct fdtable *fdt; | ||
1760 | |||
1761 | fdt = files_fdtable(files); | ||
1762 | f_flags = file->f_flags & ~O_CLOEXEC; | ||
1763 | if (close_on_exec(fd, fdt)) | ||
1764 | f_flags |= O_CLOEXEC; | ||
1765 | |||
1766 | if (path) { | ||
1767 | *path = file->f_path; | ||
1768 | path_get(&file->f_path); | ||
1769 | } | ||
1770 | if (info) | ||
1771 | snprintf(info, PROC_FDINFO_MAX, | ||
1772 | "pos:\t%lli\n" | ||
1773 | "flags:\t0%o\n", | ||
1774 | (long long) file->f_pos, | ||
1775 | f_flags); | ||
1776 | spin_unlock(&files->file_lock); | ||
1777 | put_files_struct(files); | ||
1778 | return 0; | ||
1779 | } | ||
1780 | spin_unlock(&files->file_lock); | ||
1781 | put_files_struct(files); | ||
1782 | } | ||
1783 | return -ENOENT; | ||
1784 | } | ||
1785 | |||
1786 | static int proc_fd_link(struct dentry *dentry, struct path *path) | ||
1787 | { | ||
1788 | return proc_fd_info(dentry->d_inode, path, NULL); | ||
1789 | } | ||
1790 | |||
1791 | static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) | ||
1792 | { | ||
1793 | struct inode *inode; | ||
1794 | struct task_struct *task; | ||
1795 | int fd; | ||
1796 | struct files_struct *files; | ||
1797 | const struct cred *cred; | ||
1798 | |||
1799 | if (flags & LOOKUP_RCU) | ||
1800 | return -ECHILD; | ||
1801 | |||
1802 | inode = dentry->d_inode; | ||
1803 | task = get_proc_task(inode); | ||
1804 | fd = proc_fd(inode); | ||
1805 | |||
1806 | if (task) { | ||
1807 | files = get_files_struct(task); | ||
1808 | if (files) { | ||
1809 | struct file *file; | ||
1810 | rcu_read_lock(); | ||
1811 | file = fcheck_files(files, fd); | ||
1812 | if (file) { | ||
1813 | unsigned f_mode = file->f_mode; | ||
1814 | |||
1815 | rcu_read_unlock(); | ||
1816 | put_files_struct(files); | ||
1817 | |||
1818 | if (task_dumpable(task)) { | ||
1819 | rcu_read_lock(); | ||
1820 | cred = __task_cred(task); | ||
1821 | inode->i_uid = cred->euid; | ||
1822 | inode->i_gid = cred->egid; | ||
1823 | rcu_read_unlock(); | ||
1824 | } else { | ||
1825 | inode->i_uid = GLOBAL_ROOT_UID; | ||
1826 | inode->i_gid = GLOBAL_ROOT_GID; | ||
1827 | } | ||
1828 | |||
1829 | if (S_ISLNK(inode->i_mode)) { | ||
1830 | unsigned i_mode = S_IFLNK; | ||
1831 | if (f_mode & FMODE_READ) | ||
1832 | i_mode |= S_IRUSR | S_IXUSR; | ||
1833 | if (f_mode & FMODE_WRITE) | ||
1834 | i_mode |= S_IWUSR | S_IXUSR; | ||
1835 | inode->i_mode = i_mode; | ||
1836 | } | ||
1837 | |||
1838 | security_task_to_inode(task, inode); | ||
1839 | put_task_struct(task); | ||
1840 | return 1; | ||
1841 | } | ||
1842 | rcu_read_unlock(); | ||
1843 | put_files_struct(files); | ||
1844 | } | ||
1845 | put_task_struct(task); | ||
1846 | } | ||
1847 | d_drop(dentry); | ||
1848 | return 0; | ||
1849 | } | ||
1850 | |||
1851 | static const struct dentry_operations tid_fd_dentry_operations = | ||
1852 | { | ||
1853 | .d_revalidate = tid_fd_revalidate, | ||
1854 | .d_delete = pid_delete_dentry, | ||
1855 | }; | ||
1856 | |||
1857 | static struct dentry *proc_fd_instantiate(struct inode *dir, | ||
1858 | struct dentry *dentry, struct task_struct *task, const void *ptr) | ||
1859 | { | ||
1860 | unsigned fd = (unsigned long)ptr; | ||
1861 | struct inode *inode; | ||
1862 | struct proc_inode *ei; | ||
1863 | struct dentry *error = ERR_PTR(-ENOENT); | ||
1864 | |||
1865 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
1866 | if (!inode) | ||
1867 | goto out; | ||
1868 | ei = PROC_I(inode); | ||
1869 | ei->fd = fd; | ||
1870 | |||
1871 | inode->i_mode = S_IFLNK; | ||
1872 | inode->i_op = &proc_pid_link_inode_operations; | ||
1873 | inode->i_size = 64; | ||
1874 | ei->op.proc_get_link = proc_fd_link; | ||
1875 | d_set_d_op(dentry, &tid_fd_dentry_operations); | ||
1876 | d_add(dentry, inode); | ||
1877 | /* Close the race of the process dying before we return the dentry */ | ||
1878 | if (tid_fd_revalidate(dentry, 0)) | ||
1879 | error = NULL; | ||
1880 | |||
1881 | out: | ||
1882 | return error; | ||
1883 | } | ||
1884 | |||
1885 | static struct dentry *proc_lookupfd_common(struct inode *dir, | ||
1886 | struct dentry *dentry, | ||
1887 | instantiate_t instantiate) | ||
1888 | { | ||
1889 | struct task_struct *task = get_proc_task(dir); | ||
1890 | unsigned fd = name_to_int(dentry); | ||
1891 | struct dentry *result = ERR_PTR(-ENOENT); | ||
1892 | |||
1893 | if (!task) | ||
1894 | goto out_no_task; | ||
1895 | if (fd == ~0U) | ||
1896 | goto out; | ||
1897 | |||
1898 | result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); | ||
1899 | out: | ||
1900 | put_task_struct(task); | ||
1901 | out_no_task: | ||
1902 | return result; | ||
1903 | } | ||
1904 | |||
1905 | static int proc_readfd_common(struct file * filp, void * dirent, | ||
1906 | filldir_t filldir, instantiate_t instantiate) | ||
1907 | { | ||
1908 | struct dentry *dentry = filp->f_path.dentry; | ||
1909 | struct inode *inode = dentry->d_inode; | ||
1910 | struct task_struct *p = get_proc_task(inode); | ||
1911 | unsigned int fd, ino; | ||
1912 | int retval; | ||
1913 | struct files_struct * files; | ||
1914 | |||
1915 | retval = -ENOENT; | ||
1916 | if (!p) | ||
1917 | goto out_no_task; | ||
1918 | retval = 0; | ||
1919 | |||
1920 | fd = filp->f_pos; | ||
1921 | switch (fd) { | ||
1922 | case 0: | ||
1923 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
1924 | goto out; | ||
1925 | filp->f_pos++; | ||
1926 | case 1: | ||
1927 | ino = parent_ino(dentry); | ||
1928 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
1929 | goto out; | ||
1930 | filp->f_pos++; | ||
1931 | default: | ||
1932 | files = get_files_struct(p); | ||
1933 | if (!files) | ||
1934 | goto out; | ||
1935 | rcu_read_lock(); | ||
1936 | for (fd = filp->f_pos-2; | ||
1937 | fd < files_fdtable(files)->max_fds; | ||
1938 | fd++, filp->f_pos++) { | ||
1939 | char name[PROC_NUMBUF]; | ||
1940 | int len; | ||
1941 | int rv; | ||
1942 | |||
1943 | if (!fcheck_files(files, fd)) | ||
1944 | continue; | ||
1945 | rcu_read_unlock(); | ||
1946 | |||
1947 | len = snprintf(name, sizeof(name), "%d", fd); | ||
1948 | rv = proc_fill_cache(filp, dirent, filldir, | ||
1949 | name, len, instantiate, p, | ||
1950 | (void *)(unsigned long)fd); | ||
1951 | if (rv < 0) | ||
1952 | goto out_fd_loop; | ||
1953 | rcu_read_lock(); | ||
1954 | } | ||
1955 | rcu_read_unlock(); | ||
1956 | out_fd_loop: | ||
1957 | put_files_struct(files); | ||
1958 | } | ||
1959 | out: | ||
1960 | put_task_struct(p); | ||
1961 | out_no_task: | ||
1962 | return retval; | ||
1963 | } | ||
1964 | |||
1965 | static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, | ||
1966 | unsigned int flags) | ||
1967 | { | ||
1968 | return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); | ||
1969 | } | ||
1970 | |||
1971 | static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) | ||
1972 | { | ||
1973 | return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); | ||
1974 | } | ||
1975 | |||
1976 | static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, | ||
1977 | size_t len, loff_t *ppos) | ||
1978 | { | ||
1979 | char tmp[PROC_FDINFO_MAX]; | ||
1980 | int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); | ||
1981 | if (!err) | ||
1982 | err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); | ||
1983 | return err; | ||
1984 | } | ||
1985 | |||
1986 | static const struct file_operations proc_fdinfo_file_operations = { | ||
1987 | .open = nonseekable_open, | ||
1988 | .read = proc_fdinfo_read, | ||
1989 | .llseek = no_llseek, | ||
1990 | }; | ||
1991 | |||
1992 | static const struct file_operations proc_fd_operations = { | ||
1993 | .read = generic_read_dir, | ||
1994 | .readdir = proc_readfd, | ||
1995 | .llseek = default_llseek, | ||
1996 | }; | ||
1997 | |||
1998 | #ifdef CONFIG_CHECKPOINT_RESTORE | 1585 | #ifdef CONFIG_CHECKPOINT_RESTORE |
1999 | 1586 | ||
2000 | /* | 1587 | /* |
@@ -2113,7 +1700,7 @@ out: | |||
2113 | } | 1700 | } |
2114 | 1701 | ||
2115 | struct map_files_info { | 1702 | struct map_files_info { |
2116 | struct file *file; | 1703 | fmode_t mode; |
2117 | unsigned long len; | 1704 | unsigned long len; |
2118 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ | 1705 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ |
2119 | }; | 1706 | }; |
@@ -2122,13 +1709,10 @@ static struct dentry * | |||
2122 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | 1709 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, |
2123 | struct task_struct *task, const void *ptr) | 1710 | struct task_struct *task, const void *ptr) |
2124 | { | 1711 | { |
2125 | const struct file *file = ptr; | 1712 | fmode_t mode = (fmode_t)(unsigned long)ptr; |
2126 | struct proc_inode *ei; | 1713 | struct proc_inode *ei; |
2127 | struct inode *inode; | 1714 | struct inode *inode; |
2128 | 1715 | ||
2129 | if (!file) | ||
2130 | return ERR_PTR(-ENOENT); | ||
2131 | |||
2132 | inode = proc_pid_make_inode(dir->i_sb, task); | 1716 | inode = proc_pid_make_inode(dir->i_sb, task); |
2133 | if (!inode) | 1717 | if (!inode) |
2134 | return ERR_PTR(-ENOENT); | 1718 | return ERR_PTR(-ENOENT); |
@@ -2140,9 +1724,9 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | |||
2140 | inode->i_size = 64; | 1724 | inode->i_size = 64; |
2141 | inode->i_mode = S_IFLNK; | 1725 | inode->i_mode = S_IFLNK; |
2142 | 1726 | ||
2143 | if (file->f_mode & FMODE_READ) | 1727 | if (mode & FMODE_READ) |
2144 | inode->i_mode |= S_IRUSR; | 1728 | inode->i_mode |= S_IRUSR; |
2145 | if (file->f_mode & FMODE_WRITE) | 1729 | if (mode & FMODE_WRITE) |
2146 | inode->i_mode |= S_IWUSR; | 1730 | inode->i_mode |= S_IWUSR; |
2147 | 1731 | ||
2148 | d_set_d_op(dentry, &tid_map_files_dentry_operations); | 1732 | d_set_d_op(dentry, &tid_map_files_dentry_operations); |
@@ -2186,7 +1770,8 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, | |||
2186 | if (!vma) | 1770 | if (!vma) |
2187 | goto out_no_vma; | 1771 | goto out_no_vma; |
2188 | 1772 | ||
2189 | result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file); | 1773 | result = proc_map_files_instantiate(dir, dentry, task, |
1774 | (void *)(unsigned long)vma->vm_file->f_mode); | ||
2190 | 1775 | ||
2191 | out_no_vma: | 1776 | out_no_vma: |
2192 | up_read(&mm->mmap_sem); | 1777 | up_read(&mm->mmap_sem); |
@@ -2287,8 +1872,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
2287 | if (++pos <= filp->f_pos) | 1872 | if (++pos <= filp->f_pos) |
2288 | continue; | 1873 | continue; |
2289 | 1874 | ||
2290 | get_file(vma->vm_file); | 1875 | info.mode = vma->vm_file->f_mode; |
2291 | info.file = vma->vm_file; | ||
2292 | info.len = snprintf(info.name, | 1876 | info.len = snprintf(info.name, |
2293 | sizeof(info.name), "%lx-%lx", | 1877 | sizeof(info.name), "%lx-%lx", |
2294 | vma->vm_start, vma->vm_end); | 1878 | vma->vm_start, vma->vm_end); |
@@ -2303,19 +1887,11 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
2303 | ret = proc_fill_cache(filp, dirent, filldir, | 1887 | ret = proc_fill_cache(filp, dirent, filldir, |
2304 | p->name, p->len, | 1888 | p->name, p->len, |
2305 | proc_map_files_instantiate, | 1889 | proc_map_files_instantiate, |
2306 | task, p->file); | 1890 | task, |
1891 | (void *)(unsigned long)p->mode); | ||
2307 | if (ret) | 1892 | if (ret) |
2308 | break; | 1893 | break; |
2309 | filp->f_pos++; | 1894 | filp->f_pos++; |
2310 | fput(p->file); | ||
2311 | } | ||
2312 | for (; i < nr_files; i++) { | ||
2313 | /* | ||
2314 | * In case of error don't forget | ||
2315 | * to put rest of file refs. | ||
2316 | */ | ||
2317 | p = flex_array_get(fa, i); | ||
2318 | fput(p->file); | ||
2319 | } | 1895 | } |
2320 | if (fa) | 1896 | if (fa) |
2321 | flex_array_free(fa); | 1897 | flex_array_free(fa); |
@@ -2337,82 +1913,6 @@ static const struct file_operations proc_map_files_operations = { | |||
2337 | 1913 | ||
2338 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | 1914 | #endif /* CONFIG_CHECKPOINT_RESTORE */ |
2339 | 1915 | ||
2340 | /* | ||
2341 | * /proc/pid/fd needs a special permission handler so that a process can still | ||
2342 | * access /proc/self/fd after it has executed a setuid(). | ||
2343 | */ | ||
2344 | static int proc_fd_permission(struct inode *inode, int mask) | ||
2345 | { | ||
2346 | int rv = generic_permission(inode, mask); | ||
2347 | if (rv == 0) | ||
2348 | return 0; | ||
2349 | if (task_pid(current) == proc_pid(inode)) | ||
2350 | rv = 0; | ||
2351 | return rv; | ||
2352 | } | ||
2353 | |||
2354 | /* | ||
2355 | * proc directories can do almost nothing.. | ||
2356 | */ | ||
2357 | static const struct inode_operations proc_fd_inode_operations = { | ||
2358 | .lookup = proc_lookupfd, | ||
2359 | .permission = proc_fd_permission, | ||
2360 | .setattr = proc_setattr, | ||
2361 | }; | ||
2362 | |||
2363 | static struct dentry *proc_fdinfo_instantiate(struct inode *dir, | ||
2364 | struct dentry *dentry, struct task_struct *task, const void *ptr) | ||
2365 | { | ||
2366 | unsigned fd = (unsigned long)ptr; | ||
2367 | struct inode *inode; | ||
2368 | struct proc_inode *ei; | ||
2369 | struct dentry *error = ERR_PTR(-ENOENT); | ||
2370 | |||
2371 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
2372 | if (!inode) | ||
2373 | goto out; | ||
2374 | ei = PROC_I(inode); | ||
2375 | ei->fd = fd; | ||
2376 | inode->i_mode = S_IFREG | S_IRUSR; | ||
2377 | inode->i_fop = &proc_fdinfo_file_operations; | ||
2378 | d_set_d_op(dentry, &tid_fd_dentry_operations); | ||
2379 | d_add(dentry, inode); | ||
2380 | /* Close the race of the process dying before we return the dentry */ | ||
2381 | if (tid_fd_revalidate(dentry, 0)) | ||
2382 | error = NULL; | ||
2383 | |||
2384 | out: | ||
2385 | return error; | ||
2386 | } | ||
2387 | |||
2388 | static struct dentry *proc_lookupfdinfo(struct inode *dir, | ||
2389 | struct dentry *dentry, | ||
2390 | unsigned int flags) | ||
2391 | { | ||
2392 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); | ||
2393 | } | ||
2394 | |||
2395 | static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) | ||
2396 | { | ||
2397 | return proc_readfd_common(filp, dirent, filldir, | ||
2398 | proc_fdinfo_instantiate); | ||
2399 | } | ||
2400 | |||
2401 | static const struct file_operations proc_fdinfo_operations = { | ||
2402 | .read = generic_read_dir, | ||
2403 | .readdir = proc_readfdinfo, | ||
2404 | .llseek = default_llseek, | ||
2405 | }; | ||
2406 | |||
2407 | /* | ||
2408 | * proc directories can do almost nothing.. | ||
2409 | */ | ||
2410 | static const struct inode_operations proc_fdinfo_inode_operations = { | ||
2411 | .lookup = proc_lookupfdinfo, | ||
2412 | .setattr = proc_setattr, | ||
2413 | }; | ||
2414 | |||
2415 | |||
2416 | static struct dentry *proc_pident_instantiate(struct inode *dir, | 1916 | static struct dentry *proc_pident_instantiate(struct inode *dir, |
2417 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 1917 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
2418 | { | 1918 | { |
@@ -2983,6 +2483,11 @@ static int proc_gid_map_open(struct inode *inode, struct file *file) | |||
2983 | return proc_id_map_open(inode, file, &proc_gid_seq_operations); | 2483 | return proc_id_map_open(inode, file, &proc_gid_seq_operations); |
2984 | } | 2484 | } |
2985 | 2485 | ||
2486 | static int proc_projid_map_open(struct inode *inode, struct file *file) | ||
2487 | { | ||
2488 | return proc_id_map_open(inode, file, &proc_projid_seq_operations); | ||
2489 | } | ||
2490 | |||
2986 | static const struct file_operations proc_uid_map_operations = { | 2491 | static const struct file_operations proc_uid_map_operations = { |
2987 | .open = proc_uid_map_open, | 2492 | .open = proc_uid_map_open, |
2988 | .write = proc_uid_map_write, | 2493 | .write = proc_uid_map_write, |
@@ -2998,6 +2503,14 @@ static const struct file_operations proc_gid_map_operations = { | |||
2998 | .llseek = seq_lseek, | 2503 | .llseek = seq_lseek, |
2999 | .release = proc_id_map_release, | 2504 | .release = proc_id_map_release, |
3000 | }; | 2505 | }; |
2506 | |||
2507 | static const struct file_operations proc_projid_map_operations = { | ||
2508 | .open = proc_projid_map_open, | ||
2509 | .write = proc_projid_map_write, | ||
2510 | .read = seq_read, | ||
2511 | .llseek = seq_lseek, | ||
2512 | .release = proc_id_map_release, | ||
2513 | }; | ||
3001 | #endif /* CONFIG_USER_NS */ | 2514 | #endif /* CONFIG_USER_NS */ |
3002 | 2515 | ||
3003 | static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, | 2516 | static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, |
@@ -3084,7 +2597,6 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
3084 | REG("cgroup", S_IRUGO, proc_cgroup_operations), | 2597 | REG("cgroup", S_IRUGO, proc_cgroup_operations), |
3085 | #endif | 2598 | #endif |
3086 | INF("oom_score", S_IRUGO, proc_oom_score), | 2599 | INF("oom_score", S_IRUGO, proc_oom_score), |
3087 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), | ||
3088 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), | 2600 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), |
3089 | #ifdef CONFIG_AUDITSYSCALL | 2601 | #ifdef CONFIG_AUDITSYSCALL |
3090 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), | 2602 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), |
@@ -3105,6 +2617,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
3105 | #ifdef CONFIG_USER_NS | 2617 | #ifdef CONFIG_USER_NS |
3106 | REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), | 2618 | REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), |
3107 | REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), | 2619 | REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), |
2620 | REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), | ||
3108 | #endif | 2621 | #endif |
3109 | }; | 2622 | }; |
3110 | 2623 | ||
@@ -3450,7 +2963,6 @@ static const struct pid_entry tid_base_stuff[] = { | |||
3450 | REG("cgroup", S_IRUGO, proc_cgroup_operations), | 2963 | REG("cgroup", S_IRUGO, proc_cgroup_operations), |
3451 | #endif | 2964 | #endif |
3452 | INF("oom_score", S_IRUGO, proc_oom_score), | 2965 | INF("oom_score", S_IRUGO, proc_oom_score), |
3453 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), | ||
3454 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), | 2966 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), |
3455 | #ifdef CONFIG_AUDITSYSCALL | 2967 | #ifdef CONFIG_AUDITSYSCALL |
3456 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), | 2968 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), |
@@ -3468,6 +2980,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
3468 | #ifdef CONFIG_USER_NS | 2980 | #ifdef CONFIG_USER_NS |
3469 | REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), | 2981 | REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), |
3470 | REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), | 2982 | REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), |
2983 | REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), | ||
3471 | #endif | 2984 | #endif |
3472 | }; | 2985 | }; |
3473 | 2986 | ||
diff --git a/fs/proc/fd.c b/fs/proc/fd.c new file mode 100644 index 000000000000..f28a875f8779 --- /dev/null +++ b/fs/proc/fd.c | |||
@@ -0,0 +1,367 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/dcache.h> | ||
4 | #include <linux/path.h> | ||
5 | #include <linux/fdtable.h> | ||
6 | #include <linux/namei.h> | ||
7 | #include <linux/pid.h> | ||
8 | #include <linux/security.h> | ||
9 | #include <linux/file.h> | ||
10 | #include <linux/seq_file.h> | ||
11 | |||
12 | #include <linux/proc_fs.h> | ||
13 | |||
14 | #include "internal.h" | ||
15 | #include "fd.h" | ||
16 | |||
17 | static int seq_show(struct seq_file *m, void *v) | ||
18 | { | ||
19 | struct files_struct *files = NULL; | ||
20 | int f_flags = 0, ret = -ENOENT; | ||
21 | struct file *file = NULL; | ||
22 | struct task_struct *task; | ||
23 | |||
24 | task = get_proc_task(m->private); | ||
25 | if (!task) | ||
26 | return -ENOENT; | ||
27 | |||
28 | files = get_files_struct(task); | ||
29 | put_task_struct(task); | ||
30 | |||
31 | if (files) { | ||
32 | int fd = proc_fd(m->private); | ||
33 | |||
34 | spin_lock(&files->file_lock); | ||
35 | file = fcheck_files(files, fd); | ||
36 | if (file) { | ||
37 | struct fdtable *fdt = files_fdtable(files); | ||
38 | |||
39 | f_flags = file->f_flags; | ||
40 | if (close_on_exec(fd, fdt)) | ||
41 | f_flags |= O_CLOEXEC; | ||
42 | |||
43 | get_file(file); | ||
44 | ret = 0; | ||
45 | } | ||
46 | spin_unlock(&files->file_lock); | ||
47 | put_files_struct(files); | ||
48 | } | ||
49 | |||
50 | if (!ret) { | ||
51 | seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", | ||
52 | (long long)file->f_pos, f_flags); | ||
53 | fput(file); | ||
54 | } | ||
55 | |||
56 | return ret; | ||
57 | } | ||
58 | |||
59 | static int seq_fdinfo_open(struct inode *inode, struct file *file) | ||
60 | { | ||
61 | return single_open(file, seq_show, inode); | ||
62 | } | ||
63 | |||
64 | static const struct file_operations proc_fdinfo_file_operations = { | ||
65 | .open = seq_fdinfo_open, | ||
66 | .read = seq_read, | ||
67 | .llseek = seq_lseek, | ||
68 | .release = single_release, | ||
69 | }; | ||
70 | |||
71 | static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) | ||
72 | { | ||
73 | struct files_struct *files; | ||
74 | struct task_struct *task; | ||
75 | const struct cred *cred; | ||
76 | struct inode *inode; | ||
77 | int fd; | ||
78 | |||
79 | if (flags & LOOKUP_RCU) | ||
80 | return -ECHILD; | ||
81 | |||
82 | inode = dentry->d_inode; | ||
83 | task = get_proc_task(inode); | ||
84 | fd = proc_fd(inode); | ||
85 | |||
86 | if (task) { | ||
87 | files = get_files_struct(task); | ||
88 | if (files) { | ||
89 | struct file *file; | ||
90 | |||
91 | rcu_read_lock(); | ||
92 | file = fcheck_files(files, fd); | ||
93 | if (file) { | ||
94 | unsigned f_mode = file->f_mode; | ||
95 | |||
96 | rcu_read_unlock(); | ||
97 | put_files_struct(files); | ||
98 | |||
99 | if (task_dumpable(task)) { | ||
100 | rcu_read_lock(); | ||
101 | cred = __task_cred(task); | ||
102 | inode->i_uid = cred->euid; | ||
103 | inode->i_gid = cred->egid; | ||
104 | rcu_read_unlock(); | ||
105 | } else { | ||
106 | inode->i_uid = GLOBAL_ROOT_UID; | ||
107 | inode->i_gid = GLOBAL_ROOT_GID; | ||
108 | } | ||
109 | |||
110 | if (S_ISLNK(inode->i_mode)) { | ||
111 | unsigned i_mode = S_IFLNK; | ||
112 | if (f_mode & FMODE_READ) | ||
113 | i_mode |= S_IRUSR | S_IXUSR; | ||
114 | if (f_mode & FMODE_WRITE) | ||
115 | i_mode |= S_IWUSR | S_IXUSR; | ||
116 | inode->i_mode = i_mode; | ||
117 | } | ||
118 | |||
119 | security_task_to_inode(task, inode); | ||
120 | put_task_struct(task); | ||
121 | return 1; | ||
122 | } | ||
123 | rcu_read_unlock(); | ||
124 | put_files_struct(files); | ||
125 | } | ||
126 | put_task_struct(task); | ||
127 | } | ||
128 | |||
129 | d_drop(dentry); | ||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | static const struct dentry_operations tid_fd_dentry_operations = { | ||
134 | .d_revalidate = tid_fd_revalidate, | ||
135 | .d_delete = pid_delete_dentry, | ||
136 | }; | ||
137 | |||
138 | static int proc_fd_link(struct dentry *dentry, struct path *path) | ||
139 | { | ||
140 | struct files_struct *files = NULL; | ||
141 | struct task_struct *task; | ||
142 | int ret = -ENOENT; | ||
143 | |||
144 | task = get_proc_task(dentry->d_inode); | ||
145 | if (task) { | ||
146 | files = get_files_struct(task); | ||
147 | put_task_struct(task); | ||
148 | } | ||
149 | |||
150 | if (files) { | ||
151 | int fd = proc_fd(dentry->d_inode); | ||
152 | struct file *fd_file; | ||
153 | |||
154 | spin_lock(&files->file_lock); | ||
155 | fd_file = fcheck_files(files, fd); | ||
156 | if (fd_file) { | ||
157 | *path = fd_file->f_path; | ||
158 | path_get(&fd_file->f_path); | ||
159 | ret = 0; | ||
160 | } | ||
161 | spin_unlock(&files->file_lock); | ||
162 | put_files_struct(files); | ||
163 | } | ||
164 | |||
165 | return ret; | ||
166 | } | ||
167 | |||
168 | static struct dentry * | ||
169 | proc_fd_instantiate(struct inode *dir, struct dentry *dentry, | ||
170 | struct task_struct *task, const void *ptr) | ||
171 | { | ||
172 | struct dentry *error = ERR_PTR(-ENOENT); | ||
173 | unsigned fd = (unsigned long)ptr; | ||
174 | struct proc_inode *ei; | ||
175 | struct inode *inode; | ||
176 | |||
177 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
178 | if (!inode) | ||
179 | goto out; | ||
180 | |||
181 | ei = PROC_I(inode); | ||
182 | ei->fd = fd; | ||
183 | |||
184 | inode->i_mode = S_IFLNK; | ||
185 | inode->i_op = &proc_pid_link_inode_operations; | ||
186 | inode->i_size = 64; | ||
187 | |||
188 | ei->op.proc_get_link = proc_fd_link; | ||
189 | |||
190 | d_set_d_op(dentry, &tid_fd_dentry_operations); | ||
191 | d_add(dentry, inode); | ||
192 | |||
193 | /* Close the race of the process dying before we return the dentry */ | ||
194 | if (tid_fd_revalidate(dentry, 0)) | ||
195 | error = NULL; | ||
196 | out: | ||
197 | return error; | ||
198 | } | ||
199 | |||
200 | static struct dentry *proc_lookupfd_common(struct inode *dir, | ||
201 | struct dentry *dentry, | ||
202 | instantiate_t instantiate) | ||
203 | { | ||
204 | struct task_struct *task = get_proc_task(dir); | ||
205 | struct dentry *result = ERR_PTR(-ENOENT); | ||
206 | unsigned fd = name_to_int(dentry); | ||
207 | |||
208 | if (!task) | ||
209 | goto out_no_task; | ||
210 | if (fd == ~0U) | ||
211 | goto out; | ||
212 | |||
213 | result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); | ||
214 | out: | ||
215 | put_task_struct(task); | ||
216 | out_no_task: | ||
217 | return result; | ||
218 | } | ||
219 | |||
220 | static int proc_readfd_common(struct file * filp, void * dirent, | ||
221 | filldir_t filldir, instantiate_t instantiate) | ||
222 | { | ||
223 | struct dentry *dentry = filp->f_path.dentry; | ||
224 | struct inode *inode = dentry->d_inode; | ||
225 | struct task_struct *p = get_proc_task(inode); | ||
226 | struct files_struct *files; | ||
227 | unsigned int fd, ino; | ||
228 | int retval; | ||
229 | |||
230 | retval = -ENOENT; | ||
231 | if (!p) | ||
232 | goto out_no_task; | ||
233 | retval = 0; | ||
234 | |||
235 | fd = filp->f_pos; | ||
236 | switch (fd) { | ||
237 | case 0: | ||
238 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
239 | goto out; | ||
240 | filp->f_pos++; | ||
241 | case 1: | ||
242 | ino = parent_ino(dentry); | ||
243 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
244 | goto out; | ||
245 | filp->f_pos++; | ||
246 | default: | ||
247 | files = get_files_struct(p); | ||
248 | if (!files) | ||
249 | goto out; | ||
250 | rcu_read_lock(); | ||
251 | for (fd = filp->f_pos - 2; | ||
252 | fd < files_fdtable(files)->max_fds; | ||
253 | fd++, filp->f_pos++) { | ||
254 | char name[PROC_NUMBUF]; | ||
255 | int len; | ||
256 | int rv; | ||
257 | |||
258 | if (!fcheck_files(files, fd)) | ||
259 | continue; | ||
260 | rcu_read_unlock(); | ||
261 | |||
262 | len = snprintf(name, sizeof(name), "%d", fd); | ||
263 | rv = proc_fill_cache(filp, dirent, filldir, | ||
264 | name, len, instantiate, p, | ||
265 | (void *)(unsigned long)fd); | ||
266 | if (rv < 0) | ||
267 | goto out_fd_loop; | ||
268 | rcu_read_lock(); | ||
269 | } | ||
270 | rcu_read_unlock(); | ||
271 | out_fd_loop: | ||
272 | put_files_struct(files); | ||
273 | } | ||
274 | out: | ||
275 | put_task_struct(p); | ||
276 | out_no_task: | ||
277 | return retval; | ||
278 | } | ||
279 | |||
280 | static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) | ||
281 | { | ||
282 | return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); | ||
283 | } | ||
284 | |||
285 | const struct file_operations proc_fd_operations = { | ||
286 | .read = generic_read_dir, | ||
287 | .readdir = proc_readfd, | ||
288 | .llseek = default_llseek, | ||
289 | }; | ||
290 | |||
291 | static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, | ||
292 | unsigned int flags) | ||
293 | { | ||
294 | return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); | ||
295 | } | ||
296 | |||
297 | /* | ||
298 | * /proc/pid/fd needs a special permission handler so that a process can still | ||
299 | * access /proc/self/fd after it has executed a setuid(). | ||
300 | */ | ||
301 | int proc_fd_permission(struct inode *inode, int mask) | ||
302 | { | ||
303 | int rv = generic_permission(inode, mask); | ||
304 | if (rv == 0) | ||
305 | return 0; | ||
306 | if (task_pid(current) == proc_pid(inode)) | ||
307 | rv = 0; | ||
308 | return rv; | ||
309 | } | ||
310 | |||
311 | const struct inode_operations proc_fd_inode_operations = { | ||
312 | .lookup = proc_lookupfd, | ||
313 | .permission = proc_fd_permission, | ||
314 | .setattr = proc_setattr, | ||
315 | }; | ||
316 | |||
317 | static struct dentry * | ||
318 | proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, | ||
319 | struct task_struct *task, const void *ptr) | ||
320 | { | ||
321 | struct dentry *error = ERR_PTR(-ENOENT); | ||
322 | unsigned fd = (unsigned long)ptr; | ||
323 | struct proc_inode *ei; | ||
324 | struct inode *inode; | ||
325 | |||
326 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
327 | if (!inode) | ||
328 | goto out; | ||
329 | |||
330 | ei = PROC_I(inode); | ||
331 | ei->fd = fd; | ||
332 | |||
333 | inode->i_mode = S_IFREG | S_IRUSR; | ||
334 | inode->i_fop = &proc_fdinfo_file_operations; | ||
335 | |||
336 | d_set_d_op(dentry, &tid_fd_dentry_operations); | ||
337 | d_add(dentry, inode); | ||
338 | |||
339 | /* Close the race of the process dying before we return the dentry */ | ||
340 | if (tid_fd_revalidate(dentry, 0)) | ||
341 | error = NULL; | ||
342 | out: | ||
343 | return error; | ||
344 | } | ||
345 | |||
346 | static struct dentry * | ||
347 | proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags) | ||
348 | { | ||
349 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); | ||
350 | } | ||
351 | |||
352 | static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) | ||
353 | { | ||
354 | return proc_readfd_common(filp, dirent, filldir, | ||
355 | proc_fdinfo_instantiate); | ||
356 | } | ||
357 | |||
358 | const struct inode_operations proc_fdinfo_inode_operations = { | ||
359 | .lookup = proc_lookupfdinfo, | ||
360 | .setattr = proc_setattr, | ||
361 | }; | ||
362 | |||
363 | const struct file_operations proc_fdinfo_operations = { | ||
364 | .read = generic_read_dir, | ||
365 | .readdir = proc_readfdinfo, | ||
366 | .llseek = default_llseek, | ||
367 | }; | ||
diff --git a/fs/proc/fd.h b/fs/proc/fd.h new file mode 100644 index 000000000000..cbb1d47deda8 --- /dev/null +++ b/fs/proc/fd.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #ifndef __PROCFS_FD_H__ | ||
2 | #define __PROCFS_FD_H__ | ||
3 | |||
4 | #include <linux/fs.h> | ||
5 | |||
6 | extern const struct file_operations proc_fd_operations; | ||
7 | extern const struct inode_operations proc_fd_inode_operations; | ||
8 | |||
9 | extern const struct file_operations proc_fdinfo_operations; | ||
10 | extern const struct inode_operations proc_fdinfo_inode_operations; | ||
11 | |||
12 | extern int proc_fd_permission(struct inode *inode, int mask); | ||
13 | |||
14 | #endif /* __PROCFS_FD_H__ */ | ||
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index b3647fe6a608..0d80cef4cfb9 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -427,7 +427,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, | |||
427 | if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { | 427 | if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { |
428 | pde_get(de); | 428 | pde_get(de); |
429 | spin_unlock(&proc_subdir_lock); | 429 | spin_unlock(&proc_subdir_lock); |
430 | error = -EINVAL; | 430 | error = -ENOMEM; |
431 | inode = proc_get_inode(dir->i_sb, de); | 431 | inode = proc_get_inode(dir->i_sb, de); |
432 | goto out_unlock; | 432 | goto out_unlock; |
433 | } | 433 | } |
@@ -605,7 +605,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, | |||
605 | unsigned int len; | 605 | unsigned int len; |
606 | 606 | ||
607 | /* make sure name is valid */ | 607 | /* make sure name is valid */ |
608 | if (!name || !strlen(name)) goto out; | 608 | if (!name || !strlen(name)) |
609 | goto out; | ||
609 | 610 | ||
610 | if (xlate_proc_name(name, parent, &fn) != 0) | 611 | if (xlate_proc_name(name, parent, &fn) != 0) |
611 | goto out; | 612 | goto out; |
@@ -616,20 +617,18 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, | |||
616 | 617 | ||
617 | len = strlen(fn); | 618 | len = strlen(fn); |
618 | 619 | ||
619 | ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); | 620 | ent = kzalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); |
620 | if (!ent) goto out; | 621 | if (!ent) |
622 | goto out; | ||
621 | 623 | ||
622 | memset(ent, 0, sizeof(struct proc_dir_entry)); | ||
623 | memcpy(ent->name, fn, len + 1); | 624 | memcpy(ent->name, fn, len + 1); |
624 | ent->namelen = len; | 625 | ent->namelen = len; |
625 | ent->mode = mode; | 626 | ent->mode = mode; |
626 | ent->nlink = nlink; | 627 | ent->nlink = nlink; |
627 | atomic_set(&ent->count, 1); | 628 | atomic_set(&ent->count, 1); |
628 | ent->pde_users = 0; | ||
629 | spin_lock_init(&ent->pde_unload_lock); | 629 | spin_lock_init(&ent->pde_unload_lock); |
630 | ent->pde_unload_completion = NULL; | ||
631 | INIT_LIST_HEAD(&ent->pde_openers); | 630 | INIT_LIST_HEAD(&ent->pde_openers); |
632 | out: | 631 | out: |
633 | return ent; | 632 | return ent; |
634 | } | 633 | } |
635 | 634 | ||
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 7ac817b64a71..3b22bbdee9ec 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -450,7 +450,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) | |||
450 | return NULL; | 450 | return NULL; |
451 | if (inode->i_state & I_NEW) { | 451 | if (inode->i_state & I_NEW) { |
452 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 452 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
453 | PROC_I(inode)->fd = 0; | ||
454 | PROC_I(inode)->pde = de; | 453 | PROC_I(inode)->pde = de; |
455 | 454 | ||
456 | if (de->mode) { | 455 | if (de->mode) { |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index e1167a1c9126..cceaab07ad54 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -9,6 +9,7 @@ | |||
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/sched.h> | ||
12 | #include <linux/proc_fs.h> | 13 | #include <linux/proc_fs.h> |
13 | struct ctl_table_header; | 14 | struct ctl_table_header; |
14 | 15 | ||
@@ -65,6 +66,7 @@ extern const struct file_operations proc_clear_refs_operations; | |||
65 | extern const struct file_operations proc_pagemap_operations; | 66 | extern const struct file_operations proc_pagemap_operations; |
66 | extern const struct file_operations proc_net_operations; | 67 | extern const struct file_operations proc_net_operations; |
67 | extern const struct inode_operations proc_net_inode_operations; | 68 | extern const struct inode_operations proc_net_inode_operations; |
69 | extern const struct inode_operations proc_pid_link_inode_operations; | ||
68 | 70 | ||
69 | struct proc_maps_private { | 71 | struct proc_maps_private { |
70 | struct pid *pid; | 72 | struct pid *pid; |
@@ -91,6 +93,52 @@ static inline int proc_fd(struct inode *inode) | |||
91 | return PROC_I(inode)->fd; | 93 | return PROC_I(inode)->fd; |
92 | } | 94 | } |
93 | 95 | ||
96 | static inline int task_dumpable(struct task_struct *task) | ||
97 | { | ||
98 | int dumpable = 0; | ||
99 | struct mm_struct *mm; | ||
100 | |||
101 | task_lock(task); | ||
102 | mm = task->mm; | ||
103 | if (mm) | ||
104 | dumpable = get_dumpable(mm); | ||
105 | task_unlock(task); | ||
106 | if (dumpable == SUID_DUMPABLE_ENABLED) | ||
107 | return 1; | ||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | static inline int pid_delete_dentry(const struct dentry * dentry) | ||
112 | { | ||
113 | /* Is the task we represent dead? | ||
114 | * If so, then don't put the dentry on the lru list, | ||
115 | * kill it immediately. | ||
116 | */ | ||
117 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | ||
118 | } | ||
119 | |||
120 | static inline unsigned name_to_int(struct dentry *dentry) | ||
121 | { | ||
122 | const char *name = dentry->d_name.name; | ||
123 | int len = dentry->d_name.len; | ||
124 | unsigned n = 0; | ||
125 | |||
126 | if (len > 1 && *name == '0') | ||
127 | goto out; | ||
128 | while (len-- > 0) { | ||
129 | unsigned c = *name++ - '0'; | ||
130 | if (c > 9) | ||
131 | goto out; | ||
132 | if (n >= (~0U-9)/10) | ||
133 | goto out; | ||
134 | n *= 10; | ||
135 | n += c; | ||
136 | } | ||
137 | return n; | ||
138 | out: | ||
139 | return ~0U; | ||
140 | } | ||
141 | |||
94 | struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, | 142 | struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, |
95 | struct dentry *dentry); | 143 | struct dentry *dentry); |
96 | int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, | 144 | int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, |
diff --git a/fs/proc/page.c b/fs/proc/page.c index 7fcd0d60a968..b8730d9ebaee 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c | |||
@@ -115,7 +115,13 @@ u64 stable_page_flags(struct page *page) | |||
115 | u |= 1 << KPF_COMPOUND_TAIL; | 115 | u |= 1 << KPF_COMPOUND_TAIL; |
116 | if (PageHuge(page)) | 116 | if (PageHuge(page)) |
117 | u |= 1 << KPF_HUGE; | 117 | u |= 1 << KPF_HUGE; |
118 | else if (PageTransCompound(page)) | 118 | /* |
119 | * PageTransCompound can be true for non-huge compound pages (slab | ||
120 | * pages or pages allocated by drivers with __GFP_COMP) because it | ||
121 | * just checks PG_head/PG_tail, so we need to check PageLRU to make | ||
122 | * sure a given page is a thp, not a non-huge compound page. | ||
123 | */ | ||
124 | else if (PageTransCompound(page) && PageLRU(compound_trans_head(page))) | ||
119 | u |= 1 << KPF_THP; | 125 | u |= 1 << KPF_THP; |
120 | 126 | ||
121 | /* | 127 | /* |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index dfafeb2b05a0..a781bdf06694 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -142,6 +142,7 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) | |||
142 | } | 142 | } |
143 | 143 | ||
144 | rb_link_node(node, parent, p); | 144 | rb_link_node(node, parent, p); |
145 | rb_insert_color(node, &head->parent->root); | ||
145 | return 0; | 146 | return 0; |
146 | } | 147 | } |
147 | 148 | ||
@@ -168,10 +169,8 @@ static void init_header(struct ctl_table_header *head, | |||
168 | head->node = node; | 169 | head->node = node; |
169 | if (node) { | 170 | if (node) { |
170 | struct ctl_table *entry; | 171 | struct ctl_table *entry; |
171 | for (entry = table; entry->procname; entry++, node++) { | 172 | for (entry = table; entry->procname; entry++, node++) |
172 | rb_init_node(&node->node); | ||
173 | node->header = head; | 173 | node->header = head; |
174 | } | ||
175 | } | 174 | } |
176 | } | 175 | } |
177 | 176 | ||
@@ -266,8 +265,7 @@ void sysctl_head_put(struct ctl_table_header *head) | |||
266 | 265 | ||
267 | static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) | 266 | static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) |
268 | { | 267 | { |
269 | if (!head) | 268 | BUG_ON(!head); |
270 | BUG(); | ||
271 | spin_lock(&sysctl_lock); | 269 | spin_lock(&sysctl_lock); |
272 | if (!use_table(head)) | 270 | if (!use_table(head)) |
273 | head = ERR_PTR(-ENOENT); | 271 | head = ERR_PTR(-ENOENT); |
@@ -462,9 +460,6 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, | |||
462 | 460 | ||
463 | err = ERR_PTR(-ENOMEM); | 461 | err = ERR_PTR(-ENOMEM); |
464 | inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); | 462 | inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); |
465 | if (h) | ||
466 | sysctl_head_finish(h); | ||
467 | |||
468 | if (!inode) | 463 | if (!inode) |
469 | goto out; | 464 | goto out; |
470 | 465 | ||
@@ -473,6 +468,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, | |||
473 | d_add(dentry, inode); | 468 | d_add(dentry, inode); |
474 | 469 | ||
475 | out: | 470 | out: |
471 | if (h) | ||
472 | sysctl_head_finish(h); | ||
476 | sysctl_head_finish(head); | 473 | sysctl_head_finish(head); |
477 | return err; | 474 | return err; |
478 | } | 475 | } |
diff --git a/fs/proc/root.c b/fs/proc/root.c index 9a2d9fd7cadd..9889a92d2e01 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -61,7 +61,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) | |||
61 | if (!*p) | 61 | if (!*p) |
62 | continue; | 62 | continue; |
63 | 63 | ||
64 | args[0].to = args[0].from = 0; | 64 | args[0].to = args[0].from = NULL; |
65 | token = match_token(p, tokens, args); | 65 | token = match_token(p, tokens, args); |
66 | switch (token) { | 66 | switch (token) { |
67 | case Opt_gid: | 67 | case Opt_gid: |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4540b8f76f16..79827ce03e3b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -54,7 +54,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
54 | "VmPTE:\t%8lu kB\n" | 54 | "VmPTE:\t%8lu kB\n" |
55 | "VmSwap:\t%8lu kB\n", | 55 | "VmSwap:\t%8lu kB\n", |
56 | hiwater_vm << (PAGE_SHIFT-10), | 56 | hiwater_vm << (PAGE_SHIFT-10), |
57 | (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), | 57 | total_vm << (PAGE_SHIFT-10), |
58 | mm->locked_vm << (PAGE_SHIFT-10), | 58 | mm->locked_vm << (PAGE_SHIFT-10), |
59 | mm->pinned_vm << (PAGE_SHIFT-10), | 59 | mm->pinned_vm << (PAGE_SHIFT-10), |
60 | hiwater_rss << (PAGE_SHIFT-10), | 60 | hiwater_rss << (PAGE_SHIFT-10), |
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index d39bb5cce883..ca71db69da07 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig | |||
@@ -23,6 +23,7 @@ config PSTORE_FTRACE | |||
23 | bool "Persistent function tracer" | 23 | bool "Persistent function tracer" |
24 | depends on PSTORE | 24 | depends on PSTORE |
25 | depends on FUNCTION_TRACER | 25 | depends on FUNCTION_TRACER |
26 | depends on DEBUG_FS | ||
26 | help | 27 | help |
27 | With this option kernel traces function calls into a persistent | 28 | With this option kernel traces function calls into a persistent |
28 | ram buffer that can be decoded and dumped after reboot through | 29 | ram buffer that can be decoded and dumped after reboot through |
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index a130d484b7d3..2d57e1ac0115 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c | |||
@@ -17,19 +17,113 @@ | |||
17 | #include <linux/percpu.h> | 17 | #include <linux/percpu.h> |
18 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
19 | #include <linux/atomic.h> | 19 | #include <linux/atomic.h> |
20 | #include <linux/types.h> | ||
21 | #include <linux/mutex.h> | ||
22 | #include <linux/ftrace.h> | ||
23 | #include <linux/fs.h> | ||
24 | #include <linux/debugfs.h> | ||
25 | #include <linux/err.h> | ||
26 | #include <linux/cache.h> | ||
20 | #include <asm/barrier.h> | 27 | #include <asm/barrier.h> |
21 | #include "internal.h" | 28 | #include "internal.h" |
22 | 29 | ||
23 | void notrace pstore_ftrace_call(unsigned long ip, unsigned long parent_ip) | 30 | static void notrace pstore_ftrace_call(unsigned long ip, |
31 | unsigned long parent_ip) | ||
24 | { | 32 | { |
33 | unsigned long flags; | ||
25 | struct pstore_ftrace_record rec = {}; | 34 | struct pstore_ftrace_record rec = {}; |
26 | 35 | ||
27 | if (unlikely(oops_in_progress)) | 36 | if (unlikely(oops_in_progress)) |
28 | return; | 37 | return; |
29 | 38 | ||
39 | local_irq_save(flags); | ||
40 | |||
30 | rec.ip = ip; | 41 | rec.ip = ip; |
31 | rec.parent_ip = parent_ip; | 42 | rec.parent_ip = parent_ip; |
32 | pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); | 43 | pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); |
33 | psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, | 44 | psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, |
34 | sizeof(rec), psinfo); | 45 | sizeof(rec), psinfo); |
46 | |||
47 | local_irq_restore(flags); | ||
48 | } | ||
49 | |||
50 | static struct ftrace_ops pstore_ftrace_ops __read_mostly = { | ||
51 | .func = pstore_ftrace_call, | ||
52 | }; | ||
53 | |||
54 | static DEFINE_MUTEX(pstore_ftrace_lock); | ||
55 | static bool pstore_ftrace_enabled; | ||
56 | |||
57 | static ssize_t pstore_ftrace_knob_write(struct file *f, const char __user *buf, | ||
58 | size_t count, loff_t *ppos) | ||
59 | { | ||
60 | u8 on; | ||
61 | ssize_t ret; | ||
62 | |||
63 | ret = kstrtou8_from_user(buf, count, 2, &on); | ||
64 | if (ret) | ||
65 | return ret; | ||
66 | |||
67 | mutex_lock(&pstore_ftrace_lock); | ||
68 | |||
69 | if (!on ^ pstore_ftrace_enabled) | ||
70 | goto out; | ||
71 | |||
72 | if (on) | ||
73 | ret = register_ftrace_function(&pstore_ftrace_ops); | ||
74 | else | ||
75 | ret = unregister_ftrace_function(&pstore_ftrace_ops); | ||
76 | if (ret) { | ||
77 | pr_err("%s: unable to %sregister ftrace ops: %zd\n", | ||
78 | __func__, on ? "" : "un", ret); | ||
79 | goto err; | ||
80 | } | ||
81 | |||
82 | pstore_ftrace_enabled = on; | ||
83 | out: | ||
84 | ret = count; | ||
85 | err: | ||
86 | mutex_unlock(&pstore_ftrace_lock); | ||
87 | |||
88 | return ret; | ||
89 | } | ||
90 | |||
91 | static ssize_t pstore_ftrace_knob_read(struct file *f, char __user *buf, | ||
92 | size_t count, loff_t *ppos) | ||
93 | { | ||
94 | char val[] = { '0' + pstore_ftrace_enabled, '\n' }; | ||
95 | |||
96 | return simple_read_from_buffer(buf, count, ppos, val, sizeof(val)); | ||
97 | } | ||
98 | |||
99 | static const struct file_operations pstore_knob_fops = { | ||
100 | .open = simple_open, | ||
101 | .read = pstore_ftrace_knob_read, | ||
102 | .write = pstore_ftrace_knob_write, | ||
103 | }; | ||
104 | |||
105 | void pstore_register_ftrace(void) | ||
106 | { | ||
107 | struct dentry *dir; | ||
108 | struct dentry *file; | ||
109 | |||
110 | if (!psinfo->write_buf) | ||
111 | return; | ||
112 | |||
113 | dir = debugfs_create_dir("pstore", NULL); | ||
114 | if (!dir) { | ||
115 | pr_err("%s: unable to create pstore directory\n", __func__); | ||
116 | return; | ||
117 | } | ||
118 | |||
119 | file = debugfs_create_file("record_ftrace", 0600, dir, NULL, | ||
120 | &pstore_knob_fops); | ||
121 | if (!file) { | ||
122 | pr_err("%s: unable to create record_ftrace file\n", __func__); | ||
123 | goto err_file; | ||
124 | } | ||
125 | |||
126 | return; | ||
127 | err_file: | ||
128 | debugfs_remove(dir); | ||
35 | } | 129 | } |
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 0d0d3b7d5f12..4847f588b7d5 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h | |||
@@ -39,6 +39,12 @@ pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec) | |||
39 | #endif | 39 | #endif |
40 | } | 40 | } |
41 | 41 | ||
42 | #ifdef CONFIG_PSTORE_FTRACE | ||
43 | extern void pstore_register_ftrace(void); | ||
44 | #else | ||
45 | static inline void pstore_register_ftrace(void) {} | ||
46 | #endif | ||
47 | |||
42 | extern struct pstore_info *psinfo; | 48 | extern struct pstore_info *psinfo; |
43 | 49 | ||
44 | extern void pstore_set_kmsg_bytes(int); | 50 | extern void pstore_set_kmsg_bytes(int); |
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 29996e8793a7..a40da07e93d6 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c | |||
@@ -164,7 +164,13 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) | |||
164 | 164 | ||
165 | if (c > psinfo->bufsize) | 165 | if (c > psinfo->bufsize) |
166 | c = psinfo->bufsize; | 166 | c = psinfo->bufsize; |
167 | spin_lock_irqsave(&psinfo->buf_lock, flags); | 167 | |
168 | if (oops_in_progress) { | ||
169 | if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) | ||
170 | break; | ||
171 | } else { | ||
172 | spin_lock_irqsave(&psinfo->buf_lock, flags); | ||
173 | } | ||
168 | memcpy(psinfo->buf, s, c); | 174 | memcpy(psinfo->buf, s, c); |
169 | psinfo->write(PSTORE_TYPE_CONSOLE, 0, NULL, 0, c, psinfo); | 175 | psinfo->write(PSTORE_TYPE_CONSOLE, 0, NULL, 0, c, psinfo); |
170 | spin_unlock_irqrestore(&psinfo->buf_lock, flags); | 176 | spin_unlock_irqrestore(&psinfo->buf_lock, flags); |
@@ -236,6 +242,7 @@ int pstore_register(struct pstore_info *psi) | |||
236 | 242 | ||
237 | kmsg_dump_register(&pstore_dumper); | 243 | kmsg_dump_register(&pstore_dumper); |
238 | pstore_register_console(); | 244 | pstore_register_console(); |
245 | pstore_register_ftrace(); | ||
239 | 246 | ||
240 | if (pstore_update_ms >= 0) { | 247 | if (pstore_update_ms >= 0) { |
241 | pstore_timer.expires = jiffies + | 248 | pstore_timer.expires = jiffies + |
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 0b311bc18916..1a4f6da58eab 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/ioport.h> | 32 | #include <linux/ioport.h> |
33 | #include <linux/platform_device.h> | 33 | #include <linux/platform_device.h> |
34 | #include <linux/slab.h> | 34 | #include <linux/slab.h> |
35 | #include <linux/compiler.h> | ||
35 | #include <linux/pstore_ram.h> | 36 | #include <linux/pstore_ram.h> |
36 | 37 | ||
37 | #define RAMOOPS_KERNMSG_HDR "====" | 38 | #define RAMOOPS_KERNMSG_HDR "====" |
@@ -181,12 +182,11 @@ static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) | |||
181 | return len; | 182 | return len; |
182 | } | 183 | } |
183 | 184 | ||
184 | 185 | static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, | |
185 | static int ramoops_pstore_write_buf(enum pstore_type_id type, | 186 | enum kmsg_dump_reason reason, |
186 | enum kmsg_dump_reason reason, | 187 | u64 *id, unsigned int part, |
187 | u64 *id, unsigned int part, | 188 | const char *buf, size_t size, |
188 | const char *buf, size_t size, | 189 | struct pstore_info *psi) |
189 | struct pstore_info *psi) | ||
190 | { | 190 | { |
191 | struct ramoops_context *cxt = psi->data; | 191 | struct ramoops_context *cxt = psi->data; |
192 | struct persistent_ram_zone *prz = cxt->przs[cxt->dump_write_cnt]; | 192 | struct persistent_ram_zone *prz = cxt->przs[cxt->dump_write_cnt]; |
@@ -406,7 +406,7 @@ static int __devinit ramoops_probe(struct platform_device *pdev) | |||
406 | goto fail_init_fprz; | 406 | goto fail_init_fprz; |
407 | 407 | ||
408 | if (!cxt->przs && !cxt->cprz && !cxt->fprz) { | 408 | if (!cxt->przs && !cxt->cprz && !cxt->fprz) { |
409 | pr_err("memory size too small, minimum is %lu\n", | 409 | pr_err("memory size too small, minimum is %zu\n", |
410 | cxt->console_size + cxt->record_size + | 410 | cxt->console_size + cxt->record_size + |
411 | cxt->ftrace_size); | 411 | cxt->ftrace_size); |
412 | goto fail_cnt; | 412 | goto fail_cnt; |
@@ -414,13 +414,14 @@ static int __devinit ramoops_probe(struct platform_device *pdev) | |||
414 | 414 | ||
415 | cxt->pstore.data = cxt; | 415 | cxt->pstore.data = cxt; |
416 | /* | 416 | /* |
417 | * Console can handle any buffer size, so prefer dumps buffer | 417 | * Console can handle any buffer size, so prefer LOG_LINE_MAX. If we |
418 | * size since usually it is smaller. | 418 | * have to handle dumps, we must have at least record_size buffer. And |
419 | * for ftrace, bufsize is irrelevant (if bufsize is 0, buf will be | ||
420 | * ZERO_SIZE_PTR). | ||
419 | */ | 421 | */ |
420 | if (cxt->przs) | 422 | if (cxt->console_size) |
421 | cxt->pstore.bufsize = cxt->przs[0]->buffer_size; | 423 | cxt->pstore.bufsize = 1024; /* LOG_LINE_MAX */ |
422 | else | 424 | cxt->pstore.bufsize = max(cxt->record_size, cxt->pstore.bufsize); |
423 | cxt->pstore.bufsize = cxt->cprz->buffer_size; | ||
424 | cxt->pstore.buf = kmalloc(cxt->pstore.bufsize, GFP_KERNEL); | 425 | cxt->pstore.buf = kmalloc(cxt->pstore.bufsize, GFP_KERNEL); |
425 | spin_lock_init(&cxt->pstore.buf_lock); | 426 | spin_lock_init(&cxt->pstore.buf_lock); |
426 | if (!cxt->pstore.buf) { | 427 | if (!cxt->pstore.buf) { |
@@ -537,6 +538,7 @@ postcore_initcall(ramoops_init); | |||
537 | static void __exit ramoops_exit(void) | 538 | static void __exit ramoops_exit(void) |
538 | { | 539 | { |
539 | platform_driver_unregister(&ramoops_driver); | 540 | platform_driver_unregister(&ramoops_driver); |
541 | platform_device_unregister(dummy); | ||
540 | kfree(dummy_data); | 542 | kfree(dummy_data); |
541 | } | 543 | } |
542 | module_exit(ramoops_exit); | 544 | module_exit(ramoops_exit); |
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 552e994e3aa1..43098bb5723a 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c | |||
@@ -312,8 +312,8 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino) | |||
312 | (ino % QNX4_INODES_PER_BLOCK); | 312 | (ino % QNX4_INODES_PER_BLOCK); |
313 | 313 | ||
314 | inode->i_mode = le16_to_cpu(raw_inode->di_mode); | 314 | inode->i_mode = le16_to_cpu(raw_inode->di_mode); |
315 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->di_uid); | 315 | i_uid_write(inode, (uid_t)le16_to_cpu(raw_inode->di_uid)); |
316 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->di_gid); | 316 | i_gid_write(inode, (gid_t)le16_to_cpu(raw_inode->di_gid)); |
317 | set_nlink(inode, le16_to_cpu(raw_inode->di_nlink)); | 317 | set_nlink(inode, le16_to_cpu(raw_inode->di_nlink)); |
318 | inode->i_size = le32_to_cpu(raw_inode->di_size); | 318 | inode->i_size = le32_to_cpu(raw_inode->di_size); |
319 | inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->di_mtime); | 319 | inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->di_mtime); |
@@ -391,6 +391,11 @@ static int init_inodecache(void) | |||
391 | 391 | ||
392 | static void destroy_inodecache(void) | 392 | static void destroy_inodecache(void) |
393 | { | 393 | { |
394 | /* | ||
395 | * Make sure all delayed rcu free inodes are flushed before we | ||
396 | * destroy cache. | ||
397 | */ | ||
398 | rcu_barrier(); | ||
394 | kmem_cache_destroy(qnx4_inode_cachep); | 399 | kmem_cache_destroy(qnx4_inode_cachep); |
395 | } | 400 | } |
396 | 401 | ||
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 2049c814bda4..b6addf560483 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c | |||
@@ -574,8 +574,8 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino) | |||
574 | raw_inode = ((struct qnx6_inode_entry *)page_address(page)) + offs; | 574 | raw_inode = ((struct qnx6_inode_entry *)page_address(page)) + offs; |
575 | 575 | ||
576 | inode->i_mode = fs16_to_cpu(sbi, raw_inode->di_mode); | 576 | inode->i_mode = fs16_to_cpu(sbi, raw_inode->di_mode); |
577 | inode->i_uid = (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid); | 577 | i_uid_write(inode, (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid)); |
578 | inode->i_gid = (gid_t)fs32_to_cpu(sbi, raw_inode->di_gid); | 578 | i_gid_write(inode, (gid_t)fs32_to_cpu(sbi, raw_inode->di_gid)); |
579 | inode->i_size = fs64_to_cpu(sbi, raw_inode->di_size); | 579 | inode->i_size = fs64_to_cpu(sbi, raw_inode->di_size); |
580 | inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_mtime); | 580 | inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_mtime); |
581 | inode->i_mtime.tv_nsec = 0; | 581 | inode->i_mtime.tv_nsec = 0; |
@@ -651,6 +651,11 @@ static int init_inodecache(void) | |||
651 | 651 | ||
652 | static void destroy_inodecache(void) | 652 | static void destroy_inodecache(void) |
653 | { | 653 | { |
654 | /* | ||
655 | * Make sure all delayed rcu free inodes are flushed before we | ||
656 | * destroy cache. | ||
657 | */ | ||
658 | rcu_barrier(); | ||
654 | kmem_cache_destroy(qnx6_inode_cachep); | 659 | kmem_cache_destroy(qnx6_inode_cachep); |
655 | } | 660 | } |
656 | 661 | ||
diff --git a/fs/quota/Makefile b/fs/quota/Makefile index 5f9e9e276af0..c66c37cdaa39 100644 --- a/fs/quota/Makefile +++ b/fs/quota/Makefile | |||
@@ -2,6 +2,6 @@ obj-$(CONFIG_QUOTA) += dquot.o | |||
2 | obj-$(CONFIG_QFMT_V1) += quota_v1.o | 2 | obj-$(CONFIG_QFMT_V1) += quota_v1.o |
3 | obj-$(CONFIG_QFMT_V2) += quota_v2.o | 3 | obj-$(CONFIG_QFMT_V2) += quota_v2.o |
4 | obj-$(CONFIG_QUOTA_TREE) += quota_tree.o | 4 | obj-$(CONFIG_QUOTA_TREE) += quota_tree.o |
5 | obj-$(CONFIG_QUOTACTL) += quota.o | 5 | obj-$(CONFIG_QUOTACTL) += quota.o kqid.o |
6 | obj-$(CONFIG_QUOTACTL_COMPAT) += compat.o | 6 | obj-$(CONFIG_QUOTACTL_COMPAT) += compat.o |
7 | obj-$(CONFIG_QUOTA_NETLINK_INTERFACE) += netlink.o | 7 | obj-$(CONFIG_QUOTA_NETLINK_INTERFACE) += netlink.o |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 36a29b753c79..557a9c20a215 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -253,8 +253,10 @@ static qsize_t inode_get_rsv_space(struct inode *inode); | |||
253 | static void __dquot_initialize(struct inode *inode, int type); | 253 | static void __dquot_initialize(struct inode *inode, int type); |
254 | 254 | ||
255 | static inline unsigned int | 255 | static inline unsigned int |
256 | hashfn(const struct super_block *sb, unsigned int id, int type) | 256 | hashfn(const struct super_block *sb, struct kqid qid) |
257 | { | 257 | { |
258 | unsigned int id = from_kqid(&init_user_ns, qid); | ||
259 | int type = qid.type; | ||
258 | unsigned long tmp; | 260 | unsigned long tmp; |
259 | 261 | ||
260 | tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type); | 262 | tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type); |
@@ -267,7 +269,7 @@ hashfn(const struct super_block *sb, unsigned int id, int type) | |||
267 | static inline void insert_dquot_hash(struct dquot *dquot) | 269 | static inline void insert_dquot_hash(struct dquot *dquot) |
268 | { | 270 | { |
269 | struct hlist_head *head; | 271 | struct hlist_head *head; |
270 | head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id, dquot->dq_type); | 272 | head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id); |
271 | hlist_add_head(&dquot->dq_hash, head); | 273 | hlist_add_head(&dquot->dq_hash, head); |
272 | } | 274 | } |
273 | 275 | ||
@@ -277,15 +279,14 @@ static inline void remove_dquot_hash(struct dquot *dquot) | |||
277 | } | 279 | } |
278 | 280 | ||
279 | static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, | 281 | static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, |
280 | unsigned int id, int type) | 282 | struct kqid qid) |
281 | { | 283 | { |
282 | struct hlist_node *node; | 284 | struct hlist_node *node; |
283 | struct dquot *dquot; | 285 | struct dquot *dquot; |
284 | 286 | ||
285 | hlist_for_each (node, dquot_hash+hashent) { | 287 | hlist_for_each (node, dquot_hash+hashent) { |
286 | dquot = hlist_entry(node, struct dquot, dq_hash); | 288 | dquot = hlist_entry(node, struct dquot, dq_hash); |
287 | if (dquot->dq_sb == sb && dquot->dq_id == id && | 289 | if (dquot->dq_sb == sb && qid_eq(dquot->dq_id, qid)) |
288 | dquot->dq_type == type) | ||
289 | return dquot; | 290 | return dquot; |
290 | } | 291 | } |
291 | return NULL; | 292 | return NULL; |
@@ -351,7 +352,7 @@ int dquot_mark_dquot_dirty(struct dquot *dquot) | |||
351 | spin_lock(&dq_list_lock); | 352 | spin_lock(&dq_list_lock); |
352 | if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) { | 353 | if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) { |
353 | list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> | 354 | list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> |
354 | info[dquot->dq_type].dqi_dirty_list); | 355 | info[dquot->dq_id.type].dqi_dirty_list); |
355 | ret = 0; | 356 | ret = 0; |
356 | } | 357 | } |
357 | spin_unlock(&dq_list_lock); | 358 | spin_unlock(&dq_list_lock); |
@@ -410,17 +411,17 @@ int dquot_acquire(struct dquot *dquot) | |||
410 | mutex_lock(&dquot->dq_lock); | 411 | mutex_lock(&dquot->dq_lock); |
411 | mutex_lock(&dqopt->dqio_mutex); | 412 | mutex_lock(&dqopt->dqio_mutex); |
412 | if (!test_bit(DQ_READ_B, &dquot->dq_flags)) | 413 | if (!test_bit(DQ_READ_B, &dquot->dq_flags)) |
413 | ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot); | 414 | ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot); |
414 | if (ret < 0) | 415 | if (ret < 0) |
415 | goto out_iolock; | 416 | goto out_iolock; |
416 | set_bit(DQ_READ_B, &dquot->dq_flags); | 417 | set_bit(DQ_READ_B, &dquot->dq_flags); |
417 | /* Instantiate dquot if needed */ | 418 | /* Instantiate dquot if needed */ |
418 | if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && !dquot->dq_off) { | 419 | if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && !dquot->dq_off) { |
419 | ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); | 420 | ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot); |
420 | /* Write the info if needed */ | 421 | /* Write the info if needed */ |
421 | if (info_dirty(&dqopt->info[dquot->dq_type])) { | 422 | if (info_dirty(&dqopt->info[dquot->dq_id.type])) { |
422 | ret2 = dqopt->ops[dquot->dq_type]->write_file_info( | 423 | ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info( |
423 | dquot->dq_sb, dquot->dq_type); | 424 | dquot->dq_sb, dquot->dq_id.type); |
424 | } | 425 | } |
425 | if (ret < 0) | 426 | if (ret < 0) |
426 | goto out_iolock; | 427 | goto out_iolock; |
@@ -455,7 +456,7 @@ int dquot_commit(struct dquot *dquot) | |||
455 | /* Inactive dquot can be only if there was error during read/init | 456 | /* Inactive dquot can be only if there was error during read/init |
456 | * => we have better not writing it */ | 457 | * => we have better not writing it */ |
457 | if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) | 458 | if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) |
458 | ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); | 459 | ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot); |
459 | else | 460 | else |
460 | ret = -EIO; | 461 | ret = -EIO; |
461 | out_sem: | 462 | out_sem: |
@@ -477,12 +478,12 @@ int dquot_release(struct dquot *dquot) | |||
477 | if (atomic_read(&dquot->dq_count) > 1) | 478 | if (atomic_read(&dquot->dq_count) > 1) |
478 | goto out_dqlock; | 479 | goto out_dqlock; |
479 | mutex_lock(&dqopt->dqio_mutex); | 480 | mutex_lock(&dqopt->dqio_mutex); |
480 | if (dqopt->ops[dquot->dq_type]->release_dqblk) { | 481 | if (dqopt->ops[dquot->dq_id.type]->release_dqblk) { |
481 | ret = dqopt->ops[dquot->dq_type]->release_dqblk(dquot); | 482 | ret = dqopt->ops[dquot->dq_id.type]->release_dqblk(dquot); |
482 | /* Write the info */ | 483 | /* Write the info */ |
483 | if (info_dirty(&dqopt->info[dquot->dq_type])) { | 484 | if (info_dirty(&dqopt->info[dquot->dq_id.type])) { |
484 | ret2 = dqopt->ops[dquot->dq_type]->write_file_info( | 485 | ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info( |
485 | dquot->dq_sb, dquot->dq_type); | 486 | dquot->dq_sb, dquot->dq_id.type); |
486 | } | 487 | } |
487 | if (ret >= 0) | 488 | if (ret >= 0) |
488 | ret = ret2; | 489 | ret = ret2; |
@@ -521,7 +522,7 @@ restart: | |||
521 | list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) { | 522 | list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) { |
522 | if (dquot->dq_sb != sb) | 523 | if (dquot->dq_sb != sb) |
523 | continue; | 524 | continue; |
524 | if (dquot->dq_type != type) | 525 | if (dquot->dq_id.type != type) |
525 | continue; | 526 | continue; |
526 | /* Wait for dquot users */ | 527 | /* Wait for dquot users */ |
527 | if (atomic_read(&dquot->dq_count)) { | 528 | if (atomic_read(&dquot->dq_count)) { |
@@ -741,7 +742,8 @@ void dqput(struct dquot *dquot) | |||
741 | #ifdef CONFIG_QUOTA_DEBUG | 742 | #ifdef CONFIG_QUOTA_DEBUG |
742 | if (!atomic_read(&dquot->dq_count)) { | 743 | if (!atomic_read(&dquot->dq_count)) { |
743 | quota_error(dquot->dq_sb, "trying to free free dquot of %s %d", | 744 | quota_error(dquot->dq_sb, "trying to free free dquot of %s %d", |
744 | quotatypes[dquot->dq_type], dquot->dq_id); | 745 | quotatypes[dquot->dq_id.type], |
746 | from_kqid(&init_user_ns, dquot->dq_id)); | ||
745 | BUG(); | 747 | BUG(); |
746 | } | 748 | } |
747 | #endif | 749 | #endif |
@@ -752,7 +754,7 @@ we_slept: | |||
752 | /* We have more than one user... nothing to do */ | 754 | /* We have more than one user... nothing to do */ |
753 | atomic_dec(&dquot->dq_count); | 755 | atomic_dec(&dquot->dq_count); |
754 | /* Releasing dquot during quotaoff phase? */ | 756 | /* Releasing dquot during quotaoff phase? */ |
755 | if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_type) && | 757 | if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_id.type) && |
756 | atomic_read(&dquot->dq_count) == 1) | 758 | atomic_read(&dquot->dq_count) == 1) |
757 | wake_up(&dquot->dq_wait_unused); | 759 | wake_up(&dquot->dq_wait_unused); |
758 | spin_unlock(&dq_list_lock); | 760 | spin_unlock(&dq_list_lock); |
@@ -815,7 +817,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) | |||
815 | INIT_LIST_HEAD(&dquot->dq_dirty); | 817 | INIT_LIST_HEAD(&dquot->dq_dirty); |
816 | init_waitqueue_head(&dquot->dq_wait_unused); | 818 | init_waitqueue_head(&dquot->dq_wait_unused); |
817 | dquot->dq_sb = sb; | 819 | dquot->dq_sb = sb; |
818 | dquot->dq_type = type; | 820 | dquot->dq_id = make_kqid_invalid(type); |
819 | atomic_set(&dquot->dq_count, 1); | 821 | atomic_set(&dquot->dq_count, 1); |
820 | 822 | ||
821 | return dquot; | 823 | return dquot; |
@@ -829,35 +831,35 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) | |||
829 | * a) checking for quota flags under dq_list_lock and | 831 | * a) checking for quota flags under dq_list_lock and |
830 | * b) getting a reference to dquot before we release dq_list_lock | 832 | * b) getting a reference to dquot before we release dq_list_lock |
831 | */ | 833 | */ |
832 | struct dquot *dqget(struct super_block *sb, unsigned int id, int type) | 834 | struct dquot *dqget(struct super_block *sb, struct kqid qid) |
833 | { | 835 | { |
834 | unsigned int hashent = hashfn(sb, id, type); | 836 | unsigned int hashent = hashfn(sb, qid); |
835 | struct dquot *dquot = NULL, *empty = NULL; | 837 | struct dquot *dquot = NULL, *empty = NULL; |
836 | 838 | ||
837 | if (!sb_has_quota_active(sb, type)) | 839 | if (!sb_has_quota_active(sb, qid.type)) |
838 | return NULL; | 840 | return NULL; |
839 | we_slept: | 841 | we_slept: |
840 | spin_lock(&dq_list_lock); | 842 | spin_lock(&dq_list_lock); |
841 | spin_lock(&dq_state_lock); | 843 | spin_lock(&dq_state_lock); |
842 | if (!sb_has_quota_active(sb, type)) { | 844 | if (!sb_has_quota_active(sb, qid.type)) { |
843 | spin_unlock(&dq_state_lock); | 845 | spin_unlock(&dq_state_lock); |
844 | spin_unlock(&dq_list_lock); | 846 | spin_unlock(&dq_list_lock); |
845 | goto out; | 847 | goto out; |
846 | } | 848 | } |
847 | spin_unlock(&dq_state_lock); | 849 | spin_unlock(&dq_state_lock); |
848 | 850 | ||
849 | dquot = find_dquot(hashent, sb, id, type); | 851 | dquot = find_dquot(hashent, sb, qid); |
850 | if (!dquot) { | 852 | if (!dquot) { |
851 | if (!empty) { | 853 | if (!empty) { |
852 | spin_unlock(&dq_list_lock); | 854 | spin_unlock(&dq_list_lock); |
853 | empty = get_empty_dquot(sb, type); | 855 | empty = get_empty_dquot(sb, qid.type); |
854 | if (!empty) | 856 | if (!empty) |
855 | schedule(); /* Try to wait for a moment... */ | 857 | schedule(); /* Try to wait for a moment... */ |
856 | goto we_slept; | 858 | goto we_slept; |
857 | } | 859 | } |
858 | dquot = empty; | 860 | dquot = empty; |
859 | empty = NULL; | 861 | empty = NULL; |
860 | dquot->dq_id = id; | 862 | dquot->dq_id = qid; |
861 | /* all dquots go on the inuse_list */ | 863 | /* all dquots go on the inuse_list */ |
862 | put_inuse(dquot); | 864 | put_inuse(dquot); |
863 | /* hash it first so it can be found */ | 865 | /* hash it first so it can be found */ |
@@ -1129,8 +1131,7 @@ static void dquot_decr_space(struct dquot *dquot, qsize_t number) | |||
1129 | 1131 | ||
1130 | struct dquot_warn { | 1132 | struct dquot_warn { |
1131 | struct super_block *w_sb; | 1133 | struct super_block *w_sb; |
1132 | qid_t w_dq_id; | 1134 | struct kqid w_dq_id; |
1133 | short w_dq_type; | ||
1134 | short w_type; | 1135 | short w_type; |
1135 | }; | 1136 | }; |
1136 | 1137 | ||
@@ -1154,11 +1155,11 @@ static int need_print_warning(struct dquot_warn *warn) | |||
1154 | if (!flag_print_warnings) | 1155 | if (!flag_print_warnings) |
1155 | return 0; | 1156 | return 0; |
1156 | 1157 | ||
1157 | switch (warn->w_dq_type) { | 1158 | switch (warn->w_dq_id.type) { |
1158 | case USRQUOTA: | 1159 | case USRQUOTA: |
1159 | return current_fsuid() == warn->w_dq_id; | 1160 | return uid_eq(current_fsuid(), warn->w_dq_id.uid); |
1160 | case GRPQUOTA: | 1161 | case GRPQUOTA: |
1161 | return in_group_p(warn->w_dq_id); | 1162 | return in_group_p(warn->w_dq_id.gid); |
1162 | } | 1163 | } |
1163 | return 0; | 1164 | return 0; |
1164 | } | 1165 | } |
@@ -1184,7 +1185,7 @@ static void print_warning(struct dquot_warn *warn) | |||
1184 | tty_write_message(tty, ": warning, "); | 1185 | tty_write_message(tty, ": warning, "); |
1185 | else | 1186 | else |
1186 | tty_write_message(tty, ": write failed, "); | 1187 | tty_write_message(tty, ": write failed, "); |
1187 | tty_write_message(tty, quotatypes[warn->w_dq_type]); | 1188 | tty_write_message(tty, quotatypes[warn->w_dq_id.type]); |
1188 | switch (warntype) { | 1189 | switch (warntype) { |
1189 | case QUOTA_NL_IHARDWARN: | 1190 | case QUOTA_NL_IHARDWARN: |
1190 | msg = " file limit reached.\r\n"; | 1191 | msg = " file limit reached.\r\n"; |
@@ -1218,7 +1219,6 @@ static void prepare_warning(struct dquot_warn *warn, struct dquot *dquot, | |||
1218 | warn->w_type = warntype; | 1219 | warn->w_type = warntype; |
1219 | warn->w_sb = dquot->dq_sb; | 1220 | warn->w_sb = dquot->dq_sb; |
1220 | warn->w_dq_id = dquot->dq_id; | 1221 | warn->w_dq_id = dquot->dq_id; |
1221 | warn->w_dq_type = dquot->dq_type; | ||
1222 | } | 1222 | } |
1223 | 1223 | ||
1224 | /* | 1224 | /* |
@@ -1236,14 +1236,14 @@ static void flush_warnings(struct dquot_warn *warn) | |||
1236 | #ifdef CONFIG_PRINT_QUOTA_WARNING | 1236 | #ifdef CONFIG_PRINT_QUOTA_WARNING |
1237 | print_warning(&warn[i]); | 1237 | print_warning(&warn[i]); |
1238 | #endif | 1238 | #endif |
1239 | quota_send_warning(warn[i].w_dq_type, warn[i].w_dq_id, | 1239 | quota_send_warning(warn[i].w_dq_id, |
1240 | warn[i].w_sb->s_dev, warn[i].w_type); | 1240 | warn[i].w_sb->s_dev, warn[i].w_type); |
1241 | } | 1241 | } |
1242 | } | 1242 | } |
1243 | 1243 | ||
1244 | static int ignore_hardlimit(struct dquot *dquot) | 1244 | static int ignore_hardlimit(struct dquot *dquot) |
1245 | { | 1245 | { |
1246 | struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; | 1246 | struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; |
1247 | 1247 | ||
1248 | return capable(CAP_SYS_RESOURCE) && | 1248 | return capable(CAP_SYS_RESOURCE) && |
1249 | (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || | 1249 | (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || |
@@ -1256,7 +1256,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, | |||
1256 | { | 1256 | { |
1257 | qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes; | 1257 | qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes; |
1258 | 1258 | ||
1259 | if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) || | 1259 | if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type) || |
1260 | test_bit(DQ_FAKE_B, &dquot->dq_flags)) | 1260 | test_bit(DQ_FAKE_B, &dquot->dq_flags)) |
1261 | return 0; | 1261 | return 0; |
1262 | 1262 | ||
@@ -1281,7 +1281,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, | |||
1281 | dquot->dq_dqb.dqb_itime == 0) { | 1281 | dquot->dq_dqb.dqb_itime == 0) { |
1282 | prepare_warning(warn, dquot, QUOTA_NL_ISOFTWARN); | 1282 | prepare_warning(warn, dquot, QUOTA_NL_ISOFTWARN); |
1283 | dquot->dq_dqb.dqb_itime = get_seconds() + | 1283 | dquot->dq_dqb.dqb_itime = get_seconds() + |
1284 | sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; | 1284 | sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_igrace; |
1285 | } | 1285 | } |
1286 | 1286 | ||
1287 | return 0; | 1287 | return 0; |
@@ -1294,7 +1294,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, | |||
1294 | qsize_t tspace; | 1294 | qsize_t tspace; |
1295 | struct super_block *sb = dquot->dq_sb; | 1295 | struct super_block *sb = dquot->dq_sb; |
1296 | 1296 | ||
1297 | if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) || | 1297 | if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) || |
1298 | test_bit(DQ_FAKE_B, &dquot->dq_flags)) | 1298 | test_bit(DQ_FAKE_B, &dquot->dq_flags)) |
1299 | return 0; | 1299 | return 0; |
1300 | 1300 | ||
@@ -1325,7 +1325,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, | |||
1325 | if (!prealloc) { | 1325 | if (!prealloc) { |
1326 | prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN); | 1326 | prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN); |
1327 | dquot->dq_dqb.dqb_btime = get_seconds() + | 1327 | dquot->dq_dqb.dqb_btime = get_seconds() + |
1328 | sb_dqopt(sb)->info[dquot->dq_type].dqi_bgrace; | 1328 | sb_dqopt(sb)->info[dquot->dq_id.type].dqi_bgrace; |
1329 | } | 1329 | } |
1330 | else | 1330 | else |
1331 | /* | 1331 | /* |
@@ -1344,7 +1344,7 @@ static int info_idq_free(struct dquot *dquot, qsize_t inodes) | |||
1344 | 1344 | ||
1345 | if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || | 1345 | if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || |
1346 | dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit || | 1346 | dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit || |
1347 | !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type)) | 1347 | !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type)) |
1348 | return QUOTA_NL_NOWARN; | 1348 | return QUOTA_NL_NOWARN; |
1349 | 1349 | ||
1350 | newinodes = dquot->dq_dqb.dqb_curinodes - inodes; | 1350 | newinodes = dquot->dq_dqb.dqb_curinodes - inodes; |
@@ -1390,7 +1390,6 @@ static int dquot_active(const struct inode *inode) | |||
1390 | */ | 1390 | */ |
1391 | static void __dquot_initialize(struct inode *inode, int type) | 1391 | static void __dquot_initialize(struct inode *inode, int type) |
1392 | { | 1392 | { |
1393 | unsigned int id = 0; | ||
1394 | int cnt; | 1393 | int cnt; |
1395 | struct dquot *got[MAXQUOTAS]; | 1394 | struct dquot *got[MAXQUOTAS]; |
1396 | struct super_block *sb = inode->i_sb; | 1395 | struct super_block *sb = inode->i_sb; |
@@ -1403,18 +1402,19 @@ static void __dquot_initialize(struct inode *inode, int type) | |||
1403 | 1402 | ||
1404 | /* First get references to structures we might need. */ | 1403 | /* First get references to structures we might need. */ |
1405 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | 1404 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { |
1405 | struct kqid qid; | ||
1406 | got[cnt] = NULL; | 1406 | got[cnt] = NULL; |
1407 | if (type != -1 && cnt != type) | 1407 | if (type != -1 && cnt != type) |
1408 | continue; | 1408 | continue; |
1409 | switch (cnt) { | 1409 | switch (cnt) { |
1410 | case USRQUOTA: | 1410 | case USRQUOTA: |
1411 | id = inode->i_uid; | 1411 | qid = make_kqid_uid(inode->i_uid); |
1412 | break; | 1412 | break; |
1413 | case GRPQUOTA: | 1413 | case GRPQUOTA: |
1414 | id = inode->i_gid; | 1414 | qid = make_kqid_gid(inode->i_gid); |
1415 | break; | 1415 | break; |
1416 | } | 1416 | } |
1417 | got[cnt] = dqget(sb, id, cnt); | 1417 | got[cnt] = dqget(sb, qid); |
1418 | } | 1418 | } |
1419 | 1419 | ||
1420 | down_write(&sb_dqopt(sb)->dqptr_sem); | 1420 | down_write(&sb_dqopt(sb)->dqptr_sem); |
@@ -1589,10 +1589,10 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) | |||
1589 | goto out; | 1589 | goto out; |
1590 | } | 1590 | } |
1591 | 1591 | ||
1592 | down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||
1593 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) | 1592 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) |
1594 | warn[cnt].w_type = QUOTA_NL_NOWARN; | 1593 | warn[cnt].w_type = QUOTA_NL_NOWARN; |
1595 | 1594 | ||
1595 | down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||
1596 | spin_lock(&dq_data_lock); | 1596 | spin_lock(&dq_data_lock); |
1597 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | 1597 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { |
1598 | if (!dquots[cnt]) | 1598 | if (!dquots[cnt]) |
@@ -1897,10 +1897,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) | |||
1897 | if (!dquot_active(inode)) | 1897 | if (!dquot_active(inode)) |
1898 | return 0; | 1898 | return 0; |
1899 | 1899 | ||
1900 | if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) | 1900 | if (iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) |
1901 | transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA); | 1901 | transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(iattr->ia_uid)); |
1902 | if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) | 1902 | if (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)) |
1903 | transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_gid, GRPQUOTA); | 1903 | transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(iattr->ia_gid)); |
1904 | 1904 | ||
1905 | ret = __dquot_transfer(inode, transfer_to); | 1905 | ret = __dquot_transfer(inode, transfer_to); |
1906 | dqput_all(transfer_to); | 1906 | dqput_all(transfer_to); |
@@ -2360,9 +2360,9 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) | |||
2360 | 2360 | ||
2361 | memset(di, 0, sizeof(*di)); | 2361 | memset(di, 0, sizeof(*di)); |
2362 | di->d_version = FS_DQUOT_VERSION; | 2362 | di->d_version = FS_DQUOT_VERSION; |
2363 | di->d_flags = dquot->dq_type == USRQUOTA ? | 2363 | di->d_flags = dquot->dq_id.type == USRQUOTA ? |
2364 | FS_USER_QUOTA : FS_GROUP_QUOTA; | 2364 | FS_USER_QUOTA : FS_GROUP_QUOTA; |
2365 | di->d_id = dquot->dq_id; | 2365 | di->d_id = from_kqid_munged(current_user_ns(), dquot->dq_id); |
2366 | 2366 | ||
2367 | spin_lock(&dq_data_lock); | 2367 | spin_lock(&dq_data_lock); |
2368 | di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit); | 2368 | di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit); |
@@ -2376,12 +2376,12 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) | |||
2376 | spin_unlock(&dq_data_lock); | 2376 | spin_unlock(&dq_data_lock); |
2377 | } | 2377 | } |
2378 | 2378 | ||
2379 | int dquot_get_dqblk(struct super_block *sb, int type, qid_t id, | 2379 | int dquot_get_dqblk(struct super_block *sb, struct kqid qid, |
2380 | struct fs_disk_quota *di) | 2380 | struct fs_disk_quota *di) |
2381 | { | 2381 | { |
2382 | struct dquot *dquot; | 2382 | struct dquot *dquot; |
2383 | 2383 | ||
2384 | dquot = dqget(sb, id, type); | 2384 | dquot = dqget(sb, qid); |
2385 | if (!dquot) | 2385 | if (!dquot) |
2386 | return -ESRCH; | 2386 | return -ESRCH; |
2387 | do_get_dqblk(dquot, di); | 2387 | do_get_dqblk(dquot, di); |
@@ -2401,7 +2401,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) | |||
2401 | { | 2401 | { |
2402 | struct mem_dqblk *dm = &dquot->dq_dqb; | 2402 | struct mem_dqblk *dm = &dquot->dq_dqb; |
2403 | int check_blim = 0, check_ilim = 0; | 2403 | int check_blim = 0, check_ilim = 0; |
2404 | struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; | 2404 | struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; |
2405 | 2405 | ||
2406 | if (di->d_fieldmask & ~VFS_FS_DQ_MASK) | 2406 | if (di->d_fieldmask & ~VFS_FS_DQ_MASK) |
2407 | return -EINVAL; | 2407 | return -EINVAL; |
@@ -2488,13 +2488,13 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) | |||
2488 | return 0; | 2488 | return 0; |
2489 | } | 2489 | } |
2490 | 2490 | ||
2491 | int dquot_set_dqblk(struct super_block *sb, int type, qid_t id, | 2491 | int dquot_set_dqblk(struct super_block *sb, struct kqid qid, |
2492 | struct fs_disk_quota *di) | 2492 | struct fs_disk_quota *di) |
2493 | { | 2493 | { |
2494 | struct dquot *dquot; | 2494 | struct dquot *dquot; |
2495 | int rc; | 2495 | int rc; |
2496 | 2496 | ||
2497 | dquot = dqget(sb, id, type); | 2497 | dquot = dqget(sb, qid); |
2498 | if (!dquot) { | 2498 | if (!dquot) { |
2499 | rc = -ESRCH; | 2499 | rc = -ESRCH; |
2500 | goto out; | 2500 | goto out; |
diff --git a/fs/quota/kqid.c b/fs/quota/kqid.c new file mode 100644 index 000000000000..2f97b0e2c501 --- /dev/null +++ b/fs/quota/kqid.c | |||
@@ -0,0 +1,132 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/quota.h> | ||
3 | #include <linux/export.h> | ||
4 | |||
5 | /** | ||
6 | * qid_eq - Test to see if to kquid values are the same | ||
7 | * @left: A qid value | ||
8 | * @right: Another quid value | ||
9 | * | ||
10 | * Return true if the two qid values are equal and false otherwise. | ||
11 | */ | ||
12 | bool qid_eq(struct kqid left, struct kqid right) | ||
13 | { | ||
14 | if (left.type != right.type) | ||
15 | return false; | ||
16 | switch(left.type) { | ||
17 | case USRQUOTA: | ||
18 | return uid_eq(left.uid, right.uid); | ||
19 | case GRPQUOTA: | ||
20 | return gid_eq(left.gid, right.gid); | ||
21 | case PRJQUOTA: | ||
22 | return projid_eq(left.projid, right.projid); | ||
23 | default: | ||
24 | BUG(); | ||
25 | } | ||
26 | } | ||
27 | EXPORT_SYMBOL(qid_eq); | ||
28 | |||
29 | /** | ||
30 | * qid_lt - Test to see if one qid value is less than another | ||
31 | * @left: The possibly lesser qid value | ||
32 | * @right: The possibly greater qid value | ||
33 | * | ||
34 | * Return true if left is less than right and false otherwise. | ||
35 | */ | ||
36 | bool qid_lt(struct kqid left, struct kqid right) | ||
37 | { | ||
38 | if (left.type < right.type) | ||
39 | return true; | ||
40 | if (left.type > right.type) | ||
41 | return false; | ||
42 | switch (left.type) { | ||
43 | case USRQUOTA: | ||
44 | return uid_lt(left.uid, right.uid); | ||
45 | case GRPQUOTA: | ||
46 | return gid_lt(left.gid, right.gid); | ||
47 | case PRJQUOTA: | ||
48 | return projid_lt(left.projid, right.projid); | ||
49 | default: | ||
50 | BUG(); | ||
51 | } | ||
52 | } | ||
53 | EXPORT_SYMBOL(qid_lt); | ||
54 | |||
55 | /** | ||
56 | * from_kqid - Create a qid from a kqid user-namespace pair. | ||
57 | * @targ: The user namespace we want a qid in. | ||
58 | * @kuid: The kernel internal quota identifier to start with. | ||
59 | * | ||
60 | * Map @kqid into the user-namespace specified by @targ and | ||
61 | * return the resulting qid. | ||
62 | * | ||
63 | * There is always a mapping into the initial user_namespace. | ||
64 | * | ||
65 | * If @kqid has no mapping in @targ (qid_t)-1 is returned. | ||
66 | */ | ||
67 | qid_t from_kqid(struct user_namespace *targ, struct kqid kqid) | ||
68 | { | ||
69 | switch (kqid.type) { | ||
70 | case USRQUOTA: | ||
71 | return from_kuid(targ, kqid.uid); | ||
72 | case GRPQUOTA: | ||
73 | return from_kgid(targ, kqid.gid); | ||
74 | case PRJQUOTA: | ||
75 | return from_kprojid(targ, kqid.projid); | ||
76 | default: | ||
77 | BUG(); | ||
78 | } | ||
79 | } | ||
80 | EXPORT_SYMBOL(from_kqid); | ||
81 | |||
82 | /** | ||
83 | * from_kqid_munged - Create a qid from a kqid user-namespace pair. | ||
84 | * @targ: The user namespace we want a qid in. | ||
85 | * @kqid: The kernel internal quota identifier to start with. | ||
86 | * | ||
87 | * Map @kqid into the user-namespace specified by @targ and | ||
88 | * return the resulting qid. | ||
89 | * | ||
90 | * There is always a mapping into the initial user_namespace. | ||
91 | * | ||
92 | * Unlike from_kqid from_kqid_munged never fails and always | ||
93 | * returns a valid projid. This makes from_kqid_munged | ||
94 | * appropriate for use in places where failing to provide | ||
95 | * a qid_t is not a good option. | ||
96 | * | ||
97 | * If @kqid has no mapping in @targ the kqid.type specific | ||
98 | * overflow identifier is returned. | ||
99 | */ | ||
100 | qid_t from_kqid_munged(struct user_namespace *targ, struct kqid kqid) | ||
101 | { | ||
102 | switch (kqid.type) { | ||
103 | case USRQUOTA: | ||
104 | return from_kuid_munged(targ, kqid.uid); | ||
105 | case GRPQUOTA: | ||
106 | return from_kgid_munged(targ, kqid.gid); | ||
107 | case PRJQUOTA: | ||
108 | return from_kprojid_munged(targ, kqid.projid); | ||
109 | default: | ||
110 | BUG(); | ||
111 | } | ||
112 | } | ||
113 | EXPORT_SYMBOL(from_kqid_munged); | ||
114 | |||
115 | /** | ||
116 | * qid_valid - Report if a valid value is stored in a kqid. | ||
117 | * @qid: The kernel internal quota identifier to test. | ||
118 | */ | ||
119 | bool qid_valid(struct kqid qid) | ||
120 | { | ||
121 | switch (qid.type) { | ||
122 | case USRQUOTA: | ||
123 | return uid_valid(qid.uid); | ||
124 | case GRPQUOTA: | ||
125 | return gid_valid(qid.gid); | ||
126 | case PRJQUOTA: | ||
127 | return projid_valid(qid.projid); | ||
128 | default: | ||
129 | BUG(); | ||
130 | } | ||
131 | } | ||
132 | EXPORT_SYMBOL(qid_valid); | ||
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index d67908b407d9..16e8abb7709b 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c | |||
@@ -30,7 +30,7 @@ static struct genl_family quota_genl_family = { | |||
30 | * | 30 | * |
31 | */ | 31 | */ |
32 | 32 | ||
33 | void quota_send_warning(short type, unsigned int id, dev_t dev, | 33 | void quota_send_warning(struct kqid qid, dev_t dev, |
34 | const char warntype) | 34 | const char warntype) |
35 | { | 35 | { |
36 | static atomic_t seq; | 36 | static atomic_t seq; |
@@ -56,10 +56,11 @@ void quota_send_warning(short type, unsigned int id, dev_t dev, | |||
56 | "VFS: Cannot store netlink header in quota warning.\n"); | 56 | "VFS: Cannot store netlink header in quota warning.\n"); |
57 | goto err_out; | 57 | goto err_out; |
58 | } | 58 | } |
59 | ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type); | 59 | ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, qid.type); |
60 | if (ret) | 60 | if (ret) |
61 | goto attr_err_out; | 61 | goto attr_err_out; |
62 | ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id); | 62 | ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, |
63 | from_kqid_munged(&init_user_ns, qid)); | ||
63 | if (ret) | 64 | if (ret) |
64 | goto attr_err_out; | 65 | goto attr_err_out; |
65 | ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); | 66 | ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); |
@@ -71,7 +72,8 @@ void quota_send_warning(short type, unsigned int id, dev_t dev, | |||
71 | ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev)); | 72 | ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev)); |
72 | if (ret) | 73 | if (ret) |
73 | goto attr_err_out; | 74 | goto attr_err_out; |
74 | ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); | 75 | ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, |
76 | from_kuid_munged(&init_user_ns, current_uid())); | ||
75 | if (ret) | 77 | if (ret) |
76 | goto attr_err_out; | 78 | goto attr_err_out; |
77 | genlmsg_end(skb, msg_head); | 79 | genlmsg_end(skb, msg_head); |
diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 6f155788cbc6..ff0135d6bc51 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c | |||
@@ -32,8 +32,8 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd, | |||
32 | /* allow to query information for dquots we "own" */ | 32 | /* allow to query information for dquots we "own" */ |
33 | case Q_GETQUOTA: | 33 | case Q_GETQUOTA: |
34 | case Q_XGETQUOTA: | 34 | case Q_XGETQUOTA: |
35 | if ((type == USRQUOTA && current_euid() == id) || | 35 | if ((type == USRQUOTA && uid_eq(current_euid(), make_kuid(current_user_ns(), id))) || |
36 | (type == GRPQUOTA && in_egroup_p(id))) | 36 | (type == GRPQUOTA && in_egroup_p(make_kgid(current_user_ns(), id)))) |
37 | break; | 37 | break; |
38 | /*FALLTHROUGH*/ | 38 | /*FALLTHROUGH*/ |
39 | default: | 39 | default: |
@@ -130,13 +130,17 @@ static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src) | |||
130 | static int quota_getquota(struct super_block *sb, int type, qid_t id, | 130 | static int quota_getquota(struct super_block *sb, int type, qid_t id, |
131 | void __user *addr) | 131 | void __user *addr) |
132 | { | 132 | { |
133 | struct kqid qid; | ||
133 | struct fs_disk_quota fdq; | 134 | struct fs_disk_quota fdq; |
134 | struct if_dqblk idq; | 135 | struct if_dqblk idq; |
135 | int ret; | 136 | int ret; |
136 | 137 | ||
137 | if (!sb->s_qcop->get_dqblk) | 138 | if (!sb->s_qcop->get_dqblk) |
138 | return -ENOSYS; | 139 | return -ENOSYS; |
139 | ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq); | 140 | qid = make_kqid(current_user_ns(), type, id); |
141 | if (!qid_valid(qid)) | ||
142 | return -EINVAL; | ||
143 | ret = sb->s_qcop->get_dqblk(sb, qid, &fdq); | ||
140 | if (ret) | 144 | if (ret) |
141 | return ret; | 145 | return ret; |
142 | copy_to_if_dqblk(&idq, &fdq); | 146 | copy_to_if_dqblk(&idq, &fdq); |
@@ -176,13 +180,17 @@ static int quota_setquota(struct super_block *sb, int type, qid_t id, | |||
176 | { | 180 | { |
177 | struct fs_disk_quota fdq; | 181 | struct fs_disk_quota fdq; |
178 | struct if_dqblk idq; | 182 | struct if_dqblk idq; |
183 | struct kqid qid; | ||
179 | 184 | ||
180 | if (copy_from_user(&idq, addr, sizeof(idq))) | 185 | if (copy_from_user(&idq, addr, sizeof(idq))) |
181 | return -EFAULT; | 186 | return -EFAULT; |
182 | if (!sb->s_qcop->set_dqblk) | 187 | if (!sb->s_qcop->set_dqblk) |
183 | return -ENOSYS; | 188 | return -ENOSYS; |
189 | qid = make_kqid(current_user_ns(), type, id); | ||
190 | if (!qid_valid(qid)) | ||
191 | return -EINVAL; | ||
184 | copy_from_if_dqblk(&fdq, &idq); | 192 | copy_from_if_dqblk(&fdq, &idq); |
185 | return sb->s_qcop->set_dqblk(sb, type, id, &fdq); | 193 | return sb->s_qcop->set_dqblk(sb, qid, &fdq); |
186 | } | 194 | } |
187 | 195 | ||
188 | static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) | 196 | static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) |
@@ -213,23 +221,31 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id, | |||
213 | void __user *addr) | 221 | void __user *addr) |
214 | { | 222 | { |
215 | struct fs_disk_quota fdq; | 223 | struct fs_disk_quota fdq; |
224 | struct kqid qid; | ||
216 | 225 | ||
217 | if (copy_from_user(&fdq, addr, sizeof(fdq))) | 226 | if (copy_from_user(&fdq, addr, sizeof(fdq))) |
218 | return -EFAULT; | 227 | return -EFAULT; |
219 | if (!sb->s_qcop->set_dqblk) | 228 | if (!sb->s_qcop->set_dqblk) |
220 | return -ENOSYS; | 229 | return -ENOSYS; |
221 | return sb->s_qcop->set_dqblk(sb, type, id, &fdq); | 230 | qid = make_kqid(current_user_ns(), type, id); |
231 | if (!qid_valid(qid)) | ||
232 | return -EINVAL; | ||
233 | return sb->s_qcop->set_dqblk(sb, qid, &fdq); | ||
222 | } | 234 | } |
223 | 235 | ||
224 | static int quota_getxquota(struct super_block *sb, int type, qid_t id, | 236 | static int quota_getxquota(struct super_block *sb, int type, qid_t id, |
225 | void __user *addr) | 237 | void __user *addr) |
226 | { | 238 | { |
227 | struct fs_disk_quota fdq; | 239 | struct fs_disk_quota fdq; |
240 | struct kqid qid; | ||
228 | int ret; | 241 | int ret; |
229 | 242 | ||
230 | if (!sb->s_qcop->get_dqblk) | 243 | if (!sb->s_qcop->get_dqblk) |
231 | return -ENOSYS; | 244 | return -ENOSYS; |
232 | ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq); | 245 | qid = make_kqid(current_user_ns(), type, id); |
246 | if (!qid_valid(qid)) | ||
247 | return -EINVAL; | ||
248 | ret = sb->s_qcop->get_dqblk(sb, qid, &fdq); | ||
233 | if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) | 249 | if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) |
234 | return -EFAULT; | 250 | return -EFAULT; |
235 | return ret; | 251 | return ret; |
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index e41c1becf096..d65877fbe8f4 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c | |||
@@ -22,9 +22,10 @@ MODULE_LICENSE("GPL"); | |||
22 | 22 | ||
23 | #define __QUOTA_QT_PARANOIA | 23 | #define __QUOTA_QT_PARANOIA |
24 | 24 | ||
25 | static int get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth) | 25 | static int get_index(struct qtree_mem_dqinfo *info, struct kqid qid, int depth) |
26 | { | 26 | { |
27 | unsigned int epb = info->dqi_usable_bs >> 2; | 27 | unsigned int epb = info->dqi_usable_bs >> 2; |
28 | qid_t id = from_kqid(&init_user_ns, qid); | ||
28 | 29 | ||
29 | depth = info->dqi_qtree_depth - depth - 1; | 30 | depth = info->dqi_qtree_depth - depth - 1; |
30 | while (depth--) | 31 | while (depth--) |
@@ -244,7 +245,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, | |||
244 | /* This is enough as the block is already zeroed and the entry | 245 | /* This is enough as the block is already zeroed and the entry |
245 | * list is empty... */ | 246 | * list is empty... */ |
246 | info->dqi_free_entry = blk; | 247 | info->dqi_free_entry = blk; |
247 | mark_info_dirty(dquot->dq_sb, dquot->dq_type); | 248 | mark_info_dirty(dquot->dq_sb, dquot->dq_id.type); |
248 | } | 249 | } |
249 | /* Block will be full? */ | 250 | /* Block will be full? */ |
250 | if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { | 251 | if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { |
@@ -357,7 +358,7 @@ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info, | |||
357 | */ | 358 | */ |
358 | int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | 359 | int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) |
359 | { | 360 | { |
360 | int type = dquot->dq_type; | 361 | int type = dquot->dq_id.type; |
361 | struct super_block *sb = dquot->dq_sb; | 362 | struct super_block *sb = dquot->dq_sb; |
362 | ssize_t ret; | 363 | ssize_t ret; |
363 | char *ddquot = getdqbuf(info->dqi_entry_size); | 364 | char *ddquot = getdqbuf(info->dqi_entry_size); |
@@ -538,8 +539,9 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, | |||
538 | ddquot += info->dqi_entry_size; | 539 | ddquot += info->dqi_entry_size; |
539 | } | 540 | } |
540 | if (i == qtree_dqstr_in_blk(info)) { | 541 | if (i == qtree_dqstr_in_blk(info)) { |
541 | quota_error(dquot->dq_sb, "Quota for id %u referenced " | 542 | quota_error(dquot->dq_sb, |
542 | "but not present", dquot->dq_id); | 543 | "Quota for id %u referenced but not present", |
544 | from_kqid(&init_user_ns, dquot->dq_id)); | ||
543 | ret = -EIO; | 545 | ret = -EIO; |
544 | goto out_buf; | 546 | goto out_buf; |
545 | } else { | 547 | } else { |
@@ -589,7 +591,7 @@ static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info, | |||
589 | 591 | ||
590 | int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | 592 | int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) |
591 | { | 593 | { |
592 | int type = dquot->dq_type; | 594 | int type = dquot->dq_id.type; |
593 | struct super_block *sb = dquot->dq_sb; | 595 | struct super_block *sb = dquot->dq_sb; |
594 | loff_t offset; | 596 | loff_t offset; |
595 | char *ddquot; | 597 | char *ddquot; |
@@ -607,8 +609,10 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | |||
607 | offset = find_dqentry(info, dquot); | 609 | offset = find_dqentry(info, dquot); |
608 | if (offset <= 0) { /* Entry not present? */ | 610 | if (offset <= 0) { /* Entry not present? */ |
609 | if (offset < 0) | 611 | if (offset < 0) |
610 | quota_error(sb, "Can't read quota structure " | 612 | quota_error(sb,"Can't read quota structure " |
611 | "for id %u", dquot->dq_id); | 613 | "for id %u", |
614 | from_kqid(&init_user_ns, | ||
615 | dquot->dq_id)); | ||
612 | dquot->dq_off = 0; | 616 | dquot->dq_off = 0; |
613 | set_bit(DQ_FAKE_B, &dquot->dq_flags); | 617 | set_bit(DQ_FAKE_B, &dquot->dq_flags); |
614 | memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); | 618 | memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); |
@@ -626,7 +630,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | |||
626 | if (ret >= 0) | 630 | if (ret >= 0) |
627 | ret = -EIO; | 631 | ret = -EIO; |
628 | quota_error(sb, "Error while reading quota structure for id %u", | 632 | quota_error(sb, "Error while reading quota structure for id %u", |
629 | dquot->dq_id); | 633 | from_kqid(&init_user_ns, dquot->dq_id)); |
630 | set_bit(DQ_FAKE_B, &dquot->dq_flags); | 634 | set_bit(DQ_FAKE_B, &dquot->dq_flags); |
631 | memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); | 635 | memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); |
632 | kfree(ddquot); | 636 | kfree(ddquot); |
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c index 34b37a67bb16..469c6848b322 100644 --- a/fs/quota/quota_v1.c +++ b/fs/quota/quota_v1.c | |||
@@ -54,7 +54,7 @@ static void v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m) | |||
54 | 54 | ||
55 | static int v1_read_dqblk(struct dquot *dquot) | 55 | static int v1_read_dqblk(struct dquot *dquot) |
56 | { | 56 | { |
57 | int type = dquot->dq_type; | 57 | int type = dquot->dq_id.type; |
58 | struct v1_disk_dqblk dqblk; | 58 | struct v1_disk_dqblk dqblk; |
59 | 59 | ||
60 | if (!sb_dqopt(dquot->dq_sb)->files[type]) | 60 | if (!sb_dqopt(dquot->dq_sb)->files[type]) |
@@ -63,7 +63,8 @@ static int v1_read_dqblk(struct dquot *dquot) | |||
63 | /* Set structure to 0s in case read fails/is after end of file */ | 63 | /* Set structure to 0s in case read fails/is after end of file */ |
64 | memset(&dqblk, 0, sizeof(struct v1_disk_dqblk)); | 64 | memset(&dqblk, 0, sizeof(struct v1_disk_dqblk)); |
65 | dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, (char *)&dqblk, | 65 | dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, (char *)&dqblk, |
66 | sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id)); | 66 | sizeof(struct v1_disk_dqblk), |
67 | v1_dqoff(from_kqid(&init_user_ns, dquot->dq_id))); | ||
67 | 68 | ||
68 | v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk); | 69 | v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk); |
69 | if (dquot->dq_dqb.dqb_bhardlimit == 0 && | 70 | if (dquot->dq_dqb.dqb_bhardlimit == 0 && |
@@ -78,12 +79,13 @@ static int v1_read_dqblk(struct dquot *dquot) | |||
78 | 79 | ||
79 | static int v1_commit_dqblk(struct dquot *dquot) | 80 | static int v1_commit_dqblk(struct dquot *dquot) |
80 | { | 81 | { |
81 | short type = dquot->dq_type; | 82 | short type = dquot->dq_id.type; |
82 | ssize_t ret; | 83 | ssize_t ret; |
83 | struct v1_disk_dqblk dqblk; | 84 | struct v1_disk_dqblk dqblk; |
84 | 85 | ||
85 | v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb); | 86 | v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb); |
86 | if (dquot->dq_id == 0) { | 87 | if (((type == USRQUOTA) && uid_eq(dquot->dq_id.uid, GLOBAL_ROOT_UID)) || |
88 | ((type == GRPQUOTA) && gid_eq(dquot->dq_id.gid, GLOBAL_ROOT_GID))) { | ||
87 | dqblk.dqb_btime = | 89 | dqblk.dqb_btime = |
88 | sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace; | 90 | sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace; |
89 | dqblk.dqb_itime = | 91 | dqblk.dqb_itime = |
@@ -93,7 +95,7 @@ static int v1_commit_dqblk(struct dquot *dquot) | |||
93 | if (sb_dqopt(dquot->dq_sb)->files[type]) | 95 | if (sb_dqopt(dquot->dq_sb)->files[type]) |
94 | ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, | 96 | ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, |
95 | (char *)&dqblk, sizeof(struct v1_disk_dqblk), | 97 | (char *)&dqblk, sizeof(struct v1_disk_dqblk), |
96 | v1_dqoff(dquot->dq_id)); | 98 | v1_dqoff(from_kqid(&init_user_ns, dquot->dq_id))); |
97 | if (ret != sizeof(struct v1_disk_dqblk)) { | 99 | if (ret != sizeof(struct v1_disk_dqblk)) { |
98 | quota_error(dquot->dq_sb, "dquota write failed"); | 100 | quota_error(dquot->dq_sb, "dquota write failed"); |
99 | if (ret >= 0) | 101 | if (ret >= 0) |
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index f1ab3604db5a..02751ec695c5 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c | |||
@@ -196,7 +196,7 @@ static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot) | |||
196 | struct v2r0_disk_dqblk *d = dp; | 196 | struct v2r0_disk_dqblk *d = dp; |
197 | struct mem_dqblk *m = &dquot->dq_dqb; | 197 | struct mem_dqblk *m = &dquot->dq_dqb; |
198 | struct qtree_mem_dqinfo *info = | 198 | struct qtree_mem_dqinfo *info = |
199 | sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; | 199 | sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; |
200 | 200 | ||
201 | d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit); | 201 | d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit); |
202 | d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit); | 202 | d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit); |
@@ -206,7 +206,7 @@ static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot) | |||
206 | d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit)); | 206 | d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit)); |
207 | d->dqb_curspace = cpu_to_le64(m->dqb_curspace); | 207 | d->dqb_curspace = cpu_to_le64(m->dqb_curspace); |
208 | d->dqb_btime = cpu_to_le64(m->dqb_btime); | 208 | d->dqb_btime = cpu_to_le64(m->dqb_btime); |
209 | d->dqb_id = cpu_to_le32(dquot->dq_id); | 209 | d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id)); |
210 | if (qtree_entry_unused(info, dp)) | 210 | if (qtree_entry_unused(info, dp)) |
211 | d->dqb_itime = cpu_to_le64(1); | 211 | d->dqb_itime = cpu_to_le64(1); |
212 | } | 212 | } |
@@ -215,11 +215,13 @@ static int v2r0_is_id(void *dp, struct dquot *dquot) | |||
215 | { | 215 | { |
216 | struct v2r0_disk_dqblk *d = dp; | 216 | struct v2r0_disk_dqblk *d = dp; |
217 | struct qtree_mem_dqinfo *info = | 217 | struct qtree_mem_dqinfo *info = |
218 | sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; | 218 | sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; |
219 | 219 | ||
220 | if (qtree_entry_unused(info, dp)) | 220 | if (qtree_entry_unused(info, dp)) |
221 | return 0; | 221 | return 0; |
222 | return le32_to_cpu(d->dqb_id) == dquot->dq_id; | 222 | return qid_eq(make_kqid(&init_user_ns, dquot->dq_id.type, |
223 | le32_to_cpu(d->dqb_id)), | ||
224 | dquot->dq_id); | ||
223 | } | 225 | } |
224 | 226 | ||
225 | static void v2r1_disk2memdqb(struct dquot *dquot, void *dp) | 227 | static void v2r1_disk2memdqb(struct dquot *dquot, void *dp) |
@@ -247,7 +249,7 @@ static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot) | |||
247 | struct v2r1_disk_dqblk *d = dp; | 249 | struct v2r1_disk_dqblk *d = dp; |
248 | struct mem_dqblk *m = &dquot->dq_dqb; | 250 | struct mem_dqblk *m = &dquot->dq_dqb; |
249 | struct qtree_mem_dqinfo *info = | 251 | struct qtree_mem_dqinfo *info = |
250 | sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; | 252 | sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; |
251 | 253 | ||
252 | d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); | 254 | d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); |
253 | d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); | 255 | d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); |
@@ -257,7 +259,7 @@ static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot) | |||
257 | d->dqb_bsoftlimit = cpu_to_le64(v2_stoqb(m->dqb_bsoftlimit)); | 259 | d->dqb_bsoftlimit = cpu_to_le64(v2_stoqb(m->dqb_bsoftlimit)); |
258 | d->dqb_curspace = cpu_to_le64(m->dqb_curspace); | 260 | d->dqb_curspace = cpu_to_le64(m->dqb_curspace); |
259 | d->dqb_btime = cpu_to_le64(m->dqb_btime); | 261 | d->dqb_btime = cpu_to_le64(m->dqb_btime); |
260 | d->dqb_id = cpu_to_le32(dquot->dq_id); | 262 | d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id)); |
261 | if (qtree_entry_unused(info, dp)) | 263 | if (qtree_entry_unused(info, dp)) |
262 | d->dqb_itime = cpu_to_le64(1); | 264 | d->dqb_itime = cpu_to_le64(1); |
263 | } | 265 | } |
@@ -266,26 +268,28 @@ static int v2r1_is_id(void *dp, struct dquot *dquot) | |||
266 | { | 268 | { |
267 | struct v2r1_disk_dqblk *d = dp; | 269 | struct v2r1_disk_dqblk *d = dp; |
268 | struct qtree_mem_dqinfo *info = | 270 | struct qtree_mem_dqinfo *info = |
269 | sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; | 271 | sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; |
270 | 272 | ||
271 | if (qtree_entry_unused(info, dp)) | 273 | if (qtree_entry_unused(info, dp)) |
272 | return 0; | 274 | return 0; |
273 | return le32_to_cpu(d->dqb_id) == dquot->dq_id; | 275 | return qid_eq(make_kqid(&init_user_ns, dquot->dq_id.type, |
276 | le32_to_cpu(d->dqb_id)), | ||
277 | dquot->dq_id); | ||
274 | } | 278 | } |
275 | 279 | ||
276 | static int v2_read_dquot(struct dquot *dquot) | 280 | static int v2_read_dquot(struct dquot *dquot) |
277 | { | 281 | { |
278 | return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot); | 282 | return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot); |
279 | } | 283 | } |
280 | 284 | ||
281 | static int v2_write_dquot(struct dquot *dquot) | 285 | static int v2_write_dquot(struct dquot *dquot) |
282 | { | 286 | { |
283 | return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot); | 287 | return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot); |
284 | } | 288 | } |
285 | 289 | ||
286 | static int v2_release_dquot(struct dquot *dquot) | 290 | static int v2_release_dquot(struct dquot *dquot) |
287 | { | 291 | { |
288 | return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot); | 292 | return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot); |
289 | } | 293 | } |
290 | 294 | ||
291 | static int v2_free_file_info(struct super_block *sb, int type) | 295 | static int v2_free_file_info(struct super_block *sb, int type) |
diff --git a/fs/read_write.c b/fs/read_write.c index 1adfb691e4f1..d06534857e9e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -232,23 +232,18 @@ EXPORT_SYMBOL(vfs_llseek); | |||
232 | SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) | 232 | SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) |
233 | { | 233 | { |
234 | off_t retval; | 234 | off_t retval; |
235 | struct file * file; | 235 | struct fd f = fdget(fd); |
236 | int fput_needed; | 236 | if (!f.file) |
237 | 237 | return -EBADF; | |
238 | retval = -EBADF; | ||
239 | file = fget_light(fd, &fput_needed); | ||
240 | if (!file) | ||
241 | goto bad; | ||
242 | 238 | ||
243 | retval = -EINVAL; | 239 | retval = -EINVAL; |
244 | if (origin <= SEEK_MAX) { | 240 | if (origin <= SEEK_MAX) { |
245 | loff_t res = vfs_llseek(file, offset, origin); | 241 | loff_t res = vfs_llseek(f.file, offset, origin); |
246 | retval = res; | 242 | retval = res; |
247 | if (res != (loff_t)retval) | 243 | if (res != (loff_t)retval) |
248 | retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ | 244 | retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ |
249 | } | 245 | } |
250 | fput_light(file, fput_needed); | 246 | fdput(f); |
251 | bad: | ||
252 | return retval; | 247 | return retval; |
253 | } | 248 | } |
254 | 249 | ||
@@ -258,20 +253,17 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, | |||
258 | unsigned int, origin) | 253 | unsigned int, origin) |
259 | { | 254 | { |
260 | int retval; | 255 | int retval; |
261 | struct file * file; | 256 | struct fd f = fdget(fd); |
262 | loff_t offset; | 257 | loff_t offset; |
263 | int fput_needed; | ||
264 | 258 | ||
265 | retval = -EBADF; | 259 | if (!f.file) |
266 | file = fget_light(fd, &fput_needed); | 260 | return -EBADF; |
267 | if (!file) | ||
268 | goto bad; | ||
269 | 261 | ||
270 | retval = -EINVAL; | 262 | retval = -EINVAL; |
271 | if (origin > SEEK_MAX) | 263 | if (origin > SEEK_MAX) |
272 | goto out_putf; | 264 | goto out_putf; |
273 | 265 | ||
274 | offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, | 266 | offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, |
275 | origin); | 267 | origin); |
276 | 268 | ||
277 | retval = (int)offset; | 269 | retval = (int)offset; |
@@ -281,8 +273,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, | |||
281 | retval = 0; | 273 | retval = 0; |
282 | } | 274 | } |
283 | out_putf: | 275 | out_putf: |
284 | fput_light(file, fput_needed); | 276 | fdput(f); |
285 | bad: | ||
286 | return retval; | 277 | return retval; |
287 | } | 278 | } |
288 | #endif | 279 | #endif |
@@ -461,34 +452,29 @@ static inline void file_pos_write(struct file *file, loff_t pos) | |||
461 | 452 | ||
462 | SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) | 453 | SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) |
463 | { | 454 | { |
464 | struct file *file; | 455 | struct fd f = fdget(fd); |
465 | ssize_t ret = -EBADF; | 456 | ssize_t ret = -EBADF; |
466 | int fput_needed; | ||
467 | 457 | ||
468 | file = fget_light(fd, &fput_needed); | 458 | if (f.file) { |
469 | if (file) { | 459 | loff_t pos = file_pos_read(f.file); |
470 | loff_t pos = file_pos_read(file); | 460 | ret = vfs_read(f.file, buf, count, &pos); |
471 | ret = vfs_read(file, buf, count, &pos); | 461 | file_pos_write(f.file, pos); |
472 | file_pos_write(file, pos); | 462 | fdput(f); |
473 | fput_light(file, fput_needed); | ||
474 | } | 463 | } |
475 | |||
476 | return ret; | 464 | return ret; |
477 | } | 465 | } |
478 | 466 | ||
479 | SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, | 467 | SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, |
480 | size_t, count) | 468 | size_t, count) |
481 | { | 469 | { |
482 | struct file *file; | 470 | struct fd f = fdget(fd); |
483 | ssize_t ret = -EBADF; | 471 | ssize_t ret = -EBADF; |
484 | int fput_needed; | ||
485 | 472 | ||
486 | file = fget_light(fd, &fput_needed); | 473 | if (f.file) { |
487 | if (file) { | 474 | loff_t pos = file_pos_read(f.file); |
488 | loff_t pos = file_pos_read(file); | 475 | ret = vfs_write(f.file, buf, count, &pos); |
489 | ret = vfs_write(file, buf, count, &pos); | 476 | file_pos_write(f.file, pos); |
490 | file_pos_write(file, pos); | 477 | fdput(f); |
491 | fput_light(file, fput_needed); | ||
492 | } | 478 | } |
493 | 479 | ||
494 | return ret; | 480 | return ret; |
@@ -497,19 +483,18 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, | |||
497 | SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, | 483 | SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, |
498 | size_t count, loff_t pos) | 484 | size_t count, loff_t pos) |
499 | { | 485 | { |
500 | struct file *file; | 486 | struct fd f; |
501 | ssize_t ret = -EBADF; | 487 | ssize_t ret = -EBADF; |
502 | int fput_needed; | ||
503 | 488 | ||
504 | if (pos < 0) | 489 | if (pos < 0) |
505 | return -EINVAL; | 490 | return -EINVAL; |
506 | 491 | ||
507 | file = fget_light(fd, &fput_needed); | 492 | f = fdget(fd); |
508 | if (file) { | 493 | if (f.file) { |
509 | ret = -ESPIPE; | 494 | ret = -ESPIPE; |
510 | if (file->f_mode & FMODE_PREAD) | 495 | if (f.file->f_mode & FMODE_PREAD) |
511 | ret = vfs_read(file, buf, count, &pos); | 496 | ret = vfs_read(f.file, buf, count, &pos); |
512 | fput_light(file, fput_needed); | 497 | fdput(f); |
513 | } | 498 | } |
514 | 499 | ||
515 | return ret; | 500 | return ret; |
@@ -526,19 +511,18 @@ SYSCALL_ALIAS(sys_pread64, SyS_pread64); | |||
526 | SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, | 511 | SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, |
527 | size_t count, loff_t pos) | 512 | size_t count, loff_t pos) |
528 | { | 513 | { |
529 | struct file *file; | 514 | struct fd f; |
530 | ssize_t ret = -EBADF; | 515 | ssize_t ret = -EBADF; |
531 | int fput_needed; | ||
532 | 516 | ||
533 | if (pos < 0) | 517 | if (pos < 0) |
534 | return -EINVAL; | 518 | return -EINVAL; |
535 | 519 | ||
536 | file = fget_light(fd, &fput_needed); | 520 | f = fdget(fd); |
537 | if (file) { | 521 | if (f.file) { |
538 | ret = -ESPIPE; | 522 | ret = -ESPIPE; |
539 | if (file->f_mode & FMODE_PWRITE) | 523 | if (f.file->f_mode & FMODE_PWRITE) |
540 | ret = vfs_write(file, buf, count, &pos); | 524 | ret = vfs_write(f.file, buf, count, &pos); |
541 | fput_light(file, fput_needed); | 525 | fdput(f); |
542 | } | 526 | } |
543 | 527 | ||
544 | return ret; | 528 | return ret; |
@@ -789,16 +773,14 @@ EXPORT_SYMBOL(vfs_writev); | |||
789 | SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, | 773 | SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, |
790 | unsigned long, vlen) | 774 | unsigned long, vlen) |
791 | { | 775 | { |
792 | struct file *file; | 776 | struct fd f = fdget(fd); |
793 | ssize_t ret = -EBADF; | 777 | ssize_t ret = -EBADF; |
794 | int fput_needed; | ||
795 | 778 | ||
796 | file = fget_light(fd, &fput_needed); | 779 | if (f.file) { |
797 | if (file) { | 780 | loff_t pos = file_pos_read(f.file); |
798 | loff_t pos = file_pos_read(file); | 781 | ret = vfs_readv(f.file, vec, vlen, &pos); |
799 | ret = vfs_readv(file, vec, vlen, &pos); | 782 | file_pos_write(f.file, pos); |
800 | file_pos_write(file, pos); | 783 | fdput(f); |
801 | fput_light(file, fput_needed); | ||
802 | } | 784 | } |
803 | 785 | ||
804 | if (ret > 0) | 786 | if (ret > 0) |
@@ -810,16 +792,14 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, | |||
810 | SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, | 792 | SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, |
811 | unsigned long, vlen) | 793 | unsigned long, vlen) |
812 | { | 794 | { |
813 | struct file *file; | 795 | struct fd f = fdget(fd); |
814 | ssize_t ret = -EBADF; | 796 | ssize_t ret = -EBADF; |
815 | int fput_needed; | ||
816 | 797 | ||
817 | file = fget_light(fd, &fput_needed); | 798 | if (f.file) { |
818 | if (file) { | 799 | loff_t pos = file_pos_read(f.file); |
819 | loff_t pos = file_pos_read(file); | 800 | ret = vfs_writev(f.file, vec, vlen, &pos); |
820 | ret = vfs_writev(file, vec, vlen, &pos); | 801 | file_pos_write(f.file, pos); |
821 | file_pos_write(file, pos); | 802 | fdput(f); |
822 | fput_light(file, fput_needed); | ||
823 | } | 803 | } |
824 | 804 | ||
825 | if (ret > 0) | 805 | if (ret > 0) |
@@ -838,19 +818,18 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, | |||
838 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) | 818 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) |
839 | { | 819 | { |
840 | loff_t pos = pos_from_hilo(pos_h, pos_l); | 820 | loff_t pos = pos_from_hilo(pos_h, pos_l); |
841 | struct file *file; | 821 | struct fd f; |
842 | ssize_t ret = -EBADF; | 822 | ssize_t ret = -EBADF; |
843 | int fput_needed; | ||
844 | 823 | ||
845 | if (pos < 0) | 824 | if (pos < 0) |
846 | return -EINVAL; | 825 | return -EINVAL; |
847 | 826 | ||
848 | file = fget_light(fd, &fput_needed); | 827 | f = fdget(fd); |
849 | if (file) { | 828 | if (f.file) { |
850 | ret = -ESPIPE; | 829 | ret = -ESPIPE; |
851 | if (file->f_mode & FMODE_PREAD) | 830 | if (f.file->f_mode & FMODE_PREAD) |
852 | ret = vfs_readv(file, vec, vlen, &pos); | 831 | ret = vfs_readv(f.file, vec, vlen, &pos); |
853 | fput_light(file, fput_needed); | 832 | fdput(f); |
854 | } | 833 | } |
855 | 834 | ||
856 | if (ret > 0) | 835 | if (ret > 0) |
@@ -863,19 +842,18 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, | |||
863 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) | 842 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) |
864 | { | 843 | { |
865 | loff_t pos = pos_from_hilo(pos_h, pos_l); | 844 | loff_t pos = pos_from_hilo(pos_h, pos_l); |
866 | struct file *file; | 845 | struct fd f; |
867 | ssize_t ret = -EBADF; | 846 | ssize_t ret = -EBADF; |
868 | int fput_needed; | ||
869 | 847 | ||
870 | if (pos < 0) | 848 | if (pos < 0) |
871 | return -EINVAL; | 849 | return -EINVAL; |
872 | 850 | ||
873 | file = fget_light(fd, &fput_needed); | 851 | f = fdget(fd); |
874 | if (file) { | 852 | if (f.file) { |
875 | ret = -ESPIPE; | 853 | ret = -ESPIPE; |
876 | if (file->f_mode & FMODE_PWRITE) | 854 | if (f.file->f_mode & FMODE_PWRITE) |
877 | ret = vfs_writev(file, vec, vlen, &pos); | 855 | ret = vfs_writev(f.file, vec, vlen, &pos); |
878 | fput_light(file, fput_needed); | 856 | fdput(f); |
879 | } | 857 | } |
880 | 858 | ||
881 | if (ret > 0) | 859 | if (ret > 0) |
@@ -884,31 +862,31 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, | |||
884 | return ret; | 862 | return ret; |
885 | } | 863 | } |
886 | 864 | ||
887 | static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | 865 | ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, |
888 | size_t count, loff_t max) | 866 | loff_t max) |
889 | { | 867 | { |
890 | struct file * in_file, * out_file; | 868 | struct fd in, out; |
891 | struct inode * in_inode, * out_inode; | 869 | struct inode *in_inode, *out_inode; |
892 | loff_t pos; | 870 | loff_t pos; |
893 | ssize_t retval; | 871 | ssize_t retval; |
894 | int fput_needed_in, fput_needed_out, fl; | 872 | int fl; |
895 | 873 | ||
896 | /* | 874 | /* |
897 | * Get input file, and verify that it is ok.. | 875 | * Get input file, and verify that it is ok.. |
898 | */ | 876 | */ |
899 | retval = -EBADF; | 877 | retval = -EBADF; |
900 | in_file = fget_light(in_fd, &fput_needed_in); | 878 | in = fdget(in_fd); |
901 | if (!in_file) | 879 | if (!in.file) |
902 | goto out; | 880 | goto out; |
903 | if (!(in_file->f_mode & FMODE_READ)) | 881 | if (!(in.file->f_mode & FMODE_READ)) |
904 | goto fput_in; | 882 | goto fput_in; |
905 | retval = -ESPIPE; | 883 | retval = -ESPIPE; |
906 | if (!ppos) | 884 | if (!ppos) |
907 | ppos = &in_file->f_pos; | 885 | ppos = &in.file->f_pos; |
908 | else | 886 | else |
909 | if (!(in_file->f_mode & FMODE_PREAD)) | 887 | if (!(in.file->f_mode & FMODE_PREAD)) |
910 | goto fput_in; | 888 | goto fput_in; |
911 | retval = rw_verify_area(READ, in_file, ppos, count); | 889 | retval = rw_verify_area(READ, in.file, ppos, count); |
912 | if (retval < 0) | 890 | if (retval < 0) |
913 | goto fput_in; | 891 | goto fput_in; |
914 | count = retval; | 892 | count = retval; |
@@ -917,15 +895,15 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
917 | * Get output file, and verify that it is ok.. | 895 | * Get output file, and verify that it is ok.. |
918 | */ | 896 | */ |
919 | retval = -EBADF; | 897 | retval = -EBADF; |
920 | out_file = fget_light(out_fd, &fput_needed_out); | 898 | out = fdget(out_fd); |
921 | if (!out_file) | 899 | if (!out.file) |
922 | goto fput_in; | 900 | goto fput_in; |
923 | if (!(out_file->f_mode & FMODE_WRITE)) | 901 | if (!(out.file->f_mode & FMODE_WRITE)) |
924 | goto fput_out; | 902 | goto fput_out; |
925 | retval = -EINVAL; | 903 | retval = -EINVAL; |
926 | in_inode = in_file->f_path.dentry->d_inode; | 904 | in_inode = in.file->f_path.dentry->d_inode; |
927 | out_inode = out_file->f_path.dentry->d_inode; | 905 | out_inode = out.file->f_path.dentry->d_inode; |
928 | retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); | 906 | retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count); |
929 | if (retval < 0) | 907 | if (retval < 0) |
930 | goto fput_out; | 908 | goto fput_out; |
931 | count = retval; | 909 | count = retval; |
@@ -949,10 +927,10 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
949 | * and the application is arguably buggy if it doesn't expect | 927 | * and the application is arguably buggy if it doesn't expect |
950 | * EAGAIN on a non-blocking file descriptor. | 928 | * EAGAIN on a non-blocking file descriptor. |
951 | */ | 929 | */ |
952 | if (in_file->f_flags & O_NONBLOCK) | 930 | if (in.file->f_flags & O_NONBLOCK) |
953 | fl = SPLICE_F_NONBLOCK; | 931 | fl = SPLICE_F_NONBLOCK; |
954 | #endif | 932 | #endif |
955 | retval = do_splice_direct(in_file, ppos, out_file, count, fl); | 933 | retval = do_splice_direct(in.file, ppos, out.file, count, fl); |
956 | 934 | ||
957 | if (retval > 0) { | 935 | if (retval > 0) { |
958 | add_rchar(current, retval); | 936 | add_rchar(current, retval); |
@@ -965,9 +943,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
965 | retval = -EOVERFLOW; | 943 | retval = -EOVERFLOW; |
966 | 944 | ||
967 | fput_out: | 945 | fput_out: |
968 | fput_light(out_file, fput_needed_out); | 946 | fdput(out); |
969 | fput_in: | 947 | fput_in: |
970 | fput_light(in_file, fput_needed_in); | 948 | fdput(in); |
971 | out: | 949 | out: |
972 | return retval; | 950 | return retval; |
973 | } | 951 | } |
diff --git a/fs/read_write.h b/fs/read_write.h index d07b954c6e0c..d3e00ef67420 100644 --- a/fs/read_write.h +++ b/fs/read_write.h | |||
@@ -12,3 +12,5 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, | |||
12 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); | 12 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); |
13 | ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, | 13 | ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, |
14 | unsigned long nr_segs, loff_t *ppos, io_fn_t fn); | 14 | unsigned long nr_segs, loff_t *ppos, io_fn_t fn); |
15 | ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, | ||
16 | loff_t max); | ||
diff --git a/fs/readdir.c b/fs/readdir.c index 39e3370d79cf..5e69ef533b77 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
@@ -106,22 +106,20 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, | |||
106 | struct old_linux_dirent __user *, dirent, unsigned int, count) | 106 | struct old_linux_dirent __user *, dirent, unsigned int, count) |
107 | { | 107 | { |
108 | int error; | 108 | int error; |
109 | struct file * file; | 109 | struct fd f = fdget(fd); |
110 | struct readdir_callback buf; | 110 | struct readdir_callback buf; |
111 | int fput_needed; | ||
112 | 111 | ||
113 | file = fget_light(fd, &fput_needed); | 112 | if (!f.file) |
114 | if (!file) | ||
115 | return -EBADF; | 113 | return -EBADF; |
116 | 114 | ||
117 | buf.result = 0; | 115 | buf.result = 0; |
118 | buf.dirent = dirent; | 116 | buf.dirent = dirent; |
119 | 117 | ||
120 | error = vfs_readdir(file, fillonedir, &buf); | 118 | error = vfs_readdir(f.file, fillonedir, &buf); |
121 | if (buf.result) | 119 | if (buf.result) |
122 | error = buf.result; | 120 | error = buf.result; |
123 | 121 | ||
124 | fput_light(file, fput_needed); | 122 | fdput(f); |
125 | return error; | 123 | return error; |
126 | } | 124 | } |
127 | 125 | ||
@@ -191,17 +189,16 @@ efault: | |||
191 | SYSCALL_DEFINE3(getdents, unsigned int, fd, | 189 | SYSCALL_DEFINE3(getdents, unsigned int, fd, |
192 | struct linux_dirent __user *, dirent, unsigned int, count) | 190 | struct linux_dirent __user *, dirent, unsigned int, count) |
193 | { | 191 | { |
194 | struct file * file; | 192 | struct fd f; |
195 | struct linux_dirent __user * lastdirent; | 193 | struct linux_dirent __user * lastdirent; |
196 | struct getdents_callback buf; | 194 | struct getdents_callback buf; |
197 | int fput_needed; | ||
198 | int error; | 195 | int error; |
199 | 196 | ||
200 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 197 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
201 | return -EFAULT; | 198 | return -EFAULT; |
202 | 199 | ||
203 | file = fget_light(fd, &fput_needed); | 200 | f = fdget(fd); |
204 | if (!file) | 201 | if (!f.file) |
205 | return -EBADF; | 202 | return -EBADF; |
206 | 203 | ||
207 | buf.current_dir = dirent; | 204 | buf.current_dir = dirent; |
@@ -209,17 +206,17 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, | |||
209 | buf.count = count; | 206 | buf.count = count; |
210 | buf.error = 0; | 207 | buf.error = 0; |
211 | 208 | ||
212 | error = vfs_readdir(file, filldir, &buf); | 209 | error = vfs_readdir(f.file, filldir, &buf); |
213 | if (error >= 0) | 210 | if (error >= 0) |
214 | error = buf.error; | 211 | error = buf.error; |
215 | lastdirent = buf.previous; | 212 | lastdirent = buf.previous; |
216 | if (lastdirent) { | 213 | if (lastdirent) { |
217 | if (put_user(file->f_pos, &lastdirent->d_off)) | 214 | if (put_user(f.file->f_pos, &lastdirent->d_off)) |
218 | error = -EFAULT; | 215 | error = -EFAULT; |
219 | else | 216 | else |
220 | error = count - buf.count; | 217 | error = count - buf.count; |
221 | } | 218 | } |
222 | fput_light(file, fput_needed); | 219 | fdput(f); |
223 | return error; | 220 | return error; |
224 | } | 221 | } |
225 | 222 | ||
@@ -272,17 +269,16 @@ efault: | |||
272 | SYSCALL_DEFINE3(getdents64, unsigned int, fd, | 269 | SYSCALL_DEFINE3(getdents64, unsigned int, fd, |
273 | struct linux_dirent64 __user *, dirent, unsigned int, count) | 270 | struct linux_dirent64 __user *, dirent, unsigned int, count) |
274 | { | 271 | { |
275 | struct file * file; | 272 | struct fd f; |
276 | struct linux_dirent64 __user * lastdirent; | 273 | struct linux_dirent64 __user * lastdirent; |
277 | struct getdents_callback64 buf; | 274 | struct getdents_callback64 buf; |
278 | int fput_needed; | ||
279 | int error; | 275 | int error; |
280 | 276 | ||
281 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 277 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
282 | return -EFAULT; | 278 | return -EFAULT; |
283 | 279 | ||
284 | file = fget_light(fd, &fput_needed); | 280 | f = fdget(fd); |
285 | if (!file) | 281 | if (!f.file) |
286 | return -EBADF; | 282 | return -EBADF; |
287 | 283 | ||
288 | buf.current_dir = dirent; | 284 | buf.current_dir = dirent; |
@@ -290,17 +286,17 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, | |||
290 | buf.count = count; | 286 | buf.count = count; |
291 | buf.error = 0; | 287 | buf.error = 0; |
292 | 288 | ||
293 | error = vfs_readdir(file, filldir64, &buf); | 289 | error = vfs_readdir(f.file, filldir64, &buf); |
294 | if (error >= 0) | 290 | if (error >= 0) |
295 | error = buf.error; | 291 | error = buf.error; |
296 | lastdirent = buf.previous; | 292 | lastdirent = buf.previous; |
297 | if (lastdirent) { | 293 | if (lastdirent) { |
298 | typeof(lastdirent->d_off) d_off = file->f_pos; | 294 | typeof(lastdirent->d_off) d_off = f.file->f_pos; |
299 | if (__put_user(d_off, &lastdirent->d_off)) | 295 | if (__put_user(d_off, &lastdirent->d_off)) |
300 | error = -EFAULT; | 296 | error = -EFAULT; |
301 | else | 297 | else |
302 | error = count - buf.count; | 298 | error = count - buf.count; |
303 | } | 299 | } |
304 | fput_light(file, fput_needed); | 300 | fdput(f); |
305 | return error; | 301 | return error; |
306 | } | 302 | } |
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 4c0c7d163d15..a98b7740a0fc 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c | |||
@@ -1334,9 +1334,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, | |||
1334 | else if (bitmap == 0) | 1334 | else if (bitmap == 0) |
1335 | block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; | 1335 | block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; |
1336 | 1336 | ||
1337 | reiserfs_write_unlock(sb); | ||
1338 | bh = sb_bread(sb, block); | 1337 | bh = sb_bread(sb, block); |
1339 | reiserfs_write_lock(sb); | ||
1340 | if (bh == NULL) | 1338 | if (bh == NULL) |
1341 | reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " | 1339 | reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " |
1342 | "reading failed", __func__, block); | 1340 | "reading failed", __func__, block); |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a6d4268fb6c1..46485557cdc6 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -76,10 +76,10 @@ void reiserfs_evict_inode(struct inode *inode) | |||
76 | ; | 76 | ; |
77 | } | 77 | } |
78 | out: | 78 | out: |
79 | reiserfs_write_unlock_once(inode->i_sb, depth); | ||
79 | clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ | 80 | clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ |
80 | dquot_drop(inode); | 81 | dquot_drop(inode); |
81 | inode->i_blocks = 0; | 82 | inode->i_blocks = 0; |
82 | reiserfs_write_unlock_once(inode->i_sb, depth); | ||
83 | return; | 83 | return; |
84 | 84 | ||
85 | no_delete: | 85 | no_delete: |
@@ -1155,8 +1155,8 @@ static void init_inode(struct inode *inode, struct treepath *path) | |||
1155 | set_inode_sd_version(inode, STAT_DATA_V1); | 1155 | set_inode_sd_version(inode, STAT_DATA_V1); |
1156 | inode->i_mode = sd_v1_mode(sd); | 1156 | inode->i_mode = sd_v1_mode(sd); |
1157 | set_nlink(inode, sd_v1_nlink(sd)); | 1157 | set_nlink(inode, sd_v1_nlink(sd)); |
1158 | inode->i_uid = sd_v1_uid(sd); | 1158 | i_uid_write(inode, sd_v1_uid(sd)); |
1159 | inode->i_gid = sd_v1_gid(sd); | 1159 | i_gid_write(inode, sd_v1_gid(sd)); |
1160 | inode->i_size = sd_v1_size(sd); | 1160 | inode->i_size = sd_v1_size(sd); |
1161 | inode->i_atime.tv_sec = sd_v1_atime(sd); | 1161 | inode->i_atime.tv_sec = sd_v1_atime(sd); |
1162 | inode->i_mtime.tv_sec = sd_v1_mtime(sd); | 1162 | inode->i_mtime.tv_sec = sd_v1_mtime(sd); |
@@ -1200,9 +1200,9 @@ static void init_inode(struct inode *inode, struct treepath *path) | |||
1200 | 1200 | ||
1201 | inode->i_mode = sd_v2_mode(sd); | 1201 | inode->i_mode = sd_v2_mode(sd); |
1202 | set_nlink(inode, sd_v2_nlink(sd)); | 1202 | set_nlink(inode, sd_v2_nlink(sd)); |
1203 | inode->i_uid = sd_v2_uid(sd); | 1203 | i_uid_write(inode, sd_v2_uid(sd)); |
1204 | inode->i_size = sd_v2_size(sd); | 1204 | inode->i_size = sd_v2_size(sd); |
1205 | inode->i_gid = sd_v2_gid(sd); | 1205 | i_gid_write(inode, sd_v2_gid(sd)); |
1206 | inode->i_mtime.tv_sec = sd_v2_mtime(sd); | 1206 | inode->i_mtime.tv_sec = sd_v2_mtime(sd); |
1207 | inode->i_atime.tv_sec = sd_v2_atime(sd); | 1207 | inode->i_atime.tv_sec = sd_v2_atime(sd); |
1208 | inode->i_ctime.tv_sec = sd_v2_ctime(sd); | 1208 | inode->i_ctime.tv_sec = sd_v2_ctime(sd); |
@@ -1258,9 +1258,9 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size) | |||
1258 | 1258 | ||
1259 | set_sd_v2_mode(sd_v2, inode->i_mode); | 1259 | set_sd_v2_mode(sd_v2, inode->i_mode); |
1260 | set_sd_v2_nlink(sd_v2, inode->i_nlink); | 1260 | set_sd_v2_nlink(sd_v2, inode->i_nlink); |
1261 | set_sd_v2_uid(sd_v2, inode->i_uid); | 1261 | set_sd_v2_uid(sd_v2, i_uid_read(inode)); |
1262 | set_sd_v2_size(sd_v2, size); | 1262 | set_sd_v2_size(sd_v2, size); |
1263 | set_sd_v2_gid(sd_v2, inode->i_gid); | 1263 | set_sd_v2_gid(sd_v2, i_gid_read(inode)); |
1264 | set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); | 1264 | set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); |
1265 | set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); | 1265 | set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); |
1266 | set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); | 1266 | set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); |
@@ -1280,8 +1280,8 @@ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) | |||
1280 | struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; | 1280 | struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; |
1281 | 1281 | ||
1282 | set_sd_v1_mode(sd_v1, inode->i_mode); | 1282 | set_sd_v1_mode(sd_v1, inode->i_mode); |
1283 | set_sd_v1_uid(sd_v1, inode->i_uid); | 1283 | set_sd_v1_uid(sd_v1, i_uid_read(inode)); |
1284 | set_sd_v1_gid(sd_v1, inode->i_gid); | 1284 | set_sd_v1_gid(sd_v1, i_gid_read(inode)); |
1285 | set_sd_v1_nlink(sd_v1, inode->i_nlink); | 1285 | set_sd_v1_nlink(sd_v1, inode->i_nlink); |
1286 | set_sd_v1_size(sd_v1, size); | 1286 | set_sd_v1_size(sd_v1, size); |
1287 | set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec); | 1287 | set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec); |
@@ -1869,7 +1869,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1869 | goto out_bad_inode; | 1869 | goto out_bad_inode; |
1870 | } | 1870 | } |
1871 | if (old_format_only(sb)) { | 1871 | if (old_format_only(sb)) { |
1872 | if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) { | 1872 | if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) { |
1873 | pathrelse(&path_to_key); | 1873 | pathrelse(&path_to_key); |
1874 | /* i_uid or i_gid is too big to be stored in stat data v3.5 */ | 1874 | /* i_uid or i_gid is too big to be stored in stat data v3.5 */ |
1875 | err = -EINVAL; | 1875 | err = -EINVAL; |
@@ -3140,16 +3140,16 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3140 | } | 3140 | } |
3141 | } | 3141 | } |
3142 | 3142 | ||
3143 | if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) || | 3143 | if ((((attr->ia_valid & ATTR_UID) && (from_kuid(&init_user_ns, attr->ia_uid) & ~0xffff)) || |
3144 | ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) && | 3144 | ((attr->ia_valid & ATTR_GID) && (from_kgid(&init_user_ns, attr->ia_gid) & ~0xffff))) && |
3145 | (get_inode_sd_version(inode) == STAT_DATA_V1)) { | 3145 | (get_inode_sd_version(inode) == STAT_DATA_V1)) { |
3146 | /* stat data of format v3.5 has 16 bit uid and gid */ | 3146 | /* stat data of format v3.5 has 16 bit uid and gid */ |
3147 | error = -EINVAL; | 3147 | error = -EINVAL; |
3148 | goto out; | 3148 | goto out; |
3149 | } | 3149 | } |
3150 | 3150 | ||
3151 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 3151 | if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || |
3152 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | 3152 | (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { |
3153 | struct reiserfs_transaction_handle th; | 3153 | struct reiserfs_transaction_handle th; |
3154 | int jbegin_count = | 3154 | int jbegin_count = |
3155 | 2 * | 3155 | 2 * |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7a37dabf5a96..1078ae179993 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -608,6 +608,11 @@ static int init_inodecache(void) | |||
608 | 608 | ||
609 | static void destroy_inodecache(void) | 609 | static void destroy_inodecache(void) |
610 | { | 610 | { |
611 | /* | ||
612 | * Make sure all delayed rcu free inodes are flushed before we | ||
613 | * destroy cache. | ||
614 | */ | ||
615 | rcu_barrier(); | ||
611 | kmem_cache_destroy(reiserfs_inode_cachep); | 616 | kmem_cache_destroy(reiserfs_inode_cachep); |
612 | } | 617 | } |
613 | 618 | ||
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index d319963aeb11..c196369fe408 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -896,7 +896,7 @@ static int create_privroot(struct dentry *dentry) { return 0; } | |||
896 | #endif | 896 | #endif |
897 | 897 | ||
898 | /* Actual operations that are exported to VFS-land */ | 898 | /* Actual operations that are exported to VFS-land */ |
899 | const struct xattr_handler *reiserfs_xattr_handlers[] = { | 899 | static const struct xattr_handler *reiserfs_xattr_handlers[] = { |
900 | #ifdef CONFIG_REISERFS_FS_XATTR | 900 | #ifdef CONFIG_REISERFS_FS_XATTR |
901 | &reiserfs_xattr_user_handler, | 901 | &reiserfs_xattr_user_handler, |
902 | &reiserfs_xattr_trusted_handler, | 902 | &reiserfs_xattr_trusted_handler, |
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 44474f9b990d..d7c01ef64eda 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c | |||
@@ -30,7 +30,7 @@ posix_acl_set(struct dentry *dentry, const char *name, const void *value, | |||
30 | return -EPERM; | 30 | return -EPERM; |
31 | 31 | ||
32 | if (value) { | 32 | if (value) { |
33 | acl = posix_acl_from_xattr(value, size); | 33 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
34 | if (IS_ERR(acl)) { | 34 | if (IS_ERR(acl)) { |
35 | return PTR_ERR(acl); | 35 | return PTR_ERR(acl); |
36 | } else if (acl) { | 36 | } else if (acl) { |
@@ -77,7 +77,7 @@ posix_acl_get(struct dentry *dentry, const char *name, void *buffer, | |||
77 | return PTR_ERR(acl); | 77 | return PTR_ERR(acl); |
78 | if (acl == NULL) | 78 | if (acl == NULL) |
79 | return -ENODATA; | 79 | return -ENODATA; |
80 | error = posix_acl_to_xattr(acl, buffer, size); | 80 | error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); |
81 | posix_acl_release(acl); | 81 | posix_acl_release(acl); |
82 | 82 | ||
83 | return error; | 83 | return error; |
@@ -121,15 +121,23 @@ static struct posix_acl *posix_acl_from_disk(const void *value, size_t size) | |||
121 | case ACL_OTHER: | 121 | case ACL_OTHER: |
122 | value = (char *)value + | 122 | value = (char *)value + |
123 | sizeof(reiserfs_acl_entry_short); | 123 | sizeof(reiserfs_acl_entry_short); |
124 | acl->a_entries[n].e_id = ACL_UNDEFINED_ID; | ||
125 | break; | 124 | break; |
126 | 125 | ||
127 | case ACL_USER: | 126 | case ACL_USER: |
127 | value = (char *)value + sizeof(reiserfs_acl_entry); | ||
128 | if ((char *)value > end) | ||
129 | goto fail; | ||
130 | acl->a_entries[n].e_uid = | ||
131 | make_kuid(&init_user_ns, | ||
132 | le32_to_cpu(entry->e_id)); | ||
133 | break; | ||
128 | case ACL_GROUP: | 134 | case ACL_GROUP: |
129 | value = (char *)value + sizeof(reiserfs_acl_entry); | 135 | value = (char *)value + sizeof(reiserfs_acl_entry); |
130 | if ((char *)value > end) | 136 | if ((char *)value > end) |
131 | goto fail; | 137 | goto fail; |
132 | acl->a_entries[n].e_id = le32_to_cpu(entry->e_id); | 138 | acl->a_entries[n].e_gid = |
139 | make_kgid(&init_user_ns, | ||
140 | le32_to_cpu(entry->e_id)); | ||
133 | break; | 141 | break; |
134 | 142 | ||
135 | default: | 143 | default: |
@@ -164,13 +172,19 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size) | |||
164 | ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION); | 172 | ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION); |
165 | e = (char *)ext_acl + sizeof(reiserfs_acl_header); | 173 | e = (char *)ext_acl + sizeof(reiserfs_acl_header); |
166 | for (n = 0; n < acl->a_count; n++) { | 174 | for (n = 0; n < acl->a_count; n++) { |
175 | const struct posix_acl_entry *acl_e = &acl->a_entries[n]; | ||
167 | reiserfs_acl_entry *entry = (reiserfs_acl_entry *) e; | 176 | reiserfs_acl_entry *entry = (reiserfs_acl_entry *) e; |
168 | entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); | 177 | entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); |
169 | entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); | 178 | entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); |
170 | switch (acl->a_entries[n].e_tag) { | 179 | switch (acl->a_entries[n].e_tag) { |
171 | case ACL_USER: | 180 | case ACL_USER: |
181 | entry->e_id = cpu_to_le32( | ||
182 | from_kuid(&init_user_ns, acl_e->e_uid)); | ||
183 | e += sizeof(reiserfs_acl_entry); | ||
184 | break; | ||
172 | case ACL_GROUP: | 185 | case ACL_GROUP: |
173 | entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); | 186 | entry->e_id = cpu_to_le32( |
187 | from_kgid(&init_user_ns, acl_e->e_gid)); | ||
174 | e += sizeof(reiserfs_acl_entry); | 188 | e += sizeof(reiserfs_acl_entry); |
175 | break; | 189 | break; |
176 | 190 | ||
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 77c5f2173983..fd7c5f60b46b 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
@@ -648,6 +648,11 @@ error_register: | |||
648 | static void __exit exit_romfs_fs(void) | 648 | static void __exit exit_romfs_fs(void) |
649 | { | 649 | { |
650 | unregister_filesystem(&romfs_fs_type); | 650 | unregister_filesystem(&romfs_fs_type); |
651 | /* | ||
652 | * Make sure all delayed rcu free inodes are flushed before we | ||
653 | * destroy cache. | ||
654 | */ | ||
655 | rcu_barrier(); | ||
651 | kmem_cache_destroy(romfs_inode_cachep); | 656 | kmem_cache_destroy(romfs_inode_cachep); |
652 | } | 657 | } |
653 | 658 | ||
diff --git a/fs/select.c b/fs/select.c index db14c781335e..2ef72d965036 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -220,8 +220,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, | |||
220 | struct poll_table_entry *entry = poll_get_entry(pwq); | 220 | struct poll_table_entry *entry = poll_get_entry(pwq); |
221 | if (!entry) | 221 | if (!entry) |
222 | return; | 222 | return; |
223 | get_file(filp); | 223 | entry->filp = get_file(filp); |
224 | entry->filp = filp; | ||
225 | entry->wait_address = wait_address; | 224 | entry->wait_address = wait_address; |
226 | entry->key = p->_key; | 225 | entry->key = p->_key; |
227 | init_waitqueue_func_entry(&entry->wait, pollwake); | 226 | init_waitqueue_func_entry(&entry->wait, pollwake); |
@@ -429,8 +428,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
429 | for (i = 0; i < n; ++rinp, ++routp, ++rexp) { | 428 | for (i = 0; i < n; ++rinp, ++routp, ++rexp) { |
430 | unsigned long in, out, ex, all_bits, bit = 1, mask, j; | 429 | unsigned long in, out, ex, all_bits, bit = 1, mask, j; |
431 | unsigned long res_in = 0, res_out = 0, res_ex = 0; | 430 | unsigned long res_in = 0, res_out = 0, res_ex = 0; |
432 | const struct file_operations *f_op = NULL; | ||
433 | struct file *file = NULL; | ||
434 | 431 | ||
435 | in = *inp++; out = *outp++; ex = *exp++; | 432 | in = *inp++; out = *outp++; ex = *exp++; |
436 | all_bits = in | out | ex; | 433 | all_bits = in | out | ex; |
@@ -440,20 +437,21 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
440 | } | 437 | } |
441 | 438 | ||
442 | for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { | 439 | for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { |
443 | int fput_needed; | 440 | struct fd f; |
444 | if (i >= n) | 441 | if (i >= n) |
445 | break; | 442 | break; |
446 | if (!(bit & all_bits)) | 443 | if (!(bit & all_bits)) |
447 | continue; | 444 | continue; |
448 | file = fget_light(i, &fput_needed); | 445 | f = fdget(i); |
449 | if (file) { | 446 | if (f.file) { |
450 | f_op = file->f_op; | 447 | const struct file_operations *f_op; |
448 | f_op = f.file->f_op; | ||
451 | mask = DEFAULT_POLLMASK; | 449 | mask = DEFAULT_POLLMASK; |
452 | if (f_op && f_op->poll) { | 450 | if (f_op && f_op->poll) { |
453 | wait_key_set(wait, in, out, bit); | 451 | wait_key_set(wait, in, out, bit); |
454 | mask = (*f_op->poll)(file, wait); | 452 | mask = (*f_op->poll)(f.file, wait); |
455 | } | 453 | } |
456 | fput_light(file, fput_needed); | 454 | fdput(f); |
457 | if ((mask & POLLIN_SET) && (in & bit)) { | 455 | if ((mask & POLLIN_SET) && (in & bit)) { |
458 | res_in |= bit; | 456 | res_in |= bit; |
459 | retval++; | 457 | retval++; |
@@ -726,20 +724,17 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) | |||
726 | mask = 0; | 724 | mask = 0; |
727 | fd = pollfd->fd; | 725 | fd = pollfd->fd; |
728 | if (fd >= 0) { | 726 | if (fd >= 0) { |
729 | int fput_needed; | 727 | struct fd f = fdget(fd); |
730 | struct file * file; | ||
731 | |||
732 | file = fget_light(fd, &fput_needed); | ||
733 | mask = POLLNVAL; | 728 | mask = POLLNVAL; |
734 | if (file != NULL) { | 729 | if (f.file) { |
735 | mask = DEFAULT_POLLMASK; | 730 | mask = DEFAULT_POLLMASK; |
736 | if (file->f_op && file->f_op->poll) { | 731 | if (f.file->f_op && f.file->f_op->poll) { |
737 | pwait->_key = pollfd->events|POLLERR|POLLHUP; | 732 | pwait->_key = pollfd->events|POLLERR|POLLHUP; |
738 | mask = file->f_op->poll(file, pwait); | 733 | mask = f.file->f_op->poll(f.file, pwait); |
739 | } | 734 | } |
740 | /* Mask out unneeded events. */ | 735 | /* Mask out unneeded events. */ |
741 | mask &= pollfd->events | POLLERR | POLLHUP; | 736 | mask &= pollfd->events | POLLERR | POLLHUP; |
742 | fput_light(file, fput_needed); | 737 | fdput(f); |
743 | } | 738 | } |
744 | } | 739 | } |
745 | pollfd->revents = mask; | 740 | pollfd->revents = mask; |
diff --git a/fs/seq_file.c b/fs/seq_file.c index 14cf9de1dbe1..99dffab4c4e4 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/export.h> | 9 | #include <linux/export.h> |
10 | #include <linux/seq_file.h> | 10 | #include <linux/seq_file.h> |
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | #include <linux/cred.h> | ||
12 | 13 | ||
13 | #include <asm/uaccess.h> | 14 | #include <asm/uaccess.h> |
14 | #include <asm/page.h> | 15 | #include <asm/page.h> |
@@ -56,6 +57,9 @@ int seq_open(struct file *file, const struct seq_operations *op) | |||
56 | memset(p, 0, sizeof(*p)); | 57 | memset(p, 0, sizeof(*p)); |
57 | mutex_init(&p->lock); | 58 | mutex_init(&p->lock); |
58 | p->op = op; | 59 | p->op = op; |
60 | #ifdef CONFIG_USER_NS | ||
61 | p->user_ns = file->f_cred->user_ns; | ||
62 | #endif | ||
59 | 63 | ||
60 | /* | 64 | /* |
61 | * Wrappers around seq_open(e.g. swaps_open) need to be | 65 | * Wrappers around seq_open(e.g. swaps_open) need to be |
diff --git a/fs/signalfd.c b/fs/signalfd.c index 9f35a37173de..8bee4e570911 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c | |||
@@ -269,13 +269,12 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, | |||
269 | if (ufd < 0) | 269 | if (ufd < 0) |
270 | kfree(ctx); | 270 | kfree(ctx); |
271 | } else { | 271 | } else { |
272 | int fput_needed; | 272 | struct fd f = fdget(ufd); |
273 | struct file *file = fget_light(ufd, &fput_needed); | 273 | if (!f.file) |
274 | if (!file) | ||
275 | return -EBADF; | 274 | return -EBADF; |
276 | ctx = file->private_data; | 275 | ctx = f.file->private_data; |
277 | if (file->f_op != &signalfd_fops) { | 276 | if (f.file->f_op != &signalfd_fops) { |
278 | fput_light(file, fput_needed); | 277 | fdput(f); |
279 | return -EINVAL; | 278 | return -EINVAL; |
280 | } | 279 | } |
281 | spin_lock_irq(¤t->sighand->siglock); | 280 | spin_lock_irq(¤t->sighand->siglock); |
@@ -283,7 +282,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, | |||
283 | spin_unlock_irq(¤t->sighand->siglock); | 282 | spin_unlock_irq(¤t->sighand->siglock); |
284 | 283 | ||
285 | wake_up(¤t->sighand->signalfd_wqh); | 284 | wake_up(¤t->sighand->signalfd_wqh); |
286 | fput_light(file, fput_needed); | 285 | fdput(f); |
287 | } | 286 | } |
288 | 287 | ||
289 | return ufd; | 288 | return ufd; |
diff --git a/fs/splice.c b/fs/splice.c index 41514dd89462..13e5b4776e7a 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -1666,9 +1666,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, | |||
1666 | SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, | 1666 | SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, |
1667 | unsigned long, nr_segs, unsigned int, flags) | 1667 | unsigned long, nr_segs, unsigned int, flags) |
1668 | { | 1668 | { |
1669 | struct file *file; | 1669 | struct fd f; |
1670 | long error; | 1670 | long error; |
1671 | int fput; | ||
1672 | 1671 | ||
1673 | if (unlikely(nr_segs > UIO_MAXIOV)) | 1672 | if (unlikely(nr_segs > UIO_MAXIOV)) |
1674 | return -EINVAL; | 1673 | return -EINVAL; |
@@ -1676,14 +1675,14 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, | |||
1676 | return 0; | 1675 | return 0; |
1677 | 1676 | ||
1678 | error = -EBADF; | 1677 | error = -EBADF; |
1679 | file = fget_light(fd, &fput); | 1678 | f = fdget(fd); |
1680 | if (file) { | 1679 | if (f.file) { |
1681 | if (file->f_mode & FMODE_WRITE) | 1680 | if (f.file->f_mode & FMODE_WRITE) |
1682 | error = vmsplice_to_pipe(file, iov, nr_segs, flags); | 1681 | error = vmsplice_to_pipe(f.file, iov, nr_segs, flags); |
1683 | else if (file->f_mode & FMODE_READ) | 1682 | else if (f.file->f_mode & FMODE_READ) |
1684 | error = vmsplice_to_user(file, iov, nr_segs, flags); | 1683 | error = vmsplice_to_user(f.file, iov, nr_segs, flags); |
1685 | 1684 | ||
1686 | fput_light(file, fput); | 1685 | fdput(f); |
1687 | } | 1686 | } |
1688 | 1687 | ||
1689 | return error; | 1688 | return error; |
@@ -1693,30 +1692,27 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, | |||
1693 | int, fd_out, loff_t __user *, off_out, | 1692 | int, fd_out, loff_t __user *, off_out, |
1694 | size_t, len, unsigned int, flags) | 1693 | size_t, len, unsigned int, flags) |
1695 | { | 1694 | { |
1695 | struct fd in, out; | ||
1696 | long error; | 1696 | long error; |
1697 | struct file *in, *out; | ||
1698 | int fput_in, fput_out; | ||
1699 | 1697 | ||
1700 | if (unlikely(!len)) | 1698 | if (unlikely(!len)) |
1701 | return 0; | 1699 | return 0; |
1702 | 1700 | ||
1703 | error = -EBADF; | 1701 | error = -EBADF; |
1704 | in = fget_light(fd_in, &fput_in); | 1702 | in = fdget(fd_in); |
1705 | if (in) { | 1703 | if (in.file) { |
1706 | if (in->f_mode & FMODE_READ) { | 1704 | if (in.file->f_mode & FMODE_READ) { |
1707 | out = fget_light(fd_out, &fput_out); | 1705 | out = fdget(fd_out); |
1708 | if (out) { | 1706 | if (out.file) { |
1709 | if (out->f_mode & FMODE_WRITE) | 1707 | if (out.file->f_mode & FMODE_WRITE) |
1710 | error = do_splice(in, off_in, | 1708 | error = do_splice(in.file, off_in, |
1711 | out, off_out, | 1709 | out.file, off_out, |
1712 | len, flags); | 1710 | len, flags); |
1713 | fput_light(out, fput_out); | 1711 | fdput(out); |
1714 | } | 1712 | } |
1715 | } | 1713 | } |
1716 | 1714 | fdput(in); | |
1717 | fput_light(in, fput_in); | ||
1718 | } | 1715 | } |
1719 | |||
1720 | return error; | 1716 | return error; |
1721 | } | 1717 | } |
1722 | 1718 | ||
@@ -2027,26 +2023,25 @@ static long do_tee(struct file *in, struct file *out, size_t len, | |||
2027 | 2023 | ||
2028 | SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) | 2024 | SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) |
2029 | { | 2025 | { |
2030 | struct file *in; | 2026 | struct fd in; |
2031 | int error, fput_in; | 2027 | int error; |
2032 | 2028 | ||
2033 | if (unlikely(!len)) | 2029 | if (unlikely(!len)) |
2034 | return 0; | 2030 | return 0; |
2035 | 2031 | ||
2036 | error = -EBADF; | 2032 | error = -EBADF; |
2037 | in = fget_light(fdin, &fput_in); | 2033 | in = fdget(fdin); |
2038 | if (in) { | 2034 | if (in.file) { |
2039 | if (in->f_mode & FMODE_READ) { | 2035 | if (in.file->f_mode & FMODE_READ) { |
2040 | int fput_out; | 2036 | struct fd out = fdget(fdout); |
2041 | struct file *out = fget_light(fdout, &fput_out); | 2037 | if (out.file) { |
2042 | 2038 | if (out.file->f_mode & FMODE_WRITE) | |
2043 | if (out) { | 2039 | error = do_tee(in.file, out.file, |
2044 | if (out->f_mode & FMODE_WRITE) | 2040 | len, flags); |
2045 | error = do_tee(in, out, len, flags); | 2041 | fdput(out); |
2046 | fput_light(out, fput_out); | ||
2047 | } | 2042 | } |
2048 | } | 2043 | } |
2049 | fput_light(in, fput_in); | 2044 | fdput(in); |
2050 | } | 2045 | } |
2051 | 2046 | ||
2052 | return error; | 2047 | return error; |
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index 81afbccfa843..a1ce5ce60632 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c | |||
@@ -56,16 +56,20 @@ | |||
56 | static int squashfs_new_inode(struct super_block *sb, struct inode *inode, | 56 | static int squashfs_new_inode(struct super_block *sb, struct inode *inode, |
57 | struct squashfs_base_inode *sqsh_ino) | 57 | struct squashfs_base_inode *sqsh_ino) |
58 | { | 58 | { |
59 | uid_t i_uid; | ||
60 | gid_t i_gid; | ||
59 | int err; | 61 | int err; |
60 | 62 | ||
61 | err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &inode->i_uid); | 63 | err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid); |
62 | if (err) | 64 | if (err) |
63 | return err; | 65 | return err; |
64 | 66 | ||
65 | err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->guid), &inode->i_gid); | 67 | err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->guid), &i_gid); |
66 | if (err) | 68 | if (err) |
67 | return err; | 69 | return err; |
68 | 70 | ||
71 | i_uid_write(inode, i_uid); | ||
72 | i_gid_write(inode, i_gid); | ||
69 | inode->i_ino = le32_to_cpu(sqsh_ino->inode_number); | 73 | inode->i_ino = le32_to_cpu(sqsh_ino->inode_number); |
70 | inode->i_mtime.tv_sec = le32_to_cpu(sqsh_ino->mtime); | 74 | inode->i_mtime.tv_sec = le32_to_cpu(sqsh_ino->mtime); |
71 | inode->i_atime.tv_sec = inode->i_mtime.tv_sec; | 75 | inode->i_atime.tv_sec = inode->i_mtime.tv_sec; |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 29cd014ed3a1..260e3928d4f5 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -425,6 +425,11 @@ static int __init init_inodecache(void) | |||
425 | 425 | ||
426 | static void destroy_inodecache(void) | 426 | static void destroy_inodecache(void) |
427 | { | 427 | { |
428 | /* | ||
429 | * Make sure all delayed rcu free inodes are flushed before we | ||
430 | * destroy cache. | ||
431 | */ | ||
432 | rcu_barrier(); | ||
428 | kmem_cache_destroy(squashfs_inode_cachep); | 433 | kmem_cache_destroy(squashfs_inode_cachep); |
429 | } | 434 | } |
430 | 435 | ||
@@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr); | |||
57 | 57 | ||
58 | int vfs_fstat(unsigned int fd, struct kstat *stat) | 58 | int vfs_fstat(unsigned int fd, struct kstat *stat) |
59 | { | 59 | { |
60 | int fput_needed; | 60 | struct fd f = fdget_raw(fd); |
61 | struct file *f = fget_light(fd, &fput_needed); | ||
62 | int error = -EBADF; | 61 | int error = -EBADF; |
63 | 62 | ||
64 | if (f) { | 63 | if (f.file) { |
65 | error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); | 64 | error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry, |
66 | fput_light(f, fput_needed); | 65 | stat); |
66 | fdput(f); | ||
67 | } | 67 | } |
68 | return error; | 68 | return error; |
69 | } | 69 | } |
@@ -326,7 +326,7 @@ SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf, | |||
326 | 326 | ||
327 | 327 | ||
328 | /* ---------- LFS-64 ----------- */ | 328 | /* ---------- LFS-64 ----------- */ |
329 | #ifdef __ARCH_WANT_STAT64 | 329 | #if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) |
330 | 330 | ||
331 | #ifndef INIT_STRUCT_STAT64_PADDING | 331 | #ifndef INIT_STRUCT_STAT64_PADDING |
332 | # define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st)) | 332 | # define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st)) |
@@ -415,7 +415,7 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, | |||
415 | return error; | 415 | return error; |
416 | return cp_new_stat64(&stat, statbuf); | 416 | return cp_new_stat64(&stat, statbuf); |
417 | } | 417 | } |
418 | #endif /* __ARCH_WANT_STAT64 */ | 418 | #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ |
419 | 419 | ||
420 | /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ | 420 | /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ |
421 | void __inode_add_bytes(struct inode *inode, loff_t bytes) | 421 | void __inode_add_bytes(struct inode *inode, loff_t bytes) |
diff --git a/fs/statfs.c b/fs/statfs.c index 95ad5c0e586c..f8e832e6f0a2 100644 --- a/fs/statfs.c +++ b/fs/statfs.c | |||
@@ -87,12 +87,11 @@ int user_statfs(const char __user *pathname, struct kstatfs *st) | |||
87 | 87 | ||
88 | int fd_statfs(int fd, struct kstatfs *st) | 88 | int fd_statfs(int fd, struct kstatfs *st) |
89 | { | 89 | { |
90 | int fput_needed; | 90 | struct fd f = fdget(fd); |
91 | struct file *file = fget_light(fd, &fput_needed); | ||
92 | int error = -EBADF; | 91 | int error = -EBADF; |
93 | if (file) { | 92 | if (f.file) { |
94 | error = vfs_statfs(&file->f_path, st); | 93 | error = vfs_statfs(&f.file->f_path, st); |
95 | fput_light(file, fput_needed); | 94 | fdput(f); |
96 | } | 95 | } |
97 | return error; | 96 | return error; |
98 | } | 97 | } |
diff --git a/fs/super.c b/fs/super.c index 0902cfa6a12e..a3bc935069d9 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -307,12 +307,6 @@ void deactivate_locked_super(struct super_block *s) | |||
307 | 307 | ||
308 | /* caches are now gone, we can safely kill the shrinker now */ | 308 | /* caches are now gone, we can safely kill the shrinker now */ |
309 | unregister_shrinker(&s->s_shrink); | 309 | unregister_shrinker(&s->s_shrink); |
310 | |||
311 | /* | ||
312 | * We need to call rcu_barrier so all the delayed rcu free | ||
313 | * inodes are flushed before we release the fs module. | ||
314 | */ | ||
315 | rcu_barrier(); | ||
316 | put_filesystem(fs); | 310 | put_filesystem(fs); |
317 | put_super(s); | 311 | put_super(s); |
318 | } else { | 312 | } else { |
@@ -871,7 +865,7 @@ int get_anon_bdev(dev_t *p) | |||
871 | else if (error) | 865 | else if (error) |
872 | return -EAGAIN; | 866 | return -EAGAIN; |
873 | 867 | ||
874 | if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { | 868 | if ((dev & MAX_IDR_MASK) == (1 << MINORBITS)) { |
875 | spin_lock(&unnamed_dev_lock); | 869 | spin_lock(&unnamed_dev_lock); |
876 | ida_remove(&unnamed_dev_ida, dev); | 870 | ida_remove(&unnamed_dev_ida, dev); |
877 | if (unnamed_dev_start > dev) | 871 | if (unnamed_dev_start > dev) |
@@ -148,21 +148,19 @@ void emergency_sync(void) | |||
148 | */ | 148 | */ |
149 | SYSCALL_DEFINE1(syncfs, int, fd) | 149 | SYSCALL_DEFINE1(syncfs, int, fd) |
150 | { | 150 | { |
151 | struct file *file; | 151 | struct fd f = fdget(fd); |
152 | struct super_block *sb; | 152 | struct super_block *sb; |
153 | int ret; | 153 | int ret; |
154 | int fput_needed; | ||
155 | 154 | ||
156 | file = fget_light(fd, &fput_needed); | 155 | if (!f.file) |
157 | if (!file) | ||
158 | return -EBADF; | 156 | return -EBADF; |
159 | sb = file->f_dentry->d_sb; | 157 | sb = f.file->f_dentry->d_sb; |
160 | 158 | ||
161 | down_read(&sb->s_umount); | 159 | down_read(&sb->s_umount); |
162 | ret = sync_filesystem(sb); | 160 | ret = sync_filesystem(sb); |
163 | up_read(&sb->s_umount); | 161 | up_read(&sb->s_umount); |
164 | 162 | ||
165 | fput_light(file, fput_needed); | 163 | fdput(f); |
166 | return ret; | 164 | return ret; |
167 | } | 165 | } |
168 | 166 | ||
@@ -201,14 +199,12 @@ EXPORT_SYMBOL(vfs_fsync); | |||
201 | 199 | ||
202 | static int do_fsync(unsigned int fd, int datasync) | 200 | static int do_fsync(unsigned int fd, int datasync) |
203 | { | 201 | { |
204 | struct file *file; | 202 | struct fd f = fdget(fd); |
205 | int ret = -EBADF; | 203 | int ret = -EBADF; |
206 | int fput_needed; | ||
207 | 204 | ||
208 | file = fget_light(fd, &fput_needed); | 205 | if (f.file) { |
209 | if (file) { | 206 | ret = vfs_fsync(f.file, datasync); |
210 | ret = vfs_fsync(file, datasync); | 207 | fdput(f); |
211 | fput_light(file, fput_needed); | ||
212 | } | 208 | } |
213 | return ret; | 209 | return ret; |
214 | } | 210 | } |
@@ -291,10 +287,9 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, | |||
291 | unsigned int flags) | 287 | unsigned int flags) |
292 | { | 288 | { |
293 | int ret; | 289 | int ret; |
294 | struct file *file; | 290 | struct fd f; |
295 | struct address_space *mapping; | 291 | struct address_space *mapping; |
296 | loff_t endbyte; /* inclusive */ | 292 | loff_t endbyte; /* inclusive */ |
297 | int fput_needed; | ||
298 | umode_t i_mode; | 293 | umode_t i_mode; |
299 | 294 | ||
300 | ret = -EINVAL; | 295 | ret = -EINVAL; |
@@ -333,17 +328,17 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, | |||
333 | endbyte--; /* inclusive */ | 328 | endbyte--; /* inclusive */ |
334 | 329 | ||
335 | ret = -EBADF; | 330 | ret = -EBADF; |
336 | file = fget_light(fd, &fput_needed); | 331 | f = fdget(fd); |
337 | if (!file) | 332 | if (!f.file) |
338 | goto out; | 333 | goto out; |
339 | 334 | ||
340 | i_mode = file->f_path.dentry->d_inode->i_mode; | 335 | i_mode = f.file->f_path.dentry->d_inode->i_mode; |
341 | ret = -ESPIPE; | 336 | ret = -ESPIPE; |
342 | if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && | 337 | if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && |
343 | !S_ISLNK(i_mode)) | 338 | !S_ISLNK(i_mode)) |
344 | goto out_put; | 339 | goto out_put; |
345 | 340 | ||
346 | mapping = file->f_mapping; | 341 | mapping = f.file->f_mapping; |
347 | if (!mapping) { | 342 | if (!mapping) { |
348 | ret = -EINVAL; | 343 | ret = -EINVAL; |
349 | goto out_put; | 344 | goto out_put; |
@@ -366,7 +361,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, | |||
366 | ret = filemap_fdatawait_range(mapping, offset, endbyte); | 361 | ret = filemap_fdatawait_range(mapping, offset, endbyte); |
367 | 362 | ||
368 | out_put: | 363 | out_put: |
369 | fput_light(file, fput_needed); | 364 | fdput(f); |
370 | out: | 365 | out: |
371 | return ret; | 366 | return ret; |
372 | } | 367 | } |
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index a7ac78f8e67a..3c9eb5624f5e 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c | |||
@@ -113,7 +113,7 @@ int sysfs_create_link(struct kobject *kobj, struct kobject *target, | |||
113 | * @target: object we're pointing to. | 113 | * @target: object we're pointing to. |
114 | * @name: name of the symlink. | 114 | * @name: name of the symlink. |
115 | * | 115 | * |
116 | * This function does the same as sysf_create_link(), but it | 116 | * This function does the same as sysfs_create_link(), but it |
117 | * doesn't warn if the link already exists. | 117 | * doesn't warn if the link already exists. |
118 | */ | 118 | */ |
119 | int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target, | 119 | int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target, |
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 80e1e2b18df1..d33e506c1eac 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c | |||
@@ -202,8 +202,8 @@ struct inode *sysv_iget(struct super_block *sb, unsigned int ino) | |||
202 | } | 202 | } |
203 | /* SystemV FS: kludge permissions if ino==SYSV_ROOT_INO ?? */ | 203 | /* SystemV FS: kludge permissions if ino==SYSV_ROOT_INO ?? */ |
204 | inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode); | 204 | inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode); |
205 | inode->i_uid = (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid); | 205 | i_uid_write(inode, (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid)); |
206 | inode->i_gid = (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid); | 206 | i_gid_write(inode, (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid)); |
207 | set_nlink(inode, fs16_to_cpu(sbi, raw_inode->i_nlink)); | 207 | set_nlink(inode, fs16_to_cpu(sbi, raw_inode->i_nlink)); |
208 | inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size); | 208 | inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size); |
209 | inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime); | 209 | inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime); |
@@ -256,8 +256,8 @@ static int __sysv_write_inode(struct inode *inode, int wait) | |||
256 | } | 256 | } |
257 | 257 | ||
258 | raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode); | 258 | raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode); |
259 | raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid)); | 259 | raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(i_uid_read(inode))); |
260 | raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid)); | 260 | raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(i_gid_read(inode))); |
261 | raw_inode->i_nlink = cpu_to_fs16(sbi, inode->i_nlink); | 261 | raw_inode->i_nlink = cpu_to_fs16(sbi, inode->i_nlink); |
262 | raw_inode->i_size = cpu_to_fs32(sbi, inode->i_size); | 262 | raw_inode->i_size = cpu_to_fs32(sbi, inode->i_size); |
263 | raw_inode->i_atime = cpu_to_fs32(sbi, inode->i_atime.tv_sec); | 263 | raw_inode->i_atime = cpu_to_fs32(sbi, inode->i_atime.tv_sec); |
@@ -360,5 +360,10 @@ int __init sysv_init_icache(void) | |||
360 | 360 | ||
361 | void sysv_destroy_icache(void) | 361 | void sysv_destroy_icache(void) |
362 | { | 362 | { |
363 | /* | ||
364 | * Make sure all delayed rcu free inodes are flushed before we | ||
365 | * destroy cache. | ||
366 | */ | ||
367 | rcu_barrier(); | ||
363 | kmem_cache_destroy(sysv_inode_cachep); | 368 | kmem_cache_destroy(sysv_inode_cachep); |
364 | } | 369 | } |
diff --git a/fs/timerfd.c b/fs/timerfd.c index dffeb3795af1..d03822bbf190 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
@@ -234,19 +234,17 @@ static const struct file_operations timerfd_fops = { | |||
234 | .llseek = noop_llseek, | 234 | .llseek = noop_llseek, |
235 | }; | 235 | }; |
236 | 236 | ||
237 | static struct file *timerfd_fget(int fd) | 237 | static int timerfd_fget(int fd, struct fd *p) |
238 | { | 238 | { |
239 | struct file *file; | 239 | struct fd f = fdget(fd); |
240 | 240 | if (!f.file) | |
241 | file = fget(fd); | 241 | return -EBADF; |
242 | if (!file) | 242 | if (f.file->f_op != &timerfd_fops) { |
243 | return ERR_PTR(-EBADF); | 243 | fdput(f); |
244 | if (file->f_op != &timerfd_fops) { | 244 | return -EINVAL; |
245 | fput(file); | ||
246 | return ERR_PTR(-EINVAL); | ||
247 | } | 245 | } |
248 | 246 | *p = f; | |
249 | return file; | 247 | return 0; |
250 | } | 248 | } |
251 | 249 | ||
252 | SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) | 250 | SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) |
@@ -284,7 +282,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, | |||
284 | const struct itimerspec __user *, utmr, | 282 | const struct itimerspec __user *, utmr, |
285 | struct itimerspec __user *, otmr) | 283 | struct itimerspec __user *, otmr) |
286 | { | 284 | { |
287 | struct file *file; | 285 | struct fd f; |
288 | struct timerfd_ctx *ctx; | 286 | struct timerfd_ctx *ctx; |
289 | struct itimerspec ktmr, kotmr; | 287 | struct itimerspec ktmr, kotmr; |
290 | int ret; | 288 | int ret; |
@@ -297,10 +295,10 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, | |||
297 | !timespec_valid(&ktmr.it_interval)) | 295 | !timespec_valid(&ktmr.it_interval)) |
298 | return -EINVAL; | 296 | return -EINVAL; |
299 | 297 | ||
300 | file = timerfd_fget(ufd); | 298 | ret = timerfd_fget(ufd, &f); |
301 | if (IS_ERR(file)) | 299 | if (ret) |
302 | return PTR_ERR(file); | 300 | return ret; |
303 | ctx = file->private_data; | 301 | ctx = f.file->private_data; |
304 | 302 | ||
305 | timerfd_setup_cancel(ctx, flags); | 303 | timerfd_setup_cancel(ctx, flags); |
306 | 304 | ||
@@ -334,7 +332,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, | |||
334 | ret = timerfd_setup(ctx, flags, &ktmr); | 332 | ret = timerfd_setup(ctx, flags, &ktmr); |
335 | 333 | ||
336 | spin_unlock_irq(&ctx->wqh.lock); | 334 | spin_unlock_irq(&ctx->wqh.lock); |
337 | fput(file); | 335 | fdput(f); |
338 | if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) | 336 | if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) |
339 | return -EFAULT; | 337 | return -EFAULT; |
340 | 338 | ||
@@ -343,14 +341,13 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, | |||
343 | 341 | ||
344 | SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) | 342 | SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) |
345 | { | 343 | { |
346 | struct file *file; | 344 | struct fd f; |
347 | struct timerfd_ctx *ctx; | 345 | struct timerfd_ctx *ctx; |
348 | struct itimerspec kotmr; | 346 | struct itimerspec kotmr; |
349 | 347 | int ret = timerfd_fget(ufd, &f); | |
350 | file = timerfd_fget(ufd); | 348 | if (ret) |
351 | if (IS_ERR(file)) | 349 | return ret; |
352 | return PTR_ERR(file); | 350 | ctx = f.file->private_data; |
353 | ctx = file->private_data; | ||
354 | 351 | ||
355 | spin_lock_irq(&ctx->wqh.lock); | 352 | spin_lock_irq(&ctx->wqh.lock); |
356 | if (ctx->expired && ctx->tintv.tv64) { | 353 | if (ctx->expired && ctx->tintv.tv64) { |
@@ -362,7 +359,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) | |||
362 | kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); | 359 | kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); |
363 | kotmr.it_interval = ktime_to_timespec(ctx->tintv); | 360 | kotmr.it_interval = ktime_to_timespec(ctx->tintv); |
364 | spin_unlock_irq(&ctx->wqh.lock); | 361 | spin_unlock_irq(&ctx->wqh.lock); |
365 | fput(file); | 362 | fdput(f); |
366 | 363 | ||
367 | return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; | 364 | return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; |
368 | } | 365 | } |
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index bc4f94b28706..e8e01d74dc05 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -272,8 +272,8 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) | |||
272 | */ | 272 | */ |
273 | static int can_use_rp(struct ubifs_info *c) | 273 | static int can_use_rp(struct ubifs_info *c) |
274 | { | 274 | { |
275 | if (current_fsuid() == c->rp_uid || capable(CAP_SYS_RESOURCE) || | 275 | if (uid_eq(current_fsuid(), c->rp_uid) || capable(CAP_SYS_RESOURCE) || |
276 | (c->rp_gid != 0 && in_group_p(c->rp_gid))) | 276 | (!gid_eq(c->rp_gid, GLOBAL_ROOT_GID) && in_group_p(c->rp_gid))) |
277 | return 1; | 277 | return 1; |
278 | return 0; | 278 | return 0; |
279 | } | 279 | } |
@@ -342,9 +342,8 @@ static int do_budget_space(struct ubifs_info *c) | |||
342 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | 342 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - |
343 | c->lst.taken_empty_lebs; | 343 | c->lst.taken_empty_lebs; |
344 | if (unlikely(rsvd_idx_lebs > lebs)) { | 344 | if (unlikely(rsvd_idx_lebs > lebs)) { |
345 | dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " | 345 | dbg_budg("out of indexing space: min_idx_lebs %d (old %d), rsvd_idx_lebs %d", |
346 | "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs, | 346 | min_idx_lebs, c->bi.min_idx_lebs, rsvd_idx_lebs); |
347 | rsvd_idx_lebs); | ||
348 | return -ENOSPC; | 347 | return -ENOSPC; |
349 | } | 348 | } |
350 | 349 | ||
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 8eda717cb99b..ff8229340cd5 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c | |||
@@ -293,8 +293,8 @@ int ubifs_bg_thread(void *info) | |||
293 | int err; | 293 | int err; |
294 | struct ubifs_info *c = info; | 294 | struct ubifs_info *c = info; |
295 | 295 | ||
296 | dbg_msg("background thread \"%s\" started, PID %d", | 296 | ubifs_msg("background thread \"%s\" started, PID %d", |
297 | c->bgt_name, current->pid); | 297 | c->bgt_name, current->pid); |
298 | set_freezable(); | 298 | set_freezable(); |
299 | 299 | ||
300 | while (1) { | 300 | while (1) { |
@@ -328,7 +328,7 @@ int ubifs_bg_thread(void *info) | |||
328 | cond_resched(); | 328 | cond_resched(); |
329 | } | 329 | } |
330 | 330 | ||
331 | dbg_msg("background thread \"%s\" stops", c->bgt_name); | 331 | ubifs_msg("background thread \"%s\" stops", c->bgt_name); |
332 | return 0; | 332 | return 0; |
333 | } | 333 | } |
334 | 334 | ||
@@ -514,7 +514,7 @@ struct idx_node { | |||
514 | struct list_head list; | 514 | struct list_head list; |
515 | int iip; | 515 | int iip; |
516 | union ubifs_key upper_key; | 516 | union ubifs_key upper_key; |
517 | struct ubifs_idx_node idx __attribute__((aligned(8))); | 517 | struct ubifs_idx_node idx __aligned(8); |
518 | }; | 518 | }; |
519 | 519 | ||
520 | /** | 520 | /** |
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index 11e4132f314a..2bfa0953335d 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c | |||
@@ -112,8 +112,7 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, | |||
112 | if (compr->comp_mutex) | 112 | if (compr->comp_mutex) |
113 | mutex_unlock(compr->comp_mutex); | 113 | mutex_unlock(compr->comp_mutex); |
114 | if (unlikely(err)) { | 114 | if (unlikely(err)) { |
115 | ubifs_warn("cannot compress %d bytes, compressor %s, " | 115 | ubifs_warn("cannot compress %d bytes, compressor %s, error %d, leave data uncompressed", |
116 | "error %d, leave data uncompressed", | ||
117 | in_len, compr->name, err); | 116 | in_len, compr->name, err); |
118 | goto no_compr; | 117 | goto no_compr; |
119 | } | 118 | } |
@@ -176,8 +175,8 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, | |||
176 | if (compr->decomp_mutex) | 175 | if (compr->decomp_mutex) |
177 | mutex_unlock(compr->decomp_mutex); | 176 | mutex_unlock(compr->decomp_mutex); |
178 | if (err) | 177 | if (err) |
179 | ubifs_err("cannot decompress %d bytes, compressor %s, " | 178 | ubifs_err("cannot decompress %d bytes, compressor %s, error %d", |
180 | "error %d", in_len, compr->name, err); | 179 | in_len, compr->name, err); |
181 | 180 | ||
182 | return err; | 181 | return err; |
183 | } | 182 | } |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index bb3167257aab..62911637e12f 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -219,15 +219,15 @@ const char *dbg_jhead(int jhead) | |||
219 | 219 | ||
220 | static void dump_ch(const struct ubifs_ch *ch) | 220 | static void dump_ch(const struct ubifs_ch *ch) |
221 | { | 221 | { |
222 | printk(KERN_ERR "\tmagic %#x\n", le32_to_cpu(ch->magic)); | 222 | pr_err("\tmagic %#x\n", le32_to_cpu(ch->magic)); |
223 | printk(KERN_ERR "\tcrc %#x\n", le32_to_cpu(ch->crc)); | 223 | pr_err("\tcrc %#x\n", le32_to_cpu(ch->crc)); |
224 | printk(KERN_ERR "\tnode_type %d (%s)\n", ch->node_type, | 224 | pr_err("\tnode_type %d (%s)\n", ch->node_type, |
225 | dbg_ntype(ch->node_type)); | 225 | dbg_ntype(ch->node_type)); |
226 | printk(KERN_ERR "\tgroup_type %d (%s)\n", ch->group_type, | 226 | pr_err("\tgroup_type %d (%s)\n", ch->group_type, |
227 | dbg_gtype(ch->group_type)); | 227 | dbg_gtype(ch->group_type)); |
228 | printk(KERN_ERR "\tsqnum %llu\n", | 228 | pr_err("\tsqnum %llu\n", |
229 | (unsigned long long)le64_to_cpu(ch->sqnum)); | 229 | (unsigned long long)le64_to_cpu(ch->sqnum)); |
230 | printk(KERN_ERR "\tlen %u\n", le32_to_cpu(ch->len)); | 230 | pr_err("\tlen %u\n", le32_to_cpu(ch->len)); |
231 | } | 231 | } |
232 | 232 | ||
233 | void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) | 233 | void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) |
@@ -238,43 +238,43 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) | |||
238 | struct ubifs_dent_node *dent, *pdent = NULL; | 238 | struct ubifs_dent_node *dent, *pdent = NULL; |
239 | int count = 2; | 239 | int count = 2; |
240 | 240 | ||
241 | printk(KERN_ERR "Dump in-memory inode:"); | 241 | pr_err("Dump in-memory inode:"); |
242 | printk(KERN_ERR "\tinode %lu\n", inode->i_ino); | 242 | pr_err("\tinode %lu\n", inode->i_ino); |
243 | printk(KERN_ERR "\tsize %llu\n", | 243 | pr_err("\tsize %llu\n", |
244 | (unsigned long long)i_size_read(inode)); | 244 | (unsigned long long)i_size_read(inode)); |
245 | printk(KERN_ERR "\tnlink %u\n", inode->i_nlink); | 245 | pr_err("\tnlink %u\n", inode->i_nlink); |
246 | printk(KERN_ERR "\tuid %u\n", (unsigned int)inode->i_uid); | 246 | pr_err("\tuid %u\n", (unsigned int)i_uid_read(inode)); |
247 | printk(KERN_ERR "\tgid %u\n", (unsigned int)inode->i_gid); | 247 | pr_err("\tgid %u\n", (unsigned int)i_gid_read(inode)); |
248 | printk(KERN_ERR "\tatime %u.%u\n", | 248 | pr_err("\tatime %u.%u\n", |
249 | (unsigned int)inode->i_atime.tv_sec, | 249 | (unsigned int)inode->i_atime.tv_sec, |
250 | (unsigned int)inode->i_atime.tv_nsec); | 250 | (unsigned int)inode->i_atime.tv_nsec); |
251 | printk(KERN_ERR "\tmtime %u.%u\n", | 251 | pr_err("\tmtime %u.%u\n", |
252 | (unsigned int)inode->i_mtime.tv_sec, | 252 | (unsigned int)inode->i_mtime.tv_sec, |
253 | (unsigned int)inode->i_mtime.tv_nsec); | 253 | (unsigned int)inode->i_mtime.tv_nsec); |
254 | printk(KERN_ERR "\tctime %u.%u\n", | 254 | pr_err("\tctime %u.%u\n", |
255 | (unsigned int)inode->i_ctime.tv_sec, | 255 | (unsigned int)inode->i_ctime.tv_sec, |
256 | (unsigned int)inode->i_ctime.tv_nsec); | 256 | (unsigned int)inode->i_ctime.tv_nsec); |
257 | printk(KERN_ERR "\tcreat_sqnum %llu\n", ui->creat_sqnum); | 257 | pr_err("\tcreat_sqnum %llu\n", ui->creat_sqnum); |
258 | printk(KERN_ERR "\txattr_size %u\n", ui->xattr_size); | 258 | pr_err("\txattr_size %u\n", ui->xattr_size); |
259 | printk(KERN_ERR "\txattr_cnt %u\n", ui->xattr_cnt); | 259 | pr_err("\txattr_cnt %u\n", ui->xattr_cnt); |
260 | printk(KERN_ERR "\txattr_names %u\n", ui->xattr_names); | 260 | pr_err("\txattr_names %u\n", ui->xattr_names); |
261 | printk(KERN_ERR "\tdirty %u\n", ui->dirty); | 261 | pr_err("\tdirty %u\n", ui->dirty); |
262 | printk(KERN_ERR "\txattr %u\n", ui->xattr); | 262 | pr_err("\txattr %u\n", ui->xattr); |
263 | printk(KERN_ERR "\tbulk_read %u\n", ui->xattr); | 263 | pr_err("\tbulk_read %u\n", ui->xattr); |
264 | printk(KERN_ERR "\tsynced_i_size %llu\n", | 264 | pr_err("\tsynced_i_size %llu\n", |
265 | (unsigned long long)ui->synced_i_size); | 265 | (unsigned long long)ui->synced_i_size); |
266 | printk(KERN_ERR "\tui_size %llu\n", | 266 | pr_err("\tui_size %llu\n", |
267 | (unsigned long long)ui->ui_size); | 267 | (unsigned long long)ui->ui_size); |
268 | printk(KERN_ERR "\tflags %d\n", ui->flags); | 268 | pr_err("\tflags %d\n", ui->flags); |
269 | printk(KERN_ERR "\tcompr_type %d\n", ui->compr_type); | 269 | pr_err("\tcompr_type %d\n", ui->compr_type); |
270 | printk(KERN_ERR "\tlast_page_read %lu\n", ui->last_page_read); | 270 | pr_err("\tlast_page_read %lu\n", ui->last_page_read); |
271 | printk(KERN_ERR "\tread_in_a_row %lu\n", ui->read_in_a_row); | 271 | pr_err("\tread_in_a_row %lu\n", ui->read_in_a_row); |
272 | printk(KERN_ERR "\tdata_len %d\n", ui->data_len); | 272 | pr_err("\tdata_len %d\n", ui->data_len); |
273 | 273 | ||
274 | if (!S_ISDIR(inode->i_mode)) | 274 | if (!S_ISDIR(inode->i_mode)) |
275 | return; | 275 | return; |
276 | 276 | ||
277 | printk(KERN_ERR "List of directory entries:\n"); | 277 | pr_err("List of directory entries:\n"); |
278 | ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); | 278 | ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); |
279 | 279 | ||
280 | lowest_dent_key(c, &key, inode->i_ino); | 280 | lowest_dent_key(c, &key, inode->i_ino); |
@@ -282,11 +282,11 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) | |||
282 | dent = ubifs_tnc_next_ent(c, &key, &nm); | 282 | dent = ubifs_tnc_next_ent(c, &key, &nm); |
283 | if (IS_ERR(dent)) { | 283 | if (IS_ERR(dent)) { |
284 | if (PTR_ERR(dent) != -ENOENT) | 284 | if (PTR_ERR(dent) != -ENOENT) |
285 | printk(KERN_ERR "error %ld\n", PTR_ERR(dent)); | 285 | pr_err("error %ld\n", PTR_ERR(dent)); |
286 | break; | 286 | break; |
287 | } | 287 | } |
288 | 288 | ||
289 | printk(KERN_ERR "\t%d: %s (%s)\n", | 289 | pr_err("\t%d: %s (%s)\n", |
290 | count++, dent->name, get_dent_type(dent->type)); | 290 | count++, dent->name, get_dent_type(dent->type)); |
291 | 291 | ||
292 | nm.name = dent->name; | 292 | nm.name = dent->name; |
@@ -305,12 +305,9 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) | |||
305 | const struct ubifs_ch *ch = node; | 305 | const struct ubifs_ch *ch = node; |
306 | char key_buf[DBG_KEY_BUF_LEN]; | 306 | char key_buf[DBG_KEY_BUF_LEN]; |
307 | 307 | ||
308 | if (dbg_is_tst_rcvry(c)) | ||
309 | return; | ||
310 | |||
311 | /* If the magic is incorrect, just hexdump the first bytes */ | 308 | /* If the magic is incorrect, just hexdump the first bytes */ |
312 | if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { | 309 | if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { |
313 | printk(KERN_ERR "Not a node, first %zu bytes:", UBIFS_CH_SZ); | 310 | pr_err("Not a node, first %zu bytes:", UBIFS_CH_SZ); |
314 | print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1, | 311 | print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1, |
315 | (void *)node, UBIFS_CH_SZ, 1); | 312 | (void *)node, UBIFS_CH_SZ, 1); |
316 | return; | 313 | return; |
@@ -324,8 +321,7 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) | |||
324 | { | 321 | { |
325 | const struct ubifs_pad_node *pad = node; | 322 | const struct ubifs_pad_node *pad = node; |
326 | 323 | ||
327 | printk(KERN_ERR "\tpad_len %u\n", | 324 | pr_err("\tpad_len %u\n", le32_to_cpu(pad->pad_len)); |
328 | le32_to_cpu(pad->pad_len)); | ||
329 | break; | 325 | break; |
330 | } | 326 | } |
331 | case UBIFS_SB_NODE: | 327 | case UBIFS_SB_NODE: |
@@ -333,112 +329,77 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) | |||
333 | const struct ubifs_sb_node *sup = node; | 329 | const struct ubifs_sb_node *sup = node; |
334 | unsigned int sup_flags = le32_to_cpu(sup->flags); | 330 | unsigned int sup_flags = le32_to_cpu(sup->flags); |
335 | 331 | ||
336 | printk(KERN_ERR "\tkey_hash %d (%s)\n", | 332 | pr_err("\tkey_hash %d (%s)\n", |
337 | (int)sup->key_hash, get_key_hash(sup->key_hash)); | 333 | (int)sup->key_hash, get_key_hash(sup->key_hash)); |
338 | printk(KERN_ERR "\tkey_fmt %d (%s)\n", | 334 | pr_err("\tkey_fmt %d (%s)\n", |
339 | (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); | 335 | (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); |
340 | printk(KERN_ERR "\tflags %#x\n", sup_flags); | 336 | pr_err("\tflags %#x\n", sup_flags); |
341 | printk(KERN_ERR "\t big_lpt %u\n", | 337 | pr_err("\t big_lpt %u\n", |
342 | !!(sup_flags & UBIFS_FLG_BIGLPT)); | 338 | !!(sup_flags & UBIFS_FLG_BIGLPT)); |
343 | printk(KERN_ERR "\t space_fixup %u\n", | 339 | pr_err("\t space_fixup %u\n", |
344 | !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); | 340 | !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); |
345 | printk(KERN_ERR "\tmin_io_size %u\n", | 341 | pr_err("\tmin_io_size %u\n", le32_to_cpu(sup->min_io_size)); |
346 | le32_to_cpu(sup->min_io_size)); | 342 | pr_err("\tleb_size %u\n", le32_to_cpu(sup->leb_size)); |
347 | printk(KERN_ERR "\tleb_size %u\n", | 343 | pr_err("\tleb_cnt %u\n", le32_to_cpu(sup->leb_cnt)); |
348 | le32_to_cpu(sup->leb_size)); | 344 | pr_err("\tmax_leb_cnt %u\n", le32_to_cpu(sup->max_leb_cnt)); |
349 | printk(KERN_ERR "\tleb_cnt %u\n", | 345 | pr_err("\tmax_bud_bytes %llu\n", |
350 | le32_to_cpu(sup->leb_cnt)); | ||
351 | printk(KERN_ERR "\tmax_leb_cnt %u\n", | ||
352 | le32_to_cpu(sup->max_leb_cnt)); | ||
353 | printk(KERN_ERR "\tmax_bud_bytes %llu\n", | ||
354 | (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); | 346 | (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); |
355 | printk(KERN_ERR "\tlog_lebs %u\n", | 347 | pr_err("\tlog_lebs %u\n", le32_to_cpu(sup->log_lebs)); |
356 | le32_to_cpu(sup->log_lebs)); | 348 | pr_err("\tlpt_lebs %u\n", le32_to_cpu(sup->lpt_lebs)); |
357 | printk(KERN_ERR "\tlpt_lebs %u\n", | 349 | pr_err("\torph_lebs %u\n", le32_to_cpu(sup->orph_lebs)); |
358 | le32_to_cpu(sup->lpt_lebs)); | 350 | pr_err("\tjhead_cnt %u\n", le32_to_cpu(sup->jhead_cnt)); |
359 | printk(KERN_ERR "\torph_lebs %u\n", | 351 | pr_err("\tfanout %u\n", le32_to_cpu(sup->fanout)); |
360 | le32_to_cpu(sup->orph_lebs)); | 352 | pr_err("\tlsave_cnt %u\n", le32_to_cpu(sup->lsave_cnt)); |
361 | printk(KERN_ERR "\tjhead_cnt %u\n", | 353 | pr_err("\tdefault_compr %u\n", |
362 | le32_to_cpu(sup->jhead_cnt)); | ||
363 | printk(KERN_ERR "\tfanout %u\n", | ||
364 | le32_to_cpu(sup->fanout)); | ||
365 | printk(KERN_ERR "\tlsave_cnt %u\n", | ||
366 | le32_to_cpu(sup->lsave_cnt)); | ||
367 | printk(KERN_ERR "\tdefault_compr %u\n", | ||
368 | (int)le16_to_cpu(sup->default_compr)); | 354 | (int)le16_to_cpu(sup->default_compr)); |
369 | printk(KERN_ERR "\trp_size %llu\n", | 355 | pr_err("\trp_size %llu\n", |
370 | (unsigned long long)le64_to_cpu(sup->rp_size)); | 356 | (unsigned long long)le64_to_cpu(sup->rp_size)); |
371 | printk(KERN_ERR "\trp_uid %u\n", | 357 | pr_err("\trp_uid %u\n", le32_to_cpu(sup->rp_uid)); |
372 | le32_to_cpu(sup->rp_uid)); | 358 | pr_err("\trp_gid %u\n", le32_to_cpu(sup->rp_gid)); |
373 | printk(KERN_ERR "\trp_gid %u\n", | 359 | pr_err("\tfmt_version %u\n", le32_to_cpu(sup->fmt_version)); |
374 | le32_to_cpu(sup->rp_gid)); | 360 | pr_err("\ttime_gran %u\n", le32_to_cpu(sup->time_gran)); |
375 | printk(KERN_ERR "\tfmt_version %u\n", | 361 | pr_err("\tUUID %pUB\n", sup->uuid); |
376 | le32_to_cpu(sup->fmt_version)); | ||
377 | printk(KERN_ERR "\ttime_gran %u\n", | ||
378 | le32_to_cpu(sup->time_gran)); | ||
379 | printk(KERN_ERR "\tUUID %pUB\n", | ||
380 | sup->uuid); | ||
381 | break; | 362 | break; |
382 | } | 363 | } |
383 | case UBIFS_MST_NODE: | 364 | case UBIFS_MST_NODE: |
384 | { | 365 | { |
385 | const struct ubifs_mst_node *mst = node; | 366 | const struct ubifs_mst_node *mst = node; |
386 | 367 | ||
387 | printk(KERN_ERR "\thighest_inum %llu\n", | 368 | pr_err("\thighest_inum %llu\n", |
388 | (unsigned long long)le64_to_cpu(mst->highest_inum)); | 369 | (unsigned long long)le64_to_cpu(mst->highest_inum)); |
389 | printk(KERN_ERR "\tcommit number %llu\n", | 370 | pr_err("\tcommit number %llu\n", |
390 | (unsigned long long)le64_to_cpu(mst->cmt_no)); | 371 | (unsigned long long)le64_to_cpu(mst->cmt_no)); |
391 | printk(KERN_ERR "\tflags %#x\n", | 372 | pr_err("\tflags %#x\n", le32_to_cpu(mst->flags)); |
392 | le32_to_cpu(mst->flags)); | 373 | pr_err("\tlog_lnum %u\n", le32_to_cpu(mst->log_lnum)); |
393 | printk(KERN_ERR "\tlog_lnum %u\n", | 374 | pr_err("\troot_lnum %u\n", le32_to_cpu(mst->root_lnum)); |
394 | le32_to_cpu(mst->log_lnum)); | 375 | pr_err("\troot_offs %u\n", le32_to_cpu(mst->root_offs)); |
395 | printk(KERN_ERR "\troot_lnum %u\n", | 376 | pr_err("\troot_len %u\n", le32_to_cpu(mst->root_len)); |
396 | le32_to_cpu(mst->root_lnum)); | 377 | pr_err("\tgc_lnum %u\n", le32_to_cpu(mst->gc_lnum)); |
397 | printk(KERN_ERR "\troot_offs %u\n", | 378 | pr_err("\tihead_lnum %u\n", le32_to_cpu(mst->ihead_lnum)); |
398 | le32_to_cpu(mst->root_offs)); | 379 | pr_err("\tihead_offs %u\n", le32_to_cpu(mst->ihead_offs)); |
399 | printk(KERN_ERR "\troot_len %u\n", | 380 | pr_err("\tindex_size %llu\n", |
400 | le32_to_cpu(mst->root_len)); | ||
401 | printk(KERN_ERR "\tgc_lnum %u\n", | ||
402 | le32_to_cpu(mst->gc_lnum)); | ||
403 | printk(KERN_ERR "\tihead_lnum %u\n", | ||
404 | le32_to_cpu(mst->ihead_lnum)); | ||
405 | printk(KERN_ERR "\tihead_offs %u\n", | ||
406 | le32_to_cpu(mst->ihead_offs)); | ||
407 | printk(KERN_ERR "\tindex_size %llu\n", | ||
408 | (unsigned long long)le64_to_cpu(mst->index_size)); | 381 | (unsigned long long)le64_to_cpu(mst->index_size)); |
409 | printk(KERN_ERR "\tlpt_lnum %u\n", | 382 | pr_err("\tlpt_lnum %u\n", le32_to_cpu(mst->lpt_lnum)); |
410 | le32_to_cpu(mst->lpt_lnum)); | 383 | pr_err("\tlpt_offs %u\n", le32_to_cpu(mst->lpt_offs)); |
411 | printk(KERN_ERR "\tlpt_offs %u\n", | 384 | pr_err("\tnhead_lnum %u\n", le32_to_cpu(mst->nhead_lnum)); |
412 | le32_to_cpu(mst->lpt_offs)); | 385 | pr_err("\tnhead_offs %u\n", le32_to_cpu(mst->nhead_offs)); |
413 | printk(KERN_ERR "\tnhead_lnum %u\n", | 386 | pr_err("\tltab_lnum %u\n", le32_to_cpu(mst->ltab_lnum)); |
414 | le32_to_cpu(mst->nhead_lnum)); | 387 | pr_err("\tltab_offs %u\n", le32_to_cpu(mst->ltab_offs)); |
415 | printk(KERN_ERR "\tnhead_offs %u\n", | 388 | pr_err("\tlsave_lnum %u\n", le32_to_cpu(mst->lsave_lnum)); |
416 | le32_to_cpu(mst->nhead_offs)); | 389 | pr_err("\tlsave_offs %u\n", le32_to_cpu(mst->lsave_offs)); |
417 | printk(KERN_ERR "\tltab_lnum %u\n", | 390 | pr_err("\tlscan_lnum %u\n", le32_to_cpu(mst->lscan_lnum)); |
418 | le32_to_cpu(mst->ltab_lnum)); | 391 | pr_err("\tleb_cnt %u\n", le32_to_cpu(mst->leb_cnt)); |
419 | printk(KERN_ERR "\tltab_offs %u\n", | 392 | pr_err("\tempty_lebs %u\n", le32_to_cpu(mst->empty_lebs)); |
420 | le32_to_cpu(mst->ltab_offs)); | 393 | pr_err("\tidx_lebs %u\n", le32_to_cpu(mst->idx_lebs)); |
421 | printk(KERN_ERR "\tlsave_lnum %u\n", | 394 | pr_err("\ttotal_free %llu\n", |
422 | le32_to_cpu(mst->lsave_lnum)); | ||
423 | printk(KERN_ERR "\tlsave_offs %u\n", | ||
424 | le32_to_cpu(mst->lsave_offs)); | ||
425 | printk(KERN_ERR "\tlscan_lnum %u\n", | ||
426 | le32_to_cpu(mst->lscan_lnum)); | ||
427 | printk(KERN_ERR "\tleb_cnt %u\n", | ||
428 | le32_to_cpu(mst->leb_cnt)); | ||
429 | printk(KERN_ERR "\tempty_lebs %u\n", | ||
430 | le32_to_cpu(mst->empty_lebs)); | ||
431 | printk(KERN_ERR "\tidx_lebs %u\n", | ||
432 | le32_to_cpu(mst->idx_lebs)); | ||
433 | printk(KERN_ERR "\ttotal_free %llu\n", | ||
434 | (unsigned long long)le64_to_cpu(mst->total_free)); | 395 | (unsigned long long)le64_to_cpu(mst->total_free)); |
435 | printk(KERN_ERR "\ttotal_dirty %llu\n", | 396 | pr_err("\ttotal_dirty %llu\n", |
436 | (unsigned long long)le64_to_cpu(mst->total_dirty)); | 397 | (unsigned long long)le64_to_cpu(mst->total_dirty)); |
437 | printk(KERN_ERR "\ttotal_used %llu\n", | 398 | pr_err("\ttotal_used %llu\n", |
438 | (unsigned long long)le64_to_cpu(mst->total_used)); | 399 | (unsigned long long)le64_to_cpu(mst->total_used)); |
439 | printk(KERN_ERR "\ttotal_dead %llu\n", | 400 | pr_err("\ttotal_dead %llu\n", |
440 | (unsigned long long)le64_to_cpu(mst->total_dead)); | 401 | (unsigned long long)le64_to_cpu(mst->total_dead)); |
441 | printk(KERN_ERR "\ttotal_dark %llu\n", | 402 | pr_err("\ttotal_dark %llu\n", |
442 | (unsigned long long)le64_to_cpu(mst->total_dark)); | 403 | (unsigned long long)le64_to_cpu(mst->total_dark)); |
443 | break; | 404 | break; |
444 | } | 405 | } |
@@ -446,12 +407,9 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) | |||
446 | { | 407 | { |
447 | const struct ubifs_ref_node *ref = node; | 408 | const struct ubifs_ref_node *ref = node; |
448 | 409 | ||
449 | printk(KERN_ERR "\tlnum %u\n", | 410 | pr_err("\tlnum %u\n", le32_to_cpu(ref->lnum)); |
450 | le32_to_cpu(ref->lnum)); | 411 | pr_err("\toffs %u\n", le32_to_cpu(ref->offs)); |
451 | printk(KERN_ERR "\toffs %u\n", | 412 | pr_err("\tjhead %u\n", le32_to_cpu(ref->jhead)); |
452 | le32_to_cpu(ref->offs)); | ||
453 | printk(KERN_ERR "\tjhead %u\n", | ||
454 | le32_to_cpu(ref->jhead)); | ||
455 | break; | 413 | break; |
456 | } | 414 | } |
457 | case UBIFS_INO_NODE: | 415 | case UBIFS_INO_NODE: |
@@ -459,41 +417,32 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) | |||
459 | const struct ubifs_ino_node *ino = node; | 417 | const struct ubifs_ino_node *ino = node; |
460 | 418 | ||
461 | key_read(c, &ino->key, &key); | 419 | key_read(c, &ino->key, &key); |
462 | printk(KERN_ERR "\tkey %s\n", | 420 | pr_err("\tkey %s\n", |
463 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); | 421 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); |
464 | printk(KERN_ERR "\tcreat_sqnum %llu\n", | 422 | pr_err("\tcreat_sqnum %llu\n", |
465 | (unsigned long long)le64_to_cpu(ino->creat_sqnum)); | 423 | (unsigned long long)le64_to_cpu(ino->creat_sqnum)); |
466 | printk(KERN_ERR "\tsize %llu\n", | 424 | pr_err("\tsize %llu\n", |
467 | (unsigned long long)le64_to_cpu(ino->size)); | 425 | (unsigned long long)le64_to_cpu(ino->size)); |
468 | printk(KERN_ERR "\tnlink %u\n", | 426 | pr_err("\tnlink %u\n", le32_to_cpu(ino->nlink)); |
469 | le32_to_cpu(ino->nlink)); | 427 | pr_err("\tatime %lld.%u\n", |
470 | printk(KERN_ERR "\tatime %lld.%u\n", | ||
471 | (long long)le64_to_cpu(ino->atime_sec), | 428 | (long long)le64_to_cpu(ino->atime_sec), |
472 | le32_to_cpu(ino->atime_nsec)); | 429 | le32_to_cpu(ino->atime_nsec)); |
473 | printk(KERN_ERR "\tmtime %lld.%u\n", | 430 | pr_err("\tmtime %lld.%u\n", |
474 | (long long)le64_to_cpu(ino->mtime_sec), | 431 | (long long)le64_to_cpu(ino->mtime_sec), |
475 | le32_to_cpu(ino->mtime_nsec)); | 432 | le32_to_cpu(ino->mtime_nsec)); |
476 | printk(KERN_ERR "\tctime %lld.%u\n", | 433 | pr_err("\tctime %lld.%u\n", |
477 | (long long)le64_to_cpu(ino->ctime_sec), | 434 | (long long)le64_to_cpu(ino->ctime_sec), |
478 | le32_to_cpu(ino->ctime_nsec)); | 435 | le32_to_cpu(ino->ctime_nsec)); |
479 | printk(KERN_ERR "\tuid %u\n", | 436 | pr_err("\tuid %u\n", le32_to_cpu(ino->uid)); |
480 | le32_to_cpu(ino->uid)); | 437 | pr_err("\tgid %u\n", le32_to_cpu(ino->gid)); |
481 | printk(KERN_ERR "\tgid %u\n", | 438 | pr_err("\tmode %u\n", le32_to_cpu(ino->mode)); |
482 | le32_to_cpu(ino->gid)); | 439 | pr_err("\tflags %#x\n", le32_to_cpu(ino->flags)); |
483 | printk(KERN_ERR "\tmode %u\n", | 440 | pr_err("\txattr_cnt %u\n", le32_to_cpu(ino->xattr_cnt)); |
484 | le32_to_cpu(ino->mode)); | 441 | pr_err("\txattr_size %u\n", le32_to_cpu(ino->xattr_size)); |
485 | printk(KERN_ERR "\tflags %#x\n", | 442 | pr_err("\txattr_names %u\n", le32_to_cpu(ino->xattr_names)); |
486 | le32_to_cpu(ino->flags)); | 443 | pr_err("\tcompr_type %#x\n", |
487 | printk(KERN_ERR "\txattr_cnt %u\n", | ||
488 | le32_to_cpu(ino->xattr_cnt)); | ||
489 | printk(KERN_ERR "\txattr_size %u\n", | ||
490 | le32_to_cpu(ino->xattr_size)); | ||
491 | printk(KERN_ERR "\txattr_names %u\n", | ||
492 | le32_to_cpu(ino->xattr_names)); | ||
493 | printk(KERN_ERR "\tcompr_type %#x\n", | ||
494 | (int)le16_to_cpu(ino->compr_type)); | 444 | (int)le16_to_cpu(ino->compr_type)); |
495 | printk(KERN_ERR "\tdata len %u\n", | 445 | pr_err("\tdata len %u\n", le32_to_cpu(ino->data_len)); |
496 | le32_to_cpu(ino->data_len)); | ||
497 | break; | 446 | break; |
498 | } | 447 | } |
499 | case UBIFS_DENT_NODE: | 448 | case UBIFS_DENT_NODE: |
@@ -503,22 +452,21 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) | |||
503 | int nlen = le16_to_cpu(dent->nlen); | 452 | int nlen = le16_to_cpu(dent->nlen); |
504 | 453 | ||
505 | key_read(c, &dent->key, &key); | 454 | key_read(c, &dent->key, &key); |
506 | printk(KERN_ERR "\tkey %s\n", | 455 | pr_err("\tkey %s\n", |
507 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); | 456 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); |
508 | printk(KERN_ERR "\tinum %llu\n", | 457 | pr_err("\tinum %llu\n", |
509 | (unsigned long long)le64_to_cpu(dent->inum)); | 458 | (unsigned long long)le64_to_cpu(dent->inum)); |
510 | printk(KERN_ERR "\ttype %d\n", (int)dent->type); | 459 | pr_err("\ttype %d\n", (int)dent->type); |
511 | printk(KERN_ERR "\tnlen %d\n", nlen); | 460 | pr_err("\tnlen %d\n", nlen); |
512 | printk(KERN_ERR "\tname "); | 461 | pr_err("\tname "); |
513 | 462 | ||
514 | if (nlen > UBIFS_MAX_NLEN) | 463 | if (nlen > UBIFS_MAX_NLEN) |
515 | printk(KERN_ERR "(bad name length, not printing, " | 464 | pr_err("(bad name length, not printing, bad or corrupted node)"); |
516 | "bad or corrupted node)"); | ||
517 | else { | 465 | else { |
518 | for (i = 0; i < nlen && dent->name[i]; i++) | 466 | for (i = 0; i < nlen && dent->name[i]; i++) |
519 | printk(KERN_CONT "%c", dent->name[i]); | 467 | pr_cont("%c", dent->name[i]); |
520 | } | 468 | } |
521 | printk(KERN_CONT "\n"); | 469 | pr_cont("\n"); |
522 | 470 | ||
523 | break; | 471 | break; |
524 | } | 472 | } |
@@ -528,15 +476,13 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) | |||
528 | int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; | 476 | int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; |
529 | 477 | ||
530 | key_read(c, &dn->key, &key); | 478 | key_read(c, &dn->key, &key); |
531 | printk(KERN_ERR "\tkey %s\n", | 479 | pr_err("\tkey %s\n", |
532 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); | 480 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); |
533 | printk(KERN_ERR "\tsize %u\n", | 481 | pr_err("\tsize %u\n", le32_to_cpu(dn->size)); |
534 | le32_to_cpu(dn->size)); | 482 | pr_err("\tcompr_typ %d\n", |
535 | printk(KERN_ERR "\tcompr_typ %d\n", | ||
536 | (int)le16_to_cpu(dn->compr_type)); | 483 | (int)le16_to_cpu(dn->compr_type)); |
537 | printk(KERN_ERR "\tdata size %d\n", | 484 | pr_err("\tdata size %d\n", dlen); |
538 | dlen); | 485 | pr_err("\tdata:\n"); |
539 | printk(KERN_ERR "\tdata:\n"); | ||
540 | print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, | 486 | print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, |
541 | (void *)&dn->data, dlen, 0); | 487 | (void *)&dn->data, dlen, 0); |
542 | break; | 488 | break; |
@@ -545,11 +491,10 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) | |||
545 | { | 491 | { |
546 | const struct ubifs_trun_node *trun = node; | 492 | const struct ubifs_trun_node *trun = node; |
547 | 493 | ||
548 | printk(KERN_ERR "\tinum %u\n", | 494 | pr_err("\tinum %u\n", le32_to_cpu(trun->inum)); |
549 | le32_to_cpu(trun->inum)); | 495 | pr_err("\told_size %llu\n", |
550 | printk(KERN_ERR "\told_size %llu\n", | ||
551 | (unsigned long long)le64_to_cpu(trun->old_size)); | 496 | (unsigned long long)le64_to_cpu(trun->old_size)); |
552 | printk(KERN_ERR "\tnew_size %llu\n", | 497 | pr_err("\tnew_size %llu\n", |
553 | (unsigned long long)le64_to_cpu(trun->new_size)); | 498 | (unsigned long long)le64_to_cpu(trun->new_size)); |
554 | break; | 499 | break; |
555 | } | 500 | } |
@@ -558,17 +503,16 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) | |||
558 | const struct ubifs_idx_node *idx = node; | 503 | const struct ubifs_idx_node *idx = node; |
559 | 504 | ||
560 | n = le16_to_cpu(idx->child_cnt); | 505 | n = le16_to_cpu(idx->child_cnt); |
561 | printk(KERN_ERR "\tchild_cnt %d\n", n); | 506 | pr_err("\tchild_cnt %d\n", n); |
562 | printk(KERN_ERR "\tlevel %d\n", | 507 | pr_err("\tlevel %d\n", (int)le16_to_cpu(idx->level)); |
563 | (int)le16_to_cpu(idx->level)); | 508 | pr_err("\tBranches:\n"); |
564 | printk(KERN_ERR "\tBranches:\n"); | ||
565 | 509 | ||
566 | for (i = 0; i < n && i < c->fanout - 1; i++) { | 510 | for (i = 0; i < n && i < c->fanout - 1; i++) { |
567 | const struct ubifs_branch *br; | 511 | const struct ubifs_branch *br; |
568 | 512 | ||
569 | br = ubifs_idx_branch(c, idx, i); | 513 | br = ubifs_idx_branch(c, idx, i); |
570 | key_read(c, &br->key, &key); | 514 | key_read(c, &br->key, &key); |
571 | printk(KERN_ERR "\t%d: LEB %d:%d len %d key %s\n", | 515 | pr_err("\t%d: LEB %d:%d len %d key %s\n", |
572 | i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), | 516 | i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), |
573 | le32_to_cpu(br->len), | 517 | le32_to_cpu(br->len), |
574 | dbg_snprintf_key(c, &key, key_buf, | 518 | dbg_snprintf_key(c, &key, key_buf, |
@@ -582,20 +526,20 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) | |||
582 | { | 526 | { |
583 | const struct ubifs_orph_node *orph = node; | 527 | const struct ubifs_orph_node *orph = node; |
584 | 528 | ||
585 | printk(KERN_ERR "\tcommit number %llu\n", | 529 | pr_err("\tcommit number %llu\n", |
586 | (unsigned long long) | 530 | (unsigned long long) |
587 | le64_to_cpu(orph->cmt_no) & LLONG_MAX); | 531 | le64_to_cpu(orph->cmt_no) & LLONG_MAX); |
588 | printk(KERN_ERR "\tlast node flag %llu\n", | 532 | pr_err("\tlast node flag %llu\n", |
589 | (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); | 533 | (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); |
590 | n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; | 534 | n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; |
591 | printk(KERN_ERR "\t%d orphan inode numbers:\n", n); | 535 | pr_err("\t%d orphan inode numbers:\n", n); |
592 | for (i = 0; i < n; i++) | 536 | for (i = 0; i < n; i++) |
593 | printk(KERN_ERR "\t ino %llu\n", | 537 | pr_err("\t ino %llu\n", |
594 | (unsigned long long)le64_to_cpu(orph->inos[i])); | 538 | (unsigned long long)le64_to_cpu(orph->inos[i])); |
595 | break; | 539 | break; |
596 | } | 540 | } |
597 | default: | 541 | default: |
598 | printk(KERN_ERR "node type %d was not recognized\n", | 542 | pr_err("node type %d was not recognized\n", |
599 | (int)ch->node_type); | 543 | (int)ch->node_type); |
600 | } | 544 | } |
601 | spin_unlock(&dbg_lock); | 545 | spin_unlock(&dbg_lock); |
@@ -604,16 +548,16 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) | |||
604 | void ubifs_dump_budget_req(const struct ubifs_budget_req *req) | 548 | void ubifs_dump_budget_req(const struct ubifs_budget_req *req) |
605 | { | 549 | { |
606 | spin_lock(&dbg_lock); | 550 | spin_lock(&dbg_lock); |
607 | printk(KERN_ERR "Budgeting request: new_ino %d, dirtied_ino %d\n", | 551 | pr_err("Budgeting request: new_ino %d, dirtied_ino %d\n", |
608 | req->new_ino, req->dirtied_ino); | 552 | req->new_ino, req->dirtied_ino); |
609 | printk(KERN_ERR "\tnew_ino_d %d, dirtied_ino_d %d\n", | 553 | pr_err("\tnew_ino_d %d, dirtied_ino_d %d\n", |
610 | req->new_ino_d, req->dirtied_ino_d); | 554 | req->new_ino_d, req->dirtied_ino_d); |
611 | printk(KERN_ERR "\tnew_page %d, dirtied_page %d\n", | 555 | pr_err("\tnew_page %d, dirtied_page %d\n", |
612 | req->new_page, req->dirtied_page); | 556 | req->new_page, req->dirtied_page); |
613 | printk(KERN_ERR "\tnew_dent %d, mod_dent %d\n", | 557 | pr_err("\tnew_dent %d, mod_dent %d\n", |
614 | req->new_dent, req->mod_dent); | 558 | req->new_dent, req->mod_dent); |
615 | printk(KERN_ERR "\tidx_growth %d\n", req->idx_growth); | 559 | pr_err("\tidx_growth %d\n", req->idx_growth); |
616 | printk(KERN_ERR "\tdata_growth %d dd_growth %d\n", | 560 | pr_err("\tdata_growth %d dd_growth %d\n", |
617 | req->data_growth, req->dd_growth); | 561 | req->data_growth, req->dd_growth); |
618 | spin_unlock(&dbg_lock); | 562 | spin_unlock(&dbg_lock); |
619 | } | 563 | } |
@@ -621,14 +565,12 @@ void ubifs_dump_budget_req(const struct ubifs_budget_req *req) | |||
621 | void ubifs_dump_lstats(const struct ubifs_lp_stats *lst) | 565 | void ubifs_dump_lstats(const struct ubifs_lp_stats *lst) |
622 | { | 566 | { |
623 | spin_lock(&dbg_lock); | 567 | spin_lock(&dbg_lock); |
624 | printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, " | 568 | pr_err("(pid %d) Lprops statistics: empty_lebs %d, idx_lebs %d\n", |
625 | "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); | 569 | current->pid, lst->empty_lebs, lst->idx_lebs); |
626 | printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, " | 570 | pr_err("\ttaken_empty_lebs %d, total_free %lld, total_dirty %lld\n", |
627 | "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, | 571 | lst->taken_empty_lebs, lst->total_free, lst->total_dirty); |
628 | lst->total_dirty); | 572 | pr_err("\ttotal_used %lld, total_dark %lld, total_dead %lld\n", |
629 | printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, " | 573 | lst->total_used, lst->total_dark, lst->total_dead); |
630 | "total_dead %lld\n", lst->total_used, lst->total_dark, | ||
631 | lst->total_dead); | ||
632 | spin_unlock(&dbg_lock); | 574 | spin_unlock(&dbg_lock); |
633 | } | 575 | } |
634 | 576 | ||
@@ -642,21 +584,17 @@ void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) | |||
642 | 584 | ||
643 | spin_lock(&c->space_lock); | 585 | spin_lock(&c->space_lock); |
644 | spin_lock(&dbg_lock); | 586 | spin_lock(&dbg_lock); |
645 | printk(KERN_ERR "(pid %d) Budgeting info: data budget sum %lld, " | 587 | pr_err("(pid %d) Budgeting info: data budget sum %lld, total budget sum %lld\n", |
646 | "total budget sum %lld\n", current->pid, | 588 | current->pid, bi->data_growth + bi->dd_growth, |
647 | bi->data_growth + bi->dd_growth, | ||
648 | bi->data_growth + bi->dd_growth + bi->idx_growth); | 589 | bi->data_growth + bi->dd_growth + bi->idx_growth); |
649 | printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, " | 590 | pr_err("\tbudg_data_growth %lld, budg_dd_growth %lld, budg_idx_growth %lld\n", |
650 | "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, | 591 | bi->data_growth, bi->dd_growth, bi->idx_growth); |
651 | bi->idx_growth); | 592 | pr_err("\tmin_idx_lebs %d, old_idx_sz %llu, uncommitted_idx %lld\n", |
652 | printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, " | 593 | bi->min_idx_lebs, bi->old_idx_sz, bi->uncommitted_idx); |
653 | "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, | 594 | pr_err("\tpage_budget %d, inode_budget %d, dent_budget %d\n", |
654 | bi->uncommitted_idx); | ||
655 | printk(KERN_ERR "\tpage_budget %d, inode_budget %d, dent_budget %d\n", | ||
656 | bi->page_budget, bi->inode_budget, bi->dent_budget); | 595 | bi->page_budget, bi->inode_budget, bi->dent_budget); |
657 | printk(KERN_ERR "\tnospace %u, nospace_rp %u\n", | 596 | pr_err("\tnospace %u, nospace_rp %u\n", bi->nospace, bi->nospace_rp); |
658 | bi->nospace, bi->nospace_rp); | 597 | pr_err("\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", |
659 | printk(KERN_ERR "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", | ||
660 | c->dark_wm, c->dead_wm, c->max_idx_node_sz); | 598 | c->dark_wm, c->dead_wm, c->max_idx_node_sz); |
661 | 599 | ||
662 | if (bi != &c->bi) | 600 | if (bi != &c->bi) |
@@ -667,38 +605,37 @@ void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) | |||
667 | */ | 605 | */ |
668 | goto out_unlock; | 606 | goto out_unlock; |
669 | 607 | ||
670 | printk(KERN_ERR "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", | 608 | pr_err("\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", |
671 | c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); | 609 | c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); |
672 | printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " | 610 | pr_err("\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, clean_zn_cnt %ld\n", |
673 | "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), | 611 | atomic_long_read(&c->dirty_pg_cnt), |
674 | atomic_long_read(&c->dirty_zn_cnt), | 612 | atomic_long_read(&c->dirty_zn_cnt), |
675 | atomic_long_read(&c->clean_zn_cnt)); | 613 | atomic_long_read(&c->clean_zn_cnt)); |
676 | printk(KERN_ERR "\tgc_lnum %d, ihead_lnum %d\n", | 614 | pr_err("\tgc_lnum %d, ihead_lnum %d\n", c->gc_lnum, c->ihead_lnum); |
677 | c->gc_lnum, c->ihead_lnum); | ||
678 | 615 | ||
679 | /* If we are in R/O mode, journal heads do not exist */ | 616 | /* If we are in R/O mode, journal heads do not exist */ |
680 | if (c->jheads) | 617 | if (c->jheads) |
681 | for (i = 0; i < c->jhead_cnt; i++) | 618 | for (i = 0; i < c->jhead_cnt; i++) |
682 | printk(KERN_ERR "\tjhead %s\t LEB %d\n", | 619 | pr_err("\tjhead %s\t LEB %d\n", |
683 | dbg_jhead(c->jheads[i].wbuf.jhead), | 620 | dbg_jhead(c->jheads[i].wbuf.jhead), |
684 | c->jheads[i].wbuf.lnum); | 621 | c->jheads[i].wbuf.lnum); |
685 | for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { | 622 | for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { |
686 | bud = rb_entry(rb, struct ubifs_bud, rb); | 623 | bud = rb_entry(rb, struct ubifs_bud, rb); |
687 | printk(KERN_ERR "\tbud LEB %d\n", bud->lnum); | 624 | pr_err("\tbud LEB %d\n", bud->lnum); |
688 | } | 625 | } |
689 | list_for_each_entry(bud, &c->old_buds, list) | 626 | list_for_each_entry(bud, &c->old_buds, list) |
690 | printk(KERN_ERR "\told bud LEB %d\n", bud->lnum); | 627 | pr_err("\told bud LEB %d\n", bud->lnum); |
691 | list_for_each_entry(idx_gc, &c->idx_gc, list) | 628 | list_for_each_entry(idx_gc, &c->idx_gc, list) |
692 | printk(KERN_ERR "\tGC'ed idx LEB %d unmap %d\n", | 629 | pr_err("\tGC'ed idx LEB %d unmap %d\n", |
693 | idx_gc->lnum, idx_gc->unmap); | 630 | idx_gc->lnum, idx_gc->unmap); |
694 | printk(KERN_ERR "\tcommit state %d\n", c->cmt_state); | 631 | pr_err("\tcommit state %d\n", c->cmt_state); |
695 | 632 | ||
696 | /* Print budgeting predictions */ | 633 | /* Print budgeting predictions */ |
697 | available = ubifs_calc_available(c, c->bi.min_idx_lebs); | 634 | available = ubifs_calc_available(c, c->bi.min_idx_lebs); |
698 | outstanding = c->bi.data_growth + c->bi.dd_growth; | 635 | outstanding = c->bi.data_growth + c->bi.dd_growth; |
699 | free = ubifs_get_free_space_nolock(c); | 636 | free = ubifs_get_free_space_nolock(c); |
700 | printk(KERN_ERR "Budgeting predictions:\n"); | 637 | pr_err("Budgeting predictions:\n"); |
701 | printk(KERN_ERR "\tavailable: %lld, outstanding %lld, free %lld\n", | 638 | pr_err("\tavailable: %lld, outstanding %lld, free %lld\n", |
702 | available, outstanding, free); | 639 | available, outstanding, free); |
703 | out_unlock: | 640 | out_unlock: |
704 | spin_unlock(&dbg_lock); | 641 | spin_unlock(&dbg_lock); |
@@ -718,21 +655,19 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) | |||
718 | dark = ubifs_calc_dark(c, spc); | 655 | dark = ubifs_calc_dark(c, spc); |
719 | 656 | ||
720 | if (lp->flags & LPROPS_INDEX) | 657 | if (lp->flags & LPROPS_INDEX) |
721 | printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " | 658 | pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d flags %#x (", |
722 | "free + dirty %-8d flags %#x (", lp->lnum, lp->free, | 659 | lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc, |
723 | lp->dirty, c->leb_size - spc, spc, lp->flags); | 660 | lp->flags); |
724 | else | 661 | else |
725 | printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " | 662 | pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d flags %#-4x (", |
726 | "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " | 663 | lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc, |
727 | "flags %#-4x (", lp->lnum, lp->free, lp->dirty, | 664 | dark, dead, (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags); |
728 | c->leb_size - spc, spc, dark, dead, | ||
729 | (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags); | ||
730 | 665 | ||
731 | if (lp->flags & LPROPS_TAKEN) { | 666 | if (lp->flags & LPROPS_TAKEN) { |
732 | if (lp->flags & LPROPS_INDEX) | 667 | if (lp->flags & LPROPS_INDEX) |
733 | printk(KERN_CONT "index, taken"); | 668 | pr_cont("index, taken"); |
734 | else | 669 | else |
735 | printk(KERN_CONT "taken"); | 670 | pr_cont("taken"); |
736 | } else { | 671 | } else { |
737 | const char *s; | 672 | const char *s; |
738 | 673 | ||
@@ -769,7 +704,7 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) | |||
769 | break; | 704 | break; |
770 | } | 705 | } |
771 | } | 706 | } |
772 | printk(KERN_CONT "%s", s); | 707 | pr_cont("%s", s); |
773 | } | 708 | } |
774 | 709 | ||
775 | for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) { | 710 | for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) { |
@@ -784,19 +719,18 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) | |||
784 | */ | 719 | */ |
785 | if (c->jheads && | 720 | if (c->jheads && |
786 | lp->lnum == c->jheads[i].wbuf.lnum) { | 721 | lp->lnum == c->jheads[i].wbuf.lnum) { |
787 | printk(KERN_CONT ", jhead %s", | 722 | pr_cont(", jhead %s", dbg_jhead(i)); |
788 | dbg_jhead(i)); | ||
789 | head = 1; | 723 | head = 1; |
790 | } | 724 | } |
791 | } | 725 | } |
792 | if (!head) | 726 | if (!head) |
793 | printk(KERN_CONT ", bud of jhead %s", | 727 | pr_cont(", bud of jhead %s", |
794 | dbg_jhead(bud->jhead)); | 728 | dbg_jhead(bud->jhead)); |
795 | } | 729 | } |
796 | } | 730 | } |
797 | if (lp->lnum == c->gc_lnum) | 731 | if (lp->lnum == c->gc_lnum) |
798 | printk(KERN_CONT ", GC LEB"); | 732 | pr_cont(", GC LEB"); |
799 | printk(KERN_CONT ")\n"); | 733 | pr_cont(")\n"); |
800 | } | 734 | } |
801 | 735 | ||
802 | void ubifs_dump_lprops(struct ubifs_info *c) | 736 | void ubifs_dump_lprops(struct ubifs_info *c) |
@@ -805,8 +739,7 @@ void ubifs_dump_lprops(struct ubifs_info *c) | |||
805 | struct ubifs_lprops lp; | 739 | struct ubifs_lprops lp; |
806 | struct ubifs_lp_stats lst; | 740 | struct ubifs_lp_stats lst; |
807 | 741 | ||
808 | printk(KERN_ERR "(pid %d) start dumping LEB properties\n", | 742 | pr_err("(pid %d) start dumping LEB properties\n", current->pid); |
809 | current->pid); | ||
810 | ubifs_get_lp_stats(c, &lst); | 743 | ubifs_get_lp_stats(c, &lst); |
811 | ubifs_dump_lstats(&lst); | 744 | ubifs_dump_lstats(&lst); |
812 | 745 | ||
@@ -817,8 +750,7 @@ void ubifs_dump_lprops(struct ubifs_info *c) | |||
817 | 750 | ||
818 | ubifs_dump_lprop(c, &lp); | 751 | ubifs_dump_lprop(c, &lp); |
819 | } | 752 | } |
820 | printk(KERN_ERR "(pid %d) finish dumping LEB properties\n", | 753 | pr_err("(pid %d) finish dumping LEB properties\n", current->pid); |
821 | current->pid); | ||
822 | } | 754 | } |
823 | 755 | ||
824 | void ubifs_dump_lpt_info(struct ubifs_info *c) | 756 | void ubifs_dump_lpt_info(struct ubifs_info *c) |
@@ -826,37 +758,36 @@ void ubifs_dump_lpt_info(struct ubifs_info *c) | |||
826 | int i; | 758 | int i; |
827 | 759 | ||
828 | spin_lock(&dbg_lock); | 760 | spin_lock(&dbg_lock); |
829 | printk(KERN_ERR "(pid %d) dumping LPT information\n", current->pid); | 761 | pr_err("(pid %d) dumping LPT information\n", current->pid); |
830 | printk(KERN_ERR "\tlpt_sz: %lld\n", c->lpt_sz); | 762 | pr_err("\tlpt_sz: %lld\n", c->lpt_sz); |
831 | printk(KERN_ERR "\tpnode_sz: %d\n", c->pnode_sz); | 763 | pr_err("\tpnode_sz: %d\n", c->pnode_sz); |
832 | printk(KERN_ERR "\tnnode_sz: %d\n", c->nnode_sz); | 764 | pr_err("\tnnode_sz: %d\n", c->nnode_sz); |
833 | printk(KERN_ERR "\tltab_sz: %d\n", c->ltab_sz); | 765 | pr_err("\tltab_sz: %d\n", c->ltab_sz); |
834 | printk(KERN_ERR "\tlsave_sz: %d\n", c->lsave_sz); | 766 | pr_err("\tlsave_sz: %d\n", c->lsave_sz); |
835 | printk(KERN_ERR "\tbig_lpt: %d\n", c->big_lpt); | 767 | pr_err("\tbig_lpt: %d\n", c->big_lpt); |
836 | printk(KERN_ERR "\tlpt_hght: %d\n", c->lpt_hght); | 768 | pr_err("\tlpt_hght: %d\n", c->lpt_hght); |
837 | printk(KERN_ERR "\tpnode_cnt: %d\n", c->pnode_cnt); | 769 | pr_err("\tpnode_cnt: %d\n", c->pnode_cnt); |
838 | printk(KERN_ERR "\tnnode_cnt: %d\n", c->nnode_cnt); | 770 | pr_err("\tnnode_cnt: %d\n", c->nnode_cnt); |
839 | printk(KERN_ERR "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); | 771 | pr_err("\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); |
840 | printk(KERN_ERR "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); | 772 | pr_err("\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); |
841 | printk(KERN_ERR "\tlsave_cnt: %d\n", c->lsave_cnt); | 773 | pr_err("\tlsave_cnt: %d\n", c->lsave_cnt); |
842 | printk(KERN_ERR "\tspace_bits: %d\n", c->space_bits); | 774 | pr_err("\tspace_bits: %d\n", c->space_bits); |
843 | printk(KERN_ERR "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); | 775 | pr_err("\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); |
844 | printk(KERN_ERR "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); | 776 | pr_err("\tlpt_offs_bits: %d\n", c->lpt_offs_bits); |
845 | printk(KERN_ERR "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); | 777 | pr_err("\tlpt_spc_bits: %d\n", c->lpt_spc_bits); |
846 | printk(KERN_ERR "\tpcnt_bits: %d\n", c->pcnt_bits); | 778 | pr_err("\tpcnt_bits: %d\n", c->pcnt_bits); |
847 | printk(KERN_ERR "\tlnum_bits: %d\n", c->lnum_bits); | 779 | pr_err("\tlnum_bits: %d\n", c->lnum_bits); |
848 | printk(KERN_ERR "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); | 780 | pr_err("\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); |
849 | printk(KERN_ERR "\tLPT head is at %d:%d\n", | 781 | pr_err("\tLPT head is at %d:%d\n", |
850 | c->nhead_lnum, c->nhead_offs); | 782 | c->nhead_lnum, c->nhead_offs); |
851 | printk(KERN_ERR "\tLPT ltab is at %d:%d\n", | 783 | pr_err("\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs); |
852 | c->ltab_lnum, c->ltab_offs); | ||
853 | if (c->big_lpt) | 784 | if (c->big_lpt) |
854 | printk(KERN_ERR "\tLPT lsave is at %d:%d\n", | 785 | pr_err("\tLPT lsave is at %d:%d\n", |
855 | c->lsave_lnum, c->lsave_offs); | 786 | c->lsave_lnum, c->lsave_offs); |
856 | for (i = 0; i < c->lpt_lebs; i++) | 787 | for (i = 0; i < c->lpt_lebs; i++) |
857 | printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d " | 788 | pr_err("\tLPT LEB %d free %d dirty %d tgc %d cmt %d\n", |
858 | "cmt %d\n", i + c->lpt_first, c->ltab[i].free, | 789 | i + c->lpt_first, c->ltab[i].free, c->ltab[i].dirty, |
859 | c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); | 790 | c->ltab[i].tgc, c->ltab[i].cmt); |
860 | spin_unlock(&dbg_lock); | 791 | spin_unlock(&dbg_lock); |
861 | } | 792 | } |
862 | 793 | ||
@@ -865,13 +796,13 @@ void ubifs_dump_sleb(const struct ubifs_info *c, | |||
865 | { | 796 | { |
866 | struct ubifs_scan_node *snod; | 797 | struct ubifs_scan_node *snod; |
867 | 798 | ||
868 | printk(KERN_ERR "(pid %d) start dumping scanned data from LEB %d:%d\n", | 799 | pr_err("(pid %d) start dumping scanned data from LEB %d:%d\n", |
869 | current->pid, sleb->lnum, offs); | 800 | current->pid, sleb->lnum, offs); |
870 | 801 | ||
871 | list_for_each_entry(snod, &sleb->nodes, list) { | 802 | list_for_each_entry(snod, &sleb->nodes, list) { |
872 | cond_resched(); | 803 | cond_resched(); |
873 | printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", sleb->lnum, | 804 | pr_err("Dumping node at LEB %d:%d len %d\n", |
874 | snod->offs, snod->len); | 805 | sleb->lnum, snod->offs, snod->len); |
875 | ubifs_dump_node(c, snod->node); | 806 | ubifs_dump_node(c, snod->node); |
876 | } | 807 | } |
877 | } | 808 | } |
@@ -882,11 +813,7 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) | |||
882 | struct ubifs_scan_node *snod; | 813 | struct ubifs_scan_node *snod; |
883 | void *buf; | 814 | void *buf; |
884 | 815 | ||
885 | if (dbg_is_tst_rcvry(c)) | 816 | pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum); |
886 | return; | ||
887 | |||
888 | printk(KERN_ERR "(pid %d) start dumping LEB %d\n", | ||
889 | current->pid, lnum); | ||
890 | 817 | ||
891 | buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); | 818 | buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); |
892 | if (!buf) { | 819 | if (!buf) { |
@@ -900,18 +827,17 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) | |||
900 | goto out; | 827 | goto out; |
901 | } | 828 | } |
902 | 829 | ||
903 | printk(KERN_ERR "LEB %d has %d nodes ending at %d\n", lnum, | 830 | pr_err("LEB %d has %d nodes ending at %d\n", lnum, |
904 | sleb->nodes_cnt, sleb->endpt); | 831 | sleb->nodes_cnt, sleb->endpt); |
905 | 832 | ||
906 | list_for_each_entry(snod, &sleb->nodes, list) { | 833 | list_for_each_entry(snod, &sleb->nodes, list) { |
907 | cond_resched(); | 834 | cond_resched(); |
908 | printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", lnum, | 835 | pr_err("Dumping node at LEB %d:%d len %d\n", lnum, |
909 | snod->offs, snod->len); | 836 | snod->offs, snod->len); |
910 | ubifs_dump_node(c, snod->node); | 837 | ubifs_dump_node(c, snod->node); |
911 | } | 838 | } |
912 | 839 | ||
913 | printk(KERN_ERR "(pid %d) finish dumping LEB %d\n", | 840 | pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); |
914 | current->pid, lnum); | ||
915 | ubifs_scan_destroy(sleb); | 841 | ubifs_scan_destroy(sleb); |
916 | 842 | ||
917 | out: | 843 | out: |
@@ -932,33 +858,28 @@ void ubifs_dump_znode(const struct ubifs_info *c, | |||
932 | else | 858 | else |
933 | zbr = &c->zroot; | 859 | zbr = &c->zroot; |
934 | 860 | ||
935 | printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d" | 861 | pr_err("znode %p, LEB %d:%d len %d parent %p iip %d level %d child_cnt %d flags %lx\n", |
936 | " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, | 862 | znode, zbr->lnum, zbr->offs, zbr->len, znode->parent, znode->iip, |
937 | zbr->len, znode->parent, znode->iip, znode->level, | 863 | znode->level, znode->child_cnt, znode->flags); |
938 | znode->child_cnt, znode->flags); | ||
939 | 864 | ||
940 | if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { | 865 | if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { |
941 | spin_unlock(&dbg_lock); | 866 | spin_unlock(&dbg_lock); |
942 | return; | 867 | return; |
943 | } | 868 | } |
944 | 869 | ||
945 | printk(KERN_ERR "zbranches:\n"); | 870 | pr_err("zbranches:\n"); |
946 | for (n = 0; n < znode->child_cnt; n++) { | 871 | for (n = 0; n < znode->child_cnt; n++) { |
947 | zbr = &znode->zbranch[n]; | 872 | zbr = &znode->zbranch[n]; |
948 | if (znode->level > 0) | 873 | if (znode->level > 0) |
949 | printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key " | 874 | pr_err("\t%d: znode %p LEB %d:%d len %d key %s\n", |
950 | "%s\n", n, zbr->znode, zbr->lnum, | 875 | n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, |
951 | zbr->offs, zbr->len, | 876 | dbg_snprintf_key(c, &zbr->key, key_buf, |
952 | dbg_snprintf_key(c, &zbr->key, | 877 | DBG_KEY_BUF_LEN)); |
953 | key_buf, | ||
954 | DBG_KEY_BUF_LEN)); | ||
955 | else | 878 | else |
956 | printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key " | 879 | pr_err("\t%d: LNC %p LEB %d:%d len %d key %s\n", |
957 | "%s\n", n, zbr->znode, zbr->lnum, | 880 | n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, |
958 | zbr->offs, zbr->len, | 881 | dbg_snprintf_key(c, &zbr->key, key_buf, |
959 | dbg_snprintf_key(c, &zbr->key, | 882 | DBG_KEY_BUF_LEN)); |
960 | key_buf, | ||
961 | DBG_KEY_BUF_LEN)); | ||
962 | } | 883 | } |
963 | spin_unlock(&dbg_lock); | 884 | spin_unlock(&dbg_lock); |
964 | } | 885 | } |
@@ -967,16 +888,16 @@ void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) | |||
967 | { | 888 | { |
968 | int i; | 889 | int i; |
969 | 890 | ||
970 | printk(KERN_ERR "(pid %d) start dumping heap cat %d (%d elements)\n", | 891 | pr_err("(pid %d) start dumping heap cat %d (%d elements)\n", |
971 | current->pid, cat, heap->cnt); | 892 | current->pid, cat, heap->cnt); |
972 | for (i = 0; i < heap->cnt; i++) { | 893 | for (i = 0; i < heap->cnt; i++) { |
973 | struct ubifs_lprops *lprops = heap->arr[i]; | 894 | struct ubifs_lprops *lprops = heap->arr[i]; |
974 | 895 | ||
975 | printk(KERN_ERR "\t%d. LEB %d hpos %d free %d dirty %d " | 896 | pr_err("\t%d. LEB %d hpos %d free %d dirty %d flags %d\n", |
976 | "flags %d\n", i, lprops->lnum, lprops->hpos, | 897 | i, lprops->lnum, lprops->hpos, lprops->free, |
977 | lprops->free, lprops->dirty, lprops->flags); | 898 | lprops->dirty, lprops->flags); |
978 | } | 899 | } |
979 | printk(KERN_ERR "(pid %d) finish dumping heap\n", current->pid); | 900 | pr_err("(pid %d) finish dumping heap\n", current->pid); |
980 | } | 901 | } |
981 | 902 | ||
982 | void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, | 903 | void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, |
@@ -984,15 +905,15 @@ void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, | |||
984 | { | 905 | { |
985 | int i; | 906 | int i; |
986 | 907 | ||
987 | printk(KERN_ERR "(pid %d) dumping pnode:\n", current->pid); | 908 | pr_err("(pid %d) dumping pnode:\n", current->pid); |
988 | printk(KERN_ERR "\taddress %zx parent %zx cnext %zx\n", | 909 | pr_err("\taddress %zx parent %zx cnext %zx\n", |
989 | (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); | 910 | (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); |
990 | printk(KERN_ERR "\tflags %lu iip %d level %d num %d\n", | 911 | pr_err("\tflags %lu iip %d level %d num %d\n", |
991 | pnode->flags, iip, pnode->level, pnode->num); | 912 | pnode->flags, iip, pnode->level, pnode->num); |
992 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | 913 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { |
993 | struct ubifs_lprops *lp = &pnode->lprops[i]; | 914 | struct ubifs_lprops *lp = &pnode->lprops[i]; |
994 | 915 | ||
995 | printk(KERN_ERR "\t%d: free %d dirty %d flags %d lnum %d\n", | 916 | pr_err("\t%d: free %d dirty %d flags %d lnum %d\n", |
996 | i, lp->free, lp->dirty, lp->flags, lp->lnum); | 917 | i, lp->free, lp->dirty, lp->flags, lp->lnum); |
997 | } | 918 | } |
998 | } | 919 | } |
@@ -1002,20 +923,20 @@ void ubifs_dump_tnc(struct ubifs_info *c) | |||
1002 | struct ubifs_znode *znode; | 923 | struct ubifs_znode *znode; |
1003 | int level; | 924 | int level; |
1004 | 925 | ||
1005 | printk(KERN_ERR "\n"); | 926 | pr_err("\n"); |
1006 | printk(KERN_ERR "(pid %d) start dumping TNC tree\n", current->pid); | 927 | pr_err("(pid %d) start dumping TNC tree\n", current->pid); |
1007 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); | 928 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); |
1008 | level = znode->level; | 929 | level = znode->level; |
1009 | printk(KERN_ERR "== Level %d ==\n", level); | 930 | pr_err("== Level %d ==\n", level); |
1010 | while (znode) { | 931 | while (znode) { |
1011 | if (level != znode->level) { | 932 | if (level != znode->level) { |
1012 | level = znode->level; | 933 | level = znode->level; |
1013 | printk(KERN_ERR "== Level %d ==\n", level); | 934 | pr_err("== Level %d ==\n", level); |
1014 | } | 935 | } |
1015 | ubifs_dump_znode(c, znode); | 936 | ubifs_dump_znode(c, znode); |
1016 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); | 937 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); |
1017 | } | 938 | } |
1018 | printk(KERN_ERR "(pid %d) finish dumping TNC tree\n", current->pid); | 939 | pr_err("(pid %d) finish dumping TNC tree\n", current->pid); |
1019 | } | 940 | } |
1020 | 941 | ||
1021 | static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, | 942 | static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, |
@@ -1154,8 +1075,8 @@ int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode) | |||
1154 | mutex_lock(&ui->ui_mutex); | 1075 | mutex_lock(&ui->ui_mutex); |
1155 | spin_lock(&ui->ui_lock); | 1076 | spin_lock(&ui->ui_lock); |
1156 | if (ui->ui_size != ui->synced_i_size && !ui->dirty) { | 1077 | if (ui->ui_size != ui->synced_i_size && !ui->dirty) { |
1157 | ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode " | 1078 | ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode is clean", |
1158 | "is clean", ui->ui_size, ui->synced_i_size); | 1079 | ui->ui_size, ui->synced_i_size); |
1159 | ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino, | 1080 | ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino, |
1160 | inode->i_mode, i_size_read(inode)); | 1081 | inode->i_mode, i_size_read(inode)); |
1161 | dump_stack(); | 1082 | dump_stack(); |
@@ -1217,17 +1138,16 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) | |||
1217 | kfree(pdent); | 1138 | kfree(pdent); |
1218 | 1139 | ||
1219 | if (i_size_read(dir) != size) { | 1140 | if (i_size_read(dir) != size) { |
1220 | ubifs_err("directory inode %lu has size %llu, " | 1141 | ubifs_err("directory inode %lu has size %llu, but calculated size is %llu", |
1221 | "but calculated size is %llu", dir->i_ino, | 1142 | dir->i_ino, (unsigned long long)i_size_read(dir), |
1222 | (unsigned long long)i_size_read(dir), | ||
1223 | (unsigned long long)size); | 1143 | (unsigned long long)size); |
1224 | ubifs_dump_inode(c, dir); | 1144 | ubifs_dump_inode(c, dir); |
1225 | dump_stack(); | 1145 | dump_stack(); |
1226 | return -EINVAL; | 1146 | return -EINVAL; |
1227 | } | 1147 | } |
1228 | if (dir->i_nlink != nlink) { | 1148 | if (dir->i_nlink != nlink) { |
1229 | ubifs_err("directory inode %lu has nlink %u, but calculated " | 1149 | ubifs_err("directory inode %lu has nlink %u, but calculated nlink is %u", |
1230 | "nlink is %u", dir->i_ino, dir->i_nlink, nlink); | 1150 | dir->i_ino, dir->i_nlink, nlink); |
1231 | ubifs_dump_inode(c, dir); | 1151 | ubifs_dump_inode(c, dir); |
1232 | dump_stack(); | 1152 | dump_stack(); |
1233 | return -EINVAL; | 1153 | return -EINVAL; |
@@ -1686,8 +1606,8 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, | |||
1686 | if (znode_cb) { | 1606 | if (znode_cb) { |
1687 | err = znode_cb(c, znode, priv); | 1607 | err = znode_cb(c, znode, priv); |
1688 | if (err) { | 1608 | if (err) { |
1689 | ubifs_err("znode checking function returned " | 1609 | ubifs_err("znode checking function returned error %d", |
1690 | "error %d", err); | 1610 | err); |
1691 | ubifs_dump_znode(c, znode); | 1611 | ubifs_dump_znode(c, znode); |
1692 | goto out_dump; | 1612 | goto out_dump; |
1693 | } | 1613 | } |
@@ -1697,9 +1617,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, | |||
1697 | zbr = &znode->zbranch[idx]; | 1617 | zbr = &znode->zbranch[idx]; |
1698 | err = leaf_cb(c, zbr, priv); | 1618 | err = leaf_cb(c, zbr, priv); |
1699 | if (err) { | 1619 | if (err) { |
1700 | ubifs_err("leaf checking function " | 1620 | ubifs_err("leaf checking function returned error %d, for leaf at LEB %d:%d", |
1701 | "returned error %d, for leaf " | ||
1702 | "at LEB %d:%d", | ||
1703 | err, zbr->lnum, zbr->offs); | 1621 | err, zbr->lnum, zbr->offs); |
1704 | goto out_dump; | 1622 | goto out_dump; |
1705 | } | 1623 | } |
@@ -1807,8 +1725,8 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) | |||
1807 | } | 1725 | } |
1808 | 1726 | ||
1809 | if (calc != idx_size) { | 1727 | if (calc != idx_size) { |
1810 | ubifs_err("index size check failed: calculated size is %lld, " | 1728 | ubifs_err("index size check failed: calculated size is %lld, should be %lld", |
1811 | "should be %lld", calc, idx_size); | 1729 | calc, idx_size); |
1812 | dump_stack(); | 1730 | dump_stack(); |
1813 | return -EINVAL; | 1731 | return -EINVAL; |
1814 | } | 1732 | } |
@@ -2120,8 +2038,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, | |||
2120 | fscki = read_add_inode(c, priv, inum); | 2038 | fscki = read_add_inode(c, priv, inum); |
2121 | if (IS_ERR(fscki)) { | 2039 | if (IS_ERR(fscki)) { |
2122 | err = PTR_ERR(fscki); | 2040 | err = PTR_ERR(fscki); |
2123 | ubifs_err("error %d while processing data node and " | 2041 | ubifs_err("error %d while processing data node and trying to find inode node %lu", |
2124 | "trying to find inode node %lu", | ||
2125 | err, (unsigned long)inum); | 2042 | err, (unsigned long)inum); |
2126 | goto out_dump; | 2043 | goto out_dump; |
2127 | } | 2044 | } |
@@ -2131,9 +2048,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, | |||
2131 | blk_offs <<= UBIFS_BLOCK_SHIFT; | 2048 | blk_offs <<= UBIFS_BLOCK_SHIFT; |
2132 | blk_offs += le32_to_cpu(dn->size); | 2049 | blk_offs += le32_to_cpu(dn->size); |
2133 | if (blk_offs > fscki->size) { | 2050 | if (blk_offs > fscki->size) { |
2134 | ubifs_err("data node at LEB %d:%d is not within inode " | 2051 | ubifs_err("data node at LEB %d:%d is not within inode size %lld", |
2135 | "size %lld", zbr->lnum, zbr->offs, | 2052 | zbr->lnum, zbr->offs, fscki->size); |
2136 | fscki->size); | ||
2137 | err = -EINVAL; | 2053 | err = -EINVAL; |
2138 | goto out_dump; | 2054 | goto out_dump; |
2139 | } | 2055 | } |
@@ -2154,8 +2070,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, | |||
2154 | fscki = read_add_inode(c, priv, inum); | 2070 | fscki = read_add_inode(c, priv, inum); |
2155 | if (IS_ERR(fscki)) { | 2071 | if (IS_ERR(fscki)) { |
2156 | err = PTR_ERR(fscki); | 2072 | err = PTR_ERR(fscki); |
2157 | ubifs_err("error %d while processing entry node and " | 2073 | ubifs_err("error %d while processing entry node and trying to find inode node %lu", |
2158 | "trying to find inode node %lu", | ||
2159 | err, (unsigned long)inum); | 2074 | err, (unsigned long)inum); |
2160 | goto out_dump; | 2075 | goto out_dump; |
2161 | } | 2076 | } |
@@ -2167,8 +2082,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, | |||
2167 | fscki1 = read_add_inode(c, priv, inum); | 2082 | fscki1 = read_add_inode(c, priv, inum); |
2168 | if (IS_ERR(fscki1)) { | 2083 | if (IS_ERR(fscki1)) { |
2169 | err = PTR_ERR(fscki1); | 2084 | err = PTR_ERR(fscki1); |
2170 | ubifs_err("error %d while processing entry node and " | 2085 | ubifs_err("error %d while processing entry node and trying to find parent inode node %lu", |
2171 | "trying to find parent inode node %lu", | ||
2172 | err, (unsigned long)inum); | 2086 | err, (unsigned long)inum); |
2173 | goto out_dump; | 2087 | goto out_dump; |
2174 | } | 2088 | } |
@@ -2258,61 +2172,52 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd) | |||
2258 | */ | 2172 | */ |
2259 | if (fscki->inum != UBIFS_ROOT_INO && | 2173 | if (fscki->inum != UBIFS_ROOT_INO && |
2260 | fscki->references != 1) { | 2174 | fscki->references != 1) { |
2261 | ubifs_err("directory inode %lu has %d " | 2175 | ubifs_err("directory inode %lu has %d direntries which refer it, but should be 1", |
2262 | "direntries which refer it, but " | ||
2263 | "should be 1", | ||
2264 | (unsigned long)fscki->inum, | 2176 | (unsigned long)fscki->inum, |
2265 | fscki->references); | 2177 | fscki->references); |
2266 | goto out_dump; | 2178 | goto out_dump; |
2267 | } | 2179 | } |
2268 | if (fscki->inum == UBIFS_ROOT_INO && | 2180 | if (fscki->inum == UBIFS_ROOT_INO && |
2269 | fscki->references != 0) { | 2181 | fscki->references != 0) { |
2270 | ubifs_err("root inode %lu has non-zero (%d) " | 2182 | ubifs_err("root inode %lu has non-zero (%d) direntries which refer it", |
2271 | "direntries which refer it", | ||
2272 | (unsigned long)fscki->inum, | 2183 | (unsigned long)fscki->inum, |
2273 | fscki->references); | 2184 | fscki->references); |
2274 | goto out_dump; | 2185 | goto out_dump; |
2275 | } | 2186 | } |
2276 | if (fscki->calc_sz != fscki->size) { | 2187 | if (fscki->calc_sz != fscki->size) { |
2277 | ubifs_err("directory inode %lu size is %lld, " | 2188 | ubifs_err("directory inode %lu size is %lld, but calculated size is %lld", |
2278 | "but calculated size is %lld", | ||
2279 | (unsigned long)fscki->inum, | 2189 | (unsigned long)fscki->inum, |
2280 | fscki->size, fscki->calc_sz); | 2190 | fscki->size, fscki->calc_sz); |
2281 | goto out_dump; | 2191 | goto out_dump; |
2282 | } | 2192 | } |
2283 | if (fscki->calc_cnt != fscki->nlink) { | 2193 | if (fscki->calc_cnt != fscki->nlink) { |
2284 | ubifs_err("directory inode %lu nlink is %d, " | 2194 | ubifs_err("directory inode %lu nlink is %d, but calculated nlink is %d", |
2285 | "but calculated nlink is %d", | ||
2286 | (unsigned long)fscki->inum, | 2195 | (unsigned long)fscki->inum, |
2287 | fscki->nlink, fscki->calc_cnt); | 2196 | fscki->nlink, fscki->calc_cnt); |
2288 | goto out_dump; | 2197 | goto out_dump; |
2289 | } | 2198 | } |
2290 | } else { | 2199 | } else { |
2291 | if (fscki->references != fscki->nlink) { | 2200 | if (fscki->references != fscki->nlink) { |
2292 | ubifs_err("inode %lu nlink is %d, but " | 2201 | ubifs_err("inode %lu nlink is %d, but calculated nlink is %d", |
2293 | "calculated nlink is %d", | ||
2294 | (unsigned long)fscki->inum, | 2202 | (unsigned long)fscki->inum, |
2295 | fscki->nlink, fscki->references); | 2203 | fscki->nlink, fscki->references); |
2296 | goto out_dump; | 2204 | goto out_dump; |
2297 | } | 2205 | } |
2298 | } | 2206 | } |
2299 | if (fscki->xattr_sz != fscki->calc_xsz) { | 2207 | if (fscki->xattr_sz != fscki->calc_xsz) { |
2300 | ubifs_err("inode %lu has xattr size %u, but " | 2208 | ubifs_err("inode %lu has xattr size %u, but calculated size is %lld", |
2301 | "calculated size is %lld", | ||
2302 | (unsigned long)fscki->inum, fscki->xattr_sz, | 2209 | (unsigned long)fscki->inum, fscki->xattr_sz, |
2303 | fscki->calc_xsz); | 2210 | fscki->calc_xsz); |
2304 | goto out_dump; | 2211 | goto out_dump; |
2305 | } | 2212 | } |
2306 | if (fscki->xattr_cnt != fscki->calc_xcnt) { | 2213 | if (fscki->xattr_cnt != fscki->calc_xcnt) { |
2307 | ubifs_err("inode %lu has %u xattrs, but " | 2214 | ubifs_err("inode %lu has %u xattrs, but calculated count is %lld", |
2308 | "calculated count is %lld", | ||
2309 | (unsigned long)fscki->inum, | 2215 | (unsigned long)fscki->inum, |
2310 | fscki->xattr_cnt, fscki->calc_xcnt); | 2216 | fscki->xattr_cnt, fscki->calc_xcnt); |
2311 | goto out_dump; | 2217 | goto out_dump; |
2312 | } | 2218 | } |
2313 | if (fscki->xattr_nms != fscki->calc_xnms) { | 2219 | if (fscki->xattr_nms != fscki->calc_xnms) { |
2314 | ubifs_err("inode %lu has xattr names' size %u, but " | 2220 | ubifs_err("inode %lu has xattr names' size %u, but calculated names' size is %lld", |
2315 | "calculated names' size is %lld", | ||
2316 | (unsigned long)fscki->inum, fscki->xattr_nms, | 2221 | (unsigned long)fscki->inum, fscki->xattr_nms, |
2317 | fscki->calc_xnms); | 2222 | fscki->calc_xnms); |
2318 | goto out_dump; | 2223 | goto out_dump; |
@@ -2652,20 +2557,18 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) | |||
2652 | return 1; | 2557 | return 1; |
2653 | } | 2558 | } |
2654 | 2559 | ||
2655 | static void cut_data(const void *buf, unsigned int len) | 2560 | static int corrupt_data(const struct ubifs_info *c, const void *buf, |
2561 | unsigned int len) | ||
2656 | { | 2562 | { |
2657 | unsigned int from, to, i, ffs = chance(1, 2); | 2563 | unsigned int from, to, i, ffs = chance(1, 2); |
2658 | unsigned char *p = (void *)buf; | 2564 | unsigned char *p = (void *)buf; |
2659 | 2565 | ||
2660 | from = random32() % (len + 1); | 2566 | from = random32() % (len + 1); |
2661 | if (chance(1, 2)) | 2567 | /* Corruption may only span one max. write unit */ |
2662 | to = random32() % (len - from + 1); | 2568 | to = min(len, ALIGN(from, c->max_write_size)); |
2663 | else | ||
2664 | to = len; | ||
2665 | 2569 | ||
2666 | if (from < to) | 2570 | ubifs_warn("filled bytes %u-%u with %s", from, to - 1, |
2667 | ubifs_warn("filled bytes %u-%u with %s", from, to - 1, | 2571 | ffs ? "0xFFs" : "random data"); |
2668 | ffs ? "0xFFs" : "random data"); | ||
2669 | 2572 | ||
2670 | if (ffs) | 2573 | if (ffs) |
2671 | for (i = from; i < to; i++) | 2574 | for (i = from; i < to; i++) |
@@ -2673,6 +2576,8 @@ static void cut_data(const void *buf, unsigned int len) | |||
2673 | else | 2576 | else |
2674 | for (i = from; i < to; i++) | 2577 | for (i = from; i < to; i++) |
2675 | p[i] = random32() % 0x100; | 2578 | p[i] = random32() % 0x100; |
2579 | |||
2580 | return to; | ||
2676 | } | 2581 | } |
2677 | 2582 | ||
2678 | int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, | 2583 | int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, |
@@ -2685,7 +2590,9 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, | |||
2685 | 2590 | ||
2686 | failing = power_cut_emulated(c, lnum, 1); | 2591 | failing = power_cut_emulated(c, lnum, 1); |
2687 | if (failing) | 2592 | if (failing) |
2688 | cut_data(buf, len); | 2593 | len = corrupt_data(c, buf, len); |
2594 | ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)", | ||
2595 | len, lnum, offs); | ||
2689 | err = ubi_leb_write(c->ubi, lnum, buf, offs, len); | 2596 | err = ubi_leb_write(c->ubi, lnum, buf, offs, len); |
2690 | if (err) | 2597 | if (err) |
2691 | return err; | 2598 | return err; |
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 8b8cc4e945f4..e03d5179769a 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h | |||
@@ -150,7 +150,7 @@ struct ubifs_global_debug_info { | |||
150 | 150 | ||
151 | #define ubifs_assert(expr) do { \ | 151 | #define ubifs_assert(expr) do { \ |
152 | if (unlikely(!(expr))) { \ | 152 | if (unlikely(!(expr))) { \ |
153 | printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ | 153 | pr_crit("UBIFS assert failed in %s at %u (pid %d)\n", \ |
154 | __func__, __LINE__, current->pid); \ | 154 | __func__, __LINE__, current->pid); \ |
155 | dump_stack(); \ | 155 | dump_stack(); \ |
156 | } \ | 156 | } \ |
@@ -159,26 +159,23 @@ struct ubifs_global_debug_info { | |||
159 | #define ubifs_assert_cmt_locked(c) do { \ | 159 | #define ubifs_assert_cmt_locked(c) do { \ |
160 | if (unlikely(down_write_trylock(&(c)->commit_sem))) { \ | 160 | if (unlikely(down_write_trylock(&(c)->commit_sem))) { \ |
161 | up_write(&(c)->commit_sem); \ | 161 | up_write(&(c)->commit_sem); \ |
162 | printk(KERN_CRIT "commit lock is not locked!\n"); \ | 162 | pr_crit("commit lock is not locked!\n"); \ |
163 | ubifs_assert(0); \ | 163 | ubifs_assert(0); \ |
164 | } \ | 164 | } \ |
165 | } while (0) | 165 | } while (0) |
166 | 166 | ||
167 | #define ubifs_dbg_msg(type, fmt, ...) \ | 167 | #define ubifs_dbg_msg(type, fmt, ...) \ |
168 | pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) | 168 | pr_debug("UBIFS DBG " type " (pid %d): " fmt "\n", current->pid, \ |
169 | ##__VA_ARGS__) | ||
169 | 170 | ||
170 | #define DBG_KEY_BUF_LEN 32 | 171 | #define DBG_KEY_BUF_LEN 48 |
171 | #define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ | 172 | #define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ |
172 | char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ | 173 | char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ |
173 | pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \ | 174 | pr_debug("UBIFS DBG " type " (pid %d): " fmt "%s\n", current->pid, \ |
175 | ##__VA_ARGS__, \ | ||
174 | dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \ | 176 | dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \ |
175 | } while (0) | 177 | } while (0) |
176 | 178 | ||
177 | /* Just a debugging messages not related to any specific UBIFS subsystem */ | ||
178 | #define dbg_msg(fmt, ...) \ | ||
179 | printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ | ||
180 | __func__, ##__VA_ARGS__) | ||
181 | |||
182 | /* General messages */ | 179 | /* General messages */ |
183 | #define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) | 180 | #define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) |
184 | /* Additional journal messages */ | 181 | /* Additional journal messages */ |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index c95681cf1b71..e271fba1651b 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -980,8 +980,8 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
980 | * separately. | 980 | * separately. |
981 | */ | 981 | */ |
982 | 982 | ||
983 | dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in " | 983 | dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in dir ino %lu", |
984 | "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name, | 984 | old_dentry->d_name.len, old_dentry->d_name.name, |
985 | old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, | 985 | old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, |
986 | new_dentry->d_name.name, new_dir->i_ino); | 986 | new_dentry->d_name.name, new_dir->i_ino); |
987 | ubifs_assert(mutex_is_locked(&old_dir->i_mutex)); | 987 | ubifs_assert(mutex_is_locked(&old_dir->i_mutex)); |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 7bd6e72afd11..5bc77817f382 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -1486,8 +1486,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
1486 | err = ubifs_budget_space(c, &req); | 1486 | err = ubifs_budget_space(c, &req); |
1487 | if (unlikely(err)) { | 1487 | if (unlikely(err)) { |
1488 | if (err == -ENOSPC) | 1488 | if (err == -ENOSPC) |
1489 | ubifs_warn("out of space for mmapped file " | 1489 | ubifs_warn("out of space for mmapped file (inode number %lu)", |
1490 | "(inode number %lu)", inode->i_ino); | 1490 | inode->i_ino); |
1491 | return VM_FAULT_SIGBUS; | 1491 | return VM_FAULT_SIGBUS; |
1492 | } | 1492 | } |
1493 | 1493 | ||
@@ -1536,6 +1536,7 @@ out_unlock: | |||
1536 | static const struct vm_operations_struct ubifs_file_vm_ops = { | 1536 | static const struct vm_operations_struct ubifs_file_vm_ops = { |
1537 | .fault = filemap_fault, | 1537 | .fault = filemap_fault, |
1538 | .page_mkwrite = ubifs_vm_page_mkwrite, | 1538 | .page_mkwrite = ubifs_vm_page_mkwrite, |
1539 | .remap_pages = generic_file_remap_pages, | ||
1539 | }; | 1540 | }; |
1540 | 1541 | ||
1541 | static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) | 1542 | static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) |
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 04dd6f47635e..76ca53cd3eee 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
@@ -714,9 +714,9 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) | |||
714 | break; | 714 | break; |
715 | } | 715 | } |
716 | 716 | ||
717 | dbg_gc("found LEB %d: free %d, dirty %d, sum %d " | 717 | dbg_gc("found LEB %d: free %d, dirty %d, sum %d (min. space %d)", |
718 | "(min. space %d)", lp.lnum, lp.free, lp.dirty, | 718 | lp.lnum, lp.free, lp.dirty, lp.free + lp.dirty, |
719 | lp.free + lp.dirty, min_space); | 719 | min_space); |
720 | 720 | ||
721 | space_before = c->leb_size - wbuf->offs - wbuf->used; | 721 | space_before = c->leb_size - wbuf->offs - wbuf->used; |
722 | if (wbuf->lnum == -1) | 722 | if (wbuf->lnum == -1) |
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 12c0f154ca83..afaad07f3b29 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c | |||
@@ -469,8 +469,8 @@ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, | |||
469 | ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | 469 | ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); |
470 | ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec); | 470 | ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec); |
471 | ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 471 | ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
472 | ino->uid = cpu_to_le32(inode->i_uid); | 472 | ino->uid = cpu_to_le32(i_uid_read(inode)); |
473 | ino->gid = cpu_to_le32(inode->i_gid); | 473 | ino->gid = cpu_to_le32(i_gid_read(inode)); |
474 | ino->mode = cpu_to_le32(inode->i_mode); | 474 | ino->mode = cpu_to_le32(inode->i_mode); |
475 | ino->flags = cpu_to_le32(ui->flags); | 475 | ino->flags = cpu_to_le32(ui->flags); |
476 | ino->size = cpu_to_le64(ui->ui_size); | 476 | ino->size = cpu_to_le64(ui->ui_size); |
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index c80b15d6c8de..36bd4efd0819 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c | |||
@@ -315,17 +315,15 @@ static void remove_buds(struct ubifs_info *c) | |||
315 | * heads (non-closed buds). | 315 | * heads (non-closed buds). |
316 | */ | 316 | */ |
317 | c->cmt_bud_bytes += wbuf->offs - bud->start; | 317 | c->cmt_bud_bytes += wbuf->offs - bud->start; |
318 | dbg_log("preserve %d:%d, jhead %s, bud bytes %d, " | 318 | dbg_log("preserve %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld", |
319 | "cmt_bud_bytes %lld", bud->lnum, bud->start, | 319 | bud->lnum, bud->start, dbg_jhead(bud->jhead), |
320 | dbg_jhead(bud->jhead), wbuf->offs - bud->start, | 320 | wbuf->offs - bud->start, c->cmt_bud_bytes); |
321 | c->cmt_bud_bytes); | ||
322 | bud->start = wbuf->offs; | 321 | bud->start = wbuf->offs; |
323 | } else { | 322 | } else { |
324 | c->cmt_bud_bytes += c->leb_size - bud->start; | 323 | c->cmt_bud_bytes += c->leb_size - bud->start; |
325 | dbg_log("remove %d:%d, jhead %s, bud bytes %d, " | 324 | dbg_log("remove %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld", |
326 | "cmt_bud_bytes %lld", bud->lnum, bud->start, | 325 | bud->lnum, bud->start, dbg_jhead(bud->jhead), |
327 | dbg_jhead(bud->jhead), c->leb_size - bud->start, | 326 | c->leb_size - bud->start, c->cmt_bud_bytes); |
328 | c->cmt_bud_bytes); | ||
329 | rb_erase(p1, &c->buds); | 327 | rb_erase(p1, &c->buds); |
330 | /* | 328 | /* |
331 | * If the commit does not finish, the recovery will need | 329 | * If the commit does not finish, the recovery will need |
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 86eb8e533249..e5a2a35a46dc 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c | |||
@@ -867,15 +867,15 @@ int dbg_check_cats(struct ubifs_info *c) | |||
867 | 867 | ||
868 | list_for_each_entry(lprops, &c->empty_list, list) { | 868 | list_for_each_entry(lprops, &c->empty_list, list) { |
869 | if (lprops->free != c->leb_size) { | 869 | if (lprops->free != c->leb_size) { |
870 | ubifs_err("non-empty LEB %d on empty list " | 870 | ubifs_err("non-empty LEB %d on empty list (free %d dirty %d flags %d)", |
871 | "(free %d dirty %d flags %d)", lprops->lnum, | 871 | lprops->lnum, lprops->free, lprops->dirty, |
872 | lprops->free, lprops->dirty, lprops->flags); | 872 | lprops->flags); |
873 | return -EINVAL; | 873 | return -EINVAL; |
874 | } | 874 | } |
875 | if (lprops->flags & LPROPS_TAKEN) { | 875 | if (lprops->flags & LPROPS_TAKEN) { |
876 | ubifs_err("taken LEB %d on empty list " | 876 | ubifs_err("taken LEB %d on empty list (free %d dirty %d flags %d)", |
877 | "(free %d dirty %d flags %d)", lprops->lnum, | 877 | lprops->lnum, lprops->free, lprops->dirty, |
878 | lprops->free, lprops->dirty, lprops->flags); | 878 | lprops->flags); |
879 | return -EINVAL; | 879 | return -EINVAL; |
880 | } | 880 | } |
881 | } | 881 | } |
@@ -883,15 +883,15 @@ int dbg_check_cats(struct ubifs_info *c) | |||
883 | i = 0; | 883 | i = 0; |
884 | list_for_each_entry(lprops, &c->freeable_list, list) { | 884 | list_for_each_entry(lprops, &c->freeable_list, list) { |
885 | if (lprops->free + lprops->dirty != c->leb_size) { | 885 | if (lprops->free + lprops->dirty != c->leb_size) { |
886 | ubifs_err("non-freeable LEB %d on freeable list " | 886 | ubifs_err("non-freeable LEB %d on freeable list (free %d dirty %d flags %d)", |
887 | "(free %d dirty %d flags %d)", lprops->lnum, | 887 | lprops->lnum, lprops->free, lprops->dirty, |
888 | lprops->free, lprops->dirty, lprops->flags); | 888 | lprops->flags); |
889 | return -EINVAL; | 889 | return -EINVAL; |
890 | } | 890 | } |
891 | if (lprops->flags & LPROPS_TAKEN) { | 891 | if (lprops->flags & LPROPS_TAKEN) { |
892 | ubifs_err("taken LEB %d on freeable list " | 892 | ubifs_err("taken LEB %d on freeable list (free %d dirty %d flags %d)", |
893 | "(free %d dirty %d flags %d)", lprops->lnum, | 893 | lprops->lnum, lprops->free, lprops->dirty, |
894 | lprops->free, lprops->dirty, lprops->flags); | 894 | lprops->flags); |
895 | return -EINVAL; | 895 | return -EINVAL; |
896 | } | 896 | } |
897 | i += 1; | 897 | i += 1; |
@@ -913,21 +913,21 @@ int dbg_check_cats(struct ubifs_info *c) | |||
913 | 913 | ||
914 | list_for_each_entry(lprops, &c->frdi_idx_list, list) { | 914 | list_for_each_entry(lprops, &c->frdi_idx_list, list) { |
915 | if (lprops->free + lprops->dirty != c->leb_size) { | 915 | if (lprops->free + lprops->dirty != c->leb_size) { |
916 | ubifs_err("non-freeable LEB %d on frdi_idx list " | 916 | ubifs_err("non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)", |
917 | "(free %d dirty %d flags %d)", lprops->lnum, | 917 | lprops->lnum, lprops->free, lprops->dirty, |
918 | lprops->free, lprops->dirty, lprops->flags); | 918 | lprops->flags); |
919 | return -EINVAL; | 919 | return -EINVAL; |
920 | } | 920 | } |
921 | if (lprops->flags & LPROPS_TAKEN) { | 921 | if (lprops->flags & LPROPS_TAKEN) { |
922 | ubifs_err("taken LEB %d on frdi_idx list " | 922 | ubifs_err("taken LEB %d on frdi_idx list (free %d dirty %d flags %d)", |
923 | "(free %d dirty %d flags %d)", lprops->lnum, | 923 | lprops->lnum, lprops->free, lprops->dirty, |
924 | lprops->free, lprops->dirty, lprops->flags); | 924 | lprops->flags); |
925 | return -EINVAL; | 925 | return -EINVAL; |
926 | } | 926 | } |
927 | if (!(lprops->flags & LPROPS_INDEX)) { | 927 | if (!(lprops->flags & LPROPS_INDEX)) { |
928 | ubifs_err("non-index LEB %d on frdi_idx list " | 928 | ubifs_err("non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)", |
929 | "(free %d dirty %d flags %d)", lprops->lnum, | 929 | lprops->lnum, lprops->free, lprops->dirty, |
930 | lprops->free, lprops->dirty, lprops->flags); | 930 | lprops->flags); |
931 | return -EINVAL; | 931 | return -EINVAL; |
932 | } | 932 | } |
933 | } | 933 | } |
@@ -982,9 +982,9 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, | |||
982 | goto out; | 982 | goto out; |
983 | } | 983 | } |
984 | if (lprops != lp) { | 984 | if (lprops != lp) { |
985 | dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", | 985 | ubifs_err("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", |
986 | (size_t)lprops, (size_t)lp, lprops->lnum, | 986 | (size_t)lprops, (size_t)lp, lprops->lnum, |
987 | lp->lnum); | 987 | lp->lnum); |
988 | err = 4; | 988 | err = 4; |
989 | goto out; | 989 | goto out; |
990 | } | 990 | } |
@@ -1002,7 +1002,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, | |||
1002 | } | 1002 | } |
1003 | out: | 1003 | out: |
1004 | if (err) { | 1004 | if (err) { |
1005 | dbg_msg("failed cat %d hpos %d err %d", cat, i, err); | 1005 | ubifs_err("failed cat %d hpos %d err %d", cat, i, err); |
1006 | dump_stack(); | 1006 | dump_stack(); |
1007 | ubifs_dump_heap(c, heap, cat); | 1007 | ubifs_dump_heap(c, heap, cat); |
1008 | } | 1008 | } |
@@ -1153,8 +1153,8 @@ static int scan_check_cb(struct ubifs_info *c, | |||
1153 | 1153 | ||
1154 | if (free > c->leb_size || free < 0 || dirty > c->leb_size || | 1154 | if (free > c->leb_size || free < 0 || dirty > c->leb_size || |
1155 | dirty < 0) { | 1155 | dirty < 0) { |
1156 | ubifs_err("bad calculated accounting for LEB %d: " | 1156 | ubifs_err("bad calculated accounting for LEB %d: free %d, dirty %d", |
1157 | "free %d, dirty %d", lnum, free, dirty); | 1157 | lnum, free, dirty); |
1158 | goto out_destroy; | 1158 | goto out_destroy; |
1159 | } | 1159 | } |
1160 | 1160 | ||
@@ -1200,8 +1200,7 @@ static int scan_check_cb(struct ubifs_info *c, | |||
1200 | /* Free but not unmapped LEB, it's fine */ | 1200 | /* Free but not unmapped LEB, it's fine */ |
1201 | is_idx = 0; | 1201 | is_idx = 0; |
1202 | else { | 1202 | else { |
1203 | ubifs_err("indexing node without indexing " | 1203 | ubifs_err("indexing node without indexing flag"); |
1204 | "flag"); | ||
1205 | goto out_print; | 1204 | goto out_print; |
1206 | } | 1205 | } |
1207 | } | 1206 | } |
@@ -1236,8 +1235,7 @@ static int scan_check_cb(struct ubifs_info *c, | |||
1236 | return LPT_SCAN_CONTINUE; | 1235 | return LPT_SCAN_CONTINUE; |
1237 | 1236 | ||
1238 | out_print: | 1237 | out_print: |
1239 | ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " | 1238 | ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d", |
1240 | "should be free %d, dirty %d", | ||
1241 | lnum, lp->free, lp->dirty, lp->flags, free, dirty); | 1239 | lnum, lp->free, lp->dirty, lp->flags, free, dirty); |
1242 | ubifs_dump_leb(c, lnum); | 1240 | ubifs_dump_leb(c, lnum); |
1243 | out_destroy: | 1241 | out_destroy: |
@@ -1290,12 +1288,10 @@ int dbg_check_lprops(struct ubifs_info *c) | |||
1290 | lst.total_dirty != c->lst.total_dirty || | 1288 | lst.total_dirty != c->lst.total_dirty || |
1291 | lst.total_used != c->lst.total_used) { | 1289 | lst.total_used != c->lst.total_used) { |
1292 | ubifs_err("bad overall accounting"); | 1290 | ubifs_err("bad overall accounting"); |
1293 | ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " | 1291 | ubifs_err("calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", |
1294 | "total_free %lld, total_dirty %lld, total_used %lld", | ||
1295 | lst.empty_lebs, lst.idx_lebs, lst.total_free, | 1292 | lst.empty_lebs, lst.idx_lebs, lst.total_free, |
1296 | lst.total_dirty, lst.total_used); | 1293 | lst.total_dirty, lst.total_used); |
1297 | ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " | 1294 | ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", |
1298 | "total_free %lld, total_dirty %lld, total_used %lld", | ||
1299 | c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, | 1295 | c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, |
1300 | c->lst.total_dirty, c->lst.total_used); | 1296 | c->lst.total_dirty, c->lst.total_used); |
1301 | err = -EINVAL; | 1297 | err = -EINVAL; |
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index ce33b2beb151..d46b19ec1815 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c | |||
@@ -1311,7 +1311,7 @@ out: | |||
1311 | ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); | 1311 | ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); |
1312 | ubifs_dump_pnode(c, pnode, parent, iip); | 1312 | ubifs_dump_pnode(c, pnode, parent, iip); |
1313 | dump_stack(); | 1313 | dump_stack(); |
1314 | dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); | 1314 | ubifs_err("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); |
1315 | kfree(pnode); | 1315 | kfree(pnode); |
1316 | return err; | 1316 | return err; |
1317 | } | 1317 | } |
@@ -1749,7 +1749,10 @@ int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr) | |||
1749 | return 0; | 1749 | return 0; |
1750 | 1750 | ||
1751 | out_err: | 1751 | out_err: |
1752 | ubifs_lpt_free(c, 0); | 1752 | if (wr) |
1753 | ubifs_lpt_free(c, 1); | ||
1754 | if (rd) | ||
1755 | ubifs_lpt_free(c, 0); | ||
1753 | return err; | 1756 | return err; |
1754 | } | 1757 | } |
1755 | 1758 | ||
@@ -2234,8 +2237,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, | |||
2234 | /* cnode is a nnode */ | 2237 | /* cnode is a nnode */ |
2235 | num = calc_nnode_num(row, col); | 2238 | num = calc_nnode_num(row, col); |
2236 | if (cnode->num != num) { | 2239 | if (cnode->num != num) { |
2237 | ubifs_err("nnode num %d expected %d " | 2240 | ubifs_err("nnode num %d expected %d parent num %d iip %d", |
2238 | "parent num %d iip %d", | ||
2239 | cnode->num, num, | 2241 | cnode->num, num, |
2240 | (nnode ? nnode->num : 0), cnode->iip); | 2242 | (nnode ? nnode->num : 0), cnode->iip); |
2241 | return -EINVAL; | 2243 | return -EINVAL; |
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 4fa70734e6e7..9daaeef675dd 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c | |||
@@ -320,8 +320,8 @@ static int layout_cnodes(struct ubifs_info *c) | |||
320 | return 0; | 320 | return 0; |
321 | 321 | ||
322 | no_space: | 322 | no_space: |
323 | ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, " | 323 | ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", |
324 | "done_lsave %d", lnum, offs, len, done_ltab, done_lsave); | 324 | lnum, offs, len, done_ltab, done_lsave); |
325 | ubifs_dump_lpt_info(c); | 325 | ubifs_dump_lpt_info(c); |
326 | ubifs_dump_lpt_lebs(c); | 326 | ubifs_dump_lpt_lebs(c); |
327 | dump_stack(); | 327 | dump_stack(); |
@@ -545,8 +545,8 @@ static int write_cnodes(struct ubifs_info *c) | |||
545 | return 0; | 545 | return 0; |
546 | 546 | ||
547 | no_space: | 547 | no_space: |
548 | ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab " | 548 | ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", |
549 | "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); | 549 | lnum, offs, len, done_ltab, done_lsave); |
550 | ubifs_dump_lpt_info(c); | 550 | ubifs_dump_lpt_info(c); |
551 | ubifs_dump_lpt_lebs(c); | 551 | ubifs_dump_lpt_lebs(c); |
552 | dump_stack(); | 552 | dump_stack(); |
@@ -1662,21 +1662,19 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) | |||
1662 | continue; | 1662 | continue; |
1663 | } | 1663 | } |
1664 | if (!dbg_is_all_ff(p, len)) { | 1664 | if (!dbg_is_all_ff(p, len)) { |
1665 | dbg_msg("invalid empty space in LEB %d at %d", | 1665 | ubifs_err("invalid empty space in LEB %d at %d", |
1666 | lnum, c->leb_size - len); | 1666 | lnum, c->leb_size - len); |
1667 | err = -EINVAL; | 1667 | err = -EINVAL; |
1668 | } | 1668 | } |
1669 | i = lnum - c->lpt_first; | 1669 | i = lnum - c->lpt_first; |
1670 | if (len != c->ltab[i].free) { | 1670 | if (len != c->ltab[i].free) { |
1671 | dbg_msg("invalid free space in LEB %d " | 1671 | ubifs_err("invalid free space in LEB %d (free %d, expected %d)", |
1672 | "(free %d, expected %d)", | 1672 | lnum, len, c->ltab[i].free); |
1673 | lnum, len, c->ltab[i].free); | ||
1674 | err = -EINVAL; | 1673 | err = -EINVAL; |
1675 | } | 1674 | } |
1676 | if (dirty != c->ltab[i].dirty) { | 1675 | if (dirty != c->ltab[i].dirty) { |
1677 | dbg_msg("invalid dirty space in LEB %d " | 1676 | ubifs_err("invalid dirty space in LEB %d (dirty %d, expected %d)", |
1678 | "(dirty %d, expected %d)", | 1677 | lnum, dirty, c->ltab[i].dirty); |
1679 | lnum, dirty, c->ltab[i].dirty); | ||
1680 | err = -EINVAL; | 1678 | err = -EINVAL; |
1681 | } | 1679 | } |
1682 | goto out; | 1680 | goto out; |
@@ -1888,8 +1886,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) | |||
1888 | int err, len = c->leb_size, node_type, node_num, node_len, offs; | 1886 | int err, len = c->leb_size, node_type, node_num, node_len, offs; |
1889 | void *buf, *p; | 1887 | void *buf, *p; |
1890 | 1888 | ||
1891 | printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", | 1889 | pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum); |
1892 | current->pid, lnum); | ||
1893 | buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); | 1890 | buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); |
1894 | if (!buf) { | 1891 | if (!buf) { |
1895 | ubifs_err("cannot allocate memory to dump LPT"); | 1892 | ubifs_err("cannot allocate memory to dump LPT"); |
@@ -1907,14 +1904,14 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) | |||
1907 | 1904 | ||
1908 | pad_len = get_pad_len(c, p, len); | 1905 | pad_len = get_pad_len(c, p, len); |
1909 | if (pad_len) { | 1906 | if (pad_len) { |
1910 | printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", | 1907 | pr_err("LEB %d:%d, pad %d bytes\n", |
1911 | lnum, offs, pad_len); | 1908 | lnum, offs, pad_len); |
1912 | p += pad_len; | 1909 | p += pad_len; |
1913 | len -= pad_len; | 1910 | len -= pad_len; |
1914 | continue; | 1911 | continue; |
1915 | } | 1912 | } |
1916 | if (len) | 1913 | if (len) |
1917 | printk(KERN_DEBUG "LEB %d:%d, free %d bytes\n", | 1914 | pr_err("LEB %d:%d, free %d bytes\n", |
1918 | lnum, offs, len); | 1915 | lnum, offs, len); |
1919 | break; | 1916 | break; |
1920 | } | 1917 | } |
@@ -1925,11 +1922,10 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) | |||
1925 | { | 1922 | { |
1926 | node_len = c->pnode_sz; | 1923 | node_len = c->pnode_sz; |
1927 | if (c->big_lpt) | 1924 | if (c->big_lpt) |
1928 | printk(KERN_DEBUG "LEB %d:%d, pnode num %d\n", | 1925 | pr_err("LEB %d:%d, pnode num %d\n", |
1929 | lnum, offs, node_num); | 1926 | lnum, offs, node_num); |
1930 | else | 1927 | else |
1931 | printk(KERN_DEBUG "LEB %d:%d, pnode\n", | 1928 | pr_err("LEB %d:%d, pnode\n", lnum, offs); |
1932 | lnum, offs); | ||
1933 | break; | 1929 | break; |
1934 | } | 1930 | } |
1935 | case UBIFS_LPT_NNODE: | 1931 | case UBIFS_LPT_NNODE: |
@@ -1939,29 +1935,28 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) | |||
1939 | 1935 | ||
1940 | node_len = c->nnode_sz; | 1936 | node_len = c->nnode_sz; |
1941 | if (c->big_lpt) | 1937 | if (c->big_lpt) |
1942 | printk(KERN_DEBUG "LEB %d:%d, nnode num %d, ", | 1938 | pr_err("LEB %d:%d, nnode num %d, ", |
1943 | lnum, offs, node_num); | 1939 | lnum, offs, node_num); |
1944 | else | 1940 | else |
1945 | printk(KERN_DEBUG "LEB %d:%d, nnode, ", | 1941 | pr_err("LEB %d:%d, nnode, ", |
1946 | lnum, offs); | 1942 | lnum, offs); |
1947 | err = ubifs_unpack_nnode(c, p, &nnode); | 1943 | err = ubifs_unpack_nnode(c, p, &nnode); |
1948 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | 1944 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { |
1949 | printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, | 1945 | pr_cont("%d:%d", nnode.nbranch[i].lnum, |
1950 | nnode.nbranch[i].offs); | 1946 | nnode.nbranch[i].offs); |
1951 | if (i != UBIFS_LPT_FANOUT - 1) | 1947 | if (i != UBIFS_LPT_FANOUT - 1) |
1952 | printk(KERN_CONT ", "); | 1948 | pr_cont(", "); |
1953 | } | 1949 | } |
1954 | printk(KERN_CONT "\n"); | 1950 | pr_cont("\n"); |
1955 | break; | 1951 | break; |
1956 | } | 1952 | } |
1957 | case UBIFS_LPT_LTAB: | 1953 | case UBIFS_LPT_LTAB: |
1958 | node_len = c->ltab_sz; | 1954 | node_len = c->ltab_sz; |
1959 | printk(KERN_DEBUG "LEB %d:%d, ltab\n", | 1955 | pr_err("LEB %d:%d, ltab\n", lnum, offs); |
1960 | lnum, offs); | ||
1961 | break; | 1956 | break; |
1962 | case UBIFS_LPT_LSAVE: | 1957 | case UBIFS_LPT_LSAVE: |
1963 | node_len = c->lsave_sz; | 1958 | node_len = c->lsave_sz; |
1964 | printk(KERN_DEBUG "LEB %d:%d, lsave len\n", lnum, offs); | 1959 | pr_err("LEB %d:%d, lsave len\n", lnum, offs); |
1965 | break; | 1960 | break; |
1966 | default: | 1961 | default: |
1967 | ubifs_err("LPT node type %d not recognized", node_type); | 1962 | ubifs_err("LPT node type %d not recognized", node_type); |
@@ -1972,8 +1967,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) | |||
1972 | len -= node_len; | 1967 | len -= node_len; |
1973 | } | 1968 | } |
1974 | 1969 | ||
1975 | printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", | 1970 | pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); |
1976 | current->pid, lnum); | ||
1977 | out: | 1971 | out: |
1978 | vfree(buf); | 1972 | vfree(buf); |
1979 | return; | 1973 | return; |
@@ -1990,12 +1984,10 @@ void ubifs_dump_lpt_lebs(const struct ubifs_info *c) | |||
1990 | { | 1984 | { |
1991 | int i; | 1985 | int i; |
1992 | 1986 | ||
1993 | printk(KERN_DEBUG "(pid %d) start dumping all LPT LEBs\n", | 1987 | pr_err("(pid %d) start dumping all LPT LEBs\n", current->pid); |
1994 | current->pid); | ||
1995 | for (i = 0; i < c->lpt_lebs; i++) | 1988 | for (i = 0; i < c->lpt_lebs; i++) |
1996 | dump_lpt_leb(c, i + c->lpt_first); | 1989 | dump_lpt_leb(c, i + c->lpt_first); |
1997 | printk(KERN_DEBUG "(pid %d) finish dumping all LPT LEBs\n", | 1990 | pr_err("(pid %d) finish dumping all LPT LEBs\n", current->pid); |
1998 | current->pid); | ||
1999 | } | 1991 | } |
2000 | 1992 | ||
2001 | /** | 1993 | /** |
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index cebf17ea0458..769701ccb5c9 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c | |||
@@ -562,8 +562,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, | |||
562 | 562 | ||
563 | list_for_each_entry(snod, &sleb->nodes, list) { | 563 | list_for_each_entry(snod, &sleb->nodes, list) { |
564 | if (snod->type != UBIFS_ORPH_NODE) { | 564 | if (snod->type != UBIFS_ORPH_NODE) { |
565 | ubifs_err("invalid node type %d in orphan area at " | 565 | ubifs_err("invalid node type %d in orphan area at %d:%d", |
566 | "%d:%d", snod->type, sleb->lnum, snod->offs); | 566 | snod->type, sleb->lnum, snod->offs); |
567 | ubifs_dump_node(c, snod->node); | 567 | ubifs_dump_node(c, snod->node); |
568 | return -EINVAL; | 568 | return -EINVAL; |
569 | } | 569 | } |
@@ -589,8 +589,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, | |||
589 | * number. That makes this orphan node, out of date. | 589 | * number. That makes this orphan node, out of date. |
590 | */ | 590 | */ |
591 | if (!first) { | 591 | if (!first) { |
592 | ubifs_err("out of order commit number %llu in " | 592 | ubifs_err("out of order commit number %llu in orphan node at %d:%d", |
593 | "orphan node at %d:%d", | ||
594 | cmt_no, sleb->lnum, snod->offs); | 593 | cmt_no, sleb->lnum, snod->offs); |
595 | ubifs_dump_node(c, snod->node); | 594 | ubifs_dump_node(c, snod->node); |
596 | return -EINVAL; | 595 | return -EINVAL; |
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index c30d976b4be8..065096e36ed9 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c | |||
@@ -609,7 +609,8 @@ static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) | |||
609 | snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, | 609 | snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, |
610 | list); | 610 | list); |
611 | 611 | ||
612 | dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs); | 612 | dbg_rcvry("dropping last node at %d:%d", |
613 | sleb->lnum, snod->offs); | ||
613 | *offs = snod->offs; | 614 | *offs = snod->offs; |
614 | list_del(&snod->list); | 615 | list_del(&snod->list); |
615 | kfree(snod); | 616 | kfree(snod); |
@@ -702,8 +703,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
702 | * See header comment for this file for more | 703 | * See header comment for this file for more |
703 | * explanations about the reasons we have this check. | 704 | * explanations about the reasons we have this check. |
704 | */ | 705 | */ |
705 | ubifs_err("corrupt empty space LEB %d:%d, corruption " | 706 | ubifs_err("corrupt empty space LEB %d:%d, corruption starts at %d", |
706 | "starts at %d", lnum, offs, corruption); | 707 | lnum, offs, corruption); |
707 | /* Make sure we dump interesting non-0xFF data */ | 708 | /* Make sure we dump interesting non-0xFF data */ |
708 | offs += corruption; | 709 | offs += corruption; |
709 | buf += corruption; | 710 | buf += corruption; |
@@ -788,7 +789,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
788 | 789 | ||
789 | corrupted_rescan: | 790 | corrupted_rescan: |
790 | /* Re-scan the corrupted data with verbose messages */ | 791 | /* Re-scan the corrupted data with verbose messages */ |
791 | ubifs_err("corruptio %d", ret); | 792 | ubifs_err("corruption %d", ret); |
792 | ubifs_scan_a_node(c, buf, len, lnum, offs, 1); | 793 | ubifs_scan_a_node(c, buf, len, lnum, offs, 1); |
793 | corrupted: | 794 | corrupted: |
794 | ubifs_scanned_corruption(c, lnum, offs, buf); | 795 | ubifs_scanned_corruption(c, lnum, offs, buf); |
@@ -899,8 +900,8 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, | |||
899 | } | 900 | } |
900 | } | 901 | } |
901 | if (snod->sqnum > cs_sqnum) { | 902 | if (snod->sqnum > cs_sqnum) { |
902 | ubifs_err("unrecoverable log corruption " | 903 | ubifs_err("unrecoverable log corruption in LEB %d", |
903 | "in LEB %d", lnum); | 904 | lnum); |
904 | ubifs_scan_destroy(sleb); | 905 | ubifs_scan_destroy(sleb); |
905 | return ERR_PTR(-EUCLEAN); | 906 | return ERR_PTR(-EUCLEAN); |
906 | } | 907 | } |
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index eba46d4a7619..3187925e9879 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c | |||
@@ -141,9 +141,9 @@ static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) | |||
141 | * during the replay. | 141 | * during the replay. |
142 | */ | 142 | */ |
143 | if (dirty != 0) | 143 | if (dirty != 0) |
144 | dbg_msg("LEB %d lp: %d free %d dirty " | 144 | dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty", |
145 | "replay: %d free %d dirty", b->bud->lnum, | 145 | b->bud->lnum, lp->free, lp->dirty, b->free, |
146 | lp->free, lp->dirty, b->free, b->dirty); | 146 | b->dirty); |
147 | } | 147 | } |
148 | lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, | 148 | lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, |
149 | lp->flags | LPROPS_TAKEN, 0); | 149 | lp->flags | LPROPS_TAKEN, 0); |
@@ -677,7 +677,8 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b) | |||
677 | 677 | ||
678 | b->dirty = sleb->endpt - offs - used; | 678 | b->dirty = sleb->endpt - offs - used; |
679 | b->free = c->leb_size - sleb->endpt; | 679 | b->free = c->leb_size - sleb->endpt; |
680 | dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free); | 680 | dbg_mnt("bud LEB %d replied: dirty %d, free %d", |
681 | lnum, b->dirty, b->free); | ||
681 | 682 | ||
682 | out: | 683 | out: |
683 | ubifs_scan_destroy(sleb); | 684 | ubifs_scan_destroy(sleb); |
@@ -865,8 +866,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) | |||
865 | goto out_dump; | 866 | goto out_dump; |
866 | } | 867 | } |
867 | if (le64_to_cpu(node->cmt_no) != c->cmt_no) { | 868 | if (le64_to_cpu(node->cmt_no) != c->cmt_no) { |
868 | ubifs_err("first CS node at LEB %d:%d has wrong " | 869 | ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu", |
869 | "commit number %llu expected %llu", | ||
870 | lnum, offs, | 870 | lnum, offs, |
871 | (unsigned long long)le64_to_cpu(node->cmt_no), | 871 | (unsigned long long)le64_to_cpu(node->cmt_no), |
872 | c->cmt_no); | 872 | c->cmt_no); |
@@ -1026,7 +1026,6 @@ int ubifs_replay_journal(struct ubifs_info *c) | |||
1026 | c->replaying = 1; | 1026 | c->replaying = 1; |
1027 | lnum = c->ltail_lnum = c->lhead_lnum; | 1027 | lnum = c->ltail_lnum = c->lhead_lnum; |
1028 | 1028 | ||
1029 | lnum = UBIFS_LOG_LNUM; | ||
1030 | do { | 1029 | do { |
1031 | err = replay_log_leb(c, lnum, 0, c->sbuf); | 1030 | err = replay_log_leb(c, lnum, 0, c->sbuf); |
1032 | if (err == 1) | 1031 | if (err == 1) |
@@ -1035,7 +1034,7 @@ int ubifs_replay_journal(struct ubifs_info *c) | |||
1035 | if (err) | 1034 | if (err) |
1036 | goto out; | 1035 | goto out; |
1037 | lnum = ubifs_next_log_lnum(c, lnum); | 1036 | lnum = ubifs_next_log_lnum(c, lnum); |
1038 | } while (lnum != UBIFS_LOG_LNUM); | 1037 | } while (lnum != c->ltail_lnum); |
1039 | 1038 | ||
1040 | err = replay_buds(c); | 1039 | err = replay_buds(c); |
1041 | if (err) | 1040 | if (err) |
@@ -1059,8 +1058,8 @@ int ubifs_replay_journal(struct ubifs_info *c) | |||
1059 | c->bi.uncommitted_idx *= c->max_idx_node_sz; | 1058 | c->bi.uncommitted_idx *= c->max_idx_node_sz; |
1060 | 1059 | ||
1061 | ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); | 1060 | ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); |
1062 | dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " | 1061 | dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu", |
1063 | "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, | 1062 | c->lhead_lnum, c->lhead_offs, c->max_sqnum, |
1064 | (unsigned long)c->highest_inum); | 1063 | (unsigned long)c->highest_inum); |
1065 | out: | 1064 | out: |
1066 | destroy_replay_list(c); | 1065 | destroy_replay_list(c); |
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 15e2fc5aa60b..4c37607a958e 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c | |||
@@ -391,9 +391,8 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) | |||
391 | min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6; | 391 | min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6; |
392 | 392 | ||
393 | if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) { | 393 | if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) { |
394 | ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, " | 394 | ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, %d minimum required", |
395 | "%d minimum required", c->leb_cnt, c->vi.size, | 395 | c->leb_cnt, c->vi.size, min_leb_cnt); |
396 | min_leb_cnt); | ||
397 | goto failed; | 396 | goto failed; |
398 | } | 397 | } |
399 | 398 | ||
@@ -411,15 +410,14 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) | |||
411 | 410 | ||
412 | max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS; | 411 | max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS; |
413 | if (c->max_bud_bytes < max_bytes) { | 412 | if (c->max_bud_bytes < max_bytes) { |
414 | ubifs_err("too small journal (%lld bytes), must be at least " | 413 | ubifs_err("too small journal (%lld bytes), must be at least %lld bytes", |
415 | "%lld bytes", c->max_bud_bytes, max_bytes); | 414 | c->max_bud_bytes, max_bytes); |
416 | goto failed; | 415 | goto failed; |
417 | } | 416 | } |
418 | 417 | ||
419 | max_bytes = (long long)c->leb_size * c->main_lebs; | 418 | max_bytes = (long long)c->leb_size * c->main_lebs; |
420 | if (c->max_bud_bytes > max_bytes) { | 419 | if (c->max_bud_bytes > max_bytes) { |
421 | ubifs_err("too large journal size (%lld bytes), only %lld bytes" | 420 | ubifs_err("too large journal size (%lld bytes), only %lld bytes available in the main area", |
422 | "available in the main area", | ||
423 | c->max_bud_bytes, max_bytes); | 421 | c->max_bud_bytes, max_bytes); |
424 | goto failed; | 422 | goto failed; |
425 | } | 423 | } |
@@ -549,10 +547,9 @@ int ubifs_read_superblock(struct ubifs_info *c) | |||
549 | ubifs_assert(!c->ro_media || c->ro_mount); | 547 | ubifs_assert(!c->ro_media || c->ro_mount); |
550 | if (!c->ro_mount || | 548 | if (!c->ro_mount || |
551 | c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { | 549 | c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { |
552 | ubifs_err("on-flash format version is w%d/r%d, but " | 550 | ubifs_err("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", |
553 | "software only supports up to version " | 551 | c->fmt_version, c->ro_compat_version, |
554 | "w%d/r%d", c->fmt_version, | 552 | UBIFS_FORMAT_VERSION, |
555 | c->ro_compat_version, UBIFS_FORMAT_VERSION, | ||
556 | UBIFS_RO_COMPAT_VERSION); | 553 | UBIFS_RO_COMPAT_VERSION); |
557 | if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { | 554 | if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { |
558 | ubifs_msg("only R/O mounting is possible"); | 555 | ubifs_msg("only R/O mounting is possible"); |
@@ -611,8 +608,8 @@ int ubifs_read_superblock(struct ubifs_info *c) | |||
611 | c->fanout = le32_to_cpu(sup->fanout); | 608 | c->fanout = le32_to_cpu(sup->fanout); |
612 | c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); | 609 | c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); |
613 | c->rp_size = le64_to_cpu(sup->rp_size); | 610 | c->rp_size = le64_to_cpu(sup->rp_size); |
614 | c->rp_uid = le32_to_cpu(sup->rp_uid); | 611 | c->rp_uid = make_kuid(&init_user_ns, le32_to_cpu(sup->rp_uid)); |
615 | c->rp_gid = le32_to_cpu(sup->rp_gid); | 612 | c->rp_gid = make_kgid(&init_user_ns, le32_to_cpu(sup->rp_gid)); |
616 | sup_flags = le32_to_cpu(sup->flags); | 613 | sup_flags = le32_to_cpu(sup->flags); |
617 | if (!c->mount_opts.override_compr) | 614 | if (!c->mount_opts.override_compr) |
618 | c->default_compr = le16_to_cpu(sup->default_compr); | 615 | c->default_compr = le16_to_cpu(sup->default_compr); |
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 7c40e6025fd6..58aa05df2bb6 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c | |||
@@ -75,7 +75,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, | |||
75 | magic = le32_to_cpu(ch->magic); | 75 | magic = le32_to_cpu(ch->magic); |
76 | 76 | ||
77 | if (magic == 0xFFFFFFFF) { | 77 | if (magic == 0xFFFFFFFF) { |
78 | dbg_scan("hit empty space"); | 78 | dbg_scan("hit empty space at LEB %d:%d", lnum, offs); |
79 | return SCANNED_EMPTY_SPACE; | 79 | return SCANNED_EMPTY_SPACE; |
80 | } | 80 | } |
81 | 81 | ||
@@ -85,7 +85,8 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, | |||
85 | if (len < UBIFS_CH_SZ) | 85 | if (len < UBIFS_CH_SZ) |
86 | return SCANNED_GARBAGE; | 86 | return SCANNED_GARBAGE; |
87 | 87 | ||
88 | dbg_scan("scanning %s", dbg_ntype(ch->node_type)); | 88 | dbg_scan("scanning %s at LEB %d:%d", |
89 | dbg_ntype(ch->node_type), lnum, offs); | ||
89 | 90 | ||
90 | if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) | 91 | if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) |
91 | return SCANNED_A_CORRUPT_NODE; | 92 | return SCANNED_A_CORRUPT_NODE; |
@@ -114,8 +115,8 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, | |||
114 | return SCANNED_A_BAD_PAD_NODE; | 115 | return SCANNED_A_BAD_PAD_NODE; |
115 | } | 116 | } |
116 | 117 | ||
117 | dbg_scan("%d bytes padded, offset now %d", | 118 | dbg_scan("%d bytes padded at LEB %d:%d, offset now %d", pad_len, |
118 | pad_len, ALIGN(offs + node_len + pad_len, 8)); | 119 | lnum, offs, ALIGN(offs + node_len + pad_len, 8)); |
119 | 120 | ||
120 | return node_len + pad_len; | 121 | return node_len + pad_len; |
121 | } | 122 | } |
@@ -150,8 +151,8 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, | |||
150 | 151 | ||
151 | err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); | 152 | err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); |
152 | if (err && err != -EBADMSG) { | 153 | if (err && err != -EBADMSG) { |
153 | ubifs_err("cannot read %d bytes from LEB %d:%d," | 154 | ubifs_err("cannot read %d bytes from LEB %d:%d, error %d", |
154 | " error %d", c->leb_size - offs, lnum, offs, err); | 155 | c->leb_size - offs, lnum, offs, err); |
155 | kfree(sleb); | 156 | kfree(sleb); |
156 | return ERR_PTR(err); | 157 | return ERR_PTR(err); |
157 | } | 158 | } |
@@ -240,8 +241,6 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, | |||
240 | int len; | 241 | int len; |
241 | 242 | ||
242 | ubifs_err("corruption at LEB %d:%d", lnum, offs); | 243 | ubifs_err("corruption at LEB %d:%d", lnum, offs); |
243 | if (dbg_is_tst_rcvry(c)) | ||
244 | return; | ||
245 | len = c->leb_size - offs; | 244 | len = c->leb_size - offs; |
246 | if (len > 8192) | 245 | if (len > 8192) |
247 | len = 8192; | 246 | len = 8192; |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index c3fa6c5327a3..ddc0f6ae65e9 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -89,9 +89,8 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) | |||
89 | return 5; | 89 | return 5; |
90 | 90 | ||
91 | if (!ubifs_compr_present(ui->compr_type)) { | 91 | if (!ubifs_compr_present(ui->compr_type)) { |
92 | ubifs_warn("inode %lu uses '%s' compression, but it was not " | 92 | ubifs_warn("inode %lu uses '%s' compression, but it was not compiled in", |
93 | "compiled in", inode->i_ino, | 93 | inode->i_ino, ubifs_compr_name(ui->compr_type)); |
94 | ubifs_compr_name(ui->compr_type)); | ||
95 | } | 94 | } |
96 | 95 | ||
97 | err = dbg_check_dir(c, inode); | 96 | err = dbg_check_dir(c, inode); |
@@ -130,8 +129,8 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) | |||
130 | 129 | ||
131 | inode->i_flags |= (S_NOCMTIME | S_NOATIME); | 130 | inode->i_flags |= (S_NOCMTIME | S_NOATIME); |
132 | set_nlink(inode, le32_to_cpu(ino->nlink)); | 131 | set_nlink(inode, le32_to_cpu(ino->nlink)); |
133 | inode->i_uid = le32_to_cpu(ino->uid); | 132 | i_uid_write(inode, le32_to_cpu(ino->uid)); |
134 | inode->i_gid = le32_to_cpu(ino->gid); | 133 | i_gid_write(inode, le32_to_cpu(ino->gid)); |
135 | inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); | 134 | inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); |
136 | inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); | 135 | inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); |
137 | inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec); | 136 | inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec); |
@@ -1061,8 +1060,8 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, | |||
1061 | 1060 | ||
1062 | flag = parse_standard_option(p); | 1061 | flag = parse_standard_option(p); |
1063 | if (!flag) { | 1062 | if (!flag) { |
1064 | ubifs_err("unrecognized mount option \"%s\" " | 1063 | ubifs_err("unrecognized mount option \"%s\" or missing value", |
1065 | "or missing value", p); | 1064 | p); |
1066 | return -EINVAL; | 1065 | return -EINVAL; |
1067 | } | 1066 | } |
1068 | sb->s_flags |= flag; | 1067 | sb->s_flags |= flag; |
@@ -1124,8 +1123,8 @@ again: | |||
1124 | } | 1123 | } |
1125 | 1124 | ||
1126 | /* Just disable bulk-read */ | 1125 | /* Just disable bulk-read */ |
1127 | ubifs_warn("Cannot allocate %d bytes of memory for bulk-read, " | 1126 | ubifs_warn("cannot allocate %d bytes of memory for bulk-read, disabling it", |
1128 | "disabling it", c->max_bu_buf_len); | 1127 | c->max_bu_buf_len); |
1129 | c->mount_opts.bulk_read = 1; | 1128 | c->mount_opts.bulk_read = 1; |
1130 | c->bulk_read = 0; | 1129 | c->bulk_read = 0; |
1131 | return; | 1130 | return; |
@@ -1157,14 +1156,11 @@ static int check_free_space(struct ubifs_info *c) | |||
1157 | * | 1156 | * |
1158 | * This function mounts UBIFS file system. Returns zero in case of success and | 1157 | * This function mounts UBIFS file system. Returns zero in case of success and |
1159 | * a negative error code in case of failure. | 1158 | * a negative error code in case of failure. |
1160 | * | ||
1161 | * Note, the function does not de-allocate resources it it fails half way | ||
1162 | * through, and the caller has to do this instead. | ||
1163 | */ | 1159 | */ |
1164 | static int mount_ubifs(struct ubifs_info *c) | 1160 | static int mount_ubifs(struct ubifs_info *c) |
1165 | { | 1161 | { |
1166 | int err; | 1162 | int err; |
1167 | long long x; | 1163 | long long x, y; |
1168 | size_t sz; | 1164 | size_t sz; |
1169 | 1165 | ||
1170 | c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY); | 1166 | c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY); |
@@ -1414,75 +1410,69 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1414 | 1410 | ||
1415 | c->mounting = 0; | 1411 | c->mounting = 0; |
1416 | 1412 | ||
1417 | ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", | 1413 | ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s", |
1418 | c->vi.ubi_num, c->vi.vol_id, c->vi.name); | 1414 | c->vi.ubi_num, c->vi.vol_id, c->vi.name, |
1419 | if (c->ro_mount) | 1415 | c->ro_mount ? ", R/O mode" : NULL); |
1420 | ubifs_msg("mounted read-only"); | ||
1421 | x = (long long)c->main_lebs * c->leb_size; | 1416 | x = (long long)c->main_lebs * c->leb_size; |
1422 | ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " | 1417 | y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; |
1423 | "LEBs)", x, x >> 10, x >> 20, c->main_lebs); | 1418 | ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", |
1424 | x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; | 1419 | c->leb_size, c->leb_size >> 10, c->min_io_size, |
1425 | ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " | 1420 | c->max_write_size); |
1426 | "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); | 1421 | ubifs_msg("FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)", |
1427 | ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)", | 1422 | x, x >> 20, c->main_lebs, |
1423 | y, y >> 20, c->log_lebs + c->max_bud_cnt); | ||
1424 | ubifs_msg("reserved for root: %llu bytes (%llu KiB)", | ||
1425 | c->report_rp_size, c->report_rp_size >> 10); | ||
1426 | ubifs_msg("media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s", | ||
1428 | c->fmt_version, c->ro_compat_version, | 1427 | c->fmt_version, c->ro_compat_version, |
1429 | UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); | 1428 | UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION, c->uuid, |
1430 | ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); | 1429 | c->big_lpt ? ", big LPT model" : ", small LPT model"); |
1431 | ubifs_msg("reserved for root: %llu bytes (%llu KiB)", | 1430 | |
1432 | c->report_rp_size, c->report_rp_size >> 10); | 1431 | dbg_gen("default compressor: %s", ubifs_compr_name(c->default_compr)); |
1433 | 1432 | dbg_gen("data journal heads: %d", | |
1434 | dbg_msg("compiled on: " __DATE__ " at " __TIME__); | ||
1435 | dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); | ||
1436 | dbg_msg("max. write size: %d bytes", c->max_write_size); | ||
1437 | dbg_msg("LEB size: %d bytes (%d KiB)", | ||
1438 | c->leb_size, c->leb_size >> 10); | ||
1439 | dbg_msg("data journal heads: %d", | ||
1440 | c->jhead_cnt - NONDATA_JHEADS_CNT); | 1433 | c->jhead_cnt - NONDATA_JHEADS_CNT); |
1441 | dbg_msg("UUID: %pUB", c->uuid); | 1434 | dbg_gen("log LEBs: %d (%d - %d)", |
1442 | dbg_msg("big_lpt %d", c->big_lpt); | ||
1443 | dbg_msg("log LEBs: %d (%d - %d)", | ||
1444 | c->log_lebs, UBIFS_LOG_LNUM, c->log_last); | 1435 | c->log_lebs, UBIFS_LOG_LNUM, c->log_last); |
1445 | dbg_msg("LPT area LEBs: %d (%d - %d)", | 1436 | dbg_gen("LPT area LEBs: %d (%d - %d)", |
1446 | c->lpt_lebs, c->lpt_first, c->lpt_last); | 1437 | c->lpt_lebs, c->lpt_first, c->lpt_last); |
1447 | dbg_msg("orphan area LEBs: %d (%d - %d)", | 1438 | dbg_gen("orphan area LEBs: %d (%d - %d)", |
1448 | c->orph_lebs, c->orph_first, c->orph_last); | 1439 | c->orph_lebs, c->orph_first, c->orph_last); |
1449 | dbg_msg("main area LEBs: %d (%d - %d)", | 1440 | dbg_gen("main area LEBs: %d (%d - %d)", |
1450 | c->main_lebs, c->main_first, c->leb_cnt - 1); | 1441 | c->main_lebs, c->main_first, c->leb_cnt - 1); |
1451 | dbg_msg("index LEBs: %d", c->lst.idx_lebs); | 1442 | dbg_gen("index LEBs: %d", c->lst.idx_lebs); |
1452 | dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", | 1443 | dbg_gen("total index bytes: %lld (%lld KiB, %lld MiB)", |
1453 | c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, | 1444 | c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, |
1454 | c->bi.old_idx_sz >> 20); | 1445 | c->bi.old_idx_sz >> 20); |
1455 | dbg_msg("key hash type: %d", c->key_hash_type); | 1446 | dbg_gen("key hash type: %d", c->key_hash_type); |
1456 | dbg_msg("tree fanout: %d", c->fanout); | 1447 | dbg_gen("tree fanout: %d", c->fanout); |
1457 | dbg_msg("reserved GC LEB: %d", c->gc_lnum); | 1448 | dbg_gen("reserved GC LEB: %d", c->gc_lnum); |
1458 | dbg_msg("first main LEB: %d", c->main_first); | 1449 | dbg_gen("max. znode size %d", c->max_znode_sz); |
1459 | dbg_msg("max. znode size %d", c->max_znode_sz); | 1450 | dbg_gen("max. index node size %d", c->max_idx_node_sz); |
1460 | dbg_msg("max. index node size %d", c->max_idx_node_sz); | 1451 | dbg_gen("node sizes: data %zu, inode %zu, dentry %zu", |
1461 | dbg_msg("node sizes: data %zu, inode %zu, dentry %zu", | ||
1462 | UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ); | 1452 | UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ); |
1463 | dbg_msg("node sizes: trun %zu, sb %zu, master %zu", | 1453 | dbg_gen("node sizes: trun %zu, sb %zu, master %zu", |
1464 | UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); | 1454 | UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); |
1465 | dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", | 1455 | dbg_gen("node sizes: ref %zu, cmt. start %zu, orph %zu", |
1466 | UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); | 1456 | UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); |
1467 | dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", | 1457 | dbg_gen("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", |
1468 | UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, | 1458 | UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, |
1469 | UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); | 1459 | UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); |
1470 | dbg_msg("dead watermark: %d", c->dead_wm); | 1460 | dbg_gen("dead watermark: %d", c->dead_wm); |
1471 | dbg_msg("dark watermark: %d", c->dark_wm); | 1461 | dbg_gen("dark watermark: %d", c->dark_wm); |
1472 | dbg_msg("LEB overhead: %d", c->leb_overhead); | 1462 | dbg_gen("LEB overhead: %d", c->leb_overhead); |
1473 | x = (long long)c->main_lebs * c->dark_wm; | 1463 | x = (long long)c->main_lebs * c->dark_wm; |
1474 | dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", | 1464 | dbg_gen("max. dark space: %lld (%lld KiB, %lld MiB)", |
1475 | x, x >> 10, x >> 20); | 1465 | x, x >> 10, x >> 20); |
1476 | dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)", | 1466 | dbg_gen("maximum bud bytes: %lld (%lld KiB, %lld MiB)", |
1477 | c->max_bud_bytes, c->max_bud_bytes >> 10, | 1467 | c->max_bud_bytes, c->max_bud_bytes >> 10, |
1478 | c->max_bud_bytes >> 20); | 1468 | c->max_bud_bytes >> 20); |
1479 | dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", | 1469 | dbg_gen("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", |
1480 | c->bg_bud_bytes, c->bg_bud_bytes >> 10, | 1470 | c->bg_bud_bytes, c->bg_bud_bytes >> 10, |
1481 | c->bg_bud_bytes >> 20); | 1471 | c->bg_bud_bytes >> 20); |
1482 | dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)", | 1472 | dbg_gen("current bud bytes %lld (%lld KiB, %lld MiB)", |
1483 | c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20); | 1473 | c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20); |
1484 | dbg_msg("max. seq. number: %llu", c->max_sqnum); | 1474 | dbg_gen("max. seq. number: %llu", c->max_sqnum); |
1485 | dbg_msg("commit number: %llu", c->cmt_no); | 1475 | dbg_gen("commit number: %llu", c->cmt_no); |
1486 | 1476 | ||
1487 | return 0; | 1477 | return 0; |
1488 | 1478 | ||
@@ -1567,10 +1557,9 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
1567 | 1557 | ||
1568 | if (c->rw_incompat) { | 1558 | if (c->rw_incompat) { |
1569 | ubifs_err("the file-system is not R/W-compatible"); | 1559 | ubifs_err("the file-system is not R/W-compatible"); |
1570 | ubifs_msg("on-flash format version is w%d/r%d, but software " | 1560 | ubifs_msg("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", |
1571 | "only supports up to version w%d/r%d", c->fmt_version, | 1561 | c->fmt_version, c->ro_compat_version, |
1572 | c->ro_compat_version, UBIFS_FORMAT_VERSION, | 1562 | UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); |
1573 | UBIFS_RO_COMPAT_VERSION); | ||
1574 | return -EROFS; | 1563 | return -EROFS; |
1575 | } | 1564 | } |
1576 | 1565 | ||
@@ -1831,8 +1820,8 @@ static void ubifs_put_super(struct super_block *sb) | |||
1831 | * next mount, so we just print a message and | 1820 | * next mount, so we just print a message and |
1832 | * continue to unmount normally. | 1821 | * continue to unmount normally. |
1833 | */ | 1822 | */ |
1834 | ubifs_err("failed to write master node, " | 1823 | ubifs_err("failed to write master node, error %d", |
1835 | "error %d", err); | 1824 | err); |
1836 | } else { | 1825 | } else { |
1837 | for (i = 0; i < c->jhead_cnt; i++) | 1826 | for (i = 0; i < c->jhead_cnt; i++) |
1838 | /* Make sure write-buffer timers are canceled */ | 1827 | /* Make sure write-buffer timers are canceled */ |
@@ -2251,8 +2240,7 @@ static int __init ubifs_init(void) | |||
2251 | * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. | 2240 | * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. |
2252 | */ | 2241 | */ |
2253 | if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { | 2242 | if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { |
2254 | ubifs_err("VFS page cache size is %u bytes, but UBIFS requires" | 2243 | ubifs_err("VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes", |
2255 | " at least 4096 bytes", | ||
2256 | (unsigned int)PAGE_CACHE_SIZE); | 2244 | (unsigned int)PAGE_CACHE_SIZE); |
2257 | return -EINVAL; | 2245 | return -EINVAL; |
2258 | } | 2246 | } |
@@ -2301,6 +2289,12 @@ static void __exit ubifs_exit(void) | |||
2301 | dbg_debugfs_exit(); | 2289 | dbg_debugfs_exit(); |
2302 | ubifs_compressors_exit(); | 2290 | ubifs_compressors_exit(); |
2303 | unregister_shrinker(&ubifs_shrinker_info); | 2291 | unregister_shrinker(&ubifs_shrinker_info); |
2292 | |||
2293 | /* | ||
2294 | * Make sure all delayed rcu free inodes are flushed before we | ||
2295 | * destroy cache. | ||
2296 | */ | ||
2297 | rcu_barrier(); | ||
2304 | kmem_cache_destroy(ubifs_inode_slab); | 2298 | kmem_cache_destroy(ubifs_inode_slab); |
2305 | unregister_filesystem(&ubifs_fs_type); | 2299 | unregister_filesystem(&ubifs_fs_type); |
2306 | } | 2300 | } |
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index d38ac7f9654b..f6bf8995c7b1 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c | |||
@@ -328,8 +328,8 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, | |||
328 | case UBIFS_XENT_KEY: | 328 | case UBIFS_XENT_KEY: |
329 | break; | 329 | break; |
330 | default: | 330 | default: |
331 | dbg_msg("bad key type at slot %d: %d", | 331 | ubifs_err("bad key type at slot %d: %d", |
332 | i, key_type(c, &zbr->key)); | 332 | i, key_type(c, &zbr->key)); |
333 | err = 3; | 333 | err = 3; |
334 | goto out_dump; | 334 | goto out_dump; |
335 | } | 335 | } |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 1e5a08623d11..5486346d0a3f 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -42,16 +42,15 @@ | |||
42 | #define UBIFS_VERSION 1 | 42 | #define UBIFS_VERSION 1 |
43 | 43 | ||
44 | /* Normal UBIFS messages */ | 44 | /* Normal UBIFS messages */ |
45 | #define ubifs_msg(fmt, ...) \ | 45 | #define ubifs_msg(fmt, ...) pr_notice("UBIFS: " fmt "\n", ##__VA_ARGS__) |
46 | printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__) | ||
47 | /* UBIFS error messages */ | 46 | /* UBIFS error messages */ |
48 | #define ubifs_err(fmt, ...) \ | 47 | #define ubifs_err(fmt, ...) \ |
49 | printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ | 48 | pr_err("UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ |
50 | __func__, ##__VA_ARGS__) | 49 | __func__, ##__VA_ARGS__) |
51 | /* UBIFS warning messages */ | 50 | /* UBIFS warning messages */ |
52 | #define ubifs_warn(fmt, ...) \ | 51 | #define ubifs_warn(fmt, ...) \ |
53 | printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \ | 52 | pr_warn("UBIFS warning (pid %d): %s: " fmt "\n", \ |
54 | current->pid, __func__, ##__VA_ARGS__) | 53 | current->pid, __func__, ##__VA_ARGS__) |
55 | 54 | ||
56 | /* UBIFS file system VFS magic number */ | 55 | /* UBIFS file system VFS magic number */ |
57 | #define UBIFS_SUPER_MAGIC 0x24051905 | 56 | #define UBIFS_SUPER_MAGIC 0x24051905 |
@@ -1426,8 +1425,8 @@ struct ubifs_info { | |||
1426 | 1425 | ||
1427 | long long rp_size; | 1426 | long long rp_size; |
1428 | long long report_rp_size; | 1427 | long long report_rp_size; |
1429 | uid_t rp_uid; | 1428 | kuid_t rp_uid; |
1430 | gid_t rp_gid; | 1429 | kgid_t rp_gid; |
1431 | 1430 | ||
1432 | /* The below fields are used only during mounting and re-mounting */ | 1431 | /* The below fields are used only during mounting and re-mounting */ |
1433 | unsigned int empty:1; | 1432 | unsigned int empty:1; |
diff --git a/fs/udf/file.c b/fs/udf/file.c index 7f3f7ba3df6e..77b5953eaac8 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -39,20 +39,24 @@ | |||
39 | #include "udf_i.h" | 39 | #include "udf_i.h" |
40 | #include "udf_sb.h" | 40 | #include "udf_sb.h" |
41 | 41 | ||
42 | static int udf_adinicb_readpage(struct file *file, struct page *page) | 42 | static void __udf_adinicb_readpage(struct page *page) |
43 | { | 43 | { |
44 | struct inode *inode = page->mapping->host; | 44 | struct inode *inode = page->mapping->host; |
45 | char *kaddr; | 45 | char *kaddr; |
46 | struct udf_inode_info *iinfo = UDF_I(inode); | 46 | struct udf_inode_info *iinfo = UDF_I(inode); |
47 | 47 | ||
48 | BUG_ON(!PageLocked(page)); | ||
49 | |||
50 | kaddr = kmap(page); | 48 | kaddr = kmap(page); |
51 | memset(kaddr, 0, PAGE_CACHE_SIZE); | ||
52 | memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); | 49 | memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); |
50 | memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size); | ||
53 | flush_dcache_page(page); | 51 | flush_dcache_page(page); |
54 | SetPageUptodate(page); | 52 | SetPageUptodate(page); |
55 | kunmap(page); | 53 | kunmap(page); |
54 | } | ||
55 | |||
56 | static int udf_adinicb_readpage(struct file *file, struct page *page) | ||
57 | { | ||
58 | BUG_ON(!PageLocked(page)); | ||
59 | __udf_adinicb_readpage(page); | ||
56 | unlock_page(page); | 60 | unlock_page(page); |
57 | 61 | ||
58 | return 0; | 62 | return 0; |
@@ -77,6 +81,25 @@ static int udf_adinicb_writepage(struct page *page, | |||
77 | return 0; | 81 | return 0; |
78 | } | 82 | } |
79 | 83 | ||
84 | static int udf_adinicb_write_begin(struct file *file, | ||
85 | struct address_space *mapping, loff_t pos, | ||
86 | unsigned len, unsigned flags, struct page **pagep, | ||
87 | void **fsdata) | ||
88 | { | ||
89 | struct page *page; | ||
90 | |||
91 | if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE)) | ||
92 | return -EIO; | ||
93 | page = grab_cache_page_write_begin(mapping, 0, flags); | ||
94 | if (!page) | ||
95 | return -ENOMEM; | ||
96 | *pagep = page; | ||
97 | |||
98 | if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) | ||
99 | __udf_adinicb_readpage(page); | ||
100 | return 0; | ||
101 | } | ||
102 | |||
80 | static int udf_adinicb_write_end(struct file *file, | 103 | static int udf_adinicb_write_end(struct file *file, |
81 | struct address_space *mapping, | 104 | struct address_space *mapping, |
82 | loff_t pos, unsigned len, unsigned copied, | 105 | loff_t pos, unsigned len, unsigned copied, |
@@ -95,11 +118,20 @@ static int udf_adinicb_write_end(struct file *file, | |||
95 | return simple_write_end(file, mapping, pos, len, copied, page, fsdata); | 118 | return simple_write_end(file, mapping, pos, len, copied, page, fsdata); |
96 | } | 119 | } |
97 | 120 | ||
121 | static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, | ||
122 | const struct iovec *iov, | ||
123 | loff_t offset, unsigned long nr_segs) | ||
124 | { | ||
125 | /* Fallback to buffered I/O. */ | ||
126 | return 0; | ||
127 | } | ||
128 | |||
98 | const struct address_space_operations udf_adinicb_aops = { | 129 | const struct address_space_operations udf_adinicb_aops = { |
99 | .readpage = udf_adinicb_readpage, | 130 | .readpage = udf_adinicb_readpage, |
100 | .writepage = udf_adinicb_writepage, | 131 | .writepage = udf_adinicb_writepage, |
101 | .write_begin = simple_write_begin, | 132 | .write_begin = udf_adinicb_write_begin, |
102 | .write_end = udf_adinicb_write_end, | 133 | .write_end = udf_adinicb_write_end, |
134 | .direct_IO = udf_adinicb_direct_IO, | ||
103 | }; | 135 | }; |
104 | 136 | ||
105 | static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 137 | static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index fafaad795cd6..df88b957ccf0 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -95,11 +95,33 @@ void udf_evict_inode(struct inode *inode) | |||
95 | } | 95 | } |
96 | } | 96 | } |
97 | 97 | ||
98 | static void udf_write_failed(struct address_space *mapping, loff_t to) | ||
99 | { | ||
100 | struct inode *inode = mapping->host; | ||
101 | struct udf_inode_info *iinfo = UDF_I(inode); | ||
102 | loff_t isize = inode->i_size; | ||
103 | |||
104 | if (to > isize) { | ||
105 | truncate_pagecache(inode, to, isize); | ||
106 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { | ||
107 | down_write(&iinfo->i_data_sem); | ||
108 | udf_truncate_extents(inode); | ||
109 | up_write(&iinfo->i_data_sem); | ||
110 | } | ||
111 | } | ||
112 | } | ||
113 | |||
98 | static int udf_writepage(struct page *page, struct writeback_control *wbc) | 114 | static int udf_writepage(struct page *page, struct writeback_control *wbc) |
99 | { | 115 | { |
100 | return block_write_full_page(page, udf_get_block, wbc); | 116 | return block_write_full_page(page, udf_get_block, wbc); |
101 | } | 117 | } |
102 | 118 | ||
119 | static int udf_writepages(struct address_space *mapping, | ||
120 | struct writeback_control *wbc) | ||
121 | { | ||
122 | return mpage_writepages(mapping, wbc, udf_get_block); | ||
123 | } | ||
124 | |||
103 | static int udf_readpage(struct file *file, struct page *page) | 125 | static int udf_readpage(struct file *file, struct page *page) |
104 | { | 126 | { |
105 | return mpage_readpage(page, udf_get_block); | 127 | return mpage_readpage(page, udf_get_block); |
@@ -118,21 +140,24 @@ static int udf_write_begin(struct file *file, struct address_space *mapping, | |||
118 | int ret; | 140 | int ret; |
119 | 141 | ||
120 | ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block); | 142 | ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block); |
121 | if (unlikely(ret)) { | 143 | if (unlikely(ret)) |
122 | struct inode *inode = mapping->host; | 144 | udf_write_failed(mapping, pos + len); |
123 | struct udf_inode_info *iinfo = UDF_I(inode); | 145 | return ret; |
124 | loff_t isize = inode->i_size; | 146 | } |
125 | |||
126 | if (pos + len > isize) { | ||
127 | truncate_pagecache(inode, pos + len, isize); | ||
128 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { | ||
129 | down_write(&iinfo->i_data_sem); | ||
130 | udf_truncate_extents(inode); | ||
131 | up_write(&iinfo->i_data_sem); | ||
132 | } | ||
133 | } | ||
134 | } | ||
135 | 147 | ||
148 | static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, | ||
149 | const struct iovec *iov, | ||
150 | loff_t offset, unsigned long nr_segs) | ||
151 | { | ||
152 | struct file *file = iocb->ki_filp; | ||
153 | struct address_space *mapping = file->f_mapping; | ||
154 | struct inode *inode = mapping->host; | ||
155 | ssize_t ret; | ||
156 | |||
157 | ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | ||
158 | udf_get_block); | ||
159 | if (unlikely(ret < 0 && (rw & WRITE))) | ||
160 | udf_write_failed(mapping, offset + iov_length(iov, nr_segs)); | ||
136 | return ret; | 161 | return ret; |
137 | } | 162 | } |
138 | 163 | ||
@@ -145,8 +170,10 @@ const struct address_space_operations udf_aops = { | |||
145 | .readpage = udf_readpage, | 170 | .readpage = udf_readpage, |
146 | .readpages = udf_readpages, | 171 | .readpages = udf_readpages, |
147 | .writepage = udf_writepage, | 172 | .writepage = udf_writepage, |
148 | .write_begin = udf_write_begin, | 173 | .writepages = udf_writepages, |
149 | .write_end = generic_write_end, | 174 | .write_begin = udf_write_begin, |
175 | .write_end = generic_write_end, | ||
176 | .direct_IO = udf_direct_IO, | ||
150 | .bmap = udf_bmap, | 177 | .bmap = udf_bmap, |
151 | }; | 178 | }; |
152 | 179 | ||
@@ -1124,14 +1151,17 @@ int udf_setsize(struct inode *inode, loff_t newsize) | |||
1124 | if (err) | 1151 | if (err) |
1125 | return err; | 1152 | return err; |
1126 | down_write(&iinfo->i_data_sem); | 1153 | down_write(&iinfo->i_data_sem); |
1127 | } else | 1154 | } else { |
1128 | iinfo->i_lenAlloc = newsize; | 1155 | iinfo->i_lenAlloc = newsize; |
1156 | goto set_size; | ||
1157 | } | ||
1129 | } | 1158 | } |
1130 | err = udf_extend_file(inode, newsize); | 1159 | err = udf_extend_file(inode, newsize); |
1131 | if (err) { | 1160 | if (err) { |
1132 | up_write(&iinfo->i_data_sem); | 1161 | up_write(&iinfo->i_data_sem); |
1133 | return err; | 1162 | return err; |
1134 | } | 1163 | } |
1164 | set_size: | ||
1135 | truncate_setsize(inode, newsize); | 1165 | truncate_setsize(inode, newsize); |
1136 | up_write(&iinfo->i_data_sem); | 1166 | up_write(&iinfo->i_data_sem); |
1137 | } else { | 1167 | } else { |
@@ -1309,14 +1339,14 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1309 | } | 1339 | } |
1310 | 1340 | ||
1311 | read_lock(&sbi->s_cred_lock); | 1341 | read_lock(&sbi->s_cred_lock); |
1312 | inode->i_uid = le32_to_cpu(fe->uid); | 1342 | i_uid_write(inode, le32_to_cpu(fe->uid)); |
1313 | if (inode->i_uid == -1 || | 1343 | if (!uid_valid(inode->i_uid) || |
1314 | UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) || | 1344 | UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) || |
1315 | UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_SET)) | 1345 | UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_SET)) |
1316 | inode->i_uid = UDF_SB(inode->i_sb)->s_uid; | 1346 | inode->i_uid = UDF_SB(inode->i_sb)->s_uid; |
1317 | 1347 | ||
1318 | inode->i_gid = le32_to_cpu(fe->gid); | 1348 | i_gid_write(inode, le32_to_cpu(fe->gid)); |
1319 | if (inode->i_gid == -1 || | 1349 | if (!gid_valid(inode->i_gid) || |
1320 | UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_IGNORE) || | 1350 | UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_IGNORE) || |
1321 | UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET)) | 1351 | UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET)) |
1322 | inode->i_gid = UDF_SB(inode->i_sb)->s_gid; | 1352 | inode->i_gid = UDF_SB(inode->i_sb)->s_gid; |
@@ -1539,12 +1569,12 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1539 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET)) | 1569 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET)) |
1540 | fe->uid = cpu_to_le32(-1); | 1570 | fe->uid = cpu_to_le32(-1); |
1541 | else | 1571 | else |
1542 | fe->uid = cpu_to_le32(inode->i_uid); | 1572 | fe->uid = cpu_to_le32(i_uid_read(inode)); |
1543 | 1573 | ||
1544 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_FORGET)) | 1574 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_FORGET)) |
1545 | fe->gid = cpu_to_le32(-1); | 1575 | fe->gid = cpu_to_le32(-1); |
1546 | else | 1576 | else |
1547 | fe->gid = cpu_to_le32(inode->i_gid); | 1577 | fe->gid = cpu_to_le32(i_gid_read(inode)); |
1548 | 1578 | ||
1549 | udfperms = ((inode->i_mode & S_IRWXO)) | | 1579 | udfperms = ((inode->i_mode & S_IRWXO)) | |
1550 | ((inode->i_mode & S_IRWXG) << 2) | | 1580 | ((inode->i_mode & S_IRWXG) << 2) | |
diff --git a/fs/udf/super.c b/fs/udf/super.c index dcbf98722afc..d44fb568abe1 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -171,6 +171,11 @@ static int init_inodecache(void) | |||
171 | 171 | ||
172 | static void destroy_inodecache(void) | 172 | static void destroy_inodecache(void) |
173 | { | 173 | { |
174 | /* | ||
175 | * Make sure all delayed rcu free inodes are flushed before we | ||
176 | * destroy cache. | ||
177 | */ | ||
178 | rcu_barrier(); | ||
174 | kmem_cache_destroy(udf_inode_cachep); | 179 | kmem_cache_destroy(udf_inode_cachep); |
175 | } | 180 | } |
176 | 181 | ||
@@ -199,8 +204,8 @@ struct udf_options { | |||
199 | unsigned int rootdir; | 204 | unsigned int rootdir; |
200 | unsigned int flags; | 205 | unsigned int flags; |
201 | umode_t umask; | 206 | umode_t umask; |
202 | gid_t gid; | 207 | kgid_t gid; |
203 | uid_t uid; | 208 | kuid_t uid; |
204 | umode_t fmode; | 209 | umode_t fmode; |
205 | umode_t dmode; | 210 | umode_t dmode; |
206 | struct nls_table *nls_map; | 211 | struct nls_table *nls_map; |
@@ -335,9 +340,9 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root) | |||
335 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_IGNORE)) | 340 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_IGNORE)) |
336 | seq_puts(seq, ",gid=ignore"); | 341 | seq_puts(seq, ",gid=ignore"); |
337 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET)) | 342 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET)) |
338 | seq_printf(seq, ",uid=%u", sbi->s_uid); | 343 | seq_printf(seq, ",uid=%u", from_kuid(&init_user_ns, sbi->s_uid)); |
339 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET)) | 344 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET)) |
340 | seq_printf(seq, ",gid=%u", sbi->s_gid); | 345 | seq_printf(seq, ",gid=%u", from_kgid(&init_user_ns, sbi->s_gid)); |
341 | if (sbi->s_umask != 0) | 346 | if (sbi->s_umask != 0) |
342 | seq_printf(seq, ",umask=%ho", sbi->s_umask); | 347 | seq_printf(seq, ",umask=%ho", sbi->s_umask); |
343 | if (sbi->s_fmode != UDF_INVALID_MODE) | 348 | if (sbi->s_fmode != UDF_INVALID_MODE) |
@@ -516,13 +521,17 @@ static int udf_parse_options(char *options, struct udf_options *uopt, | |||
516 | case Opt_gid: | 521 | case Opt_gid: |
517 | if (match_int(args, &option)) | 522 | if (match_int(args, &option)) |
518 | return 0; | 523 | return 0; |
519 | uopt->gid = option; | 524 | uopt->gid = make_kgid(current_user_ns(), option); |
525 | if (!gid_valid(uopt->gid)) | ||
526 | return 0; | ||
520 | uopt->flags |= (1 << UDF_FLAG_GID_SET); | 527 | uopt->flags |= (1 << UDF_FLAG_GID_SET); |
521 | break; | 528 | break; |
522 | case Opt_uid: | 529 | case Opt_uid: |
523 | if (match_int(args, &option)) | 530 | if (match_int(args, &option)) |
524 | return 0; | 531 | return 0; |
525 | uopt->uid = option; | 532 | uopt->uid = make_kuid(current_user_ns(), option); |
533 | if (!uid_valid(uopt->uid)) | ||
534 | return 0; | ||
526 | uopt->flags |= (1 << UDF_FLAG_UID_SET); | 535 | uopt->flags |= (1 << UDF_FLAG_UID_SET); |
527 | break; | 536 | break; |
528 | case Opt_umask: | 537 | case Opt_umask: |
@@ -1344,6 +1353,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, | |||
1344 | udf_err(sb, "error loading logical volume descriptor: " | 1353 | udf_err(sb, "error loading logical volume descriptor: " |
1345 | "Partition table too long (%u > %lu)\n", table_len, | 1354 | "Partition table too long (%u > %lu)\n", table_len, |
1346 | sb->s_blocksize - sizeof(*lvd)); | 1355 | sb->s_blocksize - sizeof(*lvd)); |
1356 | ret = 1; | ||
1347 | goto out_bh; | 1357 | goto out_bh; |
1348 | } | 1358 | } |
1349 | 1359 | ||
@@ -1388,8 +1398,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, | |||
1388 | UDF_ID_SPARABLE, | 1398 | UDF_ID_SPARABLE, |
1389 | strlen(UDF_ID_SPARABLE))) { | 1399 | strlen(UDF_ID_SPARABLE))) { |
1390 | if (udf_load_sparable_map(sb, map, | 1400 | if (udf_load_sparable_map(sb, map, |
1391 | (struct sparablePartitionMap *)gpm) < 0) | 1401 | (struct sparablePartitionMap *)gpm) < 0) { |
1402 | ret = 1; | ||
1392 | goto out_bh; | 1403 | goto out_bh; |
1404 | } | ||
1393 | } else if (!strncmp(upm2->partIdent.ident, | 1405 | } else if (!strncmp(upm2->partIdent.ident, |
1394 | UDF_ID_METADATA, | 1406 | UDF_ID_METADATA, |
1395 | strlen(UDF_ID_METADATA))) { | 1407 | strlen(UDF_ID_METADATA))) { |
@@ -1931,8 +1943,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1931 | struct udf_sb_info *sbi; | 1943 | struct udf_sb_info *sbi; |
1932 | 1944 | ||
1933 | uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); | 1945 | uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); |
1934 | uopt.uid = -1; | 1946 | uopt.uid = INVALID_UID; |
1935 | uopt.gid = -1; | 1947 | uopt.gid = INVALID_GID; |
1936 | uopt.umask = 0; | 1948 | uopt.umask = 0; |
1937 | uopt.fmode = UDF_INVALID_MODE; | 1949 | uopt.fmode = UDF_INVALID_MODE; |
1938 | uopt.dmode = UDF_INVALID_MODE; | 1950 | uopt.dmode = UDF_INVALID_MODE; |
@@ -2000,6 +2012,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
2000 | if (!silent) | 2012 | if (!silent) |
2001 | pr_notice("Rescanning with blocksize %d\n", | 2013 | pr_notice("Rescanning with blocksize %d\n", |
2002 | UDF_DEFAULT_BLOCKSIZE); | 2014 | UDF_DEFAULT_BLOCKSIZE); |
2015 | brelse(sbi->s_lvid_bh); | ||
2016 | sbi->s_lvid_bh = NULL; | ||
2003 | uopt.blocksize = UDF_DEFAULT_BLOCKSIZE; | 2017 | uopt.blocksize = UDF_DEFAULT_BLOCKSIZE; |
2004 | ret = udf_load_vrs(sb, &uopt, silent, &fileset); | 2018 | ret = udf_load_vrs(sb, &uopt, silent, &fileset); |
2005 | } | 2019 | } |
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 42ad69ac9576..5f027227f085 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h | |||
@@ -128,8 +128,8 @@ struct udf_sb_info { | |||
128 | 128 | ||
129 | /* Default permissions */ | 129 | /* Default permissions */ |
130 | umode_t s_umask; | 130 | umode_t s_umask; |
131 | gid_t s_gid; | 131 | kgid_t s_gid; |
132 | uid_t s_uid; | 132 | kuid_t s_uid; |
133 | umode_t s_fmode; | 133 | umode_t s_fmode; |
134 | umode_t s_dmode; | 134 | umode_t s_dmode; |
135 | /* Lock protecting consistency of above permission settings */ | 135 | /* Lock protecting consistency of above permission settings */ |
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index dd7c89d8a1c1..eb6d0b7dc879 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c | |||
@@ -597,8 +597,8 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) | |||
597 | /* | 597 | /* |
598 | * Linux now has 32-bit uid and gid, so we can support EFT. | 598 | * Linux now has 32-bit uid and gid, so we can support EFT. |
599 | */ | 599 | */ |
600 | inode->i_uid = ufs_get_inode_uid(sb, ufs_inode); | 600 | i_uid_write(inode, ufs_get_inode_uid(sb, ufs_inode)); |
601 | inode->i_gid = ufs_get_inode_gid(sb, ufs_inode); | 601 | i_gid_write(inode, ufs_get_inode_gid(sb, ufs_inode)); |
602 | 602 | ||
603 | inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size); | 603 | inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size); |
604 | inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec); | 604 | inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec); |
@@ -645,8 +645,8 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) | |||
645 | /* | 645 | /* |
646 | * Linux now has 32-bit uid and gid, so we can support EFT. | 646 | * Linux now has 32-bit uid and gid, so we can support EFT. |
647 | */ | 647 | */ |
648 | inode->i_uid = fs32_to_cpu(sb, ufs2_inode->ui_uid); | 648 | i_uid_write(inode, fs32_to_cpu(sb, ufs2_inode->ui_uid)); |
649 | inode->i_gid = fs32_to_cpu(sb, ufs2_inode->ui_gid); | 649 | i_gid_write(inode, fs32_to_cpu(sb, ufs2_inode->ui_gid)); |
650 | 650 | ||
651 | inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size); | 651 | inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size); |
652 | inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime); | 652 | inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime); |
@@ -745,8 +745,8 @@ static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode) | |||
745 | ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode); | 745 | ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode); |
746 | ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink); | 746 | ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink); |
747 | 747 | ||
748 | ufs_set_inode_uid(sb, ufs_inode, inode->i_uid); | 748 | ufs_set_inode_uid(sb, ufs_inode, i_uid_read(inode)); |
749 | ufs_set_inode_gid(sb, ufs_inode, inode->i_gid); | 749 | ufs_set_inode_gid(sb, ufs_inode, i_gid_read(inode)); |
750 | 750 | ||
751 | ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); | 751 | ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); |
752 | ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec); | 752 | ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec); |
@@ -789,8 +789,8 @@ static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode) | |||
789 | ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode); | 789 | ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode); |
790 | ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink); | 790 | ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink); |
791 | 791 | ||
792 | ufs_inode->ui_uid = cpu_to_fs32(sb, inode->i_uid); | 792 | ufs_inode->ui_uid = cpu_to_fs32(sb, i_uid_read(inode)); |
793 | ufs_inode->ui_gid = cpu_to_fs32(sb, inode->i_gid); | 793 | ufs_inode->ui_gid = cpu_to_fs32(sb, i_gid_read(inode)); |
794 | 794 | ||
795 | ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); | 795 | ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); |
796 | ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec); | 796 | ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec); |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 444927e5706b..f7cfecfe1cab 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -1466,6 +1466,11 @@ static int init_inodecache(void) | |||
1466 | 1466 | ||
1467 | static void destroy_inodecache(void) | 1467 | static void destroy_inodecache(void) |
1468 | { | 1468 | { |
1469 | /* | ||
1470 | * Make sure all delayed rcu free inodes are flushed before we | ||
1471 | * destroy cache. | ||
1472 | */ | ||
1473 | rcu_barrier(); | ||
1469 | kmem_cache_destroy(ufs_inode_cachep); | 1474 | kmem_cache_destroy(ufs_inode_cachep); |
1470 | } | 1475 | } |
1471 | 1476 | ||
diff --git a/fs/utimes.c b/fs/utimes.c index fa4dbe451e27..bb0696a41735 100644 --- a/fs/utimes.c +++ b/fs/utimes.c | |||
@@ -140,19 +140,18 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times, | |||
140 | goto out; | 140 | goto out; |
141 | 141 | ||
142 | if (filename == NULL && dfd != AT_FDCWD) { | 142 | if (filename == NULL && dfd != AT_FDCWD) { |
143 | int fput_needed; | 143 | struct fd f; |
144 | struct file *file; | ||
145 | 144 | ||
146 | if (flags & AT_SYMLINK_NOFOLLOW) | 145 | if (flags & AT_SYMLINK_NOFOLLOW) |
147 | goto out; | 146 | goto out; |
148 | 147 | ||
149 | file = fget_light(dfd, &fput_needed); | 148 | f = fdget(dfd); |
150 | error = -EBADF; | 149 | error = -EBADF; |
151 | if (!file) | 150 | if (!f.file) |
152 | goto out; | 151 | goto out; |
153 | 152 | ||
154 | error = utimes_common(&file->f_path, times); | 153 | error = utimes_common(&f.file->f_path, times); |
155 | fput_light(file, fput_needed); | 154 | fdput(f); |
156 | } else { | 155 | } else { |
157 | struct path path; | 156 | struct path path; |
158 | int lookup_flags = 0; | 157 | int lookup_flags = 0; |
diff --git a/fs/xattr.c b/fs/xattr.c index 4d45b7189e7e..1780f062dbaf 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/fsnotify.h> | 20 | #include <linux/fsnotify.h> |
21 | #include <linux/audit.h> | 21 | #include <linux/audit.h> |
22 | #include <linux/vmalloc.h> | 22 | #include <linux/vmalloc.h> |
23 | #include <linux/posix_acl_xattr.h> | ||
23 | 24 | ||
24 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
25 | 26 | ||
@@ -295,11 +296,13 @@ vfs_removexattr(struct dentry *dentry, const char *name) | |||
295 | if (error) | 296 | if (error) |
296 | return error; | 297 | return error; |
297 | 298 | ||
299 | mutex_lock(&inode->i_mutex); | ||
298 | error = security_inode_removexattr(dentry, name); | 300 | error = security_inode_removexattr(dentry, name); |
299 | if (error) | 301 | if (error) { |
302 | mutex_unlock(&inode->i_mutex); | ||
300 | return error; | 303 | return error; |
304 | } | ||
301 | 305 | ||
302 | mutex_lock(&inode->i_mutex); | ||
303 | error = inode->i_op->removexattr(dentry, name); | 306 | error = inode->i_op->removexattr(dentry, name); |
304 | mutex_unlock(&inode->i_mutex); | 307 | mutex_unlock(&inode->i_mutex); |
305 | 308 | ||
@@ -347,6 +350,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value, | |||
347 | error = -EFAULT; | 350 | error = -EFAULT; |
348 | goto out; | 351 | goto out; |
349 | } | 352 | } |
353 | if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || | ||
354 | (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) | ||
355 | posix_acl_fix_xattr_from_user(kvalue, size); | ||
350 | } | 356 | } |
351 | 357 | ||
352 | error = vfs_setxattr(d, kname, kvalue, size, flags); | 358 | error = vfs_setxattr(d, kname, kvalue, size, flags); |
@@ -399,22 +405,20 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, | |||
399 | SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, | 405 | SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, |
400 | const void __user *,value, size_t, size, int, flags) | 406 | const void __user *,value, size_t, size, int, flags) |
401 | { | 407 | { |
402 | int fput_needed; | 408 | struct fd f = fdget(fd); |
403 | struct file *f; | ||
404 | struct dentry *dentry; | 409 | struct dentry *dentry; |
405 | int error = -EBADF; | 410 | int error = -EBADF; |
406 | 411 | ||
407 | f = fget_light(fd, &fput_needed); | 412 | if (!f.file) |
408 | if (!f) | ||
409 | return error; | 413 | return error; |
410 | dentry = f->f_path.dentry; | 414 | dentry = f.file->f_path.dentry; |
411 | audit_inode(NULL, dentry); | 415 | audit_inode(NULL, dentry); |
412 | error = mnt_want_write_file(f); | 416 | error = mnt_want_write_file(f.file); |
413 | if (!error) { | 417 | if (!error) { |
414 | error = setxattr(dentry, name, value, size, flags); | 418 | error = setxattr(dentry, name, value, size, flags); |
415 | mnt_drop_write_file(f); | 419 | mnt_drop_write_file(f.file); |
416 | } | 420 | } |
417 | fput_light(f, fput_needed); | 421 | fdput(f); |
418 | return error; | 422 | return error; |
419 | } | 423 | } |
420 | 424 | ||
@@ -450,6 +454,9 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, | |||
450 | 454 | ||
451 | error = vfs_getxattr(d, kname, kvalue, size); | 455 | error = vfs_getxattr(d, kname, kvalue, size); |
452 | if (error > 0) { | 456 | if (error > 0) { |
457 | if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || | ||
458 | (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) | ||
459 | posix_acl_fix_xattr_to_user(kvalue, size); | ||
453 | if (size && copy_to_user(value, kvalue, error)) | 460 | if (size && copy_to_user(value, kvalue, error)) |
454 | error = -EFAULT; | 461 | error = -EFAULT; |
455 | } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { | 462 | } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { |
@@ -495,16 +502,14 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, | |||
495 | SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, | 502 | SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, |
496 | void __user *, value, size_t, size) | 503 | void __user *, value, size_t, size) |
497 | { | 504 | { |
498 | int fput_needed; | 505 | struct fd f = fdget(fd); |
499 | struct file *f; | ||
500 | ssize_t error = -EBADF; | 506 | ssize_t error = -EBADF; |
501 | 507 | ||
502 | f = fget_light(fd, &fput_needed); | 508 | if (!f.file) |
503 | if (!f) | ||
504 | return error; | 509 | return error; |
505 | audit_inode(NULL, f->f_path.dentry); | 510 | audit_inode(NULL, f.file->f_path.dentry); |
506 | error = getxattr(f->f_path.dentry, name, value, size); | 511 | error = getxattr(f.file->f_path.dentry, name, value, size); |
507 | fput_light(f, fput_needed); | 512 | fdput(f); |
508 | return error; | 513 | return error; |
509 | } | 514 | } |
510 | 515 | ||
@@ -576,16 +581,14 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list, | |||
576 | 581 | ||
577 | SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) | 582 | SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) |
578 | { | 583 | { |
579 | int fput_needed; | 584 | struct fd f = fdget(fd); |
580 | struct file *f; | ||
581 | ssize_t error = -EBADF; | 585 | ssize_t error = -EBADF; |
582 | 586 | ||
583 | f = fget_light(fd, &fput_needed); | 587 | if (!f.file) |
584 | if (!f) | ||
585 | return error; | 588 | return error; |
586 | audit_inode(NULL, f->f_path.dentry); | 589 | audit_inode(NULL, f.file->f_path.dentry); |
587 | error = listxattr(f->f_path.dentry, list, size); | 590 | error = listxattr(f.file->f_path.dentry, list, size); |
588 | fput_light(f, fput_needed); | 591 | fdput(f); |
589 | return error; | 592 | return error; |
590 | } | 593 | } |
591 | 594 | ||
@@ -645,22 +648,20 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, | |||
645 | 648 | ||
646 | SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) | 649 | SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) |
647 | { | 650 | { |
648 | int fput_needed; | 651 | struct fd f = fdget(fd); |
649 | struct file *f; | ||
650 | struct dentry *dentry; | 652 | struct dentry *dentry; |
651 | int error = -EBADF; | 653 | int error = -EBADF; |
652 | 654 | ||
653 | f = fget_light(fd, &fput_needed); | 655 | if (!f.file) |
654 | if (!f) | ||
655 | return error; | 656 | return error; |
656 | dentry = f->f_path.dentry; | 657 | dentry = f.file->f_path.dentry; |
657 | audit_inode(NULL, dentry); | 658 | audit_inode(NULL, dentry); |
658 | error = mnt_want_write_file(f); | 659 | error = mnt_want_write_file(f.file); |
659 | if (!error) { | 660 | if (!error) { |
660 | error = removexattr(dentry, name); | 661 | error = removexattr(dentry, name); |
661 | mnt_drop_write_file(f); | 662 | mnt_drop_write_file(f.file); |
662 | } | 663 | } |
663 | fput_light(f, fput_needed); | 664 | fdput(f); |
664 | return error; | 665 | return error; |
665 | } | 666 | } |
666 | 667 | ||
@@ -791,3 +792,183 @@ EXPORT_SYMBOL(generic_getxattr); | |||
791 | EXPORT_SYMBOL(generic_listxattr); | 792 | EXPORT_SYMBOL(generic_listxattr); |
792 | EXPORT_SYMBOL(generic_setxattr); | 793 | EXPORT_SYMBOL(generic_setxattr); |
793 | EXPORT_SYMBOL(generic_removexattr); | 794 | EXPORT_SYMBOL(generic_removexattr); |
795 | |||
796 | /* | ||
797 | * Allocate new xattr and copy in the value; but leave the name to callers. | ||
798 | */ | ||
799 | struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) | ||
800 | { | ||
801 | struct simple_xattr *new_xattr; | ||
802 | size_t len; | ||
803 | |||
804 | /* wrap around? */ | ||
805 | len = sizeof(*new_xattr) + size; | ||
806 | if (len <= sizeof(*new_xattr)) | ||
807 | return NULL; | ||
808 | |||
809 | new_xattr = kmalloc(len, GFP_KERNEL); | ||
810 | if (!new_xattr) | ||
811 | return NULL; | ||
812 | |||
813 | new_xattr->size = size; | ||
814 | memcpy(new_xattr->value, value, size); | ||
815 | return new_xattr; | ||
816 | } | ||
817 | |||
818 | /* | ||
819 | * xattr GET operation for in-memory/pseudo filesystems | ||
820 | */ | ||
821 | int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, | ||
822 | void *buffer, size_t size) | ||
823 | { | ||
824 | struct simple_xattr *xattr; | ||
825 | int ret = -ENODATA; | ||
826 | |||
827 | spin_lock(&xattrs->lock); | ||
828 | list_for_each_entry(xattr, &xattrs->head, list) { | ||
829 | if (strcmp(name, xattr->name)) | ||
830 | continue; | ||
831 | |||
832 | ret = xattr->size; | ||
833 | if (buffer) { | ||
834 | if (size < xattr->size) | ||
835 | ret = -ERANGE; | ||
836 | else | ||
837 | memcpy(buffer, xattr->value, xattr->size); | ||
838 | } | ||
839 | break; | ||
840 | } | ||
841 | spin_unlock(&xattrs->lock); | ||
842 | return ret; | ||
843 | } | ||
844 | |||
845 | static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name, | ||
846 | const void *value, size_t size, int flags) | ||
847 | { | ||
848 | struct simple_xattr *xattr; | ||
849 | struct simple_xattr *uninitialized_var(new_xattr); | ||
850 | int err = 0; | ||
851 | |||
852 | /* value == NULL means remove */ | ||
853 | if (value) { | ||
854 | new_xattr = simple_xattr_alloc(value, size); | ||
855 | if (!new_xattr) | ||
856 | return -ENOMEM; | ||
857 | |||
858 | new_xattr->name = kstrdup(name, GFP_KERNEL); | ||
859 | if (!new_xattr->name) { | ||
860 | kfree(new_xattr); | ||
861 | return -ENOMEM; | ||
862 | } | ||
863 | } | ||
864 | |||
865 | spin_lock(&xattrs->lock); | ||
866 | list_for_each_entry(xattr, &xattrs->head, list) { | ||
867 | if (!strcmp(name, xattr->name)) { | ||
868 | if (flags & XATTR_CREATE) { | ||
869 | xattr = new_xattr; | ||
870 | err = -EEXIST; | ||
871 | } else if (new_xattr) { | ||
872 | list_replace(&xattr->list, &new_xattr->list); | ||
873 | } else { | ||
874 | list_del(&xattr->list); | ||
875 | } | ||
876 | goto out; | ||
877 | } | ||
878 | } | ||
879 | if (flags & XATTR_REPLACE) { | ||
880 | xattr = new_xattr; | ||
881 | err = -ENODATA; | ||
882 | } else { | ||
883 | list_add(&new_xattr->list, &xattrs->head); | ||
884 | xattr = NULL; | ||
885 | } | ||
886 | out: | ||
887 | spin_unlock(&xattrs->lock); | ||
888 | if (xattr) { | ||
889 | kfree(xattr->name); | ||
890 | kfree(xattr); | ||
891 | } | ||
892 | return err; | ||
893 | |||
894 | } | ||
895 | |||
896 | /** | ||
897 | * simple_xattr_set - xattr SET operation for in-memory/pseudo filesystems | ||
898 | * @xattrs: target simple_xattr list | ||
899 | * @name: name of the new extended attribute | ||
900 | * @value: value of the new xattr. If %NULL, will remove the attribute | ||
901 | * @size: size of the new xattr | ||
902 | * @flags: %XATTR_{CREATE|REPLACE} | ||
903 | * | ||
904 | * If %XATTR_CREATE is set, the xattr shouldn't exist already; otherwise fails | ||
905 | * with -EEXIST. If %XATTR_REPLACE is set, the xattr should exist; | ||
906 | * otherwise, fails with -ENODATA. | ||
907 | * | ||
908 | * Returns 0 on success, -errno on failure. | ||
909 | */ | ||
910 | int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, | ||
911 | const void *value, size_t size, int flags) | ||
912 | { | ||
913 | if (size == 0) | ||
914 | value = ""; /* empty EA, do not remove */ | ||
915 | return __simple_xattr_set(xattrs, name, value, size, flags); | ||
916 | } | ||
917 | |||
918 | /* | ||
919 | * xattr REMOVE operation for in-memory/pseudo filesystems | ||
920 | */ | ||
921 | int simple_xattr_remove(struct simple_xattrs *xattrs, const char *name) | ||
922 | { | ||
923 | return __simple_xattr_set(xattrs, name, NULL, 0, XATTR_REPLACE); | ||
924 | } | ||
925 | |||
926 | static bool xattr_is_trusted(const char *name) | ||
927 | { | ||
928 | return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); | ||
929 | } | ||
930 | |||
931 | /* | ||
932 | * xattr LIST operation for in-memory/pseudo filesystems | ||
933 | */ | ||
934 | ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer, | ||
935 | size_t size) | ||
936 | { | ||
937 | bool trusted = capable(CAP_SYS_ADMIN); | ||
938 | struct simple_xattr *xattr; | ||
939 | size_t used = 0; | ||
940 | |||
941 | spin_lock(&xattrs->lock); | ||
942 | list_for_each_entry(xattr, &xattrs->head, list) { | ||
943 | size_t len; | ||
944 | |||
945 | /* skip "trusted." attributes for unprivileged callers */ | ||
946 | if (!trusted && xattr_is_trusted(xattr->name)) | ||
947 | continue; | ||
948 | |||
949 | len = strlen(xattr->name) + 1; | ||
950 | used += len; | ||
951 | if (buffer) { | ||
952 | if (size < used) { | ||
953 | used = -ERANGE; | ||
954 | break; | ||
955 | } | ||
956 | memcpy(buffer, xattr->name, len); | ||
957 | buffer += len; | ||
958 | } | ||
959 | } | ||
960 | spin_unlock(&xattrs->lock); | ||
961 | |||
962 | return used; | ||
963 | } | ||
964 | |||
965 | /* | ||
966 | * Adds an extended attribute to the list | ||
967 | */ | ||
968 | void simple_xattr_list_add(struct simple_xattrs *xattrs, | ||
969 | struct simple_xattr *new_xattr) | ||
970 | { | ||
971 | spin_lock(&xattrs->lock); | ||
972 | list_add(&new_xattr->list, &xattrs->head); | ||
973 | spin_unlock(&xattrs->lock); | ||
974 | } | ||
diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c index 69d06b07b169..11efd830b5f5 100644 --- a/fs/xattr_acl.c +++ b/fs/xattr_acl.c | |||
@@ -9,13 +9,72 @@ | |||
9 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
10 | #include <linux/posix_acl_xattr.h> | 10 | #include <linux/posix_acl_xattr.h> |
11 | #include <linux/gfp.h> | 11 | #include <linux/gfp.h> |
12 | #include <linux/user_namespace.h> | ||
12 | 13 | ||
14 | /* | ||
15 | * Fix up the uids and gids in posix acl extended attributes in place. | ||
16 | */ | ||
17 | static void posix_acl_fix_xattr_userns( | ||
18 | struct user_namespace *to, struct user_namespace *from, | ||
19 | void *value, size_t size) | ||
20 | { | ||
21 | posix_acl_xattr_header *header = (posix_acl_xattr_header *)value; | ||
22 | posix_acl_xattr_entry *entry = (posix_acl_xattr_entry *)(header+1), *end; | ||
23 | int count; | ||
24 | kuid_t uid; | ||
25 | kgid_t gid; | ||
26 | |||
27 | if (!value) | ||
28 | return; | ||
29 | if (size < sizeof(posix_acl_xattr_header)) | ||
30 | return; | ||
31 | if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) | ||
32 | return; | ||
33 | |||
34 | count = posix_acl_xattr_count(size); | ||
35 | if (count < 0) | ||
36 | return; | ||
37 | if (count == 0) | ||
38 | return; | ||
39 | |||
40 | for (end = entry + count; entry != end; entry++) { | ||
41 | switch(le16_to_cpu(entry->e_tag)) { | ||
42 | case ACL_USER: | ||
43 | uid = make_kuid(from, le32_to_cpu(entry->e_id)); | ||
44 | entry->e_id = cpu_to_le32(from_kuid(to, uid)); | ||
45 | break; | ||
46 | case ACL_GROUP: | ||
47 | gid = make_kgid(from, le32_to_cpu(entry->e_id)); | ||
48 | entry->e_id = cpu_to_le32(from_kgid(to, gid)); | ||
49 | break; | ||
50 | default: | ||
51 | break; | ||
52 | } | ||
53 | } | ||
54 | } | ||
55 | |||
56 | void posix_acl_fix_xattr_from_user(void *value, size_t size) | ||
57 | { | ||
58 | struct user_namespace *user_ns = current_user_ns(); | ||
59 | if (user_ns == &init_user_ns) | ||
60 | return; | ||
61 | posix_acl_fix_xattr_userns(&init_user_ns, user_ns, value, size); | ||
62 | } | ||
63 | |||
64 | void posix_acl_fix_xattr_to_user(void *value, size_t size) | ||
65 | { | ||
66 | struct user_namespace *user_ns = current_user_ns(); | ||
67 | if (user_ns == &init_user_ns) | ||
68 | return; | ||
69 | posix_acl_fix_xattr_userns(user_ns, &init_user_ns, value, size); | ||
70 | } | ||
13 | 71 | ||
14 | /* | 72 | /* |
15 | * Convert from extended attribute to in-memory representation. | 73 | * Convert from extended attribute to in-memory representation. |
16 | */ | 74 | */ |
17 | struct posix_acl * | 75 | struct posix_acl * |
18 | posix_acl_from_xattr(const void *value, size_t size) | 76 | posix_acl_from_xattr(struct user_namespace *user_ns, |
77 | const void *value, size_t size) | ||
19 | { | 78 | { |
20 | posix_acl_xattr_header *header = (posix_acl_xattr_header *)value; | 79 | posix_acl_xattr_header *header = (posix_acl_xattr_header *)value; |
21 | posix_acl_xattr_entry *entry = (posix_acl_xattr_entry *)(header+1), *end; | 80 | posix_acl_xattr_entry *entry = (posix_acl_xattr_entry *)(header+1), *end; |
@@ -50,12 +109,21 @@ posix_acl_from_xattr(const void *value, size_t size) | |||
50 | case ACL_GROUP_OBJ: | 109 | case ACL_GROUP_OBJ: |
51 | case ACL_MASK: | 110 | case ACL_MASK: |
52 | case ACL_OTHER: | 111 | case ACL_OTHER: |
53 | acl_e->e_id = ACL_UNDEFINED_ID; | ||
54 | break; | 112 | break; |
55 | 113 | ||
56 | case ACL_USER: | 114 | case ACL_USER: |
115 | acl_e->e_uid = | ||
116 | make_kuid(user_ns, | ||
117 | le32_to_cpu(entry->e_id)); | ||
118 | if (!uid_valid(acl_e->e_uid)) | ||
119 | goto fail; | ||
120 | break; | ||
57 | case ACL_GROUP: | 121 | case ACL_GROUP: |
58 | acl_e->e_id = le32_to_cpu(entry->e_id); | 122 | acl_e->e_gid = |
123 | make_kgid(user_ns, | ||
124 | le32_to_cpu(entry->e_id)); | ||
125 | if (!gid_valid(acl_e->e_gid)) | ||
126 | goto fail; | ||
59 | break; | 127 | break; |
60 | 128 | ||
61 | default: | 129 | default: |
@@ -74,7 +142,8 @@ EXPORT_SYMBOL (posix_acl_from_xattr); | |||
74 | * Convert from in-memory to extended attribute representation. | 142 | * Convert from in-memory to extended attribute representation. |
75 | */ | 143 | */ |
76 | int | 144 | int |
77 | posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size) | 145 | posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, |
146 | void *buffer, size_t size) | ||
78 | { | 147 | { |
79 | posix_acl_xattr_header *ext_acl = (posix_acl_xattr_header *)buffer; | 148 | posix_acl_xattr_header *ext_acl = (posix_acl_xattr_header *)buffer; |
80 | posix_acl_xattr_entry *ext_entry = ext_acl->a_entries; | 149 | posix_acl_xattr_entry *ext_entry = ext_acl->a_entries; |
@@ -89,9 +158,22 @@ posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size) | |||
89 | ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); | 158 | ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); |
90 | 159 | ||
91 | for (n=0; n < acl->a_count; n++, ext_entry++) { | 160 | for (n=0; n < acl->a_count; n++, ext_entry++) { |
92 | ext_entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); | 161 | const struct posix_acl_entry *acl_e = &acl->a_entries[n]; |
93 | ext_entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); | 162 | ext_entry->e_tag = cpu_to_le16(acl_e->e_tag); |
94 | ext_entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); | 163 | ext_entry->e_perm = cpu_to_le16(acl_e->e_perm); |
164 | switch(acl_e->e_tag) { | ||
165 | case ACL_USER: | ||
166 | ext_entry->e_id = | ||
167 | cpu_to_le32(from_kuid(user_ns, acl_e->e_uid)); | ||
168 | break; | ||
169 | case ACL_GROUP: | ||
170 | ext_entry->e_id = | ||
171 | cpu_to_le32(from_kgid(user_ns, acl_e->e_gid)); | ||
172 | break; | ||
173 | default: | ||
174 | ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); | ||
175 | break; | ||
176 | } | ||
95 | } | 177 | } |
96 | return real_size; | 178 | return real_size; |
97 | } | 179 | } |
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index ac702a6eab9b..1d32f1d52763 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c | |||
@@ -337,7 +337,7 @@ xfs_xattr_acl_get(struct dentry *dentry, const char *name, | |||
337 | if (acl == NULL) | 337 | if (acl == NULL) |
338 | return -ENODATA; | 338 | return -ENODATA; |
339 | 339 | ||
340 | error = posix_acl_to_xattr(acl, value, size); | 340 | error = posix_acl_to_xattr(&init_user_ns, acl, value, size); |
341 | posix_acl_release(acl); | 341 | posix_acl_release(acl); |
342 | 342 | ||
343 | return error; | 343 | return error; |
@@ -361,7 +361,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name, | |||
361 | if (!value) | 361 | if (!value) |
362 | goto set_acl; | 362 | goto set_acl; |
363 | 363 | ||
364 | acl = posix_acl_from_xattr(value, size); | 364 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
365 | if (!acl) { | 365 | if (!acl) { |
366 | /* | 366 | /* |
367 | * acl_set_file(3) may request that we set default ACLs with | 367 | * acl_set_file(3) may request that we set default ACLs with |
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d7a9dd735e1e..933b7930b863 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -96,6 +96,7 @@ xfs_buf_lru_add( | |||
96 | atomic_inc(&bp->b_hold); | 96 | atomic_inc(&bp->b_hold); |
97 | list_add_tail(&bp->b_lru, &btp->bt_lru); | 97 | list_add_tail(&bp->b_lru, &btp->bt_lru); |
98 | btp->bt_lru_nr++; | 98 | btp->bt_lru_nr++; |
99 | bp->b_lru_flags &= ~_XBF_LRU_DISPOSE; | ||
99 | } | 100 | } |
100 | spin_unlock(&btp->bt_lru_lock); | 101 | spin_unlock(&btp->bt_lru_lock); |
101 | } | 102 | } |
@@ -154,7 +155,8 @@ xfs_buf_stale( | |||
154 | struct xfs_buftarg *btp = bp->b_target; | 155 | struct xfs_buftarg *btp = bp->b_target; |
155 | 156 | ||
156 | spin_lock(&btp->bt_lru_lock); | 157 | spin_lock(&btp->bt_lru_lock); |
157 | if (!list_empty(&bp->b_lru)) { | 158 | if (!list_empty(&bp->b_lru) && |
159 | !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) { | ||
158 | list_del_init(&bp->b_lru); | 160 | list_del_init(&bp->b_lru); |
159 | btp->bt_lru_nr--; | 161 | btp->bt_lru_nr--; |
160 | atomic_dec(&bp->b_hold); | 162 | atomic_dec(&bp->b_hold); |
@@ -1501,6 +1503,7 @@ xfs_buftarg_shrink( | |||
1501 | */ | 1503 | */ |
1502 | list_move(&bp->b_lru, &dispose); | 1504 | list_move(&bp->b_lru, &dispose); |
1503 | btp->bt_lru_nr--; | 1505 | btp->bt_lru_nr--; |
1506 | bp->b_lru_flags |= _XBF_LRU_DISPOSE; | ||
1504 | } | 1507 | } |
1505 | spin_unlock(&btp->bt_lru_lock); | 1508 | spin_unlock(&btp->bt_lru_lock); |
1506 | 1509 | ||
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d03b73b9604e..7c0b6a0a1557 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
@@ -38,27 +38,28 @@ typedef enum { | |||
38 | XBRW_ZERO = 3, /* Zero target memory */ | 38 | XBRW_ZERO = 3, /* Zero target memory */ |
39 | } xfs_buf_rw_t; | 39 | } xfs_buf_rw_t; |
40 | 40 | ||
41 | #define XBF_READ (1 << 0) /* buffer intended for reading from device */ | 41 | #define XBF_READ (1 << 0) /* buffer intended for reading from device */ |
42 | #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ | 42 | #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ |
43 | #define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ | 43 | #define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ |
44 | #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ | 44 | #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ |
45 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ | 45 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ |
46 | #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ | 46 | #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ |
47 | 47 | ||
48 | /* I/O hints for the BIO layer */ | 48 | /* I/O hints for the BIO layer */ |
49 | #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ | 49 | #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ |
50 | #define XBF_FUA (1 << 11)/* force cache write through mode */ | 50 | #define XBF_FUA (1 << 11)/* force cache write through mode */ |
51 | #define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ | 51 | #define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ |
52 | 52 | ||
53 | /* flags used only as arguments to access routines */ | 53 | /* flags used only as arguments to access routines */ |
54 | #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ | 54 | #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ |
55 | #define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ | 55 | #define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ |
56 | 56 | ||
57 | /* flags used only internally */ | 57 | /* flags used only internally */ |
58 | #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ | 58 | #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ |
59 | #define _XBF_KMEM (1 << 21)/* backed by heap memory */ | 59 | #define _XBF_KMEM (1 << 21)/* backed by heap memory */ |
60 | #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ | 60 | #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ |
61 | #define _XBF_COMPOUND (1 << 23)/* compound buffer */ | 61 | #define _XBF_COMPOUND (1 << 23)/* compound buffer */ |
62 | #define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */ | ||
62 | 63 | ||
63 | typedef unsigned int xfs_buf_flags_t; | 64 | typedef unsigned int xfs_buf_flags_t; |
64 | 65 | ||
@@ -72,12 +73,13 @@ typedef unsigned int xfs_buf_flags_t; | |||
72 | { XBF_SYNCIO, "SYNCIO" }, \ | 73 | { XBF_SYNCIO, "SYNCIO" }, \ |
73 | { XBF_FUA, "FUA" }, \ | 74 | { XBF_FUA, "FUA" }, \ |
74 | { XBF_FLUSH, "FLUSH" }, \ | 75 | { XBF_FLUSH, "FLUSH" }, \ |
75 | { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ | 76 | { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ |
76 | { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ | 77 | { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ |
77 | { _XBF_PAGES, "PAGES" }, \ | 78 | { _XBF_PAGES, "PAGES" }, \ |
78 | { _XBF_KMEM, "KMEM" }, \ | 79 | { _XBF_KMEM, "KMEM" }, \ |
79 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ | 80 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ |
80 | { _XBF_COMPOUND, "COMPOUND" } | 81 | { _XBF_COMPOUND, "COMPOUND" }, \ |
82 | { _XBF_LRU_DISPOSE, "LRU_DISPOSE" } | ||
81 | 83 | ||
82 | typedef struct xfs_buftarg { | 84 | typedef struct xfs_buftarg { |
83 | dev_t bt_dev; | 85 | dev_t bt_dev; |
@@ -124,7 +126,12 @@ typedef struct xfs_buf { | |||
124 | xfs_buf_flags_t b_flags; /* status flags */ | 126 | xfs_buf_flags_t b_flags; /* status flags */ |
125 | struct semaphore b_sema; /* semaphore for lockables */ | 127 | struct semaphore b_sema; /* semaphore for lockables */ |
126 | 128 | ||
129 | /* | ||
130 | * concurrent access to b_lru and b_lru_flags are protected by | ||
131 | * bt_lru_lock and not by b_sema | ||
132 | */ | ||
127 | struct list_head b_lru; /* lru list */ | 133 | struct list_head b_lru; /* lru list */ |
134 | xfs_buf_flags_t b_lru_flags; /* internal lru status flags */ | ||
128 | wait_queue_head_t b_waiters; /* unpin waiters */ | 135 | wait_queue_head_t b_waiters; /* unpin waiters */ |
129 | struct list_head b_list; | 136 | struct list_head b_list; |
130 | struct xfs_perag *b_pag; /* contains rbtree root */ | 137 | struct xfs_perag *b_pag; /* contains rbtree root */ |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index e00de08dc8ac..b9b8646e62db 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -48,44 +48,44 @@ xfs_swapext( | |||
48 | xfs_swapext_t *sxp) | 48 | xfs_swapext_t *sxp) |
49 | { | 49 | { |
50 | xfs_inode_t *ip, *tip; | 50 | xfs_inode_t *ip, *tip; |
51 | struct file *file, *tmp_file; | 51 | struct fd f, tmp; |
52 | int error = 0; | 52 | int error = 0; |
53 | 53 | ||
54 | /* Pull information for the target fd */ | 54 | /* Pull information for the target fd */ |
55 | file = fget((int)sxp->sx_fdtarget); | 55 | f = fdget((int)sxp->sx_fdtarget); |
56 | if (!file) { | 56 | if (!f.file) { |
57 | error = XFS_ERROR(EINVAL); | 57 | error = XFS_ERROR(EINVAL); |
58 | goto out; | 58 | goto out; |
59 | } | 59 | } |
60 | 60 | ||
61 | if (!(file->f_mode & FMODE_WRITE) || | 61 | if (!(f.file->f_mode & FMODE_WRITE) || |
62 | !(file->f_mode & FMODE_READ) || | 62 | !(f.file->f_mode & FMODE_READ) || |
63 | (file->f_flags & O_APPEND)) { | 63 | (f.file->f_flags & O_APPEND)) { |
64 | error = XFS_ERROR(EBADF); | 64 | error = XFS_ERROR(EBADF); |
65 | goto out_put_file; | 65 | goto out_put_file; |
66 | } | 66 | } |
67 | 67 | ||
68 | tmp_file = fget((int)sxp->sx_fdtmp); | 68 | tmp = fdget((int)sxp->sx_fdtmp); |
69 | if (!tmp_file) { | 69 | if (!tmp.file) { |
70 | error = XFS_ERROR(EINVAL); | 70 | error = XFS_ERROR(EINVAL); |
71 | goto out_put_file; | 71 | goto out_put_file; |
72 | } | 72 | } |
73 | 73 | ||
74 | if (!(tmp_file->f_mode & FMODE_WRITE) || | 74 | if (!(tmp.file->f_mode & FMODE_WRITE) || |
75 | !(tmp_file->f_mode & FMODE_READ) || | 75 | !(tmp.file->f_mode & FMODE_READ) || |
76 | (tmp_file->f_flags & O_APPEND)) { | 76 | (tmp.file->f_flags & O_APPEND)) { |
77 | error = XFS_ERROR(EBADF); | 77 | error = XFS_ERROR(EBADF); |
78 | goto out_put_tmp_file; | 78 | goto out_put_tmp_file; |
79 | } | 79 | } |
80 | 80 | ||
81 | if (IS_SWAPFILE(file->f_path.dentry->d_inode) || | 81 | if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) || |
82 | IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) { | 82 | IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) { |
83 | error = XFS_ERROR(EINVAL); | 83 | error = XFS_ERROR(EINVAL); |
84 | goto out_put_tmp_file; | 84 | goto out_put_tmp_file; |
85 | } | 85 | } |
86 | 86 | ||
87 | ip = XFS_I(file->f_path.dentry->d_inode); | 87 | ip = XFS_I(f.file->f_path.dentry->d_inode); |
88 | tip = XFS_I(tmp_file->f_path.dentry->d_inode); | 88 | tip = XFS_I(tmp.file->f_path.dentry->d_inode); |
89 | 89 | ||
90 | if (ip->i_mount != tip->i_mount) { | 90 | if (ip->i_mount != tip->i_mount) { |
91 | error = XFS_ERROR(EINVAL); | 91 | error = XFS_ERROR(EINVAL); |
@@ -105,9 +105,9 @@ xfs_swapext( | |||
105 | error = xfs_swap_extents(ip, tip, sxp); | 105 | error = xfs_swap_extents(ip, tip, sxp); |
106 | 106 | ||
107 | out_put_tmp_file: | 107 | out_put_tmp_file: |
108 | fput(tmp_file); | 108 | fdput(tmp); |
109 | out_put_file: | 109 | out_put_file: |
110 | fput(file); | 110 | fdput(f); |
111 | out: | 111 | out: |
112 | return error; | 112 | return error; |
113 | } | 113 | } |
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index f9c3fe304a17..69cf4fcde03e 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c | |||
@@ -179,12 +179,14 @@ xfs_ioc_trim( | |||
179 | * used by the fstrim application. In the end it really doesn't | 179 | * used by the fstrim application. In the end it really doesn't |
180 | * matter as trimming blocks is an advisory interface. | 180 | * matter as trimming blocks is an advisory interface. |
181 | */ | 181 | */ |
182 | if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || | ||
183 | range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp))) | ||
184 | return -XFS_ERROR(EINVAL); | ||
185 | |||
182 | start = BTOBB(range.start); | 186 | start = BTOBB(range.start); |
183 | end = start + BTOBBT(range.len) - 1; | 187 | end = start + BTOBBT(range.len) - 1; |
184 | minlen = BTOBB(max_t(u64, granularity, range.minlen)); | 188 | minlen = BTOBB(max_t(u64, granularity, range.minlen)); |
185 | 189 | ||
186 | if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks) | ||
187 | return -XFS_ERROR(EINVAL); | ||
188 | if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) | 190 | if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) |
189 | end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1; | 191 | end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1; |
190 | 192 | ||
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 56afcdb2377d..aa473fa640a2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -36,6 +36,7 @@ | |||
36 | 36 | ||
37 | #include <linux/dcache.h> | 37 | #include <linux/dcache.h> |
38 | #include <linux/falloc.h> | 38 | #include <linux/falloc.h> |
39 | #include <linux/pagevec.h> | ||
39 | 40 | ||
40 | static const struct vm_operations_struct xfs_file_vm_ops; | 41 | static const struct vm_operations_struct xfs_file_vm_ops; |
41 | 42 | ||
@@ -939,7 +940,6 @@ xfs_file_mmap( | |||
939 | struct vm_area_struct *vma) | 940 | struct vm_area_struct *vma) |
940 | { | 941 | { |
941 | vma->vm_ops = &xfs_file_vm_ops; | 942 | vma->vm_ops = &xfs_file_vm_ops; |
942 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
943 | 943 | ||
944 | file_accessed(filp); | 944 | file_accessed(filp); |
945 | return 0; | 945 | return 0; |
@@ -959,17 +959,232 @@ xfs_vm_page_mkwrite( | |||
959 | return block_page_mkwrite(vma, vmf, xfs_get_blocks); | 959 | return block_page_mkwrite(vma, vmf, xfs_get_blocks); |
960 | } | 960 | } |
961 | 961 | ||
962 | /* | ||
963 | * This type is designed to indicate the type of offset we would like | ||
964 | * to search from page cache for either xfs_seek_data() or xfs_seek_hole(). | ||
965 | */ | ||
966 | enum { | ||
967 | HOLE_OFF = 0, | ||
968 | DATA_OFF, | ||
969 | }; | ||
970 | |||
971 | /* | ||
972 | * Lookup the desired type of offset from the given page. | ||
973 | * | ||
974 | * On success, return true and the offset argument will point to the | ||
975 | * start of the region that was found. Otherwise this function will | ||
976 | * return false and keep the offset argument unchanged. | ||
977 | */ | ||
978 | STATIC bool | ||
979 | xfs_lookup_buffer_offset( | ||
980 | struct page *page, | ||
981 | loff_t *offset, | ||
982 | unsigned int type) | ||
983 | { | ||
984 | loff_t lastoff = page_offset(page); | ||
985 | bool found = false; | ||
986 | struct buffer_head *bh, *head; | ||
987 | |||
988 | bh = head = page_buffers(page); | ||
989 | do { | ||
990 | /* | ||
991 | * Unwritten extents that have data in the page | ||
992 | * cache covering them can be identified by the | ||
993 | * BH_Unwritten state flag. Pages with multiple | ||
994 | * buffers might have a mix of holes, data and | ||
995 | * unwritten extents - any buffer with valid | ||
996 | * data in it should have BH_Uptodate flag set | ||
997 | * on it. | ||
998 | */ | ||
999 | if (buffer_unwritten(bh) || | ||
1000 | buffer_uptodate(bh)) { | ||
1001 | if (type == DATA_OFF) | ||
1002 | found = true; | ||
1003 | } else { | ||
1004 | if (type == HOLE_OFF) | ||
1005 | found = true; | ||
1006 | } | ||
1007 | |||
1008 | if (found) { | ||
1009 | *offset = lastoff; | ||
1010 | break; | ||
1011 | } | ||
1012 | lastoff += bh->b_size; | ||
1013 | } while ((bh = bh->b_this_page) != head); | ||
1014 | |||
1015 | return found; | ||
1016 | } | ||
1017 | |||
1018 | /* | ||
1019 | * This routine is called to find out and return a data or hole offset | ||
1020 | * from the page cache for unwritten extents according to the desired | ||
1021 | * type for xfs_seek_data() or xfs_seek_hole(). | ||
1022 | * | ||
1023 | * The argument offset is used to tell where we start to search from the | ||
1024 | * page cache. Map is used to figure out the end points of the range to | ||
1025 | * lookup pages. | ||
1026 | * | ||
1027 | * Return true if the desired type of offset was found, and the argument | ||
1028 | * offset is filled with that address. Otherwise, return false and keep | ||
1029 | * offset unchanged. | ||
1030 | */ | ||
1031 | STATIC bool | ||
1032 | xfs_find_get_desired_pgoff( | ||
1033 | struct inode *inode, | ||
1034 | struct xfs_bmbt_irec *map, | ||
1035 | unsigned int type, | ||
1036 | loff_t *offset) | ||
1037 | { | ||
1038 | struct xfs_inode *ip = XFS_I(inode); | ||
1039 | struct xfs_mount *mp = ip->i_mount; | ||
1040 | struct pagevec pvec; | ||
1041 | pgoff_t index; | ||
1042 | pgoff_t end; | ||
1043 | loff_t endoff; | ||
1044 | loff_t startoff = *offset; | ||
1045 | loff_t lastoff = startoff; | ||
1046 | bool found = false; | ||
1047 | |||
1048 | pagevec_init(&pvec, 0); | ||
1049 | |||
1050 | index = startoff >> PAGE_CACHE_SHIFT; | ||
1051 | endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount); | ||
1052 | end = endoff >> PAGE_CACHE_SHIFT; | ||
1053 | do { | ||
1054 | int want; | ||
1055 | unsigned nr_pages; | ||
1056 | unsigned int i; | ||
1057 | |||
1058 | want = min_t(pgoff_t, end - index, PAGEVEC_SIZE); | ||
1059 | nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, | ||
1060 | want); | ||
1061 | /* | ||
1062 | * No page mapped into given range. If we are searching holes | ||
1063 | * and if this is the first time we got into the loop, it means | ||
1064 | * that the given offset is landed in a hole, return it. | ||
1065 | * | ||
1066 | * If we have already stepped through some block buffers to find | ||
1067 | * holes but they all contains data. In this case, the last | ||
1068 | * offset is already updated and pointed to the end of the last | ||
1069 | * mapped page, if it does not reach the endpoint to search, | ||
1070 | * that means there should be a hole between them. | ||
1071 | */ | ||
1072 | if (nr_pages == 0) { | ||
1073 | /* Data search found nothing */ | ||
1074 | if (type == DATA_OFF) | ||
1075 | break; | ||
1076 | |||
1077 | ASSERT(type == HOLE_OFF); | ||
1078 | if (lastoff == startoff || lastoff < endoff) { | ||
1079 | found = true; | ||
1080 | *offset = lastoff; | ||
1081 | } | ||
1082 | break; | ||
1083 | } | ||
1084 | |||
1085 | /* | ||
1086 | * At lease we found one page. If this is the first time we | ||
1087 | * step into the loop, and if the first page index offset is | ||
1088 | * greater than the given search offset, a hole was found. | ||
1089 | */ | ||
1090 | if (type == HOLE_OFF && lastoff == startoff && | ||
1091 | lastoff < page_offset(pvec.pages[0])) { | ||
1092 | found = true; | ||
1093 | break; | ||
1094 | } | ||
1095 | |||
1096 | for (i = 0; i < nr_pages; i++) { | ||
1097 | struct page *page = pvec.pages[i]; | ||
1098 | loff_t b_offset; | ||
1099 | |||
1100 | /* | ||
1101 | * At this point, the page may be truncated or | ||
1102 | * invalidated (changing page->mapping to NULL), | ||
1103 | * or even swizzled back from swapper_space to tmpfs | ||
1104 | * file mapping. However, page->index will not change | ||
1105 | * because we have a reference on the page. | ||
1106 | * | ||
1107 | * Searching done if the page index is out of range. | ||
1108 | * If the current offset is not reaches the end of | ||
1109 | * the specified search range, there should be a hole | ||
1110 | * between them. | ||
1111 | */ | ||
1112 | if (page->index > end) { | ||
1113 | if (type == HOLE_OFF && lastoff < endoff) { | ||
1114 | *offset = lastoff; | ||
1115 | found = true; | ||
1116 | } | ||
1117 | goto out; | ||
1118 | } | ||
1119 | |||
1120 | lock_page(page); | ||
1121 | /* | ||
1122 | * Page truncated or invalidated(page->mapping == NULL). | ||
1123 | * We can freely skip it and proceed to check the next | ||
1124 | * page. | ||
1125 | */ | ||
1126 | if (unlikely(page->mapping != inode->i_mapping)) { | ||
1127 | unlock_page(page); | ||
1128 | continue; | ||
1129 | } | ||
1130 | |||
1131 | if (!page_has_buffers(page)) { | ||
1132 | unlock_page(page); | ||
1133 | continue; | ||
1134 | } | ||
1135 | |||
1136 | found = xfs_lookup_buffer_offset(page, &b_offset, type); | ||
1137 | if (found) { | ||
1138 | /* | ||
1139 | * The found offset may be less than the start | ||
1140 | * point to search if this is the first time to | ||
1141 | * come here. | ||
1142 | */ | ||
1143 | *offset = max_t(loff_t, startoff, b_offset); | ||
1144 | unlock_page(page); | ||
1145 | goto out; | ||
1146 | } | ||
1147 | |||
1148 | /* | ||
1149 | * We either searching data but nothing was found, or | ||
1150 | * searching hole but found a data buffer. In either | ||
1151 | * case, probably the next page contains the desired | ||
1152 | * things, update the last offset to it so. | ||
1153 | */ | ||
1154 | lastoff = page_offset(page) + PAGE_SIZE; | ||
1155 | unlock_page(page); | ||
1156 | } | ||
1157 | |||
1158 | /* | ||
1159 | * The number of returned pages less than our desired, search | ||
1160 | * done. In this case, nothing was found for searching data, | ||
1161 | * but we found a hole behind the last offset. | ||
1162 | */ | ||
1163 | if (nr_pages < want) { | ||
1164 | if (type == HOLE_OFF) { | ||
1165 | *offset = lastoff; | ||
1166 | found = true; | ||
1167 | } | ||
1168 | break; | ||
1169 | } | ||
1170 | |||
1171 | index = pvec.pages[i - 1]->index + 1; | ||
1172 | pagevec_release(&pvec); | ||
1173 | } while (index <= end); | ||
1174 | |||
1175 | out: | ||
1176 | pagevec_release(&pvec); | ||
1177 | return found; | ||
1178 | } | ||
1179 | |||
962 | STATIC loff_t | 1180 | STATIC loff_t |
963 | xfs_seek_data( | 1181 | xfs_seek_data( |
964 | struct file *file, | 1182 | struct file *file, |
965 | loff_t start, | 1183 | loff_t start) |
966 | u32 type) | ||
967 | { | 1184 | { |
968 | struct inode *inode = file->f_mapping->host; | 1185 | struct inode *inode = file->f_mapping->host; |
969 | struct xfs_inode *ip = XFS_I(inode); | 1186 | struct xfs_inode *ip = XFS_I(inode); |
970 | struct xfs_mount *mp = ip->i_mount; | 1187 | struct xfs_mount *mp = ip->i_mount; |
971 | struct xfs_bmbt_irec map[2]; | ||
972 | int nmap = 2; | ||
973 | loff_t uninitialized_var(offset); | 1188 | loff_t uninitialized_var(offset); |
974 | xfs_fsize_t isize; | 1189 | xfs_fsize_t isize; |
975 | xfs_fileoff_t fsbno; | 1190 | xfs_fileoff_t fsbno; |
@@ -985,36 +1200,74 @@ xfs_seek_data( | |||
985 | goto out_unlock; | 1200 | goto out_unlock; |
986 | } | 1201 | } |
987 | 1202 | ||
988 | fsbno = XFS_B_TO_FSBT(mp, start); | ||
989 | |||
990 | /* | 1203 | /* |
991 | * Try to read extents from the first block indicated | 1204 | * Try to read extents from the first block indicated |
992 | * by fsbno to the end block of the file. | 1205 | * by fsbno to the end block of the file. |
993 | */ | 1206 | */ |
1207 | fsbno = XFS_B_TO_FSBT(mp, start); | ||
994 | end = XFS_B_TO_FSB(mp, isize); | 1208 | end = XFS_B_TO_FSB(mp, isize); |
1209 | for (;;) { | ||
1210 | struct xfs_bmbt_irec map[2]; | ||
1211 | int nmap = 2; | ||
1212 | unsigned int i; | ||
995 | 1213 | ||
996 | error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, | 1214 | error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, |
997 | XFS_BMAPI_ENTIRE); | 1215 | XFS_BMAPI_ENTIRE); |
998 | if (error) | 1216 | if (error) |
999 | goto out_unlock; | 1217 | goto out_unlock; |
1000 | 1218 | ||
1001 | /* | 1219 | /* No extents at given offset, must be beyond EOF */ |
1002 | * Treat unwritten extent as data extent since it might | 1220 | if (nmap == 0) { |
1003 | * contains dirty data in page cache. | 1221 | error = ENXIO; |
1004 | */ | 1222 | goto out_unlock; |
1005 | if (map[0].br_startblock != HOLESTARTBLOCK) { | 1223 | } |
1006 | offset = max_t(loff_t, start, | 1224 | |
1007 | XFS_FSB_TO_B(mp, map[0].br_startoff)); | 1225 | for (i = 0; i < nmap; i++) { |
1008 | } else { | 1226 | offset = max_t(loff_t, start, |
1227 | XFS_FSB_TO_B(mp, map[i].br_startoff)); | ||
1228 | |||
1229 | /* Landed in a data extent */ | ||
1230 | if (map[i].br_startblock == DELAYSTARTBLOCK || | ||
1231 | (map[i].br_state == XFS_EXT_NORM && | ||
1232 | !isnullstartblock(map[i].br_startblock))) | ||
1233 | goto out; | ||
1234 | |||
1235 | /* | ||
1236 | * Landed in an unwritten extent, try to search data | ||
1237 | * from page cache. | ||
1238 | */ | ||
1239 | if (map[i].br_state == XFS_EXT_UNWRITTEN) { | ||
1240 | if (xfs_find_get_desired_pgoff(inode, &map[i], | ||
1241 | DATA_OFF, &offset)) | ||
1242 | goto out; | ||
1243 | } | ||
1244 | } | ||
1245 | |||
1246 | /* | ||
1247 | * map[0] is hole or its an unwritten extent but | ||
1248 | * without data in page cache. Probably means that | ||
1249 | * we are reading after EOF if nothing in map[1]. | ||
1250 | */ | ||
1009 | if (nmap == 1) { | 1251 | if (nmap == 1) { |
1010 | error = ENXIO; | 1252 | error = ENXIO; |
1011 | goto out_unlock; | 1253 | goto out_unlock; |
1012 | } | 1254 | } |
1013 | 1255 | ||
1014 | offset = max_t(loff_t, start, | 1256 | ASSERT(i > 1); |
1015 | XFS_FSB_TO_B(mp, map[1].br_startoff)); | 1257 | |
1258 | /* | ||
1259 | * Nothing was found, proceed to the next round of search | ||
1260 | * if reading offset not beyond or hit EOF. | ||
1261 | */ | ||
1262 | fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; | ||
1263 | start = XFS_FSB_TO_B(mp, fsbno); | ||
1264 | if (start >= isize) { | ||
1265 | error = ENXIO; | ||
1266 | goto out_unlock; | ||
1267 | } | ||
1016 | } | 1268 | } |
1017 | 1269 | ||
1270 | out: | ||
1018 | if (offset != file->f_pos) | 1271 | if (offset != file->f_pos) |
1019 | file->f_pos = offset; | 1272 | file->f_pos = offset; |
1020 | 1273 | ||
@@ -1029,16 +1282,15 @@ out_unlock: | |||
1029 | STATIC loff_t | 1282 | STATIC loff_t |
1030 | xfs_seek_hole( | 1283 | xfs_seek_hole( |
1031 | struct file *file, | 1284 | struct file *file, |
1032 | loff_t start, | 1285 | loff_t start) |
1033 | u32 type) | ||
1034 | { | 1286 | { |
1035 | struct inode *inode = file->f_mapping->host; | 1287 | struct inode *inode = file->f_mapping->host; |
1036 | struct xfs_inode *ip = XFS_I(inode); | 1288 | struct xfs_inode *ip = XFS_I(inode); |
1037 | struct xfs_mount *mp = ip->i_mount; | 1289 | struct xfs_mount *mp = ip->i_mount; |
1038 | loff_t uninitialized_var(offset); | 1290 | loff_t uninitialized_var(offset); |
1039 | loff_t holeoff; | ||
1040 | xfs_fsize_t isize; | 1291 | xfs_fsize_t isize; |
1041 | xfs_fileoff_t fsbno; | 1292 | xfs_fileoff_t fsbno; |
1293 | xfs_filblks_t end; | ||
1042 | uint lock; | 1294 | uint lock; |
1043 | int error; | 1295 | int error; |
1044 | 1296 | ||
@@ -1054,21 +1306,77 @@ xfs_seek_hole( | |||
1054 | } | 1306 | } |
1055 | 1307 | ||
1056 | fsbno = XFS_B_TO_FSBT(mp, start); | 1308 | fsbno = XFS_B_TO_FSBT(mp, start); |
1057 | error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK); | 1309 | end = XFS_B_TO_FSB(mp, isize); |
1058 | if (error) | 1310 | |
1059 | goto out_unlock; | 1311 | for (;;) { |
1312 | struct xfs_bmbt_irec map[2]; | ||
1313 | int nmap = 2; | ||
1314 | unsigned int i; | ||
1315 | |||
1316 | error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, | ||
1317 | XFS_BMAPI_ENTIRE); | ||
1318 | if (error) | ||
1319 | goto out_unlock; | ||
1320 | |||
1321 | /* No extents at given offset, must be beyond EOF */ | ||
1322 | if (nmap == 0) { | ||
1323 | error = ENXIO; | ||
1324 | goto out_unlock; | ||
1325 | } | ||
1326 | |||
1327 | for (i = 0; i < nmap; i++) { | ||
1328 | offset = max_t(loff_t, start, | ||
1329 | XFS_FSB_TO_B(mp, map[i].br_startoff)); | ||
1330 | |||
1331 | /* Landed in a hole */ | ||
1332 | if (map[i].br_startblock == HOLESTARTBLOCK) | ||
1333 | goto out; | ||
1334 | |||
1335 | /* | ||
1336 | * Landed in an unwritten extent, try to search hole | ||
1337 | * from page cache. | ||
1338 | */ | ||
1339 | if (map[i].br_state == XFS_EXT_UNWRITTEN) { | ||
1340 | if (xfs_find_get_desired_pgoff(inode, &map[i], | ||
1341 | HOLE_OFF, &offset)) | ||
1342 | goto out; | ||
1343 | } | ||
1344 | } | ||
1345 | |||
1346 | /* | ||
1347 | * map[0] contains data or its unwritten but contains | ||
1348 | * data in page cache, probably means that we are | ||
1349 | * reading after EOF. We should fix offset to point | ||
1350 | * to the end of the file(i.e., there is an implicit | ||
1351 | * hole at the end of any file). | ||
1352 | */ | ||
1353 | if (nmap == 1) { | ||
1354 | offset = isize; | ||
1355 | break; | ||
1356 | } | ||
1357 | |||
1358 | ASSERT(i > 1); | ||
1060 | 1359 | ||
1061 | holeoff = XFS_FSB_TO_B(mp, fsbno); | ||
1062 | if (holeoff <= start) | ||
1063 | offset = start; | ||
1064 | else { | ||
1065 | /* | 1360 | /* |
1066 | * xfs_bmap_first_unused() could return a value bigger than | 1361 | * Both mappings contains data, proceed to the next round of |
1067 | * isize if there are no more holes past the supplied offset. | 1362 | * search if the current reading offset not beyond or hit EOF. |
1068 | */ | 1363 | */ |
1069 | offset = min_t(loff_t, holeoff, isize); | 1364 | fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; |
1365 | start = XFS_FSB_TO_B(mp, fsbno); | ||
1366 | if (start >= isize) { | ||
1367 | offset = isize; | ||
1368 | break; | ||
1369 | } | ||
1070 | } | 1370 | } |
1071 | 1371 | ||
1372 | out: | ||
1373 | /* | ||
1374 | * At this point, we must have found a hole. However, the returned | ||
1375 | * offset may be bigger than the file size as it may be aligned to | ||
1376 | * page boundary for unwritten extents, we need to deal with this | ||
1377 | * situation in particular. | ||
1378 | */ | ||
1379 | offset = min_t(loff_t, offset, isize); | ||
1072 | if (offset != file->f_pos) | 1380 | if (offset != file->f_pos) |
1073 | file->f_pos = offset; | 1381 | file->f_pos = offset; |
1074 | 1382 | ||
@@ -1092,9 +1400,9 @@ xfs_file_llseek( | |||
1092 | case SEEK_SET: | 1400 | case SEEK_SET: |
1093 | return generic_file_llseek(file, offset, origin); | 1401 | return generic_file_llseek(file, offset, origin); |
1094 | case SEEK_DATA: | 1402 | case SEEK_DATA: |
1095 | return xfs_seek_data(file, offset, origin); | 1403 | return xfs_seek_data(file, offset); |
1096 | case SEEK_HOLE: | 1404 | case SEEK_HOLE: |
1097 | return xfs_seek_hole(file, offset, origin); | 1405 | return xfs_seek_hole(file, offset); |
1098 | default: | 1406 | default: |
1099 | return -EINVAL; | 1407 | return -EINVAL; |
1100 | } | 1408 | } |
@@ -1134,4 +1442,5 @@ const struct file_operations xfs_dir_file_operations = { | |||
1134 | static const struct vm_operations_struct xfs_file_vm_ops = { | 1442 | static const struct vm_operations_struct xfs_file_vm_ops = { |
1135 | .fault = filemap_fault, | 1443 | .fault = filemap_fault, |
1136 | .page_mkwrite = xfs_vm_page_mkwrite, | 1444 | .page_mkwrite = xfs_vm_page_mkwrite, |
1445 | .remap_pages = generic_file_remap_pages, | ||
1137 | }; | 1446 | }; |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 21e37b55f7e5..445bf1aef31c 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -431,7 +431,7 @@ xfs_ialloc_next_ag( | |||
431 | 431 | ||
432 | spin_lock(&mp->m_agirotor_lock); | 432 | spin_lock(&mp->m_agirotor_lock); |
433 | agno = mp->m_agirotor; | 433 | agno = mp->m_agirotor; |
434 | if (++mp->m_agirotor == mp->m_maxagi) | 434 | if (++mp->m_agirotor >= mp->m_maxagi) |
435 | mp->m_agirotor = 0; | 435 | mp->m_agirotor = 0; |
436 | spin_unlock(&mp->m_agirotor_lock); | 436 | spin_unlock(&mp->m_agirotor_lock); |
437 | 437 | ||
@@ -962,23 +962,22 @@ xfs_dialloc( | |||
962 | if (!pag->pagi_freecount && !okalloc) | 962 | if (!pag->pagi_freecount && !okalloc) |
963 | goto nextag; | 963 | goto nextag; |
964 | 964 | ||
965 | /* | ||
966 | * Then read in the AGI buffer and recheck with the AGI buffer | ||
967 | * lock held. | ||
968 | */ | ||
965 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); | 969 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); |
966 | if (error) | 970 | if (error) |
967 | goto out_error; | 971 | goto out_error; |
968 | 972 | ||
969 | /* | ||
970 | * Once the AGI has been read in we have to recheck | ||
971 | * pagi_freecount with the AGI buffer lock held. | ||
972 | */ | ||
973 | if (pag->pagi_freecount) { | 973 | if (pag->pagi_freecount) { |
974 | xfs_perag_put(pag); | 974 | xfs_perag_put(pag); |
975 | goto out_alloc; | 975 | goto out_alloc; |
976 | } | 976 | } |
977 | 977 | ||
978 | if (!okalloc) { | 978 | if (!okalloc) |
979 | xfs_trans_brelse(tp, agbp); | 979 | goto nextag_relse_buffer; |
980 | goto nextag; | 980 | |
981 | } | ||
982 | 981 | ||
983 | error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); | 982 | error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); |
984 | if (error) { | 983 | if (error) { |
@@ -1007,6 +1006,8 @@ xfs_dialloc( | |||
1007 | return 0; | 1006 | return 0; |
1008 | } | 1007 | } |
1009 | 1008 | ||
1009 | nextag_relse_buffer: | ||
1010 | xfs_trans_brelse(tp, agbp); | ||
1010 | nextag: | 1011 | nextag: |
1011 | xfs_perag_put(pag); | 1012 | xfs_perag_put(pag); |
1012 | if (++agno == mp->m_sb.sb_agcount) | 1013 | if (++agno == mp->m_sb.sb_agcount) |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0e0232c3b6d9..8305f2ac6773 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -70,16 +70,16 @@ xfs_find_handle( | |||
70 | int hsize; | 70 | int hsize; |
71 | xfs_handle_t handle; | 71 | xfs_handle_t handle; |
72 | struct inode *inode; | 72 | struct inode *inode; |
73 | struct file *file = NULL; | 73 | struct fd f; |
74 | struct path path; | 74 | struct path path; |
75 | int error; | 75 | int error; |
76 | struct xfs_inode *ip; | 76 | struct xfs_inode *ip; |
77 | 77 | ||
78 | if (cmd == XFS_IOC_FD_TO_HANDLE) { | 78 | if (cmd == XFS_IOC_FD_TO_HANDLE) { |
79 | file = fget(hreq->fd); | 79 | f = fdget(hreq->fd); |
80 | if (!file) | 80 | if (!f.file) |
81 | return -EBADF; | 81 | return -EBADF; |
82 | inode = file->f_path.dentry->d_inode; | 82 | inode = f.file->f_path.dentry->d_inode; |
83 | } else { | 83 | } else { |
84 | error = user_lpath((const char __user *)hreq->path, &path); | 84 | error = user_lpath((const char __user *)hreq->path, &path); |
85 | if (error) | 85 | if (error) |
@@ -134,7 +134,7 @@ xfs_find_handle( | |||
134 | 134 | ||
135 | out_put: | 135 | out_put: |
136 | if (cmd == XFS_IOC_FD_TO_HANDLE) | 136 | if (cmd == XFS_IOC_FD_TO_HANDLE) |
137 | fput(file); | 137 | fdput(f); |
138 | else | 138 | else |
139 | path_put(&path); | 139 | path_put(&path); |
140 | return error; | 140 | return error; |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 29c2f83d4147..b2bd3a0e6376 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -440,7 +440,7 @@ xfs_initialize_perag( | |||
440 | xfs_agnumber_t agcount, | 440 | xfs_agnumber_t agcount, |
441 | xfs_agnumber_t *maxagi) | 441 | xfs_agnumber_t *maxagi) |
442 | { | 442 | { |
443 | xfs_agnumber_t index, max_metadata; | 443 | xfs_agnumber_t index; |
444 | xfs_agnumber_t first_initialised = 0; | 444 | xfs_agnumber_t first_initialised = 0; |
445 | xfs_perag_t *pag; | 445 | xfs_perag_t *pag; |
446 | xfs_agino_t agino; | 446 | xfs_agino_t agino; |
@@ -500,43 +500,10 @@ xfs_initialize_perag( | |||
500 | else | 500 | else |
501 | mp->m_flags &= ~XFS_MOUNT_32BITINODES; | 501 | mp->m_flags &= ~XFS_MOUNT_32BITINODES; |
502 | 502 | ||
503 | if (mp->m_flags & XFS_MOUNT_32BITINODES) { | 503 | if (mp->m_flags & XFS_MOUNT_32BITINODES) |
504 | /* | 504 | index = xfs_set_inode32(mp); |
505 | * Calculate how much should be reserved for inodes to meet | 505 | else |
506 | * the max inode percentage. | 506 | index = xfs_set_inode64(mp); |
507 | */ | ||
508 | if (mp->m_maxicount) { | ||
509 | __uint64_t icount; | ||
510 | |||
511 | icount = sbp->sb_dblocks * sbp->sb_imax_pct; | ||
512 | do_div(icount, 100); | ||
513 | icount += sbp->sb_agblocks - 1; | ||
514 | do_div(icount, sbp->sb_agblocks); | ||
515 | max_metadata = icount; | ||
516 | } else { | ||
517 | max_metadata = agcount; | ||
518 | } | ||
519 | |||
520 | for (index = 0; index < agcount; index++) { | ||
521 | ino = XFS_AGINO_TO_INO(mp, index, agino); | ||
522 | if (ino > XFS_MAXINUMBER_32) { | ||
523 | index++; | ||
524 | break; | ||
525 | } | ||
526 | |||
527 | pag = xfs_perag_get(mp, index); | ||
528 | pag->pagi_inodeok = 1; | ||
529 | if (index < max_metadata) | ||
530 | pag->pagf_metadata = 1; | ||
531 | xfs_perag_put(pag); | ||
532 | } | ||
533 | } else { | ||
534 | for (index = 0; index < agcount; index++) { | ||
535 | pag = xfs_perag_get(mp, index); | ||
536 | pag->pagi_inodeok = 1; | ||
537 | xfs_perag_put(pag); | ||
538 | } | ||
539 | } | ||
540 | 507 | ||
541 | if (maxagi) | 508 | if (maxagi) |
542 | *maxagi = index; | 509 | *maxagi = index; |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 05a05a7b6119..deee09e534dc 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -54,12 +54,7 @@ typedef struct xfs_trans_reservations { | |||
54 | #include "xfs_sync.h" | 54 | #include "xfs_sync.h" |
55 | 55 | ||
56 | struct xlog; | 56 | struct xlog; |
57 | struct xfs_mount_args; | ||
58 | struct xfs_inode; | 57 | struct xfs_inode; |
59 | struct xfs_bmbt_irec; | ||
60 | struct xfs_bmap_free; | ||
61 | struct xfs_extdelta; | ||
62 | struct xfs_swapext; | ||
63 | struct xfs_mru_cache; | 58 | struct xfs_mru_cache; |
64 | struct xfs_nameops; | 59 | struct xfs_nameops; |
65 | struct xfs_ail; | 60 | struct xfs_ail; |
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index fed504fc2999..71926d630527 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c | |||
@@ -97,8 +97,7 @@ xfs_fs_set_xstate( | |||
97 | STATIC int | 97 | STATIC int |
98 | xfs_fs_get_dqblk( | 98 | xfs_fs_get_dqblk( |
99 | struct super_block *sb, | 99 | struct super_block *sb, |
100 | int type, | 100 | struct kqid qid, |
101 | qid_t id, | ||
102 | struct fs_disk_quota *fdq) | 101 | struct fs_disk_quota *fdq) |
103 | { | 102 | { |
104 | struct xfs_mount *mp = XFS_M(sb); | 103 | struct xfs_mount *mp = XFS_M(sb); |
@@ -108,14 +107,14 @@ xfs_fs_get_dqblk( | |||
108 | if (!XFS_IS_QUOTA_ON(mp)) | 107 | if (!XFS_IS_QUOTA_ON(mp)) |
109 | return -ESRCH; | 108 | return -ESRCH; |
110 | 109 | ||
111 | return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq); | 110 | return -xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), |
111 | xfs_quota_type(qid.type), fdq); | ||
112 | } | 112 | } |
113 | 113 | ||
114 | STATIC int | 114 | STATIC int |
115 | xfs_fs_set_dqblk( | 115 | xfs_fs_set_dqblk( |
116 | struct super_block *sb, | 116 | struct super_block *sb, |
117 | int type, | 117 | struct kqid qid, |
118 | qid_t id, | ||
119 | struct fs_disk_quota *fdq) | 118 | struct fs_disk_quota *fdq) |
120 | { | 119 | { |
121 | struct xfs_mount *mp = XFS_M(sb); | 120 | struct xfs_mount *mp = XFS_M(sb); |
@@ -127,7 +126,8 @@ xfs_fs_set_dqblk( | |||
127 | if (!XFS_IS_QUOTA_ON(mp)) | 126 | if (!XFS_IS_QUOTA_ON(mp)) |
128 | return -ESRCH; | 127 | return -ESRCH; |
129 | 128 | ||
130 | return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); | 129 | return -xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid), |
130 | xfs_quota_type(qid.type), fdq); | ||
131 | } | 131 | } |
132 | 132 | ||
133 | const struct quotactl_ops xfs_quotactl_operations = { | 133 | const struct quotactl_ops xfs_quotactl_operations = { |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 92d4331cd4f1..ca28a4ba4b54 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -857,7 +857,7 @@ xfs_rtbuf_get( | |||
857 | xfs_buf_t *bp; /* block buffer, result */ | 857 | xfs_buf_t *bp; /* block buffer, result */ |
858 | xfs_inode_t *ip; /* bitmap or summary inode */ | 858 | xfs_inode_t *ip; /* bitmap or summary inode */ |
859 | xfs_bmbt_irec_t map; | 859 | xfs_bmbt_irec_t map; |
860 | int nmap; | 860 | int nmap = 1; |
861 | int error; /* error value */ | 861 | int error; /* error value */ |
862 | 862 | ||
863 | ip = issum ? mp->m_rsumip : mp->m_rbmip; | 863 | ip = issum ? mp->m_rsumip : mp->m_rbmip; |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index bdaf4cb9f4a2..26a09bd7f975 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -88,6 +88,8 @@ mempool_t *xfs_ioend_pool; | |||
88 | * unwritten extent conversion */ | 88 | * unwritten extent conversion */ |
89 | #define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ | 89 | #define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ |
90 | #define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ | 90 | #define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ |
91 | #define MNTOPT_32BITINODE "inode32" /* inode allocation limited to | ||
92 | * XFS_MAXINUMBER_32 */ | ||
91 | #define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ | 93 | #define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ |
92 | #define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ | 94 | #define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ |
93 | #define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ | 95 | #define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ |
@@ -120,12 +122,18 @@ mempool_t *xfs_ioend_pool; | |||
120 | * in the future, too. | 122 | * in the future, too. |
121 | */ | 123 | */ |
122 | enum { | 124 | enum { |
123 | Opt_barrier, Opt_nobarrier, Opt_err | 125 | Opt_barrier, |
126 | Opt_nobarrier, | ||
127 | Opt_inode64, | ||
128 | Opt_inode32, | ||
129 | Opt_err | ||
124 | }; | 130 | }; |
125 | 131 | ||
126 | static const match_table_t tokens = { | 132 | static const match_table_t tokens = { |
127 | {Opt_barrier, "barrier"}, | 133 | {Opt_barrier, "barrier"}, |
128 | {Opt_nobarrier, "nobarrier"}, | 134 | {Opt_nobarrier, "nobarrier"}, |
135 | {Opt_inode64, "inode64"}, | ||
136 | {Opt_inode32, "inode32"}, | ||
129 | {Opt_err, NULL} | 137 | {Opt_err, NULL} |
130 | }; | 138 | }; |
131 | 139 | ||
@@ -197,7 +205,9 @@ xfs_parseargs( | |||
197 | */ | 205 | */ |
198 | mp->m_flags |= XFS_MOUNT_BARRIER; | 206 | mp->m_flags |= XFS_MOUNT_BARRIER; |
199 | mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; | 207 | mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; |
208 | #if !XFS_BIG_INUMS | ||
200 | mp->m_flags |= XFS_MOUNT_SMALL_INUMS; | 209 | mp->m_flags |= XFS_MOUNT_SMALL_INUMS; |
210 | #endif | ||
201 | 211 | ||
202 | /* | 212 | /* |
203 | * These can be overridden by the mount option parsing. | 213 | * These can be overridden by the mount option parsing. |
@@ -294,6 +304,8 @@ xfs_parseargs( | |||
294 | return EINVAL; | 304 | return EINVAL; |
295 | } | 305 | } |
296 | dswidth = simple_strtoul(value, &eov, 10); | 306 | dswidth = simple_strtoul(value, &eov, 10); |
307 | } else if (!strcmp(this_char, MNTOPT_32BITINODE)) { | ||
308 | mp->m_flags |= XFS_MOUNT_SMALL_INUMS; | ||
297 | } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { | 309 | } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { |
298 | mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; | 310 | mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; |
299 | #if !XFS_BIG_INUMS | 311 | #if !XFS_BIG_INUMS |
@@ -492,6 +504,7 @@ xfs_showargs( | |||
492 | { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, | 504 | { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, |
493 | { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, | 505 | { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, |
494 | { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, | 506 | { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, |
507 | { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE }, | ||
495 | { 0, NULL } | 508 | { 0, NULL } |
496 | }; | 509 | }; |
497 | static struct proc_xfs_info xfs_info_unset[] = { | 510 | static struct proc_xfs_info xfs_info_unset[] = { |
@@ -591,6 +604,80 @@ xfs_max_file_offset( | |||
591 | return (((__uint64_t)pagefactor) << bitshift) - 1; | 604 | return (((__uint64_t)pagefactor) << bitshift) - 1; |
592 | } | 605 | } |
593 | 606 | ||
607 | xfs_agnumber_t | ||
608 | xfs_set_inode32(struct xfs_mount *mp) | ||
609 | { | ||
610 | xfs_agnumber_t index = 0; | ||
611 | xfs_agnumber_t maxagi = 0; | ||
612 | xfs_sb_t *sbp = &mp->m_sb; | ||
613 | xfs_agnumber_t max_metadata; | ||
614 | xfs_agino_t agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0); | ||
615 | xfs_ino_t ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino); | ||
616 | xfs_perag_t *pag; | ||
617 | |||
618 | /* Calculate how much should be reserved for inodes to meet | ||
619 | * the max inode percentage. | ||
620 | */ | ||
621 | if (mp->m_maxicount) { | ||
622 | __uint64_t icount; | ||
623 | |||
624 | icount = sbp->sb_dblocks * sbp->sb_imax_pct; | ||
625 | do_div(icount, 100); | ||
626 | icount += sbp->sb_agblocks - 1; | ||
627 | do_div(icount, sbp->sb_agblocks); | ||
628 | max_metadata = icount; | ||
629 | } else { | ||
630 | max_metadata = sbp->sb_agcount; | ||
631 | } | ||
632 | |||
633 | for (index = 0; index < sbp->sb_agcount; index++) { | ||
634 | ino = XFS_AGINO_TO_INO(mp, index, agino); | ||
635 | |||
636 | if (ino > XFS_MAXINUMBER_32) { | ||
637 | pag = xfs_perag_get(mp, index); | ||
638 | pag->pagi_inodeok = 0; | ||
639 | pag->pagf_metadata = 0; | ||
640 | xfs_perag_put(pag); | ||
641 | continue; | ||
642 | } | ||
643 | |||
644 | pag = xfs_perag_get(mp, index); | ||
645 | pag->pagi_inodeok = 1; | ||
646 | maxagi++; | ||
647 | if (index < max_metadata) | ||
648 | pag->pagf_metadata = 1; | ||
649 | xfs_perag_put(pag); | ||
650 | } | ||
651 | mp->m_flags |= (XFS_MOUNT_32BITINODES | | ||
652 | XFS_MOUNT_SMALL_INUMS); | ||
653 | |||
654 | return maxagi; | ||
655 | } | ||
656 | |||
657 | xfs_agnumber_t | ||
658 | xfs_set_inode64(struct xfs_mount *mp) | ||
659 | { | ||
660 | xfs_agnumber_t index = 0; | ||
661 | |||
662 | for (index = 0; index < mp->m_sb.sb_agcount; index++) { | ||
663 | struct xfs_perag *pag; | ||
664 | |||
665 | pag = xfs_perag_get(mp, index); | ||
666 | pag->pagi_inodeok = 1; | ||
667 | pag->pagf_metadata = 0; | ||
668 | xfs_perag_put(pag); | ||
669 | } | ||
670 | |||
671 | /* There is no need for lock protection on m_flags, | ||
672 | * the rw_semaphore of the VFS superblock is locked | ||
673 | * during mount/umount/remount operations, so this is | ||
674 | * enough to avoid concurency on the m_flags field | ||
675 | */ | ||
676 | mp->m_flags &= ~(XFS_MOUNT_32BITINODES | | ||
677 | XFS_MOUNT_SMALL_INUMS); | ||
678 | return index; | ||
679 | } | ||
680 | |||
594 | STATIC int | 681 | STATIC int |
595 | xfs_blkdev_get( | 682 | xfs_blkdev_get( |
596 | xfs_mount_t *mp, | 683 | xfs_mount_t *mp, |
@@ -919,6 +1006,7 @@ xfs_fs_put_super( | |||
919 | struct xfs_mount *mp = XFS_M(sb); | 1006 | struct xfs_mount *mp = XFS_M(sb); |
920 | 1007 | ||
921 | xfs_filestream_unmount(mp); | 1008 | xfs_filestream_unmount(mp); |
1009 | cancel_delayed_work_sync(&mp->m_sync_work); | ||
922 | xfs_unmountfs(mp); | 1010 | xfs_unmountfs(mp); |
923 | xfs_syncd_stop(mp); | 1011 | xfs_syncd_stop(mp); |
924 | xfs_freesb(mp); | 1012 | xfs_freesb(mp); |
@@ -953,7 +1041,7 @@ xfs_fs_sync_fs( | |||
953 | * We schedule xfssyncd now (now that the disk is | 1041 | * We schedule xfssyncd now (now that the disk is |
954 | * active) instead of later (when it might not be). | 1042 | * active) instead of later (when it might not be). |
955 | */ | 1043 | */ |
956 | flush_delayed_work_sync(&mp->m_sync_work); | 1044 | flush_delayed_work(&mp->m_sync_work); |
957 | } | 1045 | } |
958 | 1046 | ||
959 | return 0; | 1047 | return 0; |
@@ -1055,6 +1143,12 @@ xfs_fs_remount( | |||
1055 | case Opt_nobarrier: | 1143 | case Opt_nobarrier: |
1056 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1144 | mp->m_flags &= ~XFS_MOUNT_BARRIER; |
1057 | break; | 1145 | break; |
1146 | case Opt_inode64: | ||
1147 | mp->m_maxagi = xfs_set_inode64(mp); | ||
1148 | break; | ||
1149 | case Opt_inode32: | ||
1150 | mp->m_maxagi = xfs_set_inode32(mp); | ||
1151 | break; | ||
1058 | default: | 1152 | default: |
1059 | /* | 1153 | /* |
1060 | * Logically we would return an error here to prevent | 1154 | * Logically we would return an error here to prevent |
@@ -1505,6 +1599,11 @@ xfs_init_zones(void) | |||
1505 | STATIC void | 1599 | STATIC void |
1506 | xfs_destroy_zones(void) | 1600 | xfs_destroy_zones(void) |
1507 | { | 1601 | { |
1602 | /* | ||
1603 | * Make sure all delayed rcu free are flushed before we | ||
1604 | * destroy caches. | ||
1605 | */ | ||
1606 | rcu_barrier(); | ||
1508 | kmem_zone_destroy(xfs_ili_zone); | 1607 | kmem_zone_destroy(xfs_ili_zone); |
1509 | kmem_zone_destroy(xfs_inode_zone); | 1608 | kmem_zone_destroy(xfs_inode_zone); |
1510 | kmem_zone_destroy(xfs_efi_zone); | 1609 | kmem_zone_destroy(xfs_efi_zone); |
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 09b0c26b2245..9de4a920ba05 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h | |||
@@ -75,6 +75,8 @@ struct block_device; | |||
75 | extern __uint64_t xfs_max_file_offset(unsigned int); | 75 | extern __uint64_t xfs_max_file_offset(unsigned int); |
76 | 76 | ||
77 | extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); | 77 | extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); |
78 | extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *); | ||
79 | extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *); | ||
78 | 80 | ||
79 | extern const struct export_operations xfs_export_operations; | 81 | extern const struct export_operations xfs_export_operations; |
80 | extern const struct xattr_handler *xfs_xattr_handlers[]; | 82 | extern const struct xattr_handler *xfs_xattr_handlers[]; |
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 96548176db80..9500caf15acf 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c | |||
@@ -475,7 +475,7 @@ xfs_flush_inodes( | |||
475 | struct xfs_mount *mp = ip->i_mount; | 475 | struct xfs_mount *mp = ip->i_mount; |
476 | 476 | ||
477 | queue_work(xfs_syncd_wq, &mp->m_flush_work); | 477 | queue_work(xfs_syncd_wq, &mp->m_flush_work); |
478 | flush_work_sync(&mp->m_flush_work); | 478 | flush_work(&mp->m_flush_work); |
479 | } | 479 | } |
480 | 480 | ||
481 | STATIC void | 481 | STATIC void |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index e5795dd6013a..7d36ccf57f93 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -37,6 +37,7 @@ struct xlog_recover; | |||
37 | struct xlog_recover_item; | 37 | struct xlog_recover_item; |
38 | struct xfs_buf_log_format; | 38 | struct xfs_buf_log_format; |
39 | struct xfs_inode_log_format; | 39 | struct xfs_inode_log_format; |
40 | struct xfs_bmbt_irec; | ||
40 | 41 | ||
41 | DECLARE_EVENT_CLASS(xfs_attr_list_class, | 42 | DECLARE_EVENT_CLASS(xfs_attr_list_class, |
42 | TP_PROTO(struct xfs_attr_list_context *ctx), | 43 | TP_PROTO(struct xfs_attr_list_context *ctx), |
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index bcb60542fcf1..0c7fa54f309e 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c | |||
@@ -578,9 +578,11 @@ xfs_quota_warn( | |||
578 | /* no warnings for project quotas - we just return ENOSPC later */ | 578 | /* no warnings for project quotas - we just return ENOSPC later */ |
579 | if (dqp->dq_flags & XFS_DQ_PROJ) | 579 | if (dqp->dq_flags & XFS_DQ_PROJ) |
580 | return; | 580 | return; |
581 | quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA, | 581 | quota_send_warning(make_kqid(&init_user_ns, |
582 | be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev, | 582 | (dqp->dq_flags & XFS_DQ_USER) ? |
583 | type); | 583 | USRQUOTA : GRPQUOTA, |
584 | be32_to_cpu(dqp->q_core.d_id)), | ||
585 | mp->m_super->s_dev, type); | ||
584 | } | 586 | } |
585 | 587 | ||
586 | /* | 588 | /* |