aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c41
-rw-r--r--fs/9p/vfs_super.c3
-rw-r--r--fs/affs/affs.h2
-rw-r--r--fs/affs/namei.c7
-rw-r--r--fs/affs/super.c31
-rw-r--r--fs/affs/symlink.c7
-rw-r--r--fs/bfs/inode.c43
-rw-r--r--fs/binfmt_aout.c1
-rw-r--r--fs/binfmt_elf.c27
-rw-r--r--fs/binfmt_elf_fdpic.c3
-rw-r--r--fs/binfmt_flat.c1
-rw-r--r--fs/binfmt_som.c1
-rw-r--r--fs/bio.c2
-rw-r--r--fs/btrfs/acl.c13
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/disk-io.c6
-rw-r--r--fs/btrfs/extent-tree.c32
-rw-r--r--fs/btrfs/extent_map.c14
-rw-r--r--fs/btrfs/file.c100
-rw-r--r--fs/btrfs/inode.c22
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/relocation.c4
-rw-r--r--fs/btrfs/super.c9
-rw-r--r--fs/btrfs/volumes.c17
-rw-r--r--fs/cifs/cifs_dfs_ref.c3
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/configfs/symlink.c4
-rw-r--r--fs/debugfs/inode.c11
-rw-r--r--fs/ecryptfs/crypto.c4
-rw-r--r--fs/ecryptfs/file.c17
-rw-r--r--fs/ecryptfs/inode.c158
-rw-r--r--fs/ecryptfs/main.c4
-rw-r--r--fs/eventfd.c89
-rw-r--r--fs/exec.c36
-rw-r--r--fs/ext4/ext4.h9
-rw-r--r--fs/ext4/extents.c21
-rw-r--r--fs/ext4/inode.c82
-rw-r--r--fs/fcntl.c108
-rw-r--r--fs/hppfs/hppfs.c18
-rw-r--r--fs/namei.c23
-rw-r--r--fs/namespace.c14
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c4
-rw-r--r--fs/proc/base.c1
-rw-r--r--fs/ramfs/file-nommu.c26
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/romfs/super.c1
-rw-r--r--fs/ubifs/gc.c96
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c183
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c2
-rw-r--r--fs/xfs/xfs_dfrag.c106
-rw-r--r--fs/xfs/xfs_iget.c1
-rw-r--r--fs/xfs/xfs_inode.c21
-rw-r--r--fs/xfs/xfs_rtalloc.c2
56 files changed, 836 insertions, 620 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 18f74ec4dce9..9d03d1ebca6f 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1001,44 +1001,6 @@ done:
1001} 1001}
1002 1002
1003/** 1003/**
1004 * v9fs_vfs_readlink - read a symlink's location
1005 * @dentry: dentry for symlink
1006 * @buffer: buffer to load symlink location into
1007 * @buflen: length of buffer
1008 *
1009 */
1010
1011static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
1012 int buflen)
1013{
1014 int retval;
1015 int ret;
1016 char *link = __getname();
1017
1018 if (unlikely(!link))
1019 return -ENOMEM;
1020
1021 if (buflen > PATH_MAX)
1022 buflen = PATH_MAX;
1023
1024 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
1025 dentry);
1026
1027 retval = v9fs_readlink(dentry, link, buflen);
1028
1029 if (retval > 0) {
1030 if ((ret = copy_to_user(buffer, link, retval)) != 0) {
1031 P9_DPRINTK(P9_DEBUG_ERROR,
1032 "problem copying to user: %d\n", ret);
1033 retval = ret;
1034 }
1035 }
1036
1037 __putname(link);
1038 return retval;
1039}
1040
1041/**
1042 * v9fs_vfs_follow_link - follow a symlink path 1004 * v9fs_vfs_follow_link - follow a symlink path
1043 * @dentry: dentry for symlink 1005 * @dentry: dentry for symlink
1044 * @nd: nameidata 1006 * @nd: nameidata
@@ -1230,7 +1192,6 @@ static const struct inode_operations v9fs_dir_inode_operations_ext = {
1230 .rmdir = v9fs_vfs_rmdir, 1192 .rmdir = v9fs_vfs_rmdir,
1231 .mknod = v9fs_vfs_mknod, 1193 .mknod = v9fs_vfs_mknod,
1232 .rename = v9fs_vfs_rename, 1194 .rename = v9fs_vfs_rename,
1233 .readlink = v9fs_vfs_readlink,
1234 .getattr = v9fs_vfs_getattr, 1195 .getattr = v9fs_vfs_getattr,
1235 .setattr = v9fs_vfs_setattr, 1196 .setattr = v9fs_vfs_setattr,
1236}; 1197};
@@ -1253,7 +1214,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
1253}; 1214};
1254 1215
1255static const struct inode_operations v9fs_symlink_inode_operations = { 1216static const struct inode_operations v9fs_symlink_inode_operations = {
1256 .readlink = v9fs_vfs_readlink, 1217 .readlink = generic_readlink,
1257 .follow_link = v9fs_vfs_follow_link, 1218 .follow_link = v9fs_vfs_follow_link,
1258 .put_link = v9fs_vfs_put_link, 1219 .put_link = v9fs_vfs_put_link,
1259 .getattr = v9fs_vfs_getattr, 1220 .getattr = v9fs_vfs_getattr,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 14a86448572c..69357c0d9899 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -188,7 +188,8 @@ static void v9fs_kill_super(struct super_block *s)
188 188
189 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); 189 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
190 190
191 v9fs_dentry_release(s->s_root); /* clunk root */ 191 if (s->s_root)
192 v9fs_dentry_release(s->s_root); /* clunk root */
192 193
193 kill_anon_super(s); 194 kill_anon_super(s);
194 195
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index e511dc621a2e..0e40caaba456 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -106,8 +106,8 @@ struct affs_sb_info {
106 u32 s_last_bmap; 106 u32 s_last_bmap;
107 struct buffer_head *s_bmap_bh; 107 struct buffer_head *s_bmap_bh;
108 char *s_prefix; /* Prefix for volumes and assigns. */ 108 char *s_prefix; /* Prefix for volumes and assigns. */
109 int s_prefix_len; /* Length of prefix. */
110 char s_volume[32]; /* Volume prefix for absolute symlinks. */ 109 char s_volume[32]; /* Volume prefix for absolute symlinks. */
110 spinlock_t symlink_lock; /* protects the previous two */
111}; 111};
112 112
113#define SF_INTL 0x0001 /* International filesystem. */ 113#define SF_INTL 0x0001 /* International filesystem. */
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 960d336ec694..d70bbbac6b7b 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -341,10 +341,13 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
341 p = (char *)AFFS_HEAD(bh)->table; 341 p = (char *)AFFS_HEAD(bh)->table;
342 lc = '/'; 342 lc = '/';
343 if (*symname == '/') { 343 if (*symname == '/') {
344 struct affs_sb_info *sbi = AFFS_SB(sb);
344 while (*symname == '/') 345 while (*symname == '/')
345 symname++; 346 symname++;
346 while (AFFS_SB(sb)->s_volume[i]) /* Cannot overflow */ 347 spin_lock(&sbi->symlink_lock);
347 *p++ = AFFS_SB(sb)->s_volume[i++]; 348 while (sbi->s_volume[i]) /* Cannot overflow */
349 *p++ = sbi->s_volume[i++];
350 spin_unlock(&sbi->symlink_lock);
348 } 351 }
349 while (i < maxlen && (c = *symname++)) { 352 while (i < maxlen && (c = *symname++)) {
350 if (c == '.' && lc == '/' && *symname == '.' && symname[1] == '/') { 353 if (c == '.' && lc == '/' && *symname == '.' && symname[1] == '/') {
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 104fdcb3a7fc..d41e9673cd97 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -203,7 +203,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
203 switch (token) { 203 switch (token) {
204 case Opt_bs: 204 case Opt_bs:
205 if (match_int(&args[0], &n)) 205 if (match_int(&args[0], &n))
206 return -EINVAL; 206 return 0;
207 if (n != 512 && n != 1024 && n != 2048 207 if (n != 512 && n != 1024 && n != 2048
208 && n != 4096) { 208 && n != 4096) {
209 printk ("AFFS: Invalid blocksize (512, 1024, 2048, 4096 allowed)\n"); 209 printk ("AFFS: Invalid blocksize (512, 1024, 2048, 4096 allowed)\n");
@@ -213,7 +213,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
213 break; 213 break;
214 case Opt_mode: 214 case Opt_mode:
215 if (match_octal(&args[0], &option)) 215 if (match_octal(&args[0], &option))
216 return 1; 216 return 0;
217 *mode = option & 0777; 217 *mode = option & 0777;
218 *mount_opts |= SF_SETMODE; 218 *mount_opts |= SF_SETMODE;
219 break; 219 break;
@@ -221,8 +221,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
221 *mount_opts |= SF_MUFS; 221 *mount_opts |= SF_MUFS;
222 break; 222 break;
223 case Opt_prefix: 223 case Opt_prefix:
224 /* Free any previous prefix */
225 kfree(*prefix);
226 *prefix = match_strdup(&args[0]); 224 *prefix = match_strdup(&args[0]);
227 if (!*prefix) 225 if (!*prefix)
228 return 0; 226 return 0;
@@ -233,21 +231,21 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
233 break; 231 break;
234 case Opt_reserved: 232 case Opt_reserved:
235 if (match_int(&args[0], reserved)) 233 if (match_int(&args[0], reserved))
236 return 1; 234 return 0;
237 break; 235 break;
238 case Opt_root: 236 case Opt_root:
239 if (match_int(&args[0], root)) 237 if (match_int(&args[0], root))
240 return 1; 238 return 0;
241 break; 239 break;
242 case Opt_setgid: 240 case Opt_setgid:
243 if (match_int(&args[0], &option)) 241 if (match_int(&args[0], &option))
244 return 1; 242 return 0;
245 *gid = option; 243 *gid = option;
246 *mount_opts |= SF_SETGID; 244 *mount_opts |= SF_SETGID;
247 break; 245 break;
248 case Opt_setuid: 246 case Opt_setuid:
249 if (match_int(&args[0], &option)) 247 if (match_int(&args[0], &option))
250 return -EINVAL; 248 return 0;
251 *uid = option; 249 *uid = option;
252 *mount_opts |= SF_SETUID; 250 *mount_opts |= SF_SETUID;
253 break; 251 break;
@@ -311,11 +309,14 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
311 return -ENOMEM; 309 return -ENOMEM;
312 sb->s_fs_info = sbi; 310 sb->s_fs_info = sbi;
313 mutex_init(&sbi->s_bmlock); 311 mutex_init(&sbi->s_bmlock);
312 spin_lock_init(&sbi->symlink_lock);
314 313
315 if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block, 314 if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
316 &blocksize,&sbi->s_prefix, 315 &blocksize,&sbi->s_prefix,
317 sbi->s_volume, &mount_flags)) { 316 sbi->s_volume, &mount_flags)) {
318 printk(KERN_ERR "AFFS: Error parsing options\n"); 317 printk(KERN_ERR "AFFS: Error parsing options\n");
318 kfree(sbi->s_prefix);
319 kfree(sbi);
319 return -EINVAL; 320 return -EINVAL;
320 } 321 }
321 /* N.B. after this point s_prefix must be released */ 322 /* N.B. after this point s_prefix must be released */
@@ -516,14 +517,18 @@ affs_remount(struct super_block *sb, int *flags, char *data)
516 unsigned long mount_flags; 517 unsigned long mount_flags;
517 int res = 0; 518 int res = 0;
518 char *new_opts = kstrdup(data, GFP_KERNEL); 519 char *new_opts = kstrdup(data, GFP_KERNEL);
520 char volume[32];
521 char *prefix = NULL;
519 522
520 pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data); 523 pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data);
521 524
522 *flags |= MS_NODIRATIME; 525 *flags |= MS_NODIRATIME;
523 526
527 memcpy(volume, sbi->s_volume, 32);
524 if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block, 528 if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block,
525 &blocksize, &sbi->s_prefix, sbi->s_volume, 529 &blocksize, &prefix, volume,
526 &mount_flags)) { 530 &mount_flags)) {
531 kfree(prefix);
527 kfree(new_opts); 532 kfree(new_opts);
528 return -EINVAL; 533 return -EINVAL;
529 } 534 }
@@ -534,6 +539,14 @@ affs_remount(struct super_block *sb, int *flags, char *data)
534 sbi->s_mode = mode; 539 sbi->s_mode = mode;
535 sbi->s_uid = uid; 540 sbi->s_uid = uid;
536 sbi->s_gid = gid; 541 sbi->s_gid = gid;
542 /* protect against readers */
543 spin_lock(&sbi->symlink_lock);
544 if (prefix) {
545 kfree(sbi->s_prefix);
546 sbi->s_prefix = prefix;
547 }
548 memcpy(sbi->s_volume, volume, 32);
549 spin_unlock(&sbi->symlink_lock);
537 550
538 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { 551 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
539 unlock_kernel(); 552 unlock_kernel();
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index 41782539c907..ee00f08c4f53 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -20,7 +20,6 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
20 int i, j; 20 int i, j;
21 char c; 21 char c;
22 char lc; 22 char lc;
23 char *pf;
24 23
25 pr_debug("AFFS: follow_link(ino=%lu)\n",inode->i_ino); 24 pr_debug("AFFS: follow_link(ino=%lu)\n",inode->i_ino);
26 25
@@ -32,11 +31,15 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
32 j = 0; 31 j = 0;
33 lf = (struct slink_front *)bh->b_data; 32 lf = (struct slink_front *)bh->b_data;
34 lc = 0; 33 lc = 0;
35 pf = AFFS_SB(inode->i_sb)->s_prefix ? AFFS_SB(inode->i_sb)->s_prefix : "/";
36 34
37 if (strchr(lf->symname,':')) { /* Handle assign or volume name */ 35 if (strchr(lf->symname,':')) { /* Handle assign or volume name */
36 struct affs_sb_info *sbi = AFFS_SB(inode->i_sb);
37 char *pf;
38 spin_lock(&sbi->symlink_lock);
39 pf = sbi->s_prefix ? sbi->s_prefix : "/";
38 while (i < 1023 && (c = pf[i])) 40 while (i < 1023 && (c = pf[i]))
39 link[i++] = c; 41 link[i++] = c;
42 spin_unlock(&sbi->symlink_lock);
40 while (i < 1023 && lf->symname[j] != ':') 43 while (i < 1023 && lf->symname[j] != ':')
41 link[i++] = lf->symname[j++]; 44 link[i++] = lf->symname[j++];
42 if (i < 1023) 45 if (i < 1023)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 6f60336c6628..8f3d9fd89604 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -353,35 +353,35 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
353 struct inode *inode; 353 struct inode *inode;
354 unsigned i, imap_len; 354 unsigned i, imap_len;
355 struct bfs_sb_info *info; 355 struct bfs_sb_info *info;
356 long ret = -EINVAL; 356 int ret = -EINVAL;
357 unsigned long i_sblock, i_eblock, i_eoff, s_size; 357 unsigned long i_sblock, i_eblock, i_eoff, s_size;
358 358
359 info = kzalloc(sizeof(*info), GFP_KERNEL); 359 info = kzalloc(sizeof(*info), GFP_KERNEL);
360 if (!info) 360 if (!info)
361 return -ENOMEM; 361 return -ENOMEM;
362 mutex_init(&info->bfs_lock);
362 s->s_fs_info = info; 363 s->s_fs_info = info;
363 364
364 sb_set_blocksize(s, BFS_BSIZE); 365 sb_set_blocksize(s, BFS_BSIZE);
365 366
366 bh = sb_bread(s, 0); 367 info->si_sbh = sb_bread(s, 0);
367 if(!bh) 368 if (!info->si_sbh)
368 goto out; 369 goto out;
369 bfs_sb = (struct bfs_super_block *)bh->b_data; 370 bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data;
370 if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { 371 if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
371 if (!silent) 372 if (!silent)
372 printf("No BFS filesystem on %s (magic=%08x)\n", 373 printf("No BFS filesystem on %s (magic=%08x)\n",
373 s->s_id, le32_to_cpu(bfs_sb->s_magic)); 374 s->s_id, le32_to_cpu(bfs_sb->s_magic));
374 goto out; 375 goto out1;
375 } 376 }
376 if (BFS_UNCLEAN(bfs_sb, s) && !silent) 377 if (BFS_UNCLEAN(bfs_sb, s) && !silent)
377 printf("%s is unclean, continuing\n", s->s_id); 378 printf("%s is unclean, continuing\n", s->s_id);
378 379
379 s->s_magic = BFS_MAGIC; 380 s->s_magic = BFS_MAGIC;
380 info->si_sbh = bh;
381 381
382 if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) { 382 if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) {
383 printf("Superblock is corrupted\n"); 383 printf("Superblock is corrupted\n");
384 goto out; 384 goto out1;
385 } 385 }
386 386
387 info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / 387 info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) /
@@ -390,7 +390,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
390 imap_len = (info->si_lasti / 8) + 1; 390 imap_len = (info->si_lasti / 8) + 1;
391 info->si_imap = kzalloc(imap_len, GFP_KERNEL); 391 info->si_imap = kzalloc(imap_len, GFP_KERNEL);
392 if (!info->si_imap) 392 if (!info->si_imap)
393 goto out; 393 goto out1;
394 for (i = 0; i < BFS_ROOT_INO; i++) 394 for (i = 0; i < BFS_ROOT_INO; i++)
395 set_bit(i, info->si_imap); 395 set_bit(i, info->si_imap);
396 396
@@ -398,15 +398,13 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
398 inode = bfs_iget(s, BFS_ROOT_INO); 398 inode = bfs_iget(s, BFS_ROOT_INO);
399 if (IS_ERR(inode)) { 399 if (IS_ERR(inode)) {
400 ret = PTR_ERR(inode); 400 ret = PTR_ERR(inode);
401 kfree(info->si_imap); 401 goto out2;
402 goto out;
403 } 402 }
404 s->s_root = d_alloc_root(inode); 403 s->s_root = d_alloc_root(inode);
405 if (!s->s_root) { 404 if (!s->s_root) {
406 iput(inode); 405 iput(inode);
407 ret = -ENOMEM; 406 ret = -ENOMEM;
408 kfree(info->si_imap); 407 goto out2;
409 goto out;
410 } 408 }
411 409
412 info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS; 410 info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS;
@@ -419,10 +417,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
419 bh = sb_bread(s, info->si_blocks - 1); 417 bh = sb_bread(s, info->si_blocks - 1);
420 if (!bh) { 418 if (!bh) {
421 printf("Last block not available: %lu\n", info->si_blocks - 1); 419 printf("Last block not available: %lu\n", info->si_blocks - 1);
422 iput(inode);
423 ret = -EIO; 420 ret = -EIO;
424 kfree(info->si_imap); 421 goto out3;
425 goto out;
426 } 422 }
427 brelse(bh); 423 brelse(bh);
428 424
@@ -459,11 +455,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
459 printf("Inode 0x%08x corrupted\n", i); 455 printf("Inode 0x%08x corrupted\n", i);
460 456
461 brelse(bh); 457 brelse(bh);
462 s->s_root = NULL; 458 ret = -EIO;
463 kfree(info->si_imap); 459 goto out3;
464 kfree(info);
465 s->s_fs_info = NULL;
466 return -EIO;
467 } 460 }
468 461
469 if (!di->i_ino) { 462 if (!di->i_ino) {
@@ -483,11 +476,17 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
483 s->s_dirt = 1; 476 s->s_dirt = 1;
484 } 477 }
485 dump_imap("read_super", s); 478 dump_imap("read_super", s);
486 mutex_init(&info->bfs_lock);
487 return 0; 479 return 0;
488 480
481out3:
482 dput(s->s_root);
483 s->s_root = NULL;
484out2:
485 kfree(info->si_imap);
486out1:
487 brelse(info->si_sbh);
489out: 488out:
490 brelse(bh); 489 mutex_destroy(&info->bfs_lock);
491 kfree(info); 490 kfree(info);
492 s->s_fs_info = NULL; 491 s->s_fs_info = NULL;
493 return ret; 492 return ret;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 346b69405363..fdd397099172 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -264,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
264#else 264#else
265 set_personality(PER_LINUX); 265 set_personality(PER_LINUX);
266#endif 266#endif
267 setup_new_exec(bprm);
267 268
268 current->mm->end_code = ex.a_text + 269 current->mm->end_code = ex.a_text +
269 (current->mm->start_code = N_TXTADDR(ex)); 270 (current->mm->start_code = N_TXTADDR(ex));
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index edd90c49003c..fd5b2ea5d299 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
662 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') 662 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
663 goto out_free_interp; 663 goto out_free_interp;
664 664
665 /*
666 * The early SET_PERSONALITY here is so that the lookup
667 * for the interpreter happens in the namespace of the
668 * to-be-execed image. SET_PERSONALITY can select an
669 * alternate root.
670 *
671 * However, SET_PERSONALITY is NOT allowed to switch
672 * this task into the new images's memory mapping
673 * policy - that is, TASK_SIZE must still evaluate to
674 * that which is appropriate to the execing application.
675 * This is because exit_mmap() needs to have TASK_SIZE
676 * evaluate to the size of the old image.
677 *
678 * So if (say) a 64-bit application is execing a 32-bit
679 * application it is the architecture's responsibility
680 * to defer changing the value of TASK_SIZE until the
681 * switch really is going to happen - do this in
682 * flush_thread(). - akpm
683 */
684 SET_PERSONALITY(loc->elf_ex);
685
686 interpreter = open_exec(elf_interpreter); 665 interpreter = open_exec(elf_interpreter);
687 retval = PTR_ERR(interpreter); 666 retval = PTR_ERR(interpreter);
688 if (IS_ERR(interpreter)) 667 if (IS_ERR(interpreter))
@@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
730 /* Verify the interpreter has a valid arch */ 709 /* Verify the interpreter has a valid arch */
731 if (!elf_check_arch(&loc->interp_elf_ex)) 710 if (!elf_check_arch(&loc->interp_elf_ex))
732 goto out_free_dentry; 711 goto out_free_dentry;
733 } else {
734 /* Executables without an interpreter also need a personality */
735 SET_PERSONALITY(loc->elf_ex);
736 } 712 }
737 713
738 /* Flush all traces of the currently running executable */ 714 /* Flush all traces of the currently running executable */
@@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
752 728
753 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) 729 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
754 current->flags |= PF_RANDOMIZE; 730 current->flags |= PF_RANDOMIZE;
755 arch_pick_mmap_layout(current->mm); 731
732 setup_new_exec(bprm);
756 733
757 /* Do this so that we can load the interpreter, if need be. We will 734 /* Do this so that we can load the interpreter, if need be. We will
758 change some of these later */ 735 change some of these later */
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c57d9ce5ff7e..18d77297ccc8 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -321,6 +321,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
321 set_personality(PER_LINUX_FDPIC); 321 set_personality(PER_LINUX_FDPIC);
322 if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) 322 if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
323 current->personality |= READ_IMPLIES_EXEC; 323 current->personality |= READ_IMPLIES_EXEC;
324
325 setup_new_exec(bprm);
326
324 set_binfmt(&elf_fdpic_format); 327 set_binfmt(&elf_fdpic_format);
325 328
326 current->mm->start_code = 0; 329 current->mm->start_code = 0;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index d4a00ea1054c..42c6b4a54445 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm,
519 519
520 /* OK, This is the point of no return */ 520 /* OK, This is the point of no return */
521 set_personality(PER_LINUX_32BIT); 521 set_personality(PER_LINUX_32BIT);
522 setup_new_exec(bprm);
522 } 523 }
523 524
524 /* 525 /*
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 2a9b5330cc5e..cc8560f6c9b0 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
227 /* OK, This is the point of no return */ 227 /* OK, This is the point of no return */
228 current->flags &= ~PF_FORKNOEXEC; 228 current->flags &= ~PF_FORKNOEXEC;
229 current->personality = PER_HPUX; 229 current->personality = PER_HPUX;
230 setup_new_exec(bprm);
230 231
231 /* Set the task size for HP-UX processes such that 232 /* Set the task size for HP-UX processes such that
232 * the gateway page is outside the address space. 233 * the gateway page is outside the address space.
diff --git a/fs/bio.c b/fs/bio.c
index 76e6713abf94..12429c9553eb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -78,7 +78,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
78 78
79 i = 0; 79 i = 0;
80 while (i < bio_slab_nr) { 80 while (i < bio_slab_nr) {
81 struct bio_slab *bslab = &bio_slabs[i]; 81 bslab = &bio_slabs[i];
82 82
83 if (!bslab->slab && entry == -1) 83 if (!bslab->slab && entry == -1)
84 entry = i; 84 entry = i;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 2e9e69987a82..6df6d6ed74fd 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -112,12 +112,14 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
112 switch (type) { 112 switch (type) {
113 case ACL_TYPE_ACCESS: 113 case ACL_TYPE_ACCESS:
114 mode = inode->i_mode; 114 mode = inode->i_mode;
115 ret = posix_acl_equiv_mode(acl, &mode);
116 if (ret < 0)
117 return ret;
118 ret = 0;
119 inode->i_mode = mode;
120 name = POSIX_ACL_XATTR_ACCESS; 115 name = POSIX_ACL_XATTR_ACCESS;
116 if (acl) {
117 ret = posix_acl_equiv_mode(acl, &mode);
118 if (ret < 0)
119 return ret;
120 inode->i_mode = mode;
121 }
122 ret = 0;
121 break; 123 break;
122 case ACL_TYPE_DEFAULT: 124 case ACL_TYPE_DEFAULT:
123 if (!S_ISDIR(inode->i_mode)) 125 if (!S_ISDIR(inode->i_mode))
@@ -242,6 +244,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
242 ACL_TYPE_ACCESS); 244 ACL_TYPE_ACCESS);
243 } 245 }
244 } 246 }
247 posix_acl_release(clone);
245 } 248 }
246failed: 249failed:
247 posix_acl_release(acl); 250 posix_acl_release(acl);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9f806dd04c27..2aa8ec6a0981 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1161,6 +1161,7 @@ struct btrfs_root {
1161#define BTRFS_MOUNT_SSD_SPREAD (1 << 8) 1161#define BTRFS_MOUNT_SSD_SPREAD (1 << 8)
1162#define BTRFS_MOUNT_NOSSD (1 << 9) 1162#define BTRFS_MOUNT_NOSSD (1 << 9)
1163#define BTRFS_MOUNT_DISCARD (1 << 10) 1163#define BTRFS_MOUNT_DISCARD (1 << 10)
1164#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
1164 1165
1165#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1166#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1166#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1167#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 009e3bd18f23..87b25543d7d1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1993,6 +1993,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1993 if (!fs_info->fs_root) 1993 if (!fs_info->fs_root)
1994 goto fail_trans_kthread; 1994 goto fail_trans_kthread;
1995 1995
1996 if (!(sb->s_flags & MS_RDONLY)) {
1997 down_read(&fs_info->cleanup_work_sem);
1998 btrfs_orphan_cleanup(fs_info->fs_root);
1999 up_read(&fs_info->cleanup_work_sem);
2000 }
2001
1996 return tree_root; 2002 return tree_root;
1997 2003
1998fail_trans_kthread: 2004fail_trans_kthread:
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 56e50137d0e6..432a2da4641e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -83,6 +83,17 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
83 return (cache->flags & bits) == bits; 83 return (cache->flags & bits) == bits;
84} 84}
85 85
86void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
87{
88 atomic_inc(&cache->count);
89}
90
91void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
92{
93 if (atomic_dec_and_test(&cache->count))
94 kfree(cache);
95}
96
86/* 97/*
87 * this adds the block group to the fs_info rb tree for the block group 98 * this adds the block group to the fs_info rb tree for the block group
88 * cache 99 * cache
@@ -156,7 +167,7 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
156 } 167 }
157 } 168 }
158 if (ret) 169 if (ret)
159 atomic_inc(&ret->count); 170 btrfs_get_block_group(ret);
160 spin_unlock(&info->block_group_cache_lock); 171 spin_unlock(&info->block_group_cache_lock);
161 172
162 return ret; 173 return ret;
@@ -407,6 +418,8 @@ err:
407 418
408 put_caching_control(caching_ctl); 419 put_caching_control(caching_ctl);
409 atomic_dec(&block_group->space_info->caching_threads); 420 atomic_dec(&block_group->space_info->caching_threads);
421 btrfs_put_block_group(block_group);
422
410 return 0; 423 return 0;
411} 424}
412 425
@@ -447,6 +460,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
447 up_write(&fs_info->extent_commit_sem); 460 up_write(&fs_info->extent_commit_sem);
448 461
449 atomic_inc(&cache->space_info->caching_threads); 462 atomic_inc(&cache->space_info->caching_threads);
463 btrfs_get_block_group(cache);
450 464
451 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 465 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
452 cache->key.objectid); 466 cache->key.objectid);
@@ -486,12 +500,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
486 return cache; 500 return cache;
487} 501}
488 502
489void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
490{
491 if (atomic_dec_and_test(&cache->count))
492 kfree(cache);
493}
494
495static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 503static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
496 u64 flags) 504 u64 flags)
497{ 505{
@@ -2582,7 +2590,7 @@ next_block_group(struct btrfs_root *root,
2582 if (node) { 2590 if (node) {
2583 cache = rb_entry(node, struct btrfs_block_group_cache, 2591 cache = rb_entry(node, struct btrfs_block_group_cache,
2584 cache_node); 2592 cache_node);
2585 atomic_inc(&cache->count); 2593 btrfs_get_block_group(cache);
2586 } else 2594 } else
2587 cache = NULL; 2595 cache = NULL;
2588 spin_unlock(&root->fs_info->block_group_cache_lock); 2596 spin_unlock(&root->fs_info->block_group_cache_lock);
@@ -4227,7 +4235,7 @@ search:
4227 u64 offset; 4235 u64 offset;
4228 int cached; 4236 int cached;
4229 4237
4230 atomic_inc(&block_group->count); 4238 btrfs_get_block_group(block_group);
4231 search_start = block_group->key.objectid; 4239 search_start = block_group->key.objectid;
4232 4240
4233have_block_group: 4241have_block_group:
@@ -4315,7 +4323,7 @@ have_block_group:
4315 4323
4316 btrfs_put_block_group(block_group); 4324 btrfs_put_block_group(block_group);
4317 block_group = last_ptr->block_group; 4325 block_group = last_ptr->block_group;
4318 atomic_inc(&block_group->count); 4326 btrfs_get_block_group(block_group);
4319 spin_unlock(&last_ptr->lock); 4327 spin_unlock(&last_ptr->lock);
4320 spin_unlock(&last_ptr->refill_lock); 4328 spin_unlock(&last_ptr->refill_lock);
4321 4329
@@ -7395,9 +7403,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7395 wait_block_group_cache_done(block_group); 7403 wait_block_group_cache_done(block_group);
7396 7404
7397 btrfs_remove_free_space_cache(block_group); 7405 btrfs_remove_free_space_cache(block_group);
7398 7406 btrfs_put_block_group(block_group);
7399 WARN_ON(atomic_read(&block_group->count) != 1);
7400 kfree(block_group);
7401 7407
7402 spin_lock(&info->block_group_cache_lock); 7408 spin_lock(&info->block_group_cache_lock);
7403 } 7409 }
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 46bea0f4dc7b..428fcac45f90 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -155,20 +155,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
155 return NULL; 155 return NULL;
156} 156}
157 157
158/*
159 * look for an offset in the tree, and if it can't be found, return
160 * the first offset we can find smaller than 'offset'.
161 */
162static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
163{
164 struct rb_node *prev;
165 struct rb_node *ret;
166 ret = __tree_search(root, offset, &prev, NULL);
167 if (!ret)
168 return prev;
169 return ret;
170}
171
172/* check to see if two extent_map structs are adjacent and safe to merge */ 158/* check to see if two extent_map structs are adjacent and safe to merge */
173static int mergable_maps(struct extent_map *prev, struct extent_map *next) 159static int mergable_maps(struct extent_map *prev, struct extent_map *next)
174{ 160{
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index feaa13b105d9..c02033596f02 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -506,7 +506,8 @@ next_slot:
506} 506}
507 507
508static int extent_mergeable(struct extent_buffer *leaf, int slot, 508static int extent_mergeable(struct extent_buffer *leaf, int slot,
509 u64 objectid, u64 bytenr, u64 *start, u64 *end) 509 u64 objectid, u64 bytenr, u64 orig_offset,
510 u64 *start, u64 *end)
510{ 511{
511 struct btrfs_file_extent_item *fi; 512 struct btrfs_file_extent_item *fi;
512 struct btrfs_key key; 513 struct btrfs_key key;
@@ -522,6 +523,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
522 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 523 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
523 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG || 524 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
524 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr || 525 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
526 btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
525 btrfs_file_extent_compression(leaf, fi) || 527 btrfs_file_extent_compression(leaf, fi) ||
526 btrfs_file_extent_encryption(leaf, fi) || 528 btrfs_file_extent_encryption(leaf, fi) ||
527 btrfs_file_extent_other_encoding(leaf, fi)) 529 btrfs_file_extent_other_encoding(leaf, fi))
@@ -561,6 +563,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
561 u64 split; 563 u64 split;
562 int del_nr = 0; 564 int del_nr = 0;
563 int del_slot = 0; 565 int del_slot = 0;
566 int recow;
564 int ret; 567 int ret;
565 568
566 btrfs_drop_extent_cache(inode, start, end - 1, 0); 569 btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -568,6 +571,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
568 path = btrfs_alloc_path(); 571 path = btrfs_alloc_path();
569 BUG_ON(!path); 572 BUG_ON(!path);
570again: 573again:
574 recow = 0;
571 split = start; 575 split = start;
572 key.objectid = inode->i_ino; 576 key.objectid = inode->i_ino;
573 key.type = BTRFS_EXTENT_DATA_KEY; 577 key.type = BTRFS_EXTENT_DATA_KEY;
@@ -591,12 +595,60 @@ again:
591 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 595 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
592 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 596 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
593 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); 597 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
598 memcpy(&new_key, &key, sizeof(new_key));
599
600 if (start == key.offset && end < extent_end) {
601 other_start = 0;
602 other_end = start;
603 if (extent_mergeable(leaf, path->slots[0] - 1,
604 inode->i_ino, bytenr, orig_offset,
605 &other_start, &other_end)) {
606 new_key.offset = end;
607 btrfs_set_item_key_safe(trans, root, path, &new_key);
608 fi = btrfs_item_ptr(leaf, path->slots[0],
609 struct btrfs_file_extent_item);
610 btrfs_set_file_extent_num_bytes(leaf, fi,
611 extent_end - end);
612 btrfs_set_file_extent_offset(leaf, fi,
613 end - orig_offset);
614 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
615 struct btrfs_file_extent_item);
616 btrfs_set_file_extent_num_bytes(leaf, fi,
617 end - other_start);
618 btrfs_mark_buffer_dirty(leaf);
619 goto out;
620 }
621 }
622
623 if (start > key.offset && end == extent_end) {
624 other_start = end;
625 other_end = 0;
626 if (extent_mergeable(leaf, path->slots[0] + 1,
627 inode->i_ino, bytenr, orig_offset,
628 &other_start, &other_end)) {
629 fi = btrfs_item_ptr(leaf, path->slots[0],
630 struct btrfs_file_extent_item);
631 btrfs_set_file_extent_num_bytes(leaf, fi,
632 start - key.offset);
633 path->slots[0]++;
634 new_key.offset = start;
635 btrfs_set_item_key_safe(trans, root, path, &new_key);
636
637 fi = btrfs_item_ptr(leaf, path->slots[0],
638 struct btrfs_file_extent_item);
639 btrfs_set_file_extent_num_bytes(leaf, fi,
640 other_end - start);
641 btrfs_set_file_extent_offset(leaf, fi,
642 start - orig_offset);
643 btrfs_mark_buffer_dirty(leaf);
644 goto out;
645 }
646 }
594 647
595 while (start > key.offset || end < extent_end) { 648 while (start > key.offset || end < extent_end) {
596 if (key.offset == start) 649 if (key.offset == start)
597 split = end; 650 split = end;
598 651
599 memcpy(&new_key, &key, sizeof(new_key));
600 new_key.offset = split; 652 new_key.offset = split;
601 ret = btrfs_duplicate_item(trans, root, path, &new_key); 653 ret = btrfs_duplicate_item(trans, root, path, &new_key);
602 if (ret == -EAGAIN) { 654 if (ret == -EAGAIN) {
@@ -631,15 +683,18 @@ again:
631 path->slots[0]--; 683 path->slots[0]--;
632 extent_end = end; 684 extent_end = end;
633 } 685 }
686 recow = 1;
634 } 687 }
635 688
636 fi = btrfs_item_ptr(leaf, path->slots[0],
637 struct btrfs_file_extent_item);
638
639 other_start = end; 689 other_start = end;
640 other_end = 0; 690 other_end = 0;
641 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, 691 if (extent_mergeable(leaf, path->slots[0] + 1,
642 bytenr, &other_start, &other_end)) { 692 inode->i_ino, bytenr, orig_offset,
693 &other_start, &other_end)) {
694 if (recow) {
695 btrfs_release_path(root, path);
696 goto again;
697 }
643 extent_end = other_end; 698 extent_end = other_end;
644 del_slot = path->slots[0] + 1; 699 del_slot = path->slots[0] + 1;
645 del_nr++; 700 del_nr++;
@@ -650,8 +705,13 @@ again:
650 } 705 }
651 other_start = 0; 706 other_start = 0;
652 other_end = start; 707 other_end = start;
653 if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino, 708 if (extent_mergeable(leaf, path->slots[0] - 1,
654 bytenr, &other_start, &other_end)) { 709 inode->i_ino, bytenr, orig_offset,
710 &other_start, &other_end)) {
711 if (recow) {
712 btrfs_release_path(root, path);
713 goto again;
714 }
655 key.offset = other_start; 715 key.offset = other_start;
656 del_slot = path->slots[0]; 716 del_slot = path->slots[0];
657 del_nr++; 717 del_nr++;
@@ -660,22 +720,22 @@ again:
660 inode->i_ino, orig_offset); 720 inode->i_ino, orig_offset);
661 BUG_ON(ret); 721 BUG_ON(ret);
662 } 722 }
723 fi = btrfs_item_ptr(leaf, path->slots[0],
724 struct btrfs_file_extent_item);
663 if (del_nr == 0) { 725 if (del_nr == 0) {
664 btrfs_set_file_extent_type(leaf, fi, 726 btrfs_set_file_extent_type(leaf, fi,
665 BTRFS_FILE_EXTENT_REG); 727 BTRFS_FILE_EXTENT_REG);
666 btrfs_mark_buffer_dirty(leaf); 728 btrfs_mark_buffer_dirty(leaf);
667 goto out; 729 } else {
668 } 730 btrfs_set_file_extent_type(leaf, fi,
669 731 BTRFS_FILE_EXTENT_REG);
670 fi = btrfs_item_ptr(leaf, del_slot - 1, 732 btrfs_set_file_extent_num_bytes(leaf, fi,
671 struct btrfs_file_extent_item); 733 extent_end - key.offset);
672 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); 734 btrfs_mark_buffer_dirty(leaf);
673 btrfs_set_file_extent_num_bytes(leaf, fi,
674 extent_end - key.offset);
675 btrfs_mark_buffer_dirty(leaf);
676 735
677 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 736 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
678 BUG_ON(ret); 737 BUG_ON(ret);
738 }
679out: 739out:
680 btrfs_free_path(path); 740 btrfs_free_path(path);
681 return 0; 741 return 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5440bab23635..8cd109972fa6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -483,7 +483,8 @@ again:
483 nr_pages_ret = 0; 483 nr_pages_ret = 0;
484 484
485 /* flag the file so we don't compress in the future */ 485 /* flag the file so we don't compress in the future */
486 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 486 if (!btrfs_test_opt(root, FORCE_COMPRESS))
487 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
487 } 488 }
488 if (will_compress) { 489 if (will_compress) {
489 *num_added += 1; 490 *num_added += 1;
@@ -3995,7 +3996,11 @@ skip:
3995 3996
3996 /* Reached end of directory/root. Bump pos past the last item. */ 3997 /* Reached end of directory/root. Bump pos past the last item. */
3997 if (key_type == BTRFS_DIR_INDEX_KEY) 3998 if (key_type == BTRFS_DIR_INDEX_KEY)
3998 filp->f_pos = INT_LIMIT(off_t); 3999 /*
4000 * 32-bit glibc will use getdents64, but then strtol -
4001 * so the last number we can serve is this.
4002 */
4003 filp->f_pos = 0x7fffffff;
3999 else 4004 else
4000 filp->f_pos++; 4005 filp->f_pos++;
4001nopos: 4006nopos:
@@ -5789,7 +5794,7 @@ out_fail:
5789} 5794}
5790 5795
5791static int prealloc_file_range(struct inode *inode, u64 start, u64 end, 5796static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5792 u64 alloc_hint, int mode) 5797 u64 alloc_hint, int mode, loff_t actual_len)
5793{ 5798{
5794 struct btrfs_trans_handle *trans; 5799 struct btrfs_trans_handle *trans;
5795 struct btrfs_root *root = BTRFS_I(inode)->root; 5800 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5798,6 +5803,7 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5798 u64 cur_offset = start; 5803 u64 cur_offset = start;
5799 u64 num_bytes = end - start; 5804 u64 num_bytes = end - start;
5800 int ret = 0; 5805 int ret = 0;
5806 u64 i_size;
5801 5807
5802 while (num_bytes > 0) { 5808 while (num_bytes > 0) {
5803 alloc_size = min(num_bytes, root->fs_info->max_extent); 5809 alloc_size = min(num_bytes, root->fs_info->max_extent);
@@ -5836,8 +5842,12 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5836 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 5842 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5837 if (!(mode & FALLOC_FL_KEEP_SIZE) && 5843 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5838 cur_offset > inode->i_size) { 5844 cur_offset > inode->i_size) {
5839 i_size_write(inode, cur_offset); 5845 if (cur_offset > actual_len)
5840 btrfs_ordered_update_i_size(inode, cur_offset, NULL); 5846 i_size = actual_len;
5847 else
5848 i_size = cur_offset;
5849 i_size_write(inode, i_size);
5850 btrfs_ordered_update_i_size(inode, i_size, NULL);
5841 } 5851 }
5842 5852
5843 ret = btrfs_update_inode(trans, root, inode); 5853 ret = btrfs_update_inode(trans, root, inode);
@@ -5930,7 +5940,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5930 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 5940 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5931 ret = prealloc_file_range(inode, 5941 ret = prealloc_file_range(inode,
5932 cur_offset, last_byte, 5942 cur_offset, last_byte,
5933 alloc_hint, mode); 5943 alloc_hint, mode, offset+len);
5934 if (ret < 0) { 5944 if (ret < 0) {
5935 free_extent_map(em); 5945 free_extent_map(em);
5936 break; 5946 break;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index b10a49d4bc6a..5c2a9e78a949 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -626,6 +626,8 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
626 626
627 if (ordered) 627 if (ordered)
628 offset = entry_end(ordered); 628 offset = entry_end(ordered);
629 else
630 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
629 631
630 mutex_lock(&tree->mutex); 632 mutex_lock(&tree->mutex);
631 disk_i_size = BTRFS_I(inode)->disk_i_size; 633 disk_i_size = BTRFS_I(inode)->disk_i_size;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index a9728680eca8..ed3e4a2ec2c8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3281,8 +3281,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3281 return -ENOMEM; 3281 return -ENOMEM;
3282 3282
3283 path = btrfs_alloc_path(); 3283 path = btrfs_alloc_path();
3284 if (!path) 3284 if (!path) {
3285 kfree(cluster);
3285 return -ENOMEM; 3286 return -ENOMEM;
3287 }
3286 3288
3287 rc->extents_found = 0; 3289 rc->extents_found = 0;
3288 rc->extents_skipped = 0; 3290 rc->extents_skipped = 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3f9b45704fcd..8a1ea6e64575 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,7 +66,8 @@ enum {
66 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, 66 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
67 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, 67 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
68 Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, 68 Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
69 Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, 69 Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio,
70 Opt_flushoncommit,
70 Opt_discard, Opt_err, 71 Opt_discard, Opt_err,
71}; 72};
72 73
@@ -82,6 +83,7 @@ static match_table_t tokens = {
82 {Opt_alloc_start, "alloc_start=%s"}, 83 {Opt_alloc_start, "alloc_start=%s"},
83 {Opt_thread_pool, "thread_pool=%d"}, 84 {Opt_thread_pool, "thread_pool=%d"},
84 {Opt_compress, "compress"}, 85 {Opt_compress, "compress"},
86 {Opt_compress_force, "compress-force"},
85 {Opt_ssd, "ssd"}, 87 {Opt_ssd, "ssd"},
86 {Opt_ssd_spread, "ssd_spread"}, 88 {Opt_ssd_spread, "ssd_spread"},
87 {Opt_nossd, "nossd"}, 89 {Opt_nossd, "nossd"},
@@ -173,6 +175,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
173 printk(KERN_INFO "btrfs: use compression\n"); 175 printk(KERN_INFO "btrfs: use compression\n");
174 btrfs_set_opt(info->mount_opt, COMPRESS); 176 btrfs_set_opt(info->mount_opt, COMPRESS);
175 break; 177 break;
178 case Opt_compress_force:
179 printk(KERN_INFO "btrfs: forcing compression\n");
180 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
181 btrfs_set_opt(info->mount_opt, COMPRESS);
182 break;
176 case Opt_ssd: 183 case Opt_ssd:
177 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); 184 printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
178 btrfs_set_opt(info->mount_opt, SSD); 185 btrfs_set_opt(info->mount_opt, SSD);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 198cff28766d..41ecbb2347f2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1135,7 +1135,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1135 root->fs_info->avail_metadata_alloc_bits; 1135 root->fs_info->avail_metadata_alloc_bits;
1136 1136
1137 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && 1137 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
1138 root->fs_info->fs_devices->rw_devices <= 4) { 1138 root->fs_info->fs_devices->num_devices <= 4) {
1139 printk(KERN_ERR "btrfs: unable to go below four devices " 1139 printk(KERN_ERR "btrfs: unable to go below four devices "
1140 "on raid10\n"); 1140 "on raid10\n");
1141 ret = -EINVAL; 1141 ret = -EINVAL;
@@ -1143,7 +1143,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1143 } 1143 }
1144 1144
1145 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && 1145 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
1146 root->fs_info->fs_devices->rw_devices <= 2) { 1146 root->fs_info->fs_devices->num_devices <= 2) {
1147 printk(KERN_ERR "btrfs: unable to go below two " 1147 printk(KERN_ERR "btrfs: unable to go below two "
1148 "devices on raid1\n"); 1148 "devices on raid1\n");
1149 ret = -EINVAL; 1149 ret = -EINVAL;
@@ -1434,8 +1434,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1434 return -EINVAL; 1434 return -EINVAL;
1435 1435
1436 bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); 1436 bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
1437 if (!bdev) 1437 if (IS_ERR(bdev))
1438 return -EIO; 1438 return PTR_ERR(bdev);
1439 1439
1440 if (root->fs_info->fs_devices->seeding) { 1440 if (root->fs_info->fs_devices->seeding) {
1441 seeding_dev = 1; 1441 seeding_dev = 1;
@@ -2538,6 +2538,11 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
2538 if (!em) 2538 if (!em)
2539 return 1; 2539 return 1;
2540 2540
2541 if (btrfs_test_opt(root, DEGRADED)) {
2542 free_extent_map(em);
2543 return 0;
2544 }
2545
2541 map = (struct map_lookup *)em->bdev; 2546 map = (struct map_lookup *)em->bdev;
2542 for (i = 0; i < map->num_stripes; i++) { 2547 for (i = 0; i < map->num_stripes; i++) {
2543 if (!map->stripes[i].dev->writeable) { 2548 if (!map->stripes[i].dev->writeable) {
@@ -2649,8 +2654,10 @@ again:
2649 em = lookup_extent_mapping(em_tree, logical, *length); 2654 em = lookup_extent_mapping(em_tree, logical, *length);
2650 read_unlock(&em_tree->lock); 2655 read_unlock(&em_tree->lock);
2651 2656
2652 if (!em && unplug_page) 2657 if (!em && unplug_page) {
2658 kfree(multi);
2653 return 0; 2659 return 0;
2660 }
2654 2661
2655 if (!em) { 2662 if (!em) {
2656 printk(KERN_CRIT "unable to find logical %llu len %llu\n", 2663 printk(KERN_CRIT "unable to find logical %llu len %llu\n",
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index fea9e898c4ba..b44ce0a0711c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -269,7 +269,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
269 int err; 269 int err;
270 270
271 mntget(newmnt); 271 mntget(newmnt);
272 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist); 272 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags | MNT_SHRINKABLE, mntlist);
273 switch (err) { 273 switch (err) {
274 case 0: 274 case 0:
275 path_put(&nd->path); 275 path_put(&nd->path);
@@ -371,7 +371,6 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
371 if (IS_ERR(mnt)) 371 if (IS_ERR(mnt))
372 goto out_err; 372 goto out_err;
373 373
374 nd->path.mnt->mnt_flags |= MNT_SHRINKABLE;
375 rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list); 374 rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list);
376 375
377out: 376out:
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 332dd00f0894..c5c45de1a2ee 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1005,6 +1005,9 @@ COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND)
1005COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST) 1005COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST)
1006COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI) 1006COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI)
1007#endif 1007#endif
1008/* Big V (don't complain on serial console) */
1009IGNORE_IOCTL(VT_OPENQRY)
1010IGNORE_IOCTL(VT_GETMODE)
1008/* Little p (/dev/rtc, /dev/envctrl, etc.) */ 1011/* Little p (/dev/rtc, /dev/envctrl, etc.) */
1009COMPATIBLE_IOCTL(RTC_AIE_ON) 1012COMPATIBLE_IOCTL(RTC_AIE_ON)
1010COMPATIBLE_IOCTL(RTC_AIE_OFF) 1013COMPATIBLE_IOCTL(RTC_AIE_OFF)
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index c8afa6b1d91d..32a5f46b1157 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -121,8 +121,10 @@ static int get_target(const char *symname, struct path *path,
121 ret = -ENOENT; 121 ret = -ENOENT;
122 path_put(path); 122 path_put(path);
123 } 123 }
124 } else 124 } else {
125 ret = -EPERM; 125 ret = -EPERM;
126 path_put(path);
127 }
126 } 128 }
127 129
128 return ret; 130 return ret;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b486169f42bf..274ac865bae8 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -160,15 +160,8 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
160 * block. A pointer to that is in the struct vfsmount that we 160 * block. A pointer to that is in the struct vfsmount that we
161 * have around. 161 * have around.
162 */ 162 */
163 if (!parent) { 163 if (!parent)
164 if (debugfs_mount && debugfs_mount->mnt_sb) { 164 parent = debugfs_mount->mnt_sb->s_root;
165 parent = debugfs_mount->mnt_sb->s_root;
166 }
167 }
168 if (!parent) {
169 pr_debug("debugfs: Ah! can not find a parent!\n");
170 return -EFAULT;
171 }
172 165
173 *dentry = NULL; 166 *dentry = NULL;
174 mutex_lock(&parent->d_inode->i_mutex); 167 mutex_lock(&parent->d_inode->i_mutex);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index fbb6e5eed697..7cb0a59f4b9d 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1748,7 +1748,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1748 char *cipher_name, size_t *key_size) 1748 char *cipher_name, size_t *key_size)
1749{ 1749{
1750 char dummy_key[ECRYPTFS_MAX_KEY_BYTES]; 1750 char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
1751 char *full_alg_name; 1751 char *full_alg_name = NULL;
1752 int rc; 1752 int rc;
1753 1753
1754 *key_tfm = NULL; 1754 *key_tfm = NULL;
@@ -1763,7 +1763,6 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1763 if (rc) 1763 if (rc)
1764 goto out; 1764 goto out;
1765 *key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC); 1765 *key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC);
1766 kfree(full_alg_name);
1767 if (IS_ERR(*key_tfm)) { 1766 if (IS_ERR(*key_tfm)) {
1768 rc = PTR_ERR(*key_tfm); 1767 rc = PTR_ERR(*key_tfm);
1769 printk(KERN_ERR "Unable to allocate crypto cipher with name " 1768 printk(KERN_ERR "Unable to allocate crypto cipher with name "
@@ -1786,6 +1785,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1786 goto out; 1785 goto out;
1787 } 1786 }
1788out: 1787out:
1788 kfree(full_alg_name);
1789 return rc; 1789 return rc;
1790} 1790}
1791 1791
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 9e944057001b..678172b61be2 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -158,7 +158,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
158 struct dentry *ecryptfs_dentry = file->f_path.dentry; 158 struct dentry *ecryptfs_dentry = file->f_path.dentry;
159 /* Private value of ecryptfs_dentry allocated in 159 /* Private value of ecryptfs_dentry allocated in
160 * ecryptfs_lookup() */ 160 * ecryptfs_lookup() */
161 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 161 struct dentry *lower_dentry;
162 struct ecryptfs_file_info *file_info; 162 struct ecryptfs_file_info *file_info;
163 163
164 mount_crypt_stat = &ecryptfs_superblock_to_private( 164 mount_crypt_stat = &ecryptfs_superblock_to_private(
@@ -191,13 +191,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
191 | ECRYPTFS_ENCRYPTED); 191 | ECRYPTFS_ENCRYPTED);
192 } 192 }
193 mutex_unlock(&crypt_stat->cs_mutex); 193 mutex_unlock(&crypt_stat->cs_mutex);
194 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
195 && !(file->f_flags & O_RDONLY)) {
196 rc = -EPERM;
197 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
198 "file must hence be opened RO\n", __func__);
199 goto out;
200 }
201 if (!ecryptfs_inode_to_private(inode)->lower_file) { 194 if (!ecryptfs_inode_to_private(inode)->lower_file) {
202 rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 195 rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
203 if (rc) { 196 if (rc) {
@@ -208,6 +201,13 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
208 goto out; 201 goto out;
209 } 202 }
210 } 203 }
204 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
205 && !(file->f_flags & O_RDONLY)) {
206 rc = -EPERM;
207 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
208 "file must hence be opened RO\n", __func__);
209 goto out;
210 }
211 ecryptfs_set_file_lower( 211 ecryptfs_set_file_lower(
212 file, ecryptfs_inode_to_private(inode)->lower_file); 212 file, ecryptfs_inode_to_private(inode)->lower_file);
213 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 213 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
@@ -299,7 +299,6 @@ static int ecryptfs_ioctl(struct inode *inode, struct file *file,
299const struct file_operations ecryptfs_dir_fops = { 299const struct file_operations ecryptfs_dir_fops = {
300 .readdir = ecryptfs_readdir, 300 .readdir = ecryptfs_readdir,
301 .ioctl = ecryptfs_ioctl, 301 .ioctl = ecryptfs_ioctl,
302 .mmap = generic_file_mmap,
303 .open = ecryptfs_open, 302 .open = ecryptfs_open,
304 .flush = ecryptfs_flush, 303 .flush = ecryptfs_flush,
305 .release = ecryptfs_release, 304 .release = ecryptfs_release,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 429ca0b3ba08..4a430ab4115c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -282,7 +282,8 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
282 goto out; 282 goto out;
283 } 283 }
284 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, 284 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
285 ecryptfs_dir_inode->i_sb, 1); 285 ecryptfs_dir_inode->i_sb,
286 ECRYPTFS_INTERPOSE_FLAG_D_ADD);
286 if (rc) { 287 if (rc) {
287 printk(KERN_ERR "%s: Error interposing; rc = [%d]\n", 288 printk(KERN_ERR "%s: Error interposing; rc = [%d]\n",
288 __func__, rc); 289 __func__, rc);
@@ -463,9 +464,6 @@ out_lock:
463 unlock_dir(lower_dir_dentry); 464 unlock_dir(lower_dir_dentry);
464 dput(lower_new_dentry); 465 dput(lower_new_dentry);
465 dput(lower_old_dentry); 466 dput(lower_old_dentry);
466 d_drop(lower_old_dentry);
467 d_drop(new_dentry);
468 d_drop(old_dentry);
469 return rc; 467 return rc;
470} 468}
471 469
@@ -614,6 +612,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
614 struct dentry *lower_new_dentry; 612 struct dentry *lower_new_dentry;
615 struct dentry *lower_old_dir_dentry; 613 struct dentry *lower_old_dir_dentry;
616 struct dentry *lower_new_dir_dentry; 614 struct dentry *lower_new_dir_dentry;
615 struct dentry *trap = NULL;
617 616
618 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); 617 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
619 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); 618 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
@@ -621,7 +620,17 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
621 dget(lower_new_dentry); 620 dget(lower_new_dentry);
622 lower_old_dir_dentry = dget_parent(lower_old_dentry); 621 lower_old_dir_dentry = dget_parent(lower_old_dentry);
623 lower_new_dir_dentry = dget_parent(lower_new_dentry); 622 lower_new_dir_dentry = dget_parent(lower_new_dentry);
624 lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 623 trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
624 /* source should not be ancestor of target */
625 if (trap == lower_old_dentry) {
626 rc = -EINVAL;
627 goto out_lock;
628 }
629 /* target should not be ancestor of source */
630 if (trap == lower_new_dentry) {
631 rc = -ENOTEMPTY;
632 goto out_lock;
633 }
625 rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, 634 rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
626 lower_new_dir_dentry->d_inode, lower_new_dentry); 635 lower_new_dir_dentry->d_inode, lower_new_dentry);
627 if (rc) 636 if (rc)
@@ -715,31 +724,31 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
715 /* Released in ecryptfs_put_link(); only release here on error */ 724 /* Released in ecryptfs_put_link(); only release here on error */
716 buf = kmalloc(len, GFP_KERNEL); 725 buf = kmalloc(len, GFP_KERNEL);
717 if (!buf) { 726 if (!buf) {
718 rc = -ENOMEM; 727 buf = ERR_PTR(-ENOMEM);
719 goto out; 728 goto out;
720 } 729 }
721 old_fs = get_fs(); 730 old_fs = get_fs();
722 set_fs(get_ds()); 731 set_fs(get_ds());
723 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); 732 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
724 set_fs(old_fs); 733 set_fs(old_fs);
725 if (rc < 0) 734 if (rc < 0) {
726 goto out_free; 735 kfree(buf);
727 else 736 buf = ERR_PTR(rc);
737 } else
728 buf[rc] = '\0'; 738 buf[rc] = '\0';
729 rc = 0;
730 nd_set_link(nd, buf);
731 goto out;
732out_free:
733 kfree(buf);
734out: 739out:
735 return ERR_PTR(rc); 740 nd_set_link(nd, buf);
741 return NULL;
736} 742}
737 743
738static void 744static void
739ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) 745ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
740{ 746{
741 /* Free the char* */ 747 char *buf = nd_get_link(nd);
742 kfree(nd_get_link(nd)); 748 if (!IS_ERR(buf)) {
749 /* Free the char* */
750 kfree(buf);
751 }
743} 752}
744 753
745/** 754/**
@@ -772,18 +781,23 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
772} 781}
773 782
774/** 783/**
775 * ecryptfs_truncate 784 * truncate_upper
776 * @dentry: The ecryptfs layer dentry 785 * @dentry: The ecryptfs layer dentry
777 * @new_length: The length to expand the file to 786 * @ia: Address of the ecryptfs inode's attributes
787 * @lower_ia: Address of the lower inode's attributes
778 * 788 *
779 * Function to handle truncations modifying the size of the file. Note 789 * Function to handle truncations modifying the size of the file. Note
780 * that the file sizes are interpolated. When expanding, we are simply 790 * that the file sizes are interpolated. When expanding, we are simply
781 * writing strings of 0's out. When truncating, we need to modify the 791 * writing strings of 0's out. When truncating, we truncate the upper
782 * underlying file size according to the page index interpolations. 792 * inode and update the lower_ia according to the page index
793 * interpolations. If ATTR_SIZE is set in lower_ia->ia_valid upon return,
794 * the caller must use lower_ia in a call to notify_change() to perform
795 * the truncation of the lower inode.
783 * 796 *
784 * Returns zero on success; non-zero otherwise 797 * Returns zero on success; non-zero otherwise
785 */ 798 */
786int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) 799static int truncate_upper(struct dentry *dentry, struct iattr *ia,
800 struct iattr *lower_ia)
787{ 801{
788 int rc = 0; 802 int rc = 0;
789 struct inode *inode = dentry->d_inode; 803 struct inode *inode = dentry->d_inode;
@@ -794,8 +808,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
794 loff_t lower_size_before_truncate; 808 loff_t lower_size_before_truncate;
795 loff_t lower_size_after_truncate; 809 loff_t lower_size_after_truncate;
796 810
797 if (unlikely((new_length == i_size))) 811 if (unlikely((ia->ia_size == i_size))) {
812 lower_ia->ia_valid &= ~ATTR_SIZE;
798 goto out; 813 goto out;
814 }
799 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 815 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
800 /* Set up a fake ecryptfs file, this is used to interface with 816 /* Set up a fake ecryptfs file, this is used to interface with
801 * the file in the underlying filesystem so that the 817 * the file in the underlying filesystem so that the
@@ -815,28 +831,30 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
815 &fake_ecryptfs_file, 831 &fake_ecryptfs_file,
816 ecryptfs_inode_to_private(dentry->d_inode)->lower_file); 832 ecryptfs_inode_to_private(dentry->d_inode)->lower_file);
817 /* Switch on growing or shrinking file */ 833 /* Switch on growing or shrinking file */
818 if (new_length > i_size) { 834 if (ia->ia_size > i_size) {
819 char zero[] = { 0x00 }; 835 char zero[] = { 0x00 };
820 836
837 lower_ia->ia_valid &= ~ATTR_SIZE;
821 /* Write a single 0 at the last position of the file; 838 /* Write a single 0 at the last position of the file;
822 * this triggers code that will fill in 0's throughout 839 * this triggers code that will fill in 0's throughout
823 * the intermediate portion of the previous end of the 840 * the intermediate portion of the previous end of the
824 * file and the new and of the file */ 841 * file and the new and of the file */
825 rc = ecryptfs_write(&fake_ecryptfs_file, zero, 842 rc = ecryptfs_write(&fake_ecryptfs_file, zero,
826 (new_length - 1), 1); 843 (ia->ia_size - 1), 1);
827 } else { /* new_length < i_size_read(inode) */ 844 } else { /* ia->ia_size < i_size_read(inode) */
828 /* We're chopping off all the pages down do the page 845 /* We're chopping off all the pages down to the page
829 * in which new_length is located. Fill in the end of 846 * in which ia->ia_size is located. Fill in the end of
830 * that page from (new_length & ~PAGE_CACHE_MASK) to 847 * that page from (ia->ia_size & ~PAGE_CACHE_MASK) to
831 * PAGE_CACHE_SIZE with zeros. */ 848 * PAGE_CACHE_SIZE with zeros. */
832 size_t num_zeros = (PAGE_CACHE_SIZE 849 size_t num_zeros = (PAGE_CACHE_SIZE
833 - (new_length & ~PAGE_CACHE_MASK)); 850 - (ia->ia_size & ~PAGE_CACHE_MASK));
834 851
835 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 852 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
836 rc = vmtruncate(inode, new_length); 853 rc = vmtruncate(inode, ia->ia_size);
837 if (rc) 854 if (rc)
838 goto out_free; 855 goto out_free;
839 rc = vmtruncate(lower_dentry->d_inode, new_length); 856 lower_ia->ia_size = ia->ia_size;
857 lower_ia->ia_valid |= ATTR_SIZE;
840 goto out_free; 858 goto out_free;
841 } 859 }
842 if (num_zeros) { 860 if (num_zeros) {
@@ -848,7 +866,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
848 goto out_free; 866 goto out_free;
849 } 867 }
850 rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt, 868 rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt,
851 new_length, num_zeros); 869 ia->ia_size, num_zeros);
852 kfree(zeros_virt); 870 kfree(zeros_virt);
853 if (rc) { 871 if (rc) {
854 printk(KERN_ERR "Error attempting to zero out " 872 printk(KERN_ERR "Error attempting to zero out "
@@ -857,7 +875,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
857 goto out_free; 875 goto out_free;
858 } 876 }
859 } 877 }
860 vmtruncate(inode, new_length); 878 vmtruncate(inode, ia->ia_size);
861 rc = ecryptfs_write_inode_size_to_metadata(inode); 879 rc = ecryptfs_write_inode_size_to_metadata(inode);
862 if (rc) { 880 if (rc) {
863 printk(KERN_ERR "Problem with " 881 printk(KERN_ERR "Problem with "
@@ -870,10 +888,12 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
870 lower_size_before_truncate = 888 lower_size_before_truncate =
871 upper_size_to_lower_size(crypt_stat, i_size); 889 upper_size_to_lower_size(crypt_stat, i_size);
872 lower_size_after_truncate = 890 lower_size_after_truncate =
873 upper_size_to_lower_size(crypt_stat, new_length); 891 upper_size_to_lower_size(crypt_stat, ia->ia_size);
874 if (lower_size_after_truncate < lower_size_before_truncate) 892 if (lower_size_after_truncate < lower_size_before_truncate) {
875 vmtruncate(lower_dentry->d_inode, 893 lower_ia->ia_size = lower_size_after_truncate;
876 lower_size_after_truncate); 894 lower_ia->ia_valid |= ATTR_SIZE;
895 } else
896 lower_ia->ia_valid &= ~ATTR_SIZE;
877 } 897 }
878out_free: 898out_free:
879 if (ecryptfs_file_to_private(&fake_ecryptfs_file)) 899 if (ecryptfs_file_to_private(&fake_ecryptfs_file))
@@ -883,6 +903,33 @@ out:
883 return rc; 903 return rc;
884} 904}
885 905
906/**
907 * ecryptfs_truncate
908 * @dentry: The ecryptfs layer dentry
909 * @new_length: The length to expand the file to
910 *
911 * Simple function that handles the truncation of an eCryptfs inode and
912 * its corresponding lower inode.
913 *
914 * Returns zero on success; non-zero otherwise
915 */
916int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
917{
918 struct iattr ia = { .ia_valid = ATTR_SIZE, .ia_size = new_length };
919 struct iattr lower_ia = { .ia_valid = 0 };
920 int rc;
921
922 rc = truncate_upper(dentry, &ia, &lower_ia);
923 if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
924 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
925
926 mutex_lock(&lower_dentry->d_inode->i_mutex);
927 rc = notify_change(lower_dentry, &lower_ia);
928 mutex_unlock(&lower_dentry->d_inode->i_mutex);
929 }
930 return rc;
931}
932
886static int 933static int
887ecryptfs_permission(struct inode *inode, int mask) 934ecryptfs_permission(struct inode *inode, int mask)
888{ 935{
@@ -905,6 +952,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
905{ 952{
906 int rc = 0; 953 int rc = 0;
907 struct dentry *lower_dentry; 954 struct dentry *lower_dentry;
955 struct iattr lower_ia;
908 struct inode *inode; 956 struct inode *inode;
909 struct inode *lower_inode; 957 struct inode *lower_inode;
910 struct ecryptfs_crypt_stat *crypt_stat; 958 struct ecryptfs_crypt_stat *crypt_stat;
@@ -943,15 +991,11 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
943 } 991 }
944 } 992 }
945 mutex_unlock(&crypt_stat->cs_mutex); 993 mutex_unlock(&crypt_stat->cs_mutex);
994 memcpy(&lower_ia, ia, sizeof(lower_ia));
995 if (ia->ia_valid & ATTR_FILE)
996 lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
946 if (ia->ia_valid & ATTR_SIZE) { 997 if (ia->ia_valid & ATTR_SIZE) {
947 ecryptfs_printk(KERN_DEBUG, 998 rc = truncate_upper(dentry, ia, &lower_ia);
948 "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n",
949 ia->ia_valid, ATTR_SIZE);
950 rc = ecryptfs_truncate(dentry, ia->ia_size);
951 /* ecryptfs_truncate handles resizing of the lower file */
952 ia->ia_valid &= ~ATTR_SIZE;
953 ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [%x]\n",
954 ia->ia_valid);
955 if (rc < 0) 999 if (rc < 0)
956 goto out; 1000 goto out;
957 } 1001 }
@@ -960,17 +1004,32 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
960 * mode change is for clearing setuid/setgid bits. Allow lower fs 1004 * mode change is for clearing setuid/setgid bits. Allow lower fs
961 * to interpret this in its own way. 1005 * to interpret this in its own way.
962 */ 1006 */
963 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) 1007 if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
964 ia->ia_valid &= ~ATTR_MODE; 1008 lower_ia.ia_valid &= ~ATTR_MODE;
965 1009
966 mutex_lock(&lower_dentry->d_inode->i_mutex); 1010 mutex_lock(&lower_dentry->d_inode->i_mutex);
967 rc = notify_change(lower_dentry, ia); 1011 rc = notify_change(lower_dentry, &lower_ia);
968 mutex_unlock(&lower_dentry->d_inode->i_mutex); 1012 mutex_unlock(&lower_dentry->d_inode->i_mutex);
969out: 1013out:
970 fsstack_copy_attr_all(inode, lower_inode); 1014 fsstack_copy_attr_all(inode, lower_inode);
971 return rc; 1015 return rc;
972} 1016}
973 1017
1018int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1019 struct kstat *stat)
1020{
1021 struct kstat lower_stat;
1022 int rc;
1023
1024 rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
1025 ecryptfs_dentry_to_lower(dentry), &lower_stat);
1026 if (!rc) {
1027 generic_fillattr(dentry->d_inode, stat);
1028 stat->blocks = lower_stat.blocks;
1029 }
1030 return rc;
1031}
1032
974int 1033int
975ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, 1034ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
976 size_t size, int flags) 1035 size_t size, int flags)
@@ -1100,6 +1159,7 @@ const struct inode_operations ecryptfs_dir_iops = {
1100const struct inode_operations ecryptfs_main_iops = { 1159const struct inode_operations ecryptfs_main_iops = {
1101 .permission = ecryptfs_permission, 1160 .permission = ecryptfs_permission,
1102 .setattr = ecryptfs_setattr, 1161 .setattr = ecryptfs_setattr,
1162 .getattr = ecryptfs_getattr,
1103 .setxattr = ecryptfs_setxattr, 1163 .setxattr = ecryptfs_setxattr,
1104 .getxattr = ecryptfs_getxattr, 1164 .getxattr = ecryptfs_getxattr,
1105 .listxattr = ecryptfs_listxattr, 1165 .listxattr = ecryptfs_listxattr,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 567bc4b9f70a..ea2f92101dfe 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -585,8 +585,8 @@ out:
585 * with as much information as it can before needing 585 * with as much information as it can before needing
586 * the lower filesystem. 586 * the lower filesystem.
587 * ecryptfs_read_super(): this accesses the lower filesystem and uses 587 * ecryptfs_read_super(): this accesses the lower filesystem and uses
588 * ecryptfs_interpolate to perform most of the linking 588 * ecryptfs_interpose to perform most of the linking
589 * ecryptfs_interpolate(): links the lower filesystem into ecryptfs 589 * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
590 */ 590 */
591static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, 591static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
592 const char *dev_name, void *raw_data, 592 const char *dev_name, void *raw_data,
diff --git a/fs/eventfd.c b/fs/eventfd.c
index d26402ff06ea..7758cc382ef0 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
135 return events; 135 return events;
136} 136}
137 137
138static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, 138static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
139 loff_t *ppos) 139{
140 *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
141 ctx->count -= *cnt;
142}
143
144/**
145 * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
146 * @ctx: [in] Pointer to eventfd context.
147 * @wait: [in] Wait queue to be removed.
148 * @cnt: [out] Pointer to the 64bit conter value.
149 *
150 * Returns zero if successful, or the following error codes:
151 *
152 * -EAGAIN : The operation would have blocked.
153 *
154 * This is used to atomically remove a wait queue entry from the eventfd wait
155 * queue head, and read/reset the counter value.
156 */
157int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
158 __u64 *cnt)
159{
160 unsigned long flags;
161
162 spin_lock_irqsave(&ctx->wqh.lock, flags);
163 eventfd_ctx_do_read(ctx, cnt);
164 __remove_wait_queue(&ctx->wqh, wait);
165 if (*cnt != 0 && waitqueue_active(&ctx->wqh))
166 wake_up_locked_poll(&ctx->wqh, POLLOUT);
167 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
168
169 return *cnt != 0 ? 0 : -EAGAIN;
170}
171EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
172
173/**
174 * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
175 * @ctx: [in] Pointer to eventfd context.
176 * @no_wait: [in] Different from zero if the operation should not block.
177 * @cnt: [out] Pointer to the 64bit conter value.
178 *
179 * Returns zero if successful, or the following error codes:
180 *
181 * -EAGAIN : The operation would have blocked but @no_wait was nonzero.
182 * -ERESTARTSYS : A signal interrupted the wait operation.
183 *
184 * If @no_wait is zero, the function might sleep until the eventfd internal
185 * counter becomes greater than zero.
186 */
187ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
140{ 188{
141 struct eventfd_ctx *ctx = file->private_data;
142 ssize_t res; 189 ssize_t res;
143 __u64 ucnt = 0;
144 DECLARE_WAITQUEUE(wait, current); 190 DECLARE_WAITQUEUE(wait, current);
145 191
146 if (count < sizeof(ucnt))
147 return -EINVAL;
148 spin_lock_irq(&ctx->wqh.lock); 192 spin_lock_irq(&ctx->wqh.lock);
193 *cnt = 0;
149 res = -EAGAIN; 194 res = -EAGAIN;
150 if (ctx->count > 0) 195 if (ctx->count > 0)
151 res = sizeof(ucnt); 196 res = 0;
152 else if (!(file->f_flags & O_NONBLOCK)) { 197 else if (!no_wait) {
153 __add_wait_queue(&ctx->wqh, &wait); 198 __add_wait_queue(&ctx->wqh, &wait);
154 for (res = 0;;) { 199 for (;;) {
155 set_current_state(TASK_INTERRUPTIBLE); 200 set_current_state(TASK_INTERRUPTIBLE);
156 if (ctx->count > 0) { 201 if (ctx->count > 0) {
157 res = sizeof(ucnt); 202 res = 0;
158 break; 203 break;
159 } 204 }
160 if (signal_pending(current)) { 205 if (signal_pending(current)) {
@@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
168 __remove_wait_queue(&ctx->wqh, &wait); 213 __remove_wait_queue(&ctx->wqh, &wait);
169 __set_current_state(TASK_RUNNING); 214 __set_current_state(TASK_RUNNING);
170 } 215 }
171 if (likely(res > 0)) { 216 if (likely(res == 0)) {
172 ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; 217 eventfd_ctx_do_read(ctx, cnt);
173 ctx->count -= ucnt;
174 if (waitqueue_active(&ctx->wqh)) 218 if (waitqueue_active(&ctx->wqh))
175 wake_up_locked_poll(&ctx->wqh, POLLOUT); 219 wake_up_locked_poll(&ctx->wqh, POLLOUT);
176 } 220 }
177 spin_unlock_irq(&ctx->wqh.lock); 221 spin_unlock_irq(&ctx->wqh.lock);
178 if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
179 return -EFAULT;
180 222
181 return res; 223 return res;
182} 224}
225EXPORT_SYMBOL_GPL(eventfd_ctx_read);
226
227static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
228 loff_t *ppos)
229{
230 struct eventfd_ctx *ctx = file->private_data;
231 ssize_t res;
232 __u64 cnt;
233
234 if (count < sizeof(cnt))
235 return -EINVAL;
236 res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
237 if (res < 0)
238 return res;
239
240 return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt);
241}
183 242
184static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, 243static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
185 loff_t *ppos) 244 loff_t *ppos)
diff --git a/fs/exec.c b/fs/exec.c
index 632b02e34ec7..0790a107ff7e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -941,9 +941,7 @@ void set_task_comm(struct task_struct *tsk, char *buf)
941 941
942int flush_old_exec(struct linux_binprm * bprm) 942int flush_old_exec(struct linux_binprm * bprm)
943{ 943{
944 char * name; 944 int retval;
945 int i, ch, retval;
946 char tcomm[sizeof(current->comm)];
947 945
948 /* 946 /*
949 * Make sure we have a private signal table and that 947 * Make sure we have a private signal table and that
@@ -964,6 +962,25 @@ int flush_old_exec(struct linux_binprm * bprm)
964 962
965 bprm->mm = NULL; /* We're using it now */ 963 bprm->mm = NULL; /* We're using it now */
966 964
965 current->flags &= ~PF_RANDOMIZE;
966 flush_thread();
967 current->personality &= ~bprm->per_clear;
968
969 return 0;
970
971out:
972 return retval;
973}
974EXPORT_SYMBOL(flush_old_exec);
975
976void setup_new_exec(struct linux_binprm * bprm)
977{
978 int i, ch;
979 char * name;
980 char tcomm[sizeof(current->comm)];
981
982 arch_pick_mmap_layout(current->mm);
983
967 /* This is the point of no return */ 984 /* This is the point of no return */
968 current->sas_ss_sp = current->sas_ss_size = 0; 985 current->sas_ss_sp = current->sas_ss_size = 0;
969 986
@@ -985,9 +1002,6 @@ int flush_old_exec(struct linux_binprm * bprm)
985 tcomm[i] = '\0'; 1002 tcomm[i] = '\0';
986 set_task_comm(current, tcomm); 1003 set_task_comm(current, tcomm);
987 1004
988 current->flags &= ~PF_RANDOMIZE;
989 flush_thread();
990
991 /* Set the new mm task size. We have to do that late because it may 1005 /* Set the new mm task size. We have to do that late because it may
992 * depend on TIF_32BIT which is only updated in flush_thread() on 1006 * depend on TIF_32BIT which is only updated in flush_thread() on
993 * some architectures like powerpc 1007 * some architectures like powerpc
@@ -1003,8 +1017,6 @@ int flush_old_exec(struct linux_binprm * bprm)
1003 set_dumpable(current->mm, suid_dumpable); 1017 set_dumpable(current->mm, suid_dumpable);
1004 } 1018 }
1005 1019
1006 current->personality &= ~bprm->per_clear;
1007
1008 /* 1020 /*
1009 * Flush performance counters when crossing a 1021 * Flush performance counters when crossing a
1010 * security domain: 1022 * security domain:
@@ -1019,14 +1031,8 @@ int flush_old_exec(struct linux_binprm * bprm)
1019 1031
1020 flush_signal_handlers(current, 0); 1032 flush_signal_handlers(current, 0);
1021 flush_old_files(current->files); 1033 flush_old_files(current->files);
1022
1023 return 0;
1024
1025out:
1026 return retval;
1027} 1034}
1028 1035EXPORT_SYMBOL(setup_new_exec);
1029EXPORT_SYMBOL(flush_old_exec);
1030 1036
1031/* 1037/*
1032 * Prepare credentials and lock ->cred_guard_mutex. 1038 * Prepare credentials and lock ->cred_guard_mutex.
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index af7b62699ea9..874d169a193e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -361,14 +361,11 @@ struct ext4_new_group_data {
361 so set the magic i_delalloc_reserve_flag after taking the 361 so set the magic i_delalloc_reserve_flag after taking the
362 inode allocation semaphore for */ 362 inode allocation semaphore for */
363#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 363#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
364 /* Call ext4_da_update_reserve_space() after successfully
365 allocating the blocks */
366#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008
367 /* caller is from the direct IO path, request to creation of an 364 /* caller is from the direct IO path, request to creation of an
368 unitialized extents if not allocated, split the uninitialized 365 unitialized extents if not allocated, split the uninitialized
369 extent if blocks has been preallocated already*/ 366 extent if blocks has been preallocated already*/
370#define EXT4_GET_BLOCKS_DIO 0x0010 367#define EXT4_GET_BLOCKS_DIO 0x0008
371#define EXT4_GET_BLOCKS_CONVERT 0x0020 368#define EXT4_GET_BLOCKS_CONVERT 0x0010
372#define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ 369#define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\
373 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) 370 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
374 /* Convert extent to initialized after direct IO complete */ 371 /* Convert extent to initialized after direct IO complete */
@@ -1443,6 +1440,8 @@ extern int ext4_block_truncate_page(handle_t *handle,
1443extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1440extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1444extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1441extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1445extern int flush_aio_dio_completed_IO(struct inode *inode); 1442extern int flush_aio_dio_completed_IO(struct inode *inode);
1443extern void ext4_da_update_reserve_space(struct inode *inode,
1444 int used, int quota_claim);
1446/* ioctl.c */ 1445/* ioctl.c */
1447extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 1446extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
1448extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); 1447extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7d7b74e94687..765a4826b118 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3132,7 +3132,19 @@ out:
3132 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, 3132 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
3133 newblock + max_blocks, 3133 newblock + max_blocks,
3134 allocated - max_blocks); 3134 allocated - max_blocks);
3135 allocated = max_blocks;
3135 } 3136 }
3137
3138 /*
3139 * If we have done fallocate with the offset that is already
3140 * delayed allocated, we would have block reservation
3141 * and quota reservation done in the delayed write path.
3142 * But fallocate would have already updated quota and block
3143 * count for this offset. So cancel these reservation
3144 */
3145 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
3146 ext4_da_update_reserve_space(inode, allocated, 0);
3147
3136map_out: 3148map_out:
3137 set_buffer_mapped(bh_result); 3149 set_buffer_mapped(bh_result);
3138out1: 3150out1:
@@ -3368,9 +3380,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3368 /* previous routine could use block we allocated */ 3380 /* previous routine could use block we allocated */
3369 newblock = ext_pblock(&newex); 3381 newblock = ext_pblock(&newex);
3370 allocated = ext4_ext_get_actual_len(&newex); 3382 allocated = ext4_ext_get_actual_len(&newex);
3383 if (allocated > max_blocks)
3384 allocated = max_blocks;
3371 set_buffer_new(bh_result); 3385 set_buffer_new(bh_result);
3372 3386
3373 /* 3387 /*
3388 * Update reserved blocks/metadata blocks after successful
3389 * block allocation which had been deferred till now.
3390 */
3391 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
3392 ext4_da_update_reserve_space(inode, allocated, 1);
3393
3394 /*
3374 * Cache the extent and update transaction to commit on fdatasync only 3395 * Cache the extent and update transaction to commit on fdatasync only
3375 * when it is _not_ an uninitialized extent. 3396 * when it is _not_ an uninitialized extent.
3376 */ 3397 */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c818972c8302..e11952404e02 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1053,11 +1053,12 @@ static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
1053 * Called with i_data_sem down, which is important since we can call 1053 * Called with i_data_sem down, which is important since we can call
1054 * ext4_discard_preallocations() from here. 1054 * ext4_discard_preallocations() from here.
1055 */ 1055 */
1056static void ext4_da_update_reserve_space(struct inode *inode, int used) 1056void ext4_da_update_reserve_space(struct inode *inode,
1057 int used, int quota_claim)
1057{ 1058{
1058 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1059 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1059 struct ext4_inode_info *ei = EXT4_I(inode); 1060 struct ext4_inode_info *ei = EXT4_I(inode);
1060 int mdb_free = 0; 1061 int mdb_free = 0, allocated_meta_blocks = 0;
1061 1062
1062 spin_lock(&ei->i_block_reservation_lock); 1063 spin_lock(&ei->i_block_reservation_lock);
1063 if (unlikely(used > ei->i_reserved_data_blocks)) { 1064 if (unlikely(used > ei->i_reserved_data_blocks)) {
@@ -1073,6 +1074,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1073 ei->i_reserved_data_blocks -= used; 1074 ei->i_reserved_data_blocks -= used;
1074 used += ei->i_allocated_meta_blocks; 1075 used += ei->i_allocated_meta_blocks;
1075 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; 1076 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
1077 allocated_meta_blocks = ei->i_allocated_meta_blocks;
1076 ei->i_allocated_meta_blocks = 0; 1078 ei->i_allocated_meta_blocks = 0;
1077 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); 1079 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
1078 1080
@@ -1090,9 +1092,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1090 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1092 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1091 1093
1092 /* Update quota subsystem */ 1094 /* Update quota subsystem */
1093 vfs_dq_claim_block(inode, used); 1095 if (quota_claim) {
1094 if (mdb_free) 1096 vfs_dq_claim_block(inode, used);
1095 vfs_dq_release_reservation_block(inode, mdb_free); 1097 if (mdb_free)
1098 vfs_dq_release_reservation_block(inode, mdb_free);
1099 } else {
1100 /*
1101 * We did fallocate with an offset that is already delayed
1102 * allocated. So on delayed allocated writeback we should
1103 * not update the quota for allocated blocks. But then
1104 * converting an fallocate region to initialized region would
1105 * have caused a metadata allocation. So claim quota for
1106 * that
1107 */
1108 if (allocated_meta_blocks)
1109 vfs_dq_claim_block(inode, allocated_meta_blocks);
1110 vfs_dq_release_reservation_block(inode, mdb_free + used);
1111 }
1096 1112
1097 /* 1113 /*
1098 * If we have done all the pending block allocations and if 1114 * If we have done all the pending block allocations and if
@@ -1292,18 +1308,20 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1292 */ 1308 */
1293 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; 1309 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
1294 } 1310 }
1295 }
1296 1311
1312 /*
1313 * Update reserved blocks/metadata blocks after successful
1314 * block allocation which had been deferred till now. We don't
1315 * support fallocate for non extent files. So we can update
1316 * reserve space here.
1317 */
1318 if ((retval > 0) &&
1319 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
1320 ext4_da_update_reserve_space(inode, retval, 1);
1321 }
1297 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 1322 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1298 EXT4_I(inode)->i_delalloc_reserved_flag = 0; 1323 EXT4_I(inode)->i_delalloc_reserved_flag = 0;
1299 1324
1300 /*
1301 * Update reserved blocks/metadata blocks after successful
1302 * block allocation which had been deferred till now.
1303 */
1304 if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
1305 ext4_da_update_reserve_space(inode, retval);
1306
1307 up_write((&EXT4_I(inode)->i_data_sem)); 1325 up_write((&EXT4_I(inode)->i_data_sem));
1308 if (retval > 0 && buffer_mapped(bh)) { 1326 if (retval > 0 && buffer_mapped(bh)) {
1309 int ret = check_block_validity(inode, "file system " 1327 int ret = check_block_validity(inode, "file system "
@@ -1835,24 +1853,12 @@ repeat:
1835 * later. Real quota accounting is done at pages writeout 1853 * later. Real quota accounting is done at pages writeout
1836 * time. 1854 * time.
1837 */ 1855 */
1838 if (vfs_dq_reserve_block(inode, md_needed + 1)) { 1856 if (vfs_dq_reserve_block(inode, md_needed + 1))
1839 /*
1840 * We tend to badly over-estimate the amount of
1841 * metadata blocks which are needed, so if we have
1842 * reserved any metadata blocks, try to force out the
1843 * inode and see if we have any better luck.
1844 */
1845 if (md_reserved && retries++ <= 3)
1846 goto retry;
1847 return -EDQUOT; 1857 return -EDQUOT;
1848 }
1849 1858
1850 if (ext4_claim_free_blocks(sbi, md_needed + 1)) { 1859 if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
1851 vfs_dq_release_reservation_block(inode, md_needed + 1); 1860 vfs_dq_release_reservation_block(inode, md_needed + 1);
1852 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1861 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1853 retry:
1854 if (md_reserved)
1855 write_inode_now(inode, (retries == 3));
1856 yield(); 1862 yield();
1857 goto repeat; 1863 goto repeat;
1858 } 1864 }
@@ -2213,10 +2219,10 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2213 * variables are updated after the blocks have been allocated. 2219 * variables are updated after the blocks have been allocated.
2214 */ 2220 */
2215 new.b_state = 0; 2221 new.b_state = 0;
2216 get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | 2222 get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
2217 EXT4_GET_BLOCKS_DELALLOC_RESERVE);
2218 if (mpd->b_state & (1 << BH_Delay)) 2223 if (mpd->b_state & (1 << BH_Delay))
2219 get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; 2224 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
2225
2220 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, 2226 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
2221 &new, get_blocks_flags); 2227 &new, get_blocks_flags);
2222 if (blks < 0) { 2228 if (blks < 0) {
@@ -3032,7 +3038,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
3032 loff_t pos, unsigned len, unsigned flags, 3038 loff_t pos, unsigned len, unsigned flags,
3033 struct page **pagep, void **fsdata) 3039 struct page **pagep, void **fsdata)
3034{ 3040{
3035 int ret, retries = 0; 3041 int ret, retries = 0, quota_retries = 0;
3036 struct page *page; 3042 struct page *page;
3037 pgoff_t index; 3043 pgoff_t index;
3038 unsigned from, to; 3044 unsigned from, to;
@@ -3091,6 +3097,22 @@ retry:
3091 3097
3092 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3098 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
3093 goto retry; 3099 goto retry;
3100
3101 if ((ret == -EDQUOT) &&
3102 EXT4_I(inode)->i_reserved_meta_blocks &&
3103 (quota_retries++ < 3)) {
3104 /*
3105 * Since we often over-estimate the number of meta
3106 * data blocks required, we may sometimes get a
3107 * spurios out of quota error even though there would
3108 * be enough space once we write the data blocks and
3109 * find out how many meta data blocks were _really_
3110 * required. So try forcing the inode write to see if
3111 * that helps.
3112 */
3113 write_inode_now(inode, (quota_retries == 3));
3114 goto retry;
3115 }
3094out: 3116out:
3095 return ret; 3117 return ret;
3096} 3118}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2cf93ec40a67..5ef953e6f908 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -199,7 +199,9 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
199static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, 199static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
200 int force) 200 int force)
201{ 201{
202 write_lock_irq(&filp->f_owner.lock); 202 unsigned long flags;
203
204 write_lock_irqsave(&filp->f_owner.lock, flags);
203 if (force || !filp->f_owner.pid) { 205 if (force || !filp->f_owner.pid) {
204 put_pid(filp->f_owner.pid); 206 put_pid(filp->f_owner.pid);
205 filp->f_owner.pid = get_pid(pid); 207 filp->f_owner.pid = get_pid(pid);
@@ -211,7 +213,7 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
211 filp->f_owner.euid = cred->euid; 213 filp->f_owner.euid = cred->euid;
212 } 214 }
213 } 215 }
214 write_unlock_irq(&filp->f_owner.lock); 216 write_unlock_irqrestore(&filp->f_owner.lock, flags);
215} 217}
216 218
217int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, 219int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
@@ -618,60 +620,90 @@ static DEFINE_RWLOCK(fasync_lock);
618static struct kmem_cache *fasync_cache __read_mostly; 620static struct kmem_cache *fasync_cache __read_mostly;
619 621
620/* 622/*
621 * fasync_helper() is used by almost all character device drivers 623 * Remove a fasync entry. If successfully removed, return
622 * to set up the fasync queue. It returns negative on error, 0 if it did 624 * positive and clear the FASYNC flag. If no entry exists,
623 * no changes and positive if it added/deleted the entry. 625 * do nothing and return 0.
626 *
627 * NOTE! It is very important that the FASYNC flag always
628 * match the state "is the filp on a fasync list".
629 *
630 * We always take the 'filp->f_lock', in since fasync_lock
631 * needs to be irq-safe.
624 */ 632 */
625int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) 633static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
626{ 634{
627 struct fasync_struct *fa, **fp; 635 struct fasync_struct *fa, **fp;
628 struct fasync_struct *new = NULL;
629 int result = 0; 636 int result = 0;
630 637
631 if (on) { 638 spin_lock(&filp->f_lock);
632 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); 639 write_lock_irq(&fasync_lock);
633 if (!new) 640 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
634 return -ENOMEM; 641 if (fa->fa_file != filp)
642 continue;
643 *fp = fa->fa_next;
644 kmem_cache_free(fasync_cache, fa);
645 filp->f_flags &= ~FASYNC;
646 result = 1;
647 break;
635 } 648 }
649 write_unlock_irq(&fasync_lock);
650 spin_unlock(&filp->f_lock);
651 return result;
652}
653
654/*
655 * Add a fasync entry. Return negative on error, positive if
656 * added, and zero if did nothing but change an existing one.
657 *
658 * NOTE! It is very important that the FASYNC flag always
659 * match the state "is the filp on a fasync list".
660 */
661static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
662{
663 struct fasync_struct *new, *fa, **fp;
664 int result = 0;
665
666 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
667 if (!new)
668 return -ENOMEM;
636 669
637 /*
638 * We need to take f_lock first since it's not an IRQ-safe
639 * lock.
640 */
641 spin_lock(&filp->f_lock); 670 spin_lock(&filp->f_lock);
642 write_lock_irq(&fasync_lock); 671 write_lock_irq(&fasync_lock);
643 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 672 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
644 if (fa->fa_file == filp) { 673 if (fa->fa_file != filp)
645 if(on) { 674 continue;
646 fa->fa_fd = fd; 675 fa->fa_fd = fd;
647 kmem_cache_free(fasync_cache, new); 676 kmem_cache_free(fasync_cache, new);
648 } else { 677 goto out;
649 *fp = fa->fa_next;
650 kmem_cache_free(fasync_cache, fa);
651 result = 1;
652 }
653 goto out;
654 }
655 } 678 }
656 679
657 if (on) { 680 new->magic = FASYNC_MAGIC;
658 new->magic = FASYNC_MAGIC; 681 new->fa_file = filp;
659 new->fa_file = filp; 682 new->fa_fd = fd;
660 new->fa_fd = fd; 683 new->fa_next = *fapp;
661 new->fa_next = *fapp; 684 *fapp = new;
662 *fapp = new; 685 result = 1;
663 result = 1; 686 filp->f_flags |= FASYNC;
664 } 687
665out: 688out:
666 if (on)
667 filp->f_flags |= FASYNC;
668 else
669 filp->f_flags &= ~FASYNC;
670 write_unlock_irq(&fasync_lock); 689 write_unlock_irq(&fasync_lock);
671 spin_unlock(&filp->f_lock); 690 spin_unlock(&filp->f_lock);
672 return result; 691 return result;
673} 692}
674 693
694/*
695 * fasync_helper() is used by almost all character device drivers
696 * to set up the fasync queue, and for regular files by the file
697 * lease code. It returns negative on error, 0 if it did no changes
698 * and positive if it added/deleted the entry.
699 */
700int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
701{
702 if (!on)
703 return fasync_remove_entry(filp, fapp);
704 return fasync_add_entry(fd, filp, fapp);
705}
706
675EXPORT_SYMBOL(fasync_helper); 707EXPORT_SYMBOL(fasync_helper);
676 708
677void __kill_fasync(struct fasync_struct *fa, int sig, int band) 709void __kill_fasync(struct fasync_struct *fa, int sig, int band)
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index a5089a6dd67a..7239efc690d8 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -646,22 +646,27 @@ static const struct super_operations hppfs_sbops = {
646static int hppfs_readlink(struct dentry *dentry, char __user *buffer, 646static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
647 int buflen) 647 int buflen)
648{ 648{
649 struct dentry *proc_dentry; 649 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
650
651 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
652 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, 650 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer,
653 buflen); 651 buflen);
654} 652}
655 653
656static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) 654static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
657{ 655{
658 struct dentry *proc_dentry; 656 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
659
660 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
661 657
662 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); 658 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
663} 659}
664 660
661static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd,
662 void *cookie)
663{
664 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
665
666 if (proc_dentry->d_inode->i_op->put_link)
667 proc_dentry->d_inode->i_op->put_link(proc_dentry, nd, cookie);
668}
669
665static const struct inode_operations hppfs_dir_iops = { 670static const struct inode_operations hppfs_dir_iops = {
666 .lookup = hppfs_lookup, 671 .lookup = hppfs_lookup,
667}; 672};
@@ -669,6 +674,7 @@ static const struct inode_operations hppfs_dir_iops = {
669static const struct inode_operations hppfs_link_iops = { 674static const struct inode_operations hppfs_link_iops = {
670 .readlink = hppfs_readlink, 675 .readlink = hppfs_readlink,
671 .follow_link = hppfs_follow_link, 676 .follow_link = hppfs_follow_link,
677 .put_link = hppfs_put_link,
672}; 678};
673 679
674static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) 680static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
diff --git a/fs/namei.c b/fs/namei.c
index b55440baf7ab..94a5e60779f9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -561,6 +561,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
561 dget(dentry); 561 dget(dentry);
562 } 562 }
563 mntget(path->mnt); 563 mntget(path->mnt);
564 nd->last_type = LAST_BIND;
564 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 565 cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
565 error = PTR_ERR(cookie); 566 error = PTR_ERR(cookie);
566 if (!IS_ERR(cookie)) { 567 if (!IS_ERR(cookie)) {
@@ -1603,11 +1604,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
1603 struct file *filp; 1604 struct file *filp;
1604 struct nameidata nd; 1605 struct nameidata nd;
1605 int error; 1606 int error;
1606 struct path path, save; 1607 struct path path;
1607 struct dentry *dir; 1608 struct dentry *dir;
1608 int count = 0; 1609 int count = 0;
1609 int will_truncate; 1610 int will_truncate;
1610 int flag = open_to_namei_flags(open_flag); 1611 int flag = open_to_namei_flags(open_flag);
1612 int force_reval = 0;
1611 1613
1612 /* 1614 /*
1613 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 1615 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
@@ -1619,7 +1621,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1619 open_flag |= O_DSYNC; 1621 open_flag |= O_DSYNC;
1620 1622
1621 if (!acc_mode) 1623 if (!acc_mode)
1622 acc_mode = MAY_OPEN | ACC_MODE(flag); 1624 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
1623 1625
1624 /* O_TRUNC implies we need access checks for write permissions */ 1626 /* O_TRUNC implies we need access checks for write permissions */
1625 if (flag & O_TRUNC) 1627 if (flag & O_TRUNC)
@@ -1659,9 +1661,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
1659 /* 1661 /*
1660 * Create - we need to know the parent. 1662 * Create - we need to know the parent.
1661 */ 1663 */
1664reval:
1662 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 1665 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
1663 if (error) 1666 if (error)
1664 return ERR_PTR(error); 1667 return ERR_PTR(error);
1668 if (force_reval)
1669 nd.flags |= LOOKUP_REVAL;
1665 error = path_walk(pathname, &nd); 1670 error = path_walk(pathname, &nd);
1666 if (error) { 1671 if (error) {
1667 if (nd.root.mnt) 1672 if (nd.root.mnt)
@@ -1853,17 +1858,7 @@ do_link:
1853 error = security_inode_follow_link(path.dentry, &nd); 1858 error = security_inode_follow_link(path.dentry, &nd);
1854 if (error) 1859 if (error)
1855 goto exit_dput; 1860 goto exit_dput;
1856 save = nd.path;
1857 path_get(&save);
1858 error = __do_follow_link(&path, &nd); 1861 error = __do_follow_link(&path, &nd);
1859 if (error == -ESTALE) {
1860 /* nd.path had been dropped */
1861 nd.path = save;
1862 path_get(&nd.path);
1863 nd.flags |= LOOKUP_REVAL;
1864 error = __do_follow_link(&path, &nd);
1865 }
1866 path_put(&save);
1867 path_put(&path); 1862 path_put(&path);
1868 if (error) { 1863 if (error) {
1869 /* Does someone understand code flow here? Or it is only 1864 /* Does someone understand code flow here? Or it is only
@@ -1873,6 +1868,10 @@ do_link:
1873 release_open_intent(&nd); 1868 release_open_intent(&nd);
1874 if (nd.root.mnt) 1869 if (nd.root.mnt)
1875 path_put(&nd.root); 1870 path_put(&nd.root);
1871 if (error == -ESTALE && !force_reval) {
1872 force_reval = 1;
1873 goto reval;
1874 }
1876 return ERR_PTR(error); 1875 return ERR_PTR(error);
1877 } 1876 }
1878 nd.flags &= ~LOOKUP_PARENT; 1877 nd.flags &= ~LOOKUP_PARENT;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7d70d63ceb29..c768f733c8d6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -965,10 +965,12 @@ EXPORT_SYMBOL(may_umount_tree);
965int may_umount(struct vfsmount *mnt) 965int may_umount(struct vfsmount *mnt)
966{ 966{
967 int ret = 1; 967 int ret = 1;
968 down_read(&namespace_sem);
968 spin_lock(&vfsmount_lock); 969 spin_lock(&vfsmount_lock);
969 if (propagate_mount_busy(mnt, 2)) 970 if (propagate_mount_busy(mnt, 2))
970 ret = 0; 971 ret = 0;
971 spin_unlock(&vfsmount_lock); 972 spin_unlock(&vfsmount_lock);
973 up_read(&namespace_sem);
972 return ret; 974 return ret;
973} 975}
974 976
@@ -1352,12 +1354,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1352 if (err) 1354 if (err)
1353 goto out_cleanup_ids; 1355 goto out_cleanup_ids;
1354 1356
1357 spin_lock(&vfsmount_lock);
1358
1355 if (IS_MNT_SHARED(dest_mnt)) { 1359 if (IS_MNT_SHARED(dest_mnt)) {
1356 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1360 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
1357 set_mnt_shared(p); 1361 set_mnt_shared(p);
1358 } 1362 }
1359
1360 spin_lock(&vfsmount_lock);
1361 if (parent_path) { 1363 if (parent_path) {
1362 detach_mnt(source_mnt, parent_path); 1364 detach_mnt(source_mnt, parent_path);
1363 attach_mnt(source_mnt, path); 1365 attach_mnt(source_mnt, path);
@@ -1534,8 +1536,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1534 err = change_mount_flags(path->mnt, flags); 1536 err = change_mount_flags(path->mnt, flags);
1535 else 1537 else
1536 err = do_remount_sb(sb, flags, data, 0); 1538 err = do_remount_sb(sb, flags, data, 0);
1537 if (!err) 1539 if (!err) {
1540 spin_lock(&vfsmount_lock);
1541 mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
1538 path->mnt->mnt_flags = mnt_flags; 1542 path->mnt->mnt_flags = mnt_flags;
1543 spin_unlock(&vfsmount_lock);
1544 }
1539 up_write(&sb->s_umount); 1545 up_write(&sb->s_umount);
1540 if (!err) { 1546 if (!err) {
1541 security_sb_post_remount(path->mnt, flags, data); 1547 security_sb_post_remount(path->mnt, flags, data);
@@ -1665,6 +1671,8 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
1665{ 1671{
1666 int err; 1672 int err;
1667 1673
1674 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD);
1675
1668 down_write(&namespace_sem); 1676 down_write(&namespace_sem);
1669 /* Something was mounted here while we slept */ 1677 /* Something was mounted here while we slept */
1670 while (d_mountpoint(path->dentry) && 1678 while (d_mountpoint(path->dentry) &&
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index c9ee67b442e1..1afb0a10229f 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -121,7 +121,7 @@ static int idr_callback(int id, void *p, void *data)
121 if (warned) 121 if (warned)
122 return 0; 122 return 0;
123 123
124 warned = false; 124 warned = true;
125 entry = p; 125 entry = p;
126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
127 127
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 8271cf05c957..a94e8bd8eb1f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -552,7 +552,7 @@ retry:
552 552
553 spin_lock(&group->inotify_data.idr_lock); 553 spin_lock(&group->inotify_data.idr_lock);
554 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, 554 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
555 group->inotify_data.last_wd, 555 group->inotify_data.last_wd+1,
556 &tmp_ientry->wd); 556 &tmp_ientry->wd);
557 spin_unlock(&group->inotify_data.idr_lock); 557 spin_unlock(&group->inotify_data.idr_lock);
558 if (ret) { 558 if (ret) {
@@ -632,7 +632,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
632 632
633 spin_lock_init(&group->inotify_data.idr_lock); 633 spin_lock_init(&group->inotify_data.idr_lock);
634 idr_init(&group->inotify_data.idr); 634 idr_init(&group->inotify_data.idr);
635 group->inotify_data.last_wd = 1; 635 group->inotify_data.last_wd = 0;
636 group->inotify_data.user = user; 636 group->inotify_data.user = user;
637 group->inotify_data.fa = NULL; 637 group->inotify_data.fa = NULL;
638 638
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 18d5cc62d8ed..e42bbd843ed1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1419,7 +1419,6 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1419 goto out; 1419 goto out;
1420 1420
1421 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); 1421 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
1422 nd->last_type = LAST_BIND;
1423out: 1422out:
1424 return ERR_PTR(error); 1423 return ERR_PTR(error);
1425} 1424}
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 2efc57173fd7..1739a4aba25f 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -123,30 +123,6 @@ add_error:
123 123
124/*****************************************************************************/ 124/*****************************************************************************/
125/* 125/*
126 * check that file shrinkage doesn't leave any VMAs dangling in midair
127 */
128static int ramfs_nommu_check_mappings(struct inode *inode,
129 size_t newsize, size_t size)
130{
131 struct vm_area_struct *vma;
132 struct prio_tree_iter iter;
133
134 /* search for VMAs that fall within the dead zone */
135 vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
136 newsize >> PAGE_SHIFT,
137 (size + PAGE_SIZE - 1) >> PAGE_SHIFT
138 ) {
139 /* found one - only interested if it's shared out of the page
140 * cache */
141 if (vma->vm_flags & VM_SHARED)
142 return -ETXTBSY; /* not quite true, but near enough */
143 }
144
145 return 0;
146}
147
148/*****************************************************************************/
149/*
150 * 126 *
151 */ 127 */
152static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) 128static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
@@ -164,7 +140,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
164 140
165 /* check that a decrease in size doesn't cut off any shared mappings */ 141 /* check that a decrease in size doesn't cut off any shared mappings */
166 if (newsize < size) { 142 if (newsize < size) {
167 ret = ramfs_nommu_check_mappings(inode, newsize, size); 143 ret = nommu_shrink_inode_mappings(inode, size, newsize);
168 if (ret < 0) 144 if (ret < 0)
169 return ret; 145 return ret;
170 } 146 }
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 83ac4d3b3cb0..ba98546fabbd 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2913,7 +2913,9 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2913 journal->j_mount_id = 10; 2913 journal->j_mount_id = 10;
2914 journal->j_state = 0; 2914 journal->j_state = 0;
2915 atomic_set(&(journal->j_jlock), 0); 2915 atomic_set(&(journal->j_jlock), 0);
2916 reiserfs_write_unlock(sb);
2916 journal->j_cnode_free_list = allocate_cnodes(num_cnodes); 2917 journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
2918 reiserfs_write_lock(sb);
2917 journal->j_cnode_free_orig = journal->j_cnode_free_list; 2919 journal->j_cnode_free_orig = journal->j_cnode_free_list;
2918 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; 2920 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
2919 journal->j_cnode_used = 0; 2921 journal->j_cnode_used = 0;
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index c117fa80d1e9..42d213546894 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -544,6 +544,7 @@ error:
544error_rsb_inval: 544error_rsb_inval:
545 ret = -EINVAL; 545 ret = -EINVAL;
546error_rsb: 546error_rsb:
547 kfree(rsb);
547 return ret; 548 return ret;
548} 549}
549 550
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 618c2701d3a7..e5a3d8e96bb7 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -54,6 +54,7 @@
54 */ 54 */
55 55
56#include <linux/pagemap.h> 56#include <linux/pagemap.h>
57#include <linux/list_sort.h>
57#include "ubifs.h" 58#include "ubifs.h"
58 59
59/* 60/*
@@ -108,101 +109,6 @@ static int switch_gc_head(struct ubifs_info *c)
108} 109}
109 110
110/** 111/**
111 * list_sort - sort a list.
112 * @priv: private data, passed to @cmp
113 * @head: the list to sort
114 * @cmp: the elements comparison function
115 *
116 * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
117 * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
118 * in ascending order.
119 *
120 * The comparison function @cmp is supposed to return a negative value if @a is
121 * than @b, and a positive value if @a is greater than @b. If @a and @b are
122 * equivalent, then it does not matter what this function returns.
123 */
124static void list_sort(void *priv, struct list_head *head,
125 int (*cmp)(void *priv, struct list_head *a,
126 struct list_head *b))
127{
128 struct list_head *p, *q, *e, *list, *tail, *oldhead;
129 int insize, nmerges, psize, qsize, i;
130
131 if (list_empty(head))
132 return;
133
134 list = head->next;
135 list_del(head);
136 insize = 1;
137 for (;;) {
138 p = oldhead = list;
139 list = tail = NULL;
140 nmerges = 0;
141
142 while (p) {
143 nmerges++;
144 q = p;
145 psize = 0;
146 for (i = 0; i < insize; i++) {
147 psize++;
148 q = q->next == oldhead ? NULL : q->next;
149 if (!q)
150 break;
151 }
152
153 qsize = insize;
154 while (psize > 0 || (qsize > 0 && q)) {
155 if (!psize) {
156 e = q;
157 q = q->next;
158 qsize--;
159 if (q == oldhead)
160 q = NULL;
161 } else if (!qsize || !q) {
162 e = p;
163 p = p->next;
164 psize--;
165 if (p == oldhead)
166 p = NULL;
167 } else if (cmp(priv, p, q) <= 0) {
168 e = p;
169 p = p->next;
170 psize--;
171 if (p == oldhead)
172 p = NULL;
173 } else {
174 e = q;
175 q = q->next;
176 qsize--;
177 if (q == oldhead)
178 q = NULL;
179 }
180 if (tail)
181 tail->next = e;
182 else
183 list = e;
184 e->prev = tail;
185 tail = e;
186 }
187 p = q;
188 }
189
190 tail->next = list;
191 list->prev = tail;
192
193 if (nmerges <= 1)
194 break;
195
196 insize *= 2;
197 }
198
199 head->next = list;
200 head->prev = list->prev;
201 list->prev->next = head;
202 list->prev = head;
203}
204
205/**
206 * data_nodes_cmp - compare 2 data nodes. 112 * data_nodes_cmp - compare 2 data nodes.
207 * @priv: UBIFS file-system description object 113 * @priv: UBIFS file-system description object
208 * @a: first data node 114 * @a: first data node
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 09783cc444ac..77414db10dc2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -954,16 +954,14 @@ xfs_fs_destroy_inode(
954 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); 954 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
955 955
956 /* 956 /*
957 * If we have nothing to flush with this inode then complete the 957 * We always use background reclaim here because even if the
958 * teardown now, otherwise delay the flush operation. 958 * inode is clean, it still may be under IO and hence we have
959 * to take the flush lock. The background reclaim path handles
960 * this more efficiently than we can here, so simply let background
961 * reclaim tear down all inodes.
959 */ 962 */
960 if (!xfs_inode_clean(ip)) {
961 xfs_inode_set_reclaim_tag(ip);
962 return;
963 }
964
965out_reclaim: 963out_reclaim:
966 xfs_ireclaim(ip); 964 xfs_inode_set_reclaim_tag(ip);
967} 965}
968 966
969/* 967/*
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 6fed97a8cd3e..1f5e4bb5e970 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -65,7 +65,6 @@ xfs_inode_ag_lookup(
65 * as the tree is sparse and a gang lookup walks to find 65 * as the tree is sparse and a gang lookup walks to find
66 * the number of objects requested. 66 * the number of objects requested.
67 */ 67 */
68 read_lock(&pag->pag_ici_lock);
69 if (tag == XFS_ICI_NO_TAG) { 68 if (tag == XFS_ICI_NO_TAG) {
70 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
71 (void **)&ip, *first_index, 1); 70 (void **)&ip, *first_index, 1);
@@ -74,7 +73,7 @@ xfs_inode_ag_lookup(
74 (void **)&ip, *first_index, 1, tag); 73 (void **)&ip, *first_index, 1, tag);
75 } 74 }
76 if (!nr_found) 75 if (!nr_found)
77 goto unlock; 76 return NULL;
78 77
79 /* 78 /*
80 * Update the index for the next lookup. Catch overflows 79 * Update the index for the next lookup. Catch overflows
@@ -84,13 +83,8 @@ xfs_inode_ag_lookup(
84 */ 83 */
85 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
86 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
87 goto unlock; 86 return NULL;
88
89 return ip; 87 return ip;
90
91unlock:
92 read_unlock(&pag->pag_ici_lock);
93 return NULL;
94} 88}
95 89
96STATIC int 90STATIC int
@@ -100,7 +94,8 @@ xfs_inode_ag_walk(
100 int (*execute)(struct xfs_inode *ip, 94 int (*execute)(struct xfs_inode *ip,
101 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
102 int flags, 96 int flags,
103 int tag) 97 int tag,
98 int exclusive)
104{ 99{
105 struct xfs_perag *pag = &mp->m_perag[ag]; 100 struct xfs_perag *pag = &mp->m_perag[ag];
106 uint32_t first_index; 101 uint32_t first_index;
@@ -114,10 +109,20 @@ restart:
114 int error = 0; 109 int error = 0;
115 xfs_inode_t *ip; 110 xfs_inode_t *ip;
116 111
112 if (exclusive)
113 write_lock(&pag->pag_ici_lock);
114 else
115 read_lock(&pag->pag_ici_lock);
117 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); 116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
118 if (!ip) 117 if (!ip) {
118 if (exclusive)
119 write_unlock(&pag->pag_ici_lock);
120 else
121 read_unlock(&pag->pag_ici_lock);
119 break; 122 break;
123 }
120 124
125 /* execute releases pag->pag_ici_lock */
121 error = execute(ip, pag, flags); 126 error = execute(ip, pag, flags);
122 if (error == EAGAIN) { 127 if (error == EAGAIN) {
123 skipped++; 128 skipped++;
@@ -125,9 +130,8 @@ restart:
125 } 130 }
126 if (error) 131 if (error)
127 last_error = error; 132 last_error = error;
128 /* 133
129 * bail out if the filesystem is corrupted. 134 /* bail out if the filesystem is corrupted. */
130 */
131 if (error == EFSCORRUPTED) 135 if (error == EFSCORRUPTED)
132 break; 136 break;
133 137
@@ -148,7 +152,8 @@ xfs_inode_ag_iterator(
148 int (*execute)(struct xfs_inode *ip, 152 int (*execute)(struct xfs_inode *ip,
149 struct xfs_perag *pag, int flags), 153 struct xfs_perag *pag, int flags),
150 int flags, 154 int flags,
151 int tag) 155 int tag,
156 int exclusive)
152{ 157{
153 int error = 0; 158 int error = 0;
154 int last_error = 0; 159 int last_error = 0;
@@ -157,7 +162,8 @@ xfs_inode_ag_iterator(
157 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 162 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
158 if (!mp->m_perag[ag].pag_ici_init) 163 if (!mp->m_perag[ag].pag_ici_init)
159 continue; 164 continue;
160 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); 165 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag,
166 exclusive);
161 if (error) { 167 if (error) {
162 last_error = error; 168 last_error = error;
163 if (error == EFSCORRUPTED) 169 if (error == EFSCORRUPTED)
@@ -174,30 +180,31 @@ xfs_sync_inode_valid(
174 struct xfs_perag *pag) 180 struct xfs_perag *pag)
175{ 181{
176 struct inode *inode = VFS_I(ip); 182 struct inode *inode = VFS_I(ip);
183 int error = EFSCORRUPTED;
177 184
178 /* nothing to sync during shutdown */ 185 /* nothing to sync during shutdown */
179 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 186 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
180 read_unlock(&pag->pag_ici_lock); 187 goto out_unlock;
181 return EFSCORRUPTED;
182 }
183 188
184 /* 189 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
185 * If we can't get a reference on the inode, it must be in reclaim. 190 error = ENOENT;
186 * Leave it for the reclaim code to flush. Also avoid inodes that 191 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
187 * haven't been fully initialised. 192 goto out_unlock;
188 */
189 if (!igrab(inode)) {
190 read_unlock(&pag->pag_ici_lock);
191 return ENOENT;
192 }
193 read_unlock(&pag->pag_ici_lock);
194 193
195 if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { 194 /* If we can't grab the inode, it must on it's way to reclaim. */
195 if (!igrab(inode))
196 goto out_unlock;
197
198 if (is_bad_inode(inode)) {
196 IRELE(ip); 199 IRELE(ip);
197 return ENOENT; 200 goto out_unlock;
198 } 201 }
199 202
200 return 0; 203 /* inode is valid */
204 error = 0;
205out_unlock:
206 read_unlock(&pag->pag_ici_lock);
207 return error;
201} 208}
202 209
203STATIC int 210STATIC int
@@ -282,7 +289,7 @@ xfs_sync_data(
282 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 289 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
283 290
284 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 291 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
285 XFS_ICI_NO_TAG); 292 XFS_ICI_NO_TAG, 0);
286 if (error) 293 if (error)
287 return XFS_ERROR(error); 294 return XFS_ERROR(error);
288 295
@@ -304,7 +311,7 @@ xfs_sync_attr(
304 ASSERT((flags & ~SYNC_WAIT) == 0); 311 ASSERT((flags & ~SYNC_WAIT) == 0);
305 312
306 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 313 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
307 XFS_ICI_NO_TAG); 314 XFS_ICI_NO_TAG, 0);
308} 315}
309 316
310STATIC int 317STATIC int
@@ -664,60 +671,6 @@ xfs_syncd_stop(
664 kthread_stop(mp->m_sync_task); 671 kthread_stop(mp->m_sync_task);
665} 672}
666 673
667STATIC int
668xfs_reclaim_inode(
669 xfs_inode_t *ip,
670 int sync_mode)
671{
672 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
673
674 /* The hash lock here protects a thread in xfs_iget_core from
675 * racing with us on linking the inode back with a vnode.
676 * Once we have the XFS_IRECLAIM flag set it will not touch
677 * us.
678 */
679 write_lock(&pag->pag_ici_lock);
680 spin_lock(&ip->i_flags_lock);
681 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
682 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
683 spin_unlock(&ip->i_flags_lock);
684 write_unlock(&pag->pag_ici_lock);
685 return -EAGAIN;
686 }
687 __xfs_iflags_set(ip, XFS_IRECLAIM);
688 spin_unlock(&ip->i_flags_lock);
689 write_unlock(&pag->pag_ici_lock);
690 xfs_put_perag(ip->i_mount, pag);
691
692 /*
693 * If the inode is still dirty, then flush it out. If the inode
694 * is not in the AIL, then it will be OK to flush it delwri as
695 * long as xfs_iflush() does not keep any references to the inode.
696 * We leave that decision up to xfs_iflush() since it has the
697 * knowledge of whether it's OK to simply do a delwri flush of
698 * the inode or whether we need to wait until the inode is
699 * pulled from the AIL.
700 * We get the flush lock regardless, though, just to make sure
701 * we don't free it while it is being flushed.
702 */
703 xfs_ilock(ip, XFS_ILOCK_EXCL);
704 xfs_iflock(ip);
705
706 /*
707 * In the case of a forced shutdown we rely on xfs_iflush() to
708 * wait for the inode to be unpinned before returning an error.
709 */
710 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
711 /* synchronize with xfs_iflush_done */
712 xfs_iflock(ip);
713 xfs_ifunlock(ip);
714 }
715
716 xfs_iunlock(ip, XFS_ILOCK_EXCL);
717 xfs_ireclaim(ip);
718 return 0;
719}
720
721void 674void
722__xfs_inode_set_reclaim_tag( 675__xfs_inode_set_reclaim_tag(
723 struct xfs_perag *pag, 676 struct xfs_perag *pag,
@@ -760,19 +713,55 @@ __xfs_inode_clear_reclaim_tag(
760} 713}
761 714
762STATIC int 715STATIC int
763xfs_reclaim_inode_now( 716xfs_reclaim_inode(
764 struct xfs_inode *ip, 717 struct xfs_inode *ip,
765 struct xfs_perag *pag, 718 struct xfs_perag *pag,
766 int flags) 719 int sync_mode)
767{ 720{
768 /* ignore if already under reclaim */ 721 /*
769 if (xfs_iflags_test(ip, XFS_IRECLAIM)) { 722 * The radix tree lock here protects a thread in xfs_iget from racing
770 read_unlock(&pag->pag_ici_lock); 723 * with us starting reclaim on the inode. Once we have the
724 * XFS_IRECLAIM flag set it will not touch us.
725 */
726 spin_lock(&ip->i_flags_lock);
727 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
728 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
729 /* ignore as it is already under reclaim */
730 spin_unlock(&ip->i_flags_lock);
731 write_unlock(&pag->pag_ici_lock);
771 return 0; 732 return 0;
772 } 733 }
773 read_unlock(&pag->pag_ici_lock); 734 __xfs_iflags_set(ip, XFS_IRECLAIM);
735 spin_unlock(&ip->i_flags_lock);
736 write_unlock(&pag->pag_ici_lock);
774 737
775 return xfs_reclaim_inode(ip, flags); 738 /*
739 * If the inode is still dirty, then flush it out. If the inode
740 * is not in the AIL, then it will be OK to flush it delwri as
741 * long as xfs_iflush() does not keep any references to the inode.
742 * We leave that decision up to xfs_iflush() since it has the
743 * knowledge of whether it's OK to simply do a delwri flush of
744 * the inode or whether we need to wait until the inode is
745 * pulled from the AIL.
746 * We get the flush lock regardless, though, just to make sure
747 * we don't free it while it is being flushed.
748 */
749 xfs_ilock(ip, XFS_ILOCK_EXCL);
750 xfs_iflock(ip);
751
752 /*
753 * In the case of a forced shutdown we rely on xfs_iflush() to
754 * wait for the inode to be unpinned before returning an error.
755 */
756 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
757 /* synchronize with xfs_iflush_done */
758 xfs_iflock(ip);
759 xfs_ifunlock(ip);
760 }
761
762 xfs_iunlock(ip, XFS_ILOCK_EXCL);
763 xfs_ireclaim(ip);
764 return 0;
776} 765}
777 766
778int 767int
@@ -780,6 +769,6 @@ xfs_reclaim_inodes(
780 xfs_mount_t *mp, 769 xfs_mount_t *mp,
781 int mode) 770 int mode)
782{ 771{
783 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, 772 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
784 XFS_ICI_RECLAIM_TAG); 773 XFS_ICI_RECLAIM_TAG, 1);
785} 774}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index a500b4d91835..ea932b43335d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -54,6 +54,6 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
54int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 54int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
55int xfs_inode_ag_iterator(struct xfs_mount *mp, 55int xfs_inode_ag_iterator(struct xfs_mount *mp,
56 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 56 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
57 int flags, int tag); 57 int flags, int tag, int write_lock);
58 58
59#endif 59#endif
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 71af76fe8a23..873e07e29074 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,7 @@ xfs_qm_dqrele_all_inodes(
891 uint flags) 891 uint flags)
892{ 892{
893 ASSERT(mp->m_quotainfo); 893 ASSERT(mp->m_quotainfo);
894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG); 894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
895} 895}
896 896
897/*------------------------------------------------------------------------*/ 897/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d1483a4f71b8..84ca1cf16a1e 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -114,10 +114,82 @@ xfs_swapext(
114 return error; 114 return error;
115} 115}
116 116
117/*
118 * We need to check that the format of the data fork in the temporary inode is
119 * valid for the target inode before doing the swap. This is not a problem with
120 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
121 * data fork depending on the space the attribute fork is taking so we can get
122 * invalid formats on the target inode.
123 *
124 * E.g. target has space for 7 extents in extent format, temp inode only has
125 * space for 6. If we defragment down to 7 extents, then the tmp format is a
126 * btree, but when swapped it needs to be in extent format. Hence we can't just
127 * blindly swap data forks on attr2 filesystems.
128 *
129 * Note that we check the swap in both directions so that we don't end up with
130 * a corrupt temporary inode, either.
131 *
132 * Note that fixing the way xfs_fsr sets up the attribute fork in the source
133 * inode will prevent this situation from occurring, so all we do here is
134 * reject and log the attempt. basically we are putting the responsibility on
135 * userspace to get this right.
136 */
137static int
138xfs_swap_extents_check_format(
139 xfs_inode_t *ip, /* target inode */
140 xfs_inode_t *tip) /* tmp inode */
141{
142
143 /* Should never get a local format */
144 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
145 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
146 return EINVAL;
147
148 /*
149 * if the target inode has less extents that then temporary inode then
150 * why did userspace call us?
151 */
152 if (ip->i_d.di_nextents < tip->i_d.di_nextents)
153 return EINVAL;
154
155 /*
156 * if the target inode is in extent form and the temp inode is in btree
157 * form then we will end up with the target inode in the wrong format
158 * as we already know there are less extents in the temp inode.
159 */
160 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
161 tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
162 return EINVAL;
163
164 /* Check temp in extent form to max in target */
165 if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
166 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max)
167 return EINVAL;
168
169 /* Check target in extent form to max in temp */
170 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
171 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
172 return EINVAL;
173
174 /* Check root block of temp in btree form to max in target */
175 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
176 XFS_IFORK_BOFF(ip) &&
177 tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
178 return EINVAL;
179
180 /* Check root block of target in btree form to max in temp */
181 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
182 XFS_IFORK_BOFF(tip) &&
183 ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
184 return EINVAL;
185
186 return 0;
187}
188
117int 189int
118xfs_swap_extents( 190xfs_swap_extents(
119 xfs_inode_t *ip, 191 xfs_inode_t *ip, /* target inode */
120 xfs_inode_t *tip, 192 xfs_inode_t *tip, /* tmp inode */
121 xfs_swapext_t *sxp) 193 xfs_swapext_t *sxp)
122{ 194{
123 xfs_mount_t *mp; 195 xfs_mount_t *mp;
@@ -161,13 +233,6 @@ xfs_swap_extents(
161 goto out_unlock; 233 goto out_unlock;
162 } 234 }
163 235
164 /* Should never get a local format */
165 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
166 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
167 error = XFS_ERROR(EINVAL);
168 goto out_unlock;
169 }
170
171 if (VN_CACHED(VFS_I(tip)) != 0) { 236 if (VN_CACHED(VFS_I(tip)) != 0) {
172 error = xfs_flushinval_pages(tip, 0, -1, 237 error = xfs_flushinval_pages(tip, 0, -1,
173 FI_REMAPF_LOCKED); 238 FI_REMAPF_LOCKED);
@@ -189,13 +254,12 @@ xfs_swap_extents(
189 goto out_unlock; 254 goto out_unlock;
190 } 255 }
191 256
192 /* 257 /* check inode formats now that data is flushed */
193 * If the target has extended attributes, the tmp file 258 error = xfs_swap_extents_check_format(ip, tip);
194 * must also in order to ensure the correct data fork 259 if (error) {
195 * format. 260 xfs_fs_cmn_err(CE_NOTE, mp,
196 */ 261 "%s: inode 0x%llx format is incompatible for exchanging.",
197 if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { 262 __FILE__, ip->i_ino);
198 error = XFS_ERROR(EINVAL);
199 goto out_unlock; 263 goto out_unlock;
200 } 264 }
201 265
@@ -276,6 +340,16 @@ xfs_swap_extents(
276 *tifp = *tempifp; /* struct copy */ 340 *tifp = *tempifp; /* struct copy */
277 341
278 /* 342 /*
343 * Fix the in-memory data fork values that are dependent on the fork
344 * offset in the inode. We can't assume they remain the same as attr2
345 * has dynamic fork offsets.
346 */
347 ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
348 (uint)sizeof(xfs_bmbt_rec_t);
349 tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
350 (uint)sizeof(xfs_bmbt_rec_t);
351
352 /*
279 * Fix the on-disk inode values 353 * Fix the on-disk inode values
280 */ 354 */
281 tmp = (__uint64_t)ip->i_d.di_nblocks; 355 tmp = (__uint64_t)ip->i_d.di_nblocks;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index fa402a6bbbcf..155e798f30a1 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -73,7 +73,6 @@ xfs_inode_alloc(
73 ASSERT(atomic_read(&ip->i_pincount) == 0); 73 ASSERT(atomic_read(&ip->i_pincount) == 0);
74 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 74 ASSERT(!spin_is_locked(&ip->i_flags_lock));
75 ASSERT(completion_done(&ip->i_flush)); 75 ASSERT(completion_done(&ip->i_flush));
76 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
77 76
78 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 77 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
79 78
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 391d36b0e68c..ef77fd88c8e3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2842,13 +2842,9 @@ xfs_iflush(
2842 2842
2843 /* 2843 /*
2844 * If the inode isn't dirty, then just release the inode flush lock and 2844 * If the inode isn't dirty, then just release the inode flush lock and
2845 * do nothing. Treat stale inodes the same; we cannot rely on the 2845 * do nothing.
2846 * backing buffer remaining stale in cache for the remaining life of
2847 * the stale inode and so xfs_itobp() below may give us a buffer that
2848 * no longer contains inodes below. Doing this stale check here also
2849 * avoids forcing the log on pinned, stale inodes.
2850 */ 2846 */
2851 if (xfs_inode_clean(ip) || xfs_iflags_test(ip, XFS_ISTALE)) { 2847 if (xfs_inode_clean(ip)) {
2852 xfs_ifunlock(ip); 2848 xfs_ifunlock(ip);
2853 return 0; 2849 return 0;
2854 } 2850 }
@@ -2872,6 +2868,19 @@ xfs_iflush(
2872 xfs_iunpin_wait(ip); 2868 xfs_iunpin_wait(ip);
2873 2869
2874 /* 2870 /*
2871 * For stale inodes we cannot rely on the backing buffer remaining
2872 * stale in cache for the remaining life of the stale inode and so
2873 * xfs_itobp() below may give us a buffer that no longer contains
2874 * inodes below. We have to check this after ensuring the inode is
2875 * unpinned so that it is safe to reclaim the stale inode after the
2876 * flush call.
2877 */
2878 if (xfs_iflags_test(ip, XFS_ISTALE)) {
2879 xfs_ifunlock(ip);
2880 return 0;
2881 }
2882
2883 /*
2875 * This may have been unpinned because the filesystem is shutting 2884 * This may have been unpinned because the filesystem is shutting
2876 * down forcibly. If that's the case we must not write this inode 2885 * down forcibly. If that's the case we must not write this inode
2877 * to disk, because the log record didn't make it to disk! 2886 * to disk, because the log record didn't make it to disk!
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 9e15a1185362..6be05f756d59 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1517,6 +1517,8 @@ xfs_rtfree_range(
1517 */ 1517 */
1518 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, 1518 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
1519 &postblock); 1519 &postblock);
1520 if (error)
1521 return error;
1520 /* 1522 /*
1521 * If there are blocks not being freed at the front of the 1523 * If there are blocks not being freed at the front of the
1522 * old extent, add summary data for them to be allocated. 1524 * old extent, add summary data for them to be allocated.