Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_inode.c | 41
-rw-r--r--  fs/9p/vfs_super.c | 3
-rw-r--r--  fs/affs/affs.h | 2
-rw-r--r--  fs/affs/namei.c | 7
-rw-r--r--  fs/affs/super.c | 31
-rw-r--r--  fs/affs/symlink.c | 7
-rw-r--r--  fs/bfs/inode.c | 43
-rw-r--r--  fs/binfmt_aout.c | 1
-rw-r--r--  fs/binfmt_elf.c | 27
-rw-r--r--  fs/binfmt_elf_fdpic.c | 20
-rw-r--r--  fs/binfmt_flat.c | 1
-rw-r--r--  fs/binfmt_som.c | 1
-rw-r--r--  fs/bio-integrity.c | 3
-rw-r--r--  fs/bio.c | 9
-rw-r--r--  fs/btrfs/acl.c | 13
-rw-r--r--  fs/btrfs/ctree.h | 1
-rw-r--r--  fs/btrfs/disk-io.c | 13
-rw-r--r--  fs/btrfs/extent-tree.c | 40
-rw-r--r--  fs/btrfs/extent_io.c | 3
-rw-r--r--  fs/btrfs/extent_map.c | 14
-rw-r--r--  fs/btrfs/file.c | 102
-rw-r--r--  fs/btrfs/inode.c | 72
-rw-r--r--  fs/btrfs/ordered-data.c | 2
-rw-r--r--  fs/btrfs/relocation.c | 7
-rw-r--r--  fs/btrfs/super.c | 9
-rw-r--r--  fs/btrfs/volumes.c | 17
-rw-r--r--  fs/cifs/CHANGES | 4
-rw-r--r--  fs/cifs/cifs_dfs_ref.c | 3
-rw-r--r--  fs/cifs/cifsfs.c | 3
-rw-r--r--  fs/cifs/connect.c | 13
-rw-r--r--  fs/compat_ioctl.c | 3
-rw-r--r--  fs/configfs/symlink.c | 4
-rw-r--r--  fs/debugfs/inode.c | 11
-rw-r--r--  fs/ecryptfs/crypto.c | 4
-rw-r--r--  fs/ecryptfs/file.c | 17
-rw-r--r--  fs/ecryptfs/inode.c | 158
-rw-r--r--  fs/ecryptfs/main.c | 4
-rw-r--r--  fs/eventfd.c | 89
-rw-r--r--  fs/exec.c | 36
-rw-r--r--  fs/exofs/inode.c | 17
-rw-r--r--  fs/exofs/pnfs.h | 10
-rw-r--r--  fs/ext4/Kconfig | 1
-rw-r--r--  fs/ext4/block_validity.c | 1
-rw-r--r--  fs/ext4/ext4.h | 11
-rw-r--r--  fs/ext4/ext4_extents.h | 3
-rw-r--r--  fs/ext4/extents.c | 98
-rw-r--r--  fs/ext4/fsync.c | 16
-rw-r--r--  fs/ext4/inode.c | 277
-rw-r--r--  fs/ext4/mballoc.h | 1
-rw-r--r--  fs/ext4/super.c | 7
-rw-r--r--  fs/ext4/xattr.c | 2
-rw-r--r--  fs/fcntl.c | 108
-rw-r--r--  fs/fs-writeback.c | 18
-rw-r--r--  fs/fuse/file.c | 3
-rw-r--r--  fs/gfs2/file.c | 38
-rw-r--r--  fs/gfs2/glock.c | 4
-rw-r--r--  fs/gfs2/glock.h | 2
-rw-r--r--  fs/gfs2/incore.h | 2
-rw-r--r--  fs/gfs2/lock_dlm.c | 11
-rw-r--r--  fs/gfs2/meta_io.c | 2
-rw-r--r--  fs/gfs2/ops_fstype.c | 12
-rw-r--r--  fs/gfs2/ops_inode.c | 9
-rw-r--r--  fs/gfs2/rgrp.c | 8
-rw-r--r--  fs/gfs2/super.c | 1
-rw-r--r--  fs/gfs2/xattr.c | 21
-rw-r--r--  fs/hppfs/hppfs.c | 18
-rw-r--r--  fs/jbd2/checkpoint.c | 15
-rw-r--r--  fs/jbd2/commit.c | 19
-rw-r--r--  fs/jbd2/journal.c | 2
-rw-r--r--  fs/namei.c | 24
-rw-r--r--  fs/namespace.c | 14
-rw-r--r--  fs/nfs/dir.c | 1
-rw-r--r--  fs/nfs/file.c | 2
-rw-r--r--  fs/nfs/inode.c | 4
-rw-r--r--  fs/nfs/nfs4_fs.h | 2
-rw-r--r--  fs/nfs/nfs4proc.c | 78
-rw-r--r--  fs/nfs/nfs4state.c | 2
-rw-r--r--  fs/nfs/pagelist.c | 17
-rw-r--r--  fs/nfs/super.c | 15
-rw-r--r--  fs/nfs/sysctl.c | 2
-rw-r--r--  fs/nfs/write.c | 3
-rw-r--r--  fs/nfsd/vfs.c | 5
-rw-r--r--  fs/nilfs2/bmap.c | 4
-rw-r--r--  fs/nilfs2/cpfile.c | 31
-rw-r--r--  fs/nilfs2/direct.c | 17
-rw-r--r--  fs/nilfs2/ioctl.c | 2
-rw-r--r--  fs/nilfs2/segment.c | 2
-rw-r--r--  fs/notify/inotify/inotify_fsnotify.c | 2
-rw-r--r--  fs/notify/inotify/inotify_user.c | 4
-rw-r--r--  fs/ocfs2/file.c | 21
-rw-r--r--  fs/proc/array.c | 89
-rw-r--r--  fs/proc/base.c | 1
-rw-r--r--  fs/proc/task_mmu.c | 3
-rw-r--r--  fs/quota/dquot.c | 3
-rw-r--r--  fs/ramfs/file-nommu.c | 26
-rw-r--r--  fs/reiserfs/bitmap.c | 3
-rw-r--r--  fs/reiserfs/inode.c | 24
-rw-r--r--  fs/reiserfs/ioctl.c | 3
-rw-r--r--  fs/reiserfs/journal.c | 20
-rw-r--r--  fs/reiserfs/lock.c | 9
-rw-r--r--  fs/reiserfs/namei.c | 7
-rw-r--r--  fs/reiserfs/xattr.c | 38
-rw-r--r--  fs/reiserfs/xattr_acl.c | 2
-rw-r--r--  fs/romfs/super.c | 1
-rw-r--r--  fs/sysfs/dir.c | 14
-rw-r--r--  fs/sysfs/sysfs.h | 15
-rw-r--r--  fs/ubifs/gc.c | 96
-rw-r--r--  fs/xfs/linux-2.6/xfs_acl.c | 3
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 14
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c | 183
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h | 1145
-rw-r--r--  fs/xfs/quota/xfs_qm_syscalls.c | 2
-rw-r--r--  fs/xfs/xfs_alloc.c | 44
-rw-r--r--  fs/xfs/xfs_dfrag.c | 106
-rw-r--r--  fs/xfs/xfs_iget.c | 1
-rw-r--r--  fs/xfs/xfs_inode.c | 17
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 2
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 93
119 files changed, 2152 insertions(+), 1641 deletions(-)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 18f74ec4dce9..9d03d1ebca6f 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1001,44 +1001,6 @@ done:
 }
 
 /**
- * v9fs_vfs_readlink - read a symlink's location
- * @dentry: dentry for symlink
- * @buffer: buffer to load symlink location into
- * @buflen: length of buffer
- *
- */
-
-static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
-			     int buflen)
-{
-	int retval;
-	int ret;
-	char *link = __getname();
-
-	if (unlikely(!link))
-		return -ENOMEM;
-
-	if (buflen > PATH_MAX)
-		buflen = PATH_MAX;
-
-	P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
-		   dentry);
-
-	retval = v9fs_readlink(dentry, link, buflen);
-
-	if (retval > 0) {
-		if ((ret = copy_to_user(buffer, link, retval)) != 0) {
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				   "problem copying to user: %d\n", ret);
-			retval = ret;
-		}
-	}
-
-	__putname(link);
-	return retval;
-}
-
-/**
  * v9fs_vfs_follow_link - follow a symlink path
  * @dentry: dentry for symlink
  * @nd: nameidata
@@ -1230,7 +1192,6 @@ static const struct inode_operations v9fs_dir_inode_operations_ext = {
 	.rmdir = v9fs_vfs_rmdir,
 	.mknod = v9fs_vfs_mknod,
 	.rename = v9fs_vfs_rename,
-	.readlink = v9fs_vfs_readlink,
 	.getattr = v9fs_vfs_getattr,
 	.setattr = v9fs_vfs_setattr,
 };
@@ -1253,7 +1214,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
 };
 
 static const struct inode_operations v9fs_symlink_inode_operations = {
-	.readlink = v9fs_vfs_readlink,
+	.readlink = generic_readlink,
 	.follow_link = v9fs_vfs_follow_link,
 	.put_link = v9fs_vfs_put_link,
 	.getattr = v9fs_vfs_getattr,
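Note on the hunk above: 9p drops its private ->readlink() and reuses the VFS's generic_readlink(), which calls ->follow_link() and copies the resulting name to user space, so one target-fetching implementation serves both operations. A minimal sketch of the idiom for a hypothetical filesystem ("myfs_*" names are illustrative, not the 9p code):

#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/slab.h>

/* Sketch: only ->follow_link() needs a real implementation;
 * generic_readlink() reuses it for readlink(2). */
static void *myfs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	char *link = kmalloc(PATH_MAX, GFP_KERNEL);

	if (!link)
		link = ERR_PTR(-ENOMEM);
	else
		strcpy(link, "target");	/* fetch the real target here */
	nd_set_link(nd, link);
	return NULL;
}

static void myfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
{
	char *link = nd_get_link(nd);

	if (!IS_ERR(link))
		kfree(link);
}

static const struct inode_operations myfs_symlink_iops = {
	.readlink	= generic_readlink,
	.follow_link	= myfs_follow_link,
	.put_link	= myfs_put_link,
};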
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 14a86448572c..69357c0d9899 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -188,7 +188,8 @@ static void v9fs_kill_super(struct super_block *s)
 
 	P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
 
-	v9fs_dentry_release(s->s_root);	/* clunk root */
+	if (s->s_root)
+		v9fs_dentry_release(s->s_root);	/* clunk root */
 
 	kill_anon_super(s);
 
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index e511dc621a2e..0e40caaba456 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -106,8 +106,8 @@ struct affs_sb_info {
 	u32 s_last_bmap;
 	struct buffer_head *s_bmap_bh;
 	char *s_prefix;			/* Prefix for volumes and assigns. */
-	int s_prefix_len;		/* Length of prefix. */
 	char s_volume[32];		/* Volume prefix for absolute symlinks. */
+	spinlock_t symlink_lock;	/* protects the previous two */
 };
 
 #define SF_INTL		0x0001		/* International filesystem. */
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 960d336ec694..d70bbbac6b7b 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -341,10 +341,13 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 	p = (char *)AFFS_HEAD(bh)->table;
 	lc = '/';
 	if (*symname == '/') {
+		struct affs_sb_info *sbi = AFFS_SB(sb);
 		while (*symname == '/')
 			symname++;
-		while (AFFS_SB(sb)->s_volume[i])	/* Cannot overflow */
-			*p++ = AFFS_SB(sb)->s_volume[i++];
+		spin_lock(&sbi->symlink_lock);
+		while (sbi->s_volume[i])	/* Cannot overflow */
+			*p++ = sbi->s_volume[i++];
+		spin_unlock(&sbi->symlink_lock);
 	}
 	while (i < maxlen && (c = *symname++)) {
 		if (c == '.' && lc == '/' && *symname == '.' && symname[1] == '/') {
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 104fdcb3a7fc..d41e9673cd97 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -203,7 +203,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
 		switch (token) {
 		case Opt_bs:
 			if (match_int(&args[0], &n))
-				return -EINVAL;
+				return 0;
 			if (n != 512 && n != 1024 && n != 2048
 			    && n != 4096) {
 				printk ("AFFS: Invalid blocksize (512, 1024, 2048, 4096 allowed)\n");
@@ -213,7 +213,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
 			break;
 		case Opt_mode:
 			if (match_octal(&args[0], &option))
-				return 1;
+				return 0;
 			*mode = option & 0777;
 			*mount_opts |= SF_SETMODE;
 			break;
@@ -221,8 +221,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
 			*mount_opts |= SF_MUFS;
 			break;
 		case Opt_prefix:
-			/* Free any previous prefix */
-			kfree(*prefix);
 			*prefix = match_strdup(&args[0]);
 			if (!*prefix)
 				return 0;
@@ -233,21 +231,21 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
 			break;
 		case Opt_reserved:
 			if (match_int(&args[0], reserved))
-				return 1;
+				return 0;
 			break;
 		case Opt_root:
 			if (match_int(&args[0], root))
-				return 1;
+				return 0;
 			break;
 		case Opt_setgid:
 			if (match_int(&args[0], &option))
-				return 1;
+				return 0;
 			*gid = option;
 			*mount_opts |= SF_SETGID;
 			break;
 		case Opt_setuid:
 			if (match_int(&args[0], &option))
-				return -EINVAL;
+				return 0;
 			*uid = option;
 			*mount_opts |= SF_SETUID;
 			break;
@@ -311,11 +309,14 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
 		return -ENOMEM;
 	sb->s_fs_info = sbi;
 	mutex_init(&sbi->s_bmlock);
+	spin_lock_init(&sbi->symlink_lock);
 
 	if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
 				&blocksize,&sbi->s_prefix,
 				sbi->s_volume, &mount_flags)) {
 		printk(KERN_ERR "AFFS: Error parsing options\n");
+		kfree(sbi->s_prefix);
+		kfree(sbi);
 		return -EINVAL;
 	}
 	/* N.B. after this point s_prefix must be released */
@@ -516,14 +517,18 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 	unsigned long		 mount_flags;
 	int			 res = 0;
 	char			*new_opts = kstrdup(data, GFP_KERNEL);
+	char			 volume[32];
+	char			*prefix = NULL;
 
 	pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data);
 
 	*flags |= MS_NODIRATIME;
 
+	memcpy(volume, sbi->s_volume, 32);
 	if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block,
-			   &blocksize, &sbi->s_prefix, sbi->s_volume,
+			   &blocksize, &prefix, volume,
 			   &mount_flags)) {
+		kfree(prefix);
 		kfree(new_opts);
 		return -EINVAL;
 	}
@@ -534,6 +539,14 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 	sbi->s_mode  = mode;
 	sbi->s_uid   = uid;
 	sbi->s_gid   = gid;
+	/* protect against readers */
+	spin_lock(&sbi->symlink_lock);
+	if (prefix) {
+		kfree(sbi->s_prefix);
+		sbi->s_prefix = prefix;
+	}
+	memcpy(sbi->s_volume, volume, 32);
+	spin_unlock(&sbi->symlink_lock);
 
 	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
 		unlock_kernel();
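Note on the affs remount hunks: new option values are now parsed into locals (volume[], prefix) and only published under symlink_lock, so a concurrent symlink reader never observes a half-updated prefix/volume pair or a freed s_prefix. A condensed sketch of the stage-then-publish pattern with illustrative types (not the affs structures):

#include <linux/slab.h>
#include <linux/spinlock.h>

/* Sketch: parse while unlocked (kstrdup may sleep), take the lock
 * only for the pointer swap that readers can race with. */
struct cfg {
	spinlock_t	lock;		/* readers take this too */
	char		*prefix;
};

static int cfg_update_prefix(struct cfg *cfg, const char *new_prefix)
{
	char *prefix = kstrdup(new_prefix, GFP_KERNEL);

	if (!prefix)
		return -ENOMEM;

	spin_lock(&cfg->lock);
	kfree(cfg->prefix);		/* old value, no reader holds it now */
	cfg->prefix = prefix;
	spin_unlock(&cfg->lock);
	return 0;
}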
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index 41782539c907..ee00f08c4f53 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -20,7 +20,6 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
 	int			 i, j;
 	char			 c;
 	char			 lc;
-	char			*pf;
 
 	pr_debug("AFFS: follow_link(ino=%lu)\n",inode->i_ino);
 
@@ -32,11 +31,15 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
 	j  = 0;
 	lf = (struct slink_front *)bh->b_data;
 	lc = 0;
-	pf = AFFS_SB(inode->i_sb)->s_prefix ? AFFS_SB(inode->i_sb)->s_prefix : "/";
 
 	if (strchr(lf->symname,':')) {	/* Handle assign or volume name */
+		struct affs_sb_info *sbi = AFFS_SB(inode->i_sb);
+		char *pf;
+		spin_lock(&sbi->symlink_lock);
+		pf = sbi->s_prefix ? sbi->s_prefix : "/";
 		while (i < 1023 && (c = pf[i]))
 			link[i++] = c;
+		spin_unlock(&sbi->symlink_lock);
 		while (i < 1023 && lf->symname[j] != ':')
 			link[i++] = lf->symname[j++];
 		if (i < 1023)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 6f60336c6628..8f3d9fd89604 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -353,35 +353,35 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 	struct inode *inode;
 	unsigned i, imap_len;
 	struct bfs_sb_info *info;
-	long ret = -EINVAL;
+	int ret = -EINVAL;
 	unsigned long i_sblock, i_eblock, i_eoff, s_size;
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;
+	mutex_init(&info->bfs_lock);
 	s->s_fs_info = info;
 
 	sb_set_blocksize(s, BFS_BSIZE);
 
-	bh = sb_bread(s, 0);
-	if(!bh)
+	info->si_sbh = sb_bread(s, 0);
+	if (!info->si_sbh)
 		goto out;
-	bfs_sb = (struct bfs_super_block *)bh->b_data;
+	bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data;
 	if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
 		if (!silent)
 			printf("No BFS filesystem on %s (magic=%08x)\n",
 			       s->s_id, le32_to_cpu(bfs_sb->s_magic));
-		goto out;
+		goto out1;
 	}
 	if (BFS_UNCLEAN(bfs_sb, s) && !silent)
 		printf("%s is unclean, continuing\n", s->s_id);
 
 	s->s_magic = BFS_MAGIC;
-	info->si_sbh = bh;
 
 	if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) {
 		printf("Superblock is corrupted\n");
-		goto out;
+		goto out1;
 	}
 
 	info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) /
@@ -390,7 +390,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 	imap_len = (info->si_lasti / 8) + 1;
 	info->si_imap = kzalloc(imap_len, GFP_KERNEL);
 	if (!info->si_imap)
-		goto out;
+		goto out1;
 	for (i = 0; i < BFS_ROOT_INO; i++)
 		set_bit(i, info->si_imap);
 
@@ -398,15 +398,13 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 	inode = bfs_iget(s, BFS_ROOT_INO);
 	if (IS_ERR(inode)) {
 		ret = PTR_ERR(inode);
-		kfree(info->si_imap);
-		goto out;
+		goto out2;
 	}
 	s->s_root = d_alloc_root(inode);
 	if (!s->s_root) {
 		iput(inode);
 		ret = -ENOMEM;
-		kfree(info->si_imap);
-		goto out;
+		goto out2;
 	}
 
 	info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS;
@@ -419,10 +417,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 	bh = sb_bread(s, info->si_blocks - 1);
 	if (!bh) {
 		printf("Last block not available: %lu\n", info->si_blocks - 1);
-		iput(inode);
 		ret = -EIO;
-		kfree(info->si_imap);
-		goto out;
+		goto out3;
 	}
 	brelse(bh);
 
@@ -459,11 +455,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 			printf("Inode 0x%08x corrupted\n", i);
 
 			brelse(bh);
-			s->s_root = NULL;
-			kfree(info->si_imap);
-			kfree(info);
-			s->s_fs_info = NULL;
-			return -EIO;
+			ret = -EIO;
+			goto out3;
 		}
 
 		if (!di->i_ino) {
@@ -483,11 +476,17 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 		s->s_dirt = 1;
 	}
 	dump_imap("read_super", s);
-	mutex_init(&info->bfs_lock);
 	return 0;
 
+out3:
+	dput(s->s_root);
+	s->s_root = NULL;
+out2:
+	kfree(info->si_imap);
+out1:
+	brelse(info->si_sbh);
 out:
-	brelse(bh);
+	mutex_destroy(&info->bfs_lock);
 	kfree(info);
 	s->s_fs_info = NULL;
 	return ret;
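Note on the bfs_fill_super() rework: the scattered per-error cleanup is replaced by a single goto ladder (out3 -> out2 -> out1 -> out), where each label releases exactly what was acquired after the previous one. The shape of the idiom, reduced to a self-contained sketch (purely illustrative, not the bfs code):

#include <linux/slab.h>

/* Sketch of the goto-unwind ladder: acquire in order, release in
 * reverse, one label per acquisition step. */
static int setup_three(void **pa, void **pb, void **pc)
{
	void *a, *b, *c;
	int ret = -ENOMEM;

	a = kmalloc(64, GFP_KERNEL);
	if (!a)
		goto out;
	b = kmalloc(64, GFP_KERNEL);
	if (!b)
		goto out_a;
	c = kmalloc(64, GFP_KERNEL);
	if (!c)
		goto out_b;

	*pa = a;
	*pb = b;
	*pc = c;
	return 0;

out_b:
	kfree(b);
out_a:
	kfree(a);
out:
	return ret;
}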
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 346b69405363..fdd397099172 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -264,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 #else
 	set_personality(PER_LINUX);
 #endif
+	setup_new_exec(bprm);
 
 	current->mm->end_code = ex.a_text +
 		(current->mm->start_code = N_TXTADDR(ex));
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index edd90c49003c..fd5b2ea5d299 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 				goto out_free_interp;
 
-			/*
-			 * The early SET_PERSONALITY here is so that the lookup
-			 * for the interpreter happens in the namespace of the
-			 * to-be-execed image.  SET_PERSONALITY can select an
-			 * alternate root.
-			 *
-			 * However, SET_PERSONALITY is NOT allowed to switch
-			 * this task into the new images's memory mapping
-			 * policy - that is, TASK_SIZE must still evaluate to
-			 * that which is appropriate to the execing application.
-			 * This is because exit_mmap() needs to have TASK_SIZE
-			 * evaluate to the size of the old image.
-			 *
-			 * So if (say) a 64-bit application is execing a 32-bit
-			 * application it is the architecture's responsibility
-			 * to defer changing the value of TASK_SIZE until the
-			 * switch really is going to happen - do this in
-			 * flush_thread().	- akpm
-			 */
-			SET_PERSONALITY(loc->elf_ex);
-
 			interpreter = open_exec(elf_interpreter);
 			retval = PTR_ERR(interpreter);
 			if (IS_ERR(interpreter))
@@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		/* Verify the interpreter has a valid arch */
 		if (!elf_check_arch(&loc->interp_elf_ex))
 			goto out_free_dentry;
-	} else {
-		/* Executables without an interpreter also need a personality  */
-		SET_PERSONALITY(loc->elf_ex);
 	}
 
 	/* Flush all traces of the currently running executable */
@@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 
 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 		current->flags |= PF_RANDOMIZE;
-	arch_pick_mmap_layout(current->mm);
+
+	setup_new_exec(bprm);
 
 	/* Do this so that we can load the interpreter, if need be.  We will
 	   change some of these later */
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c25256a5c5b0..18d77297ccc8 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -171,6 +171,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 #ifdef ELF_FDPIC_PLAT_INIT
 	unsigned long dynaddr;
 #endif
+#ifndef CONFIG_MMU
+	unsigned long stack_prot;
+#endif
 	struct file *interpreter = NULL; /* to shut gcc up */
 	char *interpreter_name = NULL;
 	int executable_stack;
@@ -316,6 +319,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 	 * defunct, deceased, etc. after this point we have to exit via
 	 * error_kill */
 	set_personality(PER_LINUX_FDPIC);
+	if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
+		current->personality |= READ_IMPLIES_EXEC;
+
+	setup_new_exec(bprm);
+
 	set_binfmt(&elf_fdpic_format);
 
 	current->mm->start_code = 0;
@@ -377,9 +385,13 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 	if (stack_size < PAGE_SIZE * 2)
 		stack_size = PAGE_SIZE * 2;
 
+	stack_prot = PROT_READ | PROT_WRITE;
+	if (executable_stack == EXSTACK_ENABLE_X ||
+	    (executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
+		stack_prot |= PROT_EXEC;
+
 	down_write(&current->mm->mmap_sem);
-	current->mm->start_brk = do_mmap(NULL, 0, stack_size,
-					 PROT_READ | PROT_WRITE | PROT_EXEC,
+	current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot,
 					 MAP_PRIVATE | MAP_ANONYMOUS |
 					 MAP_UNINITIALIZED | MAP_GROWSDOWN,
 					 0);
@@ -1798,11 +1810,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	ELF_CORE_WRITE_EXTRA_DATA;
 #endif
 
-	if (file->f_pos != offset) {
+	if (cprm->file->f_pos != offset) {
 		/* Sanity check */
 		printk(KERN_WARNING
 		       "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n",
-		       file->f_pos, offset);
+		       cprm->file->f_pos, offset);
 	}
 
 end_coredump:
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index d4a00ea1054c..42c6b4a54445 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 
 		/* OK, This is the point of no return */
 		set_personality(PER_LINUX_32BIT);
+		setup_new_exec(bprm);
 	}
 
 	/*
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 2a9b5330cc5e..cc8560f6c9b0 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	/* OK, This is the point of no return */
 	current->flags &= ~PF_FORKNOEXEC;
 	current->personality = PER_HPUX;
+	setup_new_exec(bprm);
 
 	/* Set the task size for HP-UX processes such that
 	 * the gateway page is outside the address space.
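Note on the binfmt_* hunks: each loader now calls setup_new_exec(bprm) right after its point of no return, so work that depends on the personality (mmap layout selection and friends) runs after the loader has set it, instead of being buried inside flush_old_exec(). The resulting call order in a loader, schematically (error handling and the actual segment mapping are elided; this is a sketch, not any one loader):

#include <linux/binfmts.h>
#include <linux/personality.h>
#include <linux/ptrace.h>

static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	int retval;

	retval = flush_old_exec(bprm);	/* point of no return on success */
	if (retval)
		return retval;

	set_personality(PER_LINUX);	/* must precede setup_new_exec() */
	setup_new_exec(bprm);		/* layout/comm setup that depends on it */

	/* ... map segments, build the stack, start_thread() ... */
	return 0;
}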
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 49a34e7f7306..a16f29e888cd 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -61,7 +61,7 @@ static inline unsigned int vecs_to_idx(unsigned int nr)
 
 static inline int use_bip_pool(unsigned int idx)
 {
-	if (idx == BIOVEC_NR_POOLS)
+	if (idx == BIOVEC_MAX_IDX)
 		return 1;
 
 	return 0;
@@ -95,6 +95,7 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
 
 	/* Use mempool if lower order alloc failed or max vecs were requested */
 	if (bip == NULL) {
+		idx = BIOVEC_MAX_IDX;  /* so we free the payload properly later */
 		bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
 
 		if (unlikely(bip == NULL)) {
diff --git a/fs/bio.c b/fs/bio.c
index 76e6713abf94..88094afc29ea 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -78,7 +78,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
 
 	i = 0;
 	while (i < bio_slab_nr) {
-		struct bio_slab *bslab = &bio_slabs[i];
+		bslab = &bio_slabs[i];
 
 		if (!bslab->slab && entry == -1)
 			entry = i;
@@ -542,13 +542,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 
 		if (page == prev->bv_page &&
 		    offset == prev->bv_offset + prev->bv_len) {
+			unsigned int prev_bv_len = prev->bv_len;
 			prev->bv_len += len;
 
 			if (q->merge_bvec_fn) {
 				struct bvec_merge_data bvm = {
+					/* prev_bvec is already charged in
+					   bi_size, discharge it in order to
+					   simulate merging updated prev_bvec
+					   as new bvec. */
 					.bi_bdev = bio->bi_bdev,
 					.bi_sector = bio->bi_sector,
-					.bi_size = bio->bi_size,
+					.bi_size = bio->bi_size - prev_bv_len,
 					.bi_rw = bio->bi_rw,
 				};
 
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 2e9e69987a82..6df6d6ed74fd 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -112,12 +112,14 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		mode = inode->i_mode;
-		ret = posix_acl_equiv_mode(acl, &mode);
-		if (ret < 0)
-			return ret;
-		ret = 0;
-		inode->i_mode = mode;
 		name = POSIX_ACL_XATTR_ACCESS;
+		if (acl) {
+			ret = posix_acl_equiv_mode(acl, &mode);
+			if (ret < 0)
+				return ret;
+			inode->i_mode = mode;
+		}
+		ret = 0;
 		break;
 	case ACL_TYPE_DEFAULT:
 		if (!S_ISDIR(inode->i_mode))
@@ -242,6 +244,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
 						    ACL_TYPE_ACCESS);
 			}
 		}
+		posix_acl_release(clone);
 	}
 failed:
 	posix_acl_release(acl);
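Note on the acl.c hunks: a NULL acl passed to set_acl means "remove the ACL", and the old ACCESS branch dereferenced it unconditionally inside posix_acl_equiv_mode(); the second hunk also plugs a leaked clone reference. A reduced sketch of the NULL guard (illustrative helper; posix_acl_equiv_mode() is the real VFS function):

#include <linux/fs.h>
#include <linux/posix_acl.h>

/* Sketch: skip mode derivation entirely when the caller is
 * deleting the ACL (acl == NULL). */
static int apply_access_acl(struct inode *inode, struct posix_acl *acl)
{
	mode_t mode = inode->i_mode;
	int ret;

	if (acl) {
		ret = posix_acl_equiv_mode(acl, &mode);
		if (ret < 0)
			return ret;
		inode->i_mode = mode;
	}
	return 0;
}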
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9f806dd04c27..2aa8ec6a0981 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1161,6 +1161,7 @@ struct btrfs_root {
 #define BTRFS_MOUNT_SSD_SPREAD		(1 << 8)
 #define BTRFS_MOUNT_NOSSD		(1 << 9)
 #define BTRFS_MOUNT_DISCARD		(1 << 10)
+#define BTRFS_MOUNT_FORCE_COMPRESS	(1 << 11)
 
 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 009e3bd18f23..2b59201b955c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1982,7 +1982,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		ret = btrfs_recover_relocation(tree_root);
-		BUG_ON(ret);
+		if (ret < 0) {
+			printk(KERN_WARNING
+			       "btrfs: failed to recover relocation\n");
+			err = -EINVAL;
+			goto fail_trans_kthread;
+		}
 	}
 
 	location.objectid = BTRFS_FS_TREE_OBJECTID;
@@ -1993,6 +1998,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	if (!fs_info->fs_root)
 		goto fail_trans_kthread;
 
+	if (!(sb->s_flags & MS_RDONLY)) {
+		down_read(&fs_info->cleanup_work_sem);
+		btrfs_orphan_cleanup(fs_info->fs_root);
+		up_read(&fs_info->cleanup_work_sem);
+	}
+
 	return tree_root;
 
 fail_trans_kthread:
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 56e50137d0e6..559f72489b3b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -83,6 +83,17 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
 	return (cache->flags & bits) == bits;
 }
 
+void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
+{
+	atomic_inc(&cache->count);
+}
+
+void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
+{
+	if (atomic_dec_and_test(&cache->count))
+		kfree(cache);
+}
+
 /*
  * this adds the block group to the fs_info rb tree for the block group
  * cache
@@ -156,7 +167,7 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
 		}
 	}
 	if (ret)
-		atomic_inc(&ret->count);
+		btrfs_get_block_group(ret);
 	spin_unlock(&info->block_group_cache_lock);
 
 	return ret;
@@ -407,6 +418,8 @@ err:
 
 	put_caching_control(caching_ctl);
 	atomic_dec(&block_group->space_info->caching_threads);
+	btrfs_put_block_group(block_group);
+
 	return 0;
 }
 
@@ -447,6 +460,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
 	up_write(&fs_info->extent_commit_sem);
 
 	atomic_inc(&cache->space_info->caching_threads);
+	btrfs_get_block_group(cache);
 
 	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
 			  cache->key.objectid);
@@ -486,12 +500,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
 	return cache;
 }
 
-void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
-{
-	if (atomic_dec_and_test(&cache->count))
-		kfree(cache);
-}
-
 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 						  u64 flags)
 {
@@ -2582,7 +2590,7 @@ next_block_group(struct btrfs_root *root,
 	if (node) {
 		cache = rb_entry(node, struct btrfs_block_group_cache,
 				 cache_node);
-		atomic_inc(&cache->count);
+		btrfs_get_block_group(cache);
 	} else
 		cache = NULL;
 	spin_unlock(&root->fs_info->block_group_cache_lock);
@@ -4227,7 +4235,7 @@ search:
 		u64 offset;
 		int cached;
 
-		atomic_inc(&block_group->count);
+		btrfs_get_block_group(block_group);
 		search_start = block_group->key.objectid;
 
 have_block_group:
@@ -4315,7 +4323,7 @@ have_block_group:
 
 			btrfs_put_block_group(block_group);
 			block_group = last_ptr->block_group;
-			atomic_inc(&block_group->count);
+			btrfs_get_block_group(block_group);
 			spin_unlock(&last_ptr->lock);
 			spin_unlock(&last_ptr->refill_lock);
 
@@ -5394,10 +5402,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 	int ret;
 
 	while (level >= 0) {
-		if (path->slots[level] >=
-		    btrfs_header_nritems(path->nodes[level]))
-			break;
-
 		ret = walk_down_proc(trans, root, path, wc, lookup_info);
 		if (ret > 0)
 			break;
@@ -5405,6 +5409,10 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 		if (level == 0)
 			break;
 
+		if (path->slots[level] >=
+		    btrfs_header_nritems(path->nodes[level]))
+			break;
+
 		ret = do_walk_down(trans, root, path, wc, &lookup_info);
 		if (ret > 0) {
 			path->slots[level]++;
@@ -7395,9 +7403,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 		wait_block_group_cache_done(block_group);
 
 		btrfs_remove_free_space_cache(block_group);
-
-		WARN_ON(atomic_read(&block_group->count) != 1);
-		kfree(block_group);
+		btrfs_put_block_group(block_group);
 
 		spin_lock(&info->block_group_cache_lock);
 	}
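Note on the extent-tree.c hunks: the raw atomic_inc(&cache->count)/kfree pairs become named get/put helpers, and the caching kthread now holds its own reference for its whole lifetime, taken before kthread_run() and dropped at thread exit, so the block group cannot be freed under the worker. A generic sketch of that rule with illustrative types (not the btrfs structures):

#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <asm/atomic.h>

struct worker_arg {
	atomic_t count;
	/* ... payload ... */
};

static void arg_put(struct worker_arg *w)
{
	if (atomic_dec_and_test(&w->count))
		kfree(w);
}

static int worker(void *data)
{
	struct worker_arg *w = data;

	/* ... do work ... */
	arg_put(w);			/* drop the spawn-time reference */
	return 0;
}

static void spawn(struct worker_arg *w)
{
	atomic_inc(&w->count);		/* reference owned by the thread */
	if (IS_ERR(kthread_run(worker, w, "worker")))
		arg_put(w);		/* thread never ran: undo */
}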
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 96577e8bf9fd..b177ed319612 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3165,10 +3165,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 		spin_unlock(&tree->buffer_lock);
 		goto free_eb;
 	}
-	spin_unlock(&tree->buffer_lock);
-
 	/* add one reference for the tree */
 	atomic_inc(&eb->refs);
+	spin_unlock(&tree->buffer_lock);
 	return eb;
 
 free_eb:
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 46bea0f4dc7b..428fcac45f90 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -155,20 +155,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
 	return NULL;
 }
 
-/*
- * look for an offset in the tree, and if it can't be found, return
- * the first offset we can find smaller than 'offset'.
- */
-static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
-{
-	struct rb_node *prev;
-	struct rb_node *ret;
-	ret = __tree_search(root, offset, &prev, NULL);
-	if (!ret)
-		return prev;
-	return ret;
-}
-
 /* check to see if two extent_map structs are adjacent and safe to merge */
 static int mergable_maps(struct extent_map *prev, struct extent_map *next)
 {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index feaa13b105d9..9d0809629967 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -506,7 +506,8 @@ next_slot:
 }
 
 static int extent_mergeable(struct extent_buffer *leaf, int slot,
-			    u64 objectid, u64 bytenr, u64 *start, u64 *end)
+			    u64 objectid, u64 bytenr, u64 orig_offset,
+			    u64 *start, u64 *end)
 {
 	struct btrfs_file_extent_item *fi;
 	struct btrfs_key key;
@@ -522,6 +523,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
 	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
 	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
 	    btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
+	    btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
 	    btrfs_file_extent_compression(leaf, fi) ||
 	    btrfs_file_extent_encryption(leaf, fi) ||
 	    btrfs_file_extent_other_encoding(leaf, fi))
@@ -561,6 +563,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 	u64 split;
 	int del_nr = 0;
 	int del_slot = 0;
+	int recow;
 	int ret;
 
 	btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -568,6 +571,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 again:
+	recow = 0;
 	split = start;
 	key.objectid = inode->i_ino;
 	key.type = BTRFS_EXTENT_DATA_KEY;
@@ -591,12 +595,60 @@ again:
 	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
 	num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
 	orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
+	memcpy(&new_key, &key, sizeof(new_key));
+
+	if (start == key.offset && end < extent_end) {
+		other_start = 0;
+		other_end = start;
+		if (extent_mergeable(leaf, path->slots[0] - 1,
+				     inode->i_ino, bytenr, orig_offset,
+				     &other_start, &other_end)) {
+			new_key.offset = end;
+			btrfs_set_item_key_safe(trans, root, path, &new_key);
+			fi = btrfs_item_ptr(leaf, path->slots[0],
+					    struct btrfs_file_extent_item);
+			btrfs_set_file_extent_num_bytes(leaf, fi,
+							extent_end - end);
+			btrfs_set_file_extent_offset(leaf, fi,
+						     end - orig_offset);
+			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
+					    struct btrfs_file_extent_item);
+			btrfs_set_file_extent_num_bytes(leaf, fi,
+							end - other_start);
+			btrfs_mark_buffer_dirty(leaf);
+			goto out;
+		}
+	}
+
+	if (start > key.offset && end == extent_end) {
+		other_start = end;
+		other_end = 0;
+		if (extent_mergeable(leaf, path->slots[0] + 1,
+				     inode->i_ino, bytenr, orig_offset,
+				     &other_start, &other_end)) {
+			fi = btrfs_item_ptr(leaf, path->slots[0],
+					    struct btrfs_file_extent_item);
+			btrfs_set_file_extent_num_bytes(leaf, fi,
+							start - key.offset);
+			path->slots[0]++;
+			new_key.offset = start;
+			btrfs_set_item_key_safe(trans, root, path, &new_key);
+
+			fi = btrfs_item_ptr(leaf, path->slots[0],
+					    struct btrfs_file_extent_item);
+			btrfs_set_file_extent_num_bytes(leaf, fi,
+							other_end - start);
+			btrfs_set_file_extent_offset(leaf, fi,
+						     start - orig_offset);
+			btrfs_mark_buffer_dirty(leaf);
+			goto out;
+		}
+	}
 
 	while (start > key.offset || end < extent_end) {
 		if (key.offset == start)
 			split = end;
 
-		memcpy(&new_key, &key, sizeof(new_key));
 		new_key.offset = split;
 		ret = btrfs_duplicate_item(trans, root, path, &new_key);
 		if (ret == -EAGAIN) {
@@ -631,15 +683,18 @@ again:
 			path->slots[0]--;
 			extent_end = end;
 		}
+		recow = 1;
 	}
 
-	fi = btrfs_item_ptr(leaf, path->slots[0],
-			    struct btrfs_file_extent_item);
-
 	other_start = end;
 	other_end = 0;
-	if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
-			     bytenr, &other_start, &other_end)) {
+	if (extent_mergeable(leaf, path->slots[0] + 1,
+			     inode->i_ino, bytenr, orig_offset,
+			     &other_start, &other_end)) {
+		if (recow) {
+			btrfs_release_path(root, path);
+			goto again;
+		}
 		extent_end = other_end;
 		del_slot = path->slots[0] + 1;
 		del_nr++;
@@ -650,8 +705,13 @@ again:
 	}
 	other_start = 0;
 	other_end = start;
-	if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
-			     bytenr, &other_start, &other_end)) {
+	if (extent_mergeable(leaf, path->slots[0] - 1,
+			     inode->i_ino, bytenr, orig_offset,
+			     &other_start, &other_end)) {
+		if (recow) {
+			btrfs_release_path(root, path);
+			goto again;
+		}
 		key.offset = other_start;
 		del_slot = path->slots[0];
 		del_nr++;
@@ -660,22 +720,22 @@ again:
 					inode->i_ino, orig_offset);
 		BUG_ON(ret);
 	}
+	fi = btrfs_item_ptr(leaf, path->slots[0],
+			    struct btrfs_file_extent_item);
 	if (del_nr == 0) {
 		btrfs_set_file_extent_type(leaf, fi,
 					   BTRFS_FILE_EXTENT_REG);
 		btrfs_mark_buffer_dirty(leaf);
-		goto out;
-	}
-
-	fi = btrfs_item_ptr(leaf, del_slot - 1,
-			    struct btrfs_file_extent_item);
-	btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
-	btrfs_set_file_extent_num_bytes(leaf, fi,
-					extent_end - key.offset);
-	btrfs_mark_buffer_dirty(leaf);
+	} else {
+		btrfs_set_file_extent_type(leaf, fi,
+					   BTRFS_FILE_EXTENT_REG);
+		btrfs_set_file_extent_num_bytes(leaf, fi,
+						extent_end - key.offset);
+		btrfs_mark_buffer_dirty(leaf);
 
 	ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
 	BUG_ON(ret);
+	}
 out:
 	btrfs_free_path(path);
 	return 0;
@@ -1073,7 +1133,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 	}
 	mutex_lock(&dentry->d_inode->i_mutex);
 out:
-	return ret > 0 ? EIO : ret;
+	return ret > 0 ? -EIO : ret;
 }
 
 static const struct vm_operations_struct btrfs_file_vm_ops = {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5440bab23635..4deb280f8969 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -483,7 +483,8 @@ again:
 		nr_pages_ret = 0;
 
 		/* flag the file so we don't compress in the future */
-		BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
+		if (!btrfs_test_opt(root, FORCE_COMPRESS))
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
 	}
 	if (will_compress) {
 		*num_added += 1;
@@ -1680,24 +1681,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 	 * before we start the transaction.  It limits the amount of btree
 	 * reads required while inside the transaction.
 	 */
-static noinline void reada_csum(struct btrfs_root *root,
-				struct btrfs_path *path,
-				struct btrfs_ordered_extent *ordered_extent)
-{
-	struct btrfs_ordered_sum *sum;
-	u64 bytenr;
-
-	sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
-			 list);
-	bytenr = sum->sums[0].bytenr;
-
-	/*
-	 * we don't care about the results, the point of this search is
-	 * just to get the btree leaves into ram
-	 */
-	btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
-}
-
 /* as ordered data IO finishes, this gets called so we can finish
  * an ordered extent if the range of bytes in the file it covers are
  * fully written.
@@ -1708,7 +1691,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	struct btrfs_trans_handle *trans;
 	struct btrfs_ordered_extent *ordered_extent = NULL;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-	struct btrfs_path *path;
 	int compressed = 0;
 	int ret;
 
@@ -1716,32 +1698,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	if (!ret)
 		return 0;
 
-	/*
-	 * before we join the transaction, try to do some of our IO.
-	 * This will limit the amount of IO that we have to do with
-	 * the transaction running.  We're unlikely to need to do any
-	 * IO if the file extents are new, the disk_i_size checks
-	 * covers the most common case.
-	 */
-	if (start < BTRFS_I(inode)->disk_i_size) {
-		path = btrfs_alloc_path();
-		if (path) {
-			ret = btrfs_lookup_file_extent(NULL, root, path,
-						       inode->i_ino,
-						       start, 0);
-			ordered_extent = btrfs_lookup_ordered_extent(inode,
-								     start);
-			if (!list_empty(&ordered_extent->list)) {
-				btrfs_release_path(root, path);
-				reada_csum(root, path, ordered_extent);
-			}
-			btrfs_free_path(path);
-		}
-	}
-
-	if (!ordered_extent)
-		ordered_extent = btrfs_lookup_ordered_extent(inode, start);
+	ordered_extent = btrfs_lookup_ordered_extent(inode, start);
 	BUG_ON(!ordered_extent);
+
 	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
 		BUG_ON(!list_empty(&ordered_extent->list));
 		ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
@@ -3995,7 +3954,11 @@ skip:
 
 	/* Reached end of directory/root. Bump pos past the last item. */
 	if (key_type == BTRFS_DIR_INDEX_KEY)
-		filp->f_pos = INT_LIMIT(off_t);
+		/*
+		 * 32-bit glibc will use getdents64, but then strtol -
+		 * so the last number we can serve is this.
+		 */
+		filp->f_pos = 0x7fffffff;
 	else
 		filp->f_pos++;
 nopos:
@@ -5789,7 +5752,7 @@ out_fail:
 }
 
 static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
-			       u64 alloc_hint, int mode)
+			       u64 alloc_hint, int mode, loff_t actual_len)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5798,6 +5761,7 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
 	u64 cur_offset = start;
 	u64 num_bytes = end - start;
 	int ret = 0;
+	u64 i_size;
 
 	while (num_bytes > 0) {
 		alloc_size = min(num_bytes, root->fs_info->max_extent);
@@ -5835,9 +5799,15 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
 		inode->i_ctime = CURRENT_TIME;
 		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
 		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-		    cur_offset > inode->i_size) {
-			i_size_write(inode, cur_offset);
-			btrfs_ordered_update_i_size(inode, cur_offset, NULL);
+		    (actual_len > inode->i_size) &&
+		    (cur_offset > inode->i_size)) {
+
+			if (cur_offset > actual_len)
+				i_size = actual_len;
+			else
+				i_size = cur_offset;
+			i_size_write(inode, i_size);
+			btrfs_ordered_update_i_size(inode, i_size, NULL);
 		}
 
 		ret = btrfs_update_inode(trans, root, inode);
@@ -5930,7 +5900,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 		    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
 			ret = prealloc_file_range(inode,
 						  cur_offset, last_byte,
-						  alloc_hint, mode);
+						  alloc_hint, mode, offset+len);
 			if (ret < 0) {
 				free_extent_map(em);
 				break;
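Note on the prealloc_file_range() change: with actual_len (the caller's offset+len) threaded through, the i_size update fires only once both the requested range end and the allocation cursor have passed the old i_size, and the new size is effectively the smaller of cursor and request. A condensed sketch of just that clamp (illustrative helper, not the committed code):

#include <linux/kernel.h>
#include <linux/types.h>

/* Sketch: the visible size grows to the allocation cursor but is
 * capped at the length the caller actually asked for. */
static loff_t fallocate_new_isize(loff_t cur_offset, loff_t actual_len)
{
	return min_t(loff_t, cur_offset, actual_len);
}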
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index b10a49d4bc6a..5c2a9e78a949 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -626,6 +626,8 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
 
 	if (ordered)
 		offset = entry_end(ordered);
+	else
+		offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
 
 	mutex_lock(&tree->mutex);
 	disk_i_size = BTRFS_I(inode)->disk_i_size;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index a9728680eca8..ab7ab5318745 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3281,8 +3281,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 		return -ENOMEM;
 
 	path = btrfs_alloc_path();
-	if (!path)
+	if (!path) {
+		kfree(cluster);
 		return -ENOMEM;
+	}
 
 	rc->extents_found = 0;
 	rc->extents_skipped = 0;
@@ -3762,7 +3764,8 @@ out:
 					       BTRFS_DATA_RELOC_TREE_OBJECTID);
 		if (IS_ERR(fs_root))
 			err = PTR_ERR(fs_root);
-		btrfs_orphan_cleanup(fs_root);
+		else
+			btrfs_orphan_cleanup(fs_root);
 	}
 	return err;
 }
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3f9b45704fcd..8a1ea6e64575 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,7 +66,8 @@ enum {
 	Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
 	Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
 	Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
-	Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
+	Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio,
+	Opt_flushoncommit,
 	Opt_discard, Opt_err,
 };
 
@@ -82,6 +83,7 @@ static match_table_t tokens = {
 	{Opt_alloc_start, "alloc_start=%s"},
 	{Opt_thread_pool, "thread_pool=%d"},
 	{Opt_compress, "compress"},
+	{Opt_compress_force, "compress-force"},
 	{Opt_ssd, "ssd"},
 	{Opt_ssd_spread, "ssd_spread"},
 	{Opt_nossd, "nossd"},
@@ -173,6 +175,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 			printk(KERN_INFO "btrfs: use compression\n");
 			btrfs_set_opt(info->mount_opt, COMPRESS);
 			break;
+		case Opt_compress_force:
+			printk(KERN_INFO "btrfs: forcing compression\n");
+			btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+			btrfs_set_opt(info->mount_opt, COMPRESS);
+			break;
 		case Opt_ssd:
 			printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
 			btrfs_set_opt(info->mount_opt, SSD);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 198cff28766d..41ecbb2347f2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1135,7 +1135,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 		     root->fs_info->avail_metadata_alloc_bits;
 
 	if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
-	    root->fs_info->fs_devices->rw_devices <= 4) {
+	    root->fs_info->fs_devices->num_devices <= 4) {
 		printk(KERN_ERR "btrfs: unable to go below four devices "
 		       "on raid10\n");
 		ret = -EINVAL;
@@ -1143,7 +1143,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 	}
 
 	if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
-	    root->fs_info->fs_devices->rw_devices <= 2) {
+	    root->fs_info->fs_devices->num_devices <= 2) {
 		printk(KERN_ERR "btrfs: unable to go below two "
 		       "devices on raid1\n");
 		ret = -EINVAL;
@@ -1434,8 +1434,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 		return -EINVAL;
 
 	bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
-	if (!bdev)
-		return -EIO;
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
 
 	if (root->fs_info->fs_devices->seeding) {
 		seeding_dev = 1;
@@ -2538,6 +2538,11 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
 	if (!em)
 		return 1;
 
+	if (btrfs_test_opt(root, DEGRADED)) {
+		free_extent_map(em);
+		return 0;
+	}
+
 	map = (struct map_lookup *)em->bdev;
 	for (i = 0; i < map->num_stripes; i++) {
 		if (!map->stripes[i].dev->writeable) {
@@ -2649,8 +2654,10 @@ again:
 	em = lookup_extent_mapping(em_tree, logical, *length);
 	read_unlock(&em_tree->lock);
 
-	if (!em && unplug_page)
+	if (!em && unplug_page) {
+		kfree(multi);
 		return 0;
+	}
 
 	if (!em) {
 		printk(KERN_CRIT "unable to find logical %llu len %llu\n",
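
The open_bdev_exclusive() fix above hinges on the ERR_PTR convention: the
helper reports failure by encoding an errno into the returned pointer, never
by returning NULL, so the old "if (!bdev)" test could not fire and the fixed
return value -EIO hid the real error. The canonical shape of the check:

	bdev = open_bdev_exclusive(device_path, 0, holder);
	if (IS_ERR(bdev))		/* failure is ERR_PTR(-errno), not NULL */
		return PTR_ERR(bdev);	/* propagate e.g. -ENOENT or -EBUSY */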
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 094ea65afc85..7b2600b380d7 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -5,7 +5,9 @@ have duplicated data). Fix oops in cifs_lookup. Workaround problem
 mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session.
 Disable use of server inode numbers when server only
 partially supports them (e.g. for one server querying inode numbers on
-FindFirst fails but QPathInfo queries works).
+FindFirst fails but QPathInfo queries works). Fix oops with dfs in
+cifs_put_smb_ses. Fix mmap to work on directio mounts (needed
+for OpenOffice when on forcedirectio mount e.g.)
 
 Version 1.60
 -------------
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index fea9e898c4ba..b44ce0a0711c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -269,7 +269,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
 	int err;
 
 	mntget(newmnt);
-	err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist);
+	err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags | MNT_SHRINKABLE, mntlist);
 	switch (err) {
 	case 0:
 		path_put(&nd->path);
@@ -371,7 +371,6 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 	if (IS_ERR(mnt))
 		goto out_err;
 
-	nd->path.mnt->mnt_flags |= MNT_SHRINKABLE;
 	rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list);
 
 out:
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 29f1da761bbf..8c6a03627176 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -758,7 +758,7 @@ const struct file_operations cifs_file_ops = {
 };
 
 const struct file_operations cifs_file_direct_ops = {
-	/* no mmap, no aio, no readv -
+	/* no aio, no readv -
 	   BB reevaluate whether they can be done with directio, no cache */
 	.read = cifs_user_read,
 	.write = cifs_user_write,
@@ -767,6 +767,7 @@ const struct file_operations cifs_file_direct_ops = {
 	.lock = cifs_lock,
 	.fsync = cifs_fsync,
 	.flush = cifs_flush,
+	.mmap = cifs_file_mmap,
 	.splice_read = generic_file_splice_read,
 #ifdef CONFIG_CIFS_POSIX
 	.unlocked_ioctl = cifs_ioctl,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 63ea83ff687f..3bbcaa716b3c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2287,12 +2287,12 @@ int
 cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	   char *mount_data_global, const char *devname)
 {
-	int rc = 0;
+	int rc;
 	int xid;
 	struct smb_vol *volume_info;
-	struct cifsSesInfo *pSesInfo = NULL;
-	struct cifsTconInfo *tcon = NULL;
-	struct TCP_Server_Info *srvTcp = NULL;
+	struct cifsSesInfo *pSesInfo;
+	struct cifsTconInfo *tcon;
+	struct TCP_Server_Info *srvTcp;
 	char *full_path;
 	char *mount_data = mount_data_global;
 #ifdef CONFIG_CIFS_DFS_UPCALL
@@ -2301,6 +2301,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	int referral_walks_count = 0;
 try_mount_again:
 #endif
+	rc = 0;
+	tcon = NULL;
+	pSesInfo = NULL;
+	srvTcp = NULL;
 	full_path = NULL;
 
 	xid = GetXid();
@@ -2597,6 +2601,7 @@ remote_path_check:
 
 		cleanup_volume_info(&volume_info);
 		referral_walks_count++;
+		FreeXid(xid);
 		goto try_mount_again;
 	}
 #else /* No DFS support, return error on mount */
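
The added FreeXid(xid) matters because the DFS referral path jumps back to
try_mount_again:, where GetXid() is called once more; without the release,
every referral walk would leak an xid. The balanced retry shape, reduced to a
sketch (do_mount_work() is a stand-in, not a real cifs function):

retry:
	xid = GetXid();
	rc = do_mount_work();
	if (rc == -EREMOTE && referral_walks_count++ < MAX_NESTED_LINKS) {
		FreeXid(xid);	/* balance before looping */
		goto retry;
	}
	FreeXid(xid);
	return rc;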
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 332dd00f0894..c5c45de1a2ee 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1005,6 +1005,9 @@ COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND)
 COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST)
 COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI)
 #endif
+/* Big V (don't complain on serial console) */
+IGNORE_IOCTL(VT_OPENQRY)
+IGNORE_IOCTL(VT_GETMODE)
 /* Little p (/dev/rtc, /dev/envctrl, etc.) */
 COMPATIBLE_IOCTL(RTC_AIE_ON)
 COMPATIBLE_IOCTL(RTC_AIE_OFF)
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index c8afa6b1d91d..32a5f46b1157 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -121,8 +121,10 @@ static int get_target(const char *symname, struct path *path,
 				ret = -ENOENT;
 				path_put(path);
 			}
-		} else
+		} else {
 			ret = -EPERM;
+			path_put(path);
+		}
 	}
 
 	return ret;
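
The configfs fix is the path-reference analogue of the same rule: a
successful lookup takes references on the vfsmount and dentry in *path, and
every exit, including the -EPERM rejection, must drop them with path_put().
In sketch form (is_acceptable() is a hypothetical predicate):

	ret = kern_path(symname, LOOKUP_FOLLOW, path);
	if (ret)
		return ret;		/* lookup failed: no references held */
	if (!is_acceptable(path)) {
		path_put(path);		/* failure exits drop the references */
		return -EPERM;
	}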
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b486169f42bf..274ac865bae8 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -160,15 +160,8 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
 	 * block. A pointer to that is in the struct vfsmount that we
 	 * have around.
 	 */
-	if (!parent) {
-		if (debugfs_mount && debugfs_mount->mnt_sb) {
-			parent = debugfs_mount->mnt_sb->s_root;
-		}
-	}
-	if (!parent) {
-		pr_debug("debugfs: Ah! can not find a parent!\n");
-		return -EFAULT;
-	}
+	if (!parent)
+		parent = debugfs_mount->mnt_sb->s_root;
 
 	*dentry = NULL;
 	mutex_lock(&parent->d_inode->i_mutex);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index fbb6e5eed697..7cb0a59f4b9d 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1748,7 +1748,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
 			      char *cipher_name, size_t *key_size)
 {
 	char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
-	char *full_alg_name;
+	char *full_alg_name = NULL;
 	int rc;
 
 	*key_tfm = NULL;
@@ -1763,7 +1763,6 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
 	if (rc)
 		goto out;
 	*key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC);
-	kfree(full_alg_name);
 	if (IS_ERR(*key_tfm)) {
 		rc = PTR_ERR(*key_tfm);
 		printk(KERN_ERR "Unable to allocate crypto cipher with name "
@@ -1786,6 +1785,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
 		goto out;
 	}
 out:
+	kfree(full_alg_name);
 	return rc;
 }
 
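
This rework is safe because kfree(NULL) is defined to be a no-op:
initializing full_alg_name to NULL and freeing it exactly once at the out:
label covers the early "goto out" paths as well as success, and it keeps the
name valid for the error printk that follows the allocation. The general
single-exit shape (make_name()/use_name() are placeholders):

	char *name = NULL;
	int rc;

	rc = make_name(&name);
	if (rc)
		goto out;
	rc = use_name(name);
out:
	kfree(name);		/* no-op if name was never allocated */
	return rc;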
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 9e944057001b..678172b61be2 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -158,7 +158,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 	struct dentry *ecryptfs_dentry = file->f_path.dentry;
 	/* Private value of ecryptfs_dentry allocated in
 	 * ecryptfs_lookup() */
-	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
+	struct dentry *lower_dentry;
 	struct ecryptfs_file_info *file_info;
 
 	mount_crypt_stat = &ecryptfs_superblock_to_private(
@@ -191,13 +191,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 				      | ECRYPTFS_ENCRYPTED);
 	}
 	mutex_unlock(&crypt_stat->cs_mutex);
-	if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
-	    && !(file->f_flags & O_RDONLY)) {
-		rc = -EPERM;
-		printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
-		       "file must hence be opened RO\n", __func__);
-		goto out;
-	}
 	if (!ecryptfs_inode_to_private(inode)->lower_file) {
 		rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
 		if (rc) {
@@ -208,6 +201,13 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 			goto out;
 		}
 	}
+	if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
+	    && !(file->f_flags & O_RDONLY)) {
+		rc = -EPERM;
+		printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
+		       "file must hence be opened RO\n", __func__);
+		goto out;
+	}
 	ecryptfs_set_file_lower(
 		file, ecryptfs_inode_to_private(inode)->lower_file);
 	if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
@@ -299,7 +299,6 @@ static int ecryptfs_ioctl(struct inode *inode, struct file *file,
 const struct file_operations ecryptfs_dir_fops = {
 	.readdir = ecryptfs_readdir,
 	.ioctl = ecryptfs_ioctl,
-	.mmap = generic_file_mmap,
 	.open = ecryptfs_open,
 	.flush = ecryptfs_flush,
 	.release = ecryptfs_release,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 429ca0b3ba08..4a430ab4115c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -282,7 +282,8 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 		goto out;
 	}
 	rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
-				ecryptfs_dir_inode->i_sb, 1);
+				ecryptfs_dir_inode->i_sb,
+				ECRYPTFS_INTERPOSE_FLAG_D_ADD);
 	if (rc) {
 		printk(KERN_ERR "%s: Error interposing; rc = [%d]\n",
 		       __func__, rc);
@@ -463,9 +464,6 @@ out_lock:
 	unlock_dir(lower_dir_dentry);
 	dput(lower_new_dentry);
 	dput(lower_old_dentry);
-	d_drop(lower_old_dentry);
-	d_drop(new_dentry);
-	d_drop(old_dentry);
 	return rc;
 }
 
471 469
@@ -614,6 +612,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct dentry *lower_new_dentry;
 	struct dentry *lower_old_dir_dentry;
 	struct dentry *lower_new_dir_dentry;
+	struct dentry *trap = NULL;
 
 	lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
 	lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
@@ -621,7 +620,17 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	dget(lower_new_dentry);
 	lower_old_dir_dentry = dget_parent(lower_old_dentry);
 	lower_new_dir_dentry = dget_parent(lower_new_dentry);
-	lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	/* source should not be ancestor of target */
+	if (trap == lower_old_dentry) {
+		rc = -EINVAL;
+		goto out_lock;
+	}
+	/* target should not be ancestor of source */
+	if (trap == lower_new_dentry) {
+		rc = -ENOTEMPTY;
+		goto out_lock;
+	}
 	rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
 			lower_new_dir_dentry->d_inode, lower_new_dentry);
 	if (rc)
@@ -715,31 +724,31 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	/* Released in ecryptfs_put_link(); only release here on error */
 	buf = kmalloc(len, GFP_KERNEL);
 	if (!buf) {
-		rc = -ENOMEM;
+		buf = ERR_PTR(-ENOMEM);
 		goto out;
 	}
 	old_fs = get_fs();
 	set_fs(get_ds());
 	rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
 	set_fs(old_fs);
-	if (rc < 0)
-		goto out_free;
-	else
+	if (rc < 0) {
+		kfree(buf);
+		buf = ERR_PTR(rc);
+	} else
 		buf[rc] = '\0';
-	rc = 0;
-	nd_set_link(nd, buf);
-	goto out;
-out_free:
-	kfree(buf);
 out:
-	return ERR_PTR(rc);
+	nd_set_link(nd, buf);
+	return NULL;
 }
 
 static void
 ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
 {
-	/* Free the char* */
-	kfree(nd_get_link(nd));
+	char *buf = nd_get_link(nd);
+	if (!IS_ERR(buf)) {
+		/* Free the char* */
+		kfree(buf);
+	}
 }
 
 /**
@@ -772,18 +781,23 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
 }
 
 /**
- * ecryptfs_truncate
+ * truncate_upper
  * @dentry: The ecryptfs layer dentry
- * @new_length: The length to expand the file to
+ * @ia: Address of the ecryptfs inode's attributes
+ * @lower_ia: Address of the lower inode's attributes
  *
  * Function to handle truncations modifying the size of the file. Note
  * that the file sizes are interpolated. When expanding, we are simply
- * writing strings of 0's out. When truncating, we need to modify the
- * underlying file size according to the page index interpolations.
+ * writing strings of 0's out. When truncating, we truncate the upper
+ * inode and update the lower_ia according to the page index
+ * interpolations. If ATTR_SIZE is set in lower_ia->ia_valid upon return,
+ * the caller must use lower_ia in a call to notify_change() to perform
+ * the truncation of the lower inode.
  *
  * Returns zero on success; non-zero otherwise
  */
-int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
+static int truncate_upper(struct dentry *dentry, struct iattr *ia,
+			  struct iattr *lower_ia)
 {
 	int rc = 0;
 	struct inode *inode = dentry->d_inode;
@@ -794,8 +808,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 	loff_t lower_size_before_truncate;
 	loff_t lower_size_after_truncate;
 
-	if (unlikely((new_length == i_size)))
+	if (unlikely((ia->ia_size == i_size))) {
+		lower_ia->ia_valid &= ~ATTR_SIZE;
 		goto out;
+	}
 	crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
 	/* Set up a fake ecryptfs file, this is used to interface with
 	 * the file in the underlying filesystem so that the
@@ -815,28 +831,30 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 		&fake_ecryptfs_file,
 		ecryptfs_inode_to_private(dentry->d_inode)->lower_file);
 	/* Switch on growing or shrinking file */
-	if (new_length > i_size) {
+	if (ia->ia_size > i_size) {
 		char zero[] = { 0x00 };
 
+		lower_ia->ia_valid &= ~ATTR_SIZE;
 		/* Write a single 0 at the last position of the file;
 		 * this triggers code that will fill in 0's throughout
 		 * the intermediate portion of the previous end of the
 		 * file and the new and of the file */
 		rc = ecryptfs_write(&fake_ecryptfs_file, zero,
-				    (new_length - 1), 1);
-	} else { /* new_length < i_size_read(inode) */
-		/* We're chopping off all the pages down do the page
-		 * in which new_length is located. Fill in the end of
-		 * that page from (new_length & ~PAGE_CACHE_MASK) to
+				    (ia->ia_size - 1), 1);
+	} else { /* ia->ia_size < i_size_read(inode) */
+		/* We're chopping off all the pages down to the page
+		 * in which ia->ia_size is located. Fill in the end of
+		 * that page from (ia->ia_size & ~PAGE_CACHE_MASK) to
 		 * PAGE_CACHE_SIZE with zeros. */
 		size_t num_zeros = (PAGE_CACHE_SIZE
-				    - (new_length & ~PAGE_CACHE_MASK));
+				    - (ia->ia_size & ~PAGE_CACHE_MASK));
 
 		if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
-			rc = vmtruncate(inode, new_length);
+			rc = vmtruncate(inode, ia->ia_size);
 			if (rc)
 				goto out_free;
-			rc = vmtruncate(lower_dentry->d_inode, new_length);
+			lower_ia->ia_size = ia->ia_size;
+			lower_ia->ia_valid |= ATTR_SIZE;
 			goto out_free;
 		}
 		if (num_zeros) {
@@ -848,7 +866,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 				goto out_free;
 			}
 			rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt,
-					    new_length, num_zeros);
+					    ia->ia_size, num_zeros);
 			kfree(zeros_virt);
 			if (rc) {
 				printk(KERN_ERR "Error attempting to zero out "
@@ -857,7 +875,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 				goto out_free;
 			}
 		}
-		vmtruncate(inode, new_length);
+		vmtruncate(inode, ia->ia_size);
 		rc = ecryptfs_write_inode_size_to_metadata(inode);
 		if (rc) {
 			printk(KERN_ERR "Problem with "
@@ -870,10 +888,12 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 		lower_size_before_truncate =
 			upper_size_to_lower_size(crypt_stat, i_size);
 		lower_size_after_truncate =
-			upper_size_to_lower_size(crypt_stat, new_length);
-		if (lower_size_after_truncate < lower_size_before_truncate)
-			vmtruncate(lower_dentry->d_inode,
-				   lower_size_after_truncate);
+			upper_size_to_lower_size(crypt_stat, ia->ia_size);
+		if (lower_size_after_truncate < lower_size_before_truncate) {
+			lower_ia->ia_size = lower_size_after_truncate;
+			lower_ia->ia_valid |= ATTR_SIZE;
+		} else
+			lower_ia->ia_valid &= ~ATTR_SIZE;
 	}
 out_free:
 	if (ecryptfs_file_to_private(&fake_ecryptfs_file))
@@ -883,6 +903,33 @@ out:
 	return rc;
 }
 
+/**
+ * ecryptfs_truncate
+ * @dentry: The ecryptfs layer dentry
+ * @new_length: The length to expand the file to
+ *
+ * Simple function that handles the truncation of an eCryptfs inode and
+ * its corresponding lower inode.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
+{
+	struct iattr ia = { .ia_valid = ATTR_SIZE, .ia_size = new_length };
+	struct iattr lower_ia = { .ia_valid = 0 };
+	int rc;
+
+	rc = truncate_upper(dentry, &ia, &lower_ia);
+	if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
+		struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+
+		mutex_lock(&lower_dentry->d_inode->i_mutex);
+		rc = notify_change(lower_dentry, &lower_ia);
+		mutex_unlock(&lower_dentry->d_inode->i_mutex);
+	}
+	return rc;
+}
+
 static int
 ecryptfs_permission(struct inode *inode, int mask)
 {
@@ -905,6 +952,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 {
 	int rc = 0;
 	struct dentry *lower_dentry;
+	struct iattr lower_ia;
 	struct inode *inode;
 	struct inode *lower_inode;
 	struct ecryptfs_crypt_stat *crypt_stat;
@@ -943,15 +991,11 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 		}
 	}
 	mutex_unlock(&crypt_stat->cs_mutex);
+	memcpy(&lower_ia, ia, sizeof(lower_ia));
+	if (ia->ia_valid & ATTR_FILE)
+		lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
 	if (ia->ia_valid & ATTR_SIZE) {
-		ecryptfs_printk(KERN_DEBUG,
-				"ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n",
-				ia->ia_valid, ATTR_SIZE);
-		rc = ecryptfs_truncate(dentry, ia->ia_size);
-		/* ecryptfs_truncate handles resizing of the lower file */
-		ia->ia_valid &= ~ATTR_SIZE;
-		ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [%x]\n",
-				ia->ia_valid);
+		rc = truncate_upper(dentry, ia, &lower_ia);
 		if (rc < 0)
 			goto out;
 	}
@@ -960,17 +1004,32 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 	 * mode change is for clearing setuid/setgid bits. Allow lower fs
 	 * to interpret this in its own way.
 	 */
-	if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
-		ia->ia_valid &= ~ATTR_MODE;
+	if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
+		lower_ia.ia_valid &= ~ATTR_MODE;
 
 	mutex_lock(&lower_dentry->d_inode->i_mutex);
-	rc = notify_change(lower_dentry, ia);
+	rc = notify_change(lower_dentry, &lower_ia);
 	mutex_unlock(&lower_dentry->d_inode->i_mutex);
 out:
 	fsstack_copy_attr_all(inode, lower_inode);
 	return rc;
 }
 
+int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		     struct kstat *stat)
+{
+	struct kstat lower_stat;
+	int rc;
+
+	rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
+			 ecryptfs_dentry_to_lower(dentry), &lower_stat);
+	if (!rc) {
+		generic_fillattr(dentry->d_inode, stat);
+		stat->blocks = lower_stat.blocks;
+	}
+	return rc;
+}
+
 int
 ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 		  size_t size, int flags)
@@ -1100,6 +1159,7 @@ const struct inode_operations ecryptfs_dir_iops = {
 const struct inode_operations ecryptfs_main_iops = {
 	.permission = ecryptfs_permission,
 	.setattr = ecryptfs_setattr,
+	.getattr = ecryptfs_getattr,
 	.setxattr = ecryptfs_setxattr,
 	.getxattr = ecryptfs_getxattr,
 	.listxattr = ecryptfs_listxattr,
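
Two notes on the ecryptfs hunks above. In the rename fix, the dentry returned
by lock_rename() is the common-ancestor "trap": calling vfs_rename() when
either endpoint equals that trap would move a directory into its own
descendant, so the two new checks reject exactly those cases. The new
ecryptfs_getattr() exists so that stat() reports the block usage of the real,
encrypted lower file rather than the interpolated upper size. The trap
protocol in condensed sketch form (the patch itself distinguishes -EINVAL
and -ENOTEMPTY):

	trap = lock_rename(lower_old_dir, lower_new_dir);
	if (trap == lower_old_dentry || trap == lower_new_dentry)
		goto out_unlock;	/* would create a directory loop */
	err = vfs_rename(lower_old_dir->d_inode, lower_old_dentry,
			 lower_new_dir->d_inode, lower_new_dentry);
out_unlock:
	unlock_rename(lower_old_dir, lower_new_dir);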
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 567bc4b9f70a..ea2f92101dfe 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -585,8 +585,8 @@ out:
  *  with as much information as it can before needing
  *  the lower filesystem.
  *  ecryptfs_read_super(): this accesses the lower filesystem and uses
- *                         ecryptfs_interpolate to perform most of the linking
- *  ecryptfs_interpolate(): links the lower filesystem into ecryptfs
+ *                         ecryptfs_interpose to perform most of the linking
+ *  ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
  */
 static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
 			   const char *dev_name, void *raw_data,
diff --git a/fs/eventfd.c b/fs/eventfd.c
index d26402ff06ea..7758cc382ef0 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
-			    loff_t *ppos)
+static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+{
+	*cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+	ctx->count -= *cnt;
+}
+
+/**
+ * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
+ * @ctx: [in] Pointer to eventfd context.
+ * @wait: [in] Wait queue to be removed.
+ * @cnt: [out] Pointer to the 64bit counter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked.
+ *
+ * This is used to atomically remove a wait queue entry from the eventfd wait
+ * queue head, and read/reset the counter value.
+ */
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+				  __u64 *cnt)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->wqh.lock, flags);
+	eventfd_ctx_do_read(ctx, cnt);
+	__remove_wait_queue(&ctx->wqh, wait);
+	if (*cnt != 0 && waitqueue_active(&ctx->wqh))
+		wake_up_locked_poll(&ctx->wqh, POLLOUT);
+	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+
+	return *cnt != 0 ? 0 : -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
+
+/**
+ * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
+ * @ctx: [in] Pointer to eventfd context.
+ * @no_wait: [in] Different from zero if the operation should not block.
+ * @cnt: [out] Pointer to the 64bit counter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked but @no_wait was nonzero.
+ * -ERESTARTSYS : A signal interrupted the wait operation.
+ *
+ * If @no_wait is zero, the function might sleep until the eventfd internal
+ * counter becomes greater than zero.
+ */
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
 {
-	struct eventfd_ctx *ctx = file->private_data;
 	ssize_t res;
-	__u64 ucnt = 0;
 	DECLARE_WAITQUEUE(wait, current);
 
-	if (count < sizeof(ucnt))
-		return -EINVAL;
 	spin_lock_irq(&ctx->wqh.lock);
+	*cnt = 0;
 	res = -EAGAIN;
 	if (ctx->count > 0)
-		res = sizeof(ucnt);
-	else if (!(file->f_flags & O_NONBLOCK)) {
+		res = 0;
+	else if (!no_wait) {
 		__add_wait_queue(&ctx->wqh, &wait);
-		for (res = 0;;) {
+		for (;;) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (ctx->count > 0) {
-				res = sizeof(ucnt);
+				res = 0;
 				break;
 			}
 			if (signal_pending(current)) {
@@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 		__remove_wait_queue(&ctx->wqh, &wait);
 		__set_current_state(TASK_RUNNING);
 	}
-	if (likely(res > 0)) {
-		ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
-		ctx->count -= ucnt;
+	if (likely(res == 0)) {
+		eventfd_ctx_do_read(ctx, cnt);
 		if (waitqueue_active(&ctx->wqh))
 			wake_up_locked_poll(&ctx->wqh, POLLOUT);
 	}
 	spin_unlock_irq(&ctx->wqh.lock);
-	if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
-		return -EFAULT;
 
 	return res;
 }
+EXPORT_SYMBOL_GPL(eventfd_ctx_read);
+
+static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
+			    loff_t *ppos)
+{
+	struct eventfd_ctx *ctx = file->private_data;
+	ssize_t res;
+	__u64 cnt;
+
+	if (count < sizeof(cnt))
+		return -EINVAL;
+	res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
+	if (res < 0)
+		return res;
+
+	return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt);
+}
 
 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
 			     loff_t *ppos)
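
The effect of the eventfd refactor is that the counter logic now lives in
eventfd_ctx_do_read() and is reachable from kernel code that holds only a
struct eventfd_ctx, not a struct file; eventfd_read() keeps the old
userspace semantics as a thin wrapper. An in-kernel consumer would look
roughly like this (consume_events() is a placeholder; error handling
trimmed):

	struct eventfd_ctx *ctx;
	__u64 cnt;

	ctx = eventfd_ctx_fdget(fd);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);
	if (!eventfd_ctx_read(ctx, 1, &cnt))	/* non-blocking read */
		consume_events(cnt);
	eventfd_ctx_put(ctx);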
diff --git a/fs/exec.c b/fs/exec.c
index 632b02e34ec7..0790a107ff7e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -941,9 +941,7 @@ void set_task_comm(struct task_struct *tsk, char *buf)
 
 int flush_old_exec(struct linux_binprm * bprm)
 {
-	char * name;
-	int i, ch, retval;
-	char tcomm[sizeof(current->comm)];
+	int retval;
 
 	/*
 	 * Make sure we have a private signal table and that
@@ -964,6 +962,25 @@ int flush_old_exec(struct linux_binprm * bprm)
 
 	bprm->mm = NULL;		/* We're using it now */
 
+	current->flags &= ~PF_RANDOMIZE;
+	flush_thread();
+	current->personality &= ~bprm->per_clear;
+
+	return 0;
+
+out:
+	return retval;
+}
+EXPORT_SYMBOL(flush_old_exec);
+
+void setup_new_exec(struct linux_binprm * bprm)
+{
+	int i, ch;
+	char * name;
+	char tcomm[sizeof(current->comm)];
+
+	arch_pick_mmap_layout(current->mm);
+
 	/* This is the point of no return */
 	current->sas_ss_sp = current->sas_ss_size = 0;
 
@@ -985,9 +1002,6 @@ int flush_old_exec(struct linux_binprm * bprm)
 	tcomm[i] = '\0';
 	set_task_comm(current, tcomm);
 
-	current->flags &= ~PF_RANDOMIZE;
-	flush_thread();
-
 	/* Set the new mm task size. We have to do that late because it may
 	 * depend on TIF_32BIT which is only updated in flush_thread() on
 	 * some architectures like powerpc
@@ -1003,8 +1017,6 @@ int flush_old_exec(struct linux_binprm * bprm)
 		set_dumpable(current->mm, suid_dumpable);
 	}
 
-	current->personality &= ~bprm->per_clear;
-
 	/*
 	 * Flush performance counters when crossing a
 	 * security domain:
@@ -1019,14 +1031,8 @@ int flush_old_exec(struct linux_binprm * bprm)
 
 	flush_signal_handlers(current, 0);
 	flush_old_files(current->files);
-
-	return 0;
-
-out:
-	return retval;
 }
-
-EXPORT_SYMBOL(flush_old_exec);
+EXPORT_SYMBOL(setup_new_exec);
 
 /*
  * Prepare credentials and lock ->cred_guard_mutex.
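
This split changes the contract for every binfmt loader: the single old call
becomes two, and the loader may still fail cleanly in between, since
flush_old_exec() keeps its error return while setup_new_exec() runs past the
point of no return and returns void. The expected sequence in a loader
(sketch; the binfmt_elf and binfmt_elf_fdpic entries in the diffstat make
this same conversion):

	retval = flush_old_exec(bprm);		/* may still fail safely */
	if (retval)
		goto out_free;
	/* decide personality/randomization before committing */
	setup_new_exec(bprm);			/* no failure allowed past here */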
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 698a8636d39c..2afbcebeda71 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -738,13 +738,28 @@ static int exofs_write_begin_export(struct file *file,
 					fsdata);
 }
 
+static int exofs_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
+{
+	struct inode *inode = mapping->host;
+	/* According to comment in simple_write_end i_mutex is held */
+	loff_t i_size = inode->i_size;
+	int ret;
+
+	ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
+	if (i_size != inode->i_size)
+		mark_inode_dirty(inode);
+	return ret;
+}
+
 const struct address_space_operations exofs_aops = {
 	.readpage	= exofs_readpage,
 	.readpages	= exofs_readpages,
 	.writepage	= exofs_writepage,
 	.writepages	= exofs_writepages,
 	.write_begin	= exofs_write_begin_export,
-	.write_end	= simple_write_end,
+	.write_end	= exofs_write_end,
 };
 
 /******************************************************************************
750/****************************************************************************** 765/******************************************************************************
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
index 423033addd1f..c52e9888b8ab 100644
--- a/fs/exofs/pnfs.h
+++ b/fs/exofs/pnfs.h
@@ -15,13 +15,7 @@
 #ifndef __EXOFS_PNFS_H__
 #define __EXOFS_PNFS_H__
 
-#if defined(CONFIG_PNFS)
-
-
-/* FIXME: move this file to: linux/exportfs/pnfs_osd_xdr.h */
-#include "../nfs/objlayout/pnfs_osd_xdr.h"
-
-#else /* defined(CONFIG_PNFS) */
+#if ! defined(__PNFS_OSD_XDR_H__)
 
 enum pnfs_iomode {
 	IOMODE_READ = 1,
@@ -46,6 +40,6 @@ struct pnfs_osd_data_map {
 	u32	odm_raid_algorithm;
 };
 
-#endif /* else defined(CONFIG_PNFS) */
+#endif /* ! defined(__PNFS_OSD_XDR_H__) */
 
 #endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 9acf7e808139..9ed1bb1f319f 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -28,6 +28,7 @@ config EXT4_FS
 
 config EXT4_USE_FOR_EXT23
 	bool "Use ext4 for ext2/ext3 file systems"
+	depends on EXT4_FS
 	depends on EXT3_FS=n || EXT2_FS=n
 	default y
 	help
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 4df8621ec31c..a60ab9aad57d 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/version.h>
20#include <linux/blkdev.h> 19#include <linux/blkdev.h>
21#include <linux/mutex.h> 20#include <linux/mutex.h>
22#include "ext4.h" 21#include "ext4.h"
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 56f9271ee8cc..874d169a193e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -361,14 +361,11 @@ struct ext4_new_group_data {
 	   so set the magic i_delalloc_reserve_flag after taking the
 	   inode allocation semaphore for */
 #define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0004
-	/* Call ext4_da_update_reserve_space() after successfully
-	   allocating the blocks */
-#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE	0x0008
 	/* caller is from the direct IO path, request to creation of an
 	   unitialized extents if not allocated, split the uninitialized
 	   extent if blocks has been preallocated already*/
-#define EXT4_GET_BLOCKS_DIO			0x0010
-#define EXT4_GET_BLOCKS_CONVERT			0x0020
+#define EXT4_GET_BLOCKS_DIO			0x0008
+#define EXT4_GET_BLOCKS_CONVERT			0x0010
 #define EXT4_GET_BLOCKS_DIO_CREATE_EXT		(EXT4_GET_BLOCKS_DIO|\
 					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
 	/* Convert extent to initialized after direct IO complete */
@@ -699,6 +696,8 @@ struct ext4_inode_info {
 	unsigned int i_reserved_meta_blocks;
 	unsigned int i_allocated_meta_blocks;
 	unsigned short i_delalloc_reserved_flag;
+	sector_t i_da_metadata_calc_last_lblock;
+	int i_da_metadata_calc_len;
 
 	/* on-disk additional length */
 	__u16 i_extra_isize;
@@ -1441,6 +1440,8 @@ extern int ext4_block_truncate_page(handle_t *handle,
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern int flush_aio_dio_completed_IO(struct inode *inode);
+extern void ext4_da_update_reserve_space(struct inode *inode,
+					int used, int quota_claim);
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
 extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2ca686454e87..bdb6ce7e2eb4 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
 	ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
 }
 
-extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
+extern int ext4_ext_calc_metadata_amount(struct inode *inode,
+					 sector_t lblocks);
 extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3a7928f825e4..765a4826b118 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
296 * to allocate @blocks 296 * to allocate @blocks
297 * Worse case is one block per extent 297 * Worse case is one block per extent
298 */ 298 */
299int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) 299int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
300{ 300{
301 int lcap, icap, rcap, leafs, idxs, num; 301 struct ext4_inode_info *ei = EXT4_I(inode);
302 int newextents = blocks; 302 int idxs, num = 0;
303
304 rcap = ext4_ext_space_root_idx(inode, 0);
305 lcap = ext4_ext_space_block(inode, 0);
306 icap = ext4_ext_space_block_idx(inode, 0);
307 303
308 /* number of new leaf blocks needed */ 304 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
309 num = leafs = (newextents + lcap - 1) / lcap; 305 / sizeof(struct ext4_extent_idx));
310 306
311 /* 307 /*
312 * Worse case, we need separate index block(s) 308 * If the new delayed allocation block is contiguous with the
313 * to link all new leaf blocks 309 * previous da block, it can share index blocks with the
310 * previous block, so we only need to allocate a new index
311 * block every idxs leaf blocks. At ldxs**2 blocks, we need
312 * an additional index block, and at ldxs**3 blocks, yet
313 * another index blocks.
314 */ 314 */
315 idxs = (leafs + icap - 1) / icap; 315 if (ei->i_da_metadata_calc_len &&
316 do { 316 ei->i_da_metadata_calc_last_lblock+1 == lblock) {
317 num += idxs; 317 if ((ei->i_da_metadata_calc_len % idxs) == 0)
318 idxs = (idxs + icap - 1) / icap; 318 num++;
319 } while (idxs > rcap); 319 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
320 num++;
321 if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
322 num++;
323 ei->i_da_metadata_calc_len = 0;
324 } else
325 ei->i_da_metadata_calc_len++;
326 ei->i_da_metadata_calc_last_lblock++;
327 return num;
328 }
320 329
321 return num; 330 /*
331 * In the worst case we need a new set of index blocks at
332 * every level of the inode's extent tree.
333 */
334 ei->i_da_metadata_calc_len = 1;
335 ei->i_da_metadata_calc_last_lblock = lblock;
336 return ext_depth(inode) + 1;
322} 337}
323 338
324static int 339static int
@@ -3023,6 +3038,14 @@ out:
 	return err;
 }
 
+static void unmap_underlying_metadata_blocks(struct block_device *bdev,
+			sector_t block, int count)
+{
+	int i;
+	for (i = 0; i < count; i++)
+		unmap_underlying_metadata(bdev, block + i);
+}
+
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 					ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3098,6 +3121,30 @@ out:
 	} else
 		allocated = ret;
 	set_buffer_new(bh_result);
+	/*
+	 * if we allocated more blocks than requested
+	 * we need to make sure we unmap the extra block
+	 * allocated. The actual needed block will get
+	 * unmapped later when we find the buffer_head marked
+	 * new.
+	 */
+	if (allocated > max_blocks) {
+		unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
+					newblock + max_blocks,
+					allocated - max_blocks);
+		allocated = max_blocks;
+	}
+
+	/*
+	 * If we have done fallocate with the offset that is already
+	 * delayed allocated, we would have block reservation
+	 * and quota reservation done in the delayed write path.
+	 * But fallocate would have already updated quota and block
+	 * count for this offset. So cancel these reservation
+	 */
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+		ext4_da_update_reserve_space(inode, allocated, 0);
+
 map_out:
 	set_buffer_mapped(bh_result);
 out1:
@@ -3190,7 +3237,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	 * this situation is possible, though, _during_ tree modification;
 	 * this is why assert can't be put in ext4_ext_find_extent()
 	 */
-	BUG_ON(path[depth].p_ext == NULL && depth != 0);
+	if (path[depth].p_ext == NULL && depth != 0) {
+		ext4_error(inode->i_sb, __func__, "bad extent address "
+			   "inode: %lu, iblock: %d, depth: %d",
+			   inode->i_ino, iblock, depth);
+		err = -EIO;
+		goto out2;
+	}
 	eh = path[depth].p_hdr;
 
 	ex = path[depth].p_ext;
3196 ex = path[depth].p_ext; 3249 ex = path[depth].p_ext;
@@ -3327,9 +3380,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3327 /* previous routine could use block we allocated */ 3380 /* previous routine could use block we allocated */
3328 newblock = ext_pblock(&newex); 3381 newblock = ext_pblock(&newex);
3329 allocated = ext4_ext_get_actual_len(&newex); 3382 allocated = ext4_ext_get_actual_len(&newex);
3383 if (allocated > max_blocks)
3384 allocated = max_blocks;
3330 set_buffer_new(bh_result); 3385 set_buffer_new(bh_result);
3331 3386
3332 /* 3387 /*
3388 * Update reserved blocks/metadata blocks after successful
3389 * block allocation which had been deferred till now.
3390 */
3391 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
3392 ext4_da_update_reserve_space(inode, allocated, 1);
3393
3394 /*
3333 * Cache the extent and update transaction to commit on fdatasync only 3395 * Cache the extent and update transaction to commit on fdatasync only
3334 * when it is _not_ an uninitialized extent. 3396 * when it is _not_ an uninitialized extent.
3335 */ 3397 */
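
A worked example makes the new per-block charge concrete. With 4 KiB blocks,
and taking struct ext4_extent_header and struct ext4_extent_idx as 12 bytes
each on disk, idxs = (4096 - 12) / 12 = 340. For a run of contiguous delayed
allocations the estimate then behaves like this: the first block of the run
is charged ext_depth(inode) + 1; every 340th block after it is charged one
extra block for a new leaf; every 340 * 340 = 115600th block one more for a
new level-1 index; and every 340^3 block yet one more, at which point the
running length resets. A non-contiguous lblock restarts the run and is again
charged the full ext_depth(inode) + 1.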
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0b22497d92e1..98bd140aad01 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
 		return ext4_force_commit(inode->i_sb);
 
 	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
-	if (jbd2_log_start_commit(journal, commit_tid))
+	if (jbd2_log_start_commit(journal, commit_tid)) {
+		/*
+		 * When the journal is on a different device than the
+		 * fs data disk, we need to issue the barrier in
+		 * writeback mode. (In ordered mode, the jbd2 layer
+		 * will take care of issuing the barrier. In
+		 * data=journal, all of the data blocks are written to
+		 * the journal device.)
+		 */
+		if (ext4_should_writeback_data(inode) &&
+		    (journal->j_fs_dev != journal->j_dev) &&
+		    (journal->j_flags & JBD2_BARRIER))
+			blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 		jbd2_log_wait_commit(journal, commit_tid);
-	else if (journal->j_flags & JBD2_BARRIER)
+	} else if (journal->j_flags & JBD2_BARRIER)
 		blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 	return ret;
 }
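
Restating the new ext4_sync_file() barrier logic as a decision table
(barriers enabled in every row):

	commit started, journal on fs device:	jbd2 commit issues the barrier
	commit started, external journal,
	  data=writeback:			flush s_bdev by hand (new code)
	commit started, external journal,
	  data=ordered or data=journal:		jbd2 already covers the data
	nothing to commit:			flush s_bdev as before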
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ab807963a614..e11952404e02 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1009,86 +1009,114 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
 	return &EXT4_I(inode)->i_reserved_quota;
 }
 #endif
+
 /*
  * Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
+ * to allocate a new block at @lblocks for non extent file based file
  */
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_indirect_calc_metadata_amount(struct inode *inode,
+					      sector_t lblock)
 {
-	int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
-	int ind_blks, dind_blks, tind_blks;
-
-	/* number of new indirect blocks needed */
-	ind_blks = (blocks + icap - 1) / icap;
-
-	dind_blks = (ind_blks + icap - 1) / icap;
-
-	tind_blks = 1;
-
-	return ind_blks + dind_blks + tind_blks;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
+	int blk_bits;
+
+	if (lblock < EXT4_NDIR_BLOCKS)
+		return 0;
+
+	lblock -= EXT4_NDIR_BLOCKS;
+
+	if (ei->i_da_metadata_calc_len &&
+	    (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
+		ei->i_da_metadata_calc_len++;
+		return 0;
+	}
+	ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
+	ei->i_da_metadata_calc_len = 1;
+	blk_bits = roundup_pow_of_two(lblock + 1);
+	return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
 }
 
 /*
  * Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
+ * to allocate a block located at @lblock
  */
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
 {
-	if (!blocks)
-		return 0;
-
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
-		return ext4_ext_calc_metadata_amount(inode, blocks);
+		return ext4_ext_calc_metadata_amount(inode, lblock);
 
-	return ext4_indirect_calc_metadata_amount(inode, blocks);
+	return ext4_indirect_calc_metadata_amount(inode, lblock);
 }
 
-static void ext4_da_update_reserve_space(struct inode *inode, int used)
+/*
+ * Called with i_data_sem down, which is important since we can call
+ * ext4_discard_preallocations() from here.
+ */
+void ext4_da_update_reserve_space(struct inode *inode,
+					int used, int quota_claim)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	int total, mdb, mdb_free, mdb_claim = 0;
-
-	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-	/* recalculate the number of metablocks still need to be reserved */
-	total = EXT4_I(inode)->i_reserved_data_blocks - used;
-	mdb = ext4_calc_metadata_amount(inode, total);
-
-	/* figure out how many metablocks to release */
-	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-	mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
-	if (mdb_free) {
-		/* Account for allocated meta_blocks */
-		mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks;
-		BUG_ON(mdb_free < mdb_claim);
-		mdb_free -= mdb_claim;
-
-		/* update fs dirty blocks counter */
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	int mdb_free = 0, allocated_meta_blocks = 0;
+
+	spin_lock(&ei->i_block_reservation_lock);
+	if (unlikely(used > ei->i_reserved_data_blocks)) {
+		ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
+			 "with only %d reserved data blocks\n",
+			 __func__, inode->i_ino, used,
+			 ei->i_reserved_data_blocks);
+		WARN_ON(1);
+		used = ei->i_reserved_data_blocks;
+	}
+
+	/* Update per-inode reservations */
+	ei->i_reserved_data_blocks -= used;
+	used += ei->i_allocated_meta_blocks;
+	ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
+	allocated_meta_blocks = ei->i_allocated_meta_blocks;
+	ei->i_allocated_meta_blocks = 0;
+	percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
+
+	if (ei->i_reserved_data_blocks == 0) {
+		/*
+		 * We can release all of the reserved metadata blocks
+		 * only when we have written all of the delayed
+		 * allocation blocks.
+		 */
+		mdb_free = ei->i_reserved_meta_blocks;
+		ei->i_reserved_meta_blocks = 0;
+		ei->i_da_metadata_calc_len = 0;
 		percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
-		EXT4_I(inode)->i_allocated_meta_blocks = 0;
-		EXT4_I(inode)->i_reserved_meta_blocks = mdb;
 	}
-
-	/* update per-inode reservations */
-	BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
-	EXT4_I(inode)->i_reserved_data_blocks -= used;
-	percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	vfs_dq_claim_block(inode, used + mdb_claim);
-
-	/*
-	 * free those over-booking quota for metadata blocks
-	 */
-	if (mdb_free)
-		vfs_dq_release_reservation_block(inode, mdb_free);
+	/* Update quota subsystem */
+	if (quota_claim) {
+		vfs_dq_claim_block(inode, used);
+		if (mdb_free)
+			vfs_dq_release_reservation_block(inode, mdb_free);
+	} else {
+		/*
+		 * We did fallocate with an offset that is already delayed
+		 * allocated. So on delayed allocated writeback we should
+		 * not update the quota for allocated blocks. But then
+		 * converting an fallocate region to initialized region would
+		 * have caused a metadata allocation. So claim quota for
+		 * that
+		 */
+		if (allocated_meta_blocks)
+			vfs_dq_claim_block(inode, allocated_meta_blocks);
+		vfs_dq_release_reservation_block(inode, mdb_free + used);
+	}
 
 	/*
 	 * If we have done all the pending block allocations and if
 	 * there aren't any writers on the inode, we can discard the
 	 * inode's preallocations.
 	 */
-	if (!total && (atomic_read(&inode->i_writecount) == 0))
+	if ((ei->i_reserved_data_blocks == 0) &&
+	    (atomic_read(&inode->i_writecount) == 0))
 		ext4_discard_preallocations(inode);
 }
 
@@ -1280,18 +1308,20 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 		 */
 		EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
 	}
-	}
 
+		/*
+		 * Update reserved blocks/metadata blocks after successful
+		 * block allocation which had been deferred till now. We don't
+		 * support fallocate for non extent files. So we can update
+		 * reserve space here.
+		 */
+		if ((retval > 0) &&
+		    (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
+			ext4_da_update_reserve_space(inode, retval, 1);
+	}
 	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
 		EXT4_I(inode)->i_delalloc_reserved_flag = 0;
 
-	/*
-	 * Update reserved blocks/metadata blocks after successful
-	 * block allocation which had been deferred till now.
-	 */
-	if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
-		ext4_da_update_reserve_space(inode, retval);
-
 	up_write((&EXT4_I(inode)->i_data_sem));
 	if (retval > 0 && buffer_mapped(bh)) {
 		int ret = check_block_validity(inode, "file system "
@@ -1797,11 +1827,15 @@ static int ext4_journalled_write_end(struct file *file,
 	return ret ? ret : copied;
 }
 
-static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+/*
+ * Reserve a single block located at lblock
+ */
+static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
 {
 	int retries = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	unsigned long md_needed, mdblocks, total = 0;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned long md_needed, md_reserved;
 
 	/*
 	 * recalculate the amount of metadata blocks to reserve
@@ -1809,35 +1843,31 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
 	 * worst case is one extent per block
 	 */
 repeat:
-	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-	total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
-	mdblocks = ext4_calc_metadata_amount(inode, total);
-	BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
-
-	md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
-	total = md_needed + nrblocks;
-	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+	spin_lock(&ei->i_block_reservation_lock);
+	md_reserved = ei->i_reserved_meta_blocks;
+	md_needed = ext4_calc_metadata_amount(inode, lblock);
+	spin_unlock(&ei->i_block_reservation_lock);
 
 	/*
 	 * Make quota reservation here to prevent quota overflow
 	 * later. Real quota accounting is done at pages writeout
 	 * time.
 	 */
-	if (vfs_dq_reserve_block(inode, total))
+	if (vfs_dq_reserve_block(inode, md_needed + 1))
 		return -EDQUOT;
 
-	if (ext4_claim_free_blocks(sbi, total)) {
-		vfs_dq_release_reservation_block(inode, total);
+	if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
+		vfs_dq_release_reservation_block(inode, md_needed + 1);
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
 			yield();
 			goto repeat;
 		}
 		return -ENOSPC;
 	}
-	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-	EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
-	EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
-	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+	spin_lock(&ei->i_block_reservation_lock);
+	ei->i_reserved_data_blocks++;
+	ei->i_reserved_meta_blocks += md_needed;
+	spin_unlock(&ei->i_block_reservation_lock);
 
 	return 0;	/* success */
 }
@@ -1845,49 +1875,46 @@ repeat:
 static void ext4_da_release_space(struct inode *inode, int to_free)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	int total, mdb, mdb_free, release;
+	struct ext4_inode_info *ei = EXT4_I(inode);
 
 	if (!to_free)
 		return;		/* Nothing to release, exit */
 
 	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	if (!EXT4_I(inode)->i_reserved_data_blocks) {
+	if (unlikely(to_free > ei->i_reserved_data_blocks)) {
 		/*
-		 * if there is no reserved blocks, but we try to free some
-		 * then the counter is messed up somewhere.
-		 * but since this function is called from invalidate
-		 * page, it's harmless to return without any action
+		 * if there aren't enough reserved blocks, then the
+		 * counter is messed up somewhere.  Since this
+		 * function is called from invalidate page, it's
+		 * harmless to return without any action.
 		 */
-		printk(KERN_INFO "ext4 delalloc try to release %d reserved "
-			"blocks for inode %lu, but there is no reserved "
-			"data blocks\n", to_free, inode->i_ino);
-		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-		return;
+		ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
+			 "ino %lu, to_free %d with only %d reserved "
+			 "data blocks\n", inode->i_ino, to_free,
+			 ei->i_reserved_data_blocks);
+		WARN_ON(1);
+		to_free = ei->i_reserved_data_blocks;
 	}
+	ei->i_reserved_data_blocks -= to_free;
 
-	/* recalculate the number of metablocks still need to be reserved */
-	total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
-	mdb = ext4_calc_metadata_amount(inode, total);
-
-	/* figure out how many metablocks to release */
-	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-	mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
-	release = to_free + mdb_free;
-
-	/* update fs dirty blocks counter for truncate case */
-	percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
+	if (ei->i_reserved_data_blocks == 0) {
+		/*
+		 * We can release all of the reserved metadata blocks
+		 * only when we have written all of the delayed
+		 * allocation blocks.
+		 */
+		to_free += ei->i_reserved_meta_blocks;
+		ei->i_reserved_meta_blocks = 0;
+		ei->i_da_metadata_calc_len = 0;
+	}
 
-	/* update per-inode reservations */
-	BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
-	EXT4_I(inode)->i_reserved_data_blocks -= to_free;
+	/* update fs dirty blocks counter */
+	percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
 
-	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-	EXT4_I(inode)->i_reserved_meta_blocks = mdb;
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	vfs_dq_release_reservation_block(inode, release);
+	vfs_dq_release_reservation_block(inode, to_free);
 }
 
 static void ext4_da_page_release_reservation(struct page *page,
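
The reservation bookkeeping above boils down to one invariant: each delayed data block carries its worst-case metadata reservation, and the metadata half is only returned once the count of outstanding data blocks reaches zero. A minimal user-space model of that rule, assuming nothing beyond the two counters (field names echo the kernel's, but this is a toy, not ext4 code):

#include <assert.h>
#include <stdio.h>

struct resv {
	int reserved_data_blocks;
	int reserved_meta_blocks;
};

static void reserve_block(struct resv *r, int md_needed)
{
	r->reserved_data_blocks += 1;		/* one data block at a time */
	r->reserved_meta_blocks += md_needed;
}

static int release_blocks(struct resv *r, int to_free)
{
	if (to_free > r->reserved_data_blocks)	/* counter went bad: clamp */
		to_free = r->reserved_data_blocks;
	r->reserved_data_blocks -= to_free;
	if (r->reserved_data_blocks == 0) {	/* all delalloc written */
		to_free += r->reserved_meta_blocks;
		r->reserved_meta_blocks = 0;
	}
	return to_free;				/* blocks handed back */
}

int main(void)
{
	struct resv r = {0, 0};
	reserve_block(&r, 2);
	reserve_block(&r, 0);
	assert(release_blocks(&r, 1) == 1);	/* meta kept: data remains */
	assert(release_blocks(&r, 1) == 3);	/* last data block: +2 meta */
	printf("ok\n");
	return 0;
}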
@@ -2192,10 +2219,10 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	 * variables are updated after the blocks have been allocated.
 	 */
 	new.b_state = 0;
-	get_blocks_flags = (EXT4_GET_BLOCKS_CREATE |
-			    EXT4_GET_BLOCKS_DELALLOC_RESERVE);
+	get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
 	if (mpd->b_state & (1 << BH_Delay))
-		get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE;
+		get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
+
 	blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
 			       &new, get_blocks_flags);
 	if (blks < 0) {
@@ -2493,7 +2520,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 	 * XXX: __block_prepare_write() unmaps passed block,
 	 * is it OK?
 	 */
-	ret = ext4_da_reserve_space(inode, 1);
+	ret = ext4_da_reserve_space(inode, iblock);
 	if (ret)
 		/* not enough space to reserve */
 		return ret;
@@ -2967,8 +2994,7 @@ retry:
 out_writepages:
 	if (!no_nrwrite_index_update)
 		wbc->no_nrwrite_index_update = 0;
-	if (wbc->nr_to_write > nr_to_writebump)
-		wbc->nr_to_write -= nr_to_writebump;
+	wbc->nr_to_write -= nr_to_writebump;
 	wbc->range_start = range_start;
 	trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
 	return ret;
@@ -2993,11 +3019,18 @@ static int ext4_nonda_switch(struct super_block *sb)
 	if (2 * free_blocks < 3 * dirty_blocks ||
 	    free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
 		/*
-		 * free block count is less that 150% of dirty blocks
-		 * or free blocks is less that watermark
+		 * free block count is less than 150% of dirty blocks
+		 * or free blocks is less than watermark
 		 */
 		return 1;
 	}
+	/*
+	 * Even if we don't switch but are nearing capacity,
+	 * start pushing delalloc when 1/2 of free blocks are dirty.
+	 */
+	if (free_blocks < 2 * dirty_blocks)
+		writeback_inodes_sb_if_idle(sb);
+
 	return 0;
 }
 
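
The two cutoffs in ext4_nonda_switch() plus the new writeback kick can be modelled stand-alone. In the sketch below, FREEBLOCKS_WATERMARK is a placeholder value and nonda_switch_model() a hypothetical name; only the comparisons mirror the hunk above:

#include <stdio.h>

#define FREEBLOCKS_WATERMARK 1024	/* placeholder, not the real macro */

/* Returns 1 to switch to non-delalloc; *kick is set when delalloc
 * writeback should be started even though we do not switch. */
static int nonda_switch_model(long long free_blocks, long long dirty_blocks,
			      int *kick)
{
	*kick = 0;
	if (2 * free_blocks < 3 * dirty_blocks ||
	    free_blocks < dirty_blocks + FREEBLOCKS_WATERMARK)
		return 1;	/* free < 150% of dirty, or below watermark */
	if (free_blocks < 2 * dirty_blocks)
		*kick = 1;	/* nearing capacity: push delalloc out early */
	return 0;
}

int main(void)
{
	int kick;
	printf("switch=%d\n", nonda_switch_model(10000, 6000, &kick)); /* 0 */
	printf("kick=%d\n", kick);                                     /* 1 */
	printf("switch=%d\n", nonda_switch_model(10000, 7000, &kick)); /* 1 */
	return 0;
}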
@@ -3005,7 +3038,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 			 loff_t pos, unsigned len, unsigned flags,
 			 struct page **pagep, void **fsdata)
 {
-	int ret, retries = 0;
+	int ret, retries = 0, quota_retries = 0;
 	struct page *page;
 	pgoff_t index;
 	unsigned from, to;
@@ -3064,6 +3097,22 @@ retry:
 
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
+
+	if ((ret == -EDQUOT) &&
+	    EXT4_I(inode)->i_reserved_meta_blocks &&
+	    (quota_retries++ < 3)) {
+		/*
+		 * Since we often over-estimate the number of meta
+		 * data blocks required, we may sometimes get a
+		 * spurious out of quota error even though there would
+		 * be enough space once we write the data blocks and
+		 * find out how many meta data blocks were _really_
+		 * required.  So try forcing the inode write to see if
+		 * that helps.
+		 */
+		write_inode_now(inode, (quota_retries == 3));
+		goto retry;
+	}
 out:
 	return ret;
 }
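
The quota retry above leans on the fact that metadata reservations are deliberately pessimistic: forcing the inode out converts estimates into real allocations and may free enough quota for the retry to succeed. A self-contained sketch of the loop, with stubbed-out try_reserve()/write_inode_now_model() standing in for the real calls:

#include <errno.h>
#include <stdio.h>

/* Stub: succeeds only after one forced flush has trimmed the estimate. */
static int flushed;
static int try_reserve(void)                { return flushed ? 0 : -EDQUOT; }
static void write_inode_now_model(int wait) { flushed = 1; (void)wait; }

static int reserve_with_retry(void)
{
	int quota_retries = 0, ret;

	while ((ret = try_reserve()) == -EDQUOT && quota_retries++ < 3)
		/* a flush releases over-reserved metadata; only the
		 * final attempt waits for the write to finish */
		write_inode_now_model(quota_retries == 3);
	return ret;
}

int main(void) { printf("%d\n", reserve_with_retry()); return 0; }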
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 0ca811061bc7..436521cae456 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -17,7 +17,6 @@
 #include <linux/proc_fs.h>
 #include <linux/pagemap.h>
 #include <linux/seq_file.h>
-#include <linux/version.h>
 #include <linux/blkdev.h>
 #include <linux/mutex.h>
 #include "ext4_jbd2.h"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6ed9aa91f27d..735c20d5fd56 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	ei->i_reserved_data_blocks = 0;
 	ei->i_reserved_meta_blocks = 0;
 	ei->i_allocated_meta_blocks = 0;
+	ei->i_da_metadata_calc_len = 0;
 	ei->i_delalloc_reserved_flag = 0;
 	spin_lock_init(&(ei->i_block_reservation_lock));
 #ifdef CONFIG_QUOTA
@@ -2174,9 +2175,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
 	struct super_block *sb = sbi->s_buddy_cache->i_sb;
 
 	return snprintf(buf, PAGE_SIZE, "%llu\n",
-			sbi->s_kbytes_written +
+			(unsigned long long)(sbi->s_kbytes_written +
 			((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
-			  EXT4_SB(sb)->s_sectors_written_start) >> 1));
+			  EXT4_SB(sb)->s_sectors_written_start) >> 1)));
 }
 
 static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
@@ -4005,6 +4006,7 @@ static inline void unregister_as_ext2(void)
 {
 	unregister_filesystem(&ext2_fs_type);
 }
+MODULE_ALIAS("ext2");
 #else
 static inline void register_as_ext2(void) { }
 static inline void unregister_as_ext2(void) { }
@@ -4031,6 +4033,7 @@ static inline void unregister_as_ext3(void)
 {
 	unregister_filesystem(&ext3_fs_type);
 }
+MODULE_ALIAS("ext3");
 #else
 static inline void register_as_ext3(void) { }
 static inline void unregister_as_ext3(void) { }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 83218bebbc7c..f3a2f7ed45aa 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1332,6 +1332,8 @@ retry:
 		goto cleanup;
 	kfree(b_entry_name);
 	kfree(buffer);
+	b_entry_name = NULL;
+	buffer = NULL;
 	brelse(is->iloc.bh);
 	kfree(is);
 	kfree(bs);
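
The two added assignments close a double-free window in the retry loop: a pointer freed on one pass must be cleared, or a later failure path frees it a second time. The hazard in miniature, as a plain user-space program:

#include <stdlib.h>

static char *buffer;

static int one_pass(int fail)
{
	buffer = malloc(64);
	if (fail) {
		free(buffer);
		buffer = NULL;	/* the fix: otherwise the cleanup in
				 * main() would free this block again */
		return -1;
	}
	return 0;
}

int main(void)
{
	if (one_pass(1))
		one_pass(0);	/* retry after the first pass failed */
	free(buffer);		/* common cleanup; safe due to the NULLing */
	return 0;
}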
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2cf93ec40a67..5ef953e6f908 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -199,7 +199,9 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
 		     int force)
 {
-	write_lock_irq(&filp->f_owner.lock);
+	unsigned long flags;
+
+	write_lock_irqsave(&filp->f_owner.lock, flags);
 	if (force || !filp->f_owner.pid) {
 		put_pid(filp->f_owner.pid);
 		filp->f_owner.pid = get_pid(pid);
@@ -211,7 +213,7 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
 			filp->f_owner.euid = cred->euid;
 		}
 	}
-	write_unlock_irq(&filp->f_owner.lock);
+	write_unlock_irqrestore(&filp->f_owner.lock, flags);
 }
 
 int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
@@ -618,60 +620,90 @@ static DEFINE_RWLOCK(fasync_lock);
 static struct kmem_cache *fasync_cache __read_mostly;
 
 /*
- * fasync_helper() is used by almost all character device drivers
- * to set up the fasync queue. It returns negative on error, 0 if it did
- * no changes and positive if it added/deleted the entry.
+ * Remove a fasync entry. If successfully removed, return
+ * positive and clear the FASYNC flag. If no entry exists,
+ * do nothing and return 0.
+ *
+ * NOTE! It is very important that the FASYNC flag always
+ * match the state "is the filp on a fasync list".
+ *
+ * We always take the 'filp->f_lock', since fasync_lock
+ * needs to be irq-safe.
  */
-int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
+static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
 {
 	struct fasync_struct *fa, **fp;
-	struct fasync_struct *new = NULL;
 	int result = 0;
 
-	if (on) {
-		new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
-		if (!new)
-			return -ENOMEM;
+	spin_lock(&filp->f_lock);
+	write_lock_irq(&fasync_lock);
+	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
+		if (fa->fa_file != filp)
+			continue;
+		*fp = fa->fa_next;
+		kmem_cache_free(fasync_cache, fa);
+		filp->f_flags &= ~FASYNC;
+		result = 1;
+		break;
 	}
+	write_unlock_irq(&fasync_lock);
+	spin_unlock(&filp->f_lock);
+	return result;
+}
+
+/*
+ * Add a fasync entry. Return negative on error, positive if
+ * added, and zero if did nothing but change an existing one.
+ *
+ * NOTE! It is very important that the FASYNC flag always
+ * match the state "is the filp on a fasync list".
+ */
+static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
+{
+	struct fasync_struct *new, *fa, **fp;
+	int result = 0;
+
+	new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
 
-	/*
-	 * We need to take f_lock first since it's not an IRQ-safe
-	 * lock.
-	 */
 	spin_lock(&filp->f_lock);
 	write_lock_irq(&fasync_lock);
 	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
-		if (fa->fa_file == filp) {
-			if(on) {
-				fa->fa_fd = fd;
-				kmem_cache_free(fasync_cache, new);
-			} else {
-				*fp = fa->fa_next;
-				kmem_cache_free(fasync_cache, fa);
-				result = 1;
-			}
-			goto out;
-		}
+		if (fa->fa_file != filp)
+			continue;
+		fa->fa_fd = fd;
+		kmem_cache_free(fasync_cache, new);
+		goto out;
 	}
 
-	if (on) {
-		new->magic = FASYNC_MAGIC;
-		new->fa_file = filp;
-		new->fa_fd = fd;
-		new->fa_next = *fapp;
-		*fapp = new;
-		result = 1;
-	}
+	new->magic = FASYNC_MAGIC;
+	new->fa_file = filp;
+	new->fa_fd = fd;
+	new->fa_next = *fapp;
+	*fapp = new;
+	result = 1;
+	filp->f_flags |= FASYNC;
+
 out:
-	if (on)
-		filp->f_flags |= FASYNC;
-	else
-		filp->f_flags &= ~FASYNC;
 	write_unlock_irq(&fasync_lock);
 	spin_unlock(&filp->f_lock);
 	return result;
 }
 
+/*
+ * fasync_helper() is used by almost all character device drivers
+ * to set up the fasync queue, and for regular files by the file
+ * lease code. It returns negative on error, 0 if it did no changes
+ * and positive if it added/deleted the entry.
+ */
+int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
+{
+	if (!on)
+		return fasync_remove_entry(filp, fapp);
+	return fasync_add_entry(fd, filp, fapp);
+}
+
 EXPORT_SYMBOL(fasync_helper);
 
 void __kill_fasync(struct fasync_struct *fa, int sig, int band)
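
The split enforces the invariant spelled out in the new comments: the FASYNC bit in f_flags must always match list membership, so each helper flips the flag at the exact point it links or unlinks the entry. A simplified user-space model (the update-in-place path for an existing entry is omitted, and the locking is dropped):

#include <assert.h>
#include <stdlib.h>

#define FLAG_ASYNC 0x1

struct entry { int fd; struct entry *next; };
struct file_model { int flags; };

static int fasync_remove(struct file_model *f, struct entry **list, int fd)
{
	struct entry *e, **p;
	for (p = list; (e = *p) != NULL; p = &e->next) {
		if (e->fd != fd)
			continue;
		*p = e->next;
		free(e);
		f->flags &= ~FLAG_ASYNC;	/* flag cleared with removal */
		return 1;
	}
	return 0;
}

static int fasync_add(struct file_model *f, struct entry **list, int fd)
{
	struct entry *e = malloc(sizeof(*e));
	if (!e)
		return -1;
	e->fd = fd;
	e->next = *list;
	*list = e;
	f->flags |= FLAG_ASYNC;			/* flag set with insertion */
	return 1;
}

int main(void)
{
	struct entry *list = NULL;
	struct file_model f = {0};
	assert(fasync_add(&f, &list, 3) == 1 && (f.flags & FLAG_ASYNC));
	assert(fasync_remove(&f, &list, 3) == 1 && !(f.flags & FLAG_ASYNC));
	return 0;
}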
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 49bc1b8e8f19..1a7c42c64ff4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -242,6 +242,7 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
 /**
  * bdi_start_writeback - start writeback
  * @bdi: the backing device to write from
+ * @sb: write inodes from this super_block
  * @nr_pages: the number of pages to write
  *
  * Description:
@@ -1187,6 +1188,23 @@ void writeback_inodes_sb(struct super_block *sb)
 EXPORT_SYMBOL(writeback_inodes_sb);
 
 /**
+ * writeback_inodes_sb_if_idle - start writeback if none underway
+ * @sb: the superblock
+ *
+ * Invoke writeback_inodes_sb if no writeback is currently underway.
+ * Returns 1 if writeback was started, 0 if not.
+ */
+int writeback_inodes_sb_if_idle(struct super_block *sb)
+{
+	if (!writeback_in_progress(sb->s_bdi)) {
+		writeback_inodes_sb(sb);
+		return 1;
+	} else
+		return 0;
+}
+EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
+
+/**
  * sync_inodes_sb - sync sb inode pages
  * @sb: the superblock
  *
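
writeback_inodes_sb_if_idle() is a guard around writeback_inodes_sb(): callers such as ext4's delalloc path can nudge writeback opportunistically without stacking a second flush on one already in flight. The guard pattern in a toy, single-threaded form with stand-in state:

#include <stdio.h>

/* Stand-ins for writeback_in_progress()/writeback_inodes_sb(). */
static int in_progress;
static void start_writeback(void) { in_progress = 1; puts("writeback started"); }

static int kick_if_idle(void)
{
	if (!in_progress) {	/* don't pile a second flush on a running one */
		start_writeback();
		return 1;
	}
	return 0;
}

int main(void)
{
	kick_if_idle();		/* starts writeback, returns 1 */
	return kick_if_idle();	/* no-op: returns 0 */
}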
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c18913a777ae..a9f5e137f1d3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -828,6 +828,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
 		if (!page)
 			break;
 
+		if (mapping_writably_mapped(mapping))
+			flush_dcache_page(page);
+
 		pagefault_disable();
 		tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
 		pagefault_enable();
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4eb308aa3234..a6abbae8a278 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -569,6 +569,40 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
 	return ret;
 }
 
+/**
+ * gfs2_file_aio_write - Perform a write to a file
+ * @iocb: The io context
+ * @iov: The data to write
+ * @nr_segs: Number of @iov segments
+ * @pos: The file position
+ *
+ * We have to do a lock/unlock here to refresh the inode size for
+ * O_APPEND writes, otherwise we can land up writing at the wrong
+ * offset. There is still a race, but provided the app is using its
+ * own file locking, this will make O_APPEND work as expected.
+ *
+ */
+
+static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+				   unsigned long nr_segs, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+
+	if (file->f_flags & O_APPEND) {
+		struct dentry *dentry = file->f_dentry;
+		struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+		struct gfs2_holder gh;
+		int ret;
+
+		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+		if (ret)
+			return ret;
+		gfs2_glock_dq_uninit(&gh);
+	}
+
+	return generic_file_aio_write(iocb, iov, nr_segs, pos);
+}
+
 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
 
 /**
@@ -711,7 +745,7 @@ const struct file_operations gfs2_file_fops = {
 	.read		= do_sync_read,
 	.aio_read	= generic_file_aio_read,
 	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.aio_write	= gfs2_file_aio_write,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.mmap		= gfs2_mmap,
 	.open		= gfs2_open,
@@ -741,7 +775,7 @@ const struct file_operations gfs2_file_fops_nolock = {
 	.read		= do_sync_read,
 	.aio_read	= generic_file_aio_read,
 	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.aio_write	= gfs2_file_aio_write,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.mmap		= gfs2_mmap,
 	.open		= gfs2_open,
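
The O_APPEND fix takes and immediately drops a shared glock purely for its side effect: acquiring the lock revalidates the cached inode size, so generic_file_aio_write() computes the append offset from fresh data. A stand-alone caricature of that shape, with lock_shared()/unlock_shared() as stand-ins for the glock calls:

#include <stdio.h>

static void lock_shared(void)   { /* would fetch the latest inode size */ }
static void unlock_shared(void) { }

static long append_write(int o_append, long cached_size, long fresh_size)
{
	long pos = cached_size;
	if (o_append) {
		lock_shared();	/* side effect: cached size revalidated */
		pos = fresh_size;	/* write lands at the true EOF */
		unlock_shared();
	}
	return pos;
}

int main(void)
{
	printf("offset=%ld\n", append_write(1, 100, 4096)); /* 4096, not 100 */
	return 0;
}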
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f455a03a09e2..f42663325931 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -769,6 +769,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	if (!gl)
 		return -ENOMEM;
 
+	atomic_inc(&sdp->sd_glock_disposal);
 	gl->gl_flags = 0;
 	gl->gl_name = name;
 	atomic_set(&gl->gl_ref, 1);
@@ -1538,6 +1539,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
 		up_write(&gfs2_umount_flush_sem);
 		msleep(10);
 	}
+	flush_workqueue(glock_workqueue);
+	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
+	gfs2_dump_lockstate(sdp);
 }
 
 void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 13f0bd228132..c0262faf4725 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -123,7 +123,7 @@ struct lm_lockops {
 	int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
 	void (*lm_unmount) (struct gfs2_sbd *sdp);
 	void (*lm_withdraw) (struct gfs2_sbd *sdp);
-	void (*lm_put_lock) (struct kmem_cache *cachep, void *gl);
+	void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
 	unsigned int (*lm_lock) (struct gfs2_glock *gl,
 				 unsigned int req_state, unsigned int flags);
 	void (*lm_cancel) (struct gfs2_glock *gl);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 4792200978c8..bc0ad158e6b4 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -544,6 +544,8 @@ struct gfs2_sbd {
 	struct gfs2_holder sd_live_gh;
 	struct gfs2_glock *sd_rename_gl;
 	struct gfs2_glock *sd_trans_gl;
+	wait_queue_head_t sd_glock_wait;
+	atomic_t sd_glock_disposal;
 
 	/* Inode Stuff */
 
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 46df988323bc..0e5e0e7022e5 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -21,6 +21,7 @@ static void gdlm_ast(void *arg)
 {
 	struct gfs2_glock *gl = arg;
 	unsigned ret = gl->gl_state;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
 
 	BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
 
@@ -30,6 +31,8 @@ static void gdlm_ast(void *arg)
 	switch (gl->gl_lksb.sb_status) {
 	case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
 		kmem_cache_free(gfs2_glock_cachep, gl);
+		if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+			wake_up(&sdp->sd_glock_wait);
 		return;
 	case -DLM_ECANCEL: /* Cancel while getting lock */
 		ret |= LM_OUT_CANCELED;
@@ -164,14 +167,16 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl,
 	return LM_OUT_ASYNC;
 }
 
-static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr)
+static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
 {
-	struct gfs2_glock *gl = ptr;
-	struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 	int error;
 
 	if (gl->gl_lksb.sb_lkid == 0) {
 		kmem_cache_free(cachep, gl);
+		if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+			wake_up(&sdp->sd_glock_wait);
 		return;
 	}
 
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index cb8d7a93d5ec..6f68a5f18eb8 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -121,7 +121,7 @@ struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
 	if (aspace) {
 		mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS);
 		aspace->i_mapping->a_ops = &aspace_aops;
-		aspace->i_size = ~0ULL;
+		aspace->i_size = MAX_LFS_FILESIZE;
 		ip = GFS2_I(aspace);
 		clear_bit(GIF_USER, &ip->i_flags);
 		insert_inode_hash(aspace);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index edfee24f3636..8a102f731003 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -82,6 +82,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 
 	gfs2_tune_init(&sdp->sd_tune);
 
+	init_waitqueue_head(&sdp->sd_glock_wait);
+	atomic_set(&sdp->sd_glock_disposal, 0);
 	spin_lock_init(&sdp->sd_statfs_spin);
 
 	spin_lock_init(&sdp->sd_rindex_spin);
@@ -983,9 +985,17 @@ static const match_table_t nolock_tokens = {
 	{ Opt_err, NULL },
 };
 
+static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	kmem_cache_free(cachep, gl);
+	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+		wake_up(&sdp->sd_glock_wait);
+}
+
 static const struct lm_lockops nolock_ops = {
 	.lm_proto_name = "lock_nolock",
-	.lm_put_lock = kmem_cache_free,
+	.lm_put_lock = nolock_put_lock,
 	.lm_tokens = &nolock_tokens,
 };
 
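
All three put-lock paths (the DLM ast, the lkid == 0 shortcut, and lock_nolock) now decrement sd_glock_disposal and wake sd_glock_wait, which gfs2_gl_hash_clear() blocks on; unmount therefore cannot proceed while any glock free is still queued. The same barrier in user-space pthreads terms, a mutex/condvar model of atomic_dec_and_test plus wait_event:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  c = PTHREAD_COND_INITIALIZER;
static int disposal;

static void get_glock(void)
{
	pthread_mutex_lock(&m);
	disposal++;			/* atomic_inc(&sdp->sd_glock_disposal) */
	pthread_mutex_unlock(&m);
}

static void put_glock(void)
{
	pthread_mutex_lock(&m);
	if (--disposal == 0)
		pthread_cond_signal(&c);	/* wake_up(&sdp->sd_glock_wait) */
	pthread_mutex_unlock(&m);
}

static void *reaper(void *arg) { put_glock(); return arg; }

int main(void)
{
	pthread_t t;
	get_glock();
	pthread_create(&t, NULL, reaper, NULL);
	pthread_mutex_lock(&m);
	while (disposal != 0)		/* wait_event(..., disposal == 0) */
		pthread_cond_wait(&c, &m);
	pthread_mutex_unlock(&m);
	pthread_join(t, NULL);
	puts("all glocks reaped; safe to tear down");
	return 0;
}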
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 247436c10deb..84350e1be66d 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -748,7 +748,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	struct gfs2_rgrpd *nrgd;
 	unsigned int num_gh;
 	int dir_rename = 0;
-	int alloc_required;
+	int alloc_required = 0;
 	unsigned int x;
 	int error;
 
@@ -867,7 +867,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 			goto out_gunlock;
 	}
 
-	alloc_required = error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
+	if (nip == NULL)
+		alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
+	error = alloc_required;
 	if (error < 0)
 		goto out_gunlock;
 	error = 0;
@@ -1086,7 +1088,8 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
 		error = vfs_follow_link(nd, buf);
 		if (buf != array)
 			kfree(buf);
-	}
+	} else
+		path_put(&nd->path);
 
 	return ERR_PTR(error);
 }
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 0608f490c295..503b842f3ba2 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -591,11 +591,7 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 	u64 rgrp_count = ip->i_disksize;
 	int error;
 
-	if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
-		gfs2_consist_inode(ip);
-		return -EIO;
-	}
-
+	do_div(rgrp_count, sizeof(struct gfs2_rindex));
 	clear_rgrpdi(sdp);
 
 	file_ra_state_init(&ra_state, inode->i_mapping);
@@ -915,7 +911,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
 struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
 {
 	BUG_ON(ip->i_alloc != NULL);
-	ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL);
+	ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS);
 	return ip->i_alloc;
 }
 
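
The dropped check treated a nonzero remainder from do_div() as on-disk corruption; after the change the trailing partial record is simply ignored and only the whole entries are counted. The arithmetic, runnable stand-alone (the structure size here is illustrative, not the real sizeof(struct gfs2_rindex)):

#include <stdio.h>
#include <stdint.h>

struct gfs2_rindex_model { char pad[96]; };	/* illustrative size only */

int main(void)
{
	uint64_t disksize = 96 * 7 + 40;	/* seven entries plus a ragged tail */
	uint64_t count = disksize / sizeof(struct gfs2_rindex_model);
	printf("usable rindex entries: %llu\n", (unsigned long long)count); /* 7 */
	return 0;
}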
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c282ad41f3d1..b9dd3da22c0a 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -21,6 +21,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/time.h>
+#include <linux/wait.h>
 
 #include "gfs2.h"
 #include "incore.h"
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 8a04108e0c22..c2ebdf2c01d4 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1296,6 +1296,7 @@ fail:
 
 int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
 {
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_ea_location el;
 	struct buffer_head *dibh;
 	int error;
@@ -1305,16 +1306,17 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
 		return error;
 
 	if (GFS2_EA_IS_STUFFED(el.el_ea)) {
-		error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0);
-		if (error)
-			return error;
-
-		gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1);
-		memcpy(GFS2_EA2DATA(el.el_ea), data,
-		       GFS2_EA_DATA_LEN(el.el_ea));
-	} else
+		error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
+		if (error == 0) {
+			gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1);
+			memcpy(GFS2_EA2DATA(el.el_ea), data,
+			       GFS2_EA_DATA_LEN(el.el_ea));
+		}
+	} else {
 		error = ea_acl_chmod_unstuffed(ip, el.el_ea, data);
+	}
 
+	brelse(el.el_bh);
 	if (error)
 		return error;
 
@@ -1327,8 +1329,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
 		brelse(dibh);
 	}
 
-	gfs2_trans_end(GFS2_SB(&ip->i_inode));
-
+	gfs2_trans_end(sdp);
 	return error;
 }
 
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index a5089a6dd67a..7239efc690d8 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -646,22 +646,27 @@ static const struct super_operations hppfs_sbops = {
 static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
 			  int buflen)
 {
-	struct dentry *proc_dentry;
-
-	proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
+	struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
 	return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer,
 						    buflen);
 }
 
 static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-	struct dentry *proc_dentry;
-
-	proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
+	struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
 
 	return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
 }
 
+static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd,
+			   void *cookie)
+{
+	struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
+
+	if (proc_dentry->d_inode->i_op->put_link)
+		proc_dentry->d_inode->i_op->put_link(proc_dentry, nd, cookie);
+}
+
 static const struct inode_operations hppfs_dir_iops = {
 	.lookup		= hppfs_lookup,
 };
@@ -669,6 +674,7 @@ static const struct inode_operations hppfs_dir_iops = {
 static const struct inode_operations hppfs_link_iops = {
 	.readlink	= hppfs_readlink,
 	.follow_link	= hppfs_follow_link,
+	.put_link	= hppfs_put_link,
 };
 
 static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index ca0f5eb62b20..886849370950 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -22,6 +22,7 @@
 #include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
 #include <trace/events/jbd2.h>
 
 /*
@@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 	journal->j_tail_sequence = first_tid;
 	journal->j_tail = blocknr;
 	spin_unlock(&journal->j_state_lock);
+
+	/*
+	 * If there is an external journal, we need to make sure that
+	 * any data blocks that were recently written out --- perhaps
+	 * by jbd2_log_do_checkpoint() --- are flushed out before we
+	 * drop the transactions from the external journal.  It's
+	 * unlikely this will be necessary, especially with an
+	 * appropriately sized journal, but we need this to guarantee
+	 * correctness.  Fortunately jbd2_cleanup_journal_tail()
+	 * doesn't get called all that often.
+	 */
+	if ((journal->j_fs_dev != journal->j_dev) &&
+	    (journal->j_flags & JBD2_BARRIER))
+		blkdev_issue_flush(journal->j_fs_dev, NULL);
 	if (!(journal->j_flags & JBD2_ABORT))
 		jbd2_journal_update_superblock(journal, 1);
 	return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6a10238d2c63..1bc74b6f26d2 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal,
 			ret = err;
 		spin_lock(&journal->j_list_lock);
 		J_ASSERT(jinode->i_transaction == commit_transaction);
+		commit_transaction->t_flushed_data_blocks = 1;
 		jinode->i_flags &= ~JI_COMMIT_RUNNING;
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
 	}
@@ -708,8 +709,17 @@ start_journal_io:
 		}
 	}
 
-	/* Done it all: now write the commit record asynchronously. */
+	/*
+	 * If the journal is not located on the file system device,
+	 * then we must flush the file system device before we issue
+	 * the commit record
+	 */
+	if (commit_transaction->t_flushed_data_blocks &&
+	    (journal->j_fs_dev != journal->j_dev) &&
+	    (journal->j_flags & JBD2_BARRIER))
+		blkdev_issue_flush(journal->j_fs_dev, NULL);
 
+	/* Done it all: now write the commit record asynchronously. */
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal,
 				      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
 		err = journal_submit_commit_record(journal, commit_transaction,
@@ -720,13 +730,6 @@ start_journal_io:
 		blkdev_issue_flush(journal->j_dev, NULL);
 	}
 
-	/*
-	 * This is the right place to wait for data buffers both for ASYNC
-	 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
-	 * the commit block went to disk (which happens above). If commit is
-	 * SYNC, we need to wait for data buffers before we start writing
-	 * commit block, which happens below in such setting.
-	 */
 	err = journal_finish_inode_data_buffers(journal, commit_transaction);
 	if (err) {
 		printk(KERN_WARNING
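
With an external journal, the commit record lives on a different device than the data it makes replayable, so the data device must be flushed before the record is written; otherwise a crash could expose a committed transaction whose data never reached stable storage. The required ordering, as a stub-driven sketch:

#include <stdio.h>

static int flushed_data_blocks = 1;	/* t_flushed_data_blocks */
static int journal_is_external = 1;	/* j_fs_dev != j_dev */
static int barriers_enabled = 1;	/* JBD2_BARRIER */

static void flush_device(const char *dev) { printf("flush %s\n", dev); }
static void write_commit_record(void)     { printf("commit record\n"); }

int main(void)
{
	if (flushed_data_blocks && journal_is_external && barriers_enabled)
		flush_device("fs_dev");	/* data hits stable storage first */
	write_commit_record();		/* only now may replay see the txn */
	return 0;
}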
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 17af879e6e9e..ac0d027595d0 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -814,7 +814,7 @@ static journal_t * journal_init_common (void)
 	journal_t *journal;
 	int err;
 
-	journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
+	journal = kzalloc(sizeof(*journal), GFP_KERNEL);
 	if (!journal)
 		goto fail;
 
diff --git a/fs/namei.c b/fs/namei.c
index 68921d9b5302..94a5e60779f9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -232,6 +232,7 @@ int generic_permission(struct inode *inode, int mask,
 	/*
 	 * Searching includes executable on directories, else just read.
 	 */
+	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
 	if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
 		if (capable(CAP_DAC_READ_SEARCH))
 			return 0;
@@ -560,6 +561,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
 		dget(dentry);
 	}
 	mntget(path->mnt);
+	nd->last_type = LAST_BIND;
 	cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
 	error = PTR_ERR(cookie);
 	if (!IS_ERR(cookie)) {
@@ -1602,11 +1604,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	struct file *filp;
 	struct nameidata nd;
 	int error;
-	struct path path, save;
+	struct path path;
 	struct dentry *dir;
 	int count = 0;
 	int will_truncate;
 	int flag = open_to_namei_flags(open_flag);
+	int force_reval = 0;
 
 	/*
 	 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
@@ -1618,7 +1621,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
 		open_flag |= O_DSYNC;
 
 	if (!acc_mode)
-		acc_mode = MAY_OPEN | ACC_MODE(flag);
+		acc_mode = MAY_OPEN | ACC_MODE(open_flag);
 
 	/* O_TRUNC implies we need access checks for write permissions */
 	if (flag & O_TRUNC)
@@ -1658,9 +1661,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	/*
 	 * Create - we need to know the parent.
 	 */
+reval:
 	error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
 	if (error)
 		return ERR_PTR(error);
+	if (force_reval)
+		nd.flags |= LOOKUP_REVAL;
 	error = path_walk(pathname, &nd);
 	if (error) {
 		if (nd.root.mnt)
@@ -1852,17 +1858,7 @@ do_link:
 	error = security_inode_follow_link(path.dentry, &nd);
 	if (error)
 		goto exit_dput;
-	save = nd.path;
-	path_get(&save);
 	error = __do_follow_link(&path, &nd);
-	if (error == -ESTALE) {
-		/* nd.path had been dropped */
-		nd.path = save;
-		path_get(&nd.path);
-		nd.flags |= LOOKUP_REVAL;
-		error = __do_follow_link(&path, &nd);
-	}
-	path_put(&save);
 	path_put(&path);
 	if (error) {
 		/* Does someone understand code flow here? Or it is only
@@ -1872,6 +1868,10 @@ do_link:
 		release_open_intent(&nd);
 		if (nd.root.mnt)
 			path_put(&nd.root);
+		if (error == -ESTALE && !force_reval) {
+			force_reval = 1;
+			goto reval;
+		}
 		return ERR_PTR(error);
 	}
 	nd.flags &= ~LOOKUP_PARENT;
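
Instead of re-driving only the final __do_follow_link() step on -ESTALE, do_filp_open() now restarts the entire walk once with LOOKUP_REVAL set, distrusting every cached dentry along the path. The control flow reduced to a self-contained skeleton (walk_path() is a hypothetical stand-in that fails once without revalidation):

#include <errno.h>
#include <stdio.h>

/* Stub: the first non-revalidated walk hits a stale handle. */
static int walk_path(const char *name, int force_reval)
{
	(void)name;
	return force_reval ? 0 : -ESTALE;
}

static int open_with_reval(const char *name)
{
	int force_reval = 0, err;
reval:
	err = walk_path(name, force_reval);
	if (err == -ESTALE && !force_reval) {
		force_reval = 1;	/* redo the whole walk, revalidating */
		goto reval;
	}
	return err;
}

int main(void) { printf("%d\n", open_with_reval("/mnt/nfs/f")); return 0; }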
diff --git a/fs/namespace.c b/fs/namespace.c
index 7d70d63ceb29..c768f733c8d6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -965,10 +965,12 @@ EXPORT_SYMBOL(may_umount_tree);
 int may_umount(struct vfsmount *mnt)
 {
 	int ret = 1;
+	down_read(&namespace_sem);
 	spin_lock(&vfsmount_lock);
 	if (propagate_mount_busy(mnt, 2))
 		ret = 0;
 	spin_unlock(&vfsmount_lock);
+	up_read(&namespace_sem);
 	return ret;
 }
 
@@ -1352,12 +1354,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
 	if (err)
 		goto out_cleanup_ids;
 
+	spin_lock(&vfsmount_lock);
+
 	if (IS_MNT_SHARED(dest_mnt)) {
 		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
 			set_mnt_shared(p);
 	}
-
-	spin_lock(&vfsmount_lock);
 	if (parent_path) {
 		detach_mnt(source_mnt, parent_path);
 		attach_mnt(source_mnt, path);
@@ -1534,8 +1536,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 		err = change_mount_flags(path->mnt, flags);
 	else
 		err = do_remount_sb(sb, flags, data, 0);
-	if (!err)
+	if (!err) {
+		spin_lock(&vfsmount_lock);
+		mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
 		path->mnt->mnt_flags = mnt_flags;
+		spin_unlock(&vfsmount_lock);
+	}
 	up_write(&sb->s_umount);
 	if (!err) {
 		security_sb_post_remount(path->mnt, flags, data);
@@ -1665,6 +1671,8 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
 {
 	int err;
 
+	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD);
+
 	down_write(&namespace_sem);
 	/* Something was mounted here while we slept */
 	while (d_mountpoint(path->dentry) &&
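
The do_remount() change is a classic read-modify-write fix: the propagation bits in mnt_flags are owned by code running under vfsmount_lock, so the new flags must be merged with the live MNT_PNODE_MASK bits inside the same critical section or a concurrent update is lost. A toy version with a pthread mutex in place of the spinlock (the mask value is illustrative):

#include <pthread.h>

#define PNODE_MASK 0xf000	/* stand-in for the shared/propagation bits */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int mnt_flags_word;

static void apply_remount_flags(unsigned int new_flags)
{
	pthread_mutex_lock(&lock);
	new_flags |= mnt_flags_word & PNODE_MASK;	/* preserve pnode bits */
	mnt_flags_word = new_flags;
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	mnt_flags_word = 0x2000;	/* a propagation bit set elsewhere */
	apply_remount_flags(0x1);	/* remount with plain flags */
	return !(mnt_flags_word == 0x2001);
}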
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2c5ace4f00a7..3c7f03b669fb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1615,6 +1615,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			goto out;
 
 		new_dentry = dentry;
+		rehash = NULL;
 		new_inode = NULL;
 		}
 	}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 6b891328f332..63f2071d6445 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -486,6 +486,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
 {
 	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
 
+	if (gfp & __GFP_WAIT)
+		nfs_wb_page(page->mapping->host, page);
 	/* If PagePrivate() is set, then the page is not freeable */
 	if (PagePrivate(page))
 		return 0;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index faa091865ad0..f141bde7756a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1261,8 +1261,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 
 	if (fattr->valid & NFS_ATTR_FATTR_MODE) {
 		if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
+			umode_t newmode = inode->i_mode & S_IFMT;
+			newmode |= fattr->mode & S_IALLUGO;
+			inode->i_mode = newmode;
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-			inode->i_mode = fattr->mode;
 		}
 	} else if (server->caps & NFS_CAP_MODE)
 		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
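
The NFS fix keeps the locally known file-type bits (S_IFMT) and takes only the permission bits (S_IALLUGO, i.e. the 07777 set) from the server's reply, so a mode-only change can no longer corrupt the inode type. The bit arithmetic, runnable stand-alone:

#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>

int main(void)
{
	mode_t i_mode = S_IFREG | 0644;	/* cached inode: regular file */
	mode_t fattr_mode = 0755;	/* server reply, perms changed */

	mode_t newmode = i_mode & S_IFMT;	/* keep the type bits */
	newmode |= fattr_mode &
		   (S_IRWXU | S_IRWXG | S_IRWXO | S_ISUID | S_ISGID | S_ISVTX);
	assert(newmode == (S_IFREG | 0755));
	return 0;
}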
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 865265bdca03..0c6fda33d66e 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -146,6 +146,7 @@ enum {
 	NFS_O_RDWR_STATE,		/* OPEN stateid has read/write state */
 	NFS_STATE_RECLAIM_REBOOT,	/* OPEN stateid server rebooted */
 	NFS_STATE_RECLAIM_NOGRACE,	/* OPEN stateid needs to recover state */
+	NFS_STATE_POSIX_LOCKS,		/* Posix locks are supported */
 };
 
 struct nfs4_state {
@@ -277,6 +278,7 @@ extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
 extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_schedule_state_manager(struct nfs_client *);
 extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
+extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
 extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 198d51d17c13..375f0fae2c6a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -249,19 +249,15 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
 			if (state == NULL)
 				break;
 			nfs4_state_mark_reclaim_nograce(clp, state);
-		case -NFS4ERR_STALE_CLIENTID:
+			goto do_state_recovery;
 		case -NFS4ERR_STALE_STATEID:
-		case -NFS4ERR_EXPIRED:
-			nfs4_schedule_state_recovery(clp);
-			ret = nfs4_wait_clnt_recover(clp);
-			if (ret == 0)
-				exception->retry = 1;
-#if !defined(CONFIG_NFS_V4_1)
-			break;
-#else /* !defined(CONFIG_NFS_V4_1) */
-			if (!nfs4_has_session(server->nfs_client))
+			if (state == NULL)
 				break;
-			/* FALLTHROUGH */
+			nfs4_state_mark_reclaim_reboot(clp, state);
+		case -NFS4ERR_STALE_CLIENTID:
+		case -NFS4ERR_EXPIRED:
+			goto do_state_recovery;
+#if defined(CONFIG_NFS_V4_1)
 		case -NFS4ERR_BADSESSION:
 		case -NFS4ERR_BADSLOT:
 		case -NFS4ERR_BAD_HIGH_SLOT:
@@ -274,7 +270,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
 			nfs4_schedule_state_recovery(clp);
 			exception->retry = 1;
 			break;
-#endif /* !defined(CONFIG_NFS_V4_1) */
+#endif /* defined(CONFIG_NFS_V4_1) */
 		case -NFS4ERR_FILE_OPEN:
 			if (exception->timeout > HZ) {
 				/* We have retried a decent amount, time to
@@ -293,6 +289,12 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
 	}
 	/* We failed to handle the error */
 	return nfs4_map_errors(ret);
+do_state_recovery:
+	nfs4_schedule_state_recovery(clp);
+	ret = nfs4_wait_clnt_recover(clp);
+	if (ret == 0)
+		exception->retry = 1;
+	return ret;
 }
 
 
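Note: the three hunks above replace duplicated recovery logic with one shared
tail — any error that needs full state recovery now jumps to the
do_state_recovery label instead of open-coding the schedule/wait/retry
sequence per case arm. A condensed restatement of the new control flow
(names taken from the diff itself; the surrounding function body is elided,
so this is a reading aid, not a drop-in replacement):

	static int recovery_tail_sketch(struct nfs_client *clp,
					struct nfs4_state *state,
					struct nfs4_exception *exception,
					int errorcode)
	{
		int ret = errorcode;

		switch (errorcode) {
		case -NFS4ERR_STALE_STATEID:
			if (state == NULL)
				break;
			nfs4_state_mark_reclaim_reboot(clp, state);
			/* fall through: reboot reclaim also recovers state */
		case -NFS4ERR_STALE_CLIENTID:
		case -NFS4ERR_EXPIRED:
			goto do_state_recovery;
		}
		return ret;

	do_state_recovery:
		nfs4_schedule_state_recovery(clp);
		ret = nfs4_wait_clnt_recover(clp);
		if (ret == 0)
			exception->retry = 1;
		return ret;
	}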
@@ -1658,6 +1660,8 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
 	status = PTR_ERR(state);
 	if (IS_ERR(state))
 		goto err_opendata_put;
+	if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0)
+		set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
 	nfs4_opendata_put(opendata);
 	nfs4_put_state_owner(sp);
 	*res = state;
@@ -3422,15 +3426,14 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
 			if (state == NULL)
 				break;
 			nfs4_state_mark_reclaim_nograce(clp, state);
-		case -NFS4ERR_STALE_CLIENTID:
+			goto do_state_recovery;
 		case -NFS4ERR_STALE_STATEID:
+			if (state == NULL)
+				break;
+			nfs4_state_mark_reclaim_reboot(clp, state);
+		case -NFS4ERR_STALE_CLIENTID:
 		case -NFS4ERR_EXPIRED:
-			rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
-			nfs4_schedule_state_recovery(clp);
-			if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
-				rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
-			task->tk_status = 0;
-			return -EAGAIN;
+			goto do_state_recovery;
 #if defined(CONFIG_NFS_V4_1)
 		case -NFS4ERR_BADSESSION:
 		case -NFS4ERR_BADSLOT:
@@ -3458,6 +3461,13 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
 	}
 	task->tk_status = nfs4_map_errors(task->tk_status);
 	return 0;
+do_state_recovery:
+	rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
+	nfs4_schedule_state_recovery(clp);
+	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
+		rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
+	task->tk_status = 0;
+	return -EAGAIN;
 }
 
 static int
@@ -4088,6 +4098,28 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
 	.rpc_release = nfs4_lock_release,
 };
 
+static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
+{
+	struct nfs_client *clp = server->nfs_client;
+	struct nfs4_state *state = lsp->ls_state;
+
+	switch (error) {
+	case -NFS4ERR_ADMIN_REVOKED:
+	case -NFS4ERR_BAD_STATEID:
+	case -NFS4ERR_EXPIRED:
+		if (new_lock_owner != 0 ||
+		    (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+			nfs4_state_mark_reclaim_nograce(clp, state);
+		lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+		break;
+	case -NFS4ERR_STALE_STATEID:
+		if (new_lock_owner != 0 ||
+		    (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+			nfs4_state_mark_reclaim_reboot(clp, state);
+		lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+	};
+}
+
 static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type)
 {
 	struct nfs4_lockdata *data;
@@ -4126,6 +4158,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 	ret = nfs4_wait_for_completion_rpc_task(task);
 	if (ret == 0) {
 		ret = data->rpc_status;
+		if (ret)
+			nfs4_handle_setlk_error(data->server, data->lsp,
+					data->arg.new_lock_owner, ret);
 	} else
 		data->cancelled = 1;
 	rpc_put_task(task);
@@ -4181,8 +4216,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 {
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	unsigned char fl_flags = request->fl_flags;
-	int status;
+	int status = -ENOLCK;
 
+	if ((fl_flags & FL_POSIX) &&
+			!test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
+		goto out;
 	/* Is this a delegated open? */
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
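Note: the OPEN reply's result flags now record whether the server supports
POSIX byte-range locking (NFS4_OPEN_RESULT_LOCKTYPE_POSIX), and
_nfs4_proc_setlk() refuses POSIX lock requests up front with -ENOLCK rather
than sending an operation the server is known to reject. A minimal sketch of
the gate, factored into a hypothetical helper that is not in the tree:

	static int nfs4_posix_locks_supported(struct nfs4_state *state,
					      struct file_lock *request)
	{
		/* the bit was set at OPEN time from o_res.rflags */
		if ((request->fl_flags & FL_POSIX) &&
		    !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
			return -ENOLCK;
		return 0;
	}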
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6d263ed79e92..c1e2733f4fa4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -901,7 +901,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
 	nfs4_schedule_state_manager(clp);
 }
 
-static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
+int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
 {
 
 	set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e2975939126a..a12c45b65dd4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -176,6 +176,12 @@ void nfs_release_request(struct nfs_page *req)
 	kref_put(&req->wb_kref, nfs_free_request);
 }
 
+static int nfs_wait_bit_uninterruptible(void *word)
+{
+	io_schedule();
+	return 0;
+}
+
 /**
  * nfs_wait_on_request - Wait for a request to complete.
  * @req: request to wait upon.
@@ -186,14 +192,9 @@ void nfs_release_request(struct nfs_page *req)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
-	int ret = 0;
-
-	if (!test_bit(PG_BUSY, &req->wb_flags))
-		goto out;
-	ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY,
-			nfs_wait_bit_killable, TASK_KILLABLE);
-out:
-	return ret;
+	return wait_on_bit(&req->wb_flags, PG_BUSY,
+			nfs_wait_bit_uninterruptible,
+			TASK_UNINTERRUPTIBLE);
 }
 
 /**
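Note: nfs_wait_on_request() previously skipped the wait when PG_BUSY was
already clear and used a killable wait; it now always calls wait_on_bit()
with an uninterruptible action. In the 2.6.32-era API (an assumption this
sketch relies on), wait_on_bit() returns immediately if the bit is clear and
otherwise invokes the action callback to sleep; returning 0 keeps waiting.
Illustrative names below:

	static int demo_wait_bit_uninterruptible(void *word)
	{
		io_schedule();	/* sleep; counted as I/O wait, not killable */
		return 0;	/* 0 = keep waiting until the bit clears */
	}

	static void demo_wait_busy(unsigned long *flags)
	{
		wait_on_bit(flags, PG_BUSY,
			    demo_wait_bit_uninterruptible,
			    TASK_UNINTERRUPTIBLE);
	}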
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ce907efc5508..f1afee4eea77 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -243,6 +243,7 @@ static int nfs_show_stats(struct seq_file *, struct vfsmount *);
 static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
 static int nfs_xdev_get_sb(struct file_system_type *fs_type,
 		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static void nfs_put_super(struct super_block *);
 static void nfs_kill_super(struct super_block *);
 static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
 
@@ -266,6 +267,7 @@ static const struct super_operations nfs_sops = {
 	.alloc_inode	= nfs_alloc_inode,
 	.destroy_inode	= nfs_destroy_inode,
 	.write_inode	= nfs_write_inode,
+	.put_super	= nfs_put_super,
 	.statfs		= nfs_statfs,
 	.clear_inode	= nfs_clear_inode,
 	.umount_begin	= nfs_umount_begin,
@@ -335,6 +337,7 @@ static const struct super_operations nfs4_sops = {
 	.alloc_inode	= nfs_alloc_inode,
 	.destroy_inode	= nfs_destroy_inode,
 	.write_inode	= nfs_write_inode,
+	.put_super	= nfs_put_super,
 	.statfs		= nfs_statfs,
 	.clear_inode	= nfs4_clear_inode,
 	.umount_begin	= nfs_umount_begin,
@@ -2258,6 +2261,17 @@ error_splat_super:
 }
 
 /*
+ * Ensure that we unregister the bdi before kill_anon_super
+ * releases the device name
+ */
+static void nfs_put_super(struct super_block *s)
+{
+	struct nfs_server *server = NFS_SB(s);
+
+	bdi_unregister(&server->backing_dev_info);
+}
+
+/*
  * Destroy an NFS2/3 superblock
  */
 static void nfs_kill_super(struct super_block *s)
@@ -2265,7 +2279,6 @@ static void nfs_kill_super(struct super_block *s)
 	struct nfs_server *server = NFS_SB(s);
 
 	kill_anon_super(s);
-	bdi_unregister(&server->backing_dev_info);
 	nfs_fscache_release_super_cookie(s);
 	nfs_free_server(server);
 }
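Note: kill_anon_super() releases the anonymous device name that the
backing_dev_info was registered under, so unregistering the bdi afterwards
(as the old nfs_kill_super() did) touched freed memory. The generic unmount
path invokes ->put_super before that name is released, which is why the
unregister moves there. Abridged wiring sketch (only the relevant fields of
the operations tables shown):

	/* both nfs_sops and nfs4_sops gain the same hook, so the bdi is
	 * always gone before kill_anon_super() frees the device name */
	static const struct super_operations demo_sops = {
		.put_super	= nfs_put_super,	/* bdi_unregister() */
		.statfs		= nfs_statfs,
	};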
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 70e1fbbaaeab..ad4d2e787b20 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -15,8 +15,10 @@
 
 #include "callback.h"
 
+#ifdef CONFIG_NFS_V4
 static const int nfs_set_port_min = 0;
 static const int nfs_set_port_max = 65535;
+#endif
 static struct ctl_table_header *nfs_callback_sysctl_table;
 
 static ctl_table nfs_cb_sysctls[] = {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d171696017f4..7b54b8bb101f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1233,7 +1233,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-void nfs_commitdata_release(void *data)
+static void nfs_commitdata_release(void *data)
 {
 	struct nfs_write_data *wdata = data;
 
@@ -1541,6 +1541,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 			break;
 		}
 		ret = nfs_wait_on_request(req);
+		nfs_release_request(req);
 		if (ret < 0)
 			goto out;
 	}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7c2e337d05af..c194793b642b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -780,12 +780,9 @@ static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
 	int (*fsync) (struct file *, struct dentry *, int);
 	int err;
 
-	err = filemap_fdatawrite(inode->i_mapping);
+	err = filemap_write_and_wait(inode->i_mapping);
 	if (err == 0 && fop && (fsync = fop->fsync))
 		err = fsync(filp, dp, 0);
-	if (err == 0)
-		err = filemap_fdatawait(inode->i_mapping);
-
 	return err;
 }
 
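Note: filemap_write_and_wait() bundles the removed write-out and wait into
one call, which also moves the wait ahead of the ->fsync invocation so the
fsync sees completed writeback. A side-by-side sketch of the two forms
(illustrative helpers, not in the tree):

	static int demo_old_way(struct address_space *mapping)
	{
		int err = filemap_fdatawrite(mapping);	/* start writeback */
		if (err == 0)
			err = filemap_fdatawait(mapping); /* wait for it */
		return err;
	}

	static int demo_new_way(struct address_space *mapping)
	{
		return filemap_write_and_wait(mapping);	/* same effect */
	}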
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index f4a14ea2ed9c..effdbdbe6c11 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -417,8 +417,8 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
 
 	key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT -
 					 bmap->b_inode->i_blkbits);
-	for (pbh = page_buffers(bh->b_page); pbh != bh;
-	     pbh = pbh->b_this_page, key++);
+	for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
+		key++;
 
 	return key;
 }
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index d5ad54e204a5..18737818db63 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -328,19 +328,24 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 			tnicps += nicps;
 			nilfs_mdt_mark_buffer_dirty(cp_bh);
 			nilfs_mdt_mark_dirty(cpfile);
-			if (!nilfs_cpfile_is_in_first(cpfile, cno) &&
-			    (count = nilfs_cpfile_block_sub_valid_checkpoints(
-				    cpfile, cp_bh, kaddr, nicps)) == 0) {
-				/* make hole */
-				kunmap_atomic(kaddr, KM_USER0);
-				brelse(cp_bh);
-				ret = nilfs_cpfile_delete_checkpoint_block(
-					cpfile, cno);
-				if (ret == 0)
-					continue;
-				printk(KERN_ERR "%s: cannot delete block\n",
-				       __func__);
-				break;
+			if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
+				count =
+				  nilfs_cpfile_block_sub_valid_checkpoints(
+						cpfile, cp_bh, kaddr, nicps);
+				if (count == 0) {
+					/* make hole */
+					kunmap_atomic(kaddr, KM_USER0);
+					brelse(cp_bh);
+					ret =
+					  nilfs_cpfile_delete_checkpoint_block(
+								cpfile, cno);
+					if (ret == 0)
+						continue;
+					printk(KERN_ERR
+					       "%s: cannot delete block\n",
+					       __func__);
+					break;
+				}
 			}
 		}
 
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index d369ac718277..236753df5cdf 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -51,11 +51,11 @@ static int nilfs_direct_lookup(const struct nilfs_bmap *bmap,
 	struct nilfs_direct *direct;
 	__u64 ptr;
 
-	direct = (struct nilfs_direct *)bmap;
-	if ((key > NILFS_DIRECT_KEY_MAX) ||
-	    (level != 1) ||	/* XXX: use macro for level 1 */
-	    ((ptr = nilfs_direct_get_ptr(direct, key)) ==
-	     NILFS_BMAP_INVALID_PTR))
+	direct = (struct nilfs_direct *)bmap;	/* XXX: use macro for level 1 */
+	if (key > NILFS_DIRECT_KEY_MAX || level != 1)
+		return -ENOENT;
+	ptr = nilfs_direct_get_ptr(direct, key);
+	if (ptr == NILFS_BMAP_INVALID_PTR)
 		return -ENOENT;
 
 	if (ptrp != NULL)
@@ -73,9 +73,10 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
 	sector_t blocknr;
 	int ret, cnt;
 
-	if (key > NILFS_DIRECT_KEY_MAX ||
-	    (ptr = nilfs_direct_get_ptr(direct, key)) ==
-	    NILFS_BMAP_INVALID_PTR)
+	if (key > NILFS_DIRECT_KEY_MAX)
+		return -ENOENT;
+	ptr = nilfs_direct_get_ptr(direct, key);
+	if (ptr == NILFS_BMAP_INVALID_PTR)
 		return -ENOENT;
 
 	if (NILFS_BMAP_USE_VBN(bmap)) {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f6af76042d80..d6b2b83de363 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -480,7 +480,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
 				      unsigned int cmd, void __user *argp)
 {
 	struct nilfs_argv argv[5];
-	const static size_t argsz[5] = {
+	static const size_t argsz[5] = {
 		sizeof(struct nilfs_vdesc),
 		sizeof(struct nilfs_period),
 		sizeof(__u64),
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 17584c524486..105b508b47a8 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2829,7 +2829,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
 	       || sci->sc_seq_request != sci->sc_seq_done);
 	spin_unlock(&sci->sc_state_lock);
 
-	if (flag || nilfs_segctor_confirm(sci))
+	if (flag || !nilfs_segctor_confirm(sci))
 		nilfs_segctor_write_out(sci);
 
 	WARN_ON(!list_empty(&sci->sc_copied_buffers));
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index c9ee67b442e1..1afb0a10229f 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -121,7 +121,7 @@ static int idr_callback(int id, void *p, void *data)
 	if (warned)
 		return 0;
 
-	warned = false;
+	warned = true;
 	entry = p;
 	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 8271cf05c957..a94e8bd8eb1f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -552,7 +552,7 @@ retry:
 
 	spin_lock(&group->inotify_data.idr_lock);
 	ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
-				group->inotify_data.last_wd,
+				group->inotify_data.last_wd+1,
 				&tmp_ientry->wd);
 	spin_unlock(&group->inotify_data.idr_lock);
 	if (ret) {
@@ -632,7 +632,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
 
 	spin_lock_init(&group->inotify_data.idr_lock);
 	idr_init(&group->inotify_data.idr);
-	group->inotify_data.last_wd = 1;
+	group->inotify_data.last_wd = 0;
 	group->inotify_data.user = user;
 	group->inotify_data.fa = NULL;
 
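Note: idr_get_new_above() allocates an id greater than or equal to the id it
is seeded with, so seeding with last_wd could hand back the watch descriptor
that was just released. Passing last_wd + 1 makes the sequence strictly
increasing, and initialising last_wd to 0 keeps the first descriptor at 1.
Sketch under the 2.6.32-era idr API, assuming the caller holds idr_lock and
has already run idr_pre_get():

	static int demo_alloc_wd(struct idr *idr, void *entry, int last_wd)
	{
		int wd, ret;

		ret = idr_get_new_above(idr, entry, last_wd + 1, &wd);
		if (ret)
			return ret;	/* -EAGAIN: repeat idr_pre_get() */
		return wd;		/* strictly greater than last_wd */
	}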
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3d30a1c974a8..06ccf6a86d35 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1772,7 +1772,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
 					 loff_t *ppos,
 					 size_t count,
 					 int appending,
-					 int *direct_io)
+					 int *direct_io,
+					 int *has_refcount)
 {
 	int ret = 0, meta_level = 0;
 	struct inode *inode = dentry->d_inode;
@@ -1833,6 +1834,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
 							   saved_pos,
 							   count,
 							   &meta_level);
+			if (has_refcount)
+				*has_refcount = 1;
 		}
 
 		if (ret < 0) {
@@ -1856,6 +1859,10 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
 			break;
 		}
 
+		if (has_refcount && *has_refcount == 1) {
+			*direct_io = 0;
+			break;
+		}
 		/*
 		 * Allowing concurrent direct writes means
 		 * i_size changes wouldn't be synchronized, so
@@ -1899,7 +1906,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 				    loff_t pos)
 {
 	int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
-	int can_do_direct;
+	int can_do_direct, has_refcount = 0;
 	ssize_t written = 0;
 	size_t ocount;		/* original count */
 	size_t count;		/* after file limit checks */
@@ -1942,7 +1949,7 @@ relock:
 	can_do_direct = direct_io;
 	ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,
 					    iocb->ki_left, appending,
-					    &can_do_direct);
+					    &can_do_direct, &has_refcount);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -2006,14 +2013,16 @@ out_dio:
 	/* buffered aio wouldn't have proper lock coverage today */
 	BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
 
-	if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode)) {
+	if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode) ||
+	    (file->f_flags & O_DIRECT && has_refcount)) {
 		ret = filemap_fdatawrite_range(file->f_mapping, pos,
 					       pos + count - 1);
 		if (ret < 0)
 			written = ret;
 
 		if (!ret && (old_size != i_size_read(inode) ||
-			     old_clusters != OCFS2_I(inode)->ip_clusters)) {
+			     old_clusters != OCFS2_I(inode)->ip_clusters ||
+			     has_refcount)) {
 			ret = jbd2_journal_force_commit(osb->journal->j_journal);
 			if (ret < 0)
 				written = ret;
@@ -2062,7 +2071,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
 	int ret;
 
 	ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
-					    sd->total_len, 0, NULL);
+					    sd->total_len, 0, NULL, NULL);
 	if (ret < 0) {
 		mlog_errno(ret);
 		return ret;
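Note: ocfs2_prepare_inode_for_write() now reports, via the new has_refcount
out-parameter, that the write range crossed refcounted (shared,
copy-on-write) extents. The caller reacts in two places: it clears direct_io
so the CoW is performed through the page cache, and on the O_DIRECT path it
flushes the data range and forces a journal commit so the post-CoW
allocation is durable before the write returns. Decision sketch
(illustrative helper, not in the tree):

	static void demo_refcount_policy(int *direct_io, int has_refcount,
					 int o_direct, int *need_flush)
	{
		if (has_refcount)
			*direct_io = 0;	/* CoW must go through buffered I/O */
		if (o_direct && has_refcount)
			*need_flush = 1; /* fdatawrite + journal commit */
	}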
diff --git a/fs/proc/array.c b/fs/proc/array.c
index f560325c444f..13b5d0708175 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -327,94 +327,6 @@ static inline void task_context_switch_counts(struct seq_file *m,
 			p->nivcsw);
 }
 
-#ifdef CONFIG_MMU
-
-struct stack_stats {
-	struct vm_area_struct *vma;
-	unsigned long	startpage;
-	unsigned long	usage;
-};
-
-static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr,
-				unsigned long end, struct mm_walk *walk)
-{
-	struct stack_stats *ss = walk->private;
-	struct vm_area_struct *vma = ss->vma;
-	pte_t *pte, ptent;
-	spinlock_t *ptl;
-	int ret = 0;
-
-	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-	for (; addr != end; pte++, addr += PAGE_SIZE) {
-		ptent = *pte;
-
-#ifdef CONFIG_STACK_GROWSUP
-		if (pte_present(ptent) || is_swap_pte(ptent))
-			ss->usage = addr - ss->startpage + PAGE_SIZE;
-#else
-		if (pte_present(ptent) || is_swap_pte(ptent)) {
-			ss->usage = ss->startpage - addr + PAGE_SIZE;
-			pte++;
-			ret = 1;
-			break;
-		}
-#endif
-	}
-	pte_unmap_unlock(pte - 1, ptl);
-	cond_resched();
-	return ret;
-}
-
-static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma,
-				struct task_struct *task)
-{
-	struct stack_stats ss;
-	struct mm_walk stack_walk = {
-		.pmd_entry = stack_usage_pte_range,
-		.mm = vma->vm_mm,
-		.private = &ss,
-	};
-
-	if (!vma->vm_mm || is_vm_hugetlb_page(vma))
-		return 0;
-
-	ss.vma = vma;
-	ss.startpage = task->stack_start & PAGE_MASK;
-	ss.usage = 0;
-
-#ifdef CONFIG_STACK_GROWSUP
-	walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end,
-			&stack_walk);
-#else
-	walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE,
-			&stack_walk);
-#endif
-	return ss.usage;
-}
-
-static inline void task_show_stack_usage(struct seq_file *m,
-						struct task_struct *task)
-{
-	struct vm_area_struct	*vma;
-	struct mm_struct	*mm = get_task_mm(task);
-
-	if (mm) {
-		down_read(&mm->mmap_sem);
-		vma = find_vma(mm, task->stack_start);
-		if (vma)
-			seq_printf(m, "Stack usage:\t%lu kB\n",
-				get_stack_usage_in_bytes(vma, task) >> 10);
-
-		up_read(&mm->mmap_sem);
-		mmput(mm);
-	}
-}
-#else
-static void task_show_stack_usage(struct seq_file *m, struct task_struct *task)
-{
-}
-#endif		/* CONFIG_MMU */
-
 static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
 {
 	seq_printf(m, "Cpus_allowed:\t");
@@ -445,7 +357,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 	task_show_regs(m, task);
 #endif
 	task_context_switch_counts(m, task);
-	task_show_stack_usage(m, task);
 	return 0;
 }
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 18d5cc62d8ed..e42bbd843ed1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1419,7 +1419,6 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 		goto out;
 
 	error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
-	nd->last_type = LAST_BIND;
 out:
 	return ERR_PTR(error);
 }
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 47c03f4336b8..f277c4a111cb 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -361,12 +361,11 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 		if (!pte_present(ptent))
 			continue;
 
-		mss->resident += PAGE_SIZE;
-
 		page = vm_normal_page(vma, addr, ptent);
 		if (!page)
 			continue;
 
+		mss->resident += PAGE_SIZE;
 		/* Accumulate the size in pages that have been accessed. */
 		if (pte_young(ptent) || PageReferenced(page))
 			mss->referenced += PAGE_SIZE;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index dea86abdf2e7..3fc62b097bed 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1377,6 +1377,9 @@ static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
 static qsize_t inode_get_rsv_space(struct inode *inode)
 {
 	qsize_t ret;
+
+	if (!inode->i_sb->dq_op->get_reserved_space)
+		return 0;
 	spin_lock(&inode->i_lock);
 	ret = *inode_reserved_space(inode);
 	spin_unlock(&inode->i_lock);
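Note: not every quota-aware filesystem implements reserved space, and on
those inode_reserved_space() has nothing valid to dereference. The guard
treats a missing dq_op hook as "nothing reserved". The patched function in
isolation, for reference:

	static qsize_t inode_get_rsv_space(struct inode *inode)
	{
		qsize_t ret;

		if (!inode->i_sb->dq_op->get_reserved_space)
			return 0;	/* fs has no reserved-space support */
		spin_lock(&inode->i_lock);
		ret = *inode_reserved_space(inode);
		spin_unlock(&inode->i_lock);
		return ret;
	}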
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 2efc57173fd7..1739a4aba25f 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -123,30 +123,6 @@ add_error:
 
 /*****************************************************************************/
 /*
- * check that file shrinkage doesn't leave any VMAs dangling in midair
- */
-static int ramfs_nommu_check_mappings(struct inode *inode,
-				      size_t newsize, size_t size)
-{
-	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
-
-	/* search for VMAs that fall within the dead zone */
-	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
-			      newsize >> PAGE_SHIFT,
-			      (size + PAGE_SIZE - 1) >> PAGE_SHIFT
-			      ) {
-		/* found one - only interested if it's shared out of the page
-		 * cache */
-		if (vma->vm_flags & VM_SHARED)
-			return -ETXTBSY; /* not quite true, but near enough */
-	}
-
-	return 0;
-}
-
-/*****************************************************************************/
-/*
  *
  */
 static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
@@ -164,7 +140,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
 
 	/* check that a decrease in size doesn't cut off any shared mappings */
 	if (newsize < size) {
-		ret = ramfs_nommu_check_mappings(inode, newsize, size);
+		ret = nommu_shrink_inode_mappings(inode, size, newsize);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 685495707181..65c872761177 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1277,7 +1277,10 @@ int reiserfs_init_bitmap_cache(struct super_block *sb)
 	struct reiserfs_bitmap_info *bitmap;
 	unsigned int bmap_nr = reiserfs_bmap_count(sb);
 
+	/* Avoid lock recursion in fault case */
+	reiserfs_write_unlock(sb);
 	bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
+	reiserfs_write_lock(sb);
 	if (bitmap == NULL)
 		return -ENOMEM;
 
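Note: vmalloc() can enter direct reclaim, and reclaim may call back into
reiserfs, which would then try to retake the write lock this caller already
holds. Dropping the lock around the allocation (here and again in the
journal.c hunks below) breaks that recursion. Generic sketch of the pattern
as a hypothetical helper, not present in the tree:

	static void *demo_vmalloc_unlocked(struct super_block *sb, size_t size)
	{
		void *p;

		reiserfs_write_unlock(sb); /* alloc may recurse into the fs */
		p = vmalloc(size);
		reiserfs_write_lock(sb);
		return p;
	}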
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 290ae38fca8a..9087b10209e6 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -31,11 +31,12 @@ void reiserfs_delete_inode(struct inode *inode)
 	    JOURNAL_PER_BALANCE_CNT * 2 +
 	    2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
 	struct reiserfs_transaction_handle th;
+	int depth;
 	int err;
 
 	truncate_inode_pages(&inode->i_data, 0);
 
-	reiserfs_write_lock(inode->i_sb);
+	depth = reiserfs_write_lock_once(inode->i_sb);
 
 	/* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
 	if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {	/* also handles bad_inode case */
@@ -74,7 +75,7 @@ void reiserfs_delete_inode(struct inode *inode)
       out:
 	clear_inode(inode);	/* note this must go after the journal_end to prevent deadlock */
 	inode->i_blocks = 0;
-	reiserfs_write_unlock(inode->i_sb);
+	reiserfs_write_unlock_once(inode->i_sb, depth);
 }
 
 static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
@@ -3061,13 +3062,14 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
 int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
-	int error;
 	unsigned int ia_valid;
+	int depth;
+	int error;
 
 	/* must be turned off for recursive notify_change calls */
 	ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
 
-	reiserfs_write_lock(inode->i_sb);
+	depth = reiserfs_write_lock_once(inode->i_sb);
 	if (attr->ia_valid & ATTR_SIZE) {
 		/* version 2 items will be caught by the s_maxbytes check
 		** done for us in vmtruncate
@@ -3148,8 +3150,17 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 				journal_end(&th, inode->i_sb, jbegin_count);
 			}
 		}
-		if (!error)
+		if (!error) {
+			/*
+			 * Relax the lock here, as it might truncate the
+			 * inode pages and wait for inode pages locks.
+			 * To release such page lock, the owner needs the
+			 * reiserfs lock
+			 */
+			reiserfs_write_unlock_once(inode->i_sb, depth);
 			error = inode_setattr(inode, attr);
+			depth = reiserfs_write_lock_once(inode->i_sb);
+		}
 	}
 
 	if (!error && reiserfs_posixacl(inode->i_sb)) {
@@ -3158,7 +3169,8 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
       out:
-	reiserfs_write_unlock(inode->i_sb);
+	reiserfs_write_unlock_once(inode->i_sb, depth);
+
 	return error;
 }
 
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index ace77451ceb1..f53505de0712 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -104,9 +104,10 @@ setflags_out:
 		err = put_user(inode->i_generation, (int __user *)arg);
 		break;
 	case REISERFS_IOC_SETVERSION:
-		if (!is_owner_or_cap(inode))
+		if (!is_owner_or_cap(inode)) {
 			err = -EPERM;
 			break;
+		}
 		err = mnt_want_write(filp->f_path.mnt);
 		if (err)
 			break;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 2f8a7e7b8dab..ba98546fabbd 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2009,10 +2009,11 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
 		destroy_workqueue(commit_wq);
 		commit_wq = NULL;
 	}
-	reiserfs_write_lock(sb);
 
 	free_journal_ram(sb);
 
+	reiserfs_write_lock(sb);
+
 	return 0;
 }
 
@@ -2758,11 +2759,18 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
 	struct reiserfs_journal *journal;
 	struct reiserfs_journal_list *jl;
 	char b[BDEVNAME_SIZE];
+	int ret;
 
+	/*
+	 * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS
+	 * dependency inversion warnings.
+	 */
+	reiserfs_write_unlock(sb);
 	journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal));
 	if (!journal) {
 		reiserfs_warning(sb, "journal-1256",
 				 "unable to get memory for journal structure");
+		reiserfs_write_lock(sb);
 		return 1;
 	}
 	memset(journal, 0, sizeof(struct reiserfs_journal));
@@ -2771,10 +2779,12 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
 	INIT_LIST_HEAD(&journal->j_working_list);
 	INIT_LIST_HEAD(&journal->j_journal_list);
 	journal->j_persistent_trans = 0;
-	if (reiserfs_allocate_list_bitmaps(sb,
-					   journal->j_list_bitmap,
-					   reiserfs_bmap_count(sb)))
+	ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
+					     reiserfs_bmap_count(sb));
+	reiserfs_write_lock(sb);
+	if (ret)
 		goto free_and_return;
+
 	allocate_bitmap_nodes(sb);
 
 	/* reserved for journal area support */
@@ -2903,7 +2913,9 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
 	journal->j_mount_id = 10;
 	journal->j_state = 0;
 	atomic_set(&(journal->j_jlock), 0);
+	reiserfs_write_unlock(sb);
 	journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
+	reiserfs_write_lock(sb);
 	journal->j_cnode_free_orig = journal->j_cnode_free_list;
 	journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
 	journal->j_cnode_used = 0;
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index ee2cfc0fd8a7..b87aa2c1afc1 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -86,3 +86,12 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
 		reiserfs_panic(sb, "%s called without kernel lock held %d",
 			       caller);
 }
+
+#ifdef CONFIG_REISERFS_CHECK
+void reiserfs_lock_check_recursive(struct super_block *sb)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
+
+	WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n");
+}
+#endif
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index e296ff72a6cc..9d4dcf0b07cb 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -921,6 +921,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
 	struct reiserfs_transaction_handle th;
 	int jbegin_count;
 	unsigned long savelink;
+	int depth;
 
 	inode = dentry->d_inode;
 
@@ -932,7 +933,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
 	    JOURNAL_PER_BALANCE_CNT * 2 + 2 +
 	    4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
-	reiserfs_write_lock(dir->i_sb);
+	depth = reiserfs_write_lock_once(dir->i_sb);
 	retval = journal_begin(&th, dir->i_sb, jbegin_count);
 	if (retval)
 		goto out_unlink;
@@ -993,7 +994,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
 
 	retval = journal_end(&th, dir->i_sb, jbegin_count);
 	reiserfs_check_path(&path);
-	reiserfs_write_unlock(dir->i_sb);
+	reiserfs_write_unlock_once(dir->i_sb, depth);
 	return retval;
 
       end_unlink:
@@ -1003,7 +1004,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
 	if (err)
 		retval = err;
       out_unlink:
-	reiserfs_write_unlock(dir->i_sb);
+	reiserfs_write_unlock_once(dir->i_sb, depth);
 	return retval;
 }
 
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8c7033a8b67e..81f09fab8ae4 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -83,7 +83,8 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
 	BUG_ON(!mutex_is_locked(&dir->i_mutex));
 	vfs_dq_init(dir);
 
-	mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+	reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
+					I_MUTEX_CHILD, dir->i_sb);
 	error = dir->i_op->unlink(dir, dentry);
 	mutex_unlock(&dentry->d_inode->i_mutex);
 
@@ -98,7 +99,8 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
 	BUG_ON(!mutex_is_locked(&dir->i_mutex));
 	vfs_dq_init(dir);
 
-	mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+	reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
+					I_MUTEX_CHILD, dir->i_sb);
 	dentry_unhash(dentry);
 	error = dir->i_op->rmdir(dir, dentry);
 	if (!error)
@@ -235,16 +237,22 @@ static int reiserfs_for_each_xattr(struct inode *inode,
 	if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1)
 		return 0;
 
+	reiserfs_write_unlock(inode->i_sb);
 	dir = open_xa_dir(inode, XATTR_REPLACE);
 	if (IS_ERR(dir)) {
 		err = PTR_ERR(dir);
+		reiserfs_write_lock(inode->i_sb);
 		goto out;
 	} else if (!dir->d_inode) {
 		err = 0;
+		reiserfs_write_lock(inode->i_sb);
 		goto out_dir;
 	}
 
 	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
+
+	reiserfs_write_lock(inode->i_sb);
+
 	buf.xadir = dir;
 	err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos);
 	while ((err == 0 || err == -ENOSPC) && buf.count) {
@@ -283,8 +291,9 @@ static int reiserfs_for_each_xattr(struct inode *inode,
 		err = journal_begin(&th, inode->i_sb, blocks);
 		if (!err) {
 			int jerror;
-			mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
-					  I_MUTEX_XATTR);
+			reiserfs_mutex_lock_nested_safe(
+					  &dir->d_parent->d_inode->i_mutex,
+					  I_MUTEX_XATTR, inode->i_sb);
 			err = action(dir, data);
 			jerror = journal_end(&th, inode->i_sb, blocks);
 			mutex_unlock(&dir->d_parent->d_inode->i_mutex);
@@ -443,7 +452,9 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
 	}
 
 	if (dentry->d_inode) {
+		reiserfs_write_lock(inode->i_sb);
 		err = xattr_unlink(xadir->d_inode, dentry);
+		reiserfs_write_unlock(inode->i_sb);
 		update_ctime(inode);
 	}
 
@@ -477,15 +488,24 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
 	if (get_inode_sd_version(inode) == STAT_DATA_V1)
 		return -EOPNOTSUPP;
 
-	if (!buffer)
-		return lookup_and_delete_xattr(inode, name);
+	reiserfs_write_unlock(inode->i_sb);
+
+	if (!buffer) {
+		err = lookup_and_delete_xattr(inode, name);
+		reiserfs_write_lock(inode->i_sb);
+		return err;
+	}
 
 	dentry = xattr_lookup(inode, name, flags);
-	if (IS_ERR(dentry))
+	if (IS_ERR(dentry)) {
+		reiserfs_write_lock(inode->i_sb);
 		return PTR_ERR(dentry);
+	}
 
 	down_write(&REISERFS_I(inode)->i_xattr_sem);
 
+	reiserfs_write_lock(inode->i_sb);
+
 	xahash = xattr_hash(buffer, buffer_size);
 	while (buffer_pos < buffer_size || buffer_pos == 0) {
 		size_t chunk;
@@ -540,8 +560,12 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
 			.ia_size = buffer_size,
 			.ia_valid = ATTR_SIZE | ATTR_CTIME,
 		};
+
+		reiserfs_write_unlock(inode->i_sb);
 		mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
 		down_write(&dentry->d_inode->i_alloc_sem);
+		reiserfs_write_lock(inode->i_sb);
+
 		err = reiserfs_setattr(dentry, &newattrs);
 		up_write(&dentry->d_inode->i_alloc_sem);
 		mutex_unlock(&dentry->d_inode->i_mutex);
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index cc32e6ada67b..dd20a7883f0f 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -455,7 +455,9 @@ int reiserfs_acl_chmod(struct inode *inode)
 		return 0;
 	}
 
+	reiserfs_write_unlock(inode->i_sb);
 	acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
+	reiserfs_write_lock(inode->i_sb);
 	if (!acl)
 		return 0;
 	if (IS_ERR(acl))
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index c117fa80d1e9..42d213546894 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -544,6 +544,7 @@ error:
 error_rsb_inval:
 	ret = -EINVAL;
 error_rsb:
+	kfree(rsb);
 	return ret;
 }
 
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index f05f2303a8b8..699f371b9f12 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -106,8 +106,10 @@ static struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
 			return NULL;
 
 		t = atomic_cmpxchg(&sd->s_active, v, v + 1);
-		if (likely(t == v))
+		if (likely(t == v)) {
+			rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
 			return sd;
+		}
 		if (t < 0)
 			return NULL;
 
@@ -130,6 +132,7 @@ static void sysfs_put_active(struct sysfs_dirent *sd)
 	if (unlikely(!sd))
 		return;
 
+	rwsem_release(&sd->dep_map, 1, _RET_IP_);
 	v = atomic_dec_return(&sd->s_active);
 	if (likely(v != SD_DEACTIVATED_BIAS))
 		return;
@@ -194,15 +197,21 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
 	BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
 	sd->s_sibling = (void *)&wait;
 
+	rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
 	/* atomic_add_return() is a mb(), put_active() will always see
 	 * the updated sd->s_sibling.
 	 */
 	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
 
-	if (v != SD_DEACTIVATED_BIAS)
+	if (v != SD_DEACTIVATED_BIAS) {
+		lock_contended(&sd->dep_map, _RET_IP_);
 		wait_for_completion(&wait);
+	}
 
 	sd->s_sibling = NULL;
+
+	lock_acquired(&sd->dep_map, _RET_IP_);
+	rwsem_release(&sd->dep_map, 1, _RET_IP_);
 }
 
 static int sysfs_alloc_ino(ino_t *pino)
@@ -345,6 +354,7 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
 
 	atomic_set(&sd->s_count, 1);
 	atomic_set(&sd->s_active, 0);
+	sysfs_dirent_init_lockdep(sd);
 
 	sd->s_name = name;
 	sd->s_mode = mode;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ca52e7b9d8f8..cdd9377a6e06 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,6 +8,7 @@
  * This file is released under the GPLv2.
  */
 
+#include <linux/lockdep.h>
 #include <linux/fs.h>
 
 struct sysfs_open_dirent;
@@ -50,6 +51,9 @@ struct sysfs_inode_attrs {
 struct sysfs_dirent {
 	atomic_t		s_count;
 	atomic_t		s_active;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
 	struct sysfs_dirent	*s_parent;
 	struct sysfs_dirent	*s_sibling;
 	const char		*s_name;
@@ -84,6 +88,17 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
 	return sd->s_flags & SYSFS_TYPE_MASK;
 }
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define sysfs_dirent_init_lockdep(sd)				\
+do {								\
+	static struct lock_class_key __key;			\
+								\
+	lockdep_init_map(&sd->dep_map, "s_active", &__key, 0);	\
+} while(0)
+#else
+#define sysfs_dirent_init_lockdep(sd) do {} while(0)
+#endif
+
 /*
  * Context structure to be used while adding/removing nodes.
  */
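Note: s_active is a plain atomic refcount, so lockdep cannot see the
"readers vs. deactivation" relationship by itself. The hunks above teach it
by attaching a lockdep_map and issuing rwsem-style annotations: every active
reference counts as a read acquisition, and sysfs_deactivate() takes the map
for write while it waits for the count to drain. Illustrative pairing of the
annotations, under the assumption that the caller has already bumped and
will drop s_active around this region:

	static void demo_active_ref(struct sysfs_dirent *sd)
	{
		/* after a successful atomic_cmpxchg() bump of s_active: */
		rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);

		/* ... sd may be dereferenced safely here ... */

		/* before dropping the reference again: */
		rwsem_release(&sd->dep_map, 1, _RET_IP_);
	}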
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 618c2701d3a7..e5a3d8e96bb7 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -54,6 +54,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/list_sort.h>
 #include "ubifs.h"
 
 /*
@@ -108,101 +109,6 @@ static int switch_gc_head(struct ubifs_info *c)
 }
 
 /**
- * list_sort - sort a list.
- * @priv: private data, passed to @cmp
- * @head: the list to sort
- * @cmp: the elements comparison function
- *
- * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
- * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
- * in ascending order.
- *
- * The comparison function @cmp is supposed to return a negative value if @a is
- * less than @b, and a positive value if @a is greater than @b. If @a and @b are
- * equivalent, then it does not matter what this function returns.
- */
-static void list_sort(void *priv, struct list_head *head,
-		      int (*cmp)(void *priv, struct list_head *a,
-				 struct list_head *b))
-{
-	struct list_head *p, *q, *e, *list, *tail, *oldhead;
-	int insize, nmerges, psize, qsize, i;
-
-	if (list_empty(head))
-		return;
-
-	list = head->next;
-	list_del(head);
-	insize = 1;
-	for (;;) {
-		p = oldhead = list;
-		list = tail = NULL;
-		nmerges = 0;
-
-		while (p) {
-			nmerges++;
-			q = p;
-			psize = 0;
-			for (i = 0; i < insize; i++) {
-				psize++;
-				q = q->next == oldhead ? NULL : q->next;
-				if (!q)
-					break;
-			}
-
-			qsize = insize;
-			while (psize > 0 || (qsize > 0 && q)) {
-				if (!psize) {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				} else if (!qsize || !q) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else if (cmp(priv, p, q) <= 0) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				}
-				if (tail)
-					tail->next = e;
-				else
-					list = e;
-				e->prev = tail;
-				tail = e;
-			}
-			p = q;
-		}
-
-		tail->next = list;
-		list->prev = tail;
-
-		if (nmerges <= 1)
-			break;
-
-		insize *= 2;
-	}
-
-	head->next = list;
-	head->prev = list->prev;
-	list->prev->next = head;
-	list->prev = head;
-}
-
-/**
  * data_nodes_cmp - compare 2 data nodes.
  * @priv: UBIFS file-system description object
 * @a: first data node
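Note: with the private merge sort deleted, UBIFS uses the list_sort() shared
from lib/list_sort.c via <linux/list_sort.h>. The function keeps the same
signature as the removed copy, so callers only supply a comparator that
returns negative/zero/positive. Usage sketch with a hypothetical element
type (not part of UBIFS):

	#include <linux/list.h>
	#include <linux/list_sort.h>

	struct demo_item {
		struct list_head list;
		int key;
	};

	/* comparator contract: <0 if a sorts before b, >0 if after */
	static int demo_item_cmp(void *priv, struct list_head *a,
				 struct list_head *b)
	{
		struct demo_item *ia = list_entry(a, struct demo_item, list);
		struct demo_item *ib = list_entry(b, struct demo_item, list);

		return ia->key - ib->key;	/* ascending by key */
	}

	/* call site: list_sort(NULL, &some_list, demo_item_cmp); */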
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 2512125dfa7c..883ca5ab8af5 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -251,8 +251,9 @@ xfs_set_mode(struct inode *inode, mode_t mode)
 	if (mode != inode->i_mode) {
 		struct iattr iattr;
 
-		iattr.ia_valid = ATTR_MODE;
+		iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
 		iattr.ia_mode = mode;
+		iattr.ia_ctime = current_fs_time(inode->i_sb);
 
 		error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
 	}
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 09783cc444ac..77414db10dc2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -954,16 +954,14 @@ xfs_fs_destroy_inode(
 	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
 
 	/*
-	 * If we have nothing to flush with this inode then complete the
-	 * teardown now, otherwise delay the flush operation.
+	 * We always use background reclaim here because even if the
+	 * inode is clean, it still may be under IO and hence we have
+	 * to take the flush lock. The background reclaim path handles
+	 * this more efficiently than we can here, so simply let background
+	 * reclaim tear down all inodes.
 	 */
-	if (!xfs_inode_clean(ip)) {
-		xfs_inode_set_reclaim_tag(ip);
-		return;
-	}
-
 out_reclaim:
-	xfs_ireclaim(ip);
+	xfs_inode_set_reclaim_tag(ip);
 }
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 6fed97a8cd3e..1f5e4bb5e970 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -65,7 +65,6 @@ xfs_inode_ag_lookup(
65 * as the tree is sparse and a gang lookup walks to find 65 * as the tree is sparse and a gang lookup walks to find
66 * the number of objects requested. 66 * the number of objects requested.
67 */ 67 */
68 read_lock(&pag->pag_ici_lock);
69 if (tag == XFS_ICI_NO_TAG) { 68 if (tag == XFS_ICI_NO_TAG) {
70 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
71 (void **)&ip, *first_index, 1); 70 (void **)&ip, *first_index, 1);
@@ -74,7 +73,7 @@ xfs_inode_ag_lookup(
74 (void **)&ip, *first_index, 1, tag); 73 (void **)&ip, *first_index, 1, tag);
75 } 74 }
76 if (!nr_found) 75 if (!nr_found)
77 goto unlock; 76 return NULL;
78 77
79 /* 78 /*
80 * Update the index for the next lookup. Catch overflows 79 * Update the index for the next lookup. Catch overflows
@@ -84,13 +83,8 @@ xfs_inode_ag_lookup(
84 */ 83 */
85 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
86 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
87 goto unlock; 86 return NULL;
88
89 return ip; 87 return ip;
90
91unlock:
92 read_unlock(&pag->pag_ici_lock);
93 return NULL;
94} 88}
95 89
96STATIC int 90STATIC int
@@ -100,7 +94,8 @@ xfs_inode_ag_walk(
100 int (*execute)(struct xfs_inode *ip, 94 int (*execute)(struct xfs_inode *ip,
101 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
102 int flags, 96 int flags,
103 int tag) 97 int tag,
98 int exclusive)
104{ 99{
105 struct xfs_perag *pag = &mp->m_perag[ag]; 100 struct xfs_perag *pag = &mp->m_perag[ag];
106 uint32_t first_index; 101 uint32_t first_index;
@@ -114,10 +109,20 @@ restart:
114 int error = 0; 109 int error = 0;
115 xfs_inode_t *ip; 110 xfs_inode_t *ip;
116 111
112 if (exclusive)
113 write_lock(&pag->pag_ici_lock);
114 else
115 read_lock(&pag->pag_ici_lock);
117 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); 116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
118 if (!ip) 117 if (!ip) {
118 if (exclusive)
119 write_unlock(&pag->pag_ici_lock);
120 else
121 read_unlock(&pag->pag_ici_lock);
119 break; 122 break;
123 }
120 124
125 /* execute releases pag->pag_ici_lock */
121 error = execute(ip, pag, flags); 126 error = execute(ip, pag, flags);
122 if (error == EAGAIN) { 127 if (error == EAGAIN) {
123 skipped++; 128 skipped++;
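
Because the side-by-side rendering interleaves both columns, the new-side walk loop is worth reassembling in one piece. The lock mode now follows the new exclusive argument, and the execute() callback is charged with dropping the lock on every path:

	do {
		int		error = 0;
		xfs_inode_t	*ip;

		if (exclusive)
			write_lock(&pag->pag_ici_lock);
		else
			read_lock(&pag->pag_ici_lock);
		ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
		if (!ip) {
			if (exclusive)
				write_unlock(&pag->pag_ici_lock);
			else
				read_unlock(&pag->pag_ici_lock);
			break;
		}

		/* execute releases pag->pag_ici_lock */
		error = execute(ip, pag, flags);
		if (error == EAGAIN) {
			skipped++;
			continue;
		}
		if (error)
			last_error = error;

		/* bail out if the filesystem is corrupted. */
		if (error == EFSCORRUPTED)
			break;
	} while (1);
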
@@ -125,9 +130,8 @@ restart:
125 } 130 }
126 if (error) 131 if (error)
127 last_error = error; 132 last_error = error;
128 /* 133
129 * bail out if the filesystem is corrupted. 134 /* bail out if the filesystem is corrupted. */
130 */
131 if (error == EFSCORRUPTED) 135 if (error == EFSCORRUPTED)
132 break; 136 break;
133 137
@@ -148,7 +152,8 @@ xfs_inode_ag_iterator(
148 int (*execute)(struct xfs_inode *ip, 152 int (*execute)(struct xfs_inode *ip,
149 struct xfs_perag *pag, int flags), 153 struct xfs_perag *pag, int flags),
150 int flags, 154 int flags,
151 int tag) 155 int tag,
156 int exclusive)
152{ 157{
153 int error = 0; 158 int error = 0;
154 int last_error = 0; 159 int last_error = 0;
@@ -157,7 +162,8 @@ xfs_inode_ag_iterator(
157 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 162 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
158 if (!mp->m_perag[ag].pag_ici_init) 163 if (!mp->m_perag[ag].pag_ici_init)
159 continue; 164 continue;
160 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); 165 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag,
166 exclusive);
161 if (error) { 167 if (error) {
162 last_error = error; 168 last_error = error;
163 if (error == EFSCORRUPTED) 169 if (error == EFSCORRUPTED)
@@ -174,30 +180,31 @@ xfs_sync_inode_valid(
174 struct xfs_perag *pag) 180 struct xfs_perag *pag)
175{ 181{
176 struct inode *inode = VFS_I(ip); 182 struct inode *inode = VFS_I(ip);
183 int error = EFSCORRUPTED;
177 184
178 /* nothing to sync during shutdown */ 185 /* nothing to sync during shutdown */
179 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 186 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
180 read_unlock(&pag->pag_ici_lock); 187 goto out_unlock;
181 return EFSCORRUPTED;
182 }
183 188
184 /* 189 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
185 * If we can't get a reference on the inode, it must be in reclaim. 190 error = ENOENT;
186 * Leave it for the reclaim code to flush. Also avoid inodes that 191 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
187 * haven't been fully initialised. 192 goto out_unlock;
188 */
189 if (!igrab(inode)) {
190 read_unlock(&pag->pag_ici_lock);
191 return ENOENT;
192 }
193 read_unlock(&pag->pag_ici_lock);
194 193
195 if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { 194 /* If we can't grab the inode, it must be on its way to reclaim. */

195 if (!igrab(inode))
196 goto out_unlock;
197
198 if (is_bad_inode(inode)) {
196 IRELE(ip); 199 IRELE(ip);
197 return ENOENT; 200 goto out_unlock;
198 } 201 }
199 202
200 return 0; 203 /* inode is valid */
204 error = 0;
205out_unlock:
206 read_unlock(&pag->pag_ici_lock);
207 return error;
201} 208}
202 209
203STATIC int 210STATIC int
@@ -282,7 +289,7 @@ xfs_sync_data(
282 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 289 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
283 290
284 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 291 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
285 XFS_ICI_NO_TAG); 292 XFS_ICI_NO_TAG, 0);
286 if (error) 293 if (error)
287 return XFS_ERROR(error); 294 return XFS_ERROR(error);
288 295
@@ -304,7 +311,7 @@ xfs_sync_attr(
304 ASSERT((flags & ~SYNC_WAIT) == 0); 311 ASSERT((flags & ~SYNC_WAIT) == 0);
305 312
306 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 313 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
307 XFS_ICI_NO_TAG); 314 XFS_ICI_NO_TAG, 0);
308} 315}
309 316
310STATIC int 317STATIC int
@@ -664,60 +671,6 @@ xfs_syncd_stop(
664 kthread_stop(mp->m_sync_task); 671 kthread_stop(mp->m_sync_task);
665} 672}
666 673
667STATIC int
668xfs_reclaim_inode(
669 xfs_inode_t *ip,
670 int sync_mode)
671{
672 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
673
674 /* The hash lock here protects a thread in xfs_iget_core from
675 * racing with us on linking the inode back with a vnode.
676 * Once we have the XFS_IRECLAIM flag set it will not touch
677 * us.
678 */
679 write_lock(&pag->pag_ici_lock);
680 spin_lock(&ip->i_flags_lock);
681 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
682 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
683 spin_unlock(&ip->i_flags_lock);
684 write_unlock(&pag->pag_ici_lock);
685 return -EAGAIN;
686 }
687 __xfs_iflags_set(ip, XFS_IRECLAIM);
688 spin_unlock(&ip->i_flags_lock);
689 write_unlock(&pag->pag_ici_lock);
690 xfs_put_perag(ip->i_mount, pag);
691
692 /*
693 * If the inode is still dirty, then flush it out. If the inode
694 * is not in the AIL, then it will be OK to flush it delwri as
695 * long as xfs_iflush() does not keep any references to the inode.
696 * We leave that decision up to xfs_iflush() since it has the
697 * knowledge of whether it's OK to simply do a delwri flush of
698 * the inode or whether we need to wait until the inode is
699 * pulled from the AIL.
700 * We get the flush lock regardless, though, just to make sure
701 * we don't free it while it is being flushed.
702 */
703 xfs_ilock(ip, XFS_ILOCK_EXCL);
704 xfs_iflock(ip);
705
706 /*
707 * In the case of a forced shutdown we rely on xfs_iflush() to
708 * wait for the inode to be unpinned before returning an error.
709 */
710 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
711 /* synchronize with xfs_iflush_done */
712 xfs_iflock(ip);
713 xfs_ifunlock(ip);
714 }
715
716 xfs_iunlock(ip, XFS_ILOCK_EXCL);
717 xfs_ireclaim(ip);
718 return 0;
719}
720
721void 674void
722__xfs_inode_set_reclaim_tag( 675__xfs_inode_set_reclaim_tag(
723 struct xfs_perag *pag, 676 struct xfs_perag *pag,
@@ -760,19 +713,55 @@ __xfs_inode_clear_reclaim_tag(
760} 713}
761 714
762STATIC int 715STATIC int
763xfs_reclaim_inode_now( 716xfs_reclaim_inode(
764 struct xfs_inode *ip, 717 struct xfs_inode *ip,
765 struct xfs_perag *pag, 718 struct xfs_perag *pag,
766 int flags) 719 int sync_mode)
767{ 720{
768 /* ignore if already under reclaim */ 721 /*
769 if (xfs_iflags_test(ip, XFS_IRECLAIM)) { 722 * The radix tree lock here protects a thread in xfs_iget from racing
770 read_unlock(&pag->pag_ici_lock); 723 * with us starting reclaim on the inode. Once we have the
724 * XFS_IRECLAIM flag set it will not touch us.
725 */
726 spin_lock(&ip->i_flags_lock);
727 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
728 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
729 /* ignore as it is already under reclaim */
730 spin_unlock(&ip->i_flags_lock);
731 write_unlock(&pag->pag_ici_lock);
771 return 0; 732 return 0;
772 } 733 }
773 read_unlock(&pag->pag_ici_lock); 734 __xfs_iflags_set(ip, XFS_IRECLAIM);
735 spin_unlock(&ip->i_flags_lock);
736 write_unlock(&pag->pag_ici_lock);
774 737
775 return xfs_reclaim_inode(ip, flags); 738 /*
739 * If the inode is still dirty, then flush it out. If the inode
740 * is not in the AIL, then it will be OK to flush it delwri as
741 * long as xfs_iflush() does not keep any references to the inode.
742 * We leave that decision up to xfs_iflush() since it has the
743 * knowledge of whether it's OK to simply do a delwri flush of
744 * the inode or whether we need to wait until the inode is
745 * pulled from the AIL.
746 * We get the flush lock regardless, though, just to make sure
747 * we don't free it while it is being flushed.
748 */
749 xfs_ilock(ip, XFS_ILOCK_EXCL);
750 xfs_iflock(ip);
751
752 /*
753 * In the case of a forced shutdown we rely on xfs_iflush() to
754 * wait for the inode to be unpinned before returning an error.
755 */
756 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
757 /* synchronize with xfs_iflush_done */
758 xfs_iflock(ip);
759 xfs_ifunlock(ip);
760 }
761
762 xfs_iunlock(ip, XFS_ILOCK_EXCL);
763 xfs_ireclaim(ip);
764 return 0;
776} 765}
777 766
778int 767int
@@ -780,6 +769,6 @@ xfs_reclaim_inodes(
780 xfs_mount_t *mp, 769 xfs_mount_t *mp,
781 int mode) 770 int mode)
782{ 771{
783 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, 772 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
784 XFS_ICI_RECLAIM_TAG); 773 XFS_ICI_RECLAIM_TAG, 1);
785} 774}
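
xfs_reclaim_inode_now() is folded into xfs_reclaim_inode(), and with the iterator now taking pag_ici_lock for write on the reclaim walk, the function starts from a stronger precondition: the caller already holds the radix tree lock exclusively. The new-side entry sequence, reassembled from the columns above:

	/*
	 * Caller holds pag->pag_ici_lock for write; every path below
	 * drops it before the flush-lock work begins.
	 */
	spin_lock(&ip->i_flags_lock);
	ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
	if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
		/* ignore as it is already under reclaim */
		spin_unlock(&ip->i_flags_lock);
		write_unlock(&pag->pag_ici_lock);
		return 0;
	}
	__xfs_iflags_set(ip, XFS_IRECLAIM);
	spin_unlock(&ip->i_flags_lock);
	write_unlock(&pag->pag_ici_lock);
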
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index a500b4d91835..ea932b43335d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -54,6 +54,6 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
54int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 54int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
55int xfs_inode_ag_iterator(struct xfs_mount *mp, 55int xfs_inode_ag_iterator(struct xfs_mount *mp,
56 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 56 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
57 int flags, int tag); 57 int flags, int tag, int write_lock);
58 58
59#endif 59#endif
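
Callers pick the lock mode through the new final argument (the header names it write_lock while xfs_sync.c calls it exclusive; both are the same int flag). In this patch the sync and quota walks pass 0 and the reclaim walk passes 1:

	/* shared (read) lock: plain sync walks over the inode radix tree */
	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
				      XFS_ICI_NO_TAG, 0);

	/* exclusive (write) lock: reclaim, which removes tree entries */
	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
				     XFS_ICI_RECLAIM_TAG, 1);
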
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index c40834bdee58..c22a608321a3 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -33,51 +33,55 @@ struct xfs_dquot;
33struct xlog_ticket; 33struct xlog_ticket;
34struct log; 34struct log;
35 35
36DECLARE_EVENT_CLASS(xfs_attr_list_class,
37 TP_PROTO(struct xfs_attr_list_context *ctx),
38 TP_ARGS(ctx),
39 TP_STRUCT__entry(
40 __field(dev_t, dev)
41 __field(xfs_ino_t, ino)
42 __field(u32, hashval)
43 __field(u32, blkno)
44 __field(u32, offset)
45 __field(void *, alist)
46 __field(int, bufsize)
47 __field(int, count)
48 __field(int, firstu)
49 __field(int, dupcnt)
50 __field(int, flags)
51 ),
52 TP_fast_assign(
53 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
54 __entry->ino = ctx->dp->i_ino;
55 __entry->hashval = ctx->cursor->hashval;
56 __entry->blkno = ctx->cursor->blkno;
57 __entry->offset = ctx->cursor->offset;
58 __entry->alist = ctx->alist;
59 __entry->bufsize = ctx->bufsize;
60 __entry->count = ctx->count;
61 __entry->firstu = ctx->firstu;
62 __entry->flags = ctx->flags;
63 ),
64 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
65 "alist 0x%p size %u count %u firstu %u flags %d %s",
66 MAJOR(__entry->dev), MINOR(__entry->dev),
67 __entry->ino,
68 __entry->hashval,
69 __entry->blkno,
70 __entry->offset,
71 __entry->dupcnt,
72 __entry->alist,
73 __entry->bufsize,
74 __entry->count,
75 __entry->firstu,
76 __entry->flags,
77 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
78 )
79)
80
36#define DEFINE_ATTR_LIST_EVENT(name) \ 81#define DEFINE_ATTR_LIST_EVENT(name) \
37TRACE_EVENT(name, \ 82DEFINE_EVENT(xfs_attr_list_class, name, \
38 TP_PROTO(struct xfs_attr_list_context *ctx), \ 83 TP_PROTO(struct xfs_attr_list_context *ctx), \
39 TP_ARGS(ctx), \ 84 TP_ARGS(ctx))
40 TP_STRUCT__entry( \
41 __field(dev_t, dev) \
42 __field(xfs_ino_t, ino) \
43 __field(u32, hashval) \
44 __field(u32, blkno) \
45 __field(u32, offset) \
46 __field(void *, alist) \
47 __field(int, bufsize) \
48 __field(int, count) \
49 __field(int, firstu) \
50 __field(int, dupcnt) \
51 __field(int, flags) \
52 ), \
53 TP_fast_assign( \
54 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; \
55 __entry->ino = ctx->dp->i_ino; \
56 __entry->hashval = ctx->cursor->hashval; \
57 __entry->blkno = ctx->cursor->blkno; \
58 __entry->offset = ctx->cursor->offset; \
59 __entry->alist = ctx->alist; \
60 __entry->bufsize = ctx->bufsize; \
61 __entry->count = ctx->count; \
62 __entry->firstu = ctx->firstu; \
63 __entry->flags = ctx->flags; \
64 ), \
65 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " \
66 "alist 0x%p size %u count %u firstu %u flags %d %s", \
67 MAJOR(__entry->dev), MINOR(__entry->dev), \
68 __entry->ino, \
69 __entry->hashval, \
70 __entry->blkno, \
71 __entry->offset, \
72 __entry->dupcnt, \
73 __entry->alist, \
74 __entry->bufsize, \
75 __entry->count, \
76 __entry->firstu, \
77 __entry->flags, \
78 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS) \
79 ) \
80)
81DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf); 85DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
82DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all); 86DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
83DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf); 87DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
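
This is the conversion pattern repeated through the rest of the file: one DECLARE_EVENT_CLASS carries the entry layout, assignment, and printk format, and each tracepoint becomes a two-line DEFINE_EVENT against that class, so the shared body is emitted once instead of per event. A deliberately tiny illustration (the class and event names here are made up for the example; DECLARE_EVENT_CLASS and DEFINE_EVENT are the real ftrace macros this patch switches to):

	DECLARE_EVENT_CLASS(xfs_example_class,
		TP_PROTO(struct xfs_inode *ip),
		TP_ARGS(ip),
		TP_STRUCT__entry(
			__field(dev_t, dev)
			__field(xfs_ino_t, ino)
		),
		TP_fast_assign(
			__entry->dev = VFS_I(ip)->i_sb->s_dev;
			__entry->ino = ip->i_ino;
		),
		TP_printk("dev %d:%d ino 0x%llx",
			  MAJOR(__entry->dev), MINOR(__entry->dev),
			  __entry->ino)
	)

	#define DEFINE_EXAMPLE_EVENT(name) \
	DEFINE_EVENT(xfs_example_class, name, \
		TP_PROTO(struct xfs_inode *ip), \
		TP_ARGS(ip))
	DEFINE_EXAMPLE_EVENT(xfs_example_foo);
	DEFINE_EXAMPLE_EVENT(xfs_example_bar);
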
@@ -178,91 +182,99 @@ TRACE_EVENT(xfs_iext_insert,
178 (char *)__entry->caller_ip) 182 (char *)__entry->caller_ip)
179); 183);
180 184
185DECLARE_EVENT_CLASS(xfs_bmap_class,
186 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
187 unsigned long caller_ip),
188 TP_ARGS(ip, idx, state, caller_ip),
189 TP_STRUCT__entry(
190 __field(dev_t, dev)
191 __field(xfs_ino_t, ino)
192 __field(xfs_extnum_t, idx)
193 __field(xfs_fileoff_t, startoff)
194 __field(xfs_fsblock_t, startblock)
195 __field(xfs_filblks_t, blockcount)
196 __field(xfs_exntst_t, state)
197 __field(int, bmap_state)
198 __field(unsigned long, caller_ip)
199 ),
200 TP_fast_assign(
201 struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ?
202 ip->i_afp : &ip->i_df;
203 struct xfs_bmbt_irec r;
204
205 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
206 __entry->dev = VFS_I(ip)->i_sb->s_dev;
207 __entry->ino = ip->i_ino;
208 __entry->idx = idx;
209 __entry->startoff = r.br_startoff;
210 __entry->startblock = r.br_startblock;
211 __entry->blockcount = r.br_blockcount;
212 __entry->state = r.br_state;
213 __entry->bmap_state = state;
214 __entry->caller_ip = caller_ip;
215 ),
216 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
217 "offset %lld block %s count %lld flag %d caller %pf",
218 MAJOR(__entry->dev), MINOR(__entry->dev),
219 __entry->ino,
220 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
221 (long)__entry->idx,
222 __entry->startoff,
223 xfs_fmtfsblock(__entry->startblock),
224 __entry->blockcount,
225 __entry->state,
226 (char *)__entry->caller_ip)
227)
228
181#define DEFINE_BMAP_EVENT(name) \ 229#define DEFINE_BMAP_EVENT(name) \
182TRACE_EVENT(name, \ 230DEFINE_EVENT(xfs_bmap_class, name, \
183 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \ 231 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
184 unsigned long caller_ip), \ 232 unsigned long caller_ip), \
185 TP_ARGS(ip, idx, state, caller_ip), \ 233 TP_ARGS(ip, idx, state, caller_ip))
186 TP_STRUCT__entry( \
187 __field(dev_t, dev) \
188 __field(xfs_ino_t, ino) \
189 __field(xfs_extnum_t, idx) \
190 __field(xfs_fileoff_t, startoff) \
191 __field(xfs_fsblock_t, startblock) \
192 __field(xfs_filblks_t, blockcount) \
193 __field(xfs_exntst_t, state) \
194 __field(int, bmap_state) \
195 __field(unsigned long, caller_ip) \
196 ), \
197 TP_fast_assign( \
198 struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ? \
199 ip->i_afp : &ip->i_df; \
200 struct xfs_bmbt_irec r; \
201 \
202 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); \
203 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
204 __entry->ino = ip->i_ino; \
205 __entry->idx = idx; \
206 __entry->startoff = r.br_startoff; \
207 __entry->startblock = r.br_startblock; \
208 __entry->blockcount = r.br_blockcount; \
209 __entry->state = r.br_state; \
210 __entry->bmap_state = state; \
211 __entry->caller_ip = caller_ip; \
212 ), \
213 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " \
214 "offset %lld block %s count %lld flag %d caller %pf", \
215 MAJOR(__entry->dev), MINOR(__entry->dev), \
216 __entry->ino, \
217 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), \
218 (long)__entry->idx, \
219 __entry->startoff, \
220 xfs_fmtfsblock(__entry->startblock), \
221 __entry->blockcount, \
222 __entry->state, \
223 (char *)__entry->caller_ip) \
224)
225
226DEFINE_BMAP_EVENT(xfs_iext_remove); 234DEFINE_BMAP_EVENT(xfs_iext_remove);
227DEFINE_BMAP_EVENT(xfs_bmap_pre_update); 235DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
228DEFINE_BMAP_EVENT(xfs_bmap_post_update); 236DEFINE_BMAP_EVENT(xfs_bmap_post_update);
229DEFINE_BMAP_EVENT(xfs_extlist); 237DEFINE_BMAP_EVENT(xfs_extlist);
230 238
231#define DEFINE_BUF_EVENT(tname) \ 239DECLARE_EVENT_CLASS(xfs_buf_class,
232TRACE_EVENT(tname, \ 240 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
233 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \ 241 TP_ARGS(bp, caller_ip),
234 TP_ARGS(bp, caller_ip), \ 242 TP_STRUCT__entry(
235 TP_STRUCT__entry( \ 243 __field(dev_t, dev)
236 __field(dev_t, dev) \ 244 __field(xfs_daddr_t, bno)
237 __field(xfs_daddr_t, bno) \ 245 __field(size_t, buffer_length)
238 __field(size_t, buffer_length) \ 246 __field(int, hold)
239 __field(int, hold) \ 247 __field(int, pincount)
240 __field(int, pincount) \ 248 __field(unsigned, lockval)
241 __field(unsigned, lockval) \ 249 __field(unsigned, flags)
242 __field(unsigned, flags) \ 250 __field(unsigned long, caller_ip)
243 __field(unsigned long, caller_ip) \ 251 ),
244 ), \ 252 TP_fast_assign(
245 TP_fast_assign( \ 253 __entry->dev = bp->b_target->bt_dev;
246 __entry->dev = bp->b_target->bt_dev; \ 254 __entry->bno = bp->b_bn;
247 __entry->bno = bp->b_bn; \ 255 __entry->buffer_length = bp->b_buffer_length;
248 __entry->buffer_length = bp->b_buffer_length; \ 256 __entry->hold = atomic_read(&bp->b_hold);
249 __entry->hold = atomic_read(&bp->b_hold); \ 257 __entry->pincount = atomic_read(&bp->b_pin_count);
250 __entry->pincount = atomic_read(&bp->b_pin_count); \ 258 __entry->lockval = xfs_buf_lock_value(bp);
251 __entry->lockval = xfs_buf_lock_value(bp); \ 259 __entry->flags = bp->b_flags;
252 __entry->flags = bp->b_flags; \ 260 __entry->caller_ip = caller_ip;
253 __entry->caller_ip = caller_ip; \ 261 ),
254 ), \ 262 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
255 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ 263 "lock %d flags %s caller %pf",
256 "lock %d flags %s caller %pf", \ 264 MAJOR(__entry->dev), MINOR(__entry->dev),
257 MAJOR(__entry->dev), MINOR(__entry->dev), \ 265 (unsigned long long)__entry->bno,
258 (unsigned long long)__entry->bno, \ 266 __entry->buffer_length,
259 __entry->buffer_length, \ 267 __entry->hold,
260 __entry->hold, \ 268 __entry->pincount,
261 __entry->pincount, \ 269 __entry->lockval,
262 __entry->lockval, \ 270 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
263 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), \ 271 (void *)__entry->caller_ip)
264 (void *)__entry->caller_ip) \
265) 272)
273
274#define DEFINE_BUF_EVENT(name) \
275DEFINE_EVENT(xfs_buf_class, name, \
276 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
277 TP_ARGS(bp, caller_ip))
266DEFINE_BUF_EVENT(xfs_buf_init); 278DEFINE_BUF_EVENT(xfs_buf_init);
267DEFINE_BUF_EVENT(xfs_buf_free); 279DEFINE_BUF_EVENT(xfs_buf_free);
268DEFINE_BUF_EVENT(xfs_buf_hold); 280DEFINE_BUF_EVENT(xfs_buf_hold);
@@ -299,41 +311,45 @@ DEFINE_BUF_EVENT(xfs_reset_dqcounts);
299DEFINE_BUF_EVENT(xfs_inode_item_push); 311DEFINE_BUF_EVENT(xfs_inode_item_push);
300 312
301/* pass flags explicitly */ 313/* pass flags explicitly */
302#define DEFINE_BUF_FLAGS_EVENT(tname) \ 314DECLARE_EVENT_CLASS(xfs_buf_flags_class,
303TRACE_EVENT(tname, \ 315 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
304 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \ 316 TP_ARGS(bp, flags, caller_ip),
305 TP_ARGS(bp, flags, caller_ip), \ 317 TP_STRUCT__entry(
306 TP_STRUCT__entry( \ 318 __field(dev_t, dev)
307 __field(dev_t, dev) \ 319 __field(xfs_daddr_t, bno)
308 __field(xfs_daddr_t, bno) \ 320 __field(size_t, buffer_length)
309 __field(size_t, buffer_length) \ 321 __field(int, hold)
310 __field(int, hold) \ 322 __field(int, pincount)
311 __field(int, pincount) \ 323 __field(unsigned, lockval)
312 __field(unsigned, lockval) \ 324 __field(unsigned, flags)
313 __field(unsigned, flags) \ 325 __field(unsigned long, caller_ip)
314 __field(unsigned long, caller_ip) \ 326 ),
315 ), \ 327 TP_fast_assign(
316 TP_fast_assign( \ 328 __entry->dev = bp->b_target->bt_dev;
317 __entry->dev = bp->b_target->bt_dev; \ 329 __entry->bno = bp->b_bn;
318 __entry->bno = bp->b_bn; \ 330 __entry->buffer_length = bp->b_buffer_length;
319 __entry->buffer_length = bp->b_buffer_length; \ 331 __entry->flags = flags;
320 __entry->flags = flags; \ 332 __entry->hold = atomic_read(&bp->b_hold);
321 __entry->hold = atomic_read(&bp->b_hold); \ 333 __entry->pincount = atomic_read(&bp->b_pin_count);
322 __entry->pincount = atomic_read(&bp->b_pin_count); \ 334 __entry->lockval = xfs_buf_lock_value(bp);
323 __entry->lockval = xfs_buf_lock_value(bp); \ 335 __entry->caller_ip = caller_ip;
324 __entry->caller_ip = caller_ip; \ 336 ),
325 ), \ 337 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
326 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ 338 "lock %d flags %s caller %pf",
327 "lock %d flags %s caller %pf", \ 339 MAJOR(__entry->dev), MINOR(__entry->dev),
328 MAJOR(__entry->dev), MINOR(__entry->dev), \ 340 (unsigned long long)__entry->bno,
329 (unsigned long long)__entry->bno, \ 341 __entry->buffer_length,
330 __entry->buffer_length, \ 342 __entry->hold,
331 __entry->hold, \ 343 __entry->pincount,
332 __entry->pincount, \ 344 __entry->lockval,
333 __entry->lockval, \ 345 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
334 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), \ 346 (void *)__entry->caller_ip)
335 (void *)__entry->caller_ip) \
336) 347)
348
349#define DEFINE_BUF_FLAGS_EVENT(name) \
350DEFINE_EVENT(xfs_buf_flags_class, name, \
351 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
352 TP_ARGS(bp, flags, caller_ip))
337DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); 353DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
338DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); 354DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
339DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); 355DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
@@ -376,55 +392,58 @@ TRACE_EVENT(xfs_buf_ioerror,
376 (void *)__entry->caller_ip) 392 (void *)__entry->caller_ip)
377); 393);
378 394
379#define DEFINE_BUF_ITEM_EVENT(tname) \ 395DECLARE_EVENT_CLASS(xfs_buf_item_class,
380TRACE_EVENT(tname, \ 396 TP_PROTO(struct xfs_buf_log_item *bip),
381 TP_PROTO(struct xfs_buf_log_item *bip), \ 397 TP_ARGS(bip),
382 TP_ARGS(bip), \ 398 TP_STRUCT__entry(
383 TP_STRUCT__entry( \ 399 __field(dev_t, dev)
384 __field(dev_t, dev) \ 400 __field(xfs_daddr_t, buf_bno)
385 __field(xfs_daddr_t, buf_bno) \ 401 __field(size_t, buf_len)
386 __field(size_t, buf_len) \ 402 __field(int, buf_hold)
387 __field(int, buf_hold) \ 403 __field(int, buf_pincount)
388 __field(int, buf_pincount) \ 404 __field(int, buf_lockval)
389 __field(int, buf_lockval) \ 405 __field(unsigned, buf_flags)
390 __field(unsigned, buf_flags) \ 406 __field(unsigned, bli_recur)
391 __field(unsigned, bli_recur) \ 407 __field(int, bli_refcount)
392 __field(int, bli_refcount) \ 408 __field(unsigned, bli_flags)
393 __field(unsigned, bli_flags) \ 409 __field(void *, li_desc)
394 __field(void *, li_desc) \ 410 __field(unsigned, li_flags)
395 __field(unsigned, li_flags) \ 411 ),
396 ), \ 412 TP_fast_assign(
397 TP_fast_assign( \ 413 __entry->dev = bip->bli_buf->b_target->bt_dev;
398 __entry->dev = bip->bli_buf->b_target->bt_dev; \ 414 __entry->bli_flags = bip->bli_flags;
399 __entry->bli_flags = bip->bli_flags; \ 415 __entry->bli_recur = bip->bli_recur;
400 __entry->bli_recur = bip->bli_recur; \ 416 __entry->bli_refcount = atomic_read(&bip->bli_refcount);
401 __entry->bli_refcount = atomic_read(&bip->bli_refcount); \ 417 __entry->buf_bno = bip->bli_buf->b_bn;
402 __entry->buf_bno = bip->bli_buf->b_bn; \ 418 __entry->buf_len = bip->bli_buf->b_buffer_length;
403 __entry->buf_len = bip->bli_buf->b_buffer_length; \ 419 __entry->buf_flags = bip->bli_buf->b_flags;
404 __entry->buf_flags = bip->bli_buf->b_flags; \ 420 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
405 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); \ 421 __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
406 __entry->buf_pincount = \ 422 __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf);
407 atomic_read(&bip->bli_buf->b_pin_count); \ 423 __entry->li_desc = bip->bli_item.li_desc;
408 __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf); \ 424 __entry->li_flags = bip->bli_item.li_flags;
409 __entry->li_desc = bip->bli_item.li_desc; \ 425 ),
410 __entry->li_flags = bip->bli_item.li_flags; \ 426 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
411 ), \ 427 "lock %d flags %s recur %d refcount %d bliflags %s "
412 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ 428 "lidesc 0x%p liflags %s",
413 "lock %d flags %s recur %d refcount %d bliflags %s " \ 429 MAJOR(__entry->dev), MINOR(__entry->dev),
414 "lidesc 0x%p liflags %s", \ 430 (unsigned long long)__entry->buf_bno,
415 MAJOR(__entry->dev), MINOR(__entry->dev), \ 431 __entry->buf_len,
416 (unsigned long long)__entry->buf_bno, \ 432 __entry->buf_hold,
417 __entry->buf_len, \ 433 __entry->buf_pincount,
418 __entry->buf_hold, \ 434 __entry->buf_lockval,
419 __entry->buf_pincount, \ 435 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
420 __entry->buf_lockval, \ 436 __entry->bli_recur,
421 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS), \ 437 __entry->bli_refcount,
422 __entry->bli_recur, \ 438 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
423 __entry->bli_refcount, \ 439 __entry->li_desc,
424 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS), \ 440 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
425 __entry->li_desc, \
426 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS)) \
427) 441)
442
443#define DEFINE_BUF_ITEM_EVENT(name) \
444DEFINE_EVENT(xfs_buf_item_class, name, \
445 TP_PROTO(struct xfs_buf_log_item *bip), \
446 TP_ARGS(bip))
428DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); 447DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
429DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); 448DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
430DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); 449DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
@@ -450,78 +469,90 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
450DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); 469DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
451DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); 470DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
452 471
472DECLARE_EVENT_CLASS(xfs_lock_class,
473 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
474 unsigned long caller_ip),
475 TP_ARGS(ip, lock_flags, caller_ip),
476 TP_STRUCT__entry(
477 __field(dev_t, dev)
478 __field(xfs_ino_t, ino)
479 __field(int, lock_flags)
480 __field(unsigned long, caller_ip)
481 ),
482 TP_fast_assign(
483 __entry->dev = VFS_I(ip)->i_sb->s_dev;
484 __entry->ino = ip->i_ino;
485 __entry->lock_flags = lock_flags;
486 __entry->caller_ip = caller_ip;
487 ),
488 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
489 MAJOR(__entry->dev), MINOR(__entry->dev),
490 __entry->ino,
491 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
492 (void *)__entry->caller_ip)
493)
494
453#define DEFINE_LOCK_EVENT(name) \ 495#define DEFINE_LOCK_EVENT(name) \
454TRACE_EVENT(name, \ 496DEFINE_EVENT(xfs_lock_class, name, \
455 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \ 497 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
456 unsigned long caller_ip), \ 498 unsigned long caller_ip), \
457 TP_ARGS(ip, lock_flags, caller_ip), \ 499 TP_ARGS(ip, lock_flags, caller_ip))
458 TP_STRUCT__entry( \
459 __field(dev_t, dev) \
460 __field(xfs_ino_t, ino) \
461 __field(int, lock_flags) \
462 __field(unsigned long, caller_ip) \
463 ), \
464 TP_fast_assign( \
465 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
466 __entry->ino = ip->i_ino; \
467 __entry->lock_flags = lock_flags; \
468 __entry->caller_ip = caller_ip; \
469 ), \
470 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", \
471 MAJOR(__entry->dev), MINOR(__entry->dev), \
472 __entry->ino, \
473 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), \
474 (void *)__entry->caller_ip) \
475)
476
477DEFINE_LOCK_EVENT(xfs_ilock); 500DEFINE_LOCK_EVENT(xfs_ilock);
478DEFINE_LOCK_EVENT(xfs_ilock_nowait); 501DEFINE_LOCK_EVENT(xfs_ilock_nowait);
479DEFINE_LOCK_EVENT(xfs_ilock_demote); 502DEFINE_LOCK_EVENT(xfs_ilock_demote);
480DEFINE_LOCK_EVENT(xfs_iunlock); 503DEFINE_LOCK_EVENT(xfs_iunlock);
481 504
505DECLARE_EVENT_CLASS(xfs_iget_class,
506 TP_PROTO(struct xfs_inode *ip),
507 TP_ARGS(ip),
508 TP_STRUCT__entry(
509 __field(dev_t, dev)
510 __field(xfs_ino_t, ino)
511 ),
512 TP_fast_assign(
513 __entry->dev = VFS_I(ip)->i_sb->s_dev;
514 __entry->ino = ip->i_ino;
515 ),
516 TP_printk("dev %d:%d ino 0x%llx",
517 MAJOR(__entry->dev), MINOR(__entry->dev),
518 __entry->ino)
519)
520
482#define DEFINE_IGET_EVENT(name) \ 521#define DEFINE_IGET_EVENT(name) \
483TRACE_EVENT(name, \ 522DEFINE_EVENT(xfs_iget_class, name, \
484 TP_PROTO(struct xfs_inode *ip), \ 523 TP_PROTO(struct xfs_inode *ip), \
485 TP_ARGS(ip), \ 524 TP_ARGS(ip))
486 TP_STRUCT__entry( \
487 __field(dev_t, dev) \
488 __field(xfs_ino_t, ino) \
489 ), \
490 TP_fast_assign( \
491 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
492 __entry->ino = ip->i_ino; \
493 ), \
494 TP_printk("dev %d:%d ino 0x%llx", \
495 MAJOR(__entry->dev), MINOR(__entry->dev), \
496 __entry->ino) \
497)
498DEFINE_IGET_EVENT(xfs_iget_skip); 525DEFINE_IGET_EVENT(xfs_iget_skip);
499DEFINE_IGET_EVENT(xfs_iget_reclaim); 526DEFINE_IGET_EVENT(xfs_iget_reclaim);
500DEFINE_IGET_EVENT(xfs_iget_found); 527DEFINE_IGET_EVENT(xfs_iget_found);
501DEFINE_IGET_EVENT(xfs_iget_alloc); 528DEFINE_IGET_EVENT(xfs_iget_alloc);
502 529
530DECLARE_EVENT_CLASS(xfs_inode_class,
531 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
532 TP_ARGS(ip, caller_ip),
533 TP_STRUCT__entry(
534 __field(dev_t, dev)
535 __field(xfs_ino_t, ino)
536 __field(int, count)
537 __field(unsigned long, caller_ip)
538 ),
539 TP_fast_assign(
540 __entry->dev = VFS_I(ip)->i_sb->s_dev;
541 __entry->ino = ip->i_ino;
542 __entry->count = atomic_read(&VFS_I(ip)->i_count);
543 __entry->caller_ip = caller_ip;
544 ),
545 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
546 MAJOR(__entry->dev), MINOR(__entry->dev),
547 __entry->ino,
548 __entry->count,
549 (char *)__entry->caller_ip)
550)
551
503#define DEFINE_INODE_EVENT(name) \ 552#define DEFINE_INODE_EVENT(name) \
504TRACE_EVENT(name, \ 553DEFINE_EVENT(xfs_inode_class, name, \
505 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ 554 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
506 TP_ARGS(ip, caller_ip), \ 555 TP_ARGS(ip, caller_ip))
507 TP_STRUCT__entry( \
508 __field(dev_t, dev) \
509 __field(xfs_ino_t, ino) \
510 __field(int, count) \
511 __field(unsigned long, caller_ip) \
512 ), \
513 TP_fast_assign( \
514 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
515 __entry->ino = ip->i_ino; \
516 __entry->count = atomic_read(&VFS_I(ip)->i_count); \
517 __entry->caller_ip = caller_ip; \
518 ), \
519 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf", \
520 MAJOR(__entry->dev), MINOR(__entry->dev), \
521 __entry->ino, \
522 __entry->count, \
523 (char *)__entry->caller_ip) \
524)
525DEFINE_INODE_EVENT(xfs_ihold); 556DEFINE_INODE_EVENT(xfs_ihold);
526DEFINE_INODE_EVENT(xfs_irele); 557DEFINE_INODE_EVENT(xfs_irele);
527/* the old xfs_itrace_entry tracer - to be replaced by something in the VFS */ 558/* the old xfs_itrace_entry tracer - to be replaced by something in the VFS */
@@ -529,55 +560,59 @@ DEFINE_INODE_EVENT(xfs_inode);
529#define xfs_itrace_entry(ip) \ 560#define xfs_itrace_entry(ip) \
530 trace_xfs_inode(ip, _THIS_IP_) 561 trace_xfs_inode(ip, _THIS_IP_)
531 562
532#define DEFINE_DQUOT_EVENT(tname) \ 563DECLARE_EVENT_CLASS(xfs_dquot_class,
533TRACE_EVENT(tname, \ 564 TP_PROTO(struct xfs_dquot *dqp),
534 TP_PROTO(struct xfs_dquot *dqp), \ 565 TP_ARGS(dqp),
535 TP_ARGS(dqp), \ 566 TP_STRUCT__entry(
536 TP_STRUCT__entry( \ 567 __field(dev_t, dev)
537 __field(dev_t, dev) \ 568 __field(__be32, id)
538 __field(__be32, id) \ 569 __field(unsigned, flags)
539 __field(unsigned, flags) \ 570 __field(unsigned, nrefs)
540 __field(unsigned, nrefs) \ 571 __field(unsigned long long, res_bcount)
541 __field(unsigned long long, res_bcount) \ 572 __field(unsigned long long, bcount)
542 __field(unsigned long long, bcount) \ 573 __field(unsigned long long, icount)
543 __field(unsigned long long, icount) \ 574 __field(unsigned long long, blk_hardlimit)
544 __field(unsigned long long, blk_hardlimit) \ 575 __field(unsigned long long, blk_softlimit)
545 __field(unsigned long long, blk_softlimit) \ 576 __field(unsigned long long, ino_hardlimit)
546 __field(unsigned long long, ino_hardlimit) \ 577 __field(unsigned long long, ino_softlimit)
547 __field(unsigned long long, ino_softlimit) \
548 ), \
549 TP_fast_assign( \
550 __entry->dev = dqp->q_mount->m_super->s_dev; \
551 __entry->id = dqp->q_core.d_id; \
552 __entry->flags = dqp->dq_flags; \
553 __entry->nrefs = dqp->q_nrefs; \
554 __entry->res_bcount = dqp->q_res_bcount; \
555 __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount); \
556 __entry->icount = be64_to_cpu(dqp->q_core.d_icount); \
557 __entry->blk_hardlimit = \
558 be64_to_cpu(dqp->q_core.d_blk_hardlimit); \
559 __entry->blk_softlimit = \
560 be64_to_cpu(dqp->q_core.d_blk_softlimit); \
561 __entry->ino_hardlimit = \
562 be64_to_cpu(dqp->q_core.d_ino_hardlimit); \
563 __entry->ino_softlimit = \
564 be64_to_cpu(dqp->q_core.d_ino_softlimit); \
565 ), \ 578 ),
566 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " \ 579 TP_fast_assign(
567 "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] " \ 580 __entry->dev = dqp->q_mount->m_super->s_dev;
568 "icnt 0x%llx [hard 0x%llx | soft 0x%llx]", \ 581 __entry->id = dqp->q_core.d_id;
569 MAJOR(__entry->dev), MINOR(__entry->dev), \ 582 __entry->flags = dqp->dq_flags;
570 be32_to_cpu(__entry->id), \ 583 __entry->nrefs = dqp->q_nrefs;
571 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS), \ 584 __entry->res_bcount = dqp->q_res_bcount;
572 __entry->nrefs, \ 585 __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
573 __entry->res_bcount, \ 586 __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
574 __entry->bcount, \ 587 __entry->blk_hardlimit =
575 __entry->blk_hardlimit, \ 588 be64_to_cpu(dqp->q_core.d_blk_hardlimit);
576 __entry->blk_softlimit, \ 589 __entry->blk_softlimit =
577 __entry->icount, \ 590 be64_to_cpu(dqp->q_core.d_blk_softlimit);
578 __entry->ino_hardlimit, \ 591 __entry->ino_hardlimit =
579 __entry->ino_softlimit) \ 592 be64_to_cpu(dqp->q_core.d_ino_hardlimit);
593 __entry->ino_softlimit =
594 be64_to_cpu(dqp->q_core.d_ino_softlimit);
595 ),
596 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
597 "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] "
598 "icnt 0x%llx [hard 0x%llx | soft 0x%llx]",
599 MAJOR(__entry->dev), MINOR(__entry->dev),
600 be32_to_cpu(__entry->id),
601 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
602 __entry->nrefs,
603 __entry->res_bcount,
604 __entry->bcount,
605 __entry->blk_hardlimit,
606 __entry->blk_softlimit,
607 __entry->icount,
608 __entry->ino_hardlimit,
609 __entry->ino_softlimit)
580) 610)
611
612#define DEFINE_DQUOT_EVENT(name) \
613DEFINE_EVENT(xfs_dquot_class, name, \
614 TP_PROTO(struct xfs_dquot *dqp), \
615 TP_ARGS(dqp))
581DEFINE_DQUOT_EVENT(xfs_dqadjust); 616DEFINE_DQUOT_EVENT(xfs_dqadjust);
582DEFINE_DQUOT_EVENT(xfs_dqshake_dirty); 617DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
583DEFINE_DQUOT_EVENT(xfs_dqshake_unlink); 618DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
@@ -610,72 +645,75 @@ DEFINE_DQUOT_EVENT(xfs_dqflush_done);
610DEFINE_IGET_EVENT(xfs_dquot_dqalloc); 645DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
611DEFINE_IGET_EVENT(xfs_dquot_dqdetach); 646DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
612 647
648DECLARE_EVENT_CLASS(xfs_loggrant_class,
649 TP_PROTO(struct log *log, struct xlog_ticket *tic),
650 TP_ARGS(log, tic),
651 TP_STRUCT__entry(
652 __field(dev_t, dev)
653 __field(unsigned, trans_type)
654 __field(char, ocnt)
655 __field(char, cnt)
656 __field(int, curr_res)
657 __field(int, unit_res)
658 __field(unsigned int, flags)
659 __field(void *, reserve_headq)
660 __field(void *, write_headq)
661 __field(int, grant_reserve_cycle)
662 __field(int, grant_reserve_bytes)
663 __field(int, grant_write_cycle)
664 __field(int, grant_write_bytes)
665 __field(int, curr_cycle)
666 __field(int, curr_block)
667 __field(xfs_lsn_t, tail_lsn)
668 ),
669 TP_fast_assign(
670 __entry->dev = log->l_mp->m_super->s_dev;
671 __entry->trans_type = tic->t_trans_type;
672 __entry->ocnt = tic->t_ocnt;
673 __entry->cnt = tic->t_cnt;
674 __entry->curr_res = tic->t_curr_res;
675 __entry->unit_res = tic->t_unit_res;
676 __entry->flags = tic->t_flags;
677 __entry->reserve_headq = log->l_reserve_headq;
678 __entry->write_headq = log->l_write_headq;
679 __entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
680 __entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
681 __entry->grant_write_cycle = log->l_grant_write_cycle;
682 __entry->grant_write_bytes = log->l_grant_write_bytes;
683 __entry->curr_cycle = log->l_curr_cycle;
684 __entry->curr_block = log->l_curr_block;
685 __entry->tail_lsn = log->l_tail_lsn;
686 ),
687 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
688 "t_unit_res %u t_flags %s reserve_headq 0x%p "
689 "write_headq 0x%p grant_reserve_cycle %d "
690 "grant_reserve_bytes %d grant_write_cycle %d "
691 "grant_write_bytes %d curr_cycle %d curr_block %d "
692 "tail_cycle %d tail_block %d",
693 MAJOR(__entry->dev), MINOR(__entry->dev),
694 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
695 __entry->ocnt,
696 __entry->cnt,
697 __entry->curr_res,
698 __entry->unit_res,
699 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
700 __entry->reserve_headq,
701 __entry->write_headq,
702 __entry->grant_reserve_cycle,
703 __entry->grant_reserve_bytes,
704 __entry->grant_write_cycle,
705 __entry->grant_write_bytes,
706 __entry->curr_cycle,
707 __entry->curr_block,
708 CYCLE_LSN(__entry->tail_lsn),
709 BLOCK_LSN(__entry->tail_lsn)
710 )
711)
613 712
614#define DEFINE_LOGGRANT_EVENT(tname) \ 713#define DEFINE_LOGGRANT_EVENT(name) \
615TRACE_EVENT(tname, \ 714DEFINE_EVENT(xfs_loggrant_class, name, \
616 TP_PROTO(struct log *log, struct xlog_ticket *tic), \ 715 TP_PROTO(struct log *log, struct xlog_ticket *tic), \
617 TP_ARGS(log, tic), \ 716 TP_ARGS(log, tic))
618 TP_STRUCT__entry( \
619 __field(dev_t, dev) \
620 __field(unsigned, trans_type) \
621 __field(char, ocnt) \
622 __field(char, cnt) \
623 __field(int, curr_res) \
624 __field(int, unit_res) \
625 __field(unsigned int, flags) \
626 __field(void *, reserve_headq) \
627 __field(void *, write_headq) \
628 __field(int, grant_reserve_cycle) \
629 __field(int, grant_reserve_bytes) \
630 __field(int, grant_write_cycle) \
631 __field(int, grant_write_bytes) \
632 __field(int, curr_cycle) \
633 __field(int, curr_block) \
634 __field(xfs_lsn_t, tail_lsn) \
635 ), \
636 TP_fast_assign( \
637 __entry->dev = log->l_mp->m_super->s_dev; \
638 __entry->trans_type = tic->t_trans_type; \
639 __entry->ocnt = tic->t_ocnt; \
640 __entry->cnt = tic->t_cnt; \
641 __entry->curr_res = tic->t_curr_res; \
642 __entry->unit_res = tic->t_unit_res; \
643 __entry->flags = tic->t_flags; \
644 __entry->reserve_headq = log->l_reserve_headq; \
645 __entry->write_headq = log->l_write_headq; \
646 __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; \
647 __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; \
648 __entry->grant_write_cycle = log->l_grant_write_cycle; \
649 __entry->grant_write_bytes = log->l_grant_write_bytes; \
650 __entry->curr_cycle = log->l_curr_cycle; \
651 __entry->curr_block = log->l_curr_block; \
652 __entry->tail_lsn = log->l_tail_lsn; \
653 ), \
654 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " \
655 "t_unit_res %u t_flags %s reserve_headq 0x%p " \
656 "write_headq 0x%p grant_reserve_cycle %d " \
657 "grant_reserve_bytes %d grant_write_cycle %d " \
658 "grant_write_bytes %d curr_cycle %d curr_block %d " \
659 "tail_cycle %d tail_block %d", \
660 MAJOR(__entry->dev), MINOR(__entry->dev), \
661 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES), \
662 __entry->ocnt, \
663 __entry->cnt, \
664 __entry->curr_res, \
665 __entry->unit_res, \
666 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), \
667 __entry->reserve_headq, \
668 __entry->write_headq, \
669 __entry->grant_reserve_cycle, \
670 __entry->grant_reserve_bytes, \
671 __entry->grant_write_cycle, \
672 __entry->grant_write_bytes, \
673 __entry->curr_cycle, \
674 __entry->curr_block, \
675 CYCLE_LSN(__entry->tail_lsn), \
676 BLOCK_LSN(__entry->tail_lsn) \
677 ) \
678)
679DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); 717DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
680DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); 718DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
681DEFINE_LOGGRANT_EVENT(xfs_log_reserve); 719DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
@@ -815,7 +853,7 @@ TRACE_EVENT(name, \
815 ), \ 853 ), \
816 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 854 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
817 "offset 0x%llx count %zd flags %s " \ 855 "offset 0x%llx count %zd flags %s " \
818 "startoff 0x%llx startblock 0x%llx blockcount 0x%llx", \ 856 "startoff 0x%llx startblock %s blockcount 0x%llx", \
819 MAJOR(__entry->dev), MINOR(__entry->dev), \ 857 MAJOR(__entry->dev), MINOR(__entry->dev), \
820 __entry->ino, \ 858 __entry->ino, \
821 __entry->size, \ 859 __entry->size, \
@@ -824,7 +862,7 @@ TRACE_EVENT(name, \
824 __entry->count, \ 862 __entry->count, \
825 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \ 863 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \
826 __entry->startoff, \ 864 __entry->startoff, \
827 __entry->startblock, \ 865 xfs_fmtfsblock(__entry->startblock), \
828 __entry->blockcount) \ 866 __entry->blockcount) \
829) 867)
830DEFINE_IOMAP_EVENT(xfs_iomap_enter); 868DEFINE_IOMAP_EVENT(xfs_iomap_enter);
@@ -897,28 +935,32 @@ TRACE_EVENT(xfs_itruncate_start,
897 __entry->toss_finish) 935 __entry->toss_finish)
898); 936);
899 937
938DECLARE_EVENT_CLASS(xfs_itrunc_class,
939 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
940 TP_ARGS(ip, new_size),
941 TP_STRUCT__entry(
942 __field(dev_t, dev)
943 __field(xfs_ino_t, ino)
944 __field(xfs_fsize_t, size)
945 __field(xfs_fsize_t, new_size)
946 ),
947 TP_fast_assign(
948 __entry->dev = VFS_I(ip)->i_sb->s_dev;
949 __entry->ino = ip->i_ino;
950 __entry->size = ip->i_d.di_size;
951 __entry->new_size = new_size;
952 ),
953 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
954 MAJOR(__entry->dev), MINOR(__entry->dev),
955 __entry->ino,
956 __entry->size,
957 __entry->new_size)
958)
959
900#define DEFINE_ITRUNC_EVENT(name) \ 960#define DEFINE_ITRUNC_EVENT(name) \
901TRACE_EVENT(name, \ 961DEFINE_EVENT(xfs_itrunc_class, name, \
902 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ 962 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
903 TP_ARGS(ip, new_size), \ 963 TP_ARGS(ip, new_size))
904 TP_STRUCT__entry( \
905 __field(dev_t, dev) \
906 __field(xfs_ino_t, ino) \
907 __field(xfs_fsize_t, size) \
908 __field(xfs_fsize_t, new_size) \
909 ), \
910 TP_fast_assign( \
911 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
912 __entry->ino = ip->i_ino; \
913 __entry->size = ip->i_d.di_size; \
914 __entry->new_size = new_size; \
915 ), \
916 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx", \
917 MAJOR(__entry->dev), MINOR(__entry->dev), \
918 __entry->ino, \
919 __entry->size, \
920 __entry->new_size) \
921)
922DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start); 964DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start);
923DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end); 965DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end);
924 966
@@ -1037,28 +1079,28 @@ TRACE_EVENT(xfs_alloc_unbusy,
1037 1079
1038TRACE_EVENT(xfs_alloc_busysearch, 1080TRACE_EVENT(xfs_alloc_busysearch,
1039 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, 1081 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1040 xfs_extlen_t len, int found), 1082 xfs_extlen_t len, xfs_lsn_t lsn),
1041 TP_ARGS(mp, agno, agbno, len, found), 1083 TP_ARGS(mp, agno, agbno, len, lsn),
1042 TP_STRUCT__entry( 1084 TP_STRUCT__entry(
1043 __field(dev_t, dev) 1085 __field(dev_t, dev)
1044 __field(xfs_agnumber_t, agno) 1086 __field(xfs_agnumber_t, agno)
1045 __field(xfs_agblock_t, agbno) 1087 __field(xfs_agblock_t, agbno)
1046 __field(xfs_extlen_t, len) 1088 __field(xfs_extlen_t, len)
1047 __field(int, found) 1089 __field(xfs_lsn_t, lsn)
1048 ), 1090 ),
1049 TP_fast_assign( 1091 TP_fast_assign(
1050 __entry->dev = mp->m_super->s_dev; 1092 __entry->dev = mp->m_super->s_dev;
1051 __entry->agno = agno; 1093 __entry->agno = agno;
1052 __entry->agbno = agbno; 1094 __entry->agbno = agbno;
1053 __entry->len = len; 1095 __entry->len = len;
1054 __entry->found = found; 1096 __entry->lsn = lsn;
1055 ), 1097 ),
1056 TP_printk("dev %d:%d agno %u agbno %u len %u %s", 1098 TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx",
1057 MAJOR(__entry->dev), MINOR(__entry->dev), 1099 MAJOR(__entry->dev), MINOR(__entry->dev),
1058 __entry->agno, 1100 __entry->agno,
1059 __entry->agbno, 1101 __entry->agbno,
1060 __entry->len, 1102 __entry->len,
1061 __print_symbolic(__entry->found, XFS_BUSY_STATES)) 1103 __entry->lsn)
1062); 1104);
1063 1105
1064TRACE_EVENT(xfs_agf, 1106TRACE_EVENT(xfs_agf,
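
xfs_alloc_busysearch previously logged only a boolean found state via __print_symbolic; it now records the LSN the caller must force the log to, matching the xfs_alloc.c rewrite at the end of this patch. A sketch of the consumer side, assuming the 2.6.32-era three-argument xfs_log_force() and abridging the surrounding code:

	/*
	 * lsn is the highest t_commit_lsn among busy extents that
	 * overlapped [bno, bno + len); zero means no overlap was found.
	 */
	trace_xfs_alloc_busysearch(mp, agno, bno, len, lsn);
	if (lsn)
		xfs_log_force(mp, lsn, XFS_LOG_FORCE | XFS_LOG_SYNC);
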
@@ -1152,77 +1194,80 @@ TRACE_EVENT(xfs_free_extent,
1152 1194
1153); 1195);
1154 1196
1155#define DEFINE_ALLOC_EVENT(name) \ 1197DECLARE_EVENT_CLASS(xfs_alloc_class,
1156TRACE_EVENT(name, \ 1198 TP_PROTO(struct xfs_alloc_arg *args),
1157 TP_PROTO(struct xfs_alloc_arg *args), \ 1199 TP_ARGS(args),
1158 TP_ARGS(args), \ 1200 TP_STRUCT__entry(
1159 TP_STRUCT__entry( \ 1201 __field(dev_t, dev)
1160 __field(dev_t, dev) \ 1202 __field(xfs_agnumber_t, agno)
1161 __field(xfs_agnumber_t, agno) \ 1203 __field(xfs_agblock_t, agbno)
1162 __field(xfs_agblock_t, agbno) \ 1204 __field(xfs_extlen_t, minlen)
1163 __field(xfs_extlen_t, minlen) \ 1205 __field(xfs_extlen_t, maxlen)
1164 __field(xfs_extlen_t, maxlen) \ 1206 __field(xfs_extlen_t, mod)
1165 __field(xfs_extlen_t, mod) \ 1207 __field(xfs_extlen_t, prod)
1166 __field(xfs_extlen_t, prod) \ 1208 __field(xfs_extlen_t, minleft)
1167 __field(xfs_extlen_t, minleft) \ 1209 __field(xfs_extlen_t, total)
1168 __field(xfs_extlen_t, total) \ 1210 __field(xfs_extlen_t, alignment)
1169 __field(xfs_extlen_t, alignment) \ 1211 __field(xfs_extlen_t, minalignslop)
1170 __field(xfs_extlen_t, minalignslop) \ 1212 __field(xfs_extlen_t, len)
1171 __field(xfs_extlen_t, len) \ 1213 __field(short, type)
1172 __field(short, type) \ 1214 __field(short, otype)
1173 __field(short, otype) \ 1215 __field(char, wasdel)
1174 __field(char, wasdel) \ 1216 __field(char, wasfromfl)
1175 __field(char, wasfromfl) \ 1217 __field(char, isfl)
1176 __field(char, isfl) \ 1218 __field(char, userdata)
1177 __field(char, userdata) \ 1219 __field(xfs_fsblock_t, firstblock)
1178 __field(xfs_fsblock_t, firstblock) \ 1220 ),
1179 ), \ 1221 TP_fast_assign(
1180 TP_fast_assign( \ 1222 __entry->dev = args->mp->m_super->s_dev;
1181 __entry->dev = args->mp->m_super->s_dev; \ 1223 __entry->agno = args->agno;
1182 __entry->agno = args->agno; \ 1224 __entry->agbno = args->agbno;
1183 __entry->agbno = args->agbno; \ 1225 __entry->minlen = args->minlen;
1184 __entry->minlen = args->minlen; \ 1226 __entry->maxlen = args->maxlen;
1185 __entry->maxlen = args->maxlen; \ 1227 __entry->mod = args->mod;
1186 __entry->mod = args->mod; \ 1228 __entry->prod = args->prod;
1187 __entry->prod = args->prod; \ 1229 __entry->minleft = args->minleft;
1188 __entry->minleft = args->minleft; \ 1230 __entry->total = args->total;
1189 __entry->total = args->total; \ 1231 __entry->alignment = args->alignment;
1190 __entry->alignment = args->alignment; \ 1232 __entry->minalignslop = args->minalignslop;
1191 __entry->minalignslop = args->minalignslop; \ 1233 __entry->len = args->len;
1192 __entry->len = args->len; \ 1234 __entry->type = args->type;
1193 __entry->type = args->type; \ 1235 __entry->otype = args->otype;
1194 __entry->otype = args->otype; \ 1236 __entry->wasdel = args->wasdel;
1195 __entry->wasdel = args->wasdel; \ 1237 __entry->wasfromfl = args->wasfromfl;
1196 __entry->wasfromfl = args->wasfromfl; \ 1238 __entry->isfl = args->isfl;
1197 __entry->isfl = args->isfl; \ 1239 __entry->userdata = args->userdata;
1198 __entry->userdata = args->userdata; \ 1240 __entry->firstblock = args->firstblock;
1199 __entry->firstblock = args->firstblock; \ 1241 ),
1200 ), \ 1242 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
1201 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " \ 1243 "prod %u minleft %u total %u alignment %u minalignslop %u "
1202 "prod %u minleft %u total %u alignment %u minalignslop %u " \ 1244 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
1203 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " \ 1245 "userdata %d firstblock 0x%llx",
1204 "userdata %d firstblock 0x%llx", \ 1246 MAJOR(__entry->dev), MINOR(__entry->dev),
1205 MAJOR(__entry->dev), MINOR(__entry->dev), \ 1247 __entry->agno,
1206 __entry->agno, \ 1248 __entry->agbno,
1207 __entry->agbno, \ 1249 __entry->minlen,
1208 __entry->minlen, \ 1250 __entry->maxlen,
1209 __entry->maxlen, \ 1251 __entry->mod,
1210 __entry->mod, \ 1252 __entry->prod,
1211 __entry->prod, \ 1253 __entry->minleft,
1212 __entry->minleft, \ 1254 __entry->total,
1213 __entry->total, \ 1255 __entry->alignment,
1214 __entry->alignment, \ 1256 __entry->minalignslop,
1215 __entry->minalignslop, \ 1257 __entry->len,
1216 __entry->len, \ 1258 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
1217 __print_symbolic(__entry->type, XFS_ALLOC_TYPES), \ 1259 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
1218 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), \ 1260 __entry->wasdel,
1219 __entry->wasdel, \ 1261 __entry->wasfromfl,
1220 __entry->wasfromfl, \ 1262 __entry->isfl,
1221 __entry->isfl, \ 1263 __entry->userdata,
1222 __entry->userdata, \ 1264 __entry->firstblock)
1223 __entry->firstblock) \
1224) 1265)
1225 1266
1267#define DEFINE_ALLOC_EVENT(name) \
1268DEFINE_EVENT(xfs_alloc_class, name, \
1269 TP_PROTO(struct xfs_alloc_arg *args), \
1270 TP_ARGS(args))
1226DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); 1271DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
1227DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); 1272DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
1228DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); 1273DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
@@ -1245,92 +1290,100 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
1245DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); 1290DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
1246DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); 1291DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
1247 1292
1248#define DEFINE_DIR2_TRACE(tname) \ 1293DECLARE_EVENT_CLASS(xfs_dir2_class,
1249TRACE_EVENT(tname, \ 1294 TP_PROTO(struct xfs_da_args *args),
1295 TP_ARGS(args),
1296 TP_STRUCT__entry(
1297 __field(dev_t, dev)
1298 __field(xfs_ino_t, ino)
1299 __dynamic_array(char, name, args->namelen)
1300 __field(int, namelen)
1301 __field(xfs_dahash_t, hashval)
1302 __field(xfs_ino_t, inumber)
1303 __field(int, op_flags)
1304 ),
1305 TP_fast_assign(
1306 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1307 __entry->ino = args->dp->i_ino;
1308 if (args->namelen)
1309 memcpy(__get_str(name), args->name, args->namelen);
1310 __entry->namelen = args->namelen;
1311 __entry->hashval = args->hashval;
1312 __entry->inumber = args->inumber;
1313 __entry->op_flags = args->op_flags;
1314 ),
1315 TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
1316 "inumber 0x%llx op_flags %s",
1317 MAJOR(__entry->dev), MINOR(__entry->dev),
1318 __entry->ino,
1319 __entry->namelen,
1320 __entry->namelen ? __get_str(name) : NULL,
1321 __entry->namelen,
1322 __entry->hashval,
1323 __entry->inumber,
1324 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
1325)
1326
1327#define DEFINE_DIR2_EVENT(name) \
1328DEFINE_EVENT(xfs_dir2_class, name, \
1250 TP_PROTO(struct xfs_da_args *args), \ 1329 TP_PROTO(struct xfs_da_args *args), \
1251 TP_ARGS(args), \ 1330 TP_ARGS(args))
1252 TP_STRUCT__entry( \ 1331DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
1253 __field(dev_t, dev) \ 1332DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
1254 __field(xfs_ino_t, ino) \ 1333DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
1255 __dynamic_array(char, name, args->namelen) \ 1334DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
1256 __field(int, namelen) \ 1335DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
1257 __field(xfs_dahash_t, hashval) \ 1336DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
1258 __field(xfs_ino_t, inumber) \ 1337DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
1259 __field(int, op_flags) \ 1338DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
1260 ), \ 1339DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
1261 TP_fast_assign( \ 1340DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
1262 __entry->dev = VFS_I(args->dp)->i_sb->s_dev; \ 1341DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
1263 __entry->ino = args->dp->i_ino; \ 1342DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
1264 if (args->namelen) \ 1343DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
1265 memcpy(__get_str(name), args->name, args->namelen); \ 1344DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
1266 __entry->namelen = args->namelen; \ 1345DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
-		__entry->hashval = args->hashval; \
-		__entry->inumber = args->inumber; \
-		__entry->op_flags = args->op_flags; \
-	), \
-	TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x " \
-		  "inumber 0x%llx op_flags %s", \
-		  MAJOR(__entry->dev), MINOR(__entry->dev), \
-		  __entry->ino, \
-		  __entry->namelen, \
-		  __entry->namelen ? __get_str(name) : NULL, \
-		  __entry->namelen, \
-		  __entry->hashval, \
-		  __entry->inumber, \
-		  __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) \
-)
-DEFINE_DIR2_TRACE(xfs_dir2_sf_addname);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_create);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_lookup);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_replace);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_removename);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_toino4);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_toino8);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_to_block);
-DEFINE_DIR2_TRACE(xfs_dir2_block_addname);
-DEFINE_DIR2_TRACE(xfs_dir2_block_lookup);
-DEFINE_DIR2_TRACE(xfs_dir2_block_replace);
-DEFINE_DIR2_TRACE(xfs_dir2_block_removename);
-DEFINE_DIR2_TRACE(xfs_dir2_block_to_sf);
-DEFINE_DIR2_TRACE(xfs_dir2_block_to_leaf);
-DEFINE_DIR2_TRACE(xfs_dir2_leaf_addname);
-DEFINE_DIR2_TRACE(xfs_dir2_leaf_lookup);
-DEFINE_DIR2_TRACE(xfs_dir2_leaf_replace);
-DEFINE_DIR2_TRACE(xfs_dir2_leaf_removename);
-DEFINE_DIR2_TRACE(xfs_dir2_leaf_to_block);
-DEFINE_DIR2_TRACE(xfs_dir2_leaf_to_node);
-DEFINE_DIR2_TRACE(xfs_dir2_node_addname);
-DEFINE_DIR2_TRACE(xfs_dir2_node_lookup);
-DEFINE_DIR2_TRACE(xfs_dir2_node_replace);
-DEFINE_DIR2_TRACE(xfs_dir2_node_removename);
-DEFINE_DIR2_TRACE(xfs_dir2_node_to_leaf);
-
-#define DEFINE_DIR2_SPACE_TRACE(tname) \
-TRACE_EVENT(tname, \
-	TP_PROTO(struct xfs_da_args *args, int idx), \
-	TP_ARGS(args, idx), \
-	TP_STRUCT__entry( \
-		__field(dev_t, dev) \
-		__field(xfs_ino_t, ino) \
-		__field(int, op_flags) \
-		__field(int, idx) \
-	), \
-	TP_fast_assign( \
-		__entry->dev = VFS_I(args->dp)->i_sb->s_dev; \
-		__entry->ino = args->dp->i_ino; \
-		__entry->op_flags = args->op_flags; \
-		__entry->idx = idx; \
-	), \
-	TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d", \
-		  MAJOR(__entry->dev), MINOR(__entry->dev), \
-		  __entry->ino, \
-		  __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), \
-		  __entry->idx) \
-)
-DEFINE_DIR2_SPACE_TRACE(xfs_dir2_leafn_add);
-DEFINE_DIR2_SPACE_TRACE(xfs_dir2_leafn_remove);
-DEFINE_DIR2_SPACE_TRACE(xfs_dir2_grow_inode);
-DEFINE_DIR2_SPACE_TRACE(xfs_dir2_shrink_inode);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
+DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
+
+DECLARE_EVENT_CLASS(xfs_dir2_space_class,
+	TP_PROTO(struct xfs_da_args *args, int idx),
+	TP_ARGS(args, idx),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(int, op_flags)
+		__field(int, idx)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+		__entry->ino = args->dp->i_ino;
+		__entry->op_flags = args->op_flags;
+		__entry->idx = idx;
+	),
+	TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
+		  __entry->idx)
+)
+
+#define DEFINE_DIR2_SPACE_EVENT(name) \
+DEFINE_EVENT(xfs_dir2_space_class, name, \
+	TP_PROTO(struct xfs_da_args *args, int idx), \
+	TP_ARGS(args, idx))
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
 
 TRACE_EVENT(xfs_dir2_leafn_moveents,
 	TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
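The pattern applied throughout the xfs_trace.h hunks above is the DECLARE_EVENT_CLASS/DEFINE_EVENT split from the kernel tracing API: the event class carries the TP_STRUCT__entry layout, the TP_fast_assign body, and the TP_printk format once, and each DEFINE_EVENT stamps out a tracepoint that shares them, instead of one full TRACE_EVENT per tracepoint. A minimal sketch of the same pattern for a hypothetical "foo" subsystem (illustrative only, not code from this patch; the usual trace-header boilerplate such as TRACE_SYSTEM is omitted):

DECLARE_EVENT_CLASS(foo_op_class,
	TP_PROTO(struct inode *inode, int idx),
	TP_ARGS(inode, idx),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(int, idx)
	),
	TP_fast_assign(
		__entry->dev = inode->i_sb->s_dev;
		__entry->idx = idx;
	),
	TP_printk("dev %d:%d index %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->idx)
);

#define DEFINE_FOO_OP_EVENT(name) \
DEFINE_EVENT(foo_op_class, name, \
	TP_PROTO(struct inode *inode, int idx), \
	TP_ARGS(inode, idx))
DEFINE_FOO_OP_EVENT(foo_op_start);
DEFINE_FOO_OP_EVENT(foo_op_done);

Besides the source-level brevity visible in the diff, the class body is emitted only once per class rather than once per event, which shrinks the generated tracing code.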
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 71af76fe8a23..873e07e29074 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,7 @@ xfs_qm_dqrele_all_inodes(
 	uint		 flags)
 {
 	ASSERT(mp->m_quotainfo);
-	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG);
+	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
 }
 
 /*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a1c65fc6d9c4..275b1f4f9430 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2563,43 +2563,41 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 	xfs_mount_t		*mp;
 	xfs_perag_busy_t	*bsy;
 	xfs_agblock_t		uend, bend;
-	xfs_lsn_t		lsn;
+	xfs_lsn_t		lsn = 0;
 	int			cnt;
 
 	mp = tp->t_mountp;
 
 	spin_lock(&mp->m_perag[agno].pagb_lock);
-	cnt = mp->m_perag[agno].pagb_count;
 
 	uend = bno + len - 1;
 
-	/* search pagb_list for this slot, skipping open slots */
-	for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) {
+	/*
+	 * search pagb_list for this slot, skipping open slots. We have to
+	 * search the entire array as there may be multiple overlaps and
+	 * we have to get the most recent LSN for the log force to push out
+	 * all the transactions that span the range.
+	 */
+	for (cnt = 0; cnt < mp->m_perag[agno].pagb_count; cnt++) {
+		bsy = &mp->m_perag[agno].pagb_list[cnt];
+		if (!bsy->busy_tp)
+			continue;
 
-		/*
-		 * (start1,length1) within (start2, length2)
-		 */
-		if (bsy->busy_tp != NULL) {
-			bend = bsy->busy_start + bsy->busy_length - 1;
-			if ((bno > bend) || (uend < bsy->busy_start)) {
-				cnt--;
-			} else {
-				break;
-			}
-		}
-	}
+		bend = bsy->busy_start + bsy->busy_length - 1;
+		if (bno > bend || uend < bsy->busy_start)
+			continue;
 
-	trace_xfs_alloc_busysearch(mp, agno, bno, len, !!cnt);
+		/* (start1,length1) within (start2, length2) */
+		if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0)
+			lsn = bsy->busy_tp->t_commit_lsn;
+	}
+	spin_unlock(&mp->m_perag[agno].pagb_lock);
+	trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn);
 
 	/*
 	 * If a block was found, force the log through the LSN of the
 	 * transaction that freed the block
 	 */
-	if (cnt) {
-		lsn = bsy->busy_tp->t_commit_lsn;
-		spin_unlock(&mp->m_perag[agno].pagb_lock);
+	if (lsn)
 		xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
-	} else {
-		spin_unlock(&mp->m_perag[agno].pagb_lock);
-	}
 }
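The rewritten search above no longer stops at the first overlapping busy extent: it scans the whole pagb_list, skips open slots, and keeps the highest commit LSN across every overlap, so a single log force covers all transactions that span the range. A standalone sketch of that core logic, with hypothetical types standing in for the perag structures (two closed ranges [bno, uend] and [start, bend] overlap unless one ends before the other begins):

/*
 * Sketch only -- the real code walks mp->m_perag[agno].pagb_list under
 * pagb_lock and compares LSNs with XFS_LSN_CMP().
 */
struct busy_sketch {
	unsigned long	start;		/* first block of busy extent */
	unsigned long	len;		/* length in blocks */
	unsigned long	commit_lsn;	/* 0 means the slot is unused */
};

static unsigned long
busy_search_sketch(struct busy_sketch *list, int count,
		   unsigned long bno, unsigned long len)
{
	unsigned long	uend = bno + len - 1;
	unsigned long	lsn = 0;
	int		i;

	for (i = 0; i < count; i++) {
		unsigned long bend;

		if (!list[i].commit_lsn)	/* open slot, skip */
			continue;
		bend = list[i].start + list[i].len - 1;
		if (bno > bend || uend < list[i].start)
			continue;		/* no overlap */
		if (list[i].commit_lsn > lsn)	/* remember newest LSN */
			lsn = list[i].commit_lsn;
	}
	return lsn;	/* 0: no busy extent overlaps the range */
}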
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d1483a4f71b8..84ca1cf16a1e 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -114,10 +114,82 @@ xfs_swapext(
 	return error;
 }
 
+/*
+ * We need to check that the format of the data fork in the temporary inode is
+ * valid for the target inode before doing the swap. This is not a problem with
+ * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
+ * data fork depending on the space the attribute fork is taking, so we can get
+ * invalid formats on the target inode.
+ *
+ * E.g. the target has space for 7 extents in extent format, but the temp
+ * inode only has space for 6. If we defragment down to 7 extents, then the
+ * tmp format is a btree, but when swapped it needs to be in extent format.
+ * Hence we can't just blindly swap data forks on attr2 filesystems.
+ *
+ * Note that we check the swap in both directions so that we don't end up with
+ * a corrupt temporary inode, either.
+ *
+ * Note that fixing the way xfs_fsr sets up the attribute fork in the source
+ * inode will prevent this situation from occurring, so all we do here is
+ * reject and log the attempt. Basically, we are putting the responsibility on
+ * userspace to get this right.
+ */
+static int
+xfs_swap_extents_check_format(
+	xfs_inode_t	*ip,	/* target inode */
+	xfs_inode_t	*tip)	/* tmp inode */
+{
+	/* Should never get a local format */
+	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
+	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		return EINVAL;
+
+	/*
+	 * If the target inode has fewer extents than the temporary inode,
+	 * why did userspace call us?
+	 */
+	if (ip->i_d.di_nextents < tip->i_d.di_nextents)
+		return EINVAL;
+
+	/*
+	 * If the target inode is in extent form and the temp inode is in
+	 * btree form, then we will end up with the target inode in the wrong
+	 * format, as we already know there are fewer extents in the temp
+	 * inode.
+	 */
+	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
+		return EINVAL;
+
+	/* Check temp in extent form to max in target */
+	if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max)
+		return EINVAL;
+
+	/* Check target in extent form to max in temp */
+	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
+		return EINVAL;
+
+	/* Check root block of temp in btree form to max in target */
+	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+	    XFS_IFORK_BOFF(ip) &&
+	    tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
+		return EINVAL;
+
+	/* Check root block of target in btree form to max in temp */
+	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+	    XFS_IFORK_BOFF(tip) &&
+	    ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
+		return EINVAL;
+
+	return 0;
+}
+
 int
 xfs_swap_extents(
-	xfs_inode_t	*ip,
-	xfs_inode_t	*tip,
+	xfs_inode_t	*ip,	/* target inode */
+	xfs_inode_t	*tip,	/* tmp inode */
 	xfs_swapext_t	*sxp)
 {
 	xfs_mount_t	*mp;
@@ -161,13 +233,6 @@ xfs_swap_extents(
 		goto out_unlock;
 	}
 
-	/* Should never get a local format */
-	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
-	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-		error = XFS_ERROR(EINVAL);
-		goto out_unlock;
-	}
-
 	if (VN_CACHED(VFS_I(tip)) != 0) {
 		error = xfs_flushinval_pages(tip, 0, -1,
 				FI_REMAPF_LOCKED);
@@ -189,13 +254,12 @@ xfs_swap_extents(
 		goto out_unlock;
 	}
 
-	/*
-	 * If the target has extended attributes, the tmp file
-	 * must also in order to ensure the correct data fork
-	 * format.
-	 */
-	if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) {
-		error = XFS_ERROR(EINVAL);
-		goto out_unlock;
-	}
+	/* check inode formats now that data is flushed */
+	error = xfs_swap_extents_check_format(ip, tip);
+	if (error) {
+		xfs_fs_cmn_err(CE_NOTE, mp,
+		    "%s: inode 0x%llx format is incompatible for exchanging.",
+				__FILE__, ip->i_ino);
+		goto out_unlock;
+	}
 
@@ -276,6 +340,16 @@ xfs_swap_extents(
 	*tifp = *tempifp;	/* struct copy */
 
 	/*
+	 * Fix the in-memory data fork values that are dependent on the fork
+	 * offset in the inode. We can't assume they remain the same as attr2
+	 * has dynamic fork offsets.
+	 */
+	ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
+		(uint)sizeof(xfs_bmbt_rec_t);
+	tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
+		(uint)sizeof(xfs_bmbt_rec_t);
+
+	/*
 	 * Fix the on-disk inode values
 	 */
 	tmp = (__uint64_t)ip->i_d.di_nblocks;
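The in-memory fixup above works because if_ext_max is pure arithmetic: the bytes available to the data fork divided by the size of one extent record, and on attr2 filesystems the attribute fork offset (and therefore the available bytes) can differ between the two inodes. A worked example with illustrative fork sizes, chosen to reproduce the 7-vs-6 case from the comment (the 16-byte record size matches sizeof(xfs_bmbt_rec_t); the fork sizes are made up):

#include <stdio.h>

#define BMBT_REC_SIZE	16	/* one XFS bmbt extent record, in bytes */

/* how many extent records fit inline in a data fork of this size */
static unsigned int ext_max(unsigned int fork_bytes)
{
	return fork_bytes / BMBT_REC_SIZE;
}

int main(void)
{
	/* hypothetical fork sizes for the target and temporary inodes */
	printf("target: %u extents\n", ext_max(112));	/* -> 7 */
	printf("temp:   %u extents\n", ext_max(96));	/* -> 6 */
	return 0;
}

A temp inode defragmented down to 7 extents must hold them as a btree, which the extent-format target cannot accept on swap; that is exactly the mismatch xfs_swap_extents_check_format() rejects.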
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index fa402a6bbbcf..155e798f30a1 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -73,7 +73,6 @@ xfs_inode_alloc(
 	ASSERT(atomic_read(&ip->i_pincount) == 0);
 	ASSERT(!spin_is_locked(&ip->i_flags_lock));
 	ASSERT(completion_done(&ip->i_flush));
-	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
 
 	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ce278b3ae7fc..ef77fd88c8e3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2841,8 +2841,8 @@ xfs_iflush(
 	mp = ip->i_mount;
 
 	/*
-	 * If the inode isn't dirty, then just release the inode
-	 * flush lock and do nothing.
+	 * If the inode isn't dirty, then just release the inode flush lock and
+	 * do nothing.
 	 */
 	if (xfs_inode_clean(ip)) {
 		xfs_ifunlock(ip);
@@ -2868,6 +2868,19 @@ xfs_iflush(
 	xfs_iunpin_wait(ip);
 
 	/*
+	 * For stale inodes we cannot rely on the backing buffer remaining
+	 * stale in cache for the remaining life of the stale inode, and so
+	 * xfs_itobp() below may give us a buffer that no longer contains
+	 * inodes. We have to check this after ensuring the inode is
+	 * unpinned so that it is safe to reclaim the stale inode after the
+	 * flush call.
+	 */
+	if (xfs_iflags_test(ip, XFS_ISTALE)) {
+		xfs_ifunlock(ip);
+		return 0;
+	}
+
+	/*
 	 * This may have been unpinned because the filesystem is shutting
 	 * down forcibly. If that's the case we must not write this inode
 	 * to disk, because the log record didn't make it to disk!
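Taken together with the existing clean-inode check, the change gives xfs_iflush() two early returns whose ordering is the point: the XFS_ISTALE test sits after xfs_iunpin_wait(), so a stale inode is known to be unpinned, and hence safe to reclaim, at the moment the flush is skipped. A condensed sketch of the resulting control flow, using only calls visible in the diff (not the full kernel function; error paths and the actual buffer write are elided):

int iflush_sketch(xfs_inode_t *ip)
{
	if (xfs_inode_clean(ip)) {
		xfs_ifunlock(ip);	/* nothing dirty: drop flush lock */
		return 0;
	}

	xfs_iunpin_wait(ip);		/* wait until log I/O unpins it */

	/*
	 * Only after the unpin wait is it safe to bail on a stale inode:
	 * the caller may then reclaim it without any flush happening.
	 */
	if (xfs_iflags_test(ip, XFS_ISTALE)) {
		xfs_ifunlock(ip);
		return 0;
	}

	/* ... map the inode buffer via xfs_itobp() and write it back ... */
	return 0;
}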
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 9e15a1185362..6be05f756d59 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1517,6 +1517,8 @@ xfs_rtfree_range(
 	 */
 	error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
 		&postblock);
+	if (error)
+		return error;
 	/*
 	 * If there are blocks not being freed at the front of the
 	 * old extent, add summary data for them to be allocated.
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6558ffd8d140..6f268756bf36 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -70,7 +70,6 @@ xfs_setattr(
 	uint			commit_flags=0;
 	uid_t			uid=0, iuid=0;
 	gid_t			gid=0, igid=0;
-	int			timeflags = 0;
 	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
 	int			need_iolock = 1;
 
@@ -135,16 +134,13 @@ xfs_setattr(
 	if (flags & XFS_ATTR_NOLOCK)
 		need_iolock = 0;
 	if (!(mask & ATTR_SIZE)) {
-		if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) ||
-		    (mp->m_flags & XFS_MOUNT_WSYNC)) {
-			tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-			commit_flags = 0;
-			if ((code = xfs_trans_reserve(tp, 0,
-						      XFS_ICHANGE_LOG_RES(mp), 0,
-						      0, 0))) {
-				lock_flags = 0;
-				goto error_return;
-			}
+		tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+		commit_flags = 0;
+		code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
+					 0, 0, 0);
+		if (code) {
+			lock_flags = 0;
+			goto error_return;
 		}
 	} else {
 		if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
@@ -295,15 +291,23 @@ xfs_setattr(
 		 * or we are explicitly asked to change it. This handles
 		 * the semantic difference between truncate() and ftruncate()
 		 * as implemented in the VFS.
+		 *
+		 * The regular truncate() case without ATTR_CTIME and
+		 * ATTR_MTIME is a special case where we need to update the
+		 * times despite not having these flags set. For all other
+		 * operations the VFS sets these flags explicitly if it wants
+		 * a timestamp update.
 		 */
-		if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME))
-			timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+		if (iattr->ia_size != ip->i_size &&
+		    (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
+			iattr->ia_ctime = iattr->ia_mtime =
+				current_fs_time(inode->i_sb);
+			mask |= ATTR_CTIME | ATTR_MTIME;
+		}
 
 		if (iattr->ia_size > ip->i_size) {
 			ip->i_d.di_size = iattr->ia_size;
 			ip->i_size = iattr->ia_size;
-			if (!(flags & XFS_ATTR_DMI))
-				xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
 			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 		} else if (iattr->ia_size <= ip->i_size ||
 			   (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
@@ -374,9 +378,6 @@ xfs_setattr(
 			ip->i_d.di_gid = gid;
 			inode->i_gid = gid;
 		}
-
-		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
-		timeflags |= XFS_ICHGTIME_CHG;
 	}
 
 	/*
@@ -393,51 +394,37 @@ xfs_setattr(
 
 		inode->i_mode &= S_IFMT;
 		inode->i_mode |= mode & ~S_IFMT;
-
-		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-		timeflags |= XFS_ICHGTIME_CHG;
 	}
 
 	/*
 	 * Change file access or modified times.
 	 */
-	if (mask & (ATTR_ATIME|ATTR_MTIME)) {
-		if (mask & ATTR_ATIME) {
-			inode->i_atime = iattr->ia_atime;
-			ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
-			ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
-			ip->i_update_core = 1;
-		}
-		if (mask & ATTR_MTIME) {
-			inode->i_mtime = iattr->ia_mtime;
-			ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-			ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-			timeflags &= ~XFS_ICHGTIME_MOD;
-			timeflags |= XFS_ICHGTIME_CHG;
-		}
-		if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
-			xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
+	if (mask & ATTR_ATIME) {
+		inode->i_atime = iattr->ia_atime;
+		ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+		ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
+		ip->i_update_core = 1;
 	}
-
-	/*
-	 * Change file inode change time only if ATTR_CTIME set
-	 * AND we have been called by a DMI function.
-	 */
-
-	if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
+	if (mask & ATTR_CTIME) {
 		inode->i_ctime = iattr->ia_ctime;
 		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
 		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
 		ip->i_update_core = 1;
-		timeflags &= ~XFS_ICHGTIME_CHG;
+	}
+	if (mask & ATTR_MTIME) {
+		inode->i_mtime = iattr->ia_mtime;
+		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+		ip->i_update_core = 1;
 	}
 
 	/*
-	 * Send out timestamp changes that need to be set to the
-	 * current time.  Not done when called by a DMI function.
+	 * And finally, log the inode core if any attribute in it
+	 * has been changed.
 	 */
-	if (timeflags && !(flags & XFS_ATTR_DMI))
-		xfs_ichgtime(ip, timeflags);
+	if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
+		    ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
 	XFS_STATS_INC(xs_ig_attrchg);
 
@@ -452,12 +439,10 @@ xfs_setattr(
 	 * mix so this probably isn't worth the trouble to optimize.
 	 */
 	code = 0;
-	if (tp) {
-		if (mp->m_flags & XFS_MOUNT_WSYNC)
-			xfs_trans_set_sync(tp);
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
+		xfs_trans_set_sync(tp);
 
-		code = xfs_trans_commit(tp, commit_flags);
-	}
+	code = xfs_trans_commit(tp, commit_flags);
 
 	xfs_iunlock(ip, lock_flags);
 
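The one subtle case in the reworked timestamp handling is plain truncate(): the VFS does not pass ATTR_CTIME or ATTR_MTIME there, yet both times must still change, so the size-change path stamps them itself and widens the mask before the generic per-flag copying above runs. A minimal sketch of that rule as a hypothetical helper (not a function in the patch; it mirrors the hunk that assigns current_fs_time() in xfs_setattr()):

/*
 * Sketch: a size change requested with neither ATTR_CTIME nor
 * ATTR_MTIME set is treated as truncate(), so both timestamps are
 * stamped "now" and the mask is widened so the ordinary per-flag
 * time-copying code handles them like any other setattr.
 */
static void normalize_truncate_times_sketch(struct iattr *iattr,
					    loff_t old_size,
					    struct timespec now,
					    unsigned int *mask)
{
	if (iattr->ia_size != old_size &&
	    !(*mask & (ATTR_CTIME | ATTR_MTIME))) {
		iattr->ia_ctime = iattr->ia_mtime = now;
		*mask |= ATTR_CTIME | ATTR_MTIME;
	}
}

With the mask normalized this way, every timestamp update flows through the same three ATTR_ATIME/ATTR_CTIME/ATTR_MTIME branches, and a single mask test at the end decides whether the inode core needs logging, which is what lets the old timeflags state machine be deleted.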