aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c41
-rw-r--r--fs/binfmt_elf_fdpic.c17
-rw-r--r--fs/cifs/CHANGES4
-rw-r--r--fs/cifs/cifs_dfs_ref.c3
-rw-r--r--fs/cifs/cifsfs.c3
-rw-r--r--fs/cifs/connect.c13
-rw-r--r--fs/configfs/symlink.c4
-rw-r--r--fs/ecryptfs/inode.c24
-rw-r--r--fs/exofs/inode.c17
-rw-r--r--fs/exofs/pnfs.h10
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/ext4/block_validity.c1
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/ext4_extents.h3
-rw-r--r--fs/ext4/extents.c77
-rw-r--r--fs/ext4/fsync.c16
-rw-r--r--fs/ext4/inode.c225
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/super.c7
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/fcntl.c102
-rw-r--r--fs/fs-writeback.c18
-rw-r--r--fs/gfs2/file.c38
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_inode.c6
-rw-r--r--fs/gfs2/xattr.c21
-rw-r--r--fs/hppfs/hppfs.c18
-rw-r--r--fs/jbd2/checkpoint.c15
-rw-r--r--fs/jbd2/commit.c19
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/namei.c24
-rw-r--r--fs/namespace.c14
-rw-r--r--fs/nfs/dir.c1
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/nilfs2/bmap.c4
-rw-r--r--fs/nilfs2/cpfile.c31
-rw-r--r--fs/nilfs2/direct.c17
-rw-r--r--fs/nilfs2/ioctl.c2
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c4
-rw-r--r--fs/ocfs2/Kconfig10
-rw-r--r--fs/ocfs2/Makefile7
-rw-r--r--fs/ocfs2/acl.c4
-rw-r--r--fs/ocfs2/acl.h22
-rw-r--r--fs/ocfs2/cluster/heartbeat.c6
-rw-r--r--fs/ocfs2/cluster/nodemanager.c51
-rw-r--r--fs/ocfs2/cluster/nodemanager.h7
-rw-r--r--fs/ocfs2/cluster/quorum.c16
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c18
-rw-r--r--fs/ocfs2/extent_map.c25
-rw-r--r--fs/ocfs2/file.c21
-rw-r--r--fs/ocfs2/namei.c1
-rw-r--r--fs/ocfs2/ocfs2.h8
-rw-r--r--fs/ocfs2/ocfs2_fs.h2
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/ocfs2/super.c95
-rw-r--r--fs/ocfs2/symlink.c2
-rw-r--r--fs/ocfs2/xattr.c6
-rw-r--r--fs/ocfs2/xattr.h2
-rw-r--r--fs/proc/array.c89
-rw-r--r--fs/proc/base.c1
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/quota/dquot.c3
-rw-r--r--fs/ramfs/file-nommu.c26
-rw-r--r--fs/reiserfs/bitmap.c3
-rw-r--r--fs/reiserfs/inode.c24
-rw-r--r--fs/reiserfs/ioctl.c3
-rw-r--r--fs/reiserfs/journal.c18
-rw-r--r--fs/reiserfs/lock.c9
-rw-r--r--fs/reiserfs/namei.c7
-rw-r--r--fs/reiserfs/xattr.c38
-rw-r--r--fs/reiserfs/xattr_acl.c2
-rw-r--r--fs/sysfs/dir.c14
-rw-r--r--fs/sysfs/sysfs.h15
-rw-r--r--fs/ubifs/gc.c96
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c183
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h1145
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c2
-rw-r--r--fs/xfs/xfs_alloc.c44
-rw-r--r--fs/xfs/xfs_dfrag.c106
-rw-r--r--fs/xfs/xfs_iget.c1
-rw-r--r--fs/xfs/xfs_inode.c17
-rw-r--r--fs/xfs/xfs_rtalloc.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c93
87 files changed, 1667 insertions, 1417 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 18f74ec4dce9..9d03d1ebca6f 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1001,44 +1001,6 @@ done:
1001} 1001}
1002 1002
1003/** 1003/**
1004 * v9fs_vfs_readlink - read a symlink's location
1005 * @dentry: dentry for symlink
1006 * @buffer: buffer to load symlink location into
1007 * @buflen: length of buffer
1008 *
1009 */
1010
1011static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
1012 int buflen)
1013{
1014 int retval;
1015 int ret;
1016 char *link = __getname();
1017
1018 if (unlikely(!link))
1019 return -ENOMEM;
1020
1021 if (buflen > PATH_MAX)
1022 buflen = PATH_MAX;
1023
1024 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
1025 dentry);
1026
1027 retval = v9fs_readlink(dentry, link, buflen);
1028
1029 if (retval > 0) {
1030 if ((ret = copy_to_user(buffer, link, retval)) != 0) {
1031 P9_DPRINTK(P9_DEBUG_ERROR,
1032 "problem copying to user: %d\n", ret);
1033 retval = ret;
1034 }
1035 }
1036
1037 __putname(link);
1038 return retval;
1039}
1040
1041/**
1042 * v9fs_vfs_follow_link - follow a symlink path 1004 * v9fs_vfs_follow_link - follow a symlink path
1043 * @dentry: dentry for symlink 1005 * @dentry: dentry for symlink
1044 * @nd: nameidata 1006 * @nd: nameidata
@@ -1230,7 +1192,6 @@ static const struct inode_operations v9fs_dir_inode_operations_ext = {
1230 .rmdir = v9fs_vfs_rmdir, 1192 .rmdir = v9fs_vfs_rmdir,
1231 .mknod = v9fs_vfs_mknod, 1193 .mknod = v9fs_vfs_mknod,
1232 .rename = v9fs_vfs_rename, 1194 .rename = v9fs_vfs_rename,
1233 .readlink = v9fs_vfs_readlink,
1234 .getattr = v9fs_vfs_getattr, 1195 .getattr = v9fs_vfs_getattr,
1235 .setattr = v9fs_vfs_setattr, 1196 .setattr = v9fs_vfs_setattr,
1236}; 1197};
@@ -1253,7 +1214,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
1253}; 1214};
1254 1215
1255static const struct inode_operations v9fs_symlink_inode_operations = { 1216static const struct inode_operations v9fs_symlink_inode_operations = {
1256 .readlink = v9fs_vfs_readlink, 1217 .readlink = generic_readlink,
1257 .follow_link = v9fs_vfs_follow_link, 1218 .follow_link = v9fs_vfs_follow_link,
1258 .put_link = v9fs_vfs_put_link, 1219 .put_link = v9fs_vfs_put_link,
1259 .getattr = v9fs_vfs_getattr, 1220 .getattr = v9fs_vfs_getattr,
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c25256a5c5b0..c57d9ce5ff7e 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -171,6 +171,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
171#ifdef ELF_FDPIC_PLAT_INIT 171#ifdef ELF_FDPIC_PLAT_INIT
172 unsigned long dynaddr; 172 unsigned long dynaddr;
173#endif 173#endif
174#ifndef CONFIG_MMU
175 unsigned long stack_prot;
176#endif
174 struct file *interpreter = NULL; /* to shut gcc up */ 177 struct file *interpreter = NULL; /* to shut gcc up */
175 char *interpreter_name = NULL; 178 char *interpreter_name = NULL;
176 int executable_stack; 179 int executable_stack;
@@ -316,6 +319,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
316 * defunct, deceased, etc. after this point we have to exit via 319 * defunct, deceased, etc. after this point we have to exit via
317 * error_kill */ 320 * error_kill */
318 set_personality(PER_LINUX_FDPIC); 321 set_personality(PER_LINUX_FDPIC);
322 if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
323 current->personality |= READ_IMPLIES_EXEC;
319 set_binfmt(&elf_fdpic_format); 324 set_binfmt(&elf_fdpic_format);
320 325
321 current->mm->start_code = 0; 326 current->mm->start_code = 0;
@@ -377,9 +382,13 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
377 if (stack_size < PAGE_SIZE * 2) 382 if (stack_size < PAGE_SIZE * 2)
378 stack_size = PAGE_SIZE * 2; 383 stack_size = PAGE_SIZE * 2;
379 384
385 stack_prot = PROT_READ | PROT_WRITE;
386 if (executable_stack == EXSTACK_ENABLE_X ||
387 (executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
388 stack_prot |= PROT_EXEC;
389
380 down_write(&current->mm->mmap_sem); 390 down_write(&current->mm->mmap_sem);
381 current->mm->start_brk = do_mmap(NULL, 0, stack_size, 391 current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot,
382 PROT_READ | PROT_WRITE | PROT_EXEC,
383 MAP_PRIVATE | MAP_ANONYMOUS | 392 MAP_PRIVATE | MAP_ANONYMOUS |
384 MAP_UNINITIALIZED | MAP_GROWSDOWN, 393 MAP_UNINITIALIZED | MAP_GROWSDOWN,
385 0); 394 0);
@@ -1798,11 +1807,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
1798 ELF_CORE_WRITE_EXTRA_DATA; 1807 ELF_CORE_WRITE_EXTRA_DATA;
1799#endif 1808#endif
1800 1809
1801 if (file->f_pos != offset) { 1810 if (cprm->file->f_pos != offset) {
1802 /* Sanity check */ 1811 /* Sanity check */
1803 printk(KERN_WARNING 1812 printk(KERN_WARNING
1804 "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n", 1813 "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n",
1805 file->f_pos, offset); 1814 cprm->file->f_pos, offset);
1806 } 1815 }
1807 1816
1808end_coredump: 1817end_coredump:
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 094ea65afc85..7b2600b380d7 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -5,7 +5,9 @@ have duplicated data). Fix oops in cifs_lookup. Workaround problem
5mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session. 5mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session.
6Disable use of server inode numbers when server only 6Disable use of server inode numbers when server only
7partially supports them (e.g. for one server querying inode numbers on 7partially supports them (e.g. for one server querying inode numbers on
8FindFirst fails but QPathInfo queries works). 8FindFirst fails but QPathInfo queries works). Fix oops with dfs in
9cifs_put_smb_ses. Fix mmap to work on directio mounts (needed
10for OpenOffice when on forcedirectio mount e.g.)
9 11
10Version 1.60 12Version 1.60
11------------- 13-------------
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index fea9e898c4ba..b44ce0a0711c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -269,7 +269,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
269 int err; 269 int err;
270 270
271 mntget(newmnt); 271 mntget(newmnt);
272 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist); 272 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags | MNT_SHRINKABLE, mntlist);
273 switch (err) { 273 switch (err) {
274 case 0: 274 case 0:
275 path_put(&nd->path); 275 path_put(&nd->path);
@@ -371,7 +371,6 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
371 if (IS_ERR(mnt)) 371 if (IS_ERR(mnt))
372 goto out_err; 372 goto out_err;
373 373
374 nd->path.mnt->mnt_flags |= MNT_SHRINKABLE;
375 rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list); 374 rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list);
376 375
377out: 376out:
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 29f1da761bbf..8c6a03627176 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -758,7 +758,7 @@ const struct file_operations cifs_file_ops = {
758}; 758};
759 759
760const struct file_operations cifs_file_direct_ops = { 760const struct file_operations cifs_file_direct_ops = {
761 /* no mmap, no aio, no readv - 761 /* no aio, no readv -
762 BB reevaluate whether they can be done with directio, no cache */ 762 BB reevaluate whether they can be done with directio, no cache */
763 .read = cifs_user_read, 763 .read = cifs_user_read,
764 .write = cifs_user_write, 764 .write = cifs_user_write,
@@ -767,6 +767,7 @@ const struct file_operations cifs_file_direct_ops = {
767 .lock = cifs_lock, 767 .lock = cifs_lock,
768 .fsync = cifs_fsync, 768 .fsync = cifs_fsync,
769 .flush = cifs_flush, 769 .flush = cifs_flush,
770 .mmap = cifs_file_mmap,
770 .splice_read = generic_file_splice_read, 771 .splice_read = generic_file_splice_read,
771#ifdef CONFIG_CIFS_POSIX 772#ifdef CONFIG_CIFS_POSIX
772 .unlocked_ioctl = cifs_ioctl, 773 .unlocked_ioctl = cifs_ioctl,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 63ea83ff687f..3bbcaa716b3c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2287,12 +2287,12 @@ int
2287cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2287cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2288 char *mount_data_global, const char *devname) 2288 char *mount_data_global, const char *devname)
2289{ 2289{
2290 int rc = 0; 2290 int rc;
2291 int xid; 2291 int xid;
2292 struct smb_vol *volume_info; 2292 struct smb_vol *volume_info;
2293 struct cifsSesInfo *pSesInfo = NULL; 2293 struct cifsSesInfo *pSesInfo;
2294 struct cifsTconInfo *tcon = NULL; 2294 struct cifsTconInfo *tcon;
2295 struct TCP_Server_Info *srvTcp = NULL; 2295 struct TCP_Server_Info *srvTcp;
2296 char *full_path; 2296 char *full_path;
2297 char *mount_data = mount_data_global; 2297 char *mount_data = mount_data_global;
2298#ifdef CONFIG_CIFS_DFS_UPCALL 2298#ifdef CONFIG_CIFS_DFS_UPCALL
@@ -2301,6 +2301,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2301 int referral_walks_count = 0; 2301 int referral_walks_count = 0;
2302try_mount_again: 2302try_mount_again:
2303#endif 2303#endif
2304 rc = 0;
2305 tcon = NULL;
2306 pSesInfo = NULL;
2307 srvTcp = NULL;
2304 full_path = NULL; 2308 full_path = NULL;
2305 2309
2306 xid = GetXid(); 2310 xid = GetXid();
@@ -2597,6 +2601,7 @@ remote_path_check:
2597 2601
2598 cleanup_volume_info(&volume_info); 2602 cleanup_volume_info(&volume_info);
2599 referral_walks_count++; 2603 referral_walks_count++;
2604 FreeXid(xid);
2600 goto try_mount_again; 2605 goto try_mount_again;
2601 } 2606 }
2602#else /* No DFS support, return error on mount */ 2607#else /* No DFS support, return error on mount */
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index c8afa6b1d91d..32a5f46b1157 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -121,8 +121,10 @@ static int get_target(const char *symname, struct path *path,
121 ret = -ENOENT; 121 ret = -ENOENT;
122 path_put(path); 122 path_put(path);
123 } 123 }
124 } else 124 } else {
125 ret = -EPERM; 125 ret = -EPERM;
126 path_put(path);
127 }
126 } 128 }
127 129
128 return ret; 130 return ret;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 429ca0b3ba08..7f8545032930 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -715,31 +715,31 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
715 /* Released in ecryptfs_put_link(); only release here on error */ 715 /* Released in ecryptfs_put_link(); only release here on error */
716 buf = kmalloc(len, GFP_KERNEL); 716 buf = kmalloc(len, GFP_KERNEL);
717 if (!buf) { 717 if (!buf) {
718 rc = -ENOMEM; 718 buf = ERR_PTR(-ENOMEM);
719 goto out; 719 goto out;
720 } 720 }
721 old_fs = get_fs(); 721 old_fs = get_fs();
722 set_fs(get_ds()); 722 set_fs(get_ds());
723 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); 723 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
724 set_fs(old_fs); 724 set_fs(old_fs);
725 if (rc < 0) 725 if (rc < 0) {
726 goto out_free; 726 kfree(buf);
727 else 727 buf = ERR_PTR(rc);
728 } else
728 buf[rc] = '\0'; 729 buf[rc] = '\0';
729 rc = 0;
730 nd_set_link(nd, buf);
731 goto out;
732out_free:
733 kfree(buf);
734out: 730out:
735 return ERR_PTR(rc); 731 nd_set_link(nd, buf);
732 return NULL;
736} 733}
737 734
738static void 735static void
739ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) 736ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
740{ 737{
741 /* Free the char* */ 738 char *buf = nd_get_link(nd);
742 kfree(nd_get_link(nd)); 739 if (!IS_ERR(buf)) {
740 /* Free the char* */
741 kfree(buf);
742 }
743} 743}
744 744
745/** 745/**
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 698a8636d39c..2afbcebeda71 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -738,13 +738,28 @@ static int exofs_write_begin_export(struct file *file,
738 fsdata); 738 fsdata);
739} 739}
740 740
741static int exofs_write_end(struct file *file, struct address_space *mapping,
742 loff_t pos, unsigned len, unsigned copied,
743 struct page *page, void *fsdata)
744{
745 struct inode *inode = mapping->host;
746 /* According to comment in simple_write_end i_mutex is held */
747 loff_t i_size = inode->i_size;
748 int ret;
749
750 ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
751 if (i_size != inode->i_size)
752 mark_inode_dirty(inode);
753 return ret;
754}
755
741const struct address_space_operations exofs_aops = { 756const struct address_space_operations exofs_aops = {
742 .readpage = exofs_readpage, 757 .readpage = exofs_readpage,
743 .readpages = exofs_readpages, 758 .readpages = exofs_readpages,
744 .writepage = exofs_writepage, 759 .writepage = exofs_writepage,
745 .writepages = exofs_writepages, 760 .writepages = exofs_writepages,
746 .write_begin = exofs_write_begin_export, 761 .write_begin = exofs_write_begin_export,
747 .write_end = simple_write_end, 762 .write_end = exofs_write_end,
748}; 763};
749 764
750/****************************************************************************** 765/******************************************************************************
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
index 423033addd1f..c52e9888b8ab 100644
--- a/fs/exofs/pnfs.h
+++ b/fs/exofs/pnfs.h
@@ -15,13 +15,7 @@
15#ifndef __EXOFS_PNFS_H__ 15#ifndef __EXOFS_PNFS_H__
16#define __EXOFS_PNFS_H__ 16#define __EXOFS_PNFS_H__
17 17
18#if defined(CONFIG_PNFS) 18#if ! defined(__PNFS_OSD_XDR_H__)
19
20
21/* FIXME: move this file to: linux/exportfs/pnfs_osd_xdr.h */
22#include "../nfs/objlayout/pnfs_osd_xdr.h"
23
24#else /* defined(CONFIG_PNFS) */
25 19
26enum pnfs_iomode { 20enum pnfs_iomode {
27 IOMODE_READ = 1, 21 IOMODE_READ = 1,
@@ -46,6 +40,6 @@ struct pnfs_osd_data_map {
46 u32 odm_raid_algorithm; 40 u32 odm_raid_algorithm;
47}; 41};
48 42
49#endif /* else defined(CONFIG_PNFS) */ 43#endif /* ! defined(__PNFS_OSD_XDR_H__) */
50 44
51#endif /* __EXOFS_PNFS_H__ */ 45#endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 9acf7e808139..9ed1bb1f319f 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -28,6 +28,7 @@ config EXT4_FS
28 28
29config EXT4_USE_FOR_EXT23 29config EXT4_USE_FOR_EXT23
30 bool "Use ext4 for ext2/ext3 file systems" 30 bool "Use ext4 for ext2/ext3 file systems"
31 depends on EXT4_FS
31 depends on EXT3_FS=n || EXT2_FS=n 32 depends on EXT3_FS=n || EXT2_FS=n
32 default y 33 default y
33 help 34 help
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 4df8621ec31c..a60ab9aad57d 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/version.h>
20#include <linux/blkdev.h> 19#include <linux/blkdev.h>
21#include <linux/mutex.h> 20#include <linux/mutex.h>
22#include "ext4.h" 21#include "ext4.h"
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 56f9271ee8cc..af7b62699ea9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -699,6 +699,8 @@ struct ext4_inode_info {
699 unsigned int i_reserved_meta_blocks; 699 unsigned int i_reserved_meta_blocks;
700 unsigned int i_allocated_meta_blocks; 700 unsigned int i_allocated_meta_blocks;
701 unsigned short i_delalloc_reserved_flag; 701 unsigned short i_delalloc_reserved_flag;
702 sector_t i_da_metadata_calc_last_lblock;
703 int i_da_metadata_calc_len;
702 704
703 /* on-disk additional length */ 705 /* on-disk additional length */
704 __u16 i_extra_isize; 706 __u16 i_extra_isize;
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2ca686454e87..bdb6ce7e2eb4 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); 225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
226} 226}
227 227
228extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); 228extern int ext4_ext_calc_metadata_amount(struct inode *inode,
229 sector_t lblocks);
229extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); 230extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
230extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 231extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
231extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 232extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3a7928f825e4..7d7b74e94687 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
296 * to allocate @blocks 296 * to allocate @blocks
297 * Worse case is one block per extent 297 * Worse case is one block per extent
298 */ 298 */
299int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) 299int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
300{ 300{
301 int lcap, icap, rcap, leafs, idxs, num; 301 struct ext4_inode_info *ei = EXT4_I(inode);
302 int newextents = blocks; 302 int idxs, num = 0;
303
304 rcap = ext4_ext_space_root_idx(inode, 0);
305 lcap = ext4_ext_space_block(inode, 0);
306 icap = ext4_ext_space_block_idx(inode, 0);
307 303
308 /* number of new leaf blocks needed */ 304 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
309 num = leafs = (newextents + lcap - 1) / lcap; 305 / sizeof(struct ext4_extent_idx));
310 306
311 /* 307 /*
312 * Worse case, we need separate index block(s) 308 * If the new delayed allocation block is contiguous with the
313 * to link all new leaf blocks 309 * previous da block, it can share index blocks with the
310 * previous block, so we only need to allocate a new index
311 * block every idxs leaf blocks. At ldxs**2 blocks, we need
312 * an additional index block, and at ldxs**3 blocks, yet
313 * another index blocks.
314 */ 314 */
315 idxs = (leafs + icap - 1) / icap; 315 if (ei->i_da_metadata_calc_len &&
316 do { 316 ei->i_da_metadata_calc_last_lblock+1 == lblock) {
317 num += idxs; 317 if ((ei->i_da_metadata_calc_len % idxs) == 0)
318 idxs = (idxs + icap - 1) / icap; 318 num++;
319 } while (idxs > rcap); 319 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
320 num++;
321 if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
322 num++;
323 ei->i_da_metadata_calc_len = 0;
324 } else
325 ei->i_da_metadata_calc_len++;
326 ei->i_da_metadata_calc_last_lblock++;
327 return num;
328 }
320 329
321 return num; 330 /*
331 * In the worst case we need a new set of index blocks at
332 * every level of the inode's extent tree.
333 */
334 ei->i_da_metadata_calc_len = 1;
335 ei->i_da_metadata_calc_last_lblock = lblock;
336 return ext_depth(inode) + 1;
322} 337}
323 338
324static int 339static int
@@ -3023,6 +3038,14 @@ out:
3023 return err; 3038 return err;
3024} 3039}
3025 3040
3041static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3042 sector_t block, int count)
3043{
3044 int i;
3045 for (i = 0; i < count; i++)
3046 unmap_underlying_metadata(bdev, block + i);
3047}
3048
3026static int 3049static int
3027ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3050ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3028 ext4_lblk_t iblock, unsigned int max_blocks, 3051 ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3098,6 +3121,18 @@ out:
3098 } else 3121 } else
3099 allocated = ret; 3122 allocated = ret;
3100 set_buffer_new(bh_result); 3123 set_buffer_new(bh_result);
3124 /*
3125 * if we allocated more blocks than requested
3126 * we need to make sure we unmap the extra block
3127 * allocated. The actual needed block will get
3128 * unmapped later when we find the buffer_head marked
3129 * new.
3130 */
3131 if (allocated > max_blocks) {
3132 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
3133 newblock + max_blocks,
3134 allocated - max_blocks);
3135 }
3101map_out: 3136map_out:
3102 set_buffer_mapped(bh_result); 3137 set_buffer_mapped(bh_result);
3103out1: 3138out1:
@@ -3190,7 +3225,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3190 * this situation is possible, though, _during_ tree modification; 3225 * this situation is possible, though, _during_ tree modification;
3191 * this is why assert can't be put in ext4_ext_find_extent() 3226 * this is why assert can't be put in ext4_ext_find_extent()
3192 */ 3227 */
3193 BUG_ON(path[depth].p_ext == NULL && depth != 0); 3228 if (path[depth].p_ext == NULL && depth != 0) {
3229 ext4_error(inode->i_sb, __func__, "bad extent address "
3230 "inode: %lu, iblock: %d, depth: %d",
3231 inode->i_ino, iblock, depth);
3232 err = -EIO;
3233 goto out2;
3234 }
3194 eh = path[depth].p_hdr; 3235 eh = path[depth].p_hdr;
3195 3236
3196 ex = path[depth].p_ext; 3237 ex = path[depth].p_ext;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0b22497d92e1..98bd140aad01 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
88 return ext4_force_commit(inode->i_sb); 88 return ext4_force_commit(inode->i_sb);
89 89
90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; 90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
91 if (jbd2_log_start_commit(journal, commit_tid)) 91 if (jbd2_log_start_commit(journal, commit_tid)) {
92 /*
93 * When the journal is on a different device than the
94 * fs data disk, we need to issue the barrier in
95 * writeback mode. (In ordered mode, the jbd2 layer
96 * will take care of issuing the barrier. In
97 * data=journal, all of the data blocks are written to
98 * the journal device.)
99 */
100 if (ext4_should_writeback_data(inode) &&
101 (journal->j_fs_dev != journal->j_dev) &&
102 (journal->j_flags & JBD2_BARRIER))
103 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
92 jbd2_log_wait_commit(journal, commit_tid); 104 jbd2_log_wait_commit(journal, commit_tid);
93 else if (journal->j_flags & JBD2_BARRIER) 105 } else if (journal->j_flags & JBD2_BARRIER)
94 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 106 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
95 return ret; 107 return ret;
96} 108}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ab807963a614..c818972c8302 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1009,77 +1009,88 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
1009 return &EXT4_I(inode)->i_reserved_quota; 1009 return &EXT4_I(inode)->i_reserved_quota;
1010} 1010}
1011#endif 1011#endif
1012
1012/* 1013/*
1013 * Calculate the number of metadata blocks need to reserve 1014 * Calculate the number of metadata blocks need to reserve
1014 * to allocate @blocks for non extent file based file 1015 * to allocate a new block at @lblocks for non extent file based file
1015 */ 1016 */
1016static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) 1017static int ext4_indirect_calc_metadata_amount(struct inode *inode,
1018 sector_t lblock)
1017{ 1019{
1018 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1020 struct ext4_inode_info *ei = EXT4_I(inode);
1019 int ind_blks, dind_blks, tind_blks; 1021 int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
1020 1022 int blk_bits;
1021 /* number of new indirect blocks needed */
1022 ind_blks = (blocks + icap - 1) / icap;
1023 1023
1024 dind_blks = (ind_blks + icap - 1) / icap; 1024 if (lblock < EXT4_NDIR_BLOCKS)
1025 return 0;
1025 1026
1026 tind_blks = 1; 1027 lblock -= EXT4_NDIR_BLOCKS;
1027 1028
1028 return ind_blks + dind_blks + tind_blks; 1029 if (ei->i_da_metadata_calc_len &&
1030 (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
1031 ei->i_da_metadata_calc_len++;
1032 return 0;
1033 }
1034 ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
1035 ei->i_da_metadata_calc_len = 1;
1036 blk_bits = roundup_pow_of_two(lblock + 1);
1037 return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
1029} 1038}
1030 1039
1031/* 1040/*
1032 * Calculate the number of metadata blocks need to reserve 1041 * Calculate the number of metadata blocks need to reserve
1033 * to allocate given number of blocks 1042 * to allocate a block located at @lblock
1034 */ 1043 */
1035static int ext4_calc_metadata_amount(struct inode *inode, int blocks) 1044static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
1036{ 1045{
1037 if (!blocks)
1038 return 0;
1039
1040 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 1046 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1041 return ext4_ext_calc_metadata_amount(inode, blocks); 1047 return ext4_ext_calc_metadata_amount(inode, lblock);
1042 1048
1043 return ext4_indirect_calc_metadata_amount(inode, blocks); 1049 return ext4_indirect_calc_metadata_amount(inode, lblock);
1044} 1050}
1045 1051
1052/*
1053 * Called with i_data_sem down, which is important since we can call
1054 * ext4_discard_preallocations() from here.
1055 */
1046static void ext4_da_update_reserve_space(struct inode *inode, int used) 1056static void ext4_da_update_reserve_space(struct inode *inode, int used)
1047{ 1057{
1048 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1058 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1049 int total, mdb, mdb_free, mdb_claim = 0; 1059 struct ext4_inode_info *ei = EXT4_I(inode);
1050 1060 int mdb_free = 0;
1051 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1061
1052 /* recalculate the number of metablocks still need to be reserved */ 1062 spin_lock(&ei->i_block_reservation_lock);
1053 total = EXT4_I(inode)->i_reserved_data_blocks - used; 1063 if (unlikely(used > ei->i_reserved_data_blocks)) {
1054 mdb = ext4_calc_metadata_amount(inode, total); 1064 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
1055 1065 "with only %d reserved data blocks\n",
1056 /* figure out how many metablocks to release */ 1066 __func__, inode->i_ino, used,
1057 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1067 ei->i_reserved_data_blocks);
1058 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1068 WARN_ON(1);
1059 1069 used = ei->i_reserved_data_blocks;
1060 if (mdb_free) { 1070 }
1061 /* Account for allocated meta_blocks */ 1071
1062 mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks; 1072 /* Update per-inode reservations */
1063 BUG_ON(mdb_free < mdb_claim); 1073 ei->i_reserved_data_blocks -= used;
1064 mdb_free -= mdb_claim; 1074 used += ei->i_allocated_meta_blocks;
1065 1075 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
1066 /* update fs dirty blocks counter */ 1076 ei->i_allocated_meta_blocks = 0;
1077 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
1078
1079 if (ei->i_reserved_data_blocks == 0) {
1080 /*
1081 * We can release all of the reserved metadata blocks
1082 * only when we have written all of the delayed
1083 * allocation blocks.
1084 */
1085 mdb_free = ei->i_reserved_meta_blocks;
1086 ei->i_reserved_meta_blocks = 0;
1087 ei->i_da_metadata_calc_len = 0;
1067 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); 1088 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
1068 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1069 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1070 } 1089 }
1071
1072 /* update per-inode reservations */
1073 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1074 EXT4_I(inode)->i_reserved_data_blocks -= used;
1075 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
1076 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1090 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1077 1091
1078 vfs_dq_claim_block(inode, used + mdb_claim); 1092 /* Update quota subsystem */
1079 1093 vfs_dq_claim_block(inode, used);
1080 /*
1081 * free those over-booking quota for metadata blocks
1082 */
1083 if (mdb_free) 1094 if (mdb_free)
1084 vfs_dq_release_reservation_block(inode, mdb_free); 1095 vfs_dq_release_reservation_block(inode, mdb_free);
1085 1096
@@ -1088,7 +1099,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1088 * there aren't any writers on the inode, we can discard the 1099 * there aren't any writers on the inode, we can discard the
1089 * inode's preallocations. 1100 * inode's preallocations.
1090 */ 1101 */
1091 if (!total && (atomic_read(&inode->i_writecount) == 0)) 1102 if ((ei->i_reserved_data_blocks == 0) &&
1103 (atomic_read(&inode->i_writecount) == 0))
1092 ext4_discard_preallocations(inode); 1104 ext4_discard_preallocations(inode);
1093} 1105}
1094 1106
@@ -1797,11 +1809,15 @@ static int ext4_journalled_write_end(struct file *file,
1797 return ret ? ret : copied; 1809 return ret ? ret : copied;
1798} 1810}
1799 1811
1800static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1812/*
1813 * Reserve a single block located at lblock
1814 */
1815static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
1801{ 1816{
1802 int retries = 0; 1817 int retries = 0;
1803 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1818 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1804 unsigned long md_needed, mdblocks, total = 0; 1819 struct ext4_inode_info *ei = EXT4_I(inode);
1820 unsigned long md_needed, md_reserved;
1805 1821
1806 /* 1822 /*
1807 * recalculate the amount of metadata blocks to reserve 1823 * recalculate the amount of metadata blocks to reserve
@@ -1809,35 +1825,43 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1809 * worse case is one extent per block 1825 * worse case is one extent per block
1810 */ 1826 */
1811repeat: 1827repeat:
1812 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1828 spin_lock(&ei->i_block_reservation_lock);
1813 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; 1829 md_reserved = ei->i_reserved_meta_blocks;
1814 mdblocks = ext4_calc_metadata_amount(inode, total); 1830 md_needed = ext4_calc_metadata_amount(inode, lblock);
1815 BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); 1831 spin_unlock(&ei->i_block_reservation_lock);
1816
1817 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1818 total = md_needed + nrblocks;
1819 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1820 1832
1821 /* 1833 /*
1822 * Make quota reservation here to prevent quota overflow 1834 * Make quota reservation here to prevent quota overflow
1823 * later. Real quota accounting is done at pages writeout 1835 * later. Real quota accounting is done at pages writeout
1824 * time. 1836 * time.
1825 */ 1837 */
1826 if (vfs_dq_reserve_block(inode, total)) 1838 if (vfs_dq_reserve_block(inode, md_needed + 1)) {
1839 /*
1840 * We tend to badly over-estimate the amount of
1841 * metadata blocks which are needed, so if we have
1842 * reserved any metadata blocks, try to force out the
1843 * inode and see if we have any better luck.
1844 */
1845 if (md_reserved && retries++ <= 3)
1846 goto retry;
1827 return -EDQUOT; 1847 return -EDQUOT;
1848 }
1828 1849
1829 if (ext4_claim_free_blocks(sbi, total)) { 1850 if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
1830 vfs_dq_release_reservation_block(inode, total); 1851 vfs_dq_release_reservation_block(inode, md_needed + 1);
1831 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1852 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1853 retry:
1854 if (md_reserved)
1855 write_inode_now(inode, (retries == 3));
1832 yield(); 1856 yield();
1833 goto repeat; 1857 goto repeat;
1834 } 1858 }
1835 return -ENOSPC; 1859 return -ENOSPC;
1836 } 1860 }
1837 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1861 spin_lock(&ei->i_block_reservation_lock);
1838 EXT4_I(inode)->i_reserved_data_blocks += nrblocks; 1862 ei->i_reserved_data_blocks++;
1839 EXT4_I(inode)->i_reserved_meta_blocks += md_needed; 1863 ei->i_reserved_meta_blocks += md_needed;
1840 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1864 spin_unlock(&ei->i_block_reservation_lock);
1841 1865
1842 return 0; /* success */ 1866 return 0; /* success */
1843} 1867}
@@ -1845,49 +1869,46 @@ repeat:
1845static void ext4_da_release_space(struct inode *inode, int to_free) 1869static void ext4_da_release_space(struct inode *inode, int to_free)
1846{ 1870{
1847 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1871 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1848 int total, mdb, mdb_free, release; 1872 struct ext4_inode_info *ei = EXT4_I(inode);
1849 1873
1850 if (!to_free) 1874 if (!to_free)
1851 return; /* Nothing to release, exit */ 1875 return; /* Nothing to release, exit */
1852 1876
1853 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1877 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1854 1878
1855 if (!EXT4_I(inode)->i_reserved_data_blocks) { 1879 if (unlikely(to_free > ei->i_reserved_data_blocks)) {
1856 /* 1880 /*
1857 * if there is no reserved blocks, but we try to free some 1881 * if there aren't enough reserved blocks, then the
1858 * then the counter is messed up somewhere. 1882 * counter is messed up somewhere. Since this
1859 * but since this function is called from invalidate 1883 * function is called from invalidate page, it's
1860 * page, it's harmless to return without any action 1884 * harmless to return without any action.
1861 */ 1885 */
1862 printk(KERN_INFO "ext4 delalloc try to release %d reserved " 1886 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
1863 "blocks for inode %lu, but there is no reserved " 1887 "ino %lu, to_free %d with only %d reserved "
1864 "data blocks\n", to_free, inode->i_ino); 1888 "data blocks\n", inode->i_ino, to_free,
1865 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1889 ei->i_reserved_data_blocks);
1866 return; 1890 WARN_ON(1);
1891 to_free = ei->i_reserved_data_blocks;
1867 } 1892 }
1893 ei->i_reserved_data_blocks -= to_free;
1868 1894
1869 /* recalculate the number of metablocks still need to be reserved */ 1895 if (ei->i_reserved_data_blocks == 0) {
1870 total = EXT4_I(inode)->i_reserved_data_blocks - to_free; 1896 /*
1871 mdb = ext4_calc_metadata_amount(inode, total); 1897 * We can release all of the reserved metadata blocks
1872 1898 * only when we have written all of the delayed
1873 /* figure out how many metablocks to release */ 1899 * allocation blocks.
1874 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1900 */
1875 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1901 to_free += ei->i_reserved_meta_blocks;
1876 1902 ei->i_reserved_meta_blocks = 0;
1877 release = to_free + mdb_free; 1903 ei->i_da_metadata_calc_len = 0;
1878 1904 }
1879 /* update fs dirty blocks counter for truncate case */
1880 percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
1881 1905
1882 /* update per-inode reservations */ 1906 /* update fs dirty blocks counter */
1883 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); 1907 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
1884 EXT4_I(inode)->i_reserved_data_blocks -= to_free;
1885 1908
1886 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1887 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1888 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1909 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1889 1910
1890 vfs_dq_release_reservation_block(inode, release); 1911 vfs_dq_release_reservation_block(inode, to_free);
1891} 1912}
1892 1913
1893static void ext4_da_page_release_reservation(struct page *page, 1914static void ext4_da_page_release_reservation(struct page *page,
@@ -2493,7 +2514,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2493 * XXX: __block_prepare_write() unmaps passed block, 2514 * XXX: __block_prepare_write() unmaps passed block,
2494 * is it OK? 2515 * is it OK?
2495 */ 2516 */
2496 ret = ext4_da_reserve_space(inode, 1); 2517 ret = ext4_da_reserve_space(inode, iblock);
2497 if (ret) 2518 if (ret)
2498 /* not enough space to reserve */ 2519 /* not enough space to reserve */
2499 return ret; 2520 return ret;
@@ -2967,8 +2988,7 @@ retry:
2967out_writepages: 2988out_writepages:
2968 if (!no_nrwrite_index_update) 2989 if (!no_nrwrite_index_update)
2969 wbc->no_nrwrite_index_update = 0; 2990 wbc->no_nrwrite_index_update = 0;
2970 if (wbc->nr_to_write > nr_to_writebump) 2991 wbc->nr_to_write -= nr_to_writebump;
2971 wbc->nr_to_write -= nr_to_writebump;
2972 wbc->range_start = range_start; 2992 wbc->range_start = range_start;
2973 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); 2993 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
2974 return ret; 2994 return ret;
@@ -2993,11 +3013,18 @@ static int ext4_nonda_switch(struct super_block *sb)
2993 if (2 * free_blocks < 3 * dirty_blocks || 3013 if (2 * free_blocks < 3 * dirty_blocks ||
2994 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { 3014 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
2995 /* 3015 /*
2996 * free block count is less that 150% of dirty blocks 3016 * free block count is less than 150% of dirty blocks
2997 * or free blocks is less that watermark 3017 * or free blocks is less than watermark
2998 */ 3018 */
2999 return 1; 3019 return 1;
3000 } 3020 }
3021 /*
3022 * Even if we don't switch but are nearing capacity,
3023 * start pushing delalloc when 1/2 of free blocks are dirty.
3024 */
3025 if (free_blocks < 2 * dirty_blocks)
3026 writeback_inodes_sb_if_idle(sb);
3027
3001 return 0; 3028 return 0;
3002} 3029}
3003 3030
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 0ca811061bc7..436521cae456 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -17,7 +17,6 @@
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/mutex.h> 21#include <linux/mutex.h>
23#include "ext4_jbd2.h" 22#include "ext4_jbd2.h"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6ed9aa91f27d..735c20d5fd56 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
702 ei->i_reserved_data_blocks = 0; 702 ei->i_reserved_data_blocks = 0;
703 ei->i_reserved_meta_blocks = 0; 703 ei->i_reserved_meta_blocks = 0;
704 ei->i_allocated_meta_blocks = 0; 704 ei->i_allocated_meta_blocks = 0;
705 ei->i_da_metadata_calc_len = 0;
705 ei->i_delalloc_reserved_flag = 0; 706 ei->i_delalloc_reserved_flag = 0;
706 spin_lock_init(&(ei->i_block_reservation_lock)); 707 spin_lock_init(&(ei->i_block_reservation_lock));
707#ifdef CONFIG_QUOTA 708#ifdef CONFIG_QUOTA
@@ -2174,9 +2175,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2174 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2175 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2175 2176
2176 return snprintf(buf, PAGE_SIZE, "%llu\n", 2177 return snprintf(buf, PAGE_SIZE, "%llu\n",
2177 sbi->s_kbytes_written + 2178 (unsigned long long)(sbi->s_kbytes_written +
2178 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2179 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2179 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 2180 EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2180} 2181}
2181 2182
2182static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2183static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
@@ -4005,6 +4006,7 @@ static inline void unregister_as_ext2(void)
4005{ 4006{
4006 unregister_filesystem(&ext2_fs_type); 4007 unregister_filesystem(&ext2_fs_type);
4007} 4008}
4009MODULE_ALIAS("ext2");
4008#else 4010#else
4009static inline void register_as_ext2(void) { } 4011static inline void register_as_ext2(void) { }
4010static inline void unregister_as_ext2(void) { } 4012static inline void unregister_as_ext2(void) { }
@@ -4031,6 +4033,7 @@ static inline void unregister_as_ext3(void)
4031{ 4033{
4032 unregister_filesystem(&ext3_fs_type); 4034 unregister_filesystem(&ext3_fs_type);
4033} 4035}
4036MODULE_ALIAS("ext3");
4034#else 4037#else
4035static inline void register_as_ext3(void) { } 4038static inline void register_as_ext3(void) { }
4036static inline void unregister_as_ext3(void) { } 4039static inline void unregister_as_ext3(void) { }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 83218bebbc7c..f3a2f7ed45aa 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1332,6 +1332,8 @@ retry:
1332 goto cleanup; 1332 goto cleanup;
1333 kfree(b_entry_name); 1333 kfree(b_entry_name);
1334 kfree(buffer); 1334 kfree(buffer);
1335 b_entry_name = NULL;
1336 buffer = NULL;
1335 brelse(is->iloc.bh); 1337 brelse(is->iloc.bh);
1336 kfree(is); 1338 kfree(is);
1337 kfree(bs); 1339 kfree(bs);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2cf93ec40a67..97e01dc0d95f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -618,60 +618,90 @@ static DEFINE_RWLOCK(fasync_lock);
618static struct kmem_cache *fasync_cache __read_mostly; 618static struct kmem_cache *fasync_cache __read_mostly;
619 619
620/* 620/*
621 * fasync_helper() is used by almost all character device drivers 621 * Remove a fasync entry. If successfully removed, return
622 * to set up the fasync queue. It returns negative on error, 0 if it did 622 * positive and clear the FASYNC flag. If no entry exists,
623 * no changes and positive if it added/deleted the entry. 623 * do nothing and return 0.
624 *
625 * NOTE! It is very important that the FASYNC flag always
626 * match the state "is the filp on a fasync list".
627 *
628 * We always take the 'filp->f_lock', in since fasync_lock
629 * needs to be irq-safe.
624 */ 630 */
625int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) 631static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
626{ 632{
627 struct fasync_struct *fa, **fp; 633 struct fasync_struct *fa, **fp;
628 struct fasync_struct *new = NULL;
629 int result = 0; 634 int result = 0;
630 635
631 if (on) { 636 spin_lock(&filp->f_lock);
632 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); 637 write_lock_irq(&fasync_lock);
633 if (!new) 638 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
634 return -ENOMEM; 639 if (fa->fa_file != filp)
640 continue;
641 *fp = fa->fa_next;
642 kmem_cache_free(fasync_cache, fa);
643 filp->f_flags &= ~FASYNC;
644 result = 1;
645 break;
635 } 646 }
647 write_unlock_irq(&fasync_lock);
648 spin_unlock(&filp->f_lock);
649 return result;
650}
651
652/*
653 * Add a fasync entry. Return negative on error, positive if
654 * added, and zero if did nothing but change an existing one.
655 *
656 * NOTE! It is very important that the FASYNC flag always
657 * match the state "is the filp on a fasync list".
658 */
659static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
660{
661 struct fasync_struct *new, *fa, **fp;
662 int result = 0;
663
664 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
665 if (!new)
666 return -ENOMEM;
636 667
637 /*
638 * We need to take f_lock first since it's not an IRQ-safe
639 * lock.
640 */
641 spin_lock(&filp->f_lock); 668 spin_lock(&filp->f_lock);
642 write_lock_irq(&fasync_lock); 669 write_lock_irq(&fasync_lock);
643 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 670 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
644 if (fa->fa_file == filp) { 671 if (fa->fa_file != filp)
645 if(on) { 672 continue;
646 fa->fa_fd = fd; 673 fa->fa_fd = fd;
647 kmem_cache_free(fasync_cache, new); 674 kmem_cache_free(fasync_cache, new);
648 } else { 675 goto out;
649 *fp = fa->fa_next;
650 kmem_cache_free(fasync_cache, fa);
651 result = 1;
652 }
653 goto out;
654 }
655 } 676 }
656 677
657 if (on) { 678 new->magic = FASYNC_MAGIC;
658 new->magic = FASYNC_MAGIC; 679 new->fa_file = filp;
659 new->fa_file = filp; 680 new->fa_fd = fd;
660 new->fa_fd = fd; 681 new->fa_next = *fapp;
661 new->fa_next = *fapp; 682 *fapp = new;
662 *fapp = new; 683 result = 1;
663 result = 1; 684 filp->f_flags |= FASYNC;
664 } 685
665out: 686out:
666 if (on)
667 filp->f_flags |= FASYNC;
668 else
669 filp->f_flags &= ~FASYNC;
670 write_unlock_irq(&fasync_lock); 687 write_unlock_irq(&fasync_lock);
671 spin_unlock(&filp->f_lock); 688 spin_unlock(&filp->f_lock);
672 return result; 689 return result;
673} 690}
674 691
692/*
693 * fasync_helper() is used by almost all character device drivers
694 * to set up the fasync queue, and for regular files by the file
695 * lease code. It returns negative on error, 0 if it did no changes
696 * and positive if it added/deleted the entry.
697 */
698int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
699{
700 if (!on)
701 return fasync_remove_entry(filp, fapp);
702 return fasync_add_entry(fd, filp, fapp);
703}
704
675EXPORT_SYMBOL(fasync_helper); 705EXPORT_SYMBOL(fasync_helper);
676 706
677void __kill_fasync(struct fasync_struct *fa, int sig, int band) 707void __kill_fasync(struct fasync_struct *fa, int sig, int band)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 49bc1b8e8f19..1a7c42c64ff4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -242,6 +242,7 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
242/** 242/**
243 * bdi_start_writeback - start writeback 243 * bdi_start_writeback - start writeback
244 * @bdi: the backing device to write from 244 * @bdi: the backing device to write from
245 * @sb: write inodes from this super_block
245 * @nr_pages: the number of pages to write 246 * @nr_pages: the number of pages to write
246 * 247 *
247 * Description: 248 * Description:
@@ -1187,6 +1188,23 @@ void writeback_inodes_sb(struct super_block *sb)
1187EXPORT_SYMBOL(writeback_inodes_sb); 1188EXPORT_SYMBOL(writeback_inodes_sb);
1188 1189
1189/** 1190/**
1191 * writeback_inodes_sb_if_idle - start writeback if none underway
1192 * @sb: the superblock
1193 *
1194 * Invoke writeback_inodes_sb if no writeback is currently underway.
1195 * Returns 1 if writeback was started, 0 if not.
1196 */
1197int writeback_inodes_sb_if_idle(struct super_block *sb)
1198{
1199 if (!writeback_in_progress(sb->s_bdi)) {
1200 writeback_inodes_sb(sb);
1201 return 1;
1202 } else
1203 return 0;
1204}
1205EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1206
1207/**
1190 * sync_inodes_sb - sync sb inode pages 1208 * sync_inodes_sb - sync sb inode pages
1191 * @sb: the superblock 1209 * @sb: the superblock
1192 * 1210 *
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4eb308aa3234..a6abbae8a278 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -569,6 +569,40 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
569 return ret; 569 return ret;
570} 570}
571 571
572/**
573 * gfs2_file_aio_write - Perform a write to a file
574 * @iocb: The io context
575 * @iov: The data to write
576 * @nr_segs: Number of @iov segments
577 * @pos: The file position
578 *
579 * We have to do a lock/unlock here to refresh the inode size for
580 * O_APPEND writes, otherwise we can land up writing at the wrong
581 * offset. There is still a race, but provided the app is using its
582 * own file locking, this will make O_APPEND work as expected.
583 *
584 */
585
586static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
587 unsigned long nr_segs, loff_t pos)
588{
589 struct file *file = iocb->ki_filp;
590
591 if (file->f_flags & O_APPEND) {
592 struct dentry *dentry = file->f_dentry;
593 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
594 struct gfs2_holder gh;
595 int ret;
596
597 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
598 if (ret)
599 return ret;
600 gfs2_glock_dq_uninit(&gh);
601 }
602
603 return generic_file_aio_write(iocb, iov, nr_segs, pos);
604}
605
572#ifdef CONFIG_GFS2_FS_LOCKING_DLM 606#ifdef CONFIG_GFS2_FS_LOCKING_DLM
573 607
574/** 608/**
@@ -711,7 +745,7 @@ const struct file_operations gfs2_file_fops = {
711 .read = do_sync_read, 745 .read = do_sync_read,
712 .aio_read = generic_file_aio_read, 746 .aio_read = generic_file_aio_read,
713 .write = do_sync_write, 747 .write = do_sync_write,
714 .aio_write = generic_file_aio_write, 748 .aio_write = gfs2_file_aio_write,
715 .unlocked_ioctl = gfs2_ioctl, 749 .unlocked_ioctl = gfs2_ioctl,
716 .mmap = gfs2_mmap, 750 .mmap = gfs2_mmap,
717 .open = gfs2_open, 751 .open = gfs2_open,
@@ -741,7 +775,7 @@ const struct file_operations gfs2_file_fops_nolock = {
741 .read = do_sync_read, 775 .read = do_sync_read,
742 .aio_read = generic_file_aio_read, 776 .aio_read = generic_file_aio_read,
743 .write = do_sync_write, 777 .write = do_sync_write,
744 .aio_write = generic_file_aio_write, 778 .aio_write = gfs2_file_aio_write,
745 .unlocked_ioctl = gfs2_ioctl, 779 .unlocked_ioctl = gfs2_ioctl,
746 .mmap = gfs2_mmap, 780 .mmap = gfs2_mmap,
747 .open = gfs2_open, 781 .open = gfs2_open,
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index cb8d7a93d5ec..6f68a5f18eb8 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -121,7 +121,7 @@ struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
121 if (aspace) { 121 if (aspace) {
122 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS); 122 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS);
123 aspace->i_mapping->a_ops = &aspace_aops; 123 aspace->i_mapping->a_ops = &aspace_aops;
124 aspace->i_size = ~0ULL; 124 aspace->i_size = MAX_LFS_FILESIZE;
125 ip = GFS2_I(aspace); 125 ip = GFS2_I(aspace);
126 clear_bit(GIF_USER, &ip->i_flags); 126 clear_bit(GIF_USER, &ip->i_flags);
127 insert_inode_hash(aspace); 127 insert_inode_hash(aspace);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 247436c10deb..78f73ca1ef3e 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -748,7 +748,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
748 struct gfs2_rgrpd *nrgd; 748 struct gfs2_rgrpd *nrgd;
749 unsigned int num_gh; 749 unsigned int num_gh;
750 int dir_rename = 0; 750 int dir_rename = 0;
751 int alloc_required; 751 int alloc_required = 0;
752 unsigned int x; 752 unsigned int x;
753 int error; 753 int error;
754 754
@@ -867,7 +867,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
867 goto out_gunlock; 867 goto out_gunlock;
868 } 868 }
869 869
870 alloc_required = error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name); 870 if (nip == NULL)
871 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
872 error = alloc_required;
871 if (error < 0) 873 if (error < 0)
872 goto out_gunlock; 874 goto out_gunlock;
873 error = 0; 875 error = 0;
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 8a04108e0c22..c2ebdf2c01d4 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1296,6 +1296,7 @@ fail:
1296 1296
1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) 1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1298{ 1298{
1299 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1299 struct gfs2_ea_location el; 1300 struct gfs2_ea_location el;
1300 struct buffer_head *dibh; 1301 struct buffer_head *dibh;
1301 int error; 1302 int error;
@@ -1305,16 +1306,17 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1305 return error; 1306 return error;
1306 1307
1307 if (GFS2_EA_IS_STUFFED(el.el_ea)) { 1308 if (GFS2_EA_IS_STUFFED(el.el_ea)) {
1308 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0); 1309 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
1309 if (error) 1310 if (error == 0) {
1310 return error; 1311 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1);
1311 1312 memcpy(GFS2_EA2DATA(el.el_ea), data,
1312 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1); 1313 GFS2_EA_DATA_LEN(el.el_ea));
1313 memcpy(GFS2_EA2DATA(el.el_ea), data, 1314 }
1314 GFS2_EA_DATA_LEN(el.el_ea)); 1315 } else {
1315 } else
1316 error = ea_acl_chmod_unstuffed(ip, el.el_ea, data); 1316 error = ea_acl_chmod_unstuffed(ip, el.el_ea, data);
1317 }
1317 1318
1319 brelse(el.el_bh);
1318 if (error) 1320 if (error)
1319 return error; 1321 return error;
1320 1322
@@ -1327,8 +1329,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1327 brelse(dibh); 1329 brelse(dibh);
1328 } 1330 }
1329 1331
1330 gfs2_trans_end(GFS2_SB(&ip->i_inode)); 1332 gfs2_trans_end(sdp);
1331
1332 return error; 1333 return error;
1333} 1334}
1334 1335
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index a5089a6dd67a..7239efc690d8 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -646,22 +646,27 @@ static const struct super_operations hppfs_sbops = {
646static int hppfs_readlink(struct dentry *dentry, char __user *buffer, 646static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
647 int buflen) 647 int buflen)
648{ 648{
649 struct dentry *proc_dentry; 649 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
650
651 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
652 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, 650 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer,
653 buflen); 651 buflen);
654} 652}
655 653
656static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) 654static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
657{ 655{
658 struct dentry *proc_dentry; 656 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
659
660 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
661 657
662 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); 658 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
663} 659}
664 660
661static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd,
662 void *cookie)
663{
664 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
665
666 if (proc_dentry->d_inode->i_op->put_link)
667 proc_dentry->d_inode->i_op->put_link(proc_dentry, nd, cookie);
668}
669
665static const struct inode_operations hppfs_dir_iops = { 670static const struct inode_operations hppfs_dir_iops = {
666 .lookup = hppfs_lookup, 671 .lookup = hppfs_lookup,
667}; 672};
@@ -669,6 +674,7 @@ static const struct inode_operations hppfs_dir_iops = {
669static const struct inode_operations hppfs_link_iops = { 674static const struct inode_operations hppfs_link_iops = {
670 .readlink = hppfs_readlink, 675 .readlink = hppfs_readlink,
671 .follow_link = hppfs_follow_link, 676 .follow_link = hppfs_follow_link,
677 .put_link = hppfs_put_link,
672}; 678};
673 679
674static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) 680static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index ca0f5eb62b20..886849370950 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -22,6 +22,7 @@
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/errno.h> 23#include <linux/errno.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/blkdev.h>
25#include <trace/events/jbd2.h> 26#include <trace/events/jbd2.h>
26 27
27/* 28/*
@@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
515 journal->j_tail_sequence = first_tid; 516 journal->j_tail_sequence = first_tid;
516 journal->j_tail = blocknr; 517 journal->j_tail = blocknr;
517 spin_unlock(&journal->j_state_lock); 518 spin_unlock(&journal->j_state_lock);
519
520 /*
521 * If there is an external journal, we need to make sure that
522 * any data blocks that were recently written out --- perhaps
523 * by jbd2_log_do_checkpoint() --- are flushed out before we
524 * drop the transactions from the external journal. It's
525 * unlikely this will be necessary, especially with a
526 * appropriately sized journal, but we need this to guarantee
527 * correctness. Fortunately jbd2_cleanup_journal_tail()
528 * doesn't get called all that often.
529 */
530 if ((journal->j_fs_dev != journal->j_dev) &&
531 (journal->j_flags & JBD2_BARRIER))
532 blkdev_issue_flush(journal->j_fs_dev, NULL);
518 if (!(journal->j_flags & JBD2_ABORT)) 533 if (!(journal->j_flags & JBD2_ABORT))
519 jbd2_journal_update_superblock(journal, 1); 534 jbd2_journal_update_superblock(journal, 1);
520 return 0; 535 return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6a10238d2c63..1bc74b6f26d2 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal,
259 ret = err; 259 ret = err;
260 spin_lock(&journal->j_list_lock); 260 spin_lock(&journal->j_list_lock);
261 J_ASSERT(jinode->i_transaction == commit_transaction); 261 J_ASSERT(jinode->i_transaction == commit_transaction);
262 commit_transaction->t_flushed_data_blocks = 1;
262 jinode->i_flags &= ~JI_COMMIT_RUNNING; 263 jinode->i_flags &= ~JI_COMMIT_RUNNING;
263 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 264 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
264 } 265 }
@@ -708,8 +709,17 @@ start_journal_io:
708 } 709 }
709 } 710 }
710 711
711 /* Done it all: now write the commit record asynchronously. */ 712 /*
713 * If the journal is not located on the file system device,
714 * then we must flush the file system device before we issue
715 * the commit record
716 */
717 if (commit_transaction->t_flushed_data_blocks &&
718 (journal->j_fs_dev != journal->j_dev) &&
719 (journal->j_flags & JBD2_BARRIER))
720 blkdev_issue_flush(journal->j_fs_dev, NULL);
712 721
722 /* Done it all: now write the commit record asynchronously. */
713 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 723 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
714 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 724 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
715 err = journal_submit_commit_record(journal, commit_transaction, 725 err = journal_submit_commit_record(journal, commit_transaction,
@@ -720,13 +730,6 @@ start_journal_io:
720 blkdev_issue_flush(journal->j_dev, NULL); 730 blkdev_issue_flush(journal->j_dev, NULL);
721 } 731 }
722 732
723 /*
724 * This is the right place to wait for data buffers both for ASYNC
725 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
726 * the commit block went to disk (which happens above). If commit is
727 * SYNC, we need to wait for data buffers before we start writing
728 * commit block, which happens below in such setting.
729 */
730 err = journal_finish_inode_data_buffers(journal, commit_transaction); 733 err = journal_finish_inode_data_buffers(journal, commit_transaction);
731 if (err) { 734 if (err) {
732 printk(KERN_WARNING 735 printk(KERN_WARNING
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 17af879e6e9e..ac0d027595d0 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -814,7 +814,7 @@ static journal_t * journal_init_common (void)
814 journal_t *journal; 814 journal_t *journal;
815 int err; 815 int err;
816 816
817 journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL); 817 journal = kzalloc(sizeof(*journal), GFP_KERNEL);
818 if (!journal) 818 if (!journal)
819 goto fail; 819 goto fail;
820 820
diff --git a/fs/namei.c b/fs/namei.c
index 68921d9b5302..94a5e60779f9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -232,6 +232,7 @@ int generic_permission(struct inode *inode, int mask,
232 /* 232 /*
233 * Searching includes executable on directories, else just read. 233 * Searching includes executable on directories, else just read.
234 */ 234 */
235 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
235 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 236 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
236 if (capable(CAP_DAC_READ_SEARCH)) 237 if (capable(CAP_DAC_READ_SEARCH))
237 return 0; 238 return 0;
@@ -560,6 +561,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
560 dget(dentry); 561 dget(dentry);
561 } 562 }
562 mntget(path->mnt); 563 mntget(path->mnt);
564 nd->last_type = LAST_BIND;
563 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 565 cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
564 error = PTR_ERR(cookie); 566 error = PTR_ERR(cookie);
565 if (!IS_ERR(cookie)) { 567 if (!IS_ERR(cookie)) {
@@ -1602,11 +1604,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
1602 struct file *filp; 1604 struct file *filp;
1603 struct nameidata nd; 1605 struct nameidata nd;
1604 int error; 1606 int error;
1605 struct path path, save; 1607 struct path path;
1606 struct dentry *dir; 1608 struct dentry *dir;
1607 int count = 0; 1609 int count = 0;
1608 int will_truncate; 1610 int will_truncate;
1609 int flag = open_to_namei_flags(open_flag); 1611 int flag = open_to_namei_flags(open_flag);
1612 int force_reval = 0;
1610 1613
1611 /* 1614 /*
1612 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 1615 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
@@ -1618,7 +1621,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1618 open_flag |= O_DSYNC; 1621 open_flag |= O_DSYNC;
1619 1622
1620 if (!acc_mode) 1623 if (!acc_mode)
1621 acc_mode = MAY_OPEN | ACC_MODE(flag); 1624 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
1622 1625
1623 /* O_TRUNC implies we need access checks for write permissions */ 1626 /* O_TRUNC implies we need access checks for write permissions */
1624 if (flag & O_TRUNC) 1627 if (flag & O_TRUNC)
@@ -1658,9 +1661,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
1658 /* 1661 /*
1659 * Create - we need to know the parent. 1662 * Create - we need to know the parent.
1660 */ 1663 */
1664reval:
1661 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 1665 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
1662 if (error) 1666 if (error)
1663 return ERR_PTR(error); 1667 return ERR_PTR(error);
1668 if (force_reval)
1669 nd.flags |= LOOKUP_REVAL;
1664 error = path_walk(pathname, &nd); 1670 error = path_walk(pathname, &nd);
1665 if (error) { 1671 if (error) {
1666 if (nd.root.mnt) 1672 if (nd.root.mnt)
@@ -1852,17 +1858,7 @@ do_link:
1852 error = security_inode_follow_link(path.dentry, &nd); 1858 error = security_inode_follow_link(path.dentry, &nd);
1853 if (error) 1859 if (error)
1854 goto exit_dput; 1860 goto exit_dput;
1855 save = nd.path;
1856 path_get(&save);
1857 error = __do_follow_link(&path, &nd); 1861 error = __do_follow_link(&path, &nd);
1858 if (error == -ESTALE) {
1859 /* nd.path had been dropped */
1860 nd.path = save;
1861 path_get(&nd.path);
1862 nd.flags |= LOOKUP_REVAL;
1863 error = __do_follow_link(&path, &nd);
1864 }
1865 path_put(&save);
1866 path_put(&path); 1862 path_put(&path);
1867 if (error) { 1863 if (error) {
1868 /* Does someone understand code flow here? Or it is only 1864 /* Does someone understand code flow here? Or it is only
@@ -1872,6 +1868,10 @@ do_link:
1872 release_open_intent(&nd); 1868 release_open_intent(&nd);
1873 if (nd.root.mnt) 1869 if (nd.root.mnt)
1874 path_put(&nd.root); 1870 path_put(&nd.root);
1871 if (error == -ESTALE && !force_reval) {
1872 force_reval = 1;
1873 goto reval;
1874 }
1875 return ERR_PTR(error); 1875 return ERR_PTR(error);
1876 } 1876 }
1877 nd.flags &= ~LOOKUP_PARENT; 1877 nd.flags &= ~LOOKUP_PARENT;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7d70d63ceb29..c768f733c8d6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -965,10 +965,12 @@ EXPORT_SYMBOL(may_umount_tree);
965int may_umount(struct vfsmount *mnt) 965int may_umount(struct vfsmount *mnt)
966{ 966{
967 int ret = 1; 967 int ret = 1;
968 down_read(&namespace_sem);
968 spin_lock(&vfsmount_lock); 969 spin_lock(&vfsmount_lock);
969 if (propagate_mount_busy(mnt, 2)) 970 if (propagate_mount_busy(mnt, 2))
970 ret = 0; 971 ret = 0;
971 spin_unlock(&vfsmount_lock); 972 spin_unlock(&vfsmount_lock);
973 up_read(&namespace_sem);
972 return ret; 974 return ret;
973} 975}
974 976
@@ -1352,12 +1354,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1352 if (err) 1354 if (err)
1353 goto out_cleanup_ids; 1355 goto out_cleanup_ids;
1354 1356
1357 spin_lock(&vfsmount_lock);
1358
1355 if (IS_MNT_SHARED(dest_mnt)) { 1359 if (IS_MNT_SHARED(dest_mnt)) {
1356 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1360 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
1357 set_mnt_shared(p); 1361 set_mnt_shared(p);
1358 } 1362 }
1359
1360 spin_lock(&vfsmount_lock);
1361 if (parent_path) { 1363 if (parent_path) {
1362 detach_mnt(source_mnt, parent_path); 1364 detach_mnt(source_mnt, parent_path);
1363 attach_mnt(source_mnt, path); 1365 attach_mnt(source_mnt, path);
@@ -1534,8 +1536,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1534 err = change_mount_flags(path->mnt, flags); 1536 err = change_mount_flags(path->mnt, flags);
1535 else 1537 else
1536 err = do_remount_sb(sb, flags, data, 0); 1538 err = do_remount_sb(sb, flags, data, 0);
1537 if (!err) 1539 if (!err) {
1540 spin_lock(&vfsmount_lock);
1541 mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
1538 path->mnt->mnt_flags = mnt_flags; 1542 path->mnt->mnt_flags = mnt_flags;
1543 spin_unlock(&vfsmount_lock);
1544 }
1539 up_write(&sb->s_umount); 1545 up_write(&sb->s_umount);
1540 if (!err) { 1546 if (!err) {
1541 security_sb_post_remount(path->mnt, flags, data); 1547 security_sb_post_remount(path->mnt, flags, data);
@@ -1665,6 +1671,8 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
1665{ 1671{
1666 int err; 1672 int err;
1667 1673
1674 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD);
1675
1668 down_write(&namespace_sem); 1676 down_write(&namespace_sem);
1669 /* Something was mounted here while we slept */ 1677 /* Something was mounted here while we slept */
1670 while (d_mountpoint(path->dentry) && 1678 while (d_mountpoint(path->dentry) &&
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2c5ace4f00a7..3c7f03b669fb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1615,6 +1615,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1615 goto out; 1615 goto out;
1616 1616
1617 new_dentry = dentry; 1617 new_dentry = dentry;
1618 rehash = NULL;
1618 new_inode = NULL; 1619 new_inode = NULL;
1619 } 1620 }
1620 } 1621 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7c2e337d05af..c194793b642b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -780,12 +780,9 @@ static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
780 int (*fsync) (struct file *, struct dentry *, int); 780 int (*fsync) (struct file *, struct dentry *, int);
781 int err; 781 int err;
782 782
783 err = filemap_fdatawrite(inode->i_mapping); 783 err = filemap_write_and_wait(inode->i_mapping);
784 if (err == 0 && fop && (fsync = fop->fsync)) 784 if (err == 0 && fop && (fsync = fop->fsync))
785 err = fsync(filp, dp, 0); 785 err = fsync(filp, dp, 0);
786 if (err == 0)
787 err = filemap_fdatawait(inode->i_mapping);
788
789 return err; 786 return err;
790} 787}
791 788
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index f4a14ea2ed9c..effdbdbe6c11 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -417,8 +417,8 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
417 417
418 key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT - 418 key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT -
419 bmap->b_inode->i_blkbits); 419 bmap->b_inode->i_blkbits);
420 for (pbh = page_buffers(bh->b_page); pbh != bh; 420 for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
421 pbh = pbh->b_this_page, key++); 421 key++;
422 422
423 return key; 423 return key;
424} 424}
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index d5ad54e204a5..18737818db63 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -328,19 +328,24 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
328 tnicps += nicps; 328 tnicps += nicps;
329 nilfs_mdt_mark_buffer_dirty(cp_bh); 329 nilfs_mdt_mark_buffer_dirty(cp_bh);
330 nilfs_mdt_mark_dirty(cpfile); 330 nilfs_mdt_mark_dirty(cpfile);
331 if (!nilfs_cpfile_is_in_first(cpfile, cno) && 331 if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
332 (count = nilfs_cpfile_block_sub_valid_checkpoints( 332 count =
333 cpfile, cp_bh, kaddr, nicps)) == 0) { 333 nilfs_cpfile_block_sub_valid_checkpoints(
334 /* make hole */ 334 cpfile, cp_bh, kaddr, nicps);
335 kunmap_atomic(kaddr, KM_USER0); 335 if (count == 0) {
336 brelse(cp_bh); 336 /* make hole */
337 ret = nilfs_cpfile_delete_checkpoint_block( 337 kunmap_atomic(kaddr, KM_USER0);
338 cpfile, cno); 338 brelse(cp_bh);
339 if (ret == 0) 339 ret =
340 continue; 340 nilfs_cpfile_delete_checkpoint_block(
341 printk(KERN_ERR "%s: cannot delete block\n", 341 cpfile, cno);
342 __func__); 342 if (ret == 0)
343 break; 343 continue;
344 printk(KERN_ERR
345 "%s: cannot delete block\n",
346 __func__);
347 break;
348 }
344 } 349 }
345 } 350 }
346 351
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index d369ac718277..236753df5cdf 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -51,11 +51,11 @@ static int nilfs_direct_lookup(const struct nilfs_bmap *bmap,
51 struct nilfs_direct *direct; 51 struct nilfs_direct *direct;
52 __u64 ptr; 52 __u64 ptr;
53 53
54 direct = (struct nilfs_direct *)bmap; 54 direct = (struct nilfs_direct *)bmap; /* XXX: use macro for level 1 */
55 if ((key > NILFS_DIRECT_KEY_MAX) || 55 if (key > NILFS_DIRECT_KEY_MAX || level != 1)
56 (level != 1) || /* XXX: use macro for level 1 */ 56 return -ENOENT;
57 ((ptr = nilfs_direct_get_ptr(direct, key)) == 57 ptr = nilfs_direct_get_ptr(direct, key);
58 NILFS_BMAP_INVALID_PTR)) 58 if (ptr == NILFS_BMAP_INVALID_PTR)
59 return -ENOENT; 59 return -ENOENT;
60 60
61 if (ptrp != NULL) 61 if (ptrp != NULL)
@@ -73,9 +73,10 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
73 sector_t blocknr; 73 sector_t blocknr;
74 int ret, cnt; 74 int ret, cnt;
75 75
76 if (key > NILFS_DIRECT_KEY_MAX || 76 if (key > NILFS_DIRECT_KEY_MAX)
77 (ptr = nilfs_direct_get_ptr(direct, key)) == 77 return -ENOENT;
78 NILFS_BMAP_INVALID_PTR) 78 ptr = nilfs_direct_get_ptr(direct, key);
79 if (ptr == NILFS_BMAP_INVALID_PTR)
79 return -ENOENT; 80 return -ENOENT;
80 81
81 if (NILFS_BMAP_USE_VBN(bmap)) { 82 if (NILFS_BMAP_USE_VBN(bmap)) {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f6af76042d80..d6b2b83de363 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -480,7 +480,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
480 unsigned int cmd, void __user *argp) 480 unsigned int cmd, void __user *argp)
481{ 481{
482 struct nilfs_argv argv[5]; 482 struct nilfs_argv argv[5];
483 const static size_t argsz[5] = { 483 static const size_t argsz[5] = {
484 sizeof(struct nilfs_vdesc), 484 sizeof(struct nilfs_vdesc),
485 sizeof(struct nilfs_period), 485 sizeof(struct nilfs_period),
486 sizeof(__u64), 486 sizeof(__u64),
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index c9ee67b442e1..1afb0a10229f 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -121,7 +121,7 @@ static int idr_callback(int id, void *p, void *data)
121 if (warned) 121 if (warned)
122 return 0; 122 return 0;
123 123
124 warned = false; 124 warned = true;
125 entry = p; 125 entry = p;
126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
127 127
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 8271cf05c957..a94e8bd8eb1f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -552,7 +552,7 @@ retry:
552 552
553 spin_lock(&group->inotify_data.idr_lock); 553 spin_lock(&group->inotify_data.idr_lock);
554 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, 554 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
555 group->inotify_data.last_wd, 555 group->inotify_data.last_wd+1,
556 &tmp_ientry->wd); 556 &tmp_ientry->wd);
557 spin_unlock(&group->inotify_data.idr_lock); 557 spin_unlock(&group->inotify_data.idr_lock);
558 if (ret) { 558 if (ret) {
@@ -632,7 +632,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
632 632
633 spin_lock_init(&group->inotify_data.idr_lock); 633 spin_lock_init(&group->inotify_data.idr_lock);
634 idr_init(&group->inotify_data.idr); 634 idr_init(&group->inotify_data.idr);
635 group->inotify_data.last_wd = 1; 635 group->inotify_data.last_wd = 0;
636 group->inotify_data.user = user; 636 group->inotify_data.user = user;
637 group->inotify_data.fa = NULL; 637 group->inotify_data.fa = NULL;
638 638
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
index 701b7a3a872e..0d840669698e 100644
--- a/fs/ocfs2/Kconfig
+++ b/fs/ocfs2/Kconfig
@@ -6,6 +6,7 @@ config OCFS2_FS
6 select CRC32 6 select CRC32
7 select QUOTA 7 select QUOTA
8 select QUOTA_TREE 8 select QUOTA_TREE
9 select FS_POSIX_ACL
9 help 10 help
10 OCFS2 is a general purpose extent based shared disk cluster file 11 OCFS2 is a general purpose extent based shared disk cluster file
11 system with many similarities to ext3. It supports 64 bit inode 12 system with many similarities to ext3. It supports 64 bit inode
@@ -74,12 +75,3 @@ config OCFS2_DEBUG_FS
74 This option will enable expensive consistency checks. Enable 75 This option will enable expensive consistency checks. Enable
75 this option for debugging only as it is likely to decrease 76 this option for debugging only as it is likely to decrease
76 performance of the filesystem. 77 performance of the filesystem.
77
78config OCFS2_FS_POSIX_ACL
79 bool "OCFS2 POSIX Access Control Lists"
80 depends on OCFS2_FS
81 select FS_POSIX_ACL
82 default n
83 help
84 Posix Access Control Lists (ACLs) support permissions for users and
85 groups beyond the owner/group/world scheme.
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 31f25ce32c97..600d2d2ade11 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -39,11 +39,8 @@ ocfs2-objs := \
39 ver.o \ 39 ver.o \
40 quota_local.o \ 40 quota_local.o \
41 quota_global.o \ 41 quota_global.o \
42 xattr.o 42 xattr.o \
43 43 acl.o
44ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y)
45ocfs2-objs += acl.o
46endif
47 44
48ocfs2_stackglue-objs := stackglue.o 45ocfs2_stackglue-objs := stackglue.o
49ocfs2_stack_o2cb-objs := stack_o2cb.o 46ocfs2_stack_o2cb-objs := stack_o2cb.o
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index e3e47415d851..0501974bedd0 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -98,15 +98,11 @@ static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode,
98 int type, 98 int type,
99 struct buffer_head *di_bh) 99 struct buffer_head *di_bh)
100{ 100{
101 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
102 int name_index; 101 int name_index;
103 char *value = NULL; 102 char *value = NULL;
104 struct posix_acl *acl; 103 struct posix_acl *acl;
105 int retval; 104 int retval;
106 105
107 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
108 return NULL;
109
110 switch (type) { 106 switch (type) {
111 case ACL_TYPE_ACCESS: 107 case ACL_TYPE_ACCESS:
112 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; 108 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 8f6389ed4da5..5c5d31f05853 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,8 +26,6 @@ struct ocfs2_acl_entry {
26 __le32 e_id; 26 __le32 e_id;
27}; 27};
28 28
29#ifdef CONFIG_OCFS2_FS_POSIX_ACL
30
31extern int ocfs2_check_acl(struct inode *, int); 29extern int ocfs2_check_acl(struct inode *, int);
32extern int ocfs2_acl_chmod(struct inode *); 30extern int ocfs2_acl_chmod(struct inode *);
33extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, 31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
@@ -35,24 +33,4 @@ extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
35 struct ocfs2_alloc_context *, 33 struct ocfs2_alloc_context *,
36 struct ocfs2_alloc_context *); 34 struct ocfs2_alloc_context *);
37 35
38#else /* CONFIG_OCFS2_FS_POSIX_ACL*/
39
40#define ocfs2_check_acl NULL
41static inline int ocfs2_acl_chmod(struct inode *inode)
42{
43 return 0;
44}
45static inline int ocfs2_init_acl(handle_t *handle,
46 struct inode *inode,
47 struct inode *dir,
48 struct buffer_head *di_bh,
49 struct buffer_head *dir_bh,
50 struct ocfs2_alloc_context *meta_ac,
51 struct ocfs2_alloc_context *data_ac)
52{
53 return 0;
54}
55
56#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
57
58#endif /* OCFS2_ACL_H */ 36#endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index c452d116b892..eda5b8bcddd5 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -176,7 +176,8 @@ static void o2hb_write_timeout(struct work_struct *work)
176 176
177static void o2hb_arm_write_timeout(struct o2hb_region *reg) 177static void o2hb_arm_write_timeout(struct o2hb_region *reg)
178{ 178{
179 mlog(0, "Queue write timeout for %u ms\n", O2HB_MAX_WRITE_TIMEOUT_MS); 179 mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n",
180 O2HB_MAX_WRITE_TIMEOUT_MS);
180 181
181 cancel_delayed_work(&reg->hr_write_timeout_work); 182 cancel_delayed_work(&reg->hr_write_timeout_work);
182 reg->hr_last_timeout_start = jiffies; 183 reg->hr_last_timeout_start = jiffies;
@@ -874,7 +875,8 @@ static int o2hb_thread(void *data)
874 do_gettimeofday(&after_hb); 875 do_gettimeofday(&after_hb);
875 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); 876 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
876 877
877 mlog(0, "start = %lu.%lu, end = %lu.%lu, msec = %u\n", 878 mlog(ML_HEARTBEAT,
879 "start = %lu.%lu, end = %lu.%lu, msec = %u\n",
878 before_hb.tv_sec, (unsigned long) before_hb.tv_usec, 880 before_hb.tv_sec, (unsigned long) before_hb.tv_usec,
879 after_hb.tv_sec, (unsigned long) after_hb.tv_usec, 881 after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
880 elapsed_msec); 882 elapsed_msec);
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 7ee6188bc79a..c81142e3ef84 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -35,6 +35,10 @@
35 * cluster references throughout where nodes are looked up */ 35 * cluster references throughout where nodes are looked up */
36struct o2nm_cluster *o2nm_single_cluster = NULL; 36struct o2nm_cluster *o2nm_single_cluster = NULL;
37 37
38char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = {
39 "reset", /* O2NM_FENCE_RESET */
40 "panic", /* O2NM_FENCE_PANIC */
41};
38 42
39struct o2nm_node *o2nm_get_node_by_num(u8 node_num) 43struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
40{ 44{
@@ -579,6 +583,43 @@ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(
579 return o2nm_cluster_attr_write(page, count, 583 return o2nm_cluster_attr_write(page, count,
580 &cluster->cl_reconnect_delay_ms); 584 &cluster->cl_reconnect_delay_ms);
581} 585}
586
587static ssize_t o2nm_cluster_attr_fence_method_read(
588 struct o2nm_cluster *cluster, char *page)
589{
590 ssize_t ret = 0;
591
592 if (cluster)
593 ret = sprintf(page, "%s\n",
594 o2nm_fence_method_desc[cluster->cl_fence_method]);
595 return ret;
596}
597
598static ssize_t o2nm_cluster_attr_fence_method_write(
599 struct o2nm_cluster *cluster, const char *page, size_t count)
600{
601 unsigned int i;
602
603 if (page[count - 1] != '\n')
604 goto bail;
605
606 for (i = 0; i < O2NM_FENCE_METHODS; ++i) {
607 if (count != strlen(o2nm_fence_method_desc[i]) + 1)
608 continue;
609 if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1))
610 continue;
611 if (cluster->cl_fence_method != i) {
612 printk(KERN_INFO "ocfs2: Changing fence method to %s\n",
613 o2nm_fence_method_desc[i]);
614 cluster->cl_fence_method = i;
615 }
616 return count;
617 }
618
619bail:
620 return -EINVAL;
621}
622
582static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { 623static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {
583 .attr = { .ca_owner = THIS_MODULE, 624 .attr = { .ca_owner = THIS_MODULE,
584 .ca_name = "idle_timeout_ms", 625 .ca_name = "idle_timeout_ms",
@@ -603,10 +644,19 @@ static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {
603 .store = o2nm_cluster_attr_reconnect_delay_ms_write, 644 .store = o2nm_cluster_attr_reconnect_delay_ms_write,
604}; 645};
605 646
647static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = {
648 .attr = { .ca_owner = THIS_MODULE,
649 .ca_name = "fence_method",
650 .ca_mode = S_IRUGO | S_IWUSR },
651 .show = o2nm_cluster_attr_fence_method_read,
652 .store = o2nm_cluster_attr_fence_method_write,
653};
654
606static struct configfs_attribute *o2nm_cluster_attrs[] = { 655static struct configfs_attribute *o2nm_cluster_attrs[] = {
607 &o2nm_cluster_attr_idle_timeout_ms.attr, 656 &o2nm_cluster_attr_idle_timeout_ms.attr,
608 &o2nm_cluster_attr_keepalive_delay_ms.attr, 657 &o2nm_cluster_attr_keepalive_delay_ms.attr,
609 &o2nm_cluster_attr_reconnect_delay_ms.attr, 658 &o2nm_cluster_attr_reconnect_delay_ms.attr,
659 &o2nm_cluster_attr_fence_method.attr,
610 NULL, 660 NULL,
611}; 661};
612static ssize_t o2nm_cluster_show(struct config_item *item, 662static ssize_t o2nm_cluster_show(struct config_item *item,
@@ -778,6 +828,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
778 cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; 828 cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
779 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; 829 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
780 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; 830 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
831 cluster->cl_fence_method = O2NM_FENCE_RESET;
781 832
782 ret = &cluster->cl_group; 833 ret = &cluster->cl_group;
783 o2nm_single_cluster = cluster; 834 o2nm_single_cluster = cluster;
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index c992ea0da4ad..09ea2d388bbb 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -33,6 +33,12 @@
33#include <linux/configfs.h> 33#include <linux/configfs.h>
34#include <linux/rbtree.h> 34#include <linux/rbtree.h>
35 35
36enum o2nm_fence_method {
37 O2NM_FENCE_RESET = 0,
38 O2NM_FENCE_PANIC,
39 O2NM_FENCE_METHODS, /* Number of fence methods */
40};
41
36struct o2nm_node { 42struct o2nm_node {
37 spinlock_t nd_lock; 43 spinlock_t nd_lock;
38 struct config_item nd_item; 44 struct config_item nd_item;
@@ -58,6 +64,7 @@ struct o2nm_cluster {
58 unsigned int cl_idle_timeout_ms; 64 unsigned int cl_idle_timeout_ms;
59 unsigned int cl_keepalive_delay_ms; 65 unsigned int cl_keepalive_delay_ms;
60 unsigned int cl_reconnect_delay_ms; 66 unsigned int cl_reconnect_delay_ms;
67 enum o2nm_fence_method cl_fence_method;
61 68
62 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ 69 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
63 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 70 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index bbacf7da48a4..639024033fce 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -74,8 +74,20 @@ static void o2quo_fence_self(void)
74 * threads can still schedule, etc, etc */ 74 * threads can still schedule, etc, etc */
75 o2hb_stop_all_regions(); 75 o2hb_stop_all_regions();
76 76
77 printk("ocfs2 is very sorry to be fencing this system by restarting\n"); 77 switch (o2nm_single_cluster->cl_fence_method) {
78 emergency_restart(); 78 case O2NM_FENCE_PANIC:
79 panic("*** ocfs2 is very sorry to be fencing this system by "
80 "panicing ***\n");
81 break;
82 default:
83 WARN_ON(o2nm_single_cluster->cl_fence_method >=
84 O2NM_FENCE_METHODS);
85 case O2NM_FENCE_RESET:
86 printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this "
87 "system by restarting ***\n");
88 emergency_restart();
89 break;
90 };
79} 91}
80 92
81/* Indicate that a timeout occured on a hearbeat region write. The 93/* Indicate that a timeout occured on a hearbeat region write. The
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index d9fa3d22e17c..2f9e4e19a4f2 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2589,6 +2589,14 @@ retry:
2589 "begin reco msg (%d)\n", dlm->name, nodenum, ret); 2589 "begin reco msg (%d)\n", dlm->name, nodenum, ret);
2590 ret = 0; 2590 ret = 0;
2591 } 2591 }
2592 if (ret == -EAGAIN) {
2593 mlog(0, "%s: trying to start recovery of node "
2594 "%u, but node %u is waiting for last recovery "
2595 "to complete, backoff for a bit\n", dlm->name,
2596 dead_node, nodenum);
2597 msleep(100);
2598 goto retry;
2599 }
2592 if (ret < 0) { 2600 if (ret < 0) {
2593 struct dlm_lock_resource *res; 2601 struct dlm_lock_resource *res;
2594 /* this is now a serious problem, possibly ENOMEM 2602 /* this is now a serious problem, possibly ENOMEM
@@ -2608,14 +2616,6 @@ retry:
2608 * another ENOMEM */ 2616 * another ENOMEM */
2609 msleep(100); 2617 msleep(100);
2610 goto retry; 2618 goto retry;
2611 } else if (ret == EAGAIN) {
2612 mlog(0, "%s: trying to start recovery of node "
2613 "%u, but node %u is waiting for last recovery "
2614 "to complete, backoff for a bit\n", dlm->name,
2615 dead_node, nodenum);
2616 /* TODO Look into replacing msleep with cond_resched() */
2617 msleep(100);
2618 goto retry;
2619 } 2619 }
2620 } 2620 }
2621 2621
@@ -2639,7 +2639,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
2639 dlm->name, br->node_idx, br->dead_node, 2639 dlm->name, br->node_idx, br->dead_node,
2640 dlm->reco.dead_node, dlm->reco.new_master); 2640 dlm->reco.dead_node, dlm->reco.new_master);
2641 spin_unlock(&dlm->spinlock); 2641 spin_unlock(&dlm->spinlock);
2642 return EAGAIN; 2642 return -EAGAIN;
2643 } 2643 }
2644 spin_unlock(&dlm->spinlock); 2644 spin_unlock(&dlm->spinlock);
2645 2645
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 843db64e9d4a..d35a27f4523e 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -37,6 +37,7 @@
37#include "extent_map.h" 37#include "extent_map.h"
38#include "inode.h" 38#include "inode.h"
39#include "super.h" 39#include "super.h"
40#include "symlink.h"
40 41
41#include "buffer_head_io.h" 42#include "buffer_head_io.h"
42 43
@@ -703,6 +704,12 @@ out:
703 return ret; 704 return ret;
704} 705}
705 706
707/*
708 * The ocfs2_fiemap_inline() may be a little bit misleading, since
709 * it not only handles the fiemap for inlined files, but also deals
710 * with the fast symlink, cause they have no difference for extent
711 * mapping per se.
712 */
706static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, 713static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
707 struct fiemap_extent_info *fieinfo, 714 struct fiemap_extent_info *fieinfo,
708 u64 map_start) 715 u64 map_start)
@@ -715,11 +722,18 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
715 struct ocfs2_inode_info *oi = OCFS2_I(inode); 722 struct ocfs2_inode_info *oi = OCFS2_I(inode);
716 723
717 di = (struct ocfs2_dinode *)di_bh->b_data; 724 di = (struct ocfs2_dinode *)di_bh->b_data;
718 id_count = le16_to_cpu(di->id2.i_data.id_count); 725 if (ocfs2_inode_is_fast_symlink(inode))
726 id_count = ocfs2_fast_symlink_chars(inode->i_sb);
727 else
728 id_count = le16_to_cpu(di->id2.i_data.id_count);
719 729
720 if (map_start < id_count) { 730 if (map_start < id_count) {
721 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; 731 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
722 phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); 732 if (ocfs2_inode_is_fast_symlink(inode))
733 phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
734 else
735 phys += offsetof(struct ocfs2_dinode,
736 id2.i_data.id_data);
723 737
724 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, 738 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
725 flags); 739 flags);
@@ -756,9 +770,10 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
756 down_read(&OCFS2_I(inode)->ip_alloc_sem); 770 down_read(&OCFS2_I(inode)->ip_alloc_sem);
757 771
758 /* 772 /*
759 * Handle inline-data separately. 773 * Handle inline-data and fast symlink separately.
760 */ 774 */
761 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 775 if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
776 ocfs2_inode_is_fast_symlink(inode)) {
762 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); 777 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
763 goto out_unlock; 778 goto out_unlock;
764 } 779 }
@@ -786,6 +801,8 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
786 fe_flags = 0; 801 fe_flags = 0;
787 if (rec.e_flags & OCFS2_EXT_UNWRITTEN) 802 if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
788 fe_flags |= FIEMAP_EXTENT_UNWRITTEN; 803 fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
804 if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
805 fe_flags |= FIEMAP_EXTENT_SHARED;
789 if (is_last) 806 if (is_last)
790 fe_flags |= FIEMAP_EXTENT_LAST; 807 fe_flags |= FIEMAP_EXTENT_LAST;
791 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; 808 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3d30a1c974a8..06ccf6a86d35 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1772,7 +1772,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1772 loff_t *ppos, 1772 loff_t *ppos,
1773 size_t count, 1773 size_t count,
1774 int appending, 1774 int appending,
1775 int *direct_io) 1775 int *direct_io,
1776 int *has_refcount)
1776{ 1777{
1777 int ret = 0, meta_level = 0; 1778 int ret = 0, meta_level = 0;
1778 struct inode *inode = dentry->d_inode; 1779 struct inode *inode = dentry->d_inode;
@@ -1833,6 +1834,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1833 saved_pos, 1834 saved_pos,
1834 count, 1835 count,
1835 &meta_level); 1836 &meta_level);
1837 if (has_refcount)
1838 *has_refcount = 1;
1836 } 1839 }
1837 1840
1838 if (ret < 0) { 1841 if (ret < 0) {
@@ -1856,6 +1859,10 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1856 break; 1859 break;
1857 } 1860 }
1858 1861
1862 if (has_refcount && *has_refcount == 1) {
1863 *direct_io = 0;
1864 break;
1865 }
1859 /* 1866 /*
1860 * Allowing concurrent direct writes means 1867 * Allowing concurrent direct writes means
1861 * i_size changes wouldn't be synchronized, so 1868 * i_size changes wouldn't be synchronized, so
@@ -1899,7 +1906,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1899 loff_t pos) 1906 loff_t pos)
1900{ 1907{
1901 int ret, direct_io, appending, rw_level, have_alloc_sem = 0; 1908 int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
1902 int can_do_direct; 1909 int can_do_direct, has_refcount = 0;
1903 ssize_t written = 0; 1910 ssize_t written = 0;
1904 size_t ocount; /* original count */ 1911 size_t ocount; /* original count */
1905 size_t count; /* after file limit checks */ 1912 size_t count; /* after file limit checks */
@@ -1942,7 +1949,7 @@ relock:
1942 can_do_direct = direct_io; 1949 can_do_direct = direct_io;
1943 ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, 1950 ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,
1944 iocb->ki_left, appending, 1951 iocb->ki_left, appending,
1945 &can_do_direct); 1952 &can_do_direct, &has_refcount);
1946 if (ret < 0) { 1953 if (ret < 0) {
1947 mlog_errno(ret); 1954 mlog_errno(ret);
1948 goto out; 1955 goto out;
@@ -2006,14 +2013,16 @@ out_dio:
2006 /* buffered aio wouldn't have proper lock coverage today */ 2013 /* buffered aio wouldn't have proper lock coverage today */
2007 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 2014 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
2008 2015
2009 if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode)) { 2016 if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode) ||
2017 (file->f_flags & O_DIRECT && has_refcount)) {
2010 ret = filemap_fdatawrite_range(file->f_mapping, pos, 2018 ret = filemap_fdatawrite_range(file->f_mapping, pos,
2011 pos + count - 1); 2019 pos + count - 1);
2012 if (ret < 0) 2020 if (ret < 0)
2013 written = ret; 2021 written = ret;
2014 2022
2015 if (!ret && (old_size != i_size_read(inode) || 2023 if (!ret && (old_size != i_size_read(inode) ||
2016 old_clusters != OCFS2_I(inode)->ip_clusters)) { 2024 old_clusters != OCFS2_I(inode)->ip_clusters ||
2025 has_refcount)) {
2017 ret = jbd2_journal_force_commit(osb->journal->j_journal); 2026 ret = jbd2_journal_force_commit(osb->journal->j_journal);
2018 if (ret < 0) 2027 if (ret < 0)
2019 written = ret; 2028 written = ret;
@@ -2062,7 +2071,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
2062 int ret; 2071 int ret;
2063 2072
2064 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, 2073 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
2065 sd->total_len, 0, NULL); 2074 sd->total_len, 0, NULL, NULL);
2066 if (ret < 0) { 2075 if (ret < 0) {
2067 mlog_errno(ret); 2076 mlog_errno(ret);
2068 return ret; 2077 return ret;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 3e9b46002f22..50fb26a6a5f5 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2329,4 +2329,5 @@ const struct inode_operations ocfs2_dir_iops = {
2329 .getxattr = generic_getxattr, 2329 .getxattr = generic_getxattr,
2330 .listxattr = ocfs2_listxattr, 2330 .listxattr = ocfs2_listxattr,
2331 .removexattr = generic_removexattr, 2331 .removexattr = generic_removexattr,
2332 .fiemap = ocfs2_fiemap,
2332}; 2333};
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d963d8638709..9362eea7424b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -245,9 +245,11 @@ enum ocfs2_mount_options
245 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ 245 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
246 OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ 246 OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
247 OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ 247 OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
248 OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */ 248 OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* Force POSIX access control lists */
249 OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */ 249 OCFS2_MOUNT_NO_POSIX_ACL = 1 << 9, /* Disable POSIX access
250 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ 250 control lists */
251 OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */
252 OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */
251}; 253};
252 254
253#define OCFS2_OSB_SOFT_RO 0x0001 255#define OCFS2_OSB_SOFT_RO 0x0001
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index e9431e4a5e7c..1a1a679e51b5 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1202,7 +1202,7 @@ struct ocfs2_local_disk_dqinfo {
1202/* Header of one chunk of a quota file */ 1202/* Header of one chunk of a quota file */
1203struct ocfs2_local_disk_chunk { 1203struct ocfs2_local_disk_chunk {
1204 __le32 dqc_free; /* Number of free entries in the bitmap */ 1204 __le32 dqc_free; /* Number of free entries in the bitmap */
1205 u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding 1205 __u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding
1206 * chunk of quota file */ 1206 * chunk of quota file */
1207}; 1207};
1208 1208
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index ff4c798a5635..da78a2a334fd 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -814,7 +814,7 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
814static int user_cluster_connect(struct ocfs2_cluster_connection *conn) 814static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
815{ 815{
816 dlm_lockspace_t *fsdlm; 816 dlm_lockspace_t *fsdlm;
817 struct ocfs2_live_connection *control; 817 struct ocfs2_live_connection *uninitialized_var(control);
818 int rc = 0; 818 int rc = 0;
819 819
820 BUG_ON(conn == NULL); 820 BUG_ON(conn == NULL);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 14f47d2bfe02..26069917a9f5 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -100,6 +100,8 @@ struct mount_options
100static int ocfs2_parse_options(struct super_block *sb, char *options, 100static int ocfs2_parse_options(struct super_block *sb, char *options,
101 struct mount_options *mopt, 101 struct mount_options *mopt,
102 int is_remount); 102 int is_remount);
103static int ocfs2_check_set_options(struct super_block *sb,
104 struct mount_options *options);
103static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt); 105static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt);
104static void ocfs2_put_super(struct super_block *sb); 106static void ocfs2_put_super(struct super_block *sb);
105static int ocfs2_mount_volume(struct super_block *sb); 107static int ocfs2_mount_volume(struct super_block *sb);
@@ -600,7 +602,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
600 602
601 lock_kernel(); 603 lock_kernel();
602 604
603 if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { 605 if (!ocfs2_parse_options(sb, data, &parsed_options, 1) ||
606 !ocfs2_check_set_options(sb, &parsed_options)) {
604 ret = -EINVAL; 607 ret = -EINVAL;
605 goto out; 608 goto out;
606 } 609 }
@@ -691,8 +694,6 @@ unlock_osb:
691 if (!ret) { 694 if (!ret) {
692 /* Only save off the new mount options in case of a successful 695 /* Only save off the new mount options in case of a successful
693 * remount. */ 696 * remount. */
694 if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
695 parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
696 osb->s_mount_opt = parsed_options.mount_opt; 697 osb->s_mount_opt = parsed_options.mount_opt;
697 osb->s_atime_quantum = parsed_options.atime_quantum; 698 osb->s_atime_quantum = parsed_options.atime_quantum;
698 osb->preferred_slot = parsed_options.slot; 699 osb->preferred_slot = parsed_options.slot;
@@ -701,6 +702,10 @@ unlock_osb:
701 702
702 if (!ocfs2_is_hard_readonly(osb)) 703 if (!ocfs2_is_hard_readonly(osb))
703 ocfs2_set_journal_params(osb); 704 ocfs2_set_journal_params(osb);
705
706 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
707 ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ?
708 MS_POSIXACL : 0);
704 } 709 }
705out: 710out:
706 unlock_kernel(); 711 unlock_kernel();
@@ -1011,31 +1016,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1011 brelse(bh); 1016 brelse(bh);
1012 bh = NULL; 1017 bh = NULL;
1013 1018
1014 if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) 1019 if (!ocfs2_check_set_options(sb, &parsed_options)) {
1015 parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; 1020 status = -EINVAL;
1016 1021 goto read_super_error;
1022 }
1017 osb->s_mount_opt = parsed_options.mount_opt; 1023 osb->s_mount_opt = parsed_options.mount_opt;
1018 osb->s_atime_quantum = parsed_options.atime_quantum; 1024 osb->s_atime_quantum = parsed_options.atime_quantum;
1019 osb->preferred_slot = parsed_options.slot; 1025 osb->preferred_slot = parsed_options.slot;
1020 osb->osb_commit_interval = parsed_options.commit_interval; 1026 osb->osb_commit_interval = parsed_options.commit_interval;
1021 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); 1027 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
1022 osb->local_alloc_bits = osb->local_alloc_default_bits; 1028 osb->local_alloc_bits = osb->local_alloc_default_bits;
1023 if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA &&
1024 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1025 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1026 status = -EINVAL;
1027 mlog(ML_ERROR, "User quotas were requested, but this "
1028 "filesystem does not have the feature enabled.\n");
1029 goto read_super_error;
1030 }
1031 if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA &&
1032 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1033 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1034 status = -EINVAL;
1035 mlog(ML_ERROR, "Group quotas were requested, but this "
1036 "filesystem does not have the feature enabled.\n");
1037 goto read_super_error;
1038 }
1039 1029
1040 status = ocfs2_verify_userspace_stack(osb, &parsed_options); 1030 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
1041 if (status) 1031 if (status)
@@ -1245,6 +1235,40 @@ static struct file_system_type ocfs2_fs_type = {
1245 .next = NULL 1235 .next = NULL
1246}; 1236};
1247 1237
1238static int ocfs2_check_set_options(struct super_block *sb,
1239 struct mount_options *options)
1240{
1241 if (options->mount_opt & OCFS2_MOUNT_USRQUOTA &&
1242 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1243 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1244 mlog(ML_ERROR, "User quotas were requested, but this "
1245 "filesystem does not have the feature enabled.\n");
1246 return 0;
1247 }
1248 if (options->mount_opt & OCFS2_MOUNT_GRPQUOTA &&
1249 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1250 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1251 mlog(ML_ERROR, "Group quotas were requested, but this "
1252 "filesystem does not have the feature enabled.\n");
1253 return 0;
1254 }
1255 if (options->mount_opt & OCFS2_MOUNT_POSIX_ACL &&
1256 !OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) {
1257 mlog(ML_ERROR, "ACL support requested but extended attributes "
1258 "feature is not enabled\n");
1259 return 0;
1260 }
1261 /* No ACL setting specified? Use XATTR feature... */
1262 if (!(options->mount_opt & (OCFS2_MOUNT_POSIX_ACL |
1263 OCFS2_MOUNT_NO_POSIX_ACL))) {
1264 if (OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR))
1265 options->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
1266 else
1267 options->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
1268 }
1269 return 1;
1270}
1271
1248static int ocfs2_parse_options(struct super_block *sb, 1272static int ocfs2_parse_options(struct super_block *sb,
1249 char *options, 1273 char *options,
1250 struct mount_options *mopt, 1274 struct mount_options *mopt,
@@ -1392,40 +1416,19 @@ static int ocfs2_parse_options(struct super_block *sb,
1392 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1416 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
1393 break; 1417 break;
1394 case Opt_usrquota: 1418 case Opt_usrquota:
1395 /* We check only on remount, otherwise features
1396 * aren't yet initialized. */
1397 if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1398 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1399 mlog(ML_ERROR, "User quota requested but "
1400 "filesystem feature is not set\n");
1401 status = 0;
1402 goto bail;
1403 }
1404 mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA; 1419 mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
1405 break; 1420 break;
1406 case Opt_grpquota: 1421 case Opt_grpquota:
1407 if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1408 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1409 mlog(ML_ERROR, "Group quota requested but "
1410 "filesystem feature is not set\n");
1411 status = 0;
1412 goto bail;
1413 }
1414 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; 1422 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
1415 break; 1423 break;
1416#ifdef CONFIG_OCFS2_FS_POSIX_ACL
1417 case Opt_acl: 1424 case Opt_acl:
1418 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; 1425 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
1426 mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
1419 break; 1427 break;
1420 case Opt_noacl: 1428 case Opt_noacl:
1429 mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
1421 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; 1430 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
1422 break; 1431 break;
1423#else
1424 case Opt_acl:
1425 case Opt_noacl:
1426 printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
1427 break;
1428#endif
1429 default: 1432 default:
1430 mlog(ML_ERROR, 1433 mlog(ML_ERROR,
1431 "Unrecognized mount option \"%s\" " 1434 "Unrecognized mount option \"%s\" "
@@ -1502,12 +1505,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1502 if (opts & OCFS2_MOUNT_INODE64) 1505 if (opts & OCFS2_MOUNT_INODE64)
1503 seq_printf(s, ",inode64"); 1506 seq_printf(s, ",inode64");
1504 1507
1505#ifdef CONFIG_OCFS2_FS_POSIX_ACL
1506 if (opts & OCFS2_MOUNT_POSIX_ACL) 1508 if (opts & OCFS2_MOUNT_POSIX_ACL)
1507 seq_printf(s, ",acl"); 1509 seq_printf(s, ",acl");
1508 else 1510 else
1509 seq_printf(s, ",noacl"); 1511 seq_printf(s, ",noacl");
1510#endif
1511 1512
1512 return 0; 1513 return 0;
1513} 1514}
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index e3421030a69f..49b133ccbf11 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -163,6 +163,7 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
163 .getxattr = generic_getxattr, 163 .getxattr = generic_getxattr,
164 .listxattr = ocfs2_listxattr, 164 .listxattr = ocfs2_listxattr,
165 .removexattr = generic_removexattr, 165 .removexattr = generic_removexattr,
166 .fiemap = ocfs2_fiemap,
166}; 167};
167const struct inode_operations ocfs2_fast_symlink_inode_operations = { 168const struct inode_operations ocfs2_fast_symlink_inode_operations = {
168 .readlink = ocfs2_readlink, 169 .readlink = ocfs2_readlink,
@@ -174,4 +175,5 @@ const struct inode_operations ocfs2_fast_symlink_inode_operations = {
174 .getxattr = generic_getxattr, 175 .getxattr = generic_getxattr,
175 .listxattr = ocfs2_listxattr, 176 .listxattr = ocfs2_listxattr,
176 .removexattr = generic_removexattr, 177 .removexattr = generic_removexattr,
178 .fiemap = ocfs2_fiemap,
177}; 179};
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 43c114831c0d..8fc6fb071c6d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -98,10 +98,8 @@ static struct ocfs2_xattr_def_value_root def_xv = {
98 98
99struct xattr_handler *ocfs2_xattr_handlers[] = { 99struct xattr_handler *ocfs2_xattr_handlers[] = {
100 &ocfs2_xattr_user_handler, 100 &ocfs2_xattr_user_handler,
101#ifdef CONFIG_OCFS2_FS_POSIX_ACL
102 &ocfs2_xattr_acl_access_handler, 101 &ocfs2_xattr_acl_access_handler,
103 &ocfs2_xattr_acl_default_handler, 102 &ocfs2_xattr_acl_default_handler,
104#endif
105 &ocfs2_xattr_trusted_handler, 103 &ocfs2_xattr_trusted_handler,
106 &ocfs2_xattr_security_handler, 104 &ocfs2_xattr_security_handler,
107 NULL 105 NULL
@@ -109,12 +107,10 @@ struct xattr_handler *ocfs2_xattr_handlers[] = {
109 107
110static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 108static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
111 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 109 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
112#ifdef CONFIG_OCFS2_FS_POSIX_ACL
113 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 110 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
114 = &ocfs2_xattr_acl_access_handler, 111 = &ocfs2_xattr_acl_access_handler,
115 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 112 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
116 = &ocfs2_xattr_acl_default_handler, 113 = &ocfs2_xattr_acl_default_handler,
117#endif
118 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 114 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
119 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 115 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler,
120}; 116};
@@ -6064,7 +6060,7 @@ static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6064 * to the extent block, so just calculate a maximum record num. 6060 * to the extent block, so just calculate a maximum record num.
6065 */ 6061 */
6066 if (!xv->xr_list.l_tree_depth) 6062 if (!xv->xr_list.l_tree_depth)
6067 *num_recs += xv->xr_list.l_next_free_rec; 6063 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6068 else 6064 else
6069 *num_recs += ocfs2_clusters_for_bytes(sb, 6065 *num_recs += ocfs2_clusters_for_bytes(sb,
6070 XATTR_SIZE_MAX); 6066 XATTR_SIZE_MAX);
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 08e36389f56d..abd72a47f520 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -40,10 +40,8 @@ struct ocfs2_security_xattr_info {
40extern struct xattr_handler ocfs2_xattr_user_handler; 40extern struct xattr_handler ocfs2_xattr_user_handler;
41extern struct xattr_handler ocfs2_xattr_trusted_handler; 41extern struct xattr_handler ocfs2_xattr_trusted_handler;
42extern struct xattr_handler ocfs2_xattr_security_handler; 42extern struct xattr_handler ocfs2_xattr_security_handler;
43#ifdef CONFIG_OCFS2_FS_POSIX_ACL
44extern struct xattr_handler ocfs2_xattr_acl_access_handler; 43extern struct xattr_handler ocfs2_xattr_acl_access_handler;
45extern struct xattr_handler ocfs2_xattr_acl_default_handler; 44extern struct xattr_handler ocfs2_xattr_acl_default_handler;
46#endif
47extern struct xattr_handler *ocfs2_xattr_handlers[]; 45extern struct xattr_handler *ocfs2_xattr_handlers[];
48 46
49ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); 47ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index f560325c444f..13b5d0708175 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -327,94 +327,6 @@ static inline void task_context_switch_counts(struct seq_file *m,
327 p->nivcsw); 327 p->nivcsw);
328} 328}
329 329
330#ifdef CONFIG_MMU
331
332struct stack_stats {
333 struct vm_area_struct *vma;
334 unsigned long startpage;
335 unsigned long usage;
336};
337
338static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr,
339 unsigned long end, struct mm_walk *walk)
340{
341 struct stack_stats *ss = walk->private;
342 struct vm_area_struct *vma = ss->vma;
343 pte_t *pte, ptent;
344 spinlock_t *ptl;
345 int ret = 0;
346
347 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
348 for (; addr != end; pte++, addr += PAGE_SIZE) {
349 ptent = *pte;
350
351#ifdef CONFIG_STACK_GROWSUP
352 if (pte_present(ptent) || is_swap_pte(ptent))
353 ss->usage = addr - ss->startpage + PAGE_SIZE;
354#else
355 if (pte_present(ptent) || is_swap_pte(ptent)) {
356 ss->usage = ss->startpage - addr + PAGE_SIZE;
357 pte++;
358 ret = 1;
359 break;
360 }
361#endif
362 }
363 pte_unmap_unlock(pte - 1, ptl);
364 cond_resched();
365 return ret;
366}
367
368static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma,
369 struct task_struct *task)
370{
371 struct stack_stats ss;
372 struct mm_walk stack_walk = {
373 .pmd_entry = stack_usage_pte_range,
374 .mm = vma->vm_mm,
375 .private = &ss,
376 };
377
378 if (!vma->vm_mm || is_vm_hugetlb_page(vma))
379 return 0;
380
381 ss.vma = vma;
382 ss.startpage = task->stack_start & PAGE_MASK;
383 ss.usage = 0;
384
385#ifdef CONFIG_STACK_GROWSUP
386 walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end,
387 &stack_walk);
388#else
389 walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE,
390 &stack_walk);
391#endif
392 return ss.usage;
393}
394
395static inline void task_show_stack_usage(struct seq_file *m,
396 struct task_struct *task)
397{
398 struct vm_area_struct *vma;
399 struct mm_struct *mm = get_task_mm(task);
400
401 if (mm) {
402 down_read(&mm->mmap_sem);
403 vma = find_vma(mm, task->stack_start);
404 if (vma)
405 seq_printf(m, "Stack usage:\t%lu kB\n",
406 get_stack_usage_in_bytes(vma, task) >> 10);
407
408 up_read(&mm->mmap_sem);
409 mmput(mm);
410 }
411}
412#else
413static void task_show_stack_usage(struct seq_file *m, struct task_struct *task)
414{
415}
416#endif /* CONFIG_MMU */
417
418static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) 330static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
419{ 331{
420 seq_printf(m, "Cpus_allowed:\t"); 332 seq_printf(m, "Cpus_allowed:\t");
@@ -445,7 +357,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
445 task_show_regs(m, task); 357 task_show_regs(m, task);
446#endif 358#endif
447 task_context_switch_counts(m, task); 359 task_context_switch_counts(m, task);
448 task_show_stack_usage(m, task);
449 return 0; 360 return 0;
450} 361}
451 362
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 18d5cc62d8ed..e42bbd843ed1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1419,7 +1419,6 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1419 goto out; 1419 goto out;
1420 1420
1421 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); 1421 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
1422 nd->last_type = LAST_BIND;
1423out: 1422out:
1424 return ERR_PTR(error); 1423 return ERR_PTR(error);
1425} 1424}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 47c03f4336b8..f277c4a111cb 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -361,12 +361,11 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
361 if (!pte_present(ptent)) 361 if (!pte_present(ptent))
362 continue; 362 continue;
363 363
364 mss->resident += PAGE_SIZE;
365
366 page = vm_normal_page(vma, addr, ptent); 364 page = vm_normal_page(vma, addr, ptent);
367 if (!page) 365 if (!page)
368 continue; 366 continue;
369 367
368 mss->resident += PAGE_SIZE;
370 /* Accumulate the size in pages that have been accessed. */ 369 /* Accumulate the size in pages that have been accessed. */
371 if (pte_young(ptent) || PageReferenced(page)) 370 if (pte_young(ptent) || PageReferenced(page))
372 mss->referenced += PAGE_SIZE; 371 mss->referenced += PAGE_SIZE;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index dea86abdf2e7..3fc62b097bed 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1377,6 +1377,9 @@ static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
1377static qsize_t inode_get_rsv_space(struct inode *inode) 1377static qsize_t inode_get_rsv_space(struct inode *inode)
1378{ 1378{
1379 qsize_t ret; 1379 qsize_t ret;
1380
1381 if (!inode->i_sb->dq_op->get_reserved_space)
1382 return 0;
1380 spin_lock(&inode->i_lock); 1383 spin_lock(&inode->i_lock);
1381 ret = *inode_reserved_space(inode); 1384 ret = *inode_reserved_space(inode);
1382 spin_unlock(&inode->i_lock); 1385 spin_unlock(&inode->i_lock);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 2efc57173fd7..1739a4aba25f 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -123,30 +123,6 @@ add_error:
123 123
124/*****************************************************************************/ 124/*****************************************************************************/
125/* 125/*
126 * check that file shrinkage doesn't leave any VMAs dangling in midair
127 */
128static int ramfs_nommu_check_mappings(struct inode *inode,
129 size_t newsize, size_t size)
130{
131 struct vm_area_struct *vma;
132 struct prio_tree_iter iter;
133
134 /* search for VMAs that fall within the dead zone */
135 vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
136 newsize >> PAGE_SHIFT,
137 (size + PAGE_SIZE - 1) >> PAGE_SHIFT
138 ) {
139 /* found one - only interested if it's shared out of the page
140 * cache */
141 if (vma->vm_flags & VM_SHARED)
142 return -ETXTBSY; /* not quite true, but near enough */
143 }
144
145 return 0;
146}
147
148/*****************************************************************************/
149/*
150 * 126 *
151 */ 127 */
152static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) 128static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
@@ -164,7 +140,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
164 140
165 /* check that a decrease in size doesn't cut off any shared mappings */ 141 /* check that a decrease in size doesn't cut off any shared mappings */
166 if (newsize < size) { 142 if (newsize < size) {
167 ret = ramfs_nommu_check_mappings(inode, newsize, size); 143 ret = nommu_shrink_inode_mappings(inode, size, newsize);
168 if (ret < 0) 144 if (ret < 0)
169 return ret; 145 return ret;
170 } 146 }
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 685495707181..65c872761177 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1277,7 +1277,10 @@ int reiserfs_init_bitmap_cache(struct super_block *sb)
1277 struct reiserfs_bitmap_info *bitmap; 1277 struct reiserfs_bitmap_info *bitmap;
1278 unsigned int bmap_nr = reiserfs_bmap_count(sb); 1278 unsigned int bmap_nr = reiserfs_bmap_count(sb);
1279 1279
1280 /* Avoid lock recursion in fault case */
1281 reiserfs_write_unlock(sb);
1280 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); 1282 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
1283 reiserfs_write_lock(sb);
1281 if (bitmap == NULL) 1284 if (bitmap == NULL)
1282 return -ENOMEM; 1285 return -ENOMEM;
1283 1286
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 290ae38fca8a..9087b10209e6 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -31,11 +31,12 @@ void reiserfs_delete_inode(struct inode *inode)
31 JOURNAL_PER_BALANCE_CNT * 2 + 31 JOURNAL_PER_BALANCE_CNT * 2 +
32 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); 32 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
33 struct reiserfs_transaction_handle th; 33 struct reiserfs_transaction_handle th;
34 int depth;
34 int err; 35 int err;
35 36
36 truncate_inode_pages(&inode->i_data, 0); 37 truncate_inode_pages(&inode->i_data, 0);
37 38
38 reiserfs_write_lock(inode->i_sb); 39 depth = reiserfs_write_lock_once(inode->i_sb);
39 40
40 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 41 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
41 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ 42 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
@@ -74,7 +75,7 @@ void reiserfs_delete_inode(struct inode *inode)
74 out: 75 out:
75 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ 76 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
76 inode->i_blocks = 0; 77 inode->i_blocks = 0;
77 reiserfs_write_unlock(inode->i_sb); 78 reiserfs_write_unlock_once(inode->i_sb, depth);
78} 79}
79 80
80static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, 81static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
@@ -3061,13 +3062,14 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3061int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) 3062int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3062{ 3063{
3063 struct inode *inode = dentry->d_inode; 3064 struct inode *inode = dentry->d_inode;
3064 int error;
3065 unsigned int ia_valid; 3065 unsigned int ia_valid;
3066 int depth;
3067 int error;
3066 3068
3067 /* must be turned off for recursive notify_change calls */ 3069 /* must be turned off for recursive notify_change calls */
3068 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); 3070 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3069 3071
3070 reiserfs_write_lock(inode->i_sb); 3072 depth = reiserfs_write_lock_once(inode->i_sb);
3071 if (attr->ia_valid & ATTR_SIZE) { 3073 if (attr->ia_valid & ATTR_SIZE) {
3072 /* version 2 items will be caught by the s_maxbytes check 3074 /* version 2 items will be caught by the s_maxbytes check
3073 ** done for us in vmtruncate 3075 ** done for us in vmtruncate
@@ -3148,8 +3150,17 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3148 journal_end(&th, inode->i_sb, jbegin_count); 3150 journal_end(&th, inode->i_sb, jbegin_count);
3149 } 3151 }
3150 } 3152 }
3151 if (!error) 3153 if (!error) {
3154 /*
3155 * Relax the lock here, as it might truncate the
3156 * inode pages and wait for inode pages locks.
3157 * To release such page lock, the owner needs the
3158 * reiserfs lock
3159 */
3160 reiserfs_write_unlock_once(inode->i_sb, depth);
3152 error = inode_setattr(inode, attr); 3161 error = inode_setattr(inode, attr);
3162 depth = reiserfs_write_lock_once(inode->i_sb);
3163 }
3153 } 3164 }
3154 3165
3155 if (!error && reiserfs_posixacl(inode->i_sb)) { 3166 if (!error && reiserfs_posixacl(inode->i_sb)) {
@@ -3158,7 +3169,8 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3158 } 3169 }
3159 3170
3160 out: 3171 out:
3161 reiserfs_write_unlock(inode->i_sb); 3172 reiserfs_write_unlock_once(inode->i_sb, depth);
3173
3162 return error; 3174 return error;
3163} 3175}
3164 3176
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index ace77451ceb1..f53505de0712 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -104,9 +104,10 @@ setflags_out:
104 err = put_user(inode->i_generation, (int __user *)arg); 104 err = put_user(inode->i_generation, (int __user *)arg);
105 break; 105 break;
106 case REISERFS_IOC_SETVERSION: 106 case REISERFS_IOC_SETVERSION:
107 if (!is_owner_or_cap(inode)) 107 if (!is_owner_or_cap(inode)) {
108 err = -EPERM; 108 err = -EPERM;
109 break; 109 break;
110 }
110 err = mnt_want_write(filp->f_path.mnt); 111 err = mnt_want_write(filp->f_path.mnt);
111 if (err) 112 if (err)
112 break; 113 break;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 2f8a7e7b8dab..83ac4d3b3cb0 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2009,10 +2009,11 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
2009 destroy_workqueue(commit_wq); 2009 destroy_workqueue(commit_wq);
2010 commit_wq = NULL; 2010 commit_wq = NULL;
2011 } 2011 }
2012 reiserfs_write_lock(sb);
2013 2012
2014 free_journal_ram(sb); 2013 free_journal_ram(sb);
2015 2014
2015 reiserfs_write_lock(sb);
2016
2016 return 0; 2017 return 0;
2017} 2018}
2018 2019
@@ -2758,11 +2759,18 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2758 struct reiserfs_journal *journal; 2759 struct reiserfs_journal *journal;
2759 struct reiserfs_journal_list *jl; 2760 struct reiserfs_journal_list *jl;
2760 char b[BDEVNAME_SIZE]; 2761 char b[BDEVNAME_SIZE];
2762 int ret;
2761 2763
2764 /*
2765 * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS
2766 * dependency inversion warnings.
2767 */
2768 reiserfs_write_unlock(sb);
2762 journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal)); 2769 journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal));
2763 if (!journal) { 2770 if (!journal) {
2764 reiserfs_warning(sb, "journal-1256", 2771 reiserfs_warning(sb, "journal-1256",
2765 "unable to get memory for journal structure"); 2772 "unable to get memory for journal structure");
2773 reiserfs_write_lock(sb);
2766 return 1; 2774 return 1;
2767 } 2775 }
2768 memset(journal, 0, sizeof(struct reiserfs_journal)); 2776 memset(journal, 0, sizeof(struct reiserfs_journal));
@@ -2771,10 +2779,12 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2771 INIT_LIST_HEAD(&journal->j_working_list); 2779 INIT_LIST_HEAD(&journal->j_working_list);
2772 INIT_LIST_HEAD(&journal->j_journal_list); 2780 INIT_LIST_HEAD(&journal->j_journal_list);
2773 journal->j_persistent_trans = 0; 2781 journal->j_persistent_trans = 0;
2774 if (reiserfs_allocate_list_bitmaps(sb, 2782 ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
2775 journal->j_list_bitmap, 2783 reiserfs_bmap_count(sb));
2776 reiserfs_bmap_count(sb))) 2784 reiserfs_write_lock(sb);
2785 if (ret)
2777 goto free_and_return; 2786 goto free_and_return;
2787
2778 allocate_bitmap_nodes(sb); 2788 allocate_bitmap_nodes(sb);
2779 2789
2780 /* reserved for journal area support */ 2790 /* reserved for journal area support */
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index ee2cfc0fd8a7..b87aa2c1afc1 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -86,3 +86,12 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
86 reiserfs_panic(sb, "%s called without kernel lock held %d", 86 reiserfs_panic(sb, "%s called without kernel lock held %d",
87 caller); 87 caller);
88} 88}
89
90#ifdef CONFIG_REISERFS_CHECK
91void reiserfs_lock_check_recursive(struct super_block *sb)
92{
93 struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
94
95 WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n");
96}
97#endif
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index e296ff72a6cc..9d4dcf0b07cb 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -921,6 +921,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
921 struct reiserfs_transaction_handle th; 921 struct reiserfs_transaction_handle th;
922 int jbegin_count; 922 int jbegin_count;
923 unsigned long savelink; 923 unsigned long savelink;
924 int depth;
924 925
925 inode = dentry->d_inode; 926 inode = dentry->d_inode;
926 927
@@ -932,7 +933,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
932 JOURNAL_PER_BALANCE_CNT * 2 + 2 + 933 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
933 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 934 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
934 935
935 reiserfs_write_lock(dir->i_sb); 936 depth = reiserfs_write_lock_once(dir->i_sb);
936 retval = journal_begin(&th, dir->i_sb, jbegin_count); 937 retval = journal_begin(&th, dir->i_sb, jbegin_count);
937 if (retval) 938 if (retval)
938 goto out_unlink; 939 goto out_unlink;
@@ -993,7 +994,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
993 994
994 retval = journal_end(&th, dir->i_sb, jbegin_count); 995 retval = journal_end(&th, dir->i_sb, jbegin_count);
995 reiserfs_check_path(&path); 996 reiserfs_check_path(&path);
996 reiserfs_write_unlock(dir->i_sb); 997 reiserfs_write_unlock_once(dir->i_sb, depth);
997 return retval; 998 return retval;
998 999
999 end_unlink: 1000 end_unlink:
@@ -1003,7 +1004,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
1003 if (err) 1004 if (err)
1004 retval = err; 1005 retval = err;
1005 out_unlink: 1006 out_unlink:
1006 reiserfs_write_unlock(dir->i_sb); 1007 reiserfs_write_unlock_once(dir->i_sb, depth);
1007 return retval; 1008 return retval;
1008} 1009}
1009 1010
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8c7033a8b67e..81f09fab8ae4 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -83,7 +83,8 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
83 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 83 BUG_ON(!mutex_is_locked(&dir->i_mutex));
84 vfs_dq_init(dir); 84 vfs_dq_init(dir);
85 85
86 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 86 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
87 I_MUTEX_CHILD, dir->i_sb);
87 error = dir->i_op->unlink(dir, dentry); 88 error = dir->i_op->unlink(dir, dentry);
88 mutex_unlock(&dentry->d_inode->i_mutex); 89 mutex_unlock(&dentry->d_inode->i_mutex);
89 90
@@ -98,7 +99,8 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
98 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 99 BUG_ON(!mutex_is_locked(&dir->i_mutex));
99 vfs_dq_init(dir); 100 vfs_dq_init(dir);
100 101
101 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 102 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
103 I_MUTEX_CHILD, dir->i_sb);
102 dentry_unhash(dentry); 104 dentry_unhash(dentry);
103 error = dir->i_op->rmdir(dir, dentry); 105 error = dir->i_op->rmdir(dir, dentry);
104 if (!error) 106 if (!error)
@@ -235,16 +237,22 @@ static int reiserfs_for_each_xattr(struct inode *inode,
235 if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1) 237 if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1)
236 return 0; 238 return 0;
237 239
240 reiserfs_write_unlock(inode->i_sb);
238 dir = open_xa_dir(inode, XATTR_REPLACE); 241 dir = open_xa_dir(inode, XATTR_REPLACE);
239 if (IS_ERR(dir)) { 242 if (IS_ERR(dir)) {
240 err = PTR_ERR(dir); 243 err = PTR_ERR(dir);
244 reiserfs_write_lock(inode->i_sb);
241 goto out; 245 goto out;
242 } else if (!dir->d_inode) { 246 } else if (!dir->d_inode) {
243 err = 0; 247 err = 0;
248 reiserfs_write_lock(inode->i_sb);
244 goto out_dir; 249 goto out_dir;
245 } 250 }
246 251
247 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); 252 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
253
254 reiserfs_write_lock(inode->i_sb);
255
248 buf.xadir = dir; 256 buf.xadir = dir;
249 err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos); 257 err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos);
250 while ((err == 0 || err == -ENOSPC) && buf.count) { 258 while ((err == 0 || err == -ENOSPC) && buf.count) {
@@ -283,8 +291,9 @@ static int reiserfs_for_each_xattr(struct inode *inode,
283 err = journal_begin(&th, inode->i_sb, blocks); 291 err = journal_begin(&th, inode->i_sb, blocks);
284 if (!err) { 292 if (!err) {
285 int jerror; 293 int jerror;
286 mutex_lock_nested(&dir->d_parent->d_inode->i_mutex, 294 reiserfs_mutex_lock_nested_safe(
287 I_MUTEX_XATTR); 295 &dir->d_parent->d_inode->i_mutex,
296 I_MUTEX_XATTR, inode->i_sb);
288 err = action(dir, data); 297 err = action(dir, data);
289 jerror = journal_end(&th, inode->i_sb, blocks); 298 jerror = journal_end(&th, inode->i_sb, blocks);
290 mutex_unlock(&dir->d_parent->d_inode->i_mutex); 299 mutex_unlock(&dir->d_parent->d_inode->i_mutex);
@@ -443,7 +452,9 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
443 } 452 }
444 453
445 if (dentry->d_inode) { 454 if (dentry->d_inode) {
455 reiserfs_write_lock(inode->i_sb);
446 err = xattr_unlink(xadir->d_inode, dentry); 456 err = xattr_unlink(xadir->d_inode, dentry);
457 reiserfs_write_unlock(inode->i_sb);
447 update_ctime(inode); 458 update_ctime(inode);
448 } 459 }
449 460
@@ -477,15 +488,24 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
477 if (get_inode_sd_version(inode) == STAT_DATA_V1) 488 if (get_inode_sd_version(inode) == STAT_DATA_V1)
478 return -EOPNOTSUPP; 489 return -EOPNOTSUPP;
479 490
480 if (!buffer) 491 reiserfs_write_unlock(inode->i_sb);
481 return lookup_and_delete_xattr(inode, name); 492
493 if (!buffer) {
494 err = lookup_and_delete_xattr(inode, name);
495 reiserfs_write_lock(inode->i_sb);
496 return err;
497 }
482 498
483 dentry = xattr_lookup(inode, name, flags); 499 dentry = xattr_lookup(inode, name, flags);
484 if (IS_ERR(dentry)) 500 if (IS_ERR(dentry)) {
501 reiserfs_write_lock(inode->i_sb);
485 return PTR_ERR(dentry); 502 return PTR_ERR(dentry);
503 }
486 504
487 down_write(&REISERFS_I(inode)->i_xattr_sem); 505 down_write(&REISERFS_I(inode)->i_xattr_sem);
488 506
507 reiserfs_write_lock(inode->i_sb);
508
489 xahash = xattr_hash(buffer, buffer_size); 509 xahash = xattr_hash(buffer, buffer_size);
490 while (buffer_pos < buffer_size || buffer_pos == 0) { 510 while (buffer_pos < buffer_size || buffer_pos == 0) {
491 size_t chunk; 511 size_t chunk;
@@ -540,8 +560,12 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
540 .ia_size = buffer_size, 560 .ia_size = buffer_size,
541 .ia_valid = ATTR_SIZE | ATTR_CTIME, 561 .ia_valid = ATTR_SIZE | ATTR_CTIME,
542 }; 562 };
563
564 reiserfs_write_unlock(inode->i_sb);
543 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); 565 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
544 down_write(&dentry->d_inode->i_alloc_sem); 566 down_write(&dentry->d_inode->i_alloc_sem);
567 reiserfs_write_lock(inode->i_sb);
568
545 err = reiserfs_setattr(dentry, &newattrs); 569 err = reiserfs_setattr(dentry, &newattrs);
546 up_write(&dentry->d_inode->i_alloc_sem); 570 up_write(&dentry->d_inode->i_alloc_sem);
547 mutex_unlock(&dentry->d_inode->i_mutex); 571 mutex_unlock(&dentry->d_inode->i_mutex);
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index cc32e6ada67b..dd20a7883f0f 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -455,7 +455,9 @@ int reiserfs_acl_chmod(struct inode *inode)
455 return 0; 455 return 0;
456 } 456 }
457 457
458 reiserfs_write_unlock(inode->i_sb);
458 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); 459 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
460 reiserfs_write_lock(inode->i_sb);
459 if (!acl) 461 if (!acl)
460 return 0; 462 return 0;
461 if (IS_ERR(acl)) 463 if (IS_ERR(acl))
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index f05f2303a8b8..699f371b9f12 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -106,8 +106,10 @@ static struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
106 return NULL; 106 return NULL;
107 107
108 t = atomic_cmpxchg(&sd->s_active, v, v + 1); 108 t = atomic_cmpxchg(&sd->s_active, v, v + 1);
109 if (likely(t == v)) 109 if (likely(t == v)) {
110 rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
110 return sd; 111 return sd;
112 }
111 if (t < 0) 113 if (t < 0)
112 return NULL; 114 return NULL;
113 115
@@ -130,6 +132,7 @@ static void sysfs_put_active(struct sysfs_dirent *sd)
130 if (unlikely(!sd)) 132 if (unlikely(!sd))
131 return; 133 return;
132 134
135 rwsem_release(&sd->dep_map, 1, _RET_IP_);
133 v = atomic_dec_return(&sd->s_active); 136 v = atomic_dec_return(&sd->s_active);
134 if (likely(v != SD_DEACTIVATED_BIAS)) 137 if (likely(v != SD_DEACTIVATED_BIAS))
135 return; 138 return;
@@ -194,15 +197,21 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
194 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED)); 197 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
195 sd->s_sibling = (void *)&wait; 198 sd->s_sibling = (void *)&wait;
196 199
200 rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
197 /* atomic_add_return() is a mb(), put_active() will always see 201 /* atomic_add_return() is a mb(), put_active() will always see
198 * the updated sd->s_sibling. 202 * the updated sd->s_sibling.
199 */ 203 */
200 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); 204 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
201 205
202 if (v != SD_DEACTIVATED_BIAS) 206 if (v != SD_DEACTIVATED_BIAS) {
207 lock_contended(&sd->dep_map, _RET_IP_);
203 wait_for_completion(&wait); 208 wait_for_completion(&wait);
209 }
204 210
205 sd->s_sibling = NULL; 211 sd->s_sibling = NULL;
212
213 lock_acquired(&sd->dep_map, _RET_IP_);
214 rwsem_release(&sd->dep_map, 1, _RET_IP_);
206} 215}
207 216
208static int sysfs_alloc_ino(ino_t *pino) 217static int sysfs_alloc_ino(ino_t *pino)
@@ -345,6 +354,7 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
345 354
346 atomic_set(&sd->s_count, 1); 355 atomic_set(&sd->s_count, 1);
347 atomic_set(&sd->s_active, 0); 356 atomic_set(&sd->s_active, 0);
357 sysfs_dirent_init_lockdep(sd);
348 358
349 sd->s_name = name; 359 sd->s_name = name;
350 sd->s_mode = mode; 360 sd->s_mode = mode;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ca52e7b9d8f8..cdd9377a6e06 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,6 +8,7 @@
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 */ 9 */
10 10
11#include <linux/lockdep.h>
11#include <linux/fs.h> 12#include <linux/fs.h>
12 13
13struct sysfs_open_dirent; 14struct sysfs_open_dirent;
@@ -50,6 +51,9 @@ struct sysfs_inode_attrs {
50struct sysfs_dirent { 51struct sysfs_dirent {
51 atomic_t s_count; 52 atomic_t s_count;
52 atomic_t s_active; 53 atomic_t s_active;
54#ifdef CONFIG_DEBUG_LOCK_ALLOC
55 struct lockdep_map dep_map;
56#endif
53 struct sysfs_dirent *s_parent; 57 struct sysfs_dirent *s_parent;
54 struct sysfs_dirent *s_sibling; 58 struct sysfs_dirent *s_sibling;
55 const char *s_name; 59 const char *s_name;
@@ -84,6 +88,17 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
84 return sd->s_flags & SYSFS_TYPE_MASK; 88 return sd->s_flags & SYSFS_TYPE_MASK;
85} 89}
86 90
91#ifdef CONFIG_DEBUG_LOCK_ALLOC
92#define sysfs_dirent_init_lockdep(sd) \
93do { \
94 static struct lock_class_key __key; \
95 \
96 lockdep_init_map(&sd->dep_map, "s_active", &__key, 0); \
97} while(0)
98#else
99#define sysfs_dirent_init_lockdep(sd) do {} while(0)
100#endif
101
87/* 102/*
88 * Context structure to be used while adding/removing nodes. 103 * Context structure to be used while adding/removing nodes.
89 */ 104 */
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 618c2701d3a7..e5a3d8e96bb7 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -54,6 +54,7 @@
54 */ 54 */
55 55
56#include <linux/pagemap.h> 56#include <linux/pagemap.h>
57#include <linux/list_sort.h>
57#include "ubifs.h" 58#include "ubifs.h"
58 59
59/* 60/*
@@ -108,101 +109,6 @@ static int switch_gc_head(struct ubifs_info *c)
108} 109}
109 110
110/** 111/**
111 * list_sort - sort a list.
112 * @priv: private data, passed to @cmp
113 * @head: the list to sort
114 * @cmp: the elements comparison function
115 *
116 * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
117 * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
118 * in ascending order.
119 *
120 * The comparison function @cmp is supposed to return a negative value if @a is
121 * than @b, and a positive value if @a is greater than @b. If @a and @b are
122 * equivalent, then it does not matter what this function returns.
123 */
124static void list_sort(void *priv, struct list_head *head,
125 int (*cmp)(void *priv, struct list_head *a,
126 struct list_head *b))
127{
128 struct list_head *p, *q, *e, *list, *tail, *oldhead;
129 int insize, nmerges, psize, qsize, i;
130
131 if (list_empty(head))
132 return;
133
134 list = head->next;
135 list_del(head);
136 insize = 1;
137 for (;;) {
138 p = oldhead = list;
139 list = tail = NULL;
140 nmerges = 0;
141
142 while (p) {
143 nmerges++;
144 q = p;
145 psize = 0;
146 for (i = 0; i < insize; i++) {
147 psize++;
148 q = q->next == oldhead ? NULL : q->next;
149 if (!q)
150 break;
151 }
152
153 qsize = insize;
154 while (psize > 0 || (qsize > 0 && q)) {
155 if (!psize) {
156 e = q;
157 q = q->next;
158 qsize--;
159 if (q == oldhead)
160 q = NULL;
161 } else if (!qsize || !q) {
162 e = p;
163 p = p->next;
164 psize--;
165 if (p == oldhead)
166 p = NULL;
167 } else if (cmp(priv, p, q) <= 0) {
168 e = p;
169 p = p->next;
170 psize--;
171 if (p == oldhead)
172 p = NULL;
173 } else {
174 e = q;
175 q = q->next;
176 qsize--;
177 if (q == oldhead)
178 q = NULL;
179 }
180 if (tail)
181 tail->next = e;
182 else
183 list = e;
184 e->prev = tail;
185 tail = e;
186 }
187 p = q;
188 }
189
190 tail->next = list;
191 list->prev = tail;
192
193 if (nmerges <= 1)
194 break;
195
196 insize *= 2;
197 }
198
199 head->next = list;
200 head->prev = list->prev;
201 list->prev->next = head;
202 list->prev = head;
203}
204
205/**
206 * data_nodes_cmp - compare 2 data nodes. 112 * data_nodes_cmp - compare 2 data nodes.
207 * @priv: UBIFS file-system description object 113 * @priv: UBIFS file-system description object
208 * @a: first data node 114 * @a: first data node
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 2512125dfa7c..883ca5ab8af5 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -251,8 +251,9 @@ xfs_set_mode(struct inode *inode, mode_t mode)
251 if (mode != inode->i_mode) { 251 if (mode != inode->i_mode) {
252 struct iattr iattr; 252 struct iattr iattr;
253 253
254 iattr.ia_valid = ATTR_MODE; 254 iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
255 iattr.ia_mode = mode; 255 iattr.ia_mode = mode;
256 iattr.ia_ctime = current_fs_time(inode->i_sb);
256 257
257 error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); 258 error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
258 } 259 }
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 09783cc444ac..77414db10dc2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -954,16 +954,14 @@ xfs_fs_destroy_inode(
954 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); 954 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
955 955
956 /* 956 /*
957 * If we have nothing to flush with this inode then complete the 957 * We always use background reclaim here because even if the
958 * teardown now, otherwise delay the flush operation. 958 * inode is clean, it still may be under IO and hence we have
959 * to take the flush lock. The background reclaim path handles
960 * this more efficiently than we can here, so simply let background
961 * reclaim tear down all inodes.
959 */ 962 */
960 if (!xfs_inode_clean(ip)) {
961 xfs_inode_set_reclaim_tag(ip);
962 return;
963 }
964
965out_reclaim: 963out_reclaim:
966 xfs_ireclaim(ip); 964 xfs_inode_set_reclaim_tag(ip);
967} 965}
968 966
969/* 967/*
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 6fed97a8cd3e..1f5e4bb5e970 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -65,7 +65,6 @@ xfs_inode_ag_lookup(
65 * as the tree is sparse and a gang lookup walks to find 65 * as the tree is sparse and a gang lookup walks to find
66 * the number of objects requested. 66 * the number of objects requested.
67 */ 67 */
68 read_lock(&pag->pag_ici_lock);
69 if (tag == XFS_ICI_NO_TAG) { 68 if (tag == XFS_ICI_NO_TAG) {
70 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
71 (void **)&ip, *first_index, 1); 70 (void **)&ip, *first_index, 1);
@@ -74,7 +73,7 @@ xfs_inode_ag_lookup(
74 (void **)&ip, *first_index, 1, tag); 73 (void **)&ip, *first_index, 1, tag);
75 } 74 }
76 if (!nr_found) 75 if (!nr_found)
77 goto unlock; 76 return NULL;
78 77
79 /* 78 /*
80 * Update the index for the next lookup. Catch overflows 79 * Update the index for the next lookup. Catch overflows
@@ -84,13 +83,8 @@ xfs_inode_ag_lookup(
84 */ 83 */
85 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
86 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
87 goto unlock; 86 return NULL;
88
89 return ip; 87 return ip;
90
91unlock:
92 read_unlock(&pag->pag_ici_lock);
93 return NULL;
94} 88}
95 89
96STATIC int 90STATIC int
@@ -100,7 +94,8 @@ xfs_inode_ag_walk(
100 int (*execute)(struct xfs_inode *ip, 94 int (*execute)(struct xfs_inode *ip,
101 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
102 int flags, 96 int flags,
103 int tag) 97 int tag,
98 int exclusive)
104{ 99{
105 struct xfs_perag *pag = &mp->m_perag[ag]; 100 struct xfs_perag *pag = &mp->m_perag[ag];
106 uint32_t first_index; 101 uint32_t first_index;
@@ -114,10 +109,20 @@ restart:
114 int error = 0; 109 int error = 0;
115 xfs_inode_t *ip; 110 xfs_inode_t *ip;
116 111
112 if (exclusive)
113 write_lock(&pag->pag_ici_lock);
114 else
115 read_lock(&pag->pag_ici_lock);
117 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); 116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
118 if (!ip) 117 if (!ip) {
118 if (exclusive)
119 write_unlock(&pag->pag_ici_lock);
120 else
121 read_unlock(&pag->pag_ici_lock);
119 break; 122 break;
123 }
120 124
125 /* execute releases pag->pag_ici_lock */
121 error = execute(ip, pag, flags); 126 error = execute(ip, pag, flags);
122 if (error == EAGAIN) { 127 if (error == EAGAIN) {
123 skipped++; 128 skipped++;
@@ -125,9 +130,8 @@ restart:
125 } 130 }
126 if (error) 131 if (error)
127 last_error = error; 132 last_error = error;
128 /* 133
129 * bail out if the filesystem is corrupted. 134 /* bail out if the filesystem is corrupted. */
130 */
131 if (error == EFSCORRUPTED) 135 if (error == EFSCORRUPTED)
132 break; 136 break;
133 137
@@ -148,7 +152,8 @@ xfs_inode_ag_iterator(
148 int (*execute)(struct xfs_inode *ip, 152 int (*execute)(struct xfs_inode *ip,
149 struct xfs_perag *pag, int flags), 153 struct xfs_perag *pag, int flags),
150 int flags, 154 int flags,
151 int tag) 155 int tag,
156 int exclusive)
152{ 157{
153 int error = 0; 158 int error = 0;
154 int last_error = 0; 159 int last_error = 0;
@@ -157,7 +162,8 @@ xfs_inode_ag_iterator(
157 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 162 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
158 if (!mp->m_perag[ag].pag_ici_init) 163 if (!mp->m_perag[ag].pag_ici_init)
159 continue; 164 continue;
160 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); 165 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag,
166 exclusive);
161 if (error) { 167 if (error) {
162 last_error = error; 168 last_error = error;
163 if (error == EFSCORRUPTED) 169 if (error == EFSCORRUPTED)
@@ -174,30 +180,31 @@ xfs_sync_inode_valid(
174 struct xfs_perag *pag) 180 struct xfs_perag *pag)
175{ 181{
176 struct inode *inode = VFS_I(ip); 182 struct inode *inode = VFS_I(ip);
183 int error = EFSCORRUPTED;
177 184
178 /* nothing to sync during shutdown */ 185 /* nothing to sync during shutdown */
179 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 186 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
180 read_unlock(&pag->pag_ici_lock); 187 goto out_unlock;
181 return EFSCORRUPTED;
182 }
183 188
184 /* 189 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
185 * If we can't get a reference on the inode, it must be in reclaim. 190 error = ENOENT;
186 * Leave it for the reclaim code to flush. Also avoid inodes that 191 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
187 * haven't been fully initialised. 192 goto out_unlock;
188 */
189 if (!igrab(inode)) {
190 read_unlock(&pag->pag_ici_lock);
191 return ENOENT;
192 }
193 read_unlock(&pag->pag_ici_lock);
194 193
195 if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { 194 /* If we can't grab the inode, it must on it's way to reclaim. */
195 if (!igrab(inode))
196 goto out_unlock;
197
198 if (is_bad_inode(inode)) {
196 IRELE(ip); 199 IRELE(ip);
197 return ENOENT; 200 goto out_unlock;
198 } 201 }
199 202
200 return 0; 203 /* inode is valid */
204 error = 0;
205out_unlock:
206 read_unlock(&pag->pag_ici_lock);
207 return error;
201} 208}
202 209
203STATIC int 210STATIC int
@@ -282,7 +289,7 @@ xfs_sync_data(
282 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 289 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
283 290
284 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 291 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
285 XFS_ICI_NO_TAG); 292 XFS_ICI_NO_TAG, 0);
286 if (error) 293 if (error)
287 return XFS_ERROR(error); 294 return XFS_ERROR(error);
288 295
@@ -304,7 +311,7 @@ xfs_sync_attr(
304 ASSERT((flags & ~SYNC_WAIT) == 0); 311 ASSERT((flags & ~SYNC_WAIT) == 0);
305 312
306 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 313 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
307 XFS_ICI_NO_TAG); 314 XFS_ICI_NO_TAG, 0);
308} 315}
309 316
310STATIC int 317STATIC int
@@ -664,60 +671,6 @@ xfs_syncd_stop(
664 kthread_stop(mp->m_sync_task); 671 kthread_stop(mp->m_sync_task);
665} 672}
666 673
667STATIC int
668xfs_reclaim_inode(
669 xfs_inode_t *ip,
670 int sync_mode)
671{
672 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
673
674 /* The hash lock here protects a thread in xfs_iget_core from
675 * racing with us on linking the inode back with a vnode.
676 * Once we have the XFS_IRECLAIM flag set it will not touch
677 * us.
678 */
679 write_lock(&pag->pag_ici_lock);
680 spin_lock(&ip->i_flags_lock);
681 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
682 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
683 spin_unlock(&ip->i_flags_lock);
684 write_unlock(&pag->pag_ici_lock);
685 return -EAGAIN;
686 }
687 __xfs_iflags_set(ip, XFS_IRECLAIM);
688 spin_unlock(&ip->i_flags_lock);
689 write_unlock(&pag->pag_ici_lock);
690 xfs_put_perag(ip->i_mount, pag);
691
692 /*
693 * If the inode is still dirty, then flush it out. If the inode
694 * is not in the AIL, then it will be OK to flush it delwri as
695 * long as xfs_iflush() does not keep any references to the inode.
696 * We leave that decision up to xfs_iflush() since it has the
697 * knowledge of whether it's OK to simply do a delwri flush of
698 * the inode or whether we need to wait until the inode is
699 * pulled from the AIL.
700 * We get the flush lock regardless, though, just to make sure
701 * we don't free it while it is being flushed.
702 */
703 xfs_ilock(ip, XFS_ILOCK_EXCL);
704 xfs_iflock(ip);
705
706 /*
707 * In the case of a forced shutdown we rely on xfs_iflush() to
708 * wait for the inode to be unpinned before returning an error.
709 */
710 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
711 /* synchronize with xfs_iflush_done */
712 xfs_iflock(ip);
713 xfs_ifunlock(ip);
714 }
715
716 xfs_iunlock(ip, XFS_ILOCK_EXCL);
717 xfs_ireclaim(ip);
718 return 0;
719}
720
721void 674void
722__xfs_inode_set_reclaim_tag( 675__xfs_inode_set_reclaim_tag(
723 struct xfs_perag *pag, 676 struct xfs_perag *pag,
@@ -760,19 +713,55 @@ __xfs_inode_clear_reclaim_tag(
760} 713}
761 714
762STATIC int 715STATIC int
763xfs_reclaim_inode_now( 716xfs_reclaim_inode(
764 struct xfs_inode *ip, 717 struct xfs_inode *ip,
765 struct xfs_perag *pag, 718 struct xfs_perag *pag,
766 int flags) 719 int sync_mode)
767{ 720{
768 /* ignore if already under reclaim */ 721 /*
769 if (xfs_iflags_test(ip, XFS_IRECLAIM)) { 722 * The radix tree lock here protects a thread in xfs_iget from racing
770 read_unlock(&pag->pag_ici_lock); 723 * with us starting reclaim on the inode. Once we have the
724 * XFS_IRECLAIM flag set it will not touch us.
725 */
726 spin_lock(&ip->i_flags_lock);
727 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
728 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
729 /* ignore as it is already under reclaim */
730 spin_unlock(&ip->i_flags_lock);
731 write_unlock(&pag->pag_ici_lock);
771 return 0; 732 return 0;
772 } 733 }
773 read_unlock(&pag->pag_ici_lock); 734 __xfs_iflags_set(ip, XFS_IRECLAIM);
735 spin_unlock(&ip->i_flags_lock);
736 write_unlock(&pag->pag_ici_lock);
774 737
775 return xfs_reclaim_inode(ip, flags); 738 /*
739 * If the inode is still dirty, then flush it out. If the inode
740 * is not in the AIL, then it will be OK to flush it delwri as
741 * long as xfs_iflush() does not keep any references to the inode.
742 * We leave that decision up to xfs_iflush() since it has the
743 * knowledge of whether it's OK to simply do a delwri flush of
744 * the inode or whether we need to wait until the inode is
745 * pulled from the AIL.
746 * We get the flush lock regardless, though, just to make sure
747 * we don't free it while it is being flushed.
748 */
749 xfs_ilock(ip, XFS_ILOCK_EXCL);
750 xfs_iflock(ip);
751
752 /*
753 * In the case of a forced shutdown we rely on xfs_iflush() to
754 * wait for the inode to be unpinned before returning an error.
755 */
756 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
757 /* synchronize with xfs_iflush_done */
758 xfs_iflock(ip);
759 xfs_ifunlock(ip);
760 }
761
762 xfs_iunlock(ip, XFS_ILOCK_EXCL);
763 xfs_ireclaim(ip);
764 return 0;
776} 765}
777 766
778int 767int
@@ -780,6 +769,6 @@ xfs_reclaim_inodes(
780 xfs_mount_t *mp, 769 xfs_mount_t *mp,
781 int mode) 770 int mode)
782{ 771{
783 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, 772 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
784 XFS_ICI_RECLAIM_TAG); 773 XFS_ICI_RECLAIM_TAG, 1);
785} 774}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index a500b4d91835..ea932b43335d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -54,6 +54,6 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
54int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 54int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
55int xfs_inode_ag_iterator(struct xfs_mount *mp, 55int xfs_inode_ag_iterator(struct xfs_mount *mp,
56 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 56 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
57 int flags, int tag); 57 int flags, int tag, int write_lock);
58 58
59#endif 59#endif
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index c40834bdee58..c22a608321a3 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -33,51 +33,55 @@ struct xfs_dquot;
33struct xlog_ticket; 33struct xlog_ticket;
34struct log; 34struct log;
35 35
36DECLARE_EVENT_CLASS(xfs_attr_list_class,
37 TP_PROTO(struct xfs_attr_list_context *ctx),
38 TP_ARGS(ctx),
39 TP_STRUCT__entry(
40 __field(dev_t, dev)
41 __field(xfs_ino_t, ino)
42 __field(u32, hashval)
43 __field(u32, blkno)
44 __field(u32, offset)
45 __field(void *, alist)
46 __field(int, bufsize)
47 __field(int, count)
48 __field(int, firstu)
49 __field(int, dupcnt)
50 __field(int, flags)
51 ),
52 TP_fast_assign(
53 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
54 __entry->ino = ctx->dp->i_ino;
55 __entry->hashval = ctx->cursor->hashval;
56 __entry->blkno = ctx->cursor->blkno;
57 __entry->offset = ctx->cursor->offset;
58 __entry->alist = ctx->alist;
59 __entry->bufsize = ctx->bufsize;
60 __entry->count = ctx->count;
61 __entry->firstu = ctx->firstu;
62 __entry->flags = ctx->flags;
63 ),
64 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
65 "alist 0x%p size %u count %u firstu %u flags %d %s",
66 MAJOR(__entry->dev), MINOR(__entry->dev),
67 __entry->ino,
68 __entry->hashval,
69 __entry->blkno,
70 __entry->offset,
71 __entry->dupcnt,
72 __entry->alist,
73 __entry->bufsize,
74 __entry->count,
75 __entry->firstu,
76 __entry->flags,
77 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
78 )
79)
80
36#define DEFINE_ATTR_LIST_EVENT(name) \ 81#define DEFINE_ATTR_LIST_EVENT(name) \
37TRACE_EVENT(name, \ 82DEFINE_EVENT(xfs_attr_list_class, name, \
38 TP_PROTO(struct xfs_attr_list_context *ctx), \ 83 TP_PROTO(struct xfs_attr_list_context *ctx), \
39 TP_ARGS(ctx), \ 84 TP_ARGS(ctx))
40 TP_STRUCT__entry( \
41 __field(dev_t, dev) \
42 __field(xfs_ino_t, ino) \
43 __field(u32, hashval) \
44 __field(u32, blkno) \
45 __field(u32, offset) \
46 __field(void *, alist) \
47 __field(int, bufsize) \
48 __field(int, count) \
49 __field(int, firstu) \
50 __field(int, dupcnt) \
51 __field(int, flags) \
52 ), \
53 TP_fast_assign( \
54 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; \
55 __entry->ino = ctx->dp->i_ino; \
56 __entry->hashval = ctx->cursor->hashval; \
57 __entry->blkno = ctx->cursor->blkno; \
58 __entry->offset = ctx->cursor->offset; \
59 __entry->alist = ctx->alist; \
60 __entry->bufsize = ctx->bufsize; \
61 __entry->count = ctx->count; \
62 __entry->firstu = ctx->firstu; \
63 __entry->flags = ctx->flags; \
64 ), \
65 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " \
66 "alist 0x%p size %u count %u firstu %u flags %d %s", \
67 MAJOR(__entry->dev), MINOR(__entry->dev), \
68 __entry->ino, \
69 __entry->hashval, \
70 __entry->blkno, \
71 __entry->offset, \
72 __entry->dupcnt, \
73 __entry->alist, \
74 __entry->bufsize, \
75 __entry->count, \
76 __entry->firstu, \
77 __entry->flags, \
78 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS) \
79 ) \
80)
81DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf); 85DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
82DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all); 86DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
83DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf); 87DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
@@ -178,91 +182,99 @@ TRACE_EVENT(xfs_iext_insert,
178 (char *)__entry->caller_ip) 182 (char *)__entry->caller_ip)
179); 183);
180 184
185DECLARE_EVENT_CLASS(xfs_bmap_class,
186 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
187 unsigned long caller_ip),
188 TP_ARGS(ip, idx, state, caller_ip),
189 TP_STRUCT__entry(
190 __field(dev_t, dev)
191 __field(xfs_ino_t, ino)
192 __field(xfs_extnum_t, idx)
193 __field(xfs_fileoff_t, startoff)
194 __field(xfs_fsblock_t, startblock)
195 __field(xfs_filblks_t, blockcount)
196 __field(xfs_exntst_t, state)
197 __field(int, bmap_state)
198 __field(unsigned long, caller_ip)
199 ),
200 TP_fast_assign(
201 struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ?
202 ip->i_afp : &ip->i_df;
203 struct xfs_bmbt_irec r;
204
205 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
206 __entry->dev = VFS_I(ip)->i_sb->s_dev;
207 __entry->ino = ip->i_ino;
208 __entry->idx = idx;
209 __entry->startoff = r.br_startoff;
210 __entry->startblock = r.br_startblock;
211 __entry->blockcount = r.br_blockcount;
212 __entry->state = r.br_state;
213 __entry->bmap_state = state;
214 __entry->caller_ip = caller_ip;
215 ),
216 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
217 "offset %lld block %s count %lld flag %d caller %pf",
218 MAJOR(__entry->dev), MINOR(__entry->dev),
219 __entry->ino,
220 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
221 (long)__entry->idx,
222 __entry->startoff,
223 xfs_fmtfsblock(__entry->startblock),
224 __entry->blockcount,
225 __entry->state,
226 (char *)__entry->caller_ip)
227)
228
181#define DEFINE_BMAP_EVENT(name) \ 229#define DEFINE_BMAP_EVENT(name) \
182TRACE_EVENT(name, \ 230DEFINE_EVENT(xfs_bmap_class, name, \
183 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \ 231 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
184 unsigned long caller_ip), \ 232 unsigned long caller_ip), \
185 TP_ARGS(ip, idx, state, caller_ip), \ 233 TP_ARGS(ip, idx, state, caller_ip))
186 TP_STRUCT__entry( \
187 __field(dev_t, dev) \
188 __field(xfs_ino_t, ino) \
189 __field(xfs_extnum_t, idx) \
190 __field(xfs_fileoff_t, startoff) \
191 __field(xfs_fsblock_t, startblock) \
192 __field(xfs_filblks_t, blockcount) \
193 __field(xfs_exntst_t, state) \
194 __field(int, bmap_state) \
195 __field(unsigned long, caller_ip) \
196 ), \
197 TP_fast_assign( \
198 struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ? \
199 ip->i_afp : &ip->i_df; \
200 struct xfs_bmbt_irec r; \
201 \
202 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); \
203 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
204 __entry->ino = ip->i_ino; \
205 __entry->idx = idx; \
206 __entry->startoff = r.br_startoff; \
207 __entry->startblock = r.br_startblock; \
208 __entry->blockcount = r.br_blockcount; \
209 __entry->state = r.br_state; \
210 __entry->bmap_state = state; \
211 __entry->caller_ip = caller_ip; \
212 ), \
213 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " \
214 "offset %lld block %s count %lld flag %d caller %pf", \
215 MAJOR(__entry->dev), MINOR(__entry->dev), \
216 __entry->ino, \
217 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), \
218 (long)__entry->idx, \
219 __entry->startoff, \
220 xfs_fmtfsblock(__entry->startblock), \
221 __entry->blockcount, \
222 __entry->state, \
223 (char *)__entry->caller_ip) \
224)
225
226DEFINE_BMAP_EVENT(xfs_iext_remove); 234DEFINE_BMAP_EVENT(xfs_iext_remove);
227DEFINE_BMAP_EVENT(xfs_bmap_pre_update); 235DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
228DEFINE_BMAP_EVENT(xfs_bmap_post_update); 236DEFINE_BMAP_EVENT(xfs_bmap_post_update);
229DEFINE_BMAP_EVENT(xfs_extlist); 237DEFINE_BMAP_EVENT(xfs_extlist);
230 238
231#define DEFINE_BUF_EVENT(tname) \ 239DECLARE_EVENT_CLASS(xfs_buf_class,
232TRACE_EVENT(tname, \ 240 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
233 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \ 241 TP_ARGS(bp, caller_ip),
234 TP_ARGS(bp, caller_ip), \ 242 TP_STRUCT__entry(
235 TP_STRUCT__entry( \ 243 __field(dev_t, dev)
236 __field(dev_t, dev) \ 244 __field(xfs_daddr_t, bno)
237 __field(xfs_daddr_t, bno) \ 245 __field(size_t, buffer_length)
238 __field(size_t, buffer_length) \ 246 __field(int, hold)
239 __field(int, hold) \ 247 __field(int, pincount)
240 __field(int, pincount) \ 248 __field(unsigned, lockval)
241 __field(unsigned, lockval) \ 249 __field(unsigned, flags)
242 __field(unsigned, flags) \ 250 __field(unsigned long, caller_ip)
243 __field(unsigned long, caller_ip) \ 251 ),
244 ), \ 252 TP_fast_assign(
245 TP_fast_assign( \ 253 __entry->dev = bp->b_target->bt_dev;
246 __entry->dev = bp->b_target->bt_dev; \ 254 __entry->bno = bp->b_bn;
247 __entry->bno = bp->b_bn; \ 255 __entry->buffer_length = bp->b_buffer_length;
248 __entry->buffer_length = bp->b_buffer_length; \ 256 __entry->hold = atomic_read(&bp->b_hold);
249 __entry->hold = atomic_read(&bp->b_hold); \ 257 __entry->pincount = atomic_read(&bp->b_pin_count);
250 __entry->pincount = atomic_read(&bp->b_pin_count); \ 258 __entry->lockval = xfs_buf_lock_value(bp);
251 __entry->lockval = xfs_buf_lock_value(bp); \ 259 __entry->flags = bp->b_flags;
252 __entry->flags = bp->b_flags; \ 260 __entry->caller_ip = caller_ip;
253 __entry->caller_ip = caller_ip; \ 261 ),
254 ), \ 262 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
255 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ 263 "lock %d flags %s caller %pf",
256 "lock %d flags %s caller %pf", \ 264 MAJOR(__entry->dev), MINOR(__entry->dev),
257 MAJOR(__entry->dev), MINOR(__entry->dev), \ 265 (unsigned long long)__entry->bno,
258 (unsigned long long)__entry->bno, \ 266 __entry->buffer_length,
259 __entry->buffer_length, \ 267 __entry->hold,
260 __entry->hold, \ 268 __entry->pincount,
261 __entry->pincount, \ 269 __entry->lockval,
262 __entry->lockval, \ 270 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
263 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), \ 271 (void *)__entry->caller_ip)
264 (void *)__entry->caller_ip) \
265) 272)
273
274#define DEFINE_BUF_EVENT(name) \
275DEFINE_EVENT(xfs_buf_class, name, \
276 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
277 TP_ARGS(bp, caller_ip))
266DEFINE_BUF_EVENT(xfs_buf_init); 278DEFINE_BUF_EVENT(xfs_buf_init);
267DEFINE_BUF_EVENT(xfs_buf_free); 279DEFINE_BUF_EVENT(xfs_buf_free);
268DEFINE_BUF_EVENT(xfs_buf_hold); 280DEFINE_BUF_EVENT(xfs_buf_hold);
@@ -299,41 +311,45 @@ DEFINE_BUF_EVENT(xfs_reset_dqcounts);
299DEFINE_BUF_EVENT(xfs_inode_item_push); 311DEFINE_BUF_EVENT(xfs_inode_item_push);
300 312
301/* pass flags explicitly */ 313/* pass flags explicitly */
302#define DEFINE_BUF_FLAGS_EVENT(tname) \ 314DECLARE_EVENT_CLASS(xfs_buf_flags_class,
303TRACE_EVENT(tname, \ 315 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
304 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \ 316 TP_ARGS(bp, flags, caller_ip),
305 TP_ARGS(bp, flags, caller_ip), \ 317 TP_STRUCT__entry(
306 TP_STRUCT__entry( \ 318 __field(dev_t, dev)
307 __field(dev_t, dev) \ 319 __field(xfs_daddr_t, bno)
308 __field(xfs_daddr_t, bno) \ 320 __field(size_t, buffer_length)
309 __field(size_t, buffer_length) \ 321 __field(int, hold)
310 __field(int, hold) \ 322 __field(int, pincount)
311 __field(int, pincount) \ 323 __field(unsigned, lockval)
312 __field(unsigned, lockval) \ 324 __field(unsigned, flags)
313 __field(unsigned, flags) \ 325 __field(unsigned long, caller_ip)
314 __field(unsigned long, caller_ip) \ 326 ),
315 ), \ 327 TP_fast_assign(
316 TP_fast_assign( \ 328 __entry->dev = bp->b_target->bt_dev;
317 __entry->dev = bp->b_target->bt_dev; \ 329 __entry->bno = bp->b_bn;
318 __entry->bno = bp->b_bn; \ 330 __entry->buffer_length = bp->b_buffer_length;
319 __entry->buffer_length = bp->b_buffer_length; \ 331 __entry->flags = flags;
320 __entry->flags = flags; \ 332 __entry->hold = atomic_read(&bp->b_hold);
321 __entry->hold = atomic_read(&bp->b_hold); \ 333 __entry->pincount = atomic_read(&bp->b_pin_count);
322 __entry->pincount = atomic_read(&bp->b_pin_count); \ 334 __entry->lockval = xfs_buf_lock_value(bp);
323 __entry->lockval = xfs_buf_lock_value(bp); \ 335 __entry->caller_ip = caller_ip;
324 __entry->caller_ip = caller_ip; \ 336 ),
325 ), \ 337 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
326 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ 338 "lock %d flags %s caller %pf",
327 "lock %d flags %s caller %pf", \ 339 MAJOR(__entry->dev), MINOR(__entry->dev),
328 MAJOR(__entry->dev), MINOR(__entry->dev), \ 340 (unsigned long long)__entry->bno,
329 (unsigned long long)__entry->bno, \ 341 __entry->buffer_length,
330 __entry->buffer_length, \ 342 __entry->hold,
331 __entry->hold, \ 343 __entry->pincount,
332 __entry->pincount, \ 344 __entry->lockval,
333 __entry->lockval, \ 345 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
334 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), \ 346 (void *)__entry->caller_ip)
335 (void *)__entry->caller_ip) \
336) 347)
348
349#define DEFINE_BUF_FLAGS_EVENT(name) \
350DEFINE_EVENT(xfs_buf_flags_class, name, \
351 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
352 TP_ARGS(bp, flags, caller_ip))
337DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); 353DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
338DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); 354DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
339DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); 355DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
@@ -376,55 +392,58 @@ TRACE_EVENT(xfs_buf_ioerror,
376 (void *)__entry->caller_ip) 392 (void *)__entry->caller_ip)
377); 393);
378 394
379#define DEFINE_BUF_ITEM_EVENT(tname) \ 395DECLARE_EVENT_CLASS(xfs_buf_item_class,
380TRACE_EVENT(tname, \ 396 TP_PROTO(struct xfs_buf_log_item *bip),
381 TP_PROTO(struct xfs_buf_log_item *bip), \ 397 TP_ARGS(bip),
382 TP_ARGS(bip), \ 398 TP_STRUCT__entry(
383 TP_STRUCT__entry( \ 399 __field(dev_t, dev)
384 __field(dev_t, dev) \ 400 __field(xfs_daddr_t, buf_bno)
385 __field(xfs_daddr_t, buf_bno) \ 401 __field(size_t, buf_len)
386 __field(size_t, buf_len) \ 402 __field(int, buf_hold)
387 __field(int, buf_hold) \ 403 __field(int, buf_pincount)
388 __field(int, buf_pincount) \ 404 __field(int, buf_lockval)
389 __field(int, buf_lockval) \ 405 __field(unsigned, buf_flags)
390 __field(unsigned, buf_flags) \ 406 __field(unsigned, bli_recur)
391 __field(unsigned, bli_recur) \ 407 __field(int, bli_refcount)
392 __field(int, bli_refcount) \ 408 __field(unsigned, bli_flags)
393 __field(unsigned, bli_flags) \ 409 __field(void *, li_desc)
394 __field(void *, li_desc) \ 410 __field(unsigned, li_flags)
395 __field(unsigned, li_flags) \ 411 ),
396 ), \ 412 TP_fast_assign(
397 TP_fast_assign( \ 413 __entry->dev = bip->bli_buf->b_target->bt_dev;
398 __entry->dev = bip->bli_buf->b_target->bt_dev; \ 414 __entry->bli_flags = bip->bli_flags;
399 __entry->bli_flags = bip->bli_flags; \ 415 __entry->bli_recur = bip->bli_recur;
400 __entry->bli_recur = bip->bli_recur; \ 416 __entry->bli_refcount = atomic_read(&bip->bli_refcount);
401 __entry->bli_refcount = atomic_read(&bip->bli_refcount); \ 417 __entry->buf_bno = bip->bli_buf->b_bn;
402 __entry->buf_bno = bip->bli_buf->b_bn; \ 418 __entry->buf_len = bip->bli_buf->b_buffer_length;
403 __entry->buf_len = bip->bli_buf->b_buffer_length; \ 419 __entry->buf_flags = bip->bli_buf->b_flags;
404 __entry->buf_flags = bip->bli_buf->b_flags; \ 420 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
405 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); \ 421 __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
406 __entry->buf_pincount = \ 422 __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf);
407 atomic_read(&bip->bli_buf->b_pin_count); \ 423 __entry->li_desc = bip->bli_item.li_desc;
408 __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf); \ 424 __entry->li_flags = bip->bli_item.li_flags;
409 __entry->li_desc = bip->bli_item.li_desc; \ 425 ),
410 __entry->li_flags = bip->bli_item.li_flags; \ 426 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
411 ), \ 427 "lock %d flags %s recur %d refcount %d bliflags %s "
412 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ 428 "lidesc 0x%p liflags %s",
413 "lock %d flags %s recur %d refcount %d bliflags %s " \ 429 MAJOR(__entry->dev), MINOR(__entry->dev),
414 "lidesc 0x%p liflags %s", \ 430 (unsigned long long)__entry->buf_bno,
415 MAJOR(__entry->dev), MINOR(__entry->dev), \ 431 __entry->buf_len,
416 (unsigned long long)__entry->buf_bno, \ 432 __entry->buf_hold,
417 __entry->buf_len, \ 433 __entry->buf_pincount,
418 __entry->buf_hold, \ 434 __entry->buf_lockval,
419 __entry->buf_pincount, \ 435 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
420 __entry->buf_lockval, \ 436 __entry->bli_recur,
421 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS), \ 437 __entry->bli_refcount,
422 __entry->bli_recur, \ 438 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
423 __entry->bli_refcount, \ 439 __entry->li_desc,
424 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS), \ 440 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
425 __entry->li_desc, \
426 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS)) \
427) 441)
442
443#define DEFINE_BUF_ITEM_EVENT(name) \
444DEFINE_EVENT(xfs_buf_item_class, name, \
445 TP_PROTO(struct xfs_buf_log_item *bip), \
446 TP_ARGS(bip))
428DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); 447DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
429DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); 448DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
430DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); 449DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
@@ -450,78 +469,90 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
450DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); 469DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
451DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); 470DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
452 471
472DECLARE_EVENT_CLASS(xfs_lock_class,
473 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
474 unsigned long caller_ip),
475 TP_ARGS(ip, lock_flags, caller_ip),
476 TP_STRUCT__entry(
477 __field(dev_t, dev)
478 __field(xfs_ino_t, ino)
479 __field(int, lock_flags)
480 __field(unsigned long, caller_ip)
481 ),
482 TP_fast_assign(
483 __entry->dev = VFS_I(ip)->i_sb->s_dev;
484 __entry->ino = ip->i_ino;
485 __entry->lock_flags = lock_flags;
486 __entry->caller_ip = caller_ip;
487 ),
488 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
489 MAJOR(__entry->dev), MINOR(__entry->dev),
490 __entry->ino,
491 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
492 (void *)__entry->caller_ip)
493)
494
453#define DEFINE_LOCK_EVENT(name) \ 495#define DEFINE_LOCK_EVENT(name) \
454TRACE_EVENT(name, \ 496DEFINE_EVENT(xfs_lock_class, name, \
455 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \ 497 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
456 unsigned long caller_ip), \ 498 unsigned long caller_ip), \
457 TP_ARGS(ip, lock_flags, caller_ip), \ 499 TP_ARGS(ip, lock_flags, caller_ip))
458 TP_STRUCT__entry( \
459 __field(dev_t, dev) \
460 __field(xfs_ino_t, ino) \
461 __field(int, lock_flags) \
462 __field(unsigned long, caller_ip) \
463 ), \
464 TP_fast_assign( \
465 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
466 __entry->ino = ip->i_ino; \
467 __entry->lock_flags = lock_flags; \
468 __entry->caller_ip = caller_ip; \
469 ), \
470 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", \
471 MAJOR(__entry->dev), MINOR(__entry->dev), \
472 __entry->ino, \
473 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), \
474 (void *)__entry->caller_ip) \
475)
476
477DEFINE_LOCK_EVENT(xfs_ilock); 500DEFINE_LOCK_EVENT(xfs_ilock);
478DEFINE_LOCK_EVENT(xfs_ilock_nowait); 501DEFINE_LOCK_EVENT(xfs_ilock_nowait);
479DEFINE_LOCK_EVENT(xfs_ilock_demote); 502DEFINE_LOCK_EVENT(xfs_ilock_demote);
480DEFINE_LOCK_EVENT(xfs_iunlock); 503DEFINE_LOCK_EVENT(xfs_iunlock);
481 504
505DECLARE_EVENT_CLASS(xfs_iget_class,
506 TP_PROTO(struct xfs_inode *ip),
507 TP_ARGS(ip),
508 TP_STRUCT__entry(
509 __field(dev_t, dev)
510 __field(xfs_ino_t, ino)
511 ),
512 TP_fast_assign(
513 __entry->dev = VFS_I(ip)->i_sb->s_dev;
514 __entry->ino = ip->i_ino;
515 ),
516 TP_printk("dev %d:%d ino 0x%llx",
517 MAJOR(__entry->dev), MINOR(__entry->dev),
518 __entry->ino)
519)
520
482#define DEFINE_IGET_EVENT(name) \ 521#define DEFINE_IGET_EVENT(name) \
483TRACE_EVENT(name, \ 522DEFINE_EVENT(xfs_iget_class, name, \
484 TP_PROTO(struct xfs_inode *ip), \ 523 TP_PROTO(struct xfs_inode *ip), \
485 TP_ARGS(ip), \ 524 TP_ARGS(ip))
486 TP_STRUCT__entry( \
487 __field(dev_t, dev) \
488 __field(xfs_ino_t, ino) \
489 ), \
490 TP_fast_assign( \
491 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
492 __entry->ino = ip->i_ino; \
493 ), \
494 TP_printk("dev %d:%d ino 0x%llx", \
495 MAJOR(__entry->dev), MINOR(__entry->dev), \
496 __entry->ino) \
497)
498DEFINE_IGET_EVENT(xfs_iget_skip); 525DEFINE_IGET_EVENT(xfs_iget_skip);
499DEFINE_IGET_EVENT(xfs_iget_reclaim); 526DEFINE_IGET_EVENT(xfs_iget_reclaim);
500DEFINE_IGET_EVENT(xfs_iget_found); 527DEFINE_IGET_EVENT(xfs_iget_found);
501DEFINE_IGET_EVENT(xfs_iget_alloc); 528DEFINE_IGET_EVENT(xfs_iget_alloc);
502 529
530DECLARE_EVENT_CLASS(xfs_inode_class,
531 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
532 TP_ARGS(ip, caller_ip),
533 TP_STRUCT__entry(
534 __field(dev_t, dev)
535 __field(xfs_ino_t, ino)
536 __field(int, count)
537 __field(unsigned long, caller_ip)
538 ),
539 TP_fast_assign(
540 __entry->dev = VFS_I(ip)->i_sb->s_dev;
541 __entry->ino = ip->i_ino;
542 __entry->count = atomic_read(&VFS_I(ip)->i_count);
543 __entry->caller_ip = caller_ip;
544 ),
545 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
546 MAJOR(__entry->dev), MINOR(__entry->dev),
547 __entry->ino,
548 __entry->count,
549 (char *)__entry->caller_ip)
550)
551
503#define DEFINE_INODE_EVENT(name) \ 552#define DEFINE_INODE_EVENT(name) \
504TRACE_EVENT(name, \ 553DEFINE_EVENT(xfs_inode_class, name, \
505 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ 554 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
506 TP_ARGS(ip, caller_ip), \ 555 TP_ARGS(ip, caller_ip))
507 TP_STRUCT__entry( \
508 __field(dev_t, dev) \
509 __field(xfs_ino_t, ino) \
510 __field(int, count) \
511 __field(unsigned long, caller_ip) \
512 ), \
513 TP_fast_assign( \
514 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
515 __entry->ino = ip->i_ino; \
516 __entry->count = atomic_read(&VFS_I(ip)->i_count); \
517 __entry->caller_ip = caller_ip; \
518 ), \
519 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf", \
520 MAJOR(__entry->dev), MINOR(__entry->dev), \
521 __entry->ino, \
522 __entry->count, \
523 (char *)__entry->caller_ip) \
524)
525DEFINE_INODE_EVENT(xfs_ihold); 556DEFINE_INODE_EVENT(xfs_ihold);
526DEFINE_INODE_EVENT(xfs_irele); 557DEFINE_INODE_EVENT(xfs_irele);
527/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ 558/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */
@@ -529,55 +560,59 @@ DEFINE_INODE_EVENT(xfs_inode);
529#define xfs_itrace_entry(ip) \ 560#define xfs_itrace_entry(ip) \
530 trace_xfs_inode(ip, _THIS_IP_) 561 trace_xfs_inode(ip, _THIS_IP_)
531 562
532#define DEFINE_DQUOT_EVENT(tname) \ 563DECLARE_EVENT_CLASS(xfs_dquot_class,
533TRACE_EVENT(tname, \ 564 TP_PROTO(struct xfs_dquot *dqp),
534 TP_PROTO(struct xfs_dquot *dqp), \ 565 TP_ARGS(dqp),
535 TP_ARGS(dqp), \ 566 TP_STRUCT__entry(
536 TP_STRUCT__entry( \ 567 __field(dev_t, dev)
537 __field(dev_t, dev) \ 568 __field(__be32, id)
538 __field(__be32, id) \ 569 __field(unsigned, flags)
539 __field(unsigned, flags) \ 570 __field(unsigned, nrefs)
540 __field(unsigned, nrefs) \ 571 __field(unsigned long long, res_bcount)
541 __field(unsigned long long, res_bcount) \ 572 __field(unsigned long long, bcount)
542 __field(unsigned long long, bcount) \ 573 __field(unsigned long long, icount)
543 __field(unsigned long long, icount) \ 574 __field(unsigned long long, blk_hardlimit)
544 __field(unsigned long long, blk_hardlimit) \ 575 __field(unsigned long long, blk_softlimit)
545 __field(unsigned long long, blk_softlimit) \ 576 __field(unsigned long long, ino_hardlimit)
546 __field(unsigned long long, ino_hardlimit) \ 577 __field(unsigned long long, ino_softlimit)
547 __field(unsigned long long, ino_softlimit) \
548 ), \
549 TP_fast_assign( \
550 __entry->dev = dqp->q_mount->m_super->s_dev; \
551 __entry->id = dqp->q_core.d_id; \
552 __entry->flags = dqp->dq_flags; \
553 __entry->nrefs = dqp->q_nrefs; \
554 __entry->res_bcount = dqp->q_res_bcount; \
555 __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount); \
556 __entry->icount = be64_to_cpu(dqp->q_core.d_icount); \
557 __entry->blk_hardlimit = \
558 be64_to_cpu(dqp->q_core.d_blk_hardlimit); \
559 __entry->blk_softlimit = \
560 be64_to_cpu(dqp->q_core.d_blk_softlimit); \
561 __entry->ino_hardlimit = \
562 be64_to_cpu(dqp->q_core.d_ino_hardlimit); \
563 __entry->ino_softlimit = \
564 be64_to_cpu(dqp->q_core.d_ino_softlimit); \
565 ), \ 578 ), \
566 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " \ 579 TP_fast_assign(
567 "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] " \ 580 __entry->dev = dqp->q_mount->m_super->s_dev;
568 "icnt 0x%llx [hard 0x%llx | soft 0x%llx]", \ 581 __entry->id = dqp->q_core.d_id;
569 MAJOR(__entry->dev), MINOR(__entry->dev), \ 582 __entry->flags = dqp->dq_flags;
570 be32_to_cpu(__entry->id), \ 583 __entry->nrefs = dqp->q_nrefs;
571 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS), \ 584 __entry->res_bcount = dqp->q_res_bcount;
572 __entry->nrefs, \ 585 __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
573 __entry->res_bcount, \ 586 __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
574 __entry->bcount, \ 587 __entry->blk_hardlimit =
575 __entry->blk_hardlimit, \ 588 be64_to_cpu(dqp->q_core.d_blk_hardlimit);
576 __entry->blk_softlimit, \ 589 __entry->blk_softlimit =
577 __entry->icount, \ 590 be64_to_cpu(dqp->q_core.d_blk_softlimit);
578 __entry->ino_hardlimit, \ 591 __entry->ino_hardlimit =
579 __entry->ino_softlimit) \ 592 be64_to_cpu(dqp->q_core.d_ino_hardlimit);
593 __entry->ino_softlimit =
594 be64_to_cpu(dqp->q_core.d_ino_softlimit);
595 ),
596 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
597 "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] "
598 "icnt 0x%llx [hard 0x%llx | soft 0x%llx]",
599 MAJOR(__entry->dev), MINOR(__entry->dev),
600 be32_to_cpu(__entry->id),
601 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
602 __entry->nrefs,
603 __entry->res_bcount,
604 __entry->bcount,
605 __entry->blk_hardlimit,
606 __entry->blk_softlimit,
607 __entry->icount,
608 __entry->ino_hardlimit,
609 __entry->ino_softlimit)
580) 610)
611
612#define DEFINE_DQUOT_EVENT(name) \
613DEFINE_EVENT(xfs_dquot_class, name, \
614 TP_PROTO(struct xfs_dquot *dqp), \
615 TP_ARGS(dqp))
581DEFINE_DQUOT_EVENT(xfs_dqadjust); 616DEFINE_DQUOT_EVENT(xfs_dqadjust);
582DEFINE_DQUOT_EVENT(xfs_dqshake_dirty); 617DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
583DEFINE_DQUOT_EVENT(xfs_dqshake_unlink); 618DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
@@ -610,72 +645,75 @@ DEFINE_DQUOT_EVENT(xfs_dqflush_done);
610DEFINE_IGET_EVENT(xfs_dquot_dqalloc); 645DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
611DEFINE_IGET_EVENT(xfs_dquot_dqdetach); 646DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
612 647
648DECLARE_EVENT_CLASS(xfs_loggrant_class,
649 TP_PROTO(struct log *log, struct xlog_ticket *tic),
650 TP_ARGS(log, tic),
651 TP_STRUCT__entry(
652 __field(dev_t, dev)
653 __field(unsigned, trans_type)
654 __field(char, ocnt)
655 __field(char, cnt)
656 __field(int, curr_res)
657 __field(int, unit_res)
658 __field(unsigned int, flags)
659 __field(void *, reserve_headq)
660 __field(void *, write_headq)
661 __field(int, grant_reserve_cycle)
662 __field(int, grant_reserve_bytes)
663 __field(int, grant_write_cycle)
664 __field(int, grant_write_bytes)
665 __field(int, curr_cycle)
666 __field(int, curr_block)
667 __field(xfs_lsn_t, tail_lsn)
668 ),
669 TP_fast_assign(
670 __entry->dev = log->l_mp->m_super->s_dev;
671 __entry->trans_type = tic->t_trans_type;
672 __entry->ocnt = tic->t_ocnt;
673 __entry->cnt = tic->t_cnt;
674 __entry->curr_res = tic->t_curr_res;
675 __entry->unit_res = tic->t_unit_res;
676 __entry->flags = tic->t_flags;
677 __entry->reserve_headq = log->l_reserve_headq;
678 __entry->write_headq = log->l_write_headq;
679 __entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
680 __entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
681 __entry->grant_write_cycle = log->l_grant_write_cycle;
682 __entry->grant_write_bytes = log->l_grant_write_bytes;
683 __entry->curr_cycle = log->l_curr_cycle;
684 __entry->curr_block = log->l_curr_block;
685 __entry->tail_lsn = log->l_tail_lsn;
686 ),
687 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
688 "t_unit_res %u t_flags %s reserve_headq 0x%p "
689 "write_headq 0x%p grant_reserve_cycle %d "
690 "grant_reserve_bytes %d grant_write_cycle %d "
691 "grant_write_bytes %d curr_cycle %d curr_block %d "
692 "tail_cycle %d tail_block %d",
693 MAJOR(__entry->dev), MINOR(__entry->dev),
694 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
695 __entry->ocnt,
696 __entry->cnt,
697 __entry->curr_res,
698 __entry->unit_res,
699 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
700 __entry->reserve_headq,
701 __entry->write_headq,
702 __entry->grant_reserve_cycle,
703 __entry->grant_reserve_bytes,
704 __entry->grant_write_cycle,
705 __entry->grant_write_bytes,
706 __entry->curr_cycle,
707 __entry->curr_block,
708 CYCLE_LSN(__entry->tail_lsn),
709 BLOCK_LSN(__entry->tail_lsn)
710 )
711)
613 712
614#define DEFINE_LOGGRANT_EVENT(tname) \ 713#define DEFINE_LOGGRANT_EVENT(name) \
615TRACE_EVENT(tname, \ 714DEFINE_EVENT(xfs_loggrant_class, name, \
616 TP_PROTO(struct log *log, struct xlog_ticket *tic), \ 715 TP_PROTO(struct log *log, struct xlog_ticket *tic), \
617 TP_ARGS(log, tic), \ 716 TP_ARGS(log, tic))
618 TP_STRUCT__entry( \
619 __field(dev_t, dev) \
620 __field(unsigned, trans_type) \
621 __field(char, ocnt) \
622 __field(char, cnt) \
623 __field(int, curr_res) \
624 __field(int, unit_res) \
625 __field(unsigned int, flags) \
626 __field(void *, reserve_headq) \
627 __field(void *, write_headq) \
628 __field(int, grant_reserve_cycle) \
629 __field(int, grant_reserve_bytes) \
630 __field(int, grant_write_cycle) \
631 __field(int, grant_write_bytes) \
632 __field(int, curr_cycle) \
633 __field(int, curr_block) \
634 __field(xfs_lsn_t, tail_lsn) \
635 ), \
636 TP_fast_assign( \
637 __entry->dev = log->l_mp->m_super->s_dev; \
638 __entry->trans_type = tic->t_trans_type; \
639 __entry->ocnt = tic->t_ocnt; \
640 __entry->cnt = tic->t_cnt; \
641 __entry->curr_res = tic->t_curr_res; \
642 __entry->unit_res = tic->t_unit_res; \
643 __entry->flags = tic->t_flags; \
644 __entry->reserve_headq = log->l_reserve_headq; \
645 __entry->write_headq = log->l_write_headq; \
646 __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; \
647 __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; \
648 __entry->grant_write_cycle = log->l_grant_write_cycle; \
649 __entry->grant_write_bytes = log->l_grant_write_bytes; \
650 __entry->curr_cycle = log->l_curr_cycle; \
651 __entry->curr_block = log->l_curr_block; \
652 __entry->tail_lsn = log->l_tail_lsn; \
653 ), \
654 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " \
655 "t_unit_res %u t_flags %s reserve_headq 0x%p " \
656 "write_headq 0x%p grant_reserve_cycle %d " \
657 "grant_reserve_bytes %d grant_write_cycle %d " \
658 "grant_write_bytes %d curr_cycle %d curr_block %d " \
659 "tail_cycle %d tail_block %d", \
660 MAJOR(__entry->dev), MINOR(__entry->dev), \
661 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES), \
662 __entry->ocnt, \
663 __entry->cnt, \
664 __entry->curr_res, \
665 __entry->unit_res, \
666 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), \
667 __entry->reserve_headq, \
668 __entry->write_headq, \
669 __entry->grant_reserve_cycle, \
670 __entry->grant_reserve_bytes, \
671 __entry->grant_write_cycle, \
672 __entry->grant_write_bytes, \
673 __entry->curr_cycle, \
674 __entry->curr_block, \
675 CYCLE_LSN(__entry->tail_lsn), \
676 BLOCK_LSN(__entry->tail_lsn) \
677 ) \
678)
679DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); 717DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
680DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); 718DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
681DEFINE_LOGGRANT_EVENT(xfs_log_reserve); 719DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
@@ -815,7 +853,7 @@ TRACE_EVENT(name, \
815 ), \ 853 ), \
816 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 854 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
817 "offset 0x%llx count %zd flags %s " \ 855 "offset 0x%llx count %zd flags %s " \
818 "startoff 0x%llx startblock 0x%llx blockcount 0x%llx", \ 856 "startoff 0x%llx startblock %s blockcount 0x%llx", \
819 MAJOR(__entry->dev), MINOR(__entry->dev), \ 857 MAJOR(__entry->dev), MINOR(__entry->dev), \
820 __entry->ino, \ 858 __entry->ino, \
821 __entry->size, \ 859 __entry->size, \
@@ -824,7 +862,7 @@ TRACE_EVENT(name, \
824 __entry->count, \ 862 __entry->count, \
825 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \ 863 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \
826 __entry->startoff, \ 864 __entry->startoff, \
827 __entry->startblock, \ 865 xfs_fmtfsblock(__entry->startblock), \
828 __entry->blockcount) \ 866 __entry->blockcount) \
829) 867)
830DEFINE_IOMAP_EVENT(xfs_iomap_enter); 868DEFINE_IOMAP_EVENT(xfs_iomap_enter);
@@ -897,28 +935,32 @@ TRACE_EVENT(xfs_itruncate_start,
897 __entry->toss_finish) 935 __entry->toss_finish)
898); 936);
899 937
938DECLARE_EVENT_CLASS(xfs_itrunc_class,
939 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
940 TP_ARGS(ip, new_size),
941 TP_STRUCT__entry(
942 __field(dev_t, dev)
943 __field(xfs_ino_t, ino)
944 __field(xfs_fsize_t, size)
945 __field(xfs_fsize_t, new_size)
946 ),
947 TP_fast_assign(
948 __entry->dev = VFS_I(ip)->i_sb->s_dev;
949 __entry->ino = ip->i_ino;
950 __entry->size = ip->i_d.di_size;
951 __entry->new_size = new_size;
952 ),
953 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
954 MAJOR(__entry->dev), MINOR(__entry->dev),
955 __entry->ino,
956 __entry->size,
957 __entry->new_size)
958)
959
900#define DEFINE_ITRUNC_EVENT(name) \ 960#define DEFINE_ITRUNC_EVENT(name) \
901TRACE_EVENT(name, \ 961DEFINE_EVENT(xfs_itrunc_class, name, \
902 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ 962 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
903 TP_ARGS(ip, new_size), \ 963 TP_ARGS(ip, new_size))
904 TP_STRUCT__entry( \
905 __field(dev_t, dev) \
906 __field(xfs_ino_t, ino) \
907 __field(xfs_fsize_t, size) \
908 __field(xfs_fsize_t, new_size) \
909 ), \
910 TP_fast_assign( \
911 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
912 __entry->ino = ip->i_ino; \
913 __entry->size = ip->i_d.di_size; \
914 __entry->new_size = new_size; \
915 ), \
916 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx", \
917 MAJOR(__entry->dev), MINOR(__entry->dev), \
918 __entry->ino, \
919 __entry->size, \
920 __entry->new_size) \
921)
922DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start); 964DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start);
923DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end); 965DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end);
924 966
@@ -1037,28 +1079,28 @@ TRACE_EVENT(xfs_alloc_unbusy,
1037 1079
1038TRACE_EVENT(xfs_alloc_busysearch, 1080TRACE_EVENT(xfs_alloc_busysearch,
1039 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, 1081 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1040 xfs_extlen_t len, int found), 1082 xfs_extlen_t len, xfs_lsn_t lsn),
1041 TP_ARGS(mp, agno, agbno, len, found), 1083 TP_ARGS(mp, agno, agbno, len, lsn),
1042 TP_STRUCT__entry( 1084 TP_STRUCT__entry(
1043 __field(dev_t, dev) 1085 __field(dev_t, dev)
1044 __field(xfs_agnumber_t, agno) 1086 __field(xfs_agnumber_t, agno)
1045 __field(xfs_agblock_t, agbno) 1087 __field(xfs_agblock_t, agbno)
1046 __field(xfs_extlen_t, len) 1088 __field(xfs_extlen_t, len)
1047 __field(int, found) 1089 __field(xfs_lsn_t, lsn)
1048 ), 1090 ),
1049 TP_fast_assign( 1091 TP_fast_assign(
1050 __entry->dev = mp->m_super->s_dev; 1092 __entry->dev = mp->m_super->s_dev;
1051 __entry->agno = agno; 1093 __entry->agno = agno;
1052 __entry->agbno = agbno; 1094 __entry->agbno = agbno;
1053 __entry->len = len; 1095 __entry->len = len;
1054 __entry->found = found; 1096 __entry->lsn = lsn;
1055 ), 1097 ),
1056 TP_printk("dev %d:%d agno %u agbno %u len %u %s", 1098 TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx",
1057 MAJOR(__entry->dev), MINOR(__entry->dev), 1099 MAJOR(__entry->dev), MINOR(__entry->dev),
1058 __entry->agno, 1100 __entry->agno,
1059 __entry->agbno, 1101 __entry->agbno,
1060 __entry->len, 1102 __entry->len,
1061 __print_symbolic(__entry->found, XFS_BUSY_STATES)) 1103 __entry->lsn)
1062); 1104);
1063 1105
1064TRACE_EVENT(xfs_agf, 1106TRACE_EVENT(xfs_agf,
@@ -1152,77 +1194,80 @@ TRACE_EVENT(xfs_free_extent,
1152 1194
1153); 1195);
1154 1196
1155#define DEFINE_ALLOC_EVENT(name) \ 1197DECLARE_EVENT_CLASS(xfs_alloc_class,
1156TRACE_EVENT(name, \ 1198 TP_PROTO(struct xfs_alloc_arg *args),
1157 TP_PROTO(struct xfs_alloc_arg *args), \ 1199 TP_ARGS(args),
1158 TP_ARGS(args), \ 1200 TP_STRUCT__entry(
1159 TP_STRUCT__entry( \ 1201 __field(dev_t, dev)
1160 __field(dev_t, dev) \ 1202 __field(xfs_agnumber_t, agno)
1161 __field(xfs_agnumber_t, agno) \ 1203 __field(xfs_agblock_t, agbno)
1162 __field(xfs_agblock_t, agbno) \ 1204 __field(xfs_extlen_t, minlen)
1163 __field(xfs_extlen_t, minlen) \ 1205 __field(xfs_extlen_t, maxlen)
1164 __field(xfs_extlen_t, maxlen) \ 1206 __field(xfs_extlen_t, mod)
1165 __field(xfs_extlen_t, mod) \ 1207 __field(xfs_extlen_t, prod)
1166 __field(xfs_extlen_t, prod) \ 1208 __field(xfs_extlen_t, minleft)
1167 __field(xfs_extlen_t, minleft) \ 1209 __field(xfs_extlen_t, total)
1168 __field(xfs_extlen_t, total) \ 1210 __field(xfs_extlen_t, alignment)
1169 __field(xfs_extlen_t, alignment) \ 1211 __field(xfs_extlen_t, minalignslop)
1170 __field(xfs_extlen_t, minalignslop) \ 1212 __field(xfs_extlen_t, len)
1171 __field(xfs_extlen_t, len) \ 1213 __field(short, type)
1172 __field(short, type) \ 1214 __field(short, otype)
1173 __field(short, otype) \ 1215 __field(char, wasdel)
1174 __field(char, wasdel) \ 1216 __field(char, wasfromfl)
1175 __field(char, wasfromfl) \ 1217 __field(char, isfl)
1176 __field(char, isfl) \ 1218 __field(char, userdata)
1177 __field(char, userdata) \ 1219 __field(xfs_fsblock_t, firstblock)
1178 __field(xfs_fsblock_t, firstblock) \ 1220 ),
1179 ), \ 1221 TP_fast_assign(
1180 TP_fast_assign( \ 1222 __entry->dev = args->mp->m_super->s_dev;
1181 __entry->dev = args->mp->m_super->s_dev; \ 1223 __entry->agno = args->agno;
1182 __entry->agno = args->agno; \ 1224 __entry->agbno = args->agbno;
1183 __entry->agbno = args->agbno; \ 1225 __entry->minlen = args->minlen;
1184 __entry->minlen = args->minlen; \ 1226 __entry->maxlen = args->maxlen;
1185 __entry->maxlen = args->maxlen; \ 1227 __entry->mod = args->mod;
1186 __entry->mod = args->mod; \ 1228 __entry->prod = args->prod;
1187 __entry->prod = args->prod; \ 1229 __entry->minleft = args->minleft;
1188 __entry->minleft = args->minleft; \ 1230 __entry->total = args->total;
1189 __entry->total = args->total; \ 1231 __entry->alignment = args->alignment;
1190 __entry->alignment = args->alignment; \ 1232 __entry->minalignslop = args->minalignslop;
1191 __entry->minalignslop = args->minalignslop; \ 1233 __entry->len = args->len;
1192 __entry->len = args->len; \ 1234 __entry->type = args->type;
1193 __entry->type = args->type; \ 1235 __entry->otype = args->otype;
1194 __entry->otype = args->otype; \ 1236 __entry->wasdel = args->wasdel;
1195 __entry->wasdel = args->wasdel; \ 1237 __entry->wasfromfl = args->wasfromfl;
1196 __entry->wasfromfl = args->wasfromfl; \ 1238 __entry->isfl = args->isfl;
1197 __entry->isfl = args->isfl; \ 1239 __entry->userdata = args->userdata;
1198 __entry->userdata = args->userdata; \ 1240 __entry->firstblock = args->firstblock;
1199 __entry->firstblock = args->firstblock; \ 1241 ),
1200 ), \ 1242 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
1201 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " \ 1243 "prod %u minleft %u total %u alignment %u minalignslop %u "
1202 "prod %u minleft %u total %u alignment %u minalignslop %u " \ 1244 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
1203 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " \ 1245 "userdata %d firstblock 0x%llx",
1204 "userdata %d firstblock 0x%llx", \ 1246 MAJOR(__entry->dev), MINOR(__entry->dev),
1205 MAJOR(__entry->dev), MINOR(__entry->dev), \ 1247 __entry->agno,
1206 __entry->agno, \ 1248 __entry->agbno,
1207 __entry->agbno, \ 1249 __entry->minlen,
1208 __entry->minlen, \ 1250 __entry->maxlen,
1209 __entry->maxlen, \ 1251 __entry->mod,
1210 __entry->mod, \ 1252 __entry->prod,
1211 __entry->prod, \ 1253 __entry->minleft,
1212 __entry->minleft, \ 1254 __entry->total,
1213 __entry->total, \ 1255 __entry->alignment,
1214 __entry->alignment, \ 1256 __entry->minalignslop,
1215 __entry->minalignslop, \ 1257 __entry->len,
1216 __entry->len, \ 1258 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
1217 __print_symbolic(__entry->type, XFS_ALLOC_TYPES), \ 1259 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
1218 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), \ 1260 __entry->wasdel,
1219 __entry->wasdel, \ 1261 __entry->wasfromfl,
1220 __entry->wasfromfl, \ 1262 __entry->isfl,
1221 __entry->isfl, \ 1263 __entry->userdata,
1222 __entry->userdata, \ 1264 __entry->firstblock)
1223 __entry->firstblock) \
1224) 1265)
1225 1266
1267#define DEFINE_ALLOC_EVENT(name) \
1268DEFINE_EVENT(xfs_alloc_class, name, \
1269 TP_PROTO(struct xfs_alloc_arg *args), \
1270 TP_ARGS(args))
1226DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); 1271DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
1227DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); 1272DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
1228DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); 1273DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
@@ -1245,92 +1290,100 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
1245DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); 1290DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
1246DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); 1291DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
1247 1292
1248#define DEFINE_DIR2_TRACE(tname) \ 1293DECLARE_EVENT_CLASS(xfs_dir2_class,
1249TRACE_EVENT(tname, \ 1294 TP_PROTO(struct xfs_da_args *args),
1295 TP_ARGS(args),
1296 TP_STRUCT__entry(
1297 __field(dev_t, dev)
1298 __field(xfs_ino_t, ino)
1299 __dynamic_array(char, name, args->namelen)
1300 __field(int, namelen)
1301 __field(xfs_dahash_t, hashval)
1302 __field(xfs_ino_t, inumber)
1303 __field(int, op_flags)
1304 ),
1305 TP_fast_assign(
1306 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1307 __entry->ino = args->dp->i_ino;
1308 if (args->namelen)
1309 memcpy(__get_str(name), args->name, args->namelen);
1310 __entry->namelen = args->namelen;
1311 __entry->hashval = args->hashval;
1312 __entry->inumber = args->inumber;
1313 __entry->op_flags = args->op_flags;
1314 ),
1315 TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
1316 "inumber 0x%llx op_flags %s",
1317 MAJOR(__entry->dev), MINOR(__entry->dev),
1318 __entry->ino,
1319 __entry->namelen,
1320 __entry->namelen ? __get_str(name) : NULL,
1321 __entry->namelen,
1322 __entry->hashval,
1323 __entry->inumber,
1324 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
1325)
1326
1327#define DEFINE_DIR2_EVENT(name) \
1328DEFINE_EVENT(xfs_dir2_class, name, \
1250 TP_PROTO(struct xfs_da_args *args), \ 1329 TP_PROTO(struct xfs_da_args *args), \
1251 TP_ARGS(args), \ 1330 TP_ARGS(args))
1252 TP_STRUCT__entry( \ 1331DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
1253 __field(dev_t, dev) \ 1332DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
1254 __field(xfs_ino_t, ino) \ 1333DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
1255 __dynamic_array(char, name, args->namelen) \ 1334DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
1256 __field(int, namelen) \ 1335DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
1257 __field(xfs_dahash_t, hashval) \ 1336DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
1258 __field(xfs_ino_t, inumber) \ 1337DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
1259 __field(int, op_flags) \ 1338DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
1260 ), \ 1339DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
1261 TP_fast_assign( \ 1340DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
1262 __entry->dev = VFS_I(args->dp)->i_sb->s_dev; \ 1341DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
1263 __entry->ino = args->dp->i_ino; \ 1342DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
1264 if (args->namelen) \ 1343DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
1265 memcpy(__get_str(name), args->name, args->namelen); \ 1344DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
1266 __entry->namelen = args->namelen; \ 1345DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
1267 __entry->hashval = args->hashval; \ 1346DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
1268 __entry->inumber = args->inumber; \ 1347DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
1269 __entry->op_flags = args->op_flags; \ 1348DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
1270 ), \ 1349DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
1271 TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x " \ 1350DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
1272 "inumber 0x%llx op_flags %s", \ 1351DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
1273 MAJOR(__entry->dev), MINOR(__entry->dev), \ 1352DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
1274 __entry->ino, \ 1353DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
1275 __entry->namelen, \ 1354DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
1276 __entry->namelen ? __get_str(name) : NULL, \ 1355DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
1277 __entry->namelen, \ 1356
1278 __entry->hashval, \ 1357DECLARE_EVENT_CLASS(xfs_dir2_space_class,
1279 __entry->inumber, \ 1358 TP_PROTO(struct xfs_da_args *args, int idx),
1280 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) \ 1359 TP_ARGS(args, idx),
1360 TP_STRUCT__entry(
1361 __field(dev_t, dev)
1362 __field(xfs_ino_t, ino)
1363 __field(int, op_flags)
1364 __field(int, idx)
1365 ),
1366 TP_fast_assign(
1367 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1368 __entry->ino = args->dp->i_ino;
1369 __entry->op_flags = args->op_flags;
1370 __entry->idx = idx;
1371 ),
1372 TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
1373 MAJOR(__entry->dev), MINOR(__entry->dev),
1374 __entry->ino,
1375 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
1376 __entry->idx)
1281) 1377)
1282DEFINE_DIR2_TRACE(xfs_dir2_sf_addname);
1283DEFINE_DIR2_TRACE(xfs_dir2_sf_create);
1284DEFINE_DIR2_TRACE(xfs_dir2_sf_lookup);
1285DEFINE_DIR2_TRACE(xfs_dir2_sf_replace);
1286DEFINE_DIR2_TRACE(xfs_dir2_sf_removename);
1287DEFINE_DIR2_TRACE(xfs_dir2_sf_toino4);
1288DEFINE_DIR2_TRACE(xfs_dir2_sf_toino8);
1289DEFINE_DIR2_TRACE(xfs_dir2_sf_to_block);
1290DEFINE_DIR2_TRACE(xfs_dir2_block_addname);
1291DEFINE_DIR2_TRACE(xfs_dir2_block_lookup);
1292DEFINE_DIR2_TRACE(xfs_dir2_block_replace);
1293DEFINE_DIR2_TRACE(xfs_dir2_block_removename);
1294DEFINE_DIR2_TRACE(xfs_dir2_block_to_sf);
1295DEFINE_DIR2_TRACE(xfs_dir2_block_to_leaf);
1296DEFINE_DIR2_TRACE(xfs_dir2_leaf_addname);
1297DEFINE_DIR2_TRACE(xfs_dir2_leaf_lookup);
1298DEFINE_DIR2_TRACE(xfs_dir2_leaf_replace);
1299DEFINE_DIR2_TRACE(xfs_dir2_leaf_removename);
1300DEFINE_DIR2_TRACE(xfs_dir2_leaf_to_block);
1301DEFINE_DIR2_TRACE(xfs_dir2_leaf_to_node);
1302DEFINE_DIR2_TRACE(xfs_dir2_node_addname);
1303DEFINE_DIR2_TRACE(xfs_dir2_node_lookup);
1304DEFINE_DIR2_TRACE(xfs_dir2_node_replace);
1305DEFINE_DIR2_TRACE(xfs_dir2_node_removename);
1306DEFINE_DIR2_TRACE(xfs_dir2_node_to_leaf);
1307 1378
1308#define DEFINE_DIR2_SPACE_TRACE(tname) \ 1379#define DEFINE_DIR2_SPACE_EVENT(name) \
1309TRACE_EVENT(tname, \ 1380DEFINE_EVENT(xfs_dir2_space_class, name, \
1310 TP_PROTO(struct xfs_da_args *args, int idx), \ 1381 TP_PROTO(struct xfs_da_args *args, int idx), \
1311 TP_ARGS(args, idx), \ 1382 TP_ARGS(args, idx))
1312 TP_STRUCT__entry( \ 1383DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
1313 __field(dev_t, dev) \ 1384DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
1314 __field(xfs_ino_t, ino) \ 1385DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
1315 __field(int, op_flags) \ 1386DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
1316 __field(int, idx) \
1317 ), \
1318 TP_fast_assign( \
1319 __entry->dev = VFS_I(args->dp)->i_sb->s_dev; \
1320 __entry->ino = args->dp->i_ino; \
1321 __entry->op_flags = args->op_flags; \
1322 __entry->idx = idx; \
1323 ), \
1324 TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d", \
1325 MAJOR(__entry->dev), MINOR(__entry->dev), \
1326 __entry->ino, \
1327 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), \
1328 __entry->idx) \
1329)
1330DEFINE_DIR2_SPACE_TRACE(xfs_dir2_leafn_add);
1331DEFINE_DIR2_SPACE_TRACE(xfs_dir2_leafn_remove);
1332DEFINE_DIR2_SPACE_TRACE(xfs_dir2_grow_inode);
1333DEFINE_DIR2_SPACE_TRACE(xfs_dir2_shrink_inode);
1334 1387
1335TRACE_EVENT(xfs_dir2_leafn_moveents, 1388TRACE_EVENT(xfs_dir2_leafn_moveents,
1336 TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count), 1389 TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 71af76fe8a23..873e07e29074 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,7 @@ xfs_qm_dqrele_all_inodes(
891 uint flags) 891 uint flags)
892{ 892{
893 ASSERT(mp->m_quotainfo); 893 ASSERT(mp->m_quotainfo);
894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG); 894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
895} 895}
896 896
897/*------------------------------------------------------------------------*/ 897/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a1c65fc6d9c4..275b1f4f9430 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2563,43 +2563,41 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2563 xfs_mount_t *mp; 2563 xfs_mount_t *mp;
2564 xfs_perag_busy_t *bsy; 2564 xfs_perag_busy_t *bsy;
2565 xfs_agblock_t uend, bend; 2565 xfs_agblock_t uend, bend;
2566 xfs_lsn_t lsn; 2566 xfs_lsn_t lsn = 0;
2567 int cnt; 2567 int cnt;
2568 2568
2569 mp = tp->t_mountp; 2569 mp = tp->t_mountp;
2570 2570
2571 spin_lock(&mp->m_perag[agno].pagb_lock); 2571 spin_lock(&mp->m_perag[agno].pagb_lock);
2572 cnt = mp->m_perag[agno].pagb_count;
2573 2572
2574 uend = bno + len - 1; 2573 uend = bno + len - 1;
2575 2574
2576 /* search pagb_list for this slot, skipping open slots */ 2575 /*
2577 for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) { 2576 * search pagb_list for this slot, skipping open slots. We have to
2577 * search the entire array as there may be multiple overlaps and
2578 * we have to get the most recent LSN for the log force to push out
2579 * all the transactions that span the range.
2580 */
2581 for (cnt = 0; cnt < mp->m_perag[agno].pagb_count; cnt++) {
2582 bsy = &mp->m_perag[agno].pagb_list[cnt];
2583 if (!bsy->busy_tp)
2584 continue;
2578 2585
2579 /* 2586 bend = bsy->busy_start + bsy->busy_length - 1;
2580 * (start1,length1) within (start2, length2) 2587 if (bno > bend || uend < bsy->busy_start)
2581 */ 2588 continue;
2582 if (bsy->busy_tp != NULL) {
2583 bend = bsy->busy_start + bsy->busy_length - 1;
2584 if ((bno > bend) || (uend < bsy->busy_start)) {
2585 cnt--;
2586 } else {
2587 break;
2588 }
2589 }
2590 }
2591 2589
2592 trace_xfs_alloc_busysearch(mp, agno, bno, len, !!cnt); 2590 /* (start1,length1) within (start2, length2) */
2591 if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0)
2592 lsn = bsy->busy_tp->t_commit_lsn;
2593 }
2594 spin_unlock(&mp->m_perag[agno].pagb_lock);
2595 trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn);
2593 2596
2594 /* 2597 /*
2595 * If a block was found, force the log through the LSN of the 2598 * If a block was found, force the log through the LSN of the
2596 * transaction that freed the block 2599 * transaction that freed the block
2597 */ 2600 */
2598 if (cnt) { 2601 if (lsn)
2599 lsn = bsy->busy_tp->t_commit_lsn;
2600 spin_unlock(&mp->m_perag[agno].pagb_lock);
2601 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); 2602 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
2602 } else {
2603 spin_unlock(&mp->m_perag[agno].pagb_lock);
2604 }
2605} 2603}
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d1483a4f71b8..84ca1cf16a1e 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -114,10 +114,82 @@ xfs_swapext(
114 return error; 114 return error;
115} 115}
116 116
117/*
118 * We need to check that the format of the data fork in the temporary inode is
119 * valid for the target inode before doing the swap. This is not a problem with
120 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
121 * data fork depending on the space the attribute fork is taking so we can get
122 * invalid formats on the target inode.
123 *
124 * E.g. target has space for 7 extents in extent format, temp inode only has
125 * space for 6. If we defragment down to 7 extents, then the tmp format is a
126 * btree, but when swapped it needs to be in extent format. Hence we can't just
127 * blindly swap data forks on attr2 filesystems.
128 *
129 * Note that we check the swap in both directions so that we don't end up with
130 * a corrupt temporary inode, either.
131 *
132 * Note that fixing the way xfs_fsr sets up the attribute fork in the source
133 * inode will prevent this situation from occurring, so all we do here is
134 * reject and log the attempt. basically we are putting the responsibility on
135 * userspace to get this right.
136 */
137static int
138xfs_swap_extents_check_format(
139 xfs_inode_t *ip, /* target inode */
140 xfs_inode_t *tip) /* tmp inode */
141{
142
143 /* Should never get a local format */
144 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
145 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
146 return EINVAL;
147
148 /*
149 * if the target inode has less extents that then temporary inode then
150 * why did userspace call us?
151 */
152 if (ip->i_d.di_nextents < tip->i_d.di_nextents)
153 return EINVAL;
154
155 /*
156 * if the target inode is in extent form and the temp inode is in btree
157 * form then we will end up with the target inode in the wrong format
158 * as we already know there are less extents in the temp inode.
159 */
160 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
161 tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
162 return EINVAL;
163
164 /* Check temp in extent form to max in target */
165 if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
166 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max)
167 return EINVAL;
168
169 /* Check target in extent form to max in temp */
170 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
171 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
172 return EINVAL;
173
174 /* Check root block of temp in btree form to max in target */
175 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
176 XFS_IFORK_BOFF(ip) &&
177 tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
178 return EINVAL;
179
180 /* Check root block of target in btree form to max in temp */
181 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
182 XFS_IFORK_BOFF(tip) &&
183 ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
184 return EINVAL;
185
186 return 0;
187}
188
117int 189int
118xfs_swap_extents( 190xfs_swap_extents(
119 xfs_inode_t *ip, 191 xfs_inode_t *ip, /* target inode */
120 xfs_inode_t *tip, 192 xfs_inode_t *tip, /* tmp inode */
121 xfs_swapext_t *sxp) 193 xfs_swapext_t *sxp)
122{ 194{
123 xfs_mount_t *mp; 195 xfs_mount_t *mp;
@@ -161,13 +233,6 @@ xfs_swap_extents(
161 goto out_unlock; 233 goto out_unlock;
162 } 234 }
163 235
164 /* Should never get a local format */
165 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
166 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
167 error = XFS_ERROR(EINVAL);
168 goto out_unlock;
169 }
170
171 if (VN_CACHED(VFS_I(tip)) != 0) { 236 if (VN_CACHED(VFS_I(tip)) != 0) {
172 error = xfs_flushinval_pages(tip, 0, -1, 237 error = xfs_flushinval_pages(tip, 0, -1,
173 FI_REMAPF_LOCKED); 238 FI_REMAPF_LOCKED);
@@ -189,13 +254,12 @@ xfs_swap_extents(
189 goto out_unlock; 254 goto out_unlock;
190 } 255 }
191 256
192 /* 257 /* check inode formats now that data is flushed */
193 * If the target has extended attributes, the tmp file 258 error = xfs_swap_extents_check_format(ip, tip);
194 * must also in order to ensure the correct data fork 259 if (error) {
195 * format. 260 xfs_fs_cmn_err(CE_NOTE, mp,
196 */ 261 "%s: inode 0x%llx format is incompatible for exchanging.",
197 if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { 262 __FILE__, ip->i_ino);
198 error = XFS_ERROR(EINVAL);
199 goto out_unlock; 263 goto out_unlock;
200 } 264 }
201 265
@@ -276,6 +340,16 @@ xfs_swap_extents(
276 *tifp = *tempifp; /* struct copy */ 340 *tifp = *tempifp; /* struct copy */
277 341
278 /* 342 /*
343 * Fix the in-memory data fork values that are dependent on the fork
344 * offset in the inode. We can't assume they remain the same as attr2
345 * has dynamic fork offsets.
346 */
347 ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
348 (uint)sizeof(xfs_bmbt_rec_t);
349 tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
350 (uint)sizeof(xfs_bmbt_rec_t);
351
352 /*
279 * Fix the on-disk inode values 353 * Fix the on-disk inode values
280 */ 354 */
281 tmp = (__uint64_t)ip->i_d.di_nblocks; 355 tmp = (__uint64_t)ip->i_d.di_nblocks;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index fa402a6bbbcf..155e798f30a1 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -73,7 +73,6 @@ xfs_inode_alloc(
73 ASSERT(atomic_read(&ip->i_pincount) == 0); 73 ASSERT(atomic_read(&ip->i_pincount) == 0);
74 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 74 ASSERT(!spin_is_locked(&ip->i_flags_lock));
75 ASSERT(completion_done(&ip->i_flush)); 75 ASSERT(completion_done(&ip->i_flush));
76 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
77 76
78 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 77 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
79 78
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ce278b3ae7fc..ef77fd88c8e3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2841,8 +2841,8 @@ xfs_iflush(
2841 mp = ip->i_mount; 2841 mp = ip->i_mount;
2842 2842
2843 /* 2843 /*
2844 * If the inode isn't dirty, then just release the inode 2844 * If the inode isn't dirty, then just release the inode flush lock and
2845 * flush lock and do nothing. 2845 * do nothing.
2846 */ 2846 */
2847 if (xfs_inode_clean(ip)) { 2847 if (xfs_inode_clean(ip)) {
2848 xfs_ifunlock(ip); 2848 xfs_ifunlock(ip);
@@ -2868,6 +2868,19 @@ xfs_iflush(
2868 xfs_iunpin_wait(ip); 2868 xfs_iunpin_wait(ip);
2869 2869
2870 /* 2870 /*
2871 * For stale inodes we cannot rely on the backing buffer remaining
2872 * stale in cache for the remaining life of the stale inode and so
2873 * xfs_itobp() below may give us a buffer that no longer contains
2874 * inodes below. We have to check this after ensuring the inode is
2875 * unpinned so that it is safe to reclaim the stale inode after the
2876 * flush call.
2877 */
2878 if (xfs_iflags_test(ip, XFS_ISTALE)) {
2879 xfs_ifunlock(ip);
2880 return 0;
2881 }
2882
2883 /*
2871 * This may have been unpinned because the filesystem is shutting 2884 * This may have been unpinned because the filesystem is shutting
2872 * down forcibly. If that's the case we must not write this inode 2885 * down forcibly. If that's the case we must not write this inode
2873 * to disk, because the log record didn't make it to disk! 2886 * to disk, because the log record didn't make it to disk!
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 9e15a1185362..6be05f756d59 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1517,6 +1517,8 @@ xfs_rtfree_range(
1517 */ 1517 */
1518 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, 1518 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
1519 &postblock); 1519 &postblock);
1520 if (error)
1521 return error;
1520 /* 1522 /*
1521 * If there are blocks not being freed at the front of the 1523 * If there are blocks not being freed at the front of the
1522 * old extent, add summary data for them to be allocated. 1524 * old extent, add summary data for them to be allocated.
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6558ffd8d140..6f268756bf36 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -70,7 +70,6 @@ xfs_setattr(
70 uint commit_flags=0; 70 uint commit_flags=0;
71 uid_t uid=0, iuid=0; 71 uid_t uid=0, iuid=0;
72 gid_t gid=0, igid=0; 72 gid_t gid=0, igid=0;
73 int timeflags = 0;
74 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 73 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
75 int need_iolock = 1; 74 int need_iolock = 1;
76 75
@@ -135,16 +134,13 @@ xfs_setattr(
135 if (flags & XFS_ATTR_NOLOCK) 134 if (flags & XFS_ATTR_NOLOCK)
136 need_iolock = 0; 135 need_iolock = 0;
137 if (!(mask & ATTR_SIZE)) { 136 if (!(mask & ATTR_SIZE)) {
138 if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) || 137 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
139 (mp->m_flags & XFS_MOUNT_WSYNC)) { 138 commit_flags = 0;
140 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 139 code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
141 commit_flags = 0; 140 0, 0, 0);
142 if ((code = xfs_trans_reserve(tp, 0, 141 if (code) {
143 XFS_ICHANGE_LOG_RES(mp), 0, 142 lock_flags = 0;
144 0, 0))) { 143 goto error_return;
145 lock_flags = 0;
146 goto error_return;
147 }
148 } 144 }
149 } else { 145 } else {
150 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && 146 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
@@ -295,15 +291,23 @@ xfs_setattr(
295 * or we are explicitly asked to change it. This handles 291 * or we are explicitly asked to change it. This handles
296 * the semantic difference between truncate() and ftruncate() 292 * the semantic difference between truncate() and ftruncate()
297 * as implemented in the VFS. 293 * as implemented in the VFS.
294 *
295 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
296 * is a special case where we need to update the times despite
297 * not having these flags set. For all other operations the
298 * VFS set these flags explicitly if it wants a timestamp
299 * update.
298 */ 300 */
299 if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME)) 301 if (iattr->ia_size != ip->i_size &&
300 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 302 (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
303 iattr->ia_ctime = iattr->ia_mtime =
304 current_fs_time(inode->i_sb);
305 mask |= ATTR_CTIME | ATTR_MTIME;
306 }
301 307
302 if (iattr->ia_size > ip->i_size) { 308 if (iattr->ia_size > ip->i_size) {
303 ip->i_d.di_size = iattr->ia_size; 309 ip->i_d.di_size = iattr->ia_size;
304 ip->i_size = iattr->ia_size; 310 ip->i_size = iattr->ia_size;
305 if (!(flags & XFS_ATTR_DMI))
306 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
307 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 311 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
308 } else if (iattr->ia_size <= ip->i_size || 312 } else if (iattr->ia_size <= ip->i_size ||
309 (iattr->ia_size == 0 && ip->i_d.di_nextents)) { 313 (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
@@ -374,9 +378,6 @@ xfs_setattr(
374 ip->i_d.di_gid = gid; 378 ip->i_d.di_gid = gid;
375 inode->i_gid = gid; 379 inode->i_gid = gid;
376 } 380 }
377
378 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
379 timeflags |= XFS_ICHGTIME_CHG;
380 } 381 }
381 382
382 /* 383 /*
@@ -393,51 +394,37 @@ xfs_setattr(
393 394
394 inode->i_mode &= S_IFMT; 395 inode->i_mode &= S_IFMT;
395 inode->i_mode |= mode & ~S_IFMT; 396 inode->i_mode |= mode & ~S_IFMT;
396
397 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
398 timeflags |= XFS_ICHGTIME_CHG;
399 } 397 }
400 398
401 /* 399 /*
402 * Change file access or modified times. 400 * Change file access or modified times.
403 */ 401 */
404 if (mask & (ATTR_ATIME|ATTR_MTIME)) { 402 if (mask & ATTR_ATIME) {
405 if (mask & ATTR_ATIME) { 403 inode->i_atime = iattr->ia_atime;
406 inode->i_atime = iattr->ia_atime; 404 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
407 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; 405 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
408 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; 406 ip->i_update_core = 1;
409 ip->i_update_core = 1;
410 }
411 if (mask & ATTR_MTIME) {
412 inode->i_mtime = iattr->ia_mtime;
413 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
414 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
415 timeflags &= ~XFS_ICHGTIME_MOD;
416 timeflags |= XFS_ICHGTIME_CHG;
417 }
418 if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
419 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
420 } 407 }
421 408 if (mask & ATTR_CTIME) {
422 /*
423 * Change file inode change time only if ATTR_CTIME set
424 * AND we have been called by a DMI function.
425 */
426
427 if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
428 inode->i_ctime = iattr->ia_ctime; 409 inode->i_ctime = iattr->ia_ctime;
429 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 410 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
430 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 411 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
431 ip->i_update_core = 1; 412 ip->i_update_core = 1;
432 timeflags &= ~XFS_ICHGTIME_CHG; 413 }
414 if (mask & ATTR_MTIME) {
415 inode->i_mtime = iattr->ia_mtime;
416 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
417 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
418 ip->i_update_core = 1;
433 } 419 }
434 420
435 /* 421 /*
436 * Send out timestamp changes that need to be set to the 422 * And finally, log the inode core if any attribute in it
437 * current time. Not done when called by a DMI function. 423 * has been changed.
438 */ 424 */
439 if (timeflags && !(flags & XFS_ATTR_DMI)) 425 if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
440 xfs_ichgtime(ip, timeflags); 426 ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
427 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
441 428
442 XFS_STATS_INC(xs_ig_attrchg); 429 XFS_STATS_INC(xs_ig_attrchg);
443 430
@@ -452,12 +439,10 @@ xfs_setattr(
452 * mix so this probably isn't worth the trouble to optimize. 439 * mix so this probably isn't worth the trouble to optimize.
453 */ 440 */
454 code = 0; 441 code = 0;
455 if (tp) { 442 if (mp->m_flags & XFS_MOUNT_WSYNC)
456 if (mp->m_flags & XFS_MOUNT_WSYNC) 443 xfs_trans_set_sync(tp);
457 xfs_trans_set_sync(tp);
458 444
459 code = xfs_trans_commit(tp, commit_flags); 445 code = xfs_trans_commit(tp, commit_flags);
460 }
461 446
462 xfs_iunlock(ip, lock_flags); 447 xfs_iunlock(ip, lock_flags);
463 448