aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/server.c5
-rw-r--r--fs/binfmt_elf_fdpic.c26
-rw-r--r--fs/binfmt_flat.c27
-rw-r--r--fs/block_dev.c72
-rw-r--r--fs/btrfs/acl.c8
-rw-r--r--fs/btrfs/disk-io.c11
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/file.c12
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/relocation.c7
-rw-r--r--fs/btrfs/root-tree.c3
-rw-r--r--fs/btrfs/super.c6
-rw-r--r--fs/ceph/caps.c93
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/mds_client.c28
-rw-r--r--fs/ceph/mds_client.h6
-rw-r--r--fs/ceph/mon_client.c2
-rw-r--r--fs/ceph/super.c4
-rw-r--r--fs/cifs/cifsfs.c16
-rw-r--r--fs/cifs/cifsproto.h1
-rw-r--r--fs/cifs/dir.c76
-rw-r--r--fs/cifs/file.c101
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/cifs/sess.c10
-rw-r--r--fs/compat.c2
-rw-r--r--fs/configfs/inode.c9
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/ext2/acl.c1
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext3/acl.c1
-rw-r--r--fs/ext4/inode.c40
-rw-r--r--fs/ext4/move_extent.c3
-rw-r--r--fs/fcntl.c13
-rw-r--r--fs/fs-writeback.c304
-rw-r--r--fs/fscache/page.c36
-rw-r--r--fs/jffs2/acl.c3
-rw-r--r--fs/jffs2/dir.c127
-rw-r--r--fs/jffs2/fs.c7
-rw-r--r--fs/libfs.c3
-rw-r--r--fs/minix/dir.c4
-rw-r--r--fs/nfs/client.c122
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/super.c22
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/nilfs2/btree.h2
-rw-r--r--fs/nilfs2/segbuf.h2
-rw-r--r--fs/nilfs2/segment.h2
-rw-r--r--fs/nilfs2/super.c8
-rw-r--r--fs/ocfs2/reservations.c1
-rw-r--r--fs/pipe.c93
-rw-r--r--fs/proc/proc_devtree.c3
-rw-r--r--fs/proc/task_nommu.c20
-rw-r--r--fs/splice.c2
-rw-r--r--fs/super.c6
-rw-r--r--fs/sync.c2
-rw-r--r--fs/sysfs/inode.c6
-rw-r--r--fs/sysv/ialloc.c6
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c23
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h356
-rw-r--r--fs/xfs/quota/xfs_qm.c22
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c27
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_dfrag.c5
-rw-r--r--fs/xfs/xfs_ialloc.c142
-rw-r--r--fs/xfs/xfs_iget.c39
-rw-r--r--fs/xfs/xfs_inode.c149
-rw-r--r--fs/xfs/xfs_inode.h6
-rw-r--r--fs/xfs/xfs_itable.c285
-rw-r--r--fs/xfs/xfs_itable.h17
-rw-r--r--fs/xfs/xfs_log_recover.c13
-rw-r--r--fs/xfs/xfs_mount.c70
-rw-r--r--fs/xfs/xfs_rtalloc.c8
-rw-r--r--fs/xfs/xfs_rtalloc.h11
-rw-r--r--fs/xfs/xfs_trans.c446
-rw-r--r--fs/xfs/xfs_trans.h411
-rw-r--r--fs/xfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c4
88 files changed, 1642 insertions, 1853 deletions
diff --git a/fs/afs/server.c b/fs/afs/server.c
index f49099516675..9fdc7fe3a7bc 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -91,9 +91,10 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
91 91
92 memcpy(&server->addr, addr, sizeof(struct in_addr)); 92 memcpy(&server->addr, addr, sizeof(struct in_addr));
93 server->addr.s_addr = addr->s_addr; 93 server->addr.s_addr = addr->s_addr;
94 _leave(" = %p{%d}", server, atomic_read(&server->usage));
95 } else {
96 _leave(" = NULL [nomem]");
94 } 97 }
95
96 _leave(" = %p{%d}", server, atomic_read(&server->usage));
97 return server; 98 return server;
98} 99}
99 100
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2c5f9a0e5d72..63039ed9576f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -990,10 +990,9 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
990 990
991 /* clear any space allocated but not loaded */ 991 /* clear any space allocated but not loaded */
992 if (phdr->p_filesz < phdr->p_memsz) { 992 if (phdr->p_filesz < phdr->p_memsz) {
993 ret = clear_user((void *) (seg->addr + phdr->p_filesz), 993 if (clear_user((void *) (seg->addr + phdr->p_filesz),
994 phdr->p_memsz - phdr->p_filesz); 994 phdr->p_memsz - phdr->p_filesz))
995 if (ret) 995 return -EFAULT;
996 return ret;
997 } 996 }
998 997
999 if (mm) { 998 if (mm) {
@@ -1027,7 +1026,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
1027 struct elf32_fdpic_loadseg *seg; 1026 struct elf32_fdpic_loadseg *seg;
1028 struct elf32_phdr *phdr; 1027 struct elf32_phdr *phdr;
1029 unsigned long load_addr, delta_vaddr; 1028 unsigned long load_addr, delta_vaddr;
1030 int loop, dvset, ret; 1029 int loop, dvset;
1031 1030
1032 load_addr = params->load_addr; 1031 load_addr = params->load_addr;
1033 delta_vaddr = 0; 1032 delta_vaddr = 0;
@@ -1127,9 +1126,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
1127 * PT_LOAD */ 1126 * PT_LOAD */
1128 if (prot & PROT_WRITE && disp > 0) { 1127 if (prot & PROT_WRITE && disp > 0) {
1129 kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp); 1128 kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp);
1130 ret = clear_user((void __user *) maddr, disp); 1129 if (clear_user((void __user *) maddr, disp))
1131 if (ret) 1130 return -EFAULT;
1132 return ret;
1133 maddr += disp; 1131 maddr += disp;
1134 } 1132 }
1135 1133
@@ -1164,19 +1162,17 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
1164 if (prot & PROT_WRITE && excess1 > 0) { 1162 if (prot & PROT_WRITE && excess1 > 0) {
1165 kdebug("clear[%d] ad=%lx sz=%lx", 1163 kdebug("clear[%d] ad=%lx sz=%lx",
1166 loop, maddr + phdr->p_filesz, excess1); 1164 loop, maddr + phdr->p_filesz, excess1);
1167 ret = clear_user((void __user *) maddr + phdr->p_filesz, 1165 if (clear_user((void __user *) maddr + phdr->p_filesz,
1168 excess1); 1166 excess1))
1169 if (ret) 1167 return -EFAULT;
1170 return ret;
1171 } 1168 }
1172 1169
1173#else 1170#else
1174 if (excess > 0) { 1171 if (excess > 0) {
1175 kdebug("clear[%d] ad=%lx sz=%lx", 1172 kdebug("clear[%d] ad=%lx sz=%lx",
1176 loop, maddr + phdr->p_filesz, excess); 1173 loop, maddr + phdr->p_filesz, excess);
1177 ret = clear_user((void *) maddr + phdr->p_filesz, excess); 1174 if (clear_user((void *) maddr + phdr->p_filesz, excess))
1178 if (ret) 1175 return -EFAULT;
1179 return ret;
1180 } 1176 }
1181#endif 1177#endif
1182 1178
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 49566c1687d8..811384bec8de 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -56,16 +56,19 @@
56#endif 56#endif
57 57
58/* 58/*
59 * User data (stack, data section and bss) needs to be aligned 59 * User data (data section and bss) needs to be aligned.
60 * for the same reasons as SLAB memory is, and to the same amount. 60 * We pick 0x20 here because it is the max value elf2flt has always
61 * Avoid duplicating architecture specific code by using the same 61 * used in producing FLAT files, and because it seems to be large
62 * macro as with SLAB allocation: 62 * enough to make all the gcc alignment related tests happy.
63 */ 63 */
64#ifdef ARCH_SLAB_MINALIGN 64#define FLAT_DATA_ALIGN (0x20)
65#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN) 65
66#else 66/*
67#define FLAT_DATA_ALIGN (sizeof(void *)) 67 * User data (stack) also needs to be aligned.
68#endif 68 * Here we can be a bit looser than the data sections since this
69 * needs to only meet arch ABI requirements.
70 */
71#define FLAT_STACK_ALIGN max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
69 72
70#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */ 73#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
71#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */ 74#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */
@@ -129,7 +132,7 @@ static unsigned long create_flat_tables(
129 132
130 sp = (unsigned long *)p; 133 sp = (unsigned long *)p;
131 sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0); 134 sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
132 sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN); 135 sp = (unsigned long *) ((unsigned long)sp & -FLAT_STACK_ALIGN);
133 argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0); 136 argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
134 envp = argv + (argc + 1); 137 envp = argv + (argc + 1);
135 138
@@ -589,7 +592,7 @@ static int load_flat_file(struct linux_binprm * bprm,
589 if (IS_ERR_VALUE(result)) { 592 if (IS_ERR_VALUE(result)) {
590 printk("Unable to read data+bss, errno %d\n", (int)-result); 593 printk("Unable to read data+bss, errno %d\n", (int)-result);
591 do_munmap(current->mm, textpos, text_len); 594 do_munmap(current->mm, textpos, text_len);
592 do_munmap(current->mm, realdatastart, data_len + extra); 595 do_munmap(current->mm, realdatastart, len);
593 ret = result; 596 ret = result;
594 goto err; 597 goto err;
595 } 598 }
@@ -876,7 +879,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
876 stack_len = TOP_OF_ARGS - bprm->p; /* the strings */ 879 stack_len = TOP_OF_ARGS - bprm->p; /* the strings */
877 stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */ 880 stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
878 stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */ 881 stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
879 stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */ 882 stack_len += FLAT_STACK_ALIGN - 1; /* reserve for upcoming alignment */
880 883
881 res = load_flat_file(bprm, &libinfo, 0, &stack_len); 884 res = load_flat_file(bprm, &libinfo, 0, &stack_len);
882 if (IS_ERR_VALUE(res)) 885 if (IS_ERR_VALUE(res))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7346c96308a5..99d6af811747 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -706,8 +706,13 @@ retry:
706 * @bdev is about to be opened exclusively. Check @bdev can be opened 706 * @bdev is about to be opened exclusively. Check @bdev can be opened
707 * exclusively and mark that an exclusive open is in progress. Each 707 * exclusively and mark that an exclusive open is in progress. Each
708 * successful call to this function must be matched with a call to 708 * successful call to this function must be matched with a call to
709 * either bd_claim() or bd_abort_claiming(). If this function 709 * either bd_finish_claiming() or bd_abort_claiming() (which do not
710 * succeeds, the matching bd_claim() is guaranteed to succeed. 710 * fail).
711 *
712 * This function is used to gain exclusive access to the block device
713 * without actually causing other exclusive open attempts to fail. It
714 * should be used when the open sequence itself requires exclusive
715 * access but may subsequently fail.
711 * 716 *
712 * CONTEXT: 717 * CONTEXT:
713 * Might sleep. 718 * Might sleep.
@@ -734,6 +739,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
734 return ERR_PTR(-ENXIO); 739 return ERR_PTR(-ENXIO);
735 740
736 whole = bdget_disk(disk, 0); 741 whole = bdget_disk(disk, 0);
742 module_put(disk->fops->owner);
737 put_disk(disk); 743 put_disk(disk);
738 if (!whole) 744 if (!whole)
739 return ERR_PTR(-ENOMEM); 745 return ERR_PTR(-ENOMEM);
@@ -782,15 +788,46 @@ static void bd_abort_claiming(struct block_device *whole, void *holder)
782 __bd_abort_claiming(whole, holder); /* releases bdev_lock */ 788 __bd_abort_claiming(whole, holder); /* releases bdev_lock */
783} 789}
784 790
791/* increment holders when we have a legitimate claim. requires bdev_lock */
792static void __bd_claim(struct block_device *bdev, struct block_device *whole,
793 void *holder)
794{
795 /* note that for a whole device bd_holders
796 * will be incremented twice, and bd_holder will
797 * be set to bd_claim before being set to holder
798 */
799 whole->bd_holders++;
800 whole->bd_holder = bd_claim;
801 bdev->bd_holders++;
802 bdev->bd_holder = holder;
803}
804
805/**
806 * bd_finish_claiming - finish claiming a block device
807 * @bdev: block device of interest (passed to bd_start_claiming())
808 * @whole: whole block device returned by bd_start_claiming()
809 * @holder: holder trying to claim @bdev
810 *
811 * Finish a claiming block started by bd_start_claiming().
812 *
813 * CONTEXT:
814 * Grabs and releases bdev_lock.
815 */
816static void bd_finish_claiming(struct block_device *bdev,
817 struct block_device *whole, void *holder)
818{
819 spin_lock(&bdev_lock);
820 BUG_ON(!bd_may_claim(bdev, whole, holder));
821 __bd_claim(bdev, whole, holder);
822 __bd_abort_claiming(whole, holder); /* not actually an abort */
823}
824
785/** 825/**
786 * bd_claim - claim a block device 826 * bd_claim - claim a block device
787 * @bdev: block device to claim 827 * @bdev: block device to claim
788 * @holder: holder trying to claim @bdev 828 * @holder: holder trying to claim @bdev
789 * 829 *
790 * Try to claim @bdev which must have been opened successfully. This 830 * Try to claim @bdev which must have been opened successfully.
791 * function may be called with or without preceding
792 * blk_start_claiming(). In the former case, this function is always
793 * successful and terminates the claiming block.
794 * 831 *
795 * CONTEXT: 832 * CONTEXT:
796 * Might sleep. 833 * Might sleep.
@@ -806,23 +843,10 @@ int bd_claim(struct block_device *bdev, void *holder)
806 might_sleep(); 843 might_sleep();
807 844
808 spin_lock(&bdev_lock); 845 spin_lock(&bdev_lock);
809
810 res = bd_prepare_to_claim(bdev, whole, holder); 846 res = bd_prepare_to_claim(bdev, whole, holder);
811 if (res == 0) { 847 if (res == 0)
812 /* note that for a whole device bd_holders 848 __bd_claim(bdev, whole, holder);
813 * will be incremented twice, and bd_holder will 849 spin_unlock(&bdev_lock);
814 * be set to bd_claim before being set to holder
815 */
816 whole->bd_holders++;
817 whole->bd_holder = bd_claim;
818 bdev->bd_holders++;
819 bdev->bd_holder = holder;
820 }
821
822 if (whole->bd_claiming)
823 __bd_abort_claiming(whole, holder); /* releases bdev_lock */
824 else
825 spin_unlock(&bdev_lock);
826 850
827 return res; 851 return res;
828} 852}
@@ -1476,7 +1500,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
1476 1500
1477 if (whole) { 1501 if (whole) {
1478 if (res == 0) 1502 if (res == 0)
1479 BUG_ON(bd_claim(bdev, filp) != 0); 1503 bd_finish_claiming(bdev, whole, filp);
1480 else 1504 else
1481 bd_abort_claiming(whole, filp); 1505 bd_abort_claiming(whole, filp);
1482 } 1506 }
@@ -1712,7 +1736,7 @@ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *h
1712 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) 1736 if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
1713 goto out_blkdev_put; 1737 goto out_blkdev_put;
1714 1738
1715 BUG_ON(bd_claim(bdev, holder) != 0); 1739 bd_finish_claiming(bdev, whole, holder);
1716 return bdev; 1740 return bdev;
1717 1741
1718out_blkdev_put: 1742out_blkdev_put:
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 8d432cd9d580..2222d161c7b6 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,6 +60,8 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
60 size = __btrfs_getxattr(inode, name, value, size); 60 size = __btrfs_getxattr(inode, name, value, size);
61 if (size > 0) { 61 if (size > 0) {
62 acl = posix_acl_from_xattr(value, size); 62 acl = posix_acl_from_xattr(value, size);
63 if (IS_ERR(acl))
64 return acl;
63 set_cached_acl(inode, type, acl); 65 set_cached_acl(inode, type, acl);
64 } 66 }
65 kfree(value); 67 kfree(value);
@@ -160,6 +162,12 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
160 int ret; 162 int ret;
161 struct posix_acl *acl = NULL; 163 struct posix_acl *acl = NULL;
162 164
165 if (!is_owner_or_cap(dentry->d_inode))
166 return -EPERM;
167
168 if (!IS_POSIXACL(dentry->d_inode))
169 return -EOPNOTSUPP;
170
163 if (value) { 171 if (value) {
164 acl = posix_acl_from_xattr(value, size); 172 acl = posix_acl_from_xattr(value, size);
165 if (acl == NULL) { 173 if (acl == NULL) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f3b287c22caf..34f7c375567e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1941,8 +1941,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1941 btrfs_level_size(tree_root, 1941 btrfs_level_size(tree_root,
1942 btrfs_super_log_root_level(disk_super)); 1942 btrfs_super_log_root_level(disk_super));
1943 1943
1944 log_tree_root = kzalloc(sizeof(struct btrfs_root), 1944 log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
1945 GFP_NOFS); 1945 if (!log_tree_root) {
1946 err = -ENOMEM;
1947 goto fail_trans_kthread;
1948 }
1946 1949
1947 __setup_root(nodesize, leafsize, sectorsize, stripesize, 1950 __setup_root(nodesize, leafsize, sectorsize, stripesize,
1948 log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); 1951 log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
@@ -1982,6 +1985,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1982 fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); 1985 fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
1983 if (!fs_info->fs_root) 1986 if (!fs_info->fs_root)
1984 goto fail_trans_kthread; 1987 goto fail_trans_kthread;
1988 if (IS_ERR(fs_info->fs_root)) {
1989 err = PTR_ERR(fs_info->fs_root);
1990 goto fail_trans_kthread;
1991 }
1985 1992
1986 if (!(sb->s_flags & MS_RDONLY)) { 1993 if (!(sb->s_flags & MS_RDONLY)) {
1987 down_read(&fs_info->cleanup_work_sem); 1994 down_read(&fs_info->cleanup_work_sem);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b9080d71991a..32d094002a57 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4360,7 +4360,8 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4360 4360
4361 block_rsv = get_block_rsv(trans, root); 4361 block_rsv = get_block_rsv(trans, root);
4362 cache = btrfs_lookup_block_group(root->fs_info, buf->start); 4362 cache = btrfs_lookup_block_group(root->fs_info, buf->start);
4363 BUG_ON(block_rsv->space_info != cache->space_info); 4363 if (block_rsv->space_info != cache->space_info)
4364 goto out;
4364 4365
4365 if (btrfs_header_generation(buf) == trans->transid) { 4366 if (btrfs_header_generation(buf) == trans->transid) {
4366 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 4367 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 787b50a16a14..e354c33df082 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1140,7 +1140,7 @@ int btrfs_sync_file(struct file *file, int datasync)
1140 /* 1140 /*
1141 * ok we haven't committed the transaction yet, lets do a commit 1141 * ok we haven't committed the transaction yet, lets do a commit
1142 */ 1142 */
1143 if (file && file->private_data) 1143 if (file->private_data)
1144 btrfs_ioctl_trans_end(file); 1144 btrfs_ioctl_trans_end(file);
1145 1145
1146 trans = btrfs_start_transaction(root, 0); 1146 trans = btrfs_start_transaction(root, 0);
@@ -1190,14 +1190,22 @@ static const struct vm_operations_struct btrfs_file_vm_ops = {
1190 1190
1191static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) 1191static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
1192{ 1192{
1193 vma->vm_ops = &btrfs_file_vm_ops; 1193 struct address_space *mapping = filp->f_mapping;
1194
1195 if (!mapping->a_ops->readpage)
1196 return -ENOEXEC;
1197
1194 file_accessed(filp); 1198 file_accessed(filp);
1199 vma->vm_ops = &btrfs_file_vm_ops;
1200 vma->vm_flags |= VM_CAN_NONLINEAR;
1201
1195 return 0; 1202 return 0;
1196} 1203}
1197 1204
1198const struct file_operations btrfs_file_operations = { 1205const struct file_operations btrfs_file_operations = {
1199 .llseek = generic_file_llseek, 1206 .llseek = generic_file_llseek,
1200 .read = do_sync_read, 1207 .read = do_sync_read,
1208 .write = do_sync_write,
1201 .aio_read = generic_file_aio_read, 1209 .aio_read = generic_file_aio_read,
1202 .splice_read = generic_file_splice_read, 1210 .splice_read = generic_file_splice_read,
1203 .aio_write = btrfs_file_aio_write, 1211 .aio_write = btrfs_file_aio_write,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fa6ccc1bfe2a..1bff92ad4744 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2673,7 +2673,7 @@ static int check_path_shared(struct btrfs_root *root,
2673 struct extent_buffer *eb; 2673 struct extent_buffer *eb;
2674 int level; 2674 int level;
2675 int ret; 2675 int ret;
2676 u64 refs; 2676 u64 refs = 1;
2677 2677
2678 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2678 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2679 if (!path->nodes[level]) 2679 if (!path->nodes[level])
@@ -6884,7 +6884,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
6884 if (em->block_start == EXTENT_MAP_HOLE || 6884 if (em->block_start == EXTENT_MAP_HOLE ||
6885 (cur_offset >= inode->i_size && 6885 (cur_offset >= inode->i_size &&
6886 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 6886 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
6887 ret = btrfs_prealloc_file_range(inode, 0, cur_offset, 6887 ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
6888 last_byte - cur_offset, 6888 last_byte - cur_offset,
6889 1 << inode->i_blkbits, 6889 1 << inode->i_blkbits,
6890 offset + len, 6890 offset + len,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4cdb98cf26de..4dbaf89b1337 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1280,7 +1280,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1280 trans = btrfs_start_transaction(root, 0); 1280 trans = btrfs_start_transaction(root, 0);
1281 if (IS_ERR(trans)) { 1281 if (IS_ERR(trans)) {
1282 err = PTR_ERR(trans); 1282 err = PTR_ERR(trans);
1283 goto out; 1283 goto out_up_write;
1284 } 1284 }
1285 trans->block_rsv = &root->fs_info->global_block_rsv; 1285 trans->block_rsv = &root->fs_info->global_block_rsv;
1286 1286
@@ -1845,7 +1845,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
1845 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); 1845 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
1846 di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, 1846 di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
1847 dir_id, "default", 7, 1); 1847 dir_id, "default", 7, 1);
1848 if (!di) { 1848 if (IS_ERR_OR_NULL(di)) {
1849 btrfs_free_path(path); 1849 btrfs_free_path(path);
1850 btrfs_end_transaction(trans, root); 1850 btrfs_end_transaction(trans, root);
1851 printk(KERN_ERR "Umm, you don't have the default dir item, " 1851 printk(KERN_ERR "Umm, you don't have the default dir item, "
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 05d41e569236..b37d723b9d4a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -784,16 +784,17 @@ again:
784 struct btrfs_extent_ref_v0 *ref0; 784 struct btrfs_extent_ref_v0 *ref0;
785 ref0 = btrfs_item_ptr(eb, path1->slots[0], 785 ref0 = btrfs_item_ptr(eb, path1->slots[0],
786 struct btrfs_extent_ref_v0); 786 struct btrfs_extent_ref_v0);
787 root = find_tree_root(rc, eb, ref0);
788 if (!root->ref_cows)
789 cur->cowonly = 1;
790 if (key.objectid == key.offset) { 787 if (key.objectid == key.offset) {
788 root = find_tree_root(rc, eb, ref0);
791 if (root && !should_ignore_root(root)) 789 if (root && !should_ignore_root(root))
792 cur->root = root; 790 cur->root = root;
793 else 791 else
794 list_add(&cur->list, &useless); 792 list_add(&cur->list, &useless);
795 break; 793 break;
796 } 794 }
795 if (is_cowonly_root(btrfs_ref_root_v0(eb,
796 ref0)))
797 cur->cowonly = 1;
797 } 798 }
798#else 799#else
799 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); 800 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index b91ccd972644..2d958be761c8 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -330,7 +330,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
330{ 330{
331 struct btrfs_path *path; 331 struct btrfs_path *path;
332 int ret; 332 int ret;
333 u32 refs;
334 struct btrfs_root_item *ri; 333 struct btrfs_root_item *ri;
335 struct extent_buffer *leaf; 334 struct extent_buffer *leaf;
336 335
@@ -344,8 +343,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
344 leaf = path->nodes[0]; 343 leaf = path->nodes[0];
345 ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); 344 ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item);
346 345
347 refs = btrfs_disk_root_refs(leaf, ri);
348 BUG_ON(refs != 0);
349 ret = btrfs_del_item(trans, root, path); 346 ret = btrfs_del_item(trans, root, path);
350out: 347out:
351 btrfs_free_path(path); 348 btrfs_free_path(path);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d34b2dfc9628..f2393b390318 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -360,6 +360,8 @@ static struct dentry *get_default_root(struct super_block *sb,
360 */ 360 */
361 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); 361 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
362 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); 362 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
363 if (IS_ERR(di))
364 return ERR_CAST(di);
363 if (!di) { 365 if (!di) {
364 /* 366 /*
365 * Ok the default dir item isn't there. This is weird since 367 * Ok the default dir item isn't there. This is weird since
@@ -390,8 +392,8 @@ setup_root:
390 location.offset = 0; 392 location.offset = 0;
391 393
392 inode = btrfs_iget(sb, &location, new_root, &new); 394 inode = btrfs_iget(sb, &location, new_root, &new);
393 if (!inode) 395 if (IS_ERR(inode))
394 return ERR_PTR(-ENOMEM); 396 return ERR_CAST(inode);
395 397
396 /* 398 /*
397 * If we're just mounting the root most subvol put the inode and return 399 * If we're just mounting the root most subvol put the inode and return
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ae3e3a306445..619b61655ee5 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -981,6 +981,46 @@ static int send_cap_msg(struct ceph_mds_session *session,
981 return 0; 981 return 0;
982} 982}
983 983
984static void __queue_cap_release(struct ceph_mds_session *session,
985 u64 ino, u64 cap_id, u32 migrate_seq,
986 u32 issue_seq)
987{
988 struct ceph_msg *msg;
989 struct ceph_mds_cap_release *head;
990 struct ceph_mds_cap_item *item;
991
992 spin_lock(&session->s_cap_lock);
993 BUG_ON(!session->s_num_cap_releases);
994 msg = list_first_entry(&session->s_cap_releases,
995 struct ceph_msg, list_head);
996
997 dout(" adding %llx release to mds%d msg %p (%d left)\n",
998 ino, session->s_mds, msg, session->s_num_cap_releases);
999
1000 BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
1001 head = msg->front.iov_base;
1002 head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
1003 item = msg->front.iov_base + msg->front.iov_len;
1004 item->ino = cpu_to_le64(ino);
1005 item->cap_id = cpu_to_le64(cap_id);
1006 item->migrate_seq = cpu_to_le32(migrate_seq);
1007 item->seq = cpu_to_le32(issue_seq);
1008
1009 session->s_num_cap_releases--;
1010
1011 msg->front.iov_len += sizeof(*item);
1012 if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
1013 dout(" release msg %p full\n", msg);
1014 list_move_tail(&msg->list_head, &session->s_cap_releases_done);
1015 } else {
1016 dout(" release msg %p at %d/%d (%d)\n", msg,
1017 (int)le32_to_cpu(head->num),
1018 (int)CEPH_CAPS_PER_RELEASE,
1019 (int)msg->front.iov_len);
1020 }
1021 spin_unlock(&session->s_cap_lock);
1022}
1023
984/* 1024/*
985 * Queue cap releases when an inode is dropped from our cache. Since 1025 * Queue cap releases when an inode is dropped from our cache. Since
986 * inode is about to be destroyed, there is no need for i_lock. 1026 * inode is about to be destroyed, there is no need for i_lock.
@@ -994,41 +1034,9 @@ void ceph_queue_caps_release(struct inode *inode)
994 while (p) { 1034 while (p) {
995 struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); 1035 struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
996 struct ceph_mds_session *session = cap->session; 1036 struct ceph_mds_session *session = cap->session;
997 struct ceph_msg *msg;
998 struct ceph_mds_cap_release *head;
999 struct ceph_mds_cap_item *item;
1000 1037
1001 spin_lock(&session->s_cap_lock); 1038 __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
1002 BUG_ON(!session->s_num_cap_releases); 1039 cap->mseq, cap->issue_seq);
1003 msg = list_first_entry(&session->s_cap_releases,
1004 struct ceph_msg, list_head);
1005
1006 dout(" adding %p release to mds%d msg %p (%d left)\n",
1007 inode, session->s_mds, msg, session->s_num_cap_releases);
1008
1009 BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
1010 head = msg->front.iov_base;
1011 head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
1012 item = msg->front.iov_base + msg->front.iov_len;
1013 item->ino = cpu_to_le64(ceph_ino(inode));
1014 item->cap_id = cpu_to_le64(cap->cap_id);
1015 item->migrate_seq = cpu_to_le32(cap->mseq);
1016 item->seq = cpu_to_le32(cap->issue_seq);
1017
1018 session->s_num_cap_releases--;
1019
1020 msg->front.iov_len += sizeof(*item);
1021 if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
1022 dout(" release msg %p full\n", msg);
1023 list_move_tail(&msg->list_head,
1024 &session->s_cap_releases_done);
1025 } else {
1026 dout(" release msg %p at %d/%d (%d)\n", msg,
1027 (int)le32_to_cpu(head->num),
1028 (int)CEPH_CAPS_PER_RELEASE,
1029 (int)msg->front.iov_len);
1030 }
1031 spin_unlock(&session->s_cap_lock);
1032 p = rb_next(p); 1040 p = rb_next(p);
1033 __ceph_remove_cap(cap); 1041 __ceph_remove_cap(cap);
1034 } 1042 }
@@ -2655,7 +2663,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2655 struct ceph_mds_caps *h; 2663 struct ceph_mds_caps *h;
2656 int mds = session->s_mds; 2664 int mds = session->s_mds;
2657 int op; 2665 int op;
2658 u32 seq; 2666 u32 seq, mseq;
2659 struct ceph_vino vino; 2667 struct ceph_vino vino;
2660 u64 cap_id; 2668 u64 cap_id;
2661 u64 size, max_size; 2669 u64 size, max_size;
@@ -2675,6 +2683,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2675 vino.snap = CEPH_NOSNAP; 2683 vino.snap = CEPH_NOSNAP;
2676 cap_id = le64_to_cpu(h->cap_id); 2684 cap_id = le64_to_cpu(h->cap_id);
2677 seq = le32_to_cpu(h->seq); 2685 seq = le32_to_cpu(h->seq);
2686 mseq = le32_to_cpu(h->migrate_seq);
2678 size = le64_to_cpu(h->size); 2687 size = le64_to_cpu(h->size);
2679 max_size = le64_to_cpu(h->max_size); 2688 max_size = le64_to_cpu(h->max_size);
2680 2689
@@ -2689,6 +2698,18 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2689 vino.snap, inode); 2698 vino.snap, inode);
2690 if (!inode) { 2699 if (!inode) {
2691 dout(" i don't have ino %llx\n", vino.ino); 2700 dout(" i don't have ino %llx\n", vino.ino);
2701
2702 if (op == CEPH_CAP_OP_IMPORT)
2703 __queue_cap_release(session, vino.ino, cap_id,
2704 mseq, seq);
2705
2706 /*
2707 * send any full release message to try to move things
2708 * along for the mds (who clearly thinks we still have this
2709 * cap).
2710 */
2711 ceph_add_cap_releases(mdsc, session, -1);
2712 ceph_send_cap_releases(mdsc, session);
2692 goto done; 2713 goto done;
2693 } 2714 }
2694 2715
@@ -2714,7 +2735,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2714 spin_lock(&inode->i_lock); 2735 spin_lock(&inode->i_lock);
2715 cap = __get_cap_for_mds(ceph_inode(inode), mds); 2736 cap = __get_cap_for_mds(ceph_inode(inode), mds);
2716 if (!cap) { 2737 if (!cap) {
2717 dout("no cap on %p ino %llx.%llx from mds%d, releasing\n", 2738 dout(" no cap on %p ino %llx.%llx from mds%d\n",
2718 inode, ceph_ino(inode), ceph_snap(inode), mds); 2739 inode, ceph_ino(inode), ceph_snap(inode), mds);
2719 spin_unlock(&inode->i_lock); 2740 spin_unlock(&inode->i_lock);
2720 goto done; 2741 goto done;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 226f5a50d362..ab47f46ca282 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -827,7 +827,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
827 827
828 spin_lock(&dcache_lock); 828 spin_lock(&dcache_lock);
829 spin_lock(&dn->d_lock); 829 spin_lock(&dn->d_lock);
830 list_move_tail(&dir->d_subdirs, &dn->d_u.d_child); 830 list_move(&dn->d_u.d_child, &dir->d_subdirs);
831 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, 831 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
832 dn->d_u.d_child.prev, dn->d_u.d_child.next); 832 dn->d_u.d_child.prev, dn->d_u.d_child.next);
833 spin_unlock(&dn->d_lock); 833 spin_unlock(&dn->d_lock);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b49f12822cbc..1766947fc07a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1066,9 +1066,9 @@ static int trim_caps(struct ceph_mds_client *mdsc,
1066 * 1066 *
1067 * Called under s_mutex. 1067 * Called under s_mutex.
1068 */ 1068 */
1069static int add_cap_releases(struct ceph_mds_client *mdsc, 1069int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
1070 struct ceph_mds_session *session, 1070 struct ceph_mds_session *session,
1071 int extra) 1071 int extra)
1072{ 1072{
1073 struct ceph_msg *msg; 1073 struct ceph_msg *msg;
1074 struct ceph_mds_cap_release *head; 1074 struct ceph_mds_cap_release *head;
@@ -1176,8 +1176,8 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
1176/* 1176/*
1177 * called under s_mutex 1177 * called under s_mutex
1178 */ 1178 */
1179static void send_cap_releases(struct ceph_mds_client *mdsc, 1179void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
1180 struct ceph_mds_session *session) 1180 struct ceph_mds_session *session)
1181{ 1181{
1182 struct ceph_msg *msg; 1182 struct ceph_msg *msg;
1183 1183
@@ -1980,7 +1980,7 @@ out_err:
1980 } 1980 }
1981 mutex_unlock(&mdsc->mutex); 1981 mutex_unlock(&mdsc->mutex);
1982 1982
1983 add_cap_releases(mdsc, req->r_session, -1); 1983 ceph_add_cap_releases(mdsc, req->r_session, -1);
1984 mutex_unlock(&session->s_mutex); 1984 mutex_unlock(&session->s_mutex);
1985 1985
1986 /* kick calling process */ 1986 /* kick calling process */
@@ -2433,6 +2433,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2433 struct ceph_dentry_info *di; 2433 struct ceph_dentry_info *di;
2434 int mds = session->s_mds; 2434 int mds = session->s_mds;
2435 struct ceph_mds_lease *h = msg->front.iov_base; 2435 struct ceph_mds_lease *h = msg->front.iov_base;
2436 u32 seq;
2436 struct ceph_vino vino; 2437 struct ceph_vino vino;
2437 int mask; 2438 int mask;
2438 struct qstr dname; 2439 struct qstr dname;
@@ -2446,6 +2447,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2446 vino.ino = le64_to_cpu(h->ino); 2447 vino.ino = le64_to_cpu(h->ino);
2447 vino.snap = CEPH_NOSNAP; 2448 vino.snap = CEPH_NOSNAP;
2448 mask = le16_to_cpu(h->mask); 2449 mask = le16_to_cpu(h->mask);
2450 seq = le32_to_cpu(h->seq);
2449 dname.name = (void *)h + sizeof(*h) + sizeof(u32); 2451 dname.name = (void *)h + sizeof(*h) + sizeof(u32);
2450 dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32); 2452 dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
2451 if (dname.len != get_unaligned_le32(h+1)) 2453 if (dname.len != get_unaligned_le32(h+1))
@@ -2456,8 +2458,9 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2456 2458
2457 /* lookup inode */ 2459 /* lookup inode */
2458 inode = ceph_find_inode(sb, vino); 2460 inode = ceph_find_inode(sb, vino);
2459 dout("handle_lease '%s', mask %d, ino %llx %p\n", 2461 dout("handle_lease %s, mask %d, ino %llx %p %.*s\n",
2460 ceph_lease_op_name(h->action), mask, vino.ino, inode); 2462 ceph_lease_op_name(h->action), mask, vino.ino, inode,
2463 dname.len, dname.name);
2461 if (inode == NULL) { 2464 if (inode == NULL) {
2462 dout("handle_lease no inode %llx\n", vino.ino); 2465 dout("handle_lease no inode %llx\n", vino.ino);
2463 goto release; 2466 goto release;
@@ -2482,7 +2485,8 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2482 switch (h->action) { 2485 switch (h->action) {
2483 case CEPH_MDS_LEASE_REVOKE: 2486 case CEPH_MDS_LEASE_REVOKE:
2484 if (di && di->lease_session == session) { 2487 if (di && di->lease_session == session) {
2485 h->seq = cpu_to_le32(di->lease_seq); 2488 if (ceph_seq_cmp(di->lease_seq, seq) > 0)
2489 h->seq = cpu_to_le32(di->lease_seq);
2486 __ceph_mdsc_drop_dentry_lease(dentry); 2490 __ceph_mdsc_drop_dentry_lease(dentry);
2487 } 2491 }
2488 release = 1; 2492 release = 1;
@@ -2496,7 +2500,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2496 unsigned long duration = 2500 unsigned long duration =
2497 le32_to_cpu(h->duration_ms) * HZ / 1000; 2501 le32_to_cpu(h->duration_ms) * HZ / 1000;
2498 2502
2499 di->lease_seq = le32_to_cpu(h->seq); 2503 di->lease_seq = seq;
2500 dentry->d_time = di->lease_renew_from + duration; 2504 dentry->d_time = di->lease_renew_from + duration;
2501 di->lease_renew_after = di->lease_renew_from + 2505 di->lease_renew_after = di->lease_renew_from +
2502 (duration >> 1); 2506 (duration >> 1);
@@ -2686,10 +2690,10 @@ static void delayed_work(struct work_struct *work)
2686 send_renew_caps(mdsc, s); 2690 send_renew_caps(mdsc, s);
2687 else 2691 else
2688 ceph_con_keepalive(&s->s_con); 2692 ceph_con_keepalive(&s->s_con);
2689 add_cap_releases(mdsc, s, -1); 2693 ceph_add_cap_releases(mdsc, s, -1);
2690 if (s->s_state == CEPH_MDS_SESSION_OPEN || 2694 if (s->s_state == CEPH_MDS_SESSION_OPEN ||
2691 s->s_state == CEPH_MDS_SESSION_HUNG) 2695 s->s_state == CEPH_MDS_SESSION_HUNG)
2692 send_cap_releases(mdsc, s); 2696 ceph_send_cap_releases(mdsc, s);
2693 mutex_unlock(&s->s_mutex); 2697 mutex_unlock(&s->s_mutex);
2694 ceph_put_mds_session(s); 2698 ceph_put_mds_session(s);
2695 2699
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d9936c4f1212..b292fa42a66d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -322,6 +322,12 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
322 kref_put(&req->r_kref, ceph_mdsc_release_request); 322 kref_put(&req->r_kref, ceph_mdsc_release_request);
323} 323}
324 324
325extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
326 struct ceph_mds_session *session,
327 int extra);
328extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
329 struct ceph_mds_session *session);
330
325extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); 331extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
326 332
327extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, 333extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index 21c62e9b7d1d..07a539906e67 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -400,6 +400,8 @@ static void release_generic_request(struct kref *kref)
400 ceph_msg_put(req->reply); 400 ceph_msg_put(req->reply);
401 if (req->request) 401 if (req->request)
402 ceph_msg_put(req->request); 402 ceph_msg_put(req->request);
403
404 kfree(req);
403} 405}
404 406
405static void put_generic_request(struct ceph_mon_generic_request *req) 407static void put_generic_request(struct ceph_mon_generic_request *req)
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4e0bee240b9d..fa87f51e38e1 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -89,7 +89,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
89 89
90 buf->f_files = le64_to_cpu(st.num_objects); 90 buf->f_files = le64_to_cpu(st.num_objects);
91 buf->f_ffree = -1; 91 buf->f_ffree = -1;
92 buf->f_namelen = PATH_MAX; 92 buf->f_namelen = NAME_MAX;
93 buf->f_frsize = PAGE_CACHE_SIZE; 93 buf->f_frsize = PAGE_CACHE_SIZE;
94 94
95 /* leave fsid little-endian, regardless of host endianness */ 95 /* leave fsid little-endian, regardless of host endianness */
@@ -926,7 +926,7 @@ static int ceph_compare_super(struct super_block *sb, void *data)
926/* 926/*
927 * construct our own bdi so we can control readahead, etc. 927 * construct our own bdi so we can control readahead, etc.
928 */ 928 */
929static atomic_long_t bdi_seq = ATOMIC_INIT(0); 929static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
930 930
931static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) 931static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
932{ 932{
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 78c02eb4cb1f..484e52bb40bb 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -473,14 +473,24 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
473 return 0; 473 return 0;
474} 474}
475 475
476void cifs_drop_inode(struct inode *inode)
477{
478 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
479
480 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
481 return generic_drop_inode(inode);
482
483 return generic_delete_inode(inode);
484}
485
476static const struct super_operations cifs_super_ops = { 486static const struct super_operations cifs_super_ops = {
477 .put_super = cifs_put_super, 487 .put_super = cifs_put_super,
478 .statfs = cifs_statfs, 488 .statfs = cifs_statfs,
479 .alloc_inode = cifs_alloc_inode, 489 .alloc_inode = cifs_alloc_inode,
480 .destroy_inode = cifs_destroy_inode, 490 .destroy_inode = cifs_destroy_inode,
481/* .drop_inode = generic_delete_inode, 491 .drop_inode = cifs_drop_inode,
482 .delete_inode = cifs_delete_inode, */ /* Do not need above two 492/* .delete_inode = cifs_delete_inode, */ /* Do not need above
483 functions unless later we add lazy close of inodes or unless the 493 function unless later we add lazy close of inodes or unless the
484 kernel forgets to call us with the same number of releases (closes) 494 kernel forgets to call us with the same number of releases (closes)
485 as opens */ 495 as opens */
486 .show_options = cifs_show_options, 496 .show_options = cifs_show_options,
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fb1657e0fdb8..fb6318b81509 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -106,7 +106,6 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
106 __u16 fileHandle, struct file *file, 106 __u16 fileHandle, struct file *file,
107 struct vfsmount *mnt, unsigned int oflags); 107 struct vfsmount *mnt, unsigned int oflags);
108extern int cifs_posix_open(char *full_path, struct inode **pinode, 108extern int cifs_posix_open(char *full_path, struct inode **pinode,
109 struct vfsmount *mnt,
110 struct super_block *sb, 109 struct super_block *sb,
111 int mode, int oflags, 110 int mode, int oflags,
112 __u32 *poplock, __u16 *pnetfid, int xid); 111 __u32 *poplock, __u16 *pnetfid, int xid);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 391816b461ca..e7ae78b66fa1 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -25,6 +25,7 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/namei.h> 26#include <linux/namei.h>
27#include <linux/mount.h> 27#include <linux/mount.h>
28#include <linux/file.h>
28#include "cifsfs.h" 29#include "cifsfs.h"
29#include "cifspdu.h" 30#include "cifspdu.h"
30#include "cifsglob.h" 31#include "cifsglob.h"
@@ -184,12 +185,13 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
184 } 185 }
185 write_unlock(&GlobalSMBSeslock); 186 write_unlock(&GlobalSMBSeslock);
186 187
188 file->private_data = pCifsFile;
189
187 return pCifsFile; 190 return pCifsFile;
188} 191}
189 192
190int cifs_posix_open(char *full_path, struct inode **pinode, 193int cifs_posix_open(char *full_path, struct inode **pinode,
191 struct vfsmount *mnt, struct super_block *sb, 194 struct super_block *sb, int mode, int oflags,
192 int mode, int oflags,
193 __u32 *poplock, __u16 *pnetfid, int xid) 195 __u32 *poplock, __u16 *pnetfid, int xid)
194{ 196{
195 int rc; 197 int rc;
@@ -258,19 +260,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
258 cifs_fattr_to_inode(*pinode, &fattr); 260 cifs_fattr_to_inode(*pinode, &fattr);
259 } 261 }
260 262
261 /*
262 * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to
263 * file->private_data.
264 */
265 if (mnt) {
266 struct cifsFileInfo *pfile_info;
267
268 pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt,
269 oflags);
270 if (pfile_info == NULL)
271 rc = -ENOMEM;
272 }
273
274posix_open_ret: 263posix_open_ret:
275 kfree(presp_data); 264 kfree(presp_data);
276 return rc; 265 return rc;
@@ -298,7 +287,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
298 int create_options = CREATE_NOT_DIR; 287 int create_options = CREATE_NOT_DIR;
299 __u32 oplock = 0; 288 __u32 oplock = 0;
300 int oflags; 289 int oflags;
301 bool posix_create = false;
302 /* 290 /*
303 * BB below access is probably too much for mknod to request 291 * BB below access is probably too much for mknod to request
304 * but we have to do query and setpathinfo so requesting 292 * but we have to do query and setpathinfo so requesting
@@ -339,7 +327,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
339 (CIFS_UNIX_POSIX_PATH_OPS_CAP & 327 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
340 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 328 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
341 rc = cifs_posix_open(full_path, &newinode, 329 rc = cifs_posix_open(full_path, &newinode,
342 nd ? nd->path.mnt : NULL,
343 inode->i_sb, mode, oflags, &oplock, &fileHandle, xid); 330 inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
344 /* EIO could indicate that (posix open) operation is not 331 /* EIO could indicate that (posix open) operation is not
345 supported, despite what server claimed in capability 332 supported, despite what server claimed in capability
@@ -347,7 +334,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
347 handled in posix open */ 334 handled in posix open */
348 335
349 if (rc == 0) { 336 if (rc == 0) {
350 posix_create = true;
351 if (newinode == NULL) /* query inode info */ 337 if (newinode == NULL) /* query inode info */
352 goto cifs_create_get_file_info; 338 goto cifs_create_get_file_info;
353 else /* success, no need to query */ 339 else /* success, no need to query */
@@ -478,21 +464,28 @@ cifs_create_set_dentry:
478 else 464 else
479 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); 465 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
480 466
481 /* nfsd case - nfs srv does not set nd */ 467 if (newinode && nd && (nd->flags & LOOKUP_OPEN)) {
482 if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
483 /* mknod case - do not leave file open */
484 CIFSSMBClose(xid, tcon, fileHandle);
485 } else if (!(posix_create) && (newinode)) {
486 struct cifsFileInfo *pfile_info; 468 struct cifsFileInfo *pfile_info;
487 /* 469 struct file *filp;
488 * cifs_fill_filedata() takes care of setting cifsFileInfo 470
489 * pointer to file->private_data. 471 filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
490 */ 472 if (IS_ERR(filp)) {
491 pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL, 473 rc = PTR_ERR(filp);
474 CIFSSMBClose(xid, tcon, fileHandle);
475 goto cifs_create_out;
476 }
477
478 pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp,
492 nd->path.mnt, oflags); 479 nd->path.mnt, oflags);
493 if (pfile_info == NULL) 480 if (pfile_info == NULL) {
481 fput(filp);
482 CIFSSMBClose(xid, tcon, fileHandle);
494 rc = -ENOMEM; 483 rc = -ENOMEM;
484 }
485 } else {
486 CIFSSMBClose(xid, tcon, fileHandle);
495 } 487 }
488
496cifs_create_out: 489cifs_create_out:
497 kfree(buf); 490 kfree(buf);
498 kfree(full_path); 491 kfree(full_path);
@@ -636,6 +629,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
636 bool posix_open = false; 629 bool posix_open = false;
637 struct cifs_sb_info *cifs_sb; 630 struct cifs_sb_info *cifs_sb;
638 struct cifsTconInfo *pTcon; 631 struct cifsTconInfo *pTcon;
632 struct cifsFileInfo *cfile;
639 struct inode *newInode = NULL; 633 struct inode *newInode = NULL;
640 char *full_path = NULL; 634 char *full_path = NULL;
641 struct file *filp; 635 struct file *filp;
@@ -703,7 +697,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
703 if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && 697 if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
704 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && 698 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
705 (nd->intent.open.flags & O_CREAT)) { 699 (nd->intent.open.flags & O_CREAT)) {
706 rc = cifs_posix_open(full_path, &newInode, nd->path.mnt, 700 rc = cifs_posix_open(full_path, &newInode,
707 parent_dir_inode->i_sb, 701 parent_dir_inode->i_sb,
708 nd->intent.open.create_mode, 702 nd->intent.open.create_mode,
709 nd->intent.open.flags, &oplock, 703 nd->intent.open.flags, &oplock,
@@ -733,8 +727,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
733 else 727 else
734 direntry->d_op = &cifs_dentry_ops; 728 direntry->d_op = &cifs_dentry_ops;
735 d_add(direntry, newInode); 729 d_add(direntry, newInode);
736 if (posix_open) 730 if (posix_open) {
737 filp = lookup_instantiate_filp(nd, direntry, NULL); 731 filp = lookup_instantiate_filp(nd, direntry,
732 generic_file_open);
733 if (IS_ERR(filp)) {
734 rc = PTR_ERR(filp);
735 CIFSSMBClose(xid, pTcon, fileHandle);
736 goto lookup_out;
737 }
738
739 cfile = cifs_new_fileinfo(newInode, fileHandle, filp,
740 nd->path.mnt,
741 nd->intent.open.flags);
742 if (cfile == NULL) {
743 fput(filp);
744 CIFSSMBClose(xid, pTcon, fileHandle);
745 rc = -ENOMEM;
746 goto lookup_out;
747 }
748 }
738 /* since paths are not looked up by component - the parent 749 /* since paths are not looked up by component - the parent
739 directories are presumed to be good here */ 750 directories are presumed to be good here */
740 renew_parental_timestamps(direntry); 751 renew_parental_timestamps(direntry);
@@ -755,6 +766,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
755 is a common return code */ 766 is a common return code */
756 } 767 }
757 768
769lookup_out:
758 kfree(full_path); 770 kfree(full_path);
759 FreeXid(xid); 771 FreeXid(xid);
760 return ERR_PTR(rc); 772 return ERR_PTR(rc);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index f1ff785b2292..409e4f523e61 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -162,44 +162,12 @@ psx_client_can_cache:
162 return 0; 162 return 0;
163} 163}
164 164
165static struct cifsFileInfo *
166cifs_fill_filedata(struct file *file)
167{
168 struct list_head *tmp;
169 struct cifsFileInfo *pCifsFile = NULL;
170 struct cifsInodeInfo *pCifsInode = NULL;
171
172 /* search inode for this file and fill in file->private_data */
173 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
174 read_lock(&GlobalSMBSeslock);
175 list_for_each(tmp, &pCifsInode->openFileList) {
176 pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
177 if ((pCifsFile->pfile == NULL) &&
178 (pCifsFile->pid == current->tgid)) {
179 /* mode set in cifs_create */
180
181 /* needed for writepage */
182 pCifsFile->pfile = file;
183 file->private_data = pCifsFile;
184 break;
185 }
186 }
187 read_unlock(&GlobalSMBSeslock);
188
189 if (file->private_data != NULL) {
190 return pCifsFile;
191 } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
192 cERROR(1, "could not find file instance for "
193 "new file %p", file);
194 return NULL;
195}
196
197/* all arguments to this function must be checked for validity in caller */ 165/* all arguments to this function must be checked for validity in caller */
198static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, 166static inline int cifs_open_inode_helper(struct inode *inode,
199 struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
200 struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf, 167 struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
201 char *full_path, int xid) 168 char *full_path, int xid)
202{ 169{
170 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
203 struct timespec temp; 171 struct timespec temp;
204 int rc; 172 int rc;
205 173
@@ -213,36 +181,35 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
213 /* if not oplocked, invalidate inode pages if mtime or file 181 /* if not oplocked, invalidate inode pages if mtime or file
214 size changed */ 182 size changed */
215 temp = cifs_NTtimeToUnix(buf->LastWriteTime); 183 temp = cifs_NTtimeToUnix(buf->LastWriteTime);
216 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) && 184 if (timespec_equal(&inode->i_mtime, &temp) &&
217 (file->f_path.dentry->d_inode->i_size == 185 (inode->i_size ==
218 (loff_t)le64_to_cpu(buf->EndOfFile))) { 186 (loff_t)le64_to_cpu(buf->EndOfFile))) {
219 cFYI(1, "inode unchanged on server"); 187 cFYI(1, "inode unchanged on server");
220 } else { 188 } else {
221 if (file->f_path.dentry->d_inode->i_mapping) { 189 if (inode->i_mapping) {
222 /* BB no need to lock inode until after invalidate 190 /* BB no need to lock inode until after invalidate
223 since namei code should already have it locked? */ 191 since namei code should already have it locked? */
224 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping); 192 rc = filemap_write_and_wait(inode->i_mapping);
225 if (rc != 0) 193 if (rc != 0)
226 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc; 194 pCifsInode->write_behind_rc = rc;
227 } 195 }
228 cFYI(1, "invalidating remote inode since open detected it " 196 cFYI(1, "invalidating remote inode since open detected it "
229 "changed"); 197 "changed");
230 invalidate_remote_inode(file->f_path.dentry->d_inode); 198 invalidate_remote_inode(inode);
231 } 199 }
232 200
233client_can_cache: 201client_can_cache:
234 if (pTcon->unix_ext) 202 if (pTcon->unix_ext)
235 rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode, 203 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
236 full_path, inode->i_sb, xid); 204 xid);
237 else 205 else
238 rc = cifs_get_inode_info(&file->f_path.dentry->d_inode, 206 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
239 full_path, buf, inode->i_sb, xid, NULL); 207 xid, NULL);
240 208
241 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) { 209 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
242 pCifsInode->clientCanCacheAll = true; 210 pCifsInode->clientCanCacheAll = true;
243 pCifsInode->clientCanCacheRead = true; 211 pCifsInode->clientCanCacheRead = true;
244 cFYI(1, "Exclusive Oplock granted on inode %p", 212 cFYI(1, "Exclusive Oplock granted on inode %p", inode);
245 file->f_path.dentry->d_inode);
246 } else if ((*oplock & 0xF) == OPLOCK_READ) 213 } else if ((*oplock & 0xF) == OPLOCK_READ)
247 pCifsInode->clientCanCacheRead = true; 214 pCifsInode->clientCanCacheRead = true;
248 215
@@ -256,7 +223,7 @@ int cifs_open(struct inode *inode, struct file *file)
256 __u32 oplock; 223 __u32 oplock;
257 struct cifs_sb_info *cifs_sb; 224 struct cifs_sb_info *cifs_sb;
258 struct cifsTconInfo *tcon; 225 struct cifsTconInfo *tcon;
259 struct cifsFileInfo *pCifsFile; 226 struct cifsFileInfo *pCifsFile = NULL;
260 struct cifsInodeInfo *pCifsInode; 227 struct cifsInodeInfo *pCifsInode;
261 char *full_path = NULL; 228 char *full_path = NULL;
262 int desiredAccess; 229 int desiredAccess;
@@ -270,12 +237,6 @@ int cifs_open(struct inode *inode, struct file *file)
270 tcon = cifs_sb->tcon; 237 tcon = cifs_sb->tcon;
271 238
272 pCifsInode = CIFS_I(file->f_path.dentry->d_inode); 239 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
273 pCifsFile = cifs_fill_filedata(file);
274 if (pCifsFile) {
275 rc = 0;
276 FreeXid(xid);
277 return rc;
278 }
279 240
280 full_path = build_path_from_dentry(file->f_path.dentry); 241 full_path = build_path_from_dentry(file->f_path.dentry);
281 if (full_path == NULL) { 242 if (full_path == NULL) {
@@ -299,8 +260,7 @@ int cifs_open(struct inode *inode, struct file *file)
299 int oflags = (int) cifs_posix_convert_flags(file->f_flags); 260 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
300 oflags |= SMB_O_CREAT; 261 oflags |= SMB_O_CREAT;
301 /* can not refresh inode info since size could be stale */ 262 /* can not refresh inode info since size could be stale */
302 rc = cifs_posix_open(full_path, &inode, file->f_path.mnt, 263 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
303 inode->i_sb,
304 cifs_sb->mnt_file_mode /* ignored */, 264 cifs_sb->mnt_file_mode /* ignored */,
305 oflags, &oplock, &netfid, xid); 265 oflags, &oplock, &netfid, xid);
306 if (rc == 0) { 266 if (rc == 0) {
@@ -308,9 +268,20 @@ int cifs_open(struct inode *inode, struct file *file)
308 /* no need for special case handling of setting mode 268 /* no need for special case handling of setting mode
309 on read only files needed here */ 269 on read only files needed here */
310 270
311 pCifsFile = cifs_fill_filedata(file); 271 rc = cifs_posix_open_inode_helper(inode, file,
312 cifs_posix_open_inode_helper(inode, file, pCifsInode, 272 pCifsInode, oplock, netfid);
313 oplock, netfid); 273 if (rc != 0) {
274 CIFSSMBClose(xid, tcon, netfid);
275 goto out;
276 }
277
278 pCifsFile = cifs_new_fileinfo(inode, netfid, file,
279 file->f_path.mnt,
280 oflags);
281 if (pCifsFile == NULL) {
282 CIFSSMBClose(xid, tcon, netfid);
283 rc = -ENOMEM;
284 }
314 goto out; 285 goto out;
315 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 286 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
316 if (tcon->ses->serverNOS) 287 if (tcon->ses->serverNOS)
@@ -391,17 +362,17 @@ int cifs_open(struct inode *inode, struct file *file)
391 goto out; 362 goto out;
392 } 363 }
393 364
365 rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid);
366 if (rc != 0)
367 goto out;
368
394 pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt, 369 pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
395 file->f_flags); 370 file->f_flags);
396 file->private_data = pCifsFile; 371 if (pCifsFile == NULL) {
397 if (file->private_data == NULL) {
398 rc = -ENOMEM; 372 rc = -ENOMEM;
399 goto out; 373 goto out;
400 } 374 }
401 375
402 rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon,
403 &oplock, buf, full_path, xid);
404
405 if (oplock & CIFS_CREATE_ACTION) { 376 if (oplock & CIFS_CREATE_ACTION) {
406 /* time to set mode which we can not set earlier due to 377 /* time to set mode which we can not set earlier due to
407 problems creating new read-only files */ 378 problems creating new read-only files */
@@ -513,8 +484,7 @@ reopen_error_exit:
513 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 484 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
514 int oflags = (int) cifs_posix_convert_flags(file->f_flags); 485 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
515 /* can not refresh inode info since size could be stale */ 486 /* can not refresh inode info since size could be stale */
516 rc = cifs_posix_open(full_path, NULL, file->f_path.mnt, 487 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
517 inode->i_sb,
518 cifs_sb->mnt_file_mode /* ignored */, 488 cifs_sb->mnt_file_mode /* ignored */,
519 oflags, &oplock, &netfid, xid); 489 oflags, &oplock, &netfid, xid);
520 if (rc == 0) { 490 if (rc == 0) {
@@ -1952,6 +1922,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
1952 bytes_read -= PAGE_CACHE_SIZE; 1922 bytes_read -= PAGE_CACHE_SIZE;
1953 continue; 1923 continue;
1954 } 1924 }
1925 page_cache_release(page);
1955 1926
1956 target = kmap_atomic(page, KM_USER0); 1927 target = kmap_atomic(page, KM_USER0);
1957 1928
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 62b324f26a56..6f0683c68952 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1401,6 +1401,10 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
1401 if (rc == 0 || rc != -ETXTBSY) 1401 if (rc == 0 || rc != -ETXTBSY)
1402 return rc; 1402 return rc;
1403 1403
1404 /* open-file renames don't work across directories */
1405 if (to_dentry->d_parent != from_dentry->d_parent)
1406 return rc;
1407
1404 /* open the file to be renamed -- we need DELETE perms */ 1408 /* open the file to be renamed -- we need DELETE perms */
1405 rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, 1409 rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE,
1406 CREATE_NOT_DIR, &srcfid, &oplock, NULL, 1410 CREATE_NOT_DIR, &srcfid, &oplock, NULL,
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7707389bdf2c..0a57cb7db5dd 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -730,15 +730,7 @@ ssetup_ntlmssp_authenticate:
730 730
731 /* calculate session key */ 731 /* calculate session key */
732 setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); 732 setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
733 if (first_time) /* should this be moved into common code 733 /* FIXME: calculate MAC key */
734 with similar ntlmv2 path? */
735 /* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key,
736 response BB FIXME, v2_sess_key); */
737
738 /* copy session key */
739
740 /* memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE);
741 bcc_ptr += LM2_SESS_KEY_SIZE; */
742 memcpy(bcc_ptr, (char *)v2_sess_key, 734 memcpy(bcc_ptr, (char *)v2_sess_key,
743 sizeof(struct ntlmv2_resp)); 735 sizeof(struct ntlmv2_resp));
744 bcc_ptr += sizeof(struct ntlmv2_resp); 736 bcc_ptr += sizeof(struct ntlmv2_resp);
diff --git a/fs/compat.c b/fs/compat.c
index f0b391c50552..6490d2134ff3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -626,7 +626,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
626 tot_len += len; 626 tot_len += len;
627 if (tot_len < tmp) /* maths overflow on the compat_ssize_t */ 627 if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
628 goto out; 628 goto out;
629 if (!access_ok(vrfy_dir(type), buf, len)) { 629 if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
630 ret = -EFAULT; 630 ret = -EFAULT;
631 goto out; 631 goto out;
632 } 632 }
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 41645142b88b..cf78d44a8d6a 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -72,10 +72,6 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
72 if (!sd) 72 if (!sd)
73 return -EINVAL; 73 return -EINVAL;
74 74
75 error = simple_setattr(dentry, iattr);
76 if (error)
77 return error;
78
79 sd_iattr = sd->s_iattr; 75 sd_iattr = sd->s_iattr;
80 if (!sd_iattr) { 76 if (!sd_iattr) {
81 /* setting attributes for the first time, allocate now */ 77 /* setting attributes for the first time, allocate now */
@@ -89,9 +85,12 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
89 sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; 85 sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
90 sd->s_iattr = sd_iattr; 86 sd->s_iattr = sd_iattr;
91 } 87 }
92
93 /* attributes were changed atleast once in past */ 88 /* attributes were changed atleast once in past */
94 89
90 error = simple_setattr(dentry, iattr);
91 if (error)
92 return error;
93
95 if (ia_valid & ATTR_UID) 94 if (ia_valid & ATTR_UID)
96 sd_iattr->ia_uid = iattr->ia_uid; 95 sd_iattr->ia_uid = iattr->ia_uid;
97 if (ia_valid & ATTR_GID) 96 if (ia_valid & ATTR_GID)
diff --git a/fs/dcache.c b/fs/dcache.c
index d96047b4a633..c8c78ba07827 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -590,6 +590,8 @@ static void prune_dcache(int count)
590 up_read(&sb->s_umount); 590 up_read(&sb->s_umount);
591 } 591 }
592 spin_lock(&sb_lock); 592 spin_lock(&sb_lock);
593 /* lock was dropped, must reset next */
594 list_safe_reset_next(sb, n, s_list);
593 count -= pruned; 595 count -= pruned;
594 __put_super(sb); 596 __put_super(sb);
595 /* more work left to do? */ 597 /* more work left to do? */
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index ca7e2a0ed98a..2bcc0431bada 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -200,6 +200,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
200 return error; 200 return error;
201 else { 201 else {
202 inode->i_mode = mode; 202 inode->i_mode = mode;
203 inode->i_ctime = CURRENT_TIME_SEC;
203 mark_inode_dirty(inode); 204 mark_inode_dirty(inode);
204 if (error == 0) 205 if (error == 0)
205 acl = NULL; 206 acl = NULL;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 19214435b752..3675088cb88c 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1552,7 +1552,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
1552 if (error) 1552 if (error)
1553 return error; 1553 return error;
1554 } 1554 }
1555 if (iattr->ia_valid & ATTR_SIZE) { 1555 if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size != inode->i_size) {
1556 error = ext2_setsize(inode, iattr->ia_size); 1556 error = ext2_setsize(inode, iattr->ia_size);
1557 if (error) 1557 if (error)
1558 return error; 1558 return error;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 01552abbca3c..8a11fe212183 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -205,6 +205,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
205 return error; 205 return error;
206 else { 206 else {
207 inode->i_mode = mode; 207 inode->i_mode = mode;
208 inode->i_ctime = CURRENT_TIME_SEC;
208 ext3_mark_inode_dirty(handle, inode); 209 ext3_mark_inode_dirty(handle, inode);
209 if (error == 0) 210 if (error == 0)
210 acl = NULL; 211 acl = NULL;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 19df61c321fd..42272d67955a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4942,20 +4942,26 @@ void ext4_set_inode_flags(struct inode *inode)
4942/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ 4942/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
4943void ext4_get_inode_flags(struct ext4_inode_info *ei) 4943void ext4_get_inode_flags(struct ext4_inode_info *ei)
4944{ 4944{
4945 unsigned int flags = ei->vfs_inode.i_flags; 4945 unsigned int vfs_fl;
4946 4946 unsigned long old_fl, new_fl;
4947 ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL| 4947
4948 EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL); 4948 do {
4949 if (flags & S_SYNC) 4949 vfs_fl = ei->vfs_inode.i_flags;
4950 ei->i_flags |= EXT4_SYNC_FL; 4950 old_fl = ei->i_flags;
4951 if (flags & S_APPEND) 4951 new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
4952 ei->i_flags |= EXT4_APPEND_FL; 4952 EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
4953 if (flags & S_IMMUTABLE) 4953 EXT4_DIRSYNC_FL);
4954 ei->i_flags |= EXT4_IMMUTABLE_FL; 4954 if (vfs_fl & S_SYNC)
4955 if (flags & S_NOATIME) 4955 new_fl |= EXT4_SYNC_FL;
4956 ei->i_flags |= EXT4_NOATIME_FL; 4956 if (vfs_fl & S_APPEND)
4957 if (flags & S_DIRSYNC) 4957 new_fl |= EXT4_APPEND_FL;
4958 ei->i_flags |= EXT4_DIRSYNC_FL; 4958 if (vfs_fl & S_IMMUTABLE)
4959 new_fl |= EXT4_IMMUTABLE_FL;
4960 if (vfs_fl & S_NOATIME)
4961 new_fl |= EXT4_NOATIME_FL;
4962 if (vfs_fl & S_DIRSYNC)
4963 new_fl |= EXT4_DIRSYNC_FL;
4964 } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
4959} 4965}
4960 4966
4961static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, 4967static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -5191,7 +5197,7 @@ static int ext4_inode_blocks_set(handle_t *handle,
5191 */ 5197 */
5192 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 5198 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
5193 raw_inode->i_blocks_high = 0; 5199 raw_inode->i_blocks_high = 0;
5194 ei->i_flags &= ~EXT4_HUGE_FILE_FL; 5200 ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
5195 return 0; 5201 return 0;
5196 } 5202 }
5197 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) 5203 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
@@ -5204,9 +5210,9 @@ static int ext4_inode_blocks_set(handle_t *handle,
5204 */ 5210 */
5205 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 5211 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
5206 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); 5212 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
5207 ei->i_flags &= ~EXT4_HUGE_FILE_FL; 5213 ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
5208 } else { 5214 } else {
5209 ei->i_flags |= EXT4_HUGE_FILE_FL; 5215 ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
5210 /* i_block is stored in file system block size */ 5216 /* i_block is stored in file system block size */
5211 i_blocks = i_blocks >> (inode->i_blkbits - 9); 5217 i_blocks = i_blocks >> (inode->i_blkbits - 9);
5212 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 5218 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 3a6c92ac131c..52abfa12762a 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -960,6 +960,9 @@ mext_check_arguments(struct inode *orig_inode,
960 return -EINVAL; 960 return -EINVAL;
961 } 961 }
962 962
963 if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
964 return -EPERM;
965
963 /* Ext4 move extent does not support swapfile */ 966 /* Ext4 move extent does not support swapfile */
964 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { 967 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
965 ext4_debug("ext4 move extent: The argument files should " 968 ext4_debug("ext4 move extent: The argument files should "
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f74d270ba155..9d175d623aab 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -274,7 +274,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
274 274
275 ret = copy_from_user(&owner, owner_p, sizeof(owner)); 275 ret = copy_from_user(&owner, owner_p, sizeof(owner));
276 if (ret) 276 if (ret)
277 return ret; 277 return -EFAULT;
278 278
279 switch (owner.type) { 279 switch (owner.type) {
280 case F_OWNER_TID: 280 case F_OWNER_TID:
@@ -332,8 +332,11 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
332 } 332 }
333 read_unlock(&filp->f_owner.lock); 333 read_unlock(&filp->f_owner.lock);
334 334
335 if (!ret) 335 if (!ret) {
336 ret = copy_to_user(owner_p, &owner, sizeof(owner)); 336 ret = copy_to_user(owner_p, &owner, sizeof(owner));
337 if (ret)
338 ret = -EFAULT;
339 }
337 return ret; 340 return ret;
338} 341}
339 342
@@ -730,12 +733,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
730{ 733{
731 while (fa) { 734 while (fa) {
732 struct fown_struct *fown; 735 struct fown_struct *fown;
736 unsigned long flags;
737
733 if (fa->magic != FASYNC_MAGIC) { 738 if (fa->magic != FASYNC_MAGIC) {
734 printk(KERN_ERR "kill_fasync: bad magic number in " 739 printk(KERN_ERR "kill_fasync: bad magic number in "
735 "fasync_struct!\n"); 740 "fasync_struct!\n");
736 return; 741 return;
737 } 742 }
738 spin_lock(&fa->fa_lock); 743 spin_lock_irqsave(&fa->fa_lock, flags);
739 if (fa->fa_file) { 744 if (fa->fa_file) {
740 fown = &fa->fa_file->f_owner; 745 fown = &fa->fa_file->f_owner;
741 /* Don't send SIGURG to processes which have not set a 746 /* Don't send SIGURG to processes which have not set a
@@ -744,7 +749,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
744 if (!(sig == SIGURG && fown->signum == 0)) 749 if (!(sig == SIGURG && fown->signum == 0))
745 send_sigio(fown, fa->fa_fd, band); 750 send_sigio(fown, fa->fa_fd, band);
746 } 751 }
747 spin_unlock(&fa->fa_lock); 752 spin_unlock_irqrestore(&fa->fa_lock, flags);
748 fa = rcu_dereference(fa->fa_next); 753 fa = rcu_dereference(fa->fa_next);
749 } 754 }
750} 755}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ea8592b90696..0609607d3955 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -45,7 +45,6 @@ struct wb_writeback_args {
45 unsigned int for_kupdate:1; 45 unsigned int for_kupdate:1;
46 unsigned int range_cyclic:1; 46 unsigned int range_cyclic:1;
47 unsigned int for_background:1; 47 unsigned int for_background:1;
48 unsigned int sb_pinned:1;
49}; 48};
50 49
51/* 50/*
@@ -64,24 +63,16 @@ struct bdi_work {
64}; 63};
65 64
66enum { 65enum {
67 WS_USED_B = 0, 66 WS_INPROGRESS = 0,
68 WS_ONSTACK_B, 67 WS_ONSTACK,
69}; 68};
70 69
71#define WS_USED (1 << WS_USED_B)
72#define WS_ONSTACK (1 << WS_ONSTACK_B)
73
74static inline bool bdi_work_on_stack(struct bdi_work *work)
75{
76 return test_bit(WS_ONSTACK_B, &work->state);
77}
78
79static inline void bdi_work_init(struct bdi_work *work, 70static inline void bdi_work_init(struct bdi_work *work,
80 struct wb_writeback_args *args) 71 struct wb_writeback_args *args)
81{ 72{
82 INIT_RCU_HEAD(&work->rcu_head); 73 INIT_RCU_HEAD(&work->rcu_head);
83 work->args = *args; 74 work->args = *args;
84 work->state = WS_USED; 75 __set_bit(WS_INPROGRESS, &work->state);
85} 76}
86 77
87/** 78/**
@@ -96,43 +87,16 @@ int writeback_in_progress(struct backing_dev_info *bdi)
96 return !list_empty(&bdi->work_list); 87 return !list_empty(&bdi->work_list);
97} 88}
98 89
99static void bdi_work_clear(struct bdi_work *work)
100{
101 clear_bit(WS_USED_B, &work->state);
102 smp_mb__after_clear_bit();
103 /*
104 * work can have disappeared at this point. bit waitq functions
105 * should be able to tolerate this, provided bdi_sched_wait does
106 * not dereference it's pointer argument.
107 */
108 wake_up_bit(&work->state, WS_USED_B);
109}
110
111static void bdi_work_free(struct rcu_head *head) 90static void bdi_work_free(struct rcu_head *head)
112{ 91{
113 struct bdi_work *work = container_of(head, struct bdi_work, rcu_head); 92 struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
114 93
115 if (!bdi_work_on_stack(work)) 94 clear_bit(WS_INPROGRESS, &work->state);
116 kfree(work); 95 smp_mb__after_clear_bit();
117 else 96 wake_up_bit(&work->state, WS_INPROGRESS);
118 bdi_work_clear(work);
119}
120
121static void wb_work_complete(struct bdi_work *work)
122{
123 const enum writeback_sync_modes sync_mode = work->args.sync_mode;
124 int onstack = bdi_work_on_stack(work);
125 97
126 /* 98 if (!test_bit(WS_ONSTACK, &work->state))
127 * For allocated work, we can clear the done/seen bit right here. 99 kfree(work);
128 * For on-stack work, we need to postpone both the clear and free
129 * to after the RCU grace period, since the stack could be invalidated
130 * as soon as bdi_work_clear() has done the wakeup.
131 */
132 if (!onstack)
133 bdi_work_clear(work);
134 if (sync_mode == WB_SYNC_NONE || onstack)
135 call_rcu(&work->rcu_head, bdi_work_free);
136} 100}
137 101
138static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) 102static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
@@ -148,7 +112,7 @@ static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
148 list_del_rcu(&work->list); 112 list_del_rcu(&work->list);
149 spin_unlock(&bdi->wb_lock); 113 spin_unlock(&bdi->wb_lock);
150 114
151 wb_work_complete(work); 115 call_rcu(&work->rcu_head, bdi_work_free);
152 } 116 }
153} 117}
154 118
@@ -186,15 +150,14 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
186 * Used for on-stack allocated work items. The caller needs to wait until 150 * Used for on-stack allocated work items. The caller needs to wait until
187 * the wb threads have acked the work before it's safe to continue. 151 * the wb threads have acked the work before it's safe to continue.
188 */ 152 */
189static void bdi_wait_on_work_clear(struct bdi_work *work) 153static void bdi_wait_on_work_done(struct bdi_work *work)
190{ 154{
191 wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait, 155 wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait,
192 TASK_UNINTERRUPTIBLE); 156 TASK_UNINTERRUPTIBLE);
193} 157}
194 158
195static void bdi_alloc_queue_work(struct backing_dev_info *bdi, 159static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
196 struct wb_writeback_args *args, 160 struct wb_writeback_args *args)
197 int wait)
198{ 161{
199 struct bdi_work *work; 162 struct bdi_work *work;
200 163
@@ -206,8 +169,6 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
206 if (work) { 169 if (work) {
207 bdi_work_init(work, args); 170 bdi_work_init(work, args);
208 bdi_queue_work(bdi, work); 171 bdi_queue_work(bdi, work);
209 if (wait)
210 bdi_wait_on_work_clear(work);
211 } else { 172 } else {
212 struct bdi_writeback *wb = &bdi->wb; 173 struct bdi_writeback *wb = &bdi->wb;
213 174
@@ -217,72 +178,65 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
217} 178}
218 179
219/** 180/**
220 * bdi_sync_writeback - start and wait for writeback 181 * bdi_queue_work_onstack - start and wait for writeback
221 * @bdi: the backing device to write from
222 * @sb: write inodes from this super_block 182 * @sb: write inodes from this super_block
223 * 183 *
224 * Description: 184 * Description:
225 * This does WB_SYNC_ALL data integrity writeback and waits for the 185 * This function initiates writeback and waits for the operation to
226 * IO to complete. Callers must hold the sb s_umount semaphore for 186 * complete. Callers must hold the sb s_umount semaphore for
227 * reading, to avoid having the super disappear before we are done. 187 * reading, to avoid having the super disappear before we are done.
228 */ 188 */
229static void bdi_sync_writeback(struct backing_dev_info *bdi, 189static void bdi_queue_work_onstack(struct wb_writeback_args *args)
230 struct super_block *sb)
231{ 190{
232 struct wb_writeback_args args = {
233 .sb = sb,
234 .sync_mode = WB_SYNC_ALL,
235 .nr_pages = LONG_MAX,
236 .range_cyclic = 0,
237 /*
238 * Setting sb_pinned is not necessary for WB_SYNC_ALL, but
239 * lets make it explicitly clear.
240 */
241 .sb_pinned = 1,
242 };
243 struct bdi_work work; 191 struct bdi_work work;
244 192
245 bdi_work_init(&work, &args); 193 bdi_work_init(&work, args);
246 work.state |= WS_ONSTACK; 194 __set_bit(WS_ONSTACK, &work.state);
247 195
248 bdi_queue_work(bdi, &work); 196 bdi_queue_work(args->sb->s_bdi, &work);
249 bdi_wait_on_work_clear(&work); 197 bdi_wait_on_work_done(&work);
250} 198}
251 199
252/** 200/**
253 * bdi_start_writeback - start writeback 201 * bdi_start_writeback - start writeback
254 * @bdi: the backing device to write from 202 * @bdi: the backing device to write from
255 * @sb: write inodes from this super_block
256 * @nr_pages: the number of pages to write 203 * @nr_pages: the number of pages to write
257 * @sb_locked: caller already holds sb umount sem.
258 * 204 *
259 * Description: 205 * Description:
260 * This does WB_SYNC_NONE opportunistic writeback. The IO is only 206 * This does WB_SYNC_NONE opportunistic writeback. The IO is only
261 * started when this function returns, we make no guarentees on 207 * started when this function returns, we make no guarentees on
262 * completion. Caller specifies whether sb umount sem is held already or not. 208 * completion. Caller need not hold sb s_umount semaphore.
263 * 209 *
264 */ 210 */
265void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, 211void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
266 long nr_pages, int sb_locked)
267{ 212{
268 struct wb_writeback_args args = { 213 struct wb_writeback_args args = {
269 .sb = sb,
270 .sync_mode = WB_SYNC_NONE, 214 .sync_mode = WB_SYNC_NONE,
271 .nr_pages = nr_pages, 215 .nr_pages = nr_pages,
272 .range_cyclic = 1, 216 .range_cyclic = 1,
273 .sb_pinned = sb_locked,
274 }; 217 };
275 218
276 /* 219 bdi_alloc_queue_work(bdi, &args);
277 * We treat @nr_pages=0 as the special case to do background writeback, 220}
278 * ie. to sync pages until the background dirty threshold is reached.
279 */
280 if (!nr_pages) {
281 args.nr_pages = LONG_MAX;
282 args.for_background = 1;
283 }
284 221
285 bdi_alloc_queue_work(bdi, &args, sb_locked); 222/**
223 * bdi_start_background_writeback - start background writeback
224 * @bdi: the backing device to write from
225 *
226 * Description:
227 * This does WB_SYNC_NONE background writeback. The IO is only
228 * started when this function returns, we make no guarentees on
229 * completion. Caller need not hold sb s_umount semaphore.
230 */
231void bdi_start_background_writeback(struct backing_dev_info *bdi)
232{
233 struct wb_writeback_args args = {
234 .sync_mode = WB_SYNC_NONE,
235 .nr_pages = LONG_MAX,
236 .for_background = 1,
237 .range_cyclic = 1,
238 };
239 bdi_alloc_queue_work(bdi, &args);
286} 240}
287 241
288/* 242/*
@@ -572,48 +526,30 @@ select_queue:
572 return ret; 526 return ret;
573} 527}
574 528
575static void unpin_sb_for_writeback(struct super_block *sb)
576{
577 up_read(&sb->s_umount);
578 put_super(sb);
579}
580
581enum sb_pin_state {
582 SB_PINNED,
583 SB_NOT_PINNED,
584 SB_PIN_FAILED
585};
586
587/* 529/*
588 * For WB_SYNC_NONE writeback, the caller does not have the sb pinned 530 * For background writeback the caller does not have the sb pinned
589 * before calling writeback. So make sure that we do pin it, so it doesn't 531 * before calling writeback. So make sure that we do pin it, so it doesn't
590 * go away while we are writing inodes from it. 532 * go away while we are writing inodes from it.
591 */ 533 */
592static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, 534static bool pin_sb_for_writeback(struct super_block *sb)
593 struct super_block *sb)
594{ 535{
595 /*
596 * Caller must already hold the ref for this
597 */
598 if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) {
599 WARN_ON(!rwsem_is_locked(&sb->s_umount));
600 return SB_NOT_PINNED;
601 }
602 spin_lock(&sb_lock); 536 spin_lock(&sb_lock);
537 if (list_empty(&sb->s_instances)) {
538 spin_unlock(&sb_lock);
539 return false;
540 }
541
603 sb->s_count++; 542 sb->s_count++;
543 spin_unlock(&sb_lock);
544
604 if (down_read_trylock(&sb->s_umount)) { 545 if (down_read_trylock(&sb->s_umount)) {
605 if (sb->s_root) { 546 if (sb->s_root)
606 spin_unlock(&sb_lock); 547 return true;
607 return SB_PINNED;
608 }
609 /*
610 * umounted, drop rwsem again and fall through to failure
611 */
612 up_read(&sb->s_umount); 548 up_read(&sb->s_umount);
613 } 549 }
614 sb->s_count--; 550
615 spin_unlock(&sb_lock); 551 put_super(sb);
616 return SB_PIN_FAILED; 552 return false;
617} 553}
618 554
619/* 555/*
@@ -692,24 +628,31 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
692 struct inode *inode = list_entry(wb->b_io.prev, 628 struct inode *inode = list_entry(wb->b_io.prev,
693 struct inode, i_list); 629 struct inode, i_list);
694 struct super_block *sb = inode->i_sb; 630 struct super_block *sb = inode->i_sb;
695 enum sb_pin_state state;
696 631
697 if (wbc->sb && sb != wbc->sb) { 632 if (wbc->sb) {
698 /* super block given and doesn't 633 /*
699 match, skip this inode */ 634 * We are requested to write out inodes for a specific
700 redirty_tail(inode); 635 * superblock. This means we already have s_umount
701 continue; 636 * taken by the caller which also waits for us to
702 } 637 * complete the writeout.
703 state = pin_sb_for_writeback(wbc, sb); 638 */
639 if (sb != wbc->sb) {
640 redirty_tail(inode);
641 continue;
642 }
704 643
705 if (state == SB_PIN_FAILED) { 644 WARN_ON(!rwsem_is_locked(&sb->s_umount));
706 requeue_io(inode); 645
707 continue; 646 ret = writeback_sb_inodes(sb, wb, wbc);
647 } else {
648 if (!pin_sb_for_writeback(sb)) {
649 requeue_io(inode);
650 continue;
651 }
652 ret = writeback_sb_inodes(sb, wb, wbc);
653 drop_super(sb);
708 } 654 }
709 ret = writeback_sb_inodes(sb, wb, wbc);
710 655
711 if (state == SB_PINNED)
712 unpin_sb_for_writeback(sb);
713 if (ret) 656 if (ret)
714 break; 657 break;
715 } 658 }
@@ -769,7 +712,6 @@ static long wb_writeback(struct bdi_writeback *wb,
769 .for_kupdate = args->for_kupdate, 712 .for_kupdate = args->for_kupdate,
770 .for_background = args->for_background, 713 .for_background = args->for_background,
771 .range_cyclic = args->range_cyclic, 714 .range_cyclic = args->range_cyclic,
772 .sb_pinned = args->sb_pinned,
773 }; 715 };
774 unsigned long oldest_jif; 716 unsigned long oldest_jif;
775 long wrote = 0; 717 long wrote = 0;
@@ -912,7 +854,6 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
912 854
913 while ((work = get_next_work_item(bdi, wb)) != NULL) { 855 while ((work = get_next_work_item(bdi, wb)) != NULL) {
914 struct wb_writeback_args args = work->args; 856 struct wb_writeback_args args = work->args;
915 int post_clear;
916 857
917 /* 858 /*
918 * Override sync mode, in case we must wait for completion 859 * Override sync mode, in case we must wait for completion
@@ -920,13 +861,11 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
920 if (force_wait) 861 if (force_wait)
921 work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; 862 work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
922 863
923 post_clear = WB_SYNC_ALL || args.sb_pinned;
924
925 /* 864 /*
926 * If this isn't a data integrity operation, just notify 865 * If this isn't a data integrity operation, just notify
927 * that we have seen this work and we are now starting it. 866 * that we have seen this work and we are now starting it.
928 */ 867 */
929 if (!post_clear) 868 if (!test_bit(WS_ONSTACK, &work->state))
930 wb_clear_pending(wb, work); 869 wb_clear_pending(wb, work);
931 870
932 wrote += wb_writeback(wb, &args); 871 wrote += wb_writeback(wb, &args);
@@ -935,7 +874,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
935 * This is a data integrity writeback, so only do the 874 * This is a data integrity writeback, so only do the
936 * notification when we have completed the work. 875 * notification when we have completed the work.
937 */ 876 */
938 if (post_clear) 877 if (test_bit(WS_ONSTACK, &work->state))
939 wb_clear_pending(wb, work); 878 wb_clear_pending(wb, work);
940 } 879 }
941 880
@@ -993,42 +932,32 @@ int bdi_writeback_task(struct bdi_writeback *wb)
993} 932}
994 933
995/* 934/*
996 * Schedule writeback for all backing devices. This does WB_SYNC_NONE 935 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
997 * writeback, for integrity writeback see bdi_sync_writeback(). 936 * the whole world.
998 */ 937 */
999static void bdi_writeback_all(struct super_block *sb, long nr_pages) 938void wakeup_flusher_threads(long nr_pages)
1000{ 939{
940 struct backing_dev_info *bdi;
1001 struct wb_writeback_args args = { 941 struct wb_writeback_args args = {
1002 .sb = sb,
1003 .nr_pages = nr_pages,
1004 .sync_mode = WB_SYNC_NONE, 942 .sync_mode = WB_SYNC_NONE,
1005 }; 943 };
1006 struct backing_dev_info *bdi;
1007 944
1008 rcu_read_lock(); 945 if (nr_pages) {
946 args.nr_pages = nr_pages;
947 } else {
948 args.nr_pages = global_page_state(NR_FILE_DIRTY) +
949 global_page_state(NR_UNSTABLE_NFS);
950 }
1009 951
952 rcu_read_lock();
1010 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { 953 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
1011 if (!bdi_has_dirty_io(bdi)) 954 if (!bdi_has_dirty_io(bdi))
1012 continue; 955 continue;
1013 956 bdi_alloc_queue_work(bdi, &args);
1014 bdi_alloc_queue_work(bdi, &args, 0);
1015 } 957 }
1016
1017 rcu_read_unlock(); 958 rcu_read_unlock();
1018} 959}
1019 960
1020/*
1021 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
1022 * the whole world.
1023 */
1024void wakeup_flusher_threads(long nr_pages)
1025{
1026 if (nr_pages == 0)
1027 nr_pages = global_page_state(NR_FILE_DIRTY) +
1028 global_page_state(NR_UNSTABLE_NFS);
1029 bdi_writeback_all(NULL, nr_pages);
1030}
1031
1032static noinline void block_dump___mark_inode_dirty(struct inode *inode) 961static noinline void block_dump___mark_inode_dirty(struct inode *inode)
1033{ 962{
1034 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { 963 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1220,18 +1149,6 @@ static void wait_sb_inodes(struct super_block *sb)
1220 iput(old_inode); 1149 iput(old_inode);
1221} 1150}
1222 1151
1223static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
1224{
1225 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1226 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1227 long nr_to_write;
1228
1229 nr_to_write = nr_dirty + nr_unstable +
1230 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1231
1232 bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
1233}
1234
1235/** 1152/**
1236 * writeback_inodes_sb - writeback dirty inodes from given super_block 1153 * writeback_inodes_sb - writeback dirty inodes from given super_block
1237 * @sb: the superblock 1154 * @sb: the superblock
@@ -1243,21 +1160,21 @@ static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
1243 */ 1160 */
1244void writeback_inodes_sb(struct super_block *sb) 1161void writeback_inodes_sb(struct super_block *sb)
1245{ 1162{
1246 __writeback_inodes_sb(sb, 0); 1163 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1247} 1164 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1248EXPORT_SYMBOL(writeback_inodes_sb); 1165 struct wb_writeback_args args = {
1166 .sb = sb,
1167 .sync_mode = WB_SYNC_NONE,
1168 };
1249 1169
1250/** 1170 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1251 * writeback_inodes_sb_locked - writeback dirty inodes from given super_block 1171
1252 * @sb: the superblock 1172 args.nr_pages = nr_dirty + nr_unstable +
1253 * 1173 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1254 * Like writeback_inodes_sb(), except the caller already holds the 1174
1255 * sb umount sem. 1175 bdi_queue_work_onstack(&args);
1256 */
1257void writeback_inodes_sb_locked(struct super_block *sb)
1258{
1259 __writeback_inodes_sb(sb, 1);
1260} 1176}
1177EXPORT_SYMBOL(writeback_inodes_sb);
1261 1178
1262/** 1179/**
1263 * writeback_inodes_sb_if_idle - start writeback if none underway 1180 * writeback_inodes_sb_if_idle - start writeback if none underway
@@ -1269,7 +1186,9 @@ void writeback_inodes_sb_locked(struct super_block *sb)
1269int writeback_inodes_sb_if_idle(struct super_block *sb) 1186int writeback_inodes_sb_if_idle(struct super_block *sb)
1270{ 1187{
1271 if (!writeback_in_progress(sb->s_bdi)) { 1188 if (!writeback_in_progress(sb->s_bdi)) {
1189 down_read(&sb->s_umount);
1272 writeback_inodes_sb(sb); 1190 writeback_inodes_sb(sb);
1191 up_read(&sb->s_umount);
1273 return 1; 1192 return 1;
1274 } else 1193 } else
1275 return 0; 1194 return 0;
@@ -1285,7 +1204,16 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1285 */ 1204 */
1286void sync_inodes_sb(struct super_block *sb) 1205void sync_inodes_sb(struct super_block *sb)
1287{ 1206{
1288 bdi_sync_writeback(sb->s_bdi, sb); 1207 struct wb_writeback_args args = {
1208 .sb = sb,
1209 .sync_mode = WB_SYNC_ALL,
1210 .nr_pages = LONG_MAX,
1211 .range_cyclic = 0,
1212 };
1213
1214 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1215
1216 bdi_queue_work_onstack(&args);
1289 wait_sb_inodes(sb); 1217 wait_sb_inodes(sb);
1290} 1218}
1291EXPORT_SYMBOL(sync_inodes_sb); 1219EXPORT_SYMBOL(sync_inodes_sb);
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 47aefd376e54..723b889fd219 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -710,30 +710,26 @@ static void fscache_write_op(struct fscache_operation *_op)
710 goto superseded; 710 goto superseded;
711 } 711 }
712 712
713 if (page) { 713 radix_tree_tag_set(&cookie->stores, page->index,
714 radix_tree_tag_set(&cookie->stores, page->index, 714 FSCACHE_COOKIE_STORING_TAG);
715 FSCACHE_COOKIE_STORING_TAG); 715 radix_tree_tag_clear(&cookie->stores, page->index,
716 radix_tree_tag_clear(&cookie->stores, page->index, 716 FSCACHE_COOKIE_PENDING_TAG);
717 FSCACHE_COOKIE_PENDING_TAG);
718 }
719 717
720 spin_unlock(&cookie->stores_lock); 718 spin_unlock(&cookie->stores_lock);
721 spin_unlock(&object->lock); 719 spin_unlock(&object->lock);
722 720
723 if (page) { 721 fscache_set_op_state(&op->op, "Store");
724 fscache_set_op_state(&op->op, "Store"); 722 fscache_stat(&fscache_n_store_pages);
725 fscache_stat(&fscache_n_store_pages); 723 fscache_stat(&fscache_n_cop_write_page);
726 fscache_stat(&fscache_n_cop_write_page); 724 ret = object->cache->ops->write_page(op, page);
727 ret = object->cache->ops->write_page(op, page); 725 fscache_stat_d(&fscache_n_cop_write_page);
728 fscache_stat_d(&fscache_n_cop_write_page); 726 fscache_set_op_state(&op->op, "EndWrite");
729 fscache_set_op_state(&op->op, "EndWrite"); 727 fscache_end_page_write(object, page);
730 fscache_end_page_write(object, page); 728 if (ret < 0) {
731 if (ret < 0) { 729 fscache_set_op_state(&op->op, "Abort");
732 fscache_set_op_state(&op->op, "Abort"); 730 fscache_abort_object(object);
733 fscache_abort_object(object); 731 } else {
734 } else { 732 fscache_enqueue_operation(&op->op);
735 fscache_enqueue_operation(&op->op);
736 }
737 } 733 }
738 734
739 _leave(""); 735 _leave("");
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index a33aab6b5e68..54a92fd02bbd 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -234,8 +234,9 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
234 if (inode->i_mode != mode) { 234 if (inode->i_mode != mode) {
235 struct iattr attr; 235 struct iattr attr;
236 236
237 attr.ia_valid = ATTR_MODE; 237 attr.ia_valid = ATTR_MODE | ATTR_CTIME;
238 attr.ia_mode = mode; 238 attr.ia_mode = mode;
239 attr.ia_ctime = CURRENT_TIME_SEC;
239 rc = jffs2_do_setattr(inode, &attr); 240 rc = jffs2_do_setattr(inode, &attr);
240 if (rc < 0) 241 if (rc < 0)
241 return rc; 242 return rc;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 7aa4417e085f..166062a68230 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -222,15 +222,18 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
222 dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime)); 222 dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime));
223 223
224 jffs2_free_raw_inode(ri); 224 jffs2_free_raw_inode(ri);
225 d_instantiate(dentry, inode);
226 225
227 D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n", 226 D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
228 inode->i_ino, inode->i_mode, inode->i_nlink, 227 inode->i_ino, inode->i_mode, inode->i_nlink,
229 f->inocache->pino_nlink, inode->i_mapping->nrpages)); 228 f->inocache->pino_nlink, inode->i_mapping->nrpages));
229
230 d_instantiate(dentry, inode);
231 unlock_new_inode(inode);
230 return 0; 232 return 0;
231 233
232 fail: 234 fail:
233 make_bad_inode(inode); 235 make_bad_inode(inode);
236 unlock_new_inode(inode);
234 iput(inode); 237 iput(inode);
235 jffs2_free_raw_inode(ri); 238 jffs2_free_raw_inode(ri);
236 return ret; 239 return ret;
@@ -360,8 +363,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
360 /* Eeek. Wave bye bye */ 363 /* Eeek. Wave bye bye */
361 mutex_unlock(&f->sem); 364 mutex_unlock(&f->sem);
362 jffs2_complete_reservation(c); 365 jffs2_complete_reservation(c);
363 jffs2_clear_inode(inode); 366 ret = PTR_ERR(fn);
364 return PTR_ERR(fn); 367 goto fail;
365 } 368 }
366 369
367 /* We use f->target field to store the target path. */ 370 /* We use f->target field to store the target path. */
@@ -370,8 +373,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
370 printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1); 373 printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
371 mutex_unlock(&f->sem); 374 mutex_unlock(&f->sem);
372 jffs2_complete_reservation(c); 375 jffs2_complete_reservation(c);
373 jffs2_clear_inode(inode); 376 ret = -ENOMEM;
374 return -ENOMEM; 377 goto fail;
375 } 378 }
376 379
377 memcpy(f->target, target, targetlen + 1); 380 memcpy(f->target, target, targetlen + 1);
@@ -386,30 +389,24 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
386 jffs2_complete_reservation(c); 389 jffs2_complete_reservation(c);
387 390
388 ret = jffs2_init_security(inode, dir_i); 391 ret = jffs2_init_security(inode, dir_i);
389 if (ret) { 392 if (ret)
390 jffs2_clear_inode(inode); 393 goto fail;
391 return ret; 394
392 }
393 ret = jffs2_init_acl_post(inode); 395 ret = jffs2_init_acl_post(inode);
394 if (ret) { 396 if (ret)
395 jffs2_clear_inode(inode); 397 goto fail;
396 return ret;
397 }
398 398
399 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, 399 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
400 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 400 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
401 if (ret) { 401 if (ret)
402 /* Eep. */ 402 goto fail;
403 jffs2_clear_inode(inode);
404 return ret;
405 }
406 403
407 rd = jffs2_alloc_raw_dirent(); 404 rd = jffs2_alloc_raw_dirent();
408 if (!rd) { 405 if (!rd) {
409 /* Argh. Now we treat it like a normal delete */ 406 /* Argh. Now we treat it like a normal delete */
410 jffs2_complete_reservation(c); 407 jffs2_complete_reservation(c);
411 jffs2_clear_inode(inode); 408 ret = -ENOMEM;
412 return -ENOMEM; 409 goto fail;
413 } 410 }
414 411
415 dir_f = JFFS2_INODE_INFO(dir_i); 412 dir_f = JFFS2_INODE_INFO(dir_i);
@@ -437,8 +434,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
437 jffs2_complete_reservation(c); 434 jffs2_complete_reservation(c);
438 jffs2_free_raw_dirent(rd); 435 jffs2_free_raw_dirent(rd);
439 mutex_unlock(&dir_f->sem); 436 mutex_unlock(&dir_f->sem);
440 jffs2_clear_inode(inode); 437 ret = PTR_ERR(fd);
441 return PTR_ERR(fd); 438 goto fail;
442 } 439 }
443 440
444 dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); 441 dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -453,7 +450,14 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
453 jffs2_complete_reservation(c); 450 jffs2_complete_reservation(c);
454 451
455 d_instantiate(dentry, inode); 452 d_instantiate(dentry, inode);
453 unlock_new_inode(inode);
456 return 0; 454 return 0;
455
456 fail:
457 make_bad_inode(inode);
458 unlock_new_inode(inode);
459 iput(inode);
460 return ret;
457} 461}
458 462
459 463
@@ -519,8 +523,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
519 /* Eeek. Wave bye bye */ 523 /* Eeek. Wave bye bye */
520 mutex_unlock(&f->sem); 524 mutex_unlock(&f->sem);
521 jffs2_complete_reservation(c); 525 jffs2_complete_reservation(c);
522 jffs2_clear_inode(inode); 526 ret = PTR_ERR(fn);
523 return PTR_ERR(fn); 527 goto fail;
524 } 528 }
525 /* No data here. Only a metadata node, which will be 529 /* No data here. Only a metadata node, which will be
526 obsoleted by the first data write 530 obsoleted by the first data write
@@ -531,30 +535,24 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
531 jffs2_complete_reservation(c); 535 jffs2_complete_reservation(c);
532 536
533 ret = jffs2_init_security(inode, dir_i); 537 ret = jffs2_init_security(inode, dir_i);
534 if (ret) { 538 if (ret)
535 jffs2_clear_inode(inode); 539 goto fail;
536 return ret; 540
537 }
538 ret = jffs2_init_acl_post(inode); 541 ret = jffs2_init_acl_post(inode);
539 if (ret) { 542 if (ret)
540 jffs2_clear_inode(inode); 543 goto fail;
541 return ret;
542 }
543 544
544 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, 545 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
545 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 546 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
546 if (ret) { 547 if (ret)
547 /* Eep. */ 548 goto fail;
548 jffs2_clear_inode(inode);
549 return ret;
550 }
551 549
552 rd = jffs2_alloc_raw_dirent(); 550 rd = jffs2_alloc_raw_dirent();
553 if (!rd) { 551 if (!rd) {
554 /* Argh. Now we treat it like a normal delete */ 552 /* Argh. Now we treat it like a normal delete */
555 jffs2_complete_reservation(c); 553 jffs2_complete_reservation(c);
556 jffs2_clear_inode(inode); 554 ret = -ENOMEM;
557 return -ENOMEM; 555 goto fail;
558 } 556 }
559 557
560 dir_f = JFFS2_INODE_INFO(dir_i); 558 dir_f = JFFS2_INODE_INFO(dir_i);
@@ -582,8 +580,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
582 jffs2_complete_reservation(c); 580 jffs2_complete_reservation(c);
583 jffs2_free_raw_dirent(rd); 581 jffs2_free_raw_dirent(rd);
584 mutex_unlock(&dir_f->sem); 582 mutex_unlock(&dir_f->sem);
585 jffs2_clear_inode(inode); 583 ret = PTR_ERR(fd);
586 return PTR_ERR(fd); 584 goto fail;
587 } 585 }
588 586
589 dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); 587 dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -599,7 +597,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
599 jffs2_complete_reservation(c); 597 jffs2_complete_reservation(c);
600 598
601 d_instantiate(dentry, inode); 599 d_instantiate(dentry, inode);
600 unlock_new_inode(inode);
602 return 0; 601 return 0;
602
603 fail:
604 make_bad_inode(inode);
605 unlock_new_inode(inode);
606 iput(inode);
607 return ret;
603} 608}
604 609
605static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) 610static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
@@ -693,8 +698,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
693 /* Eeek. Wave bye bye */ 698 /* Eeek. Wave bye bye */
694 mutex_unlock(&f->sem); 699 mutex_unlock(&f->sem);
695 jffs2_complete_reservation(c); 700 jffs2_complete_reservation(c);
696 jffs2_clear_inode(inode); 701 ret = PTR_ERR(fn);
697 return PTR_ERR(fn); 702 goto fail;
698 } 703 }
699 /* No data here. Only a metadata node, which will be 704 /* No data here. Only a metadata node, which will be
700 obsoleted by the first data write 705 obsoleted by the first data write
@@ -705,30 +710,24 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
705 jffs2_complete_reservation(c); 710 jffs2_complete_reservation(c);
706 711
707 ret = jffs2_init_security(inode, dir_i); 712 ret = jffs2_init_security(inode, dir_i);
708 if (ret) { 713 if (ret)
709 jffs2_clear_inode(inode); 714 goto fail;
710 return ret; 715
711 }
712 ret = jffs2_init_acl_post(inode); 716 ret = jffs2_init_acl_post(inode);
713 if (ret) { 717 if (ret)
714 jffs2_clear_inode(inode); 718 goto fail;
715 return ret;
716 }
717 719
718 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, 720 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
719 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 721 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
720 if (ret) { 722 if (ret)
721 /* Eep. */ 723 goto fail;
722 jffs2_clear_inode(inode);
723 return ret;
724 }
725 724
726 rd = jffs2_alloc_raw_dirent(); 725 rd = jffs2_alloc_raw_dirent();
727 if (!rd) { 726 if (!rd) {
728 /* Argh. Now we treat it like a normal delete */ 727 /* Argh. Now we treat it like a normal delete */
729 jffs2_complete_reservation(c); 728 jffs2_complete_reservation(c);
730 jffs2_clear_inode(inode); 729 ret = -ENOMEM;
731 return -ENOMEM; 730 goto fail;
732 } 731 }
733 732
734 dir_f = JFFS2_INODE_INFO(dir_i); 733 dir_f = JFFS2_INODE_INFO(dir_i);
@@ -759,8 +758,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
759 jffs2_complete_reservation(c); 758 jffs2_complete_reservation(c);
760 jffs2_free_raw_dirent(rd); 759 jffs2_free_raw_dirent(rd);
761 mutex_unlock(&dir_f->sem); 760 mutex_unlock(&dir_f->sem);
762 jffs2_clear_inode(inode); 761 ret = PTR_ERR(fd);
763 return PTR_ERR(fd); 762 goto fail;
764 } 763 }
765 764
766 dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); 765 dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -775,8 +774,14 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
775 jffs2_complete_reservation(c); 774 jffs2_complete_reservation(c);
776 775
777 d_instantiate(dentry, inode); 776 d_instantiate(dentry, inode);
778 777 unlock_new_inode(inode);
779 return 0; 778 return 0;
779
780 fail:
781 make_bad_inode(inode);
782 unlock_new_inode(inode);
783 iput(inode);
784 return ret;
780} 785}
781 786
782static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, 787static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 8bc2c80ab159..459d39d1ea0b 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -465,7 +465,12 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
465 inode->i_blocks = 0; 465 inode->i_blocks = 0;
466 inode->i_size = 0; 466 inode->i_size = 0;
467 467
468 insert_inode_hash(inode); 468 if (insert_inode_locked(inode) < 0) {
469 make_bad_inode(inode);
470 unlock_new_inode(inode);
471 iput(inode);
472 return ERR_PTR(-EINVAL);
473 }
469 474
470 return inode; 475 return inode;
471} 476}
diff --git a/fs/libfs.c b/fs/libfs.c
index 09e1016eb774..dcaf972cbf1b 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -489,7 +489,8 @@ int simple_write_end(struct file *file, struct address_space *mapping,
489 * unique inode values later for this filesystem, then you must take care 489 * unique inode values later for this filesystem, then you must take care
490 * to pass it an appropriate max_reserved value to avoid collisions. 490 * to pass it an appropriate max_reserved value to avoid collisions.
491 */ 491 */
492int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files) 492int simple_fill_super(struct super_block *s, unsigned long magic,
493 struct tree_descr *files)
493{ 494{
494 struct inode *inode; 495 struct inode *inode;
495 struct dentry *root; 496 struct dentry *root;
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 91969589131c..1dbf921ca44b 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -75,10 +75,6 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n)
75 if (!IS_ERR(page)) 75 if (!IS_ERR(page))
76 kmap(page); 76 kmap(page);
77 return page; 77 return page;
78
79fail:
80 dir_put_page(page);
81 return ERR_PTR(-EIO);
82} 78}
83 79
84static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi) 80static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7ec9b34a59f8..d25b5257b7a1 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1286,6 +1286,55 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
1286#endif /* CONFIG_NFS_V4_1 */ 1286#endif /* CONFIG_NFS_V4_1 */
1287} 1287}
1288 1288
1289static int nfs4_server_common_setup(struct nfs_server *server,
1290 struct nfs_fh *mntfh)
1291{
1292 struct nfs_fattr *fattr;
1293 int error;
1294
1295 BUG_ON(!server->nfs_client);
1296 BUG_ON(!server->nfs_client->rpc_ops);
1297 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1298
1299 fattr = nfs_alloc_fattr();
1300 if (fattr == NULL)
1301 return -ENOMEM;
1302
1303 /* We must ensure the session is initialised first */
1304 error = nfs4_init_session(server);
1305 if (error < 0)
1306 goto out;
1307
1308 /* Probe the root fh to retrieve its FSID and filehandle */
1309 error = nfs4_get_rootfh(server, mntfh);
1310 if (error < 0)
1311 goto out;
1312
1313 dprintk("Server FSID: %llx:%llx\n",
1314 (unsigned long long) server->fsid.major,
1315 (unsigned long long) server->fsid.minor);
1316 dprintk("Mount FH: %d\n", mntfh->size);
1317
1318 nfs4_session_set_rwsize(server);
1319
1320 error = nfs_probe_fsinfo(server, mntfh, fattr);
1321 if (error < 0)
1322 goto out;
1323
1324 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1325 server->namelen = NFS4_MAXNAMLEN;
1326
1327 spin_lock(&nfs_client_lock);
1328 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1329 list_add_tail(&server->master_link, &nfs_volume_list);
1330 spin_unlock(&nfs_client_lock);
1331
1332 server->mount_time = jiffies;
1333out:
1334 nfs_free_fattr(fattr);
1335 return error;
1336}
1337
1289/* 1338/*
1290 * Create a version 4 volume record 1339 * Create a version 4 volume record
1291 */ 1340 */
@@ -1346,7 +1395,6 @@ error:
1346struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, 1395struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1347 struct nfs_fh *mntfh) 1396 struct nfs_fh *mntfh)
1348{ 1397{
1349 struct nfs_fattr *fattr;
1350 struct nfs_server *server; 1398 struct nfs_server *server;
1351 int error; 1399 int error;
1352 1400
@@ -1356,55 +1404,19 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1356 if (!server) 1404 if (!server)
1357 return ERR_PTR(-ENOMEM); 1405 return ERR_PTR(-ENOMEM);
1358 1406
1359 error = -ENOMEM;
1360 fattr = nfs_alloc_fattr();
1361 if (fattr == NULL)
1362 goto error;
1363
1364 /* set up the general RPC client */ 1407 /* set up the general RPC client */
1365 error = nfs4_init_server(server, data); 1408 error = nfs4_init_server(server, data);
1366 if (error < 0) 1409 if (error < 0)
1367 goto error; 1410 goto error;
1368 1411
1369 BUG_ON(!server->nfs_client); 1412 error = nfs4_server_common_setup(server, mntfh);
1370 BUG_ON(!server->nfs_client->rpc_ops);
1371 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1372
1373 error = nfs4_init_session(server);
1374 if (error < 0)
1375 goto error;
1376
1377 /* Probe the root fh to retrieve its FSID */
1378 error = nfs4_get_rootfh(server, mntfh);
1379 if (error < 0) 1413 if (error < 0)
1380 goto error; 1414 goto error;
1381 1415
1382 dprintk("Server FSID: %llx:%llx\n",
1383 (unsigned long long) server->fsid.major,
1384 (unsigned long long) server->fsid.minor);
1385 dprintk("Mount FH: %d\n", mntfh->size);
1386
1387 nfs4_session_set_rwsize(server);
1388
1389 error = nfs_probe_fsinfo(server, mntfh, fattr);
1390 if (error < 0)
1391 goto error;
1392
1393 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1394 server->namelen = NFS4_MAXNAMLEN;
1395
1396 spin_lock(&nfs_client_lock);
1397 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1398 list_add_tail(&server->master_link, &nfs_volume_list);
1399 spin_unlock(&nfs_client_lock);
1400
1401 server->mount_time = jiffies;
1402 dprintk("<-- nfs4_create_server() = %p\n", server); 1416 dprintk("<-- nfs4_create_server() = %p\n", server);
1403 nfs_free_fattr(fattr);
1404 return server; 1417 return server;
1405 1418
1406error: 1419error:
1407 nfs_free_fattr(fattr);
1408 nfs_free_server(server); 1420 nfs_free_server(server);
1409 dprintk("<-- nfs4_create_server() = error %d\n", error); 1421 dprintk("<-- nfs4_create_server() = error %d\n", error);
1410 return ERR_PTR(error); 1422 return ERR_PTR(error);
@@ -1418,7 +1430,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1418{ 1430{
1419 struct nfs_client *parent_client; 1431 struct nfs_client *parent_client;
1420 struct nfs_server *server, *parent_server; 1432 struct nfs_server *server, *parent_server;
1421 struct nfs_fattr *fattr;
1422 int error; 1433 int error;
1423 1434
1424 dprintk("--> nfs4_create_referral_server()\n"); 1435 dprintk("--> nfs4_create_referral_server()\n");
@@ -1427,11 +1438,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1427 if (!server) 1438 if (!server)
1428 return ERR_PTR(-ENOMEM); 1439 return ERR_PTR(-ENOMEM);
1429 1440
1430 error = -ENOMEM;
1431 fattr = nfs_alloc_fattr();
1432 if (fattr == NULL)
1433 goto error;
1434
1435 parent_server = NFS_SB(data->sb); 1441 parent_server = NFS_SB(data->sb);
1436 parent_client = parent_server->nfs_client; 1442 parent_client = parent_server->nfs_client;
1437 1443
@@ -1456,40 +1462,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1456 if (error < 0) 1462 if (error < 0)
1457 goto error; 1463 goto error;
1458 1464
1459 BUG_ON(!server->nfs_client); 1465 error = nfs4_server_common_setup(server, mntfh);
1460 BUG_ON(!server->nfs_client->rpc_ops);
1461 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1462
1463 /* Probe the root fh to retrieve its FSID and filehandle */
1464 error = nfs4_get_rootfh(server, mntfh);
1465 if (error < 0)
1466 goto error;
1467
1468 /* probe the filesystem info for this server filesystem */
1469 error = nfs_probe_fsinfo(server, mntfh, fattr);
1470 if (error < 0) 1466 if (error < 0)
1471 goto error; 1467 goto error;
1472 1468
1473 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1474 server->namelen = NFS4_MAXNAMLEN;
1475
1476 dprintk("Referral FSID: %llx:%llx\n",
1477 (unsigned long long) server->fsid.major,
1478 (unsigned long long) server->fsid.minor);
1479
1480 spin_lock(&nfs_client_lock);
1481 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1482 list_add_tail(&server->master_link, &nfs_volume_list);
1483 spin_unlock(&nfs_client_lock);
1484
1485 server->mount_time = jiffies;
1486
1487 nfs_free_fattr(fattr);
1488 dprintk("<-- nfs_create_referral_server() = %p\n", server); 1469 dprintk("<-- nfs_create_referral_server() = %p\n", server);
1489 return server; 1470 return server;
1490 1471
1491error: 1472error:
1492 nfs_free_fattr(fattr);
1493 nfs_free_server(server); 1473 nfs_free_server(server);
1494 dprintk("<-- nfs4_create_referral_server() = error %d\n", error); 1474 dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
1495 return ERR_PTR(error); 1475 return ERR_PTR(error);
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 7428f7d6273b..a70e446e1605 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -146,7 +146,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
146 goto out; 146 goto out;
147 } 147 }
148 148
149 if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE) 149 if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
150 || !S_ISDIR(fsinfo.fattr->mode)) { 150 || !S_ISDIR(fsinfo.fattr->mode)) {
151 printk(KERN_ERR "nfs4_get_rootfh:" 151 printk(KERN_ERR "nfs4_get_rootfh:"
152 " getroot encountered non-directory\n"); 152 " getroot encountered non-directory\n");
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6bdef28efa33..65c8dae4b267 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -862,8 +862,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
862 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 862 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
863 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); 863 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
864 *p++ = cpu_to_be32(0); 864 *p++ = cpu_to_be32(0);
865 *p++ = cpu_to_be32(iap->ia_mtime.tv_sec); 865 *p++ = cpu_to_be32(iap->ia_atime.tv_sec);
866 *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); 866 *p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
867 } 867 }
868 else if (iap->ia_valid & ATTR_ATIME) { 868 else if (iap->ia_valid & ATTR_ATIME) {
869 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 869 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 04214fc5c304..f9df16de4a56 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -570,6 +570,22 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
570 nfs_show_mountd_netid(m, nfss, showdefaults); 570 nfs_show_mountd_netid(m, nfss, showdefaults);
571} 571}
572 572
573#ifdef CONFIG_NFS_V4
574static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
575 int showdefaults)
576{
577 struct nfs_client *clp = nfss->nfs_client;
578
579 seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
580 seq_printf(m, ",minorversion=%u", clp->cl_minorversion);
581}
582#else
583static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
584 int showdefaults)
585{
586}
587#endif
588
573/* 589/*
574 * Describe the mount options in force on this server representation 590 * Describe the mount options in force on this server representation
575 */ 591 */
@@ -631,11 +647,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
631 647
632 if (version != 4) 648 if (version != 4)
633 nfs_show_mountd_options(m, nfss, showdefaults); 649 nfs_show_mountd_options(m, nfss, showdefaults);
650 else
651 nfs_show_nfsv4_options(m, nfss, showdefaults);
634 652
635#ifdef CONFIG_NFS_V4
636 if (clp->rpc_ops->version == 4)
637 seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
638#endif
639 if (nfss->options & NFS_OPTION_FSCACHE) 653 if (nfss->options & NFS_OPTION_FSCACHE)
640 seq_printf(m, ",fsc"); 654 seq_printf(m, ",fsc");
641} 655}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 12f7109720c2..4a2734758778 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4122,8 +4122,8 @@ nfs4_state_shutdown(void)
4122 nfs4_lock_state(); 4122 nfs4_lock_state();
4123 nfs4_release_reclaim(); 4123 nfs4_release_reclaim();
4124 __nfs4_state_shutdown(); 4124 __nfs4_state_shutdown();
4125 nfsd4_destroy_callback_queue();
4126 nfs4_unlock_state(); 4125 nfs4_unlock_state();
4126 nfsd4_destroy_callback_queue();
4127} 4127}
4128 4128
4129/* 4129/*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ebbf3b6b2457..3c111120b619 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -443,8 +443,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
443 if (size_change) 443 if (size_change)
444 put_write_access(inode); 444 put_write_access(inode);
445 if (!err) 445 if (!err)
446 if (EX_ISSYNC(fhp->fh_export)) 446 commit_metadata(fhp);
447 write_inode_now(inode, 1);
448out: 447out:
449 return err; 448 return err;
450 449
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index af638d59e3bf..43c8c5b541fd 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -75,8 +75,6 @@ struct nilfs_btree_path {
75 75
76extern struct kmem_cache *nilfs_btree_path_cache; 76extern struct kmem_cache *nilfs_btree_path_cache;
77 77
78int nilfs_btree_path_cache_init(void);
79void nilfs_btree_path_cache_destroy(void);
80int nilfs_btree_init(struct nilfs_bmap *); 78int nilfs_btree_init(struct nilfs_bmap *);
81int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64, 79int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64,
82 const __u64 *, const __u64 *, int); 80 const __u64 *, const __u64 *, int);
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index fdf1c3b6d673..85fbb66455e2 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -127,8 +127,6 @@ struct nilfs_segment_buffer {
127 127
128extern struct kmem_cache *nilfs_segbuf_cachep; 128extern struct kmem_cache *nilfs_segbuf_cachep;
129 129
130int __init nilfs_init_segbuf_cache(void);
131void nilfs_destroy_segbuf_cache(void);
132struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *); 130struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *);
133void nilfs_segbuf_free(struct nilfs_segment_buffer *); 131void nilfs_segbuf_free(struct nilfs_segment_buffer *);
134void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, 132void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long,
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index dca142361ccf..01e20dbb217d 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -221,8 +221,6 @@ enum {
221extern struct kmem_cache *nilfs_transaction_cachep; 221extern struct kmem_cache *nilfs_transaction_cachep;
222 222
223/* segment.c */ 223/* segment.c */
224extern int nilfs_init_transaction_cache(void);
225extern void nilfs_destroy_transaction_cache(void);
226extern void nilfs_relax_pressure_in_lock(struct super_block *); 224extern void nilfs_relax_pressure_in_lock(struct super_block *);
227 225
228extern int nilfs_construct_segment(struct super_block *); 226extern int nilfs_construct_segment(struct super_block *);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 03b34b738993..414ef68931cf 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1130,13 +1130,13 @@ static void nilfs_segbuf_init_once(void *obj)
1130 1130
1131static void nilfs_destroy_cachep(void) 1131static void nilfs_destroy_cachep(void)
1132{ 1132{
1133 if (nilfs_inode_cachep) 1133 if (nilfs_inode_cachep)
1134 kmem_cache_destroy(nilfs_inode_cachep); 1134 kmem_cache_destroy(nilfs_inode_cachep);
1135 if (nilfs_transaction_cachep) 1135 if (nilfs_transaction_cachep)
1136 kmem_cache_destroy(nilfs_transaction_cachep); 1136 kmem_cache_destroy(nilfs_transaction_cachep);
1137 if (nilfs_segbuf_cachep) 1137 if (nilfs_segbuf_cachep)
1138 kmem_cache_destroy(nilfs_segbuf_cachep); 1138 kmem_cache_destroy(nilfs_segbuf_cachep);
1139 if (nilfs_btree_path_cache) 1139 if (nilfs_btree_path_cache)
1140 kmem_cache_destroy(nilfs_btree_path_cache); 1140 kmem_cache_destroy(nilfs_btree_path_cache);
1141} 1141}
1142 1142
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index 40650021fc24..d8b6e4259b80 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -26,7 +26,6 @@
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/slab.h>
30#include <linux/highmem.h> 29#include <linux/highmem.h>
31#include <linux/bitops.h> 30#include <linux/bitops.h>
32#include <linux/list.h> 31#include <linux/list.h>
diff --git a/fs/pipe.c b/fs/pipe.c
index db6eaaba0dd8..279eef96c51c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -26,9 +26,14 @@
26 26
27/* 27/*
28 * The max size that a non-root user is allowed to grow the pipe. Can 28 * The max size that a non-root user is allowed to grow the pipe. Can
29 * be set by root in /proc/sys/fs/pipe-max-pages 29 * be set by root in /proc/sys/fs/pipe-max-size
30 */ 30 */
31unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16; 31unsigned int pipe_max_size = 1048576;
32
33/*
34 * Minimum pipe size, as required by POSIX
35 */
36unsigned int pipe_min_size = PAGE_SIZE;
32 37
33/* 38/*
34 * We use a start+len construction, which provides full use of the 39 * We use a start+len construction, which provides full use of the
@@ -1118,26 +1123,20 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
1118 * Allocate a new array of pipe buffers and copy the info over. Returns the 1123 * Allocate a new array of pipe buffers and copy the info over. Returns the
1119 * pipe size if successful, or return -ERROR on error. 1124 * pipe size if successful, or return -ERROR on error.
1120 */ 1125 */
1121static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) 1126static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
1122{ 1127{
1123 struct pipe_buffer *bufs; 1128 struct pipe_buffer *bufs;
1124 1129
1125 /* 1130 /*
1126 * Must be a power-of-2 currently
1127 */
1128 if (!is_power_of_2(arg))
1129 return -EINVAL;
1130
1131 /*
1132 * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't 1131 * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
1133 * expect a lot of shrink+grow operations, just free and allocate 1132 * expect a lot of shrink+grow operations, just free and allocate
1134 * again like we would do for growing. If the pipe currently 1133 * again like we would do for growing. If the pipe currently
1135 * contains more buffers than arg, then return busy. 1134 * contains more buffers than arg, then return busy.
1136 */ 1135 */
1137 if (arg < pipe->nrbufs) 1136 if (nr_pages < pipe->nrbufs)
1138 return -EBUSY; 1137 return -EBUSY;
1139 1138
1140 bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL); 1139 bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL);
1141 if (unlikely(!bufs)) 1140 if (unlikely(!bufs))
1142 return -ENOMEM; 1141 return -ENOMEM;
1143 1142
@@ -1146,20 +1145,56 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
1146 * and adjust the indexes. 1145 * and adjust the indexes.
1147 */ 1146 */
1148 if (pipe->nrbufs) { 1147 if (pipe->nrbufs) {
1149 const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1); 1148 unsigned int tail;
1150 const unsigned int head = pipe->nrbufs - tail; 1149 unsigned int head;
1150
1151 tail = pipe->curbuf + pipe->nrbufs;
1152 if (tail < pipe->buffers)
1153 tail = 0;
1154 else
1155 tail &= (pipe->buffers - 1);
1151 1156
1157 head = pipe->nrbufs - tail;
1152 if (head) 1158 if (head)
1153 memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer)); 1159 memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
1154 if (tail) 1160 if (tail)
1155 memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer)); 1161 memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
1156 } 1162 }
1157 1163
1158 pipe->curbuf = 0; 1164 pipe->curbuf = 0;
1159 kfree(pipe->bufs); 1165 kfree(pipe->bufs);
1160 pipe->bufs = bufs; 1166 pipe->bufs = bufs;
1161 pipe->buffers = arg; 1167 pipe->buffers = nr_pages;
1162 return arg; 1168 return nr_pages * PAGE_SIZE;
1169}
1170
1171/*
1172 * Currently we rely on the pipe array holding a power-of-2 number
1173 * of pages.
1174 */
1175static inline unsigned int round_pipe_size(unsigned int size)
1176{
1177 unsigned long nr_pages;
1178
1179 nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
1180 return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
1181}
1182
1183/*
1184 * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax
1185 * will return an error.
1186 */
1187int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
1188 size_t *lenp, loff_t *ppos)
1189{
1190 int ret;
1191
1192 ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
1193 if (ret < 0 || !write)
1194 return ret;
1195
1196 pipe_max_size = round_pipe_size(pipe_max_size);
1197 return ret;
1163} 1198}
1164 1199
1165long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) 1200long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -1174,23 +1209,25 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1174 mutex_lock(&pipe->inode->i_mutex); 1209 mutex_lock(&pipe->inode->i_mutex);
1175 1210
1176 switch (cmd) { 1211 switch (cmd) {
1177 case F_SETPIPE_SZ: 1212 case F_SETPIPE_SZ: {
1178 if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) { 1213 unsigned int size, nr_pages;
1179 ret = -EINVAL; 1214
1215 size = round_pipe_size(arg);
1216 nr_pages = size >> PAGE_SHIFT;
1217
1218 ret = -EINVAL;
1219 if (!nr_pages)
1180 goto out; 1220 goto out;
1181 } 1221
1182 /* 1222 if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
1183 * The pipe needs to be at least 2 pages large to 1223 ret = -EPERM;
1184 * guarantee POSIX behaviour.
1185 */
1186 if (arg < 2) {
1187 ret = -EINVAL;
1188 goto out; 1224 goto out;
1189 } 1225 }
1190 ret = pipe_set_size(pipe, arg); 1226 ret = pipe_set_size(pipe, nr_pages);
1191 break; 1227 break;
1228 }
1192 case F_GETPIPE_SZ: 1229 case F_GETPIPE_SZ:
1193 ret = pipe->buffers; 1230 ret = pipe->buffers * PAGE_SIZE;
1194 break; 1231 break;
1195 default: 1232 default:
1196 ret = -EINVAL; 1233 ret = -EINVAL;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index ce94801f48ca..d9396a4fc7ff 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -209,6 +209,9 @@ void proc_device_tree_add_node(struct device_node *np,
209 for (pp = np->properties; pp != NULL; pp = pp->next) { 209 for (pp = np->properties; pp != NULL; pp = pp->next) {
210 p = pp->name; 210 p = pp->name;
211 211
212 if (strchr(p, '/'))
213 continue;
214
212 if (duplicate_name(de, p)) 215 if (duplicate_name(de, p))
213 p = fixup_name(np, de, p); 216 p = fixup_name(np, de, p);
214 217
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 46d4b5d72bd3..cb6306e63843 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -122,11 +122,20 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
122 return size; 122 return size;
123} 123}
124 124
125static void pad_len_spaces(struct seq_file *m, int len)
126{
127 len = 25 + sizeof(void*) * 6 - len;
128 if (len < 1)
129 len = 1;
130 seq_printf(m, "%*c", len, ' ');
131}
132
125/* 133/*
126 * display a single VMA to a sequenced file 134 * display a single VMA to a sequenced file
127 */ 135 */
128static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) 136static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
129{ 137{
138 struct mm_struct *mm = vma->vm_mm;
130 unsigned long ino = 0; 139 unsigned long ino = 0;
131 struct file *file; 140 struct file *file;
132 dev_t dev = 0; 141 dev_t dev = 0;
@@ -155,11 +164,14 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
155 MAJOR(dev), MINOR(dev), ino, &len); 164 MAJOR(dev), MINOR(dev), ino, &len);
156 165
157 if (file) { 166 if (file) {
158 len = 25 + sizeof(void *) * 6 - len; 167 pad_len_spaces(m, len);
159 if (len < 1)
160 len = 1;
161 seq_printf(m, "%*c", len, ' ');
162 seq_path(m, &file->f_path, ""); 168 seq_path(m, &file->f_path, "");
169 } else if (mm) {
170 if (vma->vm_start <= mm->start_stack &&
171 vma->vm_end >= mm->start_stack) {
172 pad_len_spaces(m, len);
173 seq_puts(m, "[stack]");
174 }
163 } 175 }
164 176
165 seq_putc(m, '\n'); 177 seq_putc(m, '\n');
diff --git a/fs/splice.c b/fs/splice.c
index ac22b00d86c3..740e6b9faf7a 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -354,7 +354,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
354 break; 354 break;
355 355
356 error = add_to_page_cache_lru(page, mapping, index, 356 error = add_to_page_cache_lru(page, mapping, index,
357 mapping_gfp_mask(mapping)); 357 GFP_KERNEL);
358 if (unlikely(error)) { 358 if (unlikely(error)) {
359 page_cache_release(page); 359 page_cache_release(page);
360 if (error == -EEXIST) 360 if (error == -EEXIST)
diff --git a/fs/super.c b/fs/super.c
index 5c35bc7a499e..938119ab8dcb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -374,6 +374,8 @@ void sync_supers(void)
374 up_read(&sb->s_umount); 374 up_read(&sb->s_umount);
375 375
376 spin_lock(&sb_lock); 376 spin_lock(&sb_lock);
377 /* lock was dropped, must reset next */
378 list_safe_reset_next(sb, n, s_list);
377 __put_super(sb); 379 __put_super(sb);
378 } 380 }
379 } 381 }
@@ -405,6 +407,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
405 up_read(&sb->s_umount); 407 up_read(&sb->s_umount);
406 408
407 spin_lock(&sb_lock); 409 spin_lock(&sb_lock);
410 /* lock was dropped, must reset next */
411 list_safe_reset_next(sb, n, s_list);
408 __put_super(sb); 412 __put_super(sb);
409 } 413 }
410 spin_unlock(&sb_lock); 414 spin_unlock(&sb_lock);
@@ -585,6 +589,8 @@ static void do_emergency_remount(struct work_struct *work)
585 } 589 }
586 up_write(&sb->s_umount); 590 up_write(&sb->s_umount);
587 spin_lock(&sb_lock); 591 spin_lock(&sb_lock);
592 /* lock was dropped, must reset next */
593 list_safe_reset_next(sb, n, s_list);
588 __put_super(sb); 594 __put_super(sb);
589 } 595 }
590 spin_unlock(&sb_lock); 596 spin_unlock(&sb_lock);
diff --git a/fs/sync.c b/fs/sync.c
index c9f83f480ec5..15aa6f03b2da 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
42 if (wait) 42 if (wait)
43 sync_inodes_sb(sb); 43 sync_inodes_sb(sb);
44 else 44 else
45 writeback_inodes_sb_locked(sb); 45 writeback_inodes_sb(sb);
46 46
47 if (sb->s_op->sync_fs) 47 if (sb->s_op->sync_fs)
48 sb->s_op->sync_fs(sb, wait); 48 sb->s_op->sync_fs(sb, wait);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index bde1a4c3679a..0835a3b70e03 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -117,11 +117,13 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
117 if (error) 117 if (error)
118 goto out; 118 goto out;
119 119
120 error = sysfs_sd_setattr(sd, iattr);
121 if (error)
122 goto out;
123
120 /* this ignores size changes */ 124 /* this ignores size changes */
121 generic_setattr(inode, iattr); 125 generic_setattr(inode, iattr);
122 126
123 error = sysfs_sd_setattr(sd, iattr);
124
125out: 127out:
126 mutex_unlock(&sysfs_mutex); 128 mutex_unlock(&sysfs_mutex);
127 return error; 129 return error;
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index bbd69bdb0fa8..fcc498ec9b33 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -25,6 +25,7 @@
25#include <linux/stat.h> 25#include <linux/stat.h>
26#include <linux/string.h> 26#include <linux/string.h>
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/writeback.h>
28#include "sysv.h" 29#include "sysv.h"
29 30
30/* We don't trust the value of 31/* We don't trust the value of
@@ -139,6 +140,9 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
139 struct inode *inode; 140 struct inode *inode;
140 sysv_ino_t ino; 141 sysv_ino_t ino;
141 unsigned count; 142 unsigned count;
143 struct writeback_control wbc = {
144 .sync_mode = WB_SYNC_NONE
145 };
142 146
143 inode = new_inode(sb); 147 inode = new_inode(sb);
144 if (!inode) 148 if (!inode)
@@ -168,7 +172,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
168 insert_inode_hash(inode); 172 insert_inode_hash(inode);
169 mark_inode_dirty(inode); 173 mark_inode_dirty(inode);
170 174
171 sysv_write_inode(inode, 0); /* ensure inode not allocated again */ 175 sysv_write_inode(inode, &wbc); /* ensure inode not allocated again */
172 mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ 176 mark_inode_dirty(inode); /* cleared by sysv_write_inode() */
173 /* That's it. */ 177 /* That's it. */
174 unlock_super(sb); 178 unlock_super(sb);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 076ca50e9933..c8ff0d1ae5d3 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -62,7 +62,9 @@
62 */ 62 */
63static void shrink_liability(struct ubifs_info *c, int nr_to_write) 63static void shrink_liability(struct ubifs_info *c, int nr_to_write)
64{ 64{
65 down_read(&c->vfs_sb->s_umount);
65 writeback_inodes_sb(c->vfs_sb); 66 writeback_inodes_sb(c->vfs_sb);
67 up_read(&c->vfs_sb->s_umount);
66} 68}
67 69
68/** 70/**
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 089eaca860b4..34640d6dbdcb 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1333,6 +1333,21 @@ xfs_vm_writepage(
1333 trace_xfs_writepage(inode, page, 0); 1333 trace_xfs_writepage(inode, page, 0);
1334 1334
1335 /* 1335 /*
1336 * Refuse to write the page out if we are called from reclaim context.
1337 *
1338 * This is primarily to avoid stack overflows when called from deep
1339 * used stacks in random callers for direct reclaim, but disabling
1340 * reclaim for kswap is a nice side-effect as kswapd causes rather
1341 * suboptimal I/O patters, too.
1342 *
1343 * This should really be done by the core VM, but until that happens
1344 * filesystems like XFS, btrfs and ext4 have to take care of this
1345 * by themselves.
1346 */
1347 if (current->flags & PF_MEMALLOC)
1348 goto out_fail;
1349
1350 /*
1336 * We need a transaction if: 1351 * We need a transaction if:
1337 * 1. There are delalloc buffers on the page 1352 * 1. There are delalloc buffers on the page
1338 * 2. The page is uptodate and we have unmapped buffers 1353 * 2. The page is uptodate and we have unmapped buffers
@@ -1366,14 +1381,6 @@ xfs_vm_writepage(
1366 if (!page_has_buffers(page)) 1381 if (!page_has_buffers(page))
1367 create_empty_buffers(page, 1 << inode->i_blkbits, 0); 1382 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
1368 1383
1369
1370 /*
1371 * VM calculation for nr_to_write seems off. Bump it way
1372 * up, this gets simple streaming writes zippy again.
1373 * To be reviewed again after Jens' writeback changes.
1374 */
1375 wbc->nr_to_write *= 4;
1376
1377 /* 1384 /*
1378 * Convert delayed allocate, unwritten or unmapped space 1385 * Convert delayed allocate, unwritten or unmapped space
1379 * to real space and flush out to disk. 1386 * to real space and flush out to disk.
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 846b75aeb2ab..e7839ee49e43 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -128,13 +128,12 @@ xfs_nfs_get_inode(
128 return ERR_PTR(-ESTALE); 128 return ERR_PTR(-ESTALE);
129 129
130 /* 130 /*
131 * The XFS_IGET_BULKSTAT means that an invalid inode number is just 131 * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
132 * fine and not an indication of a corrupted filesystem. Because 132 * fine and not an indication of a corrupted filesystem as clients can
133 * clients can send any kind of invalid file handle, e.g. after 133 * send invalid file handles and we have to handle it gracefully..
134 * a restore on the server we have to deal with this case gracefully.
135 */ 134 */
136 error = xfs_iget(mp, NULL, ino, XFS_IGET_BULKSTAT, 135 error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED,
137 XFS_ILOCK_SHARED, &ip, 0); 136 XFS_ILOCK_SHARED, &ip);
138 if (error) { 137 if (error) {
139 /* 138 /*
140 * EINVAL means the inode cluster doesn't exist anymore. 139 * EINVAL means the inode cluster doesn't exist anymore.
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 699b60cbab9c..e59a81062830 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -679,10 +679,9 @@ xfs_ioc_bulkstat(
679 error = xfs_bulkstat_single(mp, &inlast, 679 error = xfs_bulkstat_single(mp, &inlast,
680 bulkreq.ubuffer, &done); 680 bulkreq.ubuffer, &done);
681 else /* XFS_IOC_FSBULKSTAT */ 681 else /* XFS_IOC_FSBULKSTAT */
682 error = xfs_bulkstat(mp, &inlast, &count, 682 error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
683 (bulkstat_one_pf)xfs_bulkstat_one, NULL, 683 sizeof(xfs_bstat_t), bulkreq.ubuffer,
684 sizeof(xfs_bstat_t), bulkreq.ubuffer, 684 &done);
685 BULKSTAT_FG_QUICK, &done);
686 685
687 if (error) 686 if (error)
688 return -error; 687 return -error;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 9287135e9bfc..52ed49e6465c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -237,15 +237,12 @@ xfs_bulkstat_one_compat(
237 xfs_ino_t ino, /* inode number to get data for */ 237 xfs_ino_t ino, /* inode number to get data for */
238 void __user *buffer, /* buffer to place output in */ 238 void __user *buffer, /* buffer to place output in */
239 int ubsize, /* size of buffer */ 239 int ubsize, /* size of buffer */
240 void *private_data, /* my private data */
241 xfs_daddr_t bno, /* starting bno of inode cluster */
242 int *ubused, /* bytes used by me */ 240 int *ubused, /* bytes used by me */
243 void *dibuff, /* on-disk inode buffer */
244 int *stat) /* BULKSTAT_RV_... */ 241 int *stat) /* BULKSTAT_RV_... */
245{ 242{
246 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 243 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
247 xfs_bulkstat_one_fmt_compat, bno, 244 xfs_bulkstat_one_fmt_compat,
248 ubused, dibuff, stat); 245 ubused, stat);
249} 246}
250 247
251/* copied from xfs_ioctl.c */ 248/* copied from xfs_ioctl.c */
@@ -298,13 +295,11 @@ xfs_compat_ioc_bulkstat(
298 int res; 295 int res;
299 296
300 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, 297 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
301 sizeof(compat_xfs_bstat_t), 298 sizeof(compat_xfs_bstat_t), 0, &res);
302 NULL, 0, NULL, NULL, &res);
303 } else if (cmd == XFS_IOC_FSBULKSTAT_32) { 299 } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
304 error = xfs_bulkstat(mp, &inlast, &count, 300 error = xfs_bulkstat(mp, &inlast, &count,
305 xfs_bulkstat_one_compat, NULL, 301 xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
306 sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, 302 bulkreq.ubuffer, &done);
307 BULKSTAT_FG_QUICK, &done);
308 } else 303 } else
309 error = XFS_ERROR(EINVAL); 304 error = XFS_ERROR(EINVAL);
310 if (error) 305 if (error)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9c8019c78c92..44f0b2de153e 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -585,11 +585,20 @@ xfs_vn_fallocate(
585 bf.l_len = len; 585 bf.l_len = len;
586 586
587 xfs_ilock(ip, XFS_IOLOCK_EXCL); 587 xfs_ilock(ip, XFS_IOLOCK_EXCL);
588
589 /* check the new inode size is valid before allocating */
590 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
591 offset + len > i_size_read(inode)) {
592 new_size = offset + len;
593 error = inode_newsize_ok(inode, new_size);
594 if (error)
595 goto out_unlock;
596 }
597
588 error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 598 error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
589 0, XFS_ATTR_NOLOCK); 599 0, XFS_ATTR_NOLOCK);
590 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && 600 if (error)
591 offset + len > i_size_read(inode)) 601 goto out_unlock;
592 new_size = offset + len;
593 602
594 /* Change file size if needed */ 603 /* Change file size if needed */
595 if (new_size) { 604 if (new_size) {
@@ -600,6 +609,7 @@ xfs_vn_fallocate(
600 error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); 609 error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
601 } 610 }
602 611
612out_unlock:
603 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 613 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
604out_error: 614out_error:
605 return error; 615 return error;
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 9ac8aea91529..067cafbfc635 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -23,7 +23,6 @@
23#include "xfs_ag.h" 23#include "xfs_ag.h"
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_quota.h" 25#include "xfs_quota.h"
26#include "xfs_log.h"
27#include "xfs_trans.h" 26#include "xfs_trans.h"
28#include "xfs_bmap_btree.h" 27#include "xfs_bmap_btree.h"
29#include "xfs_inode.h" 28#include "xfs_inode.h"
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 3884e20bc14e..ef7f0218bccb 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -164,10 +164,6 @@ xfs_inode_ag_iterator(
164 struct xfs_perag *pag; 164 struct xfs_perag *pag;
165 165
166 pag = xfs_perag_get(mp, ag); 166 pag = xfs_perag_get(mp, ag);
167 if (!pag->pag_ici_init) {
168 xfs_perag_put(pag);
169 continue;
170 }
171 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, 167 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
172 exclusive, &nr); 168 exclusive, &nr);
173 xfs_perag_put(pag); 169 xfs_perag_put(pag);
@@ -867,12 +863,7 @@ xfs_reclaim_inode_shrink(
867 down_read(&xfs_mount_list_lock); 863 down_read(&xfs_mount_list_lock);
868 list_for_each_entry(mp, &xfs_mount_list, m_mplist) { 864 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
869 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 865 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
870
871 pag = xfs_perag_get(mp, ag); 866 pag = xfs_perag_get(mp, ag);
872 if (!pag->pag_ici_init) {
873 xfs_perag_put(pag);
874 continue;
875 }
876 reclaimable += pag->pag_ici_reclaimable; 867 reclaimable += pag->pag_ici_reclaimable;
877 xfs_perag_put(pag); 868 xfs_perag_put(pag);
878 } 869 }
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 207fa77f63ae..d12be8470cba 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -50,7 +50,6 @@
50#include "quota/xfs_dquot_item.h" 50#include "quota/xfs_dquot_item.h"
51#include "quota/xfs_dquot.h" 51#include "quota/xfs_dquot.h"
52#include "xfs_log_recover.h" 52#include "xfs_log_recover.h"
53#include "xfs_buf_item.h"
54#include "xfs_inode_item.h" 53#include "xfs_inode_item.h"
55 54
56/* 55/*
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index ff6bc797baf2..73d5aa117384 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -82,33 +82,6 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class,
82 ) 82 )
83) 83)
84 84
85#define DEFINE_PERAG_REF_EVENT(name) \
86TRACE_EVENT(name, \
87 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
88 unsigned long caller_ip), \
89 TP_ARGS(mp, agno, refcount, caller_ip), \
90 TP_STRUCT__entry( \
91 __field(dev_t, dev) \
92 __field(xfs_agnumber_t, agno) \
93 __field(int, refcount) \
94 __field(unsigned long, caller_ip) \
95 ), \
96 TP_fast_assign( \
97 __entry->dev = mp->m_super->s_dev; \
98 __entry->agno = agno; \
99 __entry->refcount = refcount; \
100 __entry->caller_ip = caller_ip; \
101 ), \
102 TP_printk("dev %d:%d agno %u refcount %d caller %pf", \
103 MAJOR(__entry->dev), MINOR(__entry->dev), \
104 __entry->agno, \
105 __entry->refcount, \
106 (char *)__entry->caller_ip) \
107);
108
109DEFINE_PERAG_REF_EVENT(xfs_perag_get)
110DEFINE_PERAG_REF_EVENT(xfs_perag_put)
111
112#define DEFINE_ATTR_LIST_EVENT(name) \ 85#define DEFINE_ATTR_LIST_EVENT(name) \
113DEFINE_EVENT(xfs_attr_list_class, name, \ 86DEFINE_EVENT(xfs_attr_list_class, name, \
114 TP_PROTO(struct xfs_attr_list_context *ctx), \ 87 TP_PROTO(struct xfs_attr_list_context *ctx), \
@@ -122,6 +95,37 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
122DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk); 95DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
123DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); 96DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
124 97
98DECLARE_EVENT_CLASS(xfs_perag_class,
99 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
100 unsigned long caller_ip),
101 TP_ARGS(mp, agno, refcount, caller_ip),
102 TP_STRUCT__entry(
103 __field(dev_t, dev)
104 __field(xfs_agnumber_t, agno)
105 __field(int, refcount)
106 __field(unsigned long, caller_ip)
107 ),
108 TP_fast_assign(
109 __entry->dev = mp->m_super->s_dev;
110 __entry->agno = agno;
111 __entry->refcount = refcount;
112 __entry->caller_ip = caller_ip;
113 ),
114 TP_printk("dev %d:%d agno %u refcount %d caller %pf",
115 MAJOR(__entry->dev), MINOR(__entry->dev),
116 __entry->agno,
117 __entry->refcount,
118 (char *)__entry->caller_ip)
119);
120
121#define DEFINE_PERAG_REF_EVENT(name) \
122DEFINE_EVENT(xfs_perag_class, name, \
123 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
124 unsigned long caller_ip), \
125 TP_ARGS(mp, agno, refcount, caller_ip))
126DEFINE_PERAG_REF_EVENT(xfs_perag_get);
127DEFINE_PERAG_REF_EVENT(xfs_perag_put);
128
125TRACE_EVENT(xfs_attr_list_node_descend, 129TRACE_EVENT(xfs_attr_list_node_descend,
126 TP_PROTO(struct xfs_attr_list_context *ctx, 130 TP_PROTO(struct xfs_attr_list_context *ctx,
127 struct xfs_da_node_entry *btree), 131 struct xfs_da_node_entry *btree),
@@ -775,165 +779,181 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
775DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); 779DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
776DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); 780DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
777 781
778#define DEFINE_RW_EVENT(name) \ 782DECLARE_EVENT_CLASS(xfs_file_class,
779TRACE_EVENT(name, \ 783 TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
780 TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ 784 TP_ARGS(ip, count, offset, flags),
781 TP_ARGS(ip, count, offset, flags), \ 785 TP_STRUCT__entry(
782 TP_STRUCT__entry( \ 786 __field(dev_t, dev)
783 __field(dev_t, dev) \ 787 __field(xfs_ino_t, ino)
784 __field(xfs_ino_t, ino) \ 788 __field(xfs_fsize_t, size)
785 __field(xfs_fsize_t, size) \ 789 __field(xfs_fsize_t, new_size)
786 __field(xfs_fsize_t, new_size) \ 790 __field(loff_t, offset)
787 __field(loff_t, offset) \ 791 __field(size_t, count)
788 __field(size_t, count) \ 792 __field(int, flags)
789 __field(int, flags) \ 793 ),
790 ), \ 794 TP_fast_assign(
791 TP_fast_assign( \ 795 __entry->dev = VFS_I(ip)->i_sb->s_dev;
792 __entry->dev = VFS_I(ip)->i_sb->s_dev; \ 796 __entry->ino = ip->i_ino;
793 __entry->ino = ip->i_ino; \ 797 __entry->size = ip->i_d.di_size;
794 __entry->size = ip->i_d.di_size; \ 798 __entry->new_size = ip->i_new_size;
795 __entry->new_size = ip->i_new_size; \ 799 __entry->offset = offset;
796 __entry->offset = offset; \ 800 __entry->count = count;
797 __entry->count = count; \ 801 __entry->flags = flags;
798 __entry->flags = flags; \ 802 ),
799 ), \ 803 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
800 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 804 "offset 0x%llx count 0x%zx ioflags %s",
801 "offset 0x%llx count 0x%zx ioflags %s", \ 805 MAJOR(__entry->dev), MINOR(__entry->dev),
802 MAJOR(__entry->dev), MINOR(__entry->dev), \ 806 __entry->ino,
803 __entry->ino, \ 807 __entry->size,
804 __entry->size, \ 808 __entry->new_size,
805 __entry->new_size, \ 809 __entry->offset,
806 __entry->offset, \ 810 __entry->count,
807 __entry->count, \ 811 __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
808 __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \
809) 812)
813
814#define DEFINE_RW_EVENT(name) \
815DEFINE_EVENT(xfs_file_class, name, \
816 TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
817 TP_ARGS(ip, count, offset, flags))
810DEFINE_RW_EVENT(xfs_file_read); 818DEFINE_RW_EVENT(xfs_file_read);
811DEFINE_RW_EVENT(xfs_file_buffered_write); 819DEFINE_RW_EVENT(xfs_file_buffered_write);
812DEFINE_RW_EVENT(xfs_file_direct_write); 820DEFINE_RW_EVENT(xfs_file_direct_write);
813DEFINE_RW_EVENT(xfs_file_splice_read); 821DEFINE_RW_EVENT(xfs_file_splice_read);
814DEFINE_RW_EVENT(xfs_file_splice_write); 822DEFINE_RW_EVENT(xfs_file_splice_write);
815 823
816 824DECLARE_EVENT_CLASS(xfs_page_class,
817#define DEFINE_PAGE_EVENT(name) \ 825 TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
818TRACE_EVENT(name, \ 826 TP_ARGS(inode, page, off),
819 TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ 827 TP_STRUCT__entry(
820 TP_ARGS(inode, page, off), \ 828 __field(dev_t, dev)
821 TP_STRUCT__entry( \ 829 __field(xfs_ino_t, ino)
822 __field(dev_t, dev) \ 830 __field(pgoff_t, pgoff)
823 __field(xfs_ino_t, ino) \ 831 __field(loff_t, size)
824 __field(pgoff_t, pgoff) \ 832 __field(unsigned long, offset)
825 __field(loff_t, size) \ 833 __field(int, delalloc)
826 __field(unsigned long, offset) \ 834 __field(int, unmapped)
827 __field(int, delalloc) \ 835 __field(int, unwritten)
828 __field(int, unmapped) \ 836 ),
829 __field(int, unwritten) \ 837 TP_fast_assign(
830 ), \ 838 int delalloc = -1, unmapped = -1, unwritten = -1;
831 TP_fast_assign( \ 839
832 int delalloc = -1, unmapped = -1, unwritten = -1; \ 840 if (page_has_buffers(page))
833 \ 841 xfs_count_page_state(page, &delalloc,
834 if (page_has_buffers(page)) \ 842 &unmapped, &unwritten);
835 xfs_count_page_state(page, &delalloc, \ 843 __entry->dev = inode->i_sb->s_dev;
836 &unmapped, &unwritten); \ 844 __entry->ino = XFS_I(inode)->i_ino;
837 __entry->dev = inode->i_sb->s_dev; \ 845 __entry->pgoff = page_offset(page);
838 __entry->ino = XFS_I(inode)->i_ino; \ 846 __entry->size = i_size_read(inode);
839 __entry->pgoff = page_offset(page); \ 847 __entry->offset = off;
840 __entry->size = i_size_read(inode); \ 848 __entry->delalloc = delalloc;
841 __entry->offset = off; \ 849 __entry->unmapped = unmapped;
842 __entry->delalloc = delalloc; \ 850 __entry->unwritten = unwritten;
843 __entry->unmapped = unmapped; \ 851 ),
844 __entry->unwritten = unwritten; \ 852 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
845 ), \ 853 "delalloc %d unmapped %d unwritten %d",
846 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \ 854 MAJOR(__entry->dev), MINOR(__entry->dev),
847 "delalloc %d unmapped %d unwritten %d", \ 855 __entry->ino,
848 MAJOR(__entry->dev), MINOR(__entry->dev), \ 856 __entry->pgoff,
849 __entry->ino, \ 857 __entry->size,
850 __entry->pgoff, \ 858 __entry->offset,
851 __entry->size, \ 859 __entry->delalloc,
852 __entry->offset, \ 860 __entry->unmapped,
853 __entry->delalloc, \ 861 __entry->unwritten)
854 __entry->unmapped, \
855 __entry->unwritten) \
856) 862)
863
864#define DEFINE_PAGE_EVENT(name) \
865DEFINE_EVENT(xfs_page_class, name, \
866 TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
867 TP_ARGS(inode, page, off))
857DEFINE_PAGE_EVENT(xfs_writepage); 868DEFINE_PAGE_EVENT(xfs_writepage);
858DEFINE_PAGE_EVENT(xfs_releasepage); 869DEFINE_PAGE_EVENT(xfs_releasepage);
859DEFINE_PAGE_EVENT(xfs_invalidatepage); 870DEFINE_PAGE_EVENT(xfs_invalidatepage);
860 871
861#define DEFINE_IOMAP_EVENT(name) \ 872DECLARE_EVENT_CLASS(xfs_iomap_class,
862TRACE_EVENT(name, \ 873 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
863 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ 874 int flags, struct xfs_bmbt_irec *irec),
864 int flags, struct xfs_bmbt_irec *irec), \ 875 TP_ARGS(ip, offset, count, flags, irec),
865 TP_ARGS(ip, offset, count, flags, irec), \ 876 TP_STRUCT__entry(
866 TP_STRUCT__entry( \ 877 __field(dev_t, dev)
867 __field(dev_t, dev) \ 878 __field(xfs_ino_t, ino)
868 __field(xfs_ino_t, ino) \ 879 __field(loff_t, size)
869 __field(loff_t, size) \ 880 __field(loff_t, new_size)
870 __field(loff_t, new_size) \ 881 __field(loff_t, offset)
871 __field(loff_t, offset) \ 882 __field(size_t, count)
872 __field(size_t, count) \ 883 __field(int, flags)
873 __field(int, flags) \ 884 __field(xfs_fileoff_t, startoff)
874 __field(xfs_fileoff_t, startoff) \ 885 __field(xfs_fsblock_t, startblock)
875 __field(xfs_fsblock_t, startblock) \ 886 __field(xfs_filblks_t, blockcount)
876 __field(xfs_filblks_t, blockcount) \ 887 ),
877 ), \ 888 TP_fast_assign(
878 TP_fast_assign( \ 889 __entry->dev = VFS_I(ip)->i_sb->s_dev;
879 __entry->dev = VFS_I(ip)->i_sb->s_dev; \ 890 __entry->ino = ip->i_ino;
880 __entry->ino = ip->i_ino; \ 891 __entry->size = ip->i_d.di_size;
881 __entry->size = ip->i_d.di_size; \ 892 __entry->new_size = ip->i_new_size;
882 __entry->new_size = ip->i_new_size; \ 893 __entry->offset = offset;
883 __entry->offset = offset; \ 894 __entry->count = count;
884 __entry->count = count; \ 895 __entry->flags = flags;
885 __entry->flags = flags; \ 896 __entry->startoff = irec ? irec->br_startoff : 0;
886 __entry->startoff = irec ? irec->br_startoff : 0; \ 897 __entry->startblock = irec ? irec->br_startblock : 0;
887 __entry->startblock = irec ? irec->br_startblock : 0; \ 898 __entry->blockcount = irec ? irec->br_blockcount : 0;
888 __entry->blockcount = irec ? irec->br_blockcount : 0; \ 899 ),
889 ), \ 900 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
890 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 901 "offset 0x%llx count %zd flags %s "
891 "offset 0x%llx count %zd flags %s " \ 902 "startoff 0x%llx startblock %lld blockcount 0x%llx",
892 "startoff 0x%llx startblock %lld blockcount 0x%llx", \ 903 MAJOR(__entry->dev), MINOR(__entry->dev),
893 MAJOR(__entry->dev), MINOR(__entry->dev), \ 904 __entry->ino,
894 __entry->ino, \ 905 __entry->size,
895 __entry->size, \ 906 __entry->new_size,
896 __entry->new_size, \ 907 __entry->offset,
897 __entry->offset, \ 908 __entry->count,
898 __entry->count, \ 909 __print_flags(__entry->flags, "|", BMAPI_FLAGS),
899 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \ 910 __entry->startoff,
900 __entry->startoff, \ 911 (__int64_t)__entry->startblock,
901 (__int64_t)__entry->startblock, \ 912 __entry->blockcount)
902 __entry->blockcount) \
903) 913)
914
915#define DEFINE_IOMAP_EVENT(name) \
916DEFINE_EVENT(xfs_iomap_class, name, \
917 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
918 int flags, struct xfs_bmbt_irec *irec), \
919 TP_ARGS(ip, offset, count, flags, irec))
904DEFINE_IOMAP_EVENT(xfs_iomap_enter); 920DEFINE_IOMAP_EVENT(xfs_iomap_enter);
905DEFINE_IOMAP_EVENT(xfs_iomap_found); 921DEFINE_IOMAP_EVENT(xfs_iomap_found);
906DEFINE_IOMAP_EVENT(xfs_iomap_alloc); 922DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
907 923
908#define DEFINE_SIMPLE_IO_EVENT(name) \ 924DECLARE_EVENT_CLASS(xfs_simple_io_class,
909TRACE_EVENT(name, \ 925 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
910 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \ 926 TP_ARGS(ip, offset, count),
911 TP_ARGS(ip, offset, count), \ 927 TP_STRUCT__entry(
912 TP_STRUCT__entry( \ 928 __field(dev_t, dev)
913 __field(dev_t, dev) \ 929 __field(xfs_ino_t, ino)
914 __field(xfs_ino_t, ino) \ 930 __field(loff_t, size)
915 __field(loff_t, size) \ 931 __field(loff_t, new_size)
916 __field(loff_t, new_size) \ 932 __field(loff_t, offset)
917 __field(loff_t, offset) \ 933 __field(size_t, count)
918 __field(size_t, count) \ 934 ),
919 ), \ 935 TP_fast_assign(
920 TP_fast_assign( \ 936 __entry->dev = VFS_I(ip)->i_sb->s_dev;
921 __entry->dev = VFS_I(ip)->i_sb->s_dev; \ 937 __entry->ino = ip->i_ino;
922 __entry->ino = ip->i_ino; \ 938 __entry->size = ip->i_d.di_size;
923 __entry->size = ip->i_d.di_size; \ 939 __entry->new_size = ip->i_new_size;
924 __entry->new_size = ip->i_new_size; \ 940 __entry->offset = offset;
925 __entry->offset = offset; \ 941 __entry->count = count;
926 __entry->count = count; \ 942 ),
927 ), \ 943 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
928 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 944 "offset 0x%llx count %zd",
929 "offset 0x%llx count %zd", \ 945 MAJOR(__entry->dev), MINOR(__entry->dev),
930 MAJOR(__entry->dev), MINOR(__entry->dev), \ 946 __entry->ino,
931 __entry->ino, \ 947 __entry->size,
932 __entry->size, \ 948 __entry->new_size,
933 __entry->new_size, \ 949 __entry->offset,
934 __entry->offset, \ 950 __entry->count)
935 __entry->count) \
936); 951);
952
953#define DEFINE_SIMPLE_IO_EVENT(name) \
954DEFINE_EVENT(xfs_simple_io_class, name, \
955 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
956 TP_ARGS(ip, offset, count))
937DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); 957DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
938DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); 958DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
939 959
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 38e764146644..8c117ff2e3ab 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -249,8 +249,10 @@ xfs_qm_hold_quotafs_ref(
249 249
250 if (!xfs_Gqm) { 250 if (!xfs_Gqm) {
251 xfs_Gqm = xfs_Gqm_init(); 251 xfs_Gqm = xfs_Gqm_init();
252 if (!xfs_Gqm) 252 if (!xfs_Gqm) {
253 mutex_unlock(&xfs_Gqm_lock);
253 return ENOMEM; 254 return ENOMEM;
255 }
254 } 256 }
255 257
256 /* 258 /*
@@ -1630,10 +1632,7 @@ xfs_qm_dqusage_adjust(
1630 xfs_ino_t ino, /* inode number to get data for */ 1632 xfs_ino_t ino, /* inode number to get data for */
1631 void __user *buffer, /* not used */ 1633 void __user *buffer, /* not used */
1632 int ubsize, /* not used */ 1634 int ubsize, /* not used */
1633 void *private_data, /* not used */
1634 xfs_daddr_t bno, /* starting block of inode cluster */
1635 int *ubused, /* not used */ 1635 int *ubused, /* not used */
1636 void *dip, /* on-disk inode pointer (not used) */
1637 int *res) /* result code value */ 1636 int *res) /* result code value */
1638{ 1637{
1639 xfs_inode_t *ip; 1638 xfs_inode_t *ip;
@@ -1658,7 +1657,7 @@ xfs_qm_dqusage_adjust(
1658 * the case in all other instances. It's OK that we do this because 1657 * the case in all other instances. It's OK that we do this because
1659 * quotacheck is done only at mount time. 1658 * quotacheck is done only at mount time.
1660 */ 1659 */
1661 if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) { 1660 if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) {
1662 *res = BULKSTAT_RV_NOTHING; 1661 *res = BULKSTAT_RV_NOTHING;
1663 return error; 1662 return error;
1664 } 1663 }
@@ -1794,12 +1793,13 @@ xfs_qm_quotacheck(
1794 * Iterate thru all the inodes in the file system, 1793 * Iterate thru all the inodes in the file system,
1795 * adjusting the corresponding dquot counters in core. 1794 * adjusting the corresponding dquot counters in core.
1796 */ 1795 */
1797 if ((error = xfs_bulkstat(mp, &lastino, &count, 1796 error = xfs_bulkstat(mp, &lastino, &count,
1798 xfs_qm_dqusage_adjust, NULL, 1797 xfs_qm_dqusage_adjust,
1799 structsz, NULL, BULKSTAT_FG_IGET, &done))) 1798 structsz, NULL, &done);
1799 if (error)
1800 break; 1800 break;
1801 1801
1802 } while (! done); 1802 } while (!done);
1803 1803
1804 /* 1804 /*
1805 * We've made all the changes that we need to make incore. 1805 * We've made all the changes that we need to make incore.
@@ -1887,14 +1887,14 @@ xfs_qm_init_quotainos(
1887 mp->m_sb.sb_uquotino != NULLFSINO) { 1887 mp->m_sb.sb_uquotino != NULLFSINO) {
1888 ASSERT(mp->m_sb.sb_uquotino > 0); 1888 ASSERT(mp->m_sb.sb_uquotino > 0);
1889 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 1889 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1890 0, 0, &uip, 0))) 1890 0, 0, &uip)))
1891 return XFS_ERROR(error); 1891 return XFS_ERROR(error);
1892 } 1892 }
1893 if (XFS_IS_OQUOTA_ON(mp) && 1893 if (XFS_IS_OQUOTA_ON(mp) &&
1894 mp->m_sb.sb_gquotino != NULLFSINO) { 1894 mp->m_sb.sb_gquotino != NULLFSINO) {
1895 ASSERT(mp->m_sb.sb_gquotino > 0); 1895 ASSERT(mp->m_sb.sb_gquotino > 0);
1896 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 1896 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1897 0, 0, &gip, 0))) { 1897 0, 0, &gip))) {
1898 if (uip) 1898 if (uip)
1899 IRELE(uip); 1899 IRELE(uip);
1900 return XFS_ERROR(error); 1900 return XFS_ERROR(error);
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 92b002f1805f..b4487764e923 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -262,7 +262,7 @@ xfs_qm_scall_trunc_qfiles(
262 } 262 }
263 263
264 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { 264 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
265 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); 265 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip);
266 if (!error) { 266 if (!error) {
267 error = xfs_truncate_file(mp, qip); 267 error = xfs_truncate_file(mp, qip);
268 IRELE(qip); 268 IRELE(qip);
@@ -271,7 +271,7 @@ xfs_qm_scall_trunc_qfiles(
271 271
272 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && 272 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
273 mp->m_sb.sb_gquotino != NULLFSINO) { 273 mp->m_sb.sb_gquotino != NULLFSINO) {
274 error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); 274 error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip);
275 if (!error2) { 275 if (!error2) {
276 error2 = xfs_truncate_file(mp, qip); 276 error2 = xfs_truncate_file(mp, qip);
277 IRELE(qip); 277 IRELE(qip);
@@ -417,12 +417,12 @@ xfs_qm_scall_getqstat(
417 } 417 }
418 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { 418 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
419 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 419 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
420 0, 0, &uip, 0) == 0) 420 0, 0, &uip) == 0)
421 tempuqip = B_TRUE; 421 tempuqip = B_TRUE;
422 } 422 }
423 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { 423 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
424 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 424 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
425 0, 0, &gip, 0) == 0) 425 0, 0, &gip) == 0)
426 tempgqip = B_TRUE; 426 tempgqip = B_TRUE;
427 } 427 }
428 if (uip) { 428 if (uip) {
@@ -1109,10 +1109,7 @@ xfs_qm_internalqcheck_adjust(
1109 xfs_ino_t ino, /* inode number to get data for */ 1109 xfs_ino_t ino, /* inode number to get data for */
1110 void __user *buffer, /* not used */ 1110 void __user *buffer, /* not used */
1111 int ubsize, /* not used */ 1111 int ubsize, /* not used */
1112 void *private_data, /* not used */
1113 xfs_daddr_t bno, /* starting block of inode cluster */
1114 int *ubused, /* not used */ 1112 int *ubused, /* not used */
1115 void *dip, /* not used */
1116 int *res) /* bulkstat result code */ 1113 int *res) /* bulkstat result code */
1117{ 1114{
1118 xfs_inode_t *ip; 1115 xfs_inode_t *ip;
@@ -1134,7 +1131,7 @@ xfs_qm_internalqcheck_adjust(
1134 ipreleased = B_FALSE; 1131 ipreleased = B_FALSE;
1135 again: 1132 again:
1136 lock_flags = XFS_ILOCK_SHARED; 1133 lock_flags = XFS_ILOCK_SHARED;
1137 if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) { 1134 if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) {
1138 *res = BULKSTAT_RV_NOTHING; 1135 *res = BULKSTAT_RV_NOTHING;
1139 return (error); 1136 return (error);
1140 } 1137 }
@@ -1205,15 +1202,15 @@ xfs_qm_internalqcheck(
1205 * Iterate thru all the inodes in the file system, 1202 * Iterate thru all the inodes in the file system,
1206 * adjusting the corresponding dquot counters 1203 * adjusting the corresponding dquot counters
1207 */ 1204 */
1208 if ((error = xfs_bulkstat(mp, &lastino, &count, 1205 error = xfs_bulkstat(mp, &lastino, &count,
1209 xfs_qm_internalqcheck_adjust, NULL, 1206 xfs_qm_internalqcheck_adjust,
1210 0, NULL, BULKSTAT_FG_IGET, &done))) { 1207 0, NULL, &done);
1208 if (error) {
1209 cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
1211 break; 1210 break;
1212 } 1211 }
1213 } while (! done); 1212 } while (!done);
1214 if (error) { 1213
1215 cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
1216 }
1217 cmn_err(CE_DEBUG, "Checking results against system dquots"); 1214 cmn_err(CE_DEBUG, "Checking results against system dquots");
1218 for (i = 0; i < qmtest_hashmask; i++) { 1215 for (i = 0; i < qmtest_hashmask; i++) {
1219 xfs_dqtest_t *d, *n; 1216 xfs_dqtest_t *d, *n;
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 401f364ad36c..4917d4eed4ed 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -227,7 +227,6 @@ typedef struct xfs_perag {
227 227
228 atomic_t pagf_fstrms; /* # of filestreams active in this AG */ 228 atomic_t pagf_fstrms; /* # of filestreams active in this AG */
229 229
230 int pag_ici_init; /* incore inode cache initialised */
231 rwlock_t pag_ici_lock; /* incore inode lock */ 230 rwlock_t pag_ici_lock; /* incore inode lock */
232 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 231 struct radix_tree_root pag_ici_root; /* incore inode cache root */
233 int pag_ici_reclaimable; /* reclaimable inodes */ 232 int pag_ici_reclaimable; /* reclaimable inodes */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5bba29a07812..7f159d2a429a 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -69,7 +69,9 @@ xfs_swapext(
69 goto out; 69 goto out;
70 } 70 }
71 71
72 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) { 72 if (!(file->f_mode & FMODE_WRITE) ||
73 !(file->f_mode & FMODE_READ) ||
74 (file->f_flags & O_APPEND)) {
73 error = XFS_ERROR(EBADF); 75 error = XFS_ERROR(EBADF);
74 goto out_put_file; 76 goto out_put_file;
75 } 77 }
@@ -81,6 +83,7 @@ xfs_swapext(
81 } 83 }
82 84
83 if (!(tmp_file->f_mode & FMODE_WRITE) || 85 if (!(tmp_file->f_mode & FMODE_WRITE) ||
86 !(tmp_file->f_mode & FMODE_READ) ||
84 (tmp_file->f_flags & O_APPEND)) { 87 (tmp_file->f_flags & O_APPEND)) {
85 error = XFS_ERROR(EBADF); 88 error = XFS_ERROR(EBADF);
86 goto out_put_tmp_file; 89 goto out_put_tmp_file;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 9d884c127bb9..c7142a064c48 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1203,6 +1203,63 @@ error0:
1203 return error; 1203 return error;
1204} 1204}
1205 1205
1206STATIC int
1207xfs_imap_lookup(
1208 struct xfs_mount *mp,
1209 struct xfs_trans *tp,
1210 xfs_agnumber_t agno,
1211 xfs_agino_t agino,
1212 xfs_agblock_t agbno,
1213 xfs_agblock_t *chunk_agbno,
1214 xfs_agblock_t *offset_agbno,
1215 int flags)
1216{
1217 struct xfs_inobt_rec_incore rec;
1218 struct xfs_btree_cur *cur;
1219 struct xfs_buf *agbp;
1220 xfs_agino_t startino;
1221 int error;
1222 int i;
1223
1224 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1225 if (error) {
1226 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1227 "xfs_ialloc_read_agi() returned "
1228 "error %d, agno %d",
1229 error, agno);
1230 return error;
1231 }
1232
1233 /*
1234 * derive and lookup the exact inode record for the given agino. If the
1235 * record cannot be found, then it's an invalid inode number and we
1236 * should abort.
1237 */
1238 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1239 startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
1240 error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
1241 if (!error) {
1242 if (i)
1243 error = xfs_inobt_get_rec(cur, &rec, &i);
1244 if (!error && i == 0)
1245 error = EINVAL;
1246 }
1247
1248 xfs_trans_brelse(tp, agbp);
1249 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1250 if (error)
1251 return error;
1252
1253 /* for untrusted inodes check it is allocated first */
1254 if ((flags & XFS_IGET_UNTRUSTED) &&
1255 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
1256 return EINVAL;
1257
1258 *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
1259 *offset_agbno = agbno - *chunk_agbno;
1260 return 0;
1261}
1262
1206/* 1263/*
1207 * Return the location of the inode in imap, for mapping it into a buffer. 1264 * Return the location of the inode in imap, for mapping it into a buffer.
1208 */ 1265 */
@@ -1235,8 +1292,11 @@ xfs_imap(
1235 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || 1292 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
1236 ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1293 ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1237#ifdef DEBUG 1294#ifdef DEBUG
1238 /* no diagnostics for bulkstat, ino comes from userspace */ 1295 /*
1239 if (flags & XFS_IGET_BULKSTAT) 1296 * Don't output diagnostic information for untrusted inodes
1297 * as they can be invalid without implying corruption.
1298 */
1299 if (flags & XFS_IGET_UNTRUSTED)
1240 return XFS_ERROR(EINVAL); 1300 return XFS_ERROR(EINVAL);
1241 if (agno >= mp->m_sb.sb_agcount) { 1301 if (agno >= mp->m_sb.sb_agcount) {
1242 xfs_fs_cmn_err(CE_ALERT, mp, 1302 xfs_fs_cmn_err(CE_ALERT, mp,
@@ -1263,6 +1323,23 @@ xfs_imap(
1263 return XFS_ERROR(EINVAL); 1323 return XFS_ERROR(EINVAL);
1264 } 1324 }
1265 1325
1326 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1327
1328 /*
1329 * For bulkstat and handle lookups, we have an untrusted inode number
1330 * that we have to verify is valid. We cannot do this just by reading
1331 * the inode buffer as it may have been unlinked and removed leaving
1332 * inodes in stale state on disk. Hence we have to do a btree lookup
1333 * in all cases where an untrusted inode number is passed.
1334 */
1335 if (flags & XFS_IGET_UNTRUSTED) {
1336 error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
1337 &chunk_agbno, &offset_agbno, flags);
1338 if (error)
1339 return error;
1340 goto out_map;
1341 }
1342
1266 /* 1343 /*
1267 * If the inode cluster size is the same as the blocksize or 1344 * If the inode cluster size is the same as the blocksize or
1268 * smaller we get to the buffer by simple arithmetics. 1345 * smaller we get to the buffer by simple arithmetics.
@@ -1277,24 +1354,6 @@ xfs_imap(
1277 return 0; 1354 return 0;
1278 } 1355 }
1279 1356
1280 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1281
1282 /*
1283 * If we get a block number passed from bulkstat we can use it to
1284 * find the buffer easily.
1285 */
1286 if (imap->im_blkno) {
1287 offset = XFS_INO_TO_OFFSET(mp, ino);
1288 ASSERT(offset < mp->m_sb.sb_inopblock);
1289
1290 cluster_agbno = xfs_daddr_to_agbno(mp, imap->im_blkno);
1291 offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock;
1292
1293 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
1294 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1295 return 0;
1296 }
1297
1298 /* 1357 /*
1299 * If the inode chunks are aligned then use simple maths to 1358 * If the inode chunks are aligned then use simple maths to
1300 * find the location. Otherwise we have to do a btree 1359 * find the location. Otherwise we have to do a btree
@@ -1304,50 +1363,13 @@ xfs_imap(
1304 offset_agbno = agbno & mp->m_inoalign_mask; 1363 offset_agbno = agbno & mp->m_inoalign_mask;
1305 chunk_agbno = agbno - offset_agbno; 1364 chunk_agbno = agbno - offset_agbno;
1306 } else { 1365 } else {
1307 xfs_btree_cur_t *cur; /* inode btree cursor */ 1366 error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
1308 xfs_inobt_rec_incore_t chunk_rec; 1367 &chunk_agbno, &offset_agbno, flags);
1309 xfs_buf_t *agbp; /* agi buffer */
1310 int i; /* temp state */
1311
1312 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1313 if (error) {
1314 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1315 "xfs_ialloc_read_agi() returned "
1316 "error %d, agno %d",
1317 error, agno);
1318 return error;
1319 }
1320
1321 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1322 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
1323 if (error) {
1324 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1325 "xfs_inobt_lookup() failed");
1326 goto error0;
1327 }
1328
1329 error = xfs_inobt_get_rec(cur, &chunk_rec, &i);
1330 if (error) {
1331 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1332 "xfs_inobt_get_rec() failed");
1333 goto error0;
1334 }
1335 if (i == 0) {
1336#ifdef DEBUG
1337 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1338 "xfs_inobt_get_rec() failed");
1339#endif /* DEBUG */
1340 error = XFS_ERROR(EINVAL);
1341 }
1342 error0:
1343 xfs_trans_brelse(tp, agbp);
1344 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1345 if (error) 1368 if (error)
1346 return error; 1369 return error;
1347 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino);
1348 offset_agbno = agbno - chunk_agbno;
1349 } 1370 }
1350 1371
1372out_map:
1351 ASSERT(agbno >= chunk_agbno); 1373 ASSERT(agbno >= chunk_agbno);
1352 cluster_agbno = chunk_agbno + 1374 cluster_agbno = chunk_agbno +
1353 ((offset_agbno / blks_per_cluster) * blks_per_cluster); 1375 ((offset_agbno / blks_per_cluster) * blks_per_cluster);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 6845db90818f..8f8b91be2c99 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -259,7 +259,6 @@ xfs_iget_cache_miss(
259 xfs_trans_t *tp, 259 xfs_trans_t *tp,
260 xfs_ino_t ino, 260 xfs_ino_t ino,
261 struct xfs_inode **ipp, 261 struct xfs_inode **ipp,
262 xfs_daddr_t bno,
263 int flags, 262 int flags,
264 int lock_flags) 263 int lock_flags)
265{ 264{
@@ -272,7 +271,7 @@ xfs_iget_cache_miss(
272 if (!ip) 271 if (!ip)
273 return ENOMEM; 272 return ENOMEM;
274 273
275 error = xfs_iread(mp, tp, ip, bno, flags); 274 error = xfs_iread(mp, tp, ip, flags);
276 if (error) 275 if (error)
277 goto out_destroy; 276 goto out_destroy;
278 277
@@ -358,8 +357,6 @@ out_destroy:
358 * within the file system for the inode being requested. 357 * within the file system for the inode being requested.
359 * lock_flags -- flags indicating how to lock the inode. See the comment 358 * lock_flags -- flags indicating how to lock the inode. See the comment
360 * for xfs_ilock() for a list of valid values. 359 * for xfs_ilock() for a list of valid values.
361 * bno -- the block number starting the buffer containing the inode,
362 * if known (as by bulkstat), else 0.
363 */ 360 */
364int 361int
365xfs_iget( 362xfs_iget(
@@ -368,8 +365,7 @@ xfs_iget(
368 xfs_ino_t ino, 365 xfs_ino_t ino,
369 uint flags, 366 uint flags,
370 uint lock_flags, 367 uint lock_flags,
371 xfs_inode_t **ipp, 368 xfs_inode_t **ipp)
372 xfs_daddr_t bno)
373{ 369{
374 xfs_inode_t *ip; 370 xfs_inode_t *ip;
375 int error; 371 int error;
@@ -382,9 +378,6 @@ xfs_iget(
382 378
383 /* get the perag structure and ensure that it's inode capable */ 379 /* get the perag structure and ensure that it's inode capable */
384 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); 380 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
385 if (!pag->pagi_inodeok)
386 return EINVAL;
387 ASSERT(pag->pag_ici_init);
388 agino = XFS_INO_TO_AGINO(mp, ino); 381 agino = XFS_INO_TO_AGINO(mp, ino);
389 382
390again: 383again:
@@ -400,7 +393,7 @@ again:
400 read_unlock(&pag->pag_ici_lock); 393 read_unlock(&pag->pag_ici_lock);
401 XFS_STATS_INC(xs_ig_missed); 394 XFS_STATS_INC(xs_ig_missed);
402 395
403 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno, 396 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
404 flags, lock_flags); 397 flags, lock_flags);
405 if (error) 398 if (error)
406 goto out_error_or_again; 399 goto out_error_or_again;
@@ -744,30 +737,24 @@ xfs_ilock_demote(
744} 737}
745 738
746#ifdef DEBUG 739#ifdef DEBUG
747/*
748 * Debug-only routine, without additional rw_semaphore APIs, we can
749 * now only answer requests regarding whether we hold the lock for write
750 * (reader state is outside our visibility, we only track writer state).
751 *
752 * Note: this means !xfs_isilocked would give false positives, so don't do that.
753 */
754int 740int
755xfs_isilocked( 741xfs_isilocked(
756 xfs_inode_t *ip, 742 xfs_inode_t *ip,
757 uint lock_flags) 743 uint lock_flags)
758{ 744{
759 if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) == 745 if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
760 XFS_ILOCK_EXCL) { 746 if (!(lock_flags & XFS_ILOCK_SHARED))
761 if (!ip->i_lock.mr_writer) 747 return !!ip->i_lock.mr_writer;
762 return 0; 748 return rwsem_is_locked(&ip->i_lock.mr_lock);
763 } 749 }
764 750
765 if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) == 751 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
766 XFS_IOLOCK_EXCL) { 752 if (!(lock_flags & XFS_IOLOCK_SHARED))
767 if (!ip->i_iolock.mr_writer) 753 return !!ip->i_iolock.mr_writer;
768 return 0; 754 return rwsem_is_locked(&ip->i_iolock.mr_lock);
769 } 755 }
770 756
771 return 1; 757 ASSERT(0);
758 return 0;
772} 759}
773#endif 760#endif
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8cd6e8d8fe9c..b76a829d7e20 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -177,7 +177,7 @@ xfs_imap_to_bp(
177 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 177 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
178 XFS_ERRTAG_ITOBP_INOTOBP, 178 XFS_ERRTAG_ITOBP_INOTOBP,
179 XFS_RANDOM_ITOBP_INOTOBP))) { 179 XFS_RANDOM_ITOBP_INOTOBP))) {
180 if (iget_flags & XFS_IGET_BULKSTAT) { 180 if (iget_flags & XFS_IGET_UNTRUSTED) {
181 xfs_trans_brelse(tp, bp); 181 xfs_trans_brelse(tp, bp);
182 return XFS_ERROR(EINVAL); 182 return XFS_ERROR(EINVAL);
183 } 183 }
@@ -787,7 +787,6 @@ xfs_iread(
787 xfs_mount_t *mp, 787 xfs_mount_t *mp,
788 xfs_trans_t *tp, 788 xfs_trans_t *tp,
789 xfs_inode_t *ip, 789 xfs_inode_t *ip,
790 xfs_daddr_t bno,
791 uint iget_flags) 790 uint iget_flags)
792{ 791{
793 xfs_buf_t *bp; 792 xfs_buf_t *bp;
@@ -797,11 +796,9 @@ xfs_iread(
797 /* 796 /*
798 * Fill in the location information in the in-core inode. 797 * Fill in the location information in the in-core inode.
799 */ 798 */
800 ip->i_imap.im_blkno = bno;
801 error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); 799 error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
802 if (error) 800 if (error)
803 return error; 801 return error;
804 ASSERT(bno == 0 || bno == ip->i_imap.im_blkno);
805 802
806 /* 803 /*
807 * Get pointers to the on-disk inode and the buffer containing it. 804 * Get pointers to the on-disk inode and the buffer containing it.
@@ -1940,10 +1937,10 @@ xfs_ifree_cluster(
1940 int blks_per_cluster; 1937 int blks_per_cluster;
1941 int nbufs; 1938 int nbufs;
1942 int ninodes; 1939 int ninodes;
1943 int i, j, found, pre_flushed; 1940 int i, j;
1944 xfs_daddr_t blkno; 1941 xfs_daddr_t blkno;
1945 xfs_buf_t *bp; 1942 xfs_buf_t *bp;
1946 xfs_inode_t *ip, **ip_found; 1943 xfs_inode_t *ip;
1947 xfs_inode_log_item_t *iip; 1944 xfs_inode_log_item_t *iip;
1948 xfs_log_item_t *lip; 1945 xfs_log_item_t *lip;
1949 struct xfs_perag *pag; 1946 struct xfs_perag *pag;
@@ -1960,114 +1957,97 @@ xfs_ifree_cluster(
1960 nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; 1957 nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
1961 } 1958 }
1962 1959
1963 ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS);
1964
1965 for (j = 0; j < nbufs; j++, inum += ninodes) { 1960 for (j = 0; j < nbufs; j++, inum += ninodes) {
1961 int found = 0;
1962
1966 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 1963 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
1967 XFS_INO_TO_AGBNO(mp, inum)); 1964 XFS_INO_TO_AGBNO(mp, inum));
1968 1965
1966 /*
1967 * We obtain and lock the backing buffer first in the process
1968 * here, as we have to ensure that any dirty inode that we
1969 * can't get the flush lock on is attached to the buffer.
1970 * If we scan the in-memory inodes first, then buffer IO can
1971 * complete before we get a lock on it, and hence we may fail
1972 * to mark all the active inodes on the buffer stale.
1973 */
1974 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
1975 mp->m_bsize * blks_per_cluster,
1976 XBF_LOCK);
1977
1978 /*
1979 * Walk the inodes already attached to the buffer and mark them
1980 * stale. These will all have the flush locks held, so an
1981 * in-memory inode walk can't lock them.
1982 */
1983 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
1984 while (lip) {
1985 if (lip->li_type == XFS_LI_INODE) {
1986 iip = (xfs_inode_log_item_t *)lip;
1987 ASSERT(iip->ili_logged == 1);
1988 lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
1989 xfs_trans_ail_copy_lsn(mp->m_ail,
1990 &iip->ili_flush_lsn,
1991 &iip->ili_item.li_lsn);
1992 xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
1993 found++;
1994 }
1995 lip = lip->li_bio_list;
1996 }
1969 1997
1970 /* 1998 /*
1971 * Look for each inode in memory and attempt to lock it, 1999 * For each inode in memory attempt to add it to the inode
1972 * we can be racing with flush and tail pushing here. 2000 * buffer and set it up for being staled on buffer IO
1973 * any inode we get the locks on, add to an array of 2001 * completion. This is safe as we've locked out tail pushing
1974 * inode items to process later. 2002 * and flushing by locking the buffer.
1975 * 2003 *
1976 * The get the buffer lock, we could beat a flush 2004 * We have already marked every inode that was part of a
1977 * or tail pushing thread to the lock here, in which 2005 * transaction stale above, which means there is no point in
1978 * case they will go looking for the inode buffer 2006 * even trying to lock them.
1979 * and fail, we need some other form of interlock
1980 * here.
1981 */ 2007 */
1982 found = 0;
1983 for (i = 0; i < ninodes; i++) { 2008 for (i = 0; i < ninodes; i++) {
1984 read_lock(&pag->pag_ici_lock); 2009 read_lock(&pag->pag_ici_lock);
1985 ip = radix_tree_lookup(&pag->pag_ici_root, 2010 ip = radix_tree_lookup(&pag->pag_ici_root,
1986 XFS_INO_TO_AGINO(mp, (inum + i))); 2011 XFS_INO_TO_AGINO(mp, (inum + i)));
1987 2012
1988 /* Inode not in memory or we found it already, 2013 /* Inode not in memory or stale, nothing to do */
1989 * nothing to do
1990 */
1991 if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { 2014 if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
1992 read_unlock(&pag->pag_ici_lock); 2015 read_unlock(&pag->pag_ici_lock);
1993 continue; 2016 continue;
1994 } 2017 }
1995 2018
1996 if (xfs_inode_clean(ip)) { 2019 /* don't try to lock/unlock the current inode */
1997 read_unlock(&pag->pag_ici_lock); 2020 if (ip != free_ip &&
1998 continue; 2021 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
1999 }
2000
2001 /* If we can get the locks then add it to the
2002 * list, otherwise by the time we get the bp lock
2003 * below it will already be attached to the
2004 * inode buffer.
2005 */
2006
2007 /* This inode will already be locked - by us, lets
2008 * keep it that way.
2009 */
2010
2011 if (ip == free_ip) {
2012 if (xfs_iflock_nowait(ip)) {
2013 xfs_iflags_set(ip, XFS_ISTALE);
2014 if (xfs_inode_clean(ip)) {
2015 xfs_ifunlock(ip);
2016 } else {
2017 ip_found[found++] = ip;
2018 }
2019 }
2020 read_unlock(&pag->pag_ici_lock); 2022 read_unlock(&pag->pag_ici_lock);
2021 continue; 2023 continue;
2022 } 2024 }
2025 read_unlock(&pag->pag_ici_lock);
2023 2026
2024 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2027 if (!xfs_iflock_nowait(ip)) {
2025 if (xfs_iflock_nowait(ip)) { 2028 if (ip != free_ip)
2026 xfs_iflags_set(ip, XFS_ISTALE);
2027
2028 if (xfs_inode_clean(ip)) {
2029 xfs_ifunlock(ip);
2030 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2031 } else {
2032 ip_found[found++] = ip;
2033 }
2034 } else {
2035 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2029 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2036 } 2030 continue;
2037 } 2031 }
2038 read_unlock(&pag->pag_ici_lock);
2039 }
2040 2032
2041 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2033 xfs_iflags_set(ip, XFS_ISTALE);
2042 mp->m_bsize * blks_per_cluster, 2034 if (xfs_inode_clean(ip)) {
2043 XBF_LOCK); 2035 ASSERT(ip != free_ip);
2044 2036 xfs_ifunlock(ip);
2045 pre_flushed = 0; 2037 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2046 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 2038 continue;
2047 while (lip) {
2048 if (lip->li_type == XFS_LI_INODE) {
2049 iip = (xfs_inode_log_item_t *)lip;
2050 ASSERT(iip->ili_logged == 1);
2051 lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
2052 xfs_trans_ail_copy_lsn(mp->m_ail,
2053 &iip->ili_flush_lsn,
2054 &iip->ili_item.li_lsn);
2055 xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
2056 pre_flushed++;
2057 } 2039 }
2058 lip = lip->li_bio_list;
2059 }
2060 2040
2061 for (i = 0; i < found; i++) {
2062 ip = ip_found[i];
2063 iip = ip->i_itemp; 2041 iip = ip->i_itemp;
2064
2065 if (!iip) { 2042 if (!iip) {
2043 /* inode with unlogged changes only */
2044 ASSERT(ip != free_ip);
2066 ip->i_update_core = 0; 2045 ip->i_update_core = 0;
2067 xfs_ifunlock(ip); 2046 xfs_ifunlock(ip);
2068 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2047 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2069 continue; 2048 continue;
2070 } 2049 }
2050 found++;
2071 2051
2072 iip->ili_last_fields = iip->ili_format.ilf_fields; 2052 iip->ili_last_fields = iip->ili_format.ilf_fields;
2073 iip->ili_format.ilf_fields = 0; 2053 iip->ili_format.ilf_fields = 0;
@@ -2078,17 +2058,16 @@ xfs_ifree_cluster(
2078 xfs_buf_attach_iodone(bp, 2058 xfs_buf_attach_iodone(bp,
2079 (void(*)(xfs_buf_t*,xfs_log_item_t*)) 2059 (void(*)(xfs_buf_t*,xfs_log_item_t*))
2080 xfs_istale_done, (xfs_log_item_t *)iip); 2060 xfs_istale_done, (xfs_log_item_t *)iip);
2081 if (ip != free_ip) { 2061
2062 if (ip != free_ip)
2082 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2063 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2083 }
2084 } 2064 }
2085 2065
2086 if (found || pre_flushed) 2066 if (found)
2087 xfs_trans_stale_inode_buf(tp, bp); 2067 xfs_trans_stale_inode_buf(tp, bp);
2088 xfs_trans_binval(tp, bp); 2068 xfs_trans_binval(tp, bp);
2089 } 2069 }
2090 2070
2091 kmem_free(ip_found);
2092 xfs_perag_put(pag); 2071 xfs_perag_put(pag);
2093} 2072}
2094 2073
@@ -2649,8 +2628,6 @@ xfs_iflush_cluster(
2649 int i; 2628 int i;
2650 2629
2651 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 2630 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
2652 ASSERT(pag->pagi_inodeok);
2653 ASSERT(pag->pag_ici_init);
2654 2631
2655 inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; 2632 inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
2656 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 2633 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 9965e40a4615..78550df13cd6 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -442,7 +442,7 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
442 * xfs_iget.c prototypes. 442 * xfs_iget.c prototypes.
443 */ 443 */
444int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 444int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
445 uint, uint, xfs_inode_t **, xfs_daddr_t); 445 uint, uint, xfs_inode_t **);
446void xfs_iput(xfs_inode_t *, uint); 446void xfs_iput(xfs_inode_t *, uint);
447void xfs_iput_new(xfs_inode_t *, uint); 447void xfs_iput_new(xfs_inode_t *, uint);
448void xfs_ilock(xfs_inode_t *, uint); 448void xfs_ilock(xfs_inode_t *, uint);
@@ -500,7 +500,7 @@ do { \
500 * Flags for xfs_iget() 500 * Flags for xfs_iget()
501 */ 501 */
502#define XFS_IGET_CREATE 0x1 502#define XFS_IGET_CREATE 0x1
503#define XFS_IGET_BULKSTAT 0x2 503#define XFS_IGET_UNTRUSTED 0x2
504 504
505int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, 505int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
506 xfs_ino_t, struct xfs_dinode **, 506 xfs_ino_t, struct xfs_dinode **,
@@ -509,7 +509,7 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
509 struct xfs_inode *, struct xfs_dinode **, 509 struct xfs_inode *, struct xfs_dinode **,
510 struct xfs_buf **, uint); 510 struct xfs_buf **, uint);
511int xfs_iread(struct xfs_mount *, struct xfs_trans *, 511int xfs_iread(struct xfs_mount *, struct xfs_trans *,
512 struct xfs_inode *, xfs_daddr_t, uint); 512 struct xfs_inode *, uint);
513void xfs_dinode_to_disk(struct xfs_dinode *, 513void xfs_dinode_to_disk(struct xfs_dinode *,
514 struct xfs_icdinode *); 514 struct xfs_icdinode *);
515void xfs_idestroy_fork(struct xfs_inode *, int); 515void xfs_idestroy_fork(struct xfs_inode *, int);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index b1b801e4a28e..2b86f8610512 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -49,24 +49,40 @@ xfs_internal_inum(
49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); 49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
50} 50}
51 51
52STATIC int 52/*
53xfs_bulkstat_one_iget( 53 * Return stat information for one inode.
54 xfs_mount_t *mp, /* mount point for filesystem */ 54 * Return 0 if ok, else errno.
55 xfs_ino_t ino, /* inode number to get data for */ 55 */
56 xfs_daddr_t bno, /* starting bno of inode cluster */ 56int
57 xfs_bstat_t *buf, /* return buffer */ 57xfs_bulkstat_one_int(
58 int *stat) /* BULKSTAT_RV_... */ 58 struct xfs_mount *mp, /* mount point for filesystem */
59 xfs_ino_t ino, /* inode to get data for */
60 void __user *buffer, /* buffer to place output in */
61 int ubsize, /* size of buffer */
62 bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
63 int *ubused, /* bytes used by me */
64 int *stat) /* BULKSTAT_RV_... */
59{ 65{
60 xfs_icdinode_t *dic; /* dinode core info pointer */ 66 struct xfs_icdinode *dic; /* dinode core info pointer */
61 xfs_inode_t *ip; /* incore inode pointer */ 67 struct xfs_inode *ip; /* incore inode pointer */
62 struct inode *inode; 68 struct inode *inode;
63 int error; 69 struct xfs_bstat *buf; /* return buffer */
70 int error = 0; /* error value */
71
72 *stat = BULKSTAT_RV_NOTHING;
73
74 if (!buffer || xfs_internal_inum(mp, ino))
75 return XFS_ERROR(EINVAL);
76
77 buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
78 if (!buf)
79 return XFS_ERROR(ENOMEM);
64 80
65 error = xfs_iget(mp, NULL, ino, 81 error = xfs_iget(mp, NULL, ino,
66 XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); 82 XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip);
67 if (error) { 83 if (error) {
68 *stat = BULKSTAT_RV_NOTHING; 84 *stat = BULKSTAT_RV_NOTHING;
69 return error; 85 goto out_free;
70 } 86 }
71 87
72 ASSERT(ip != NULL); 88 ASSERT(ip != NULL);
@@ -127,77 +143,16 @@ xfs_bulkstat_one_iget(
127 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; 143 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
128 break; 144 break;
129 } 145 }
130
131 xfs_iput(ip, XFS_ILOCK_SHARED); 146 xfs_iput(ip, XFS_ILOCK_SHARED);
132 return error;
133}
134 147
135STATIC void 148 error = formatter(buffer, ubsize, ubused, buf);
136xfs_bulkstat_one_dinode(
137 xfs_mount_t *mp, /* mount point for filesystem */
138 xfs_ino_t ino, /* inode number to get data for */
139 xfs_dinode_t *dic, /* dinode inode pointer */
140 xfs_bstat_t *buf) /* return buffer */
141{
142 /*
143 * The inode format changed when we moved the link count and
144 * made it 32 bits long. If this is an old format inode,
145 * convert it in memory to look like a new one. If it gets
146 * flushed to disk we will convert back before flushing or
147 * logging it. We zero out the new projid field and the old link
148 * count field. We'll handle clearing the pad field (the remains
149 * of the old uuid field) when we actually convert the inode to
150 * the new format. We don't change the version number so that we
151 * can distinguish this from a real new format inode.
152 */
153 if (dic->di_version == 1) {
154 buf->bs_nlink = be16_to_cpu(dic->di_onlink);
155 buf->bs_projid = 0;
156 } else {
157 buf->bs_nlink = be32_to_cpu(dic->di_nlink);
158 buf->bs_projid = be16_to_cpu(dic->di_projid);
159 }
160 149
161 buf->bs_ino = ino; 150 if (!error)
162 buf->bs_mode = be16_to_cpu(dic->di_mode); 151 *stat = BULKSTAT_RV_DIDONE;
163 buf->bs_uid = be32_to_cpu(dic->di_uid);
164 buf->bs_gid = be32_to_cpu(dic->di_gid);
165 buf->bs_size = be64_to_cpu(dic->di_size);
166 buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec);
167 buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec);
168 buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec);
169 buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec);
170 buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec);
171 buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec);
172 buf->bs_xflags = xfs_dic2xflags(dic);
173 buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
174 buf->bs_extents = be32_to_cpu(dic->di_nextents);
175 buf->bs_gen = be32_to_cpu(dic->di_gen);
176 memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
177 buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
178 buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
179 buf->bs_aextents = be16_to_cpu(dic->di_anextents);
180 buf->bs_forkoff = XFS_DFORK_BOFF(dic);
181 152
182 switch (dic->di_format) { 153 out_free:
183 case XFS_DINODE_FMT_DEV: 154 kmem_free(buf);
184 buf->bs_rdev = xfs_dinode_get_rdev(dic); 155 return error;
185 buf->bs_blksize = BLKDEV_IOSIZE;
186 buf->bs_blocks = 0;
187 break;
188 case XFS_DINODE_FMT_LOCAL:
189 case XFS_DINODE_FMT_UUID:
190 buf->bs_rdev = 0;
191 buf->bs_blksize = mp->m_sb.sb_blocksize;
192 buf->bs_blocks = 0;
193 break;
194 case XFS_DINODE_FMT_EXTENTS:
195 case XFS_DINODE_FMT_BTREE:
196 buf->bs_rdev = 0;
197 buf->bs_blksize = mp->m_sb.sb_blocksize;
198 buf->bs_blocks = be64_to_cpu(dic->di_nblocks);
199 break;
200 }
201} 156}
202 157
203/* Return 0 on success or positive error */ 158/* Return 0 on success or positive error */
@@ -217,118 +172,17 @@ xfs_bulkstat_one_fmt(
217 return 0; 172 return 0;
218} 173}
219 174
220/*
221 * Return stat information for one inode.
222 * Return 0 if ok, else errno.
223 */
224int /* error status */
225xfs_bulkstat_one_int(
226 xfs_mount_t *mp, /* mount point for filesystem */
227 xfs_ino_t ino, /* inode number to get data for */
228 void __user *buffer, /* buffer to place output in */
229 int ubsize, /* size of buffer */
230 bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
231 xfs_daddr_t bno, /* starting bno of inode cluster */
232 int *ubused, /* bytes used by me */
233 void *dibuff, /* on-disk inode buffer */
234 int *stat) /* BULKSTAT_RV_... */
235{
236 xfs_bstat_t *buf; /* return buffer */
237 int error = 0; /* error value */
238 xfs_dinode_t *dip; /* dinode inode pointer */
239
240 dip = (xfs_dinode_t *)dibuff;
241 *stat = BULKSTAT_RV_NOTHING;
242
243 if (!buffer || xfs_internal_inum(mp, ino))
244 return XFS_ERROR(EINVAL);
245
246 buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
247
248 if (dip == NULL) {
249 /* We're not being passed a pointer to a dinode. This happens
250 * if BULKSTAT_FG_IGET is selected. Do the iget.
251 */
252 error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat);
253 if (error)
254 goto out_free;
255 } else {
256 xfs_bulkstat_one_dinode(mp, ino, dip, buf);
257 }
258
259 error = formatter(buffer, ubsize, ubused, buf);
260 if (error)
261 goto out_free;
262
263 *stat = BULKSTAT_RV_DIDONE;
264
265 out_free:
266 kmem_free(buf);
267 return error;
268}
269
270int 175int
271xfs_bulkstat_one( 176xfs_bulkstat_one(
272 xfs_mount_t *mp, /* mount point for filesystem */ 177 xfs_mount_t *mp, /* mount point for filesystem */
273 xfs_ino_t ino, /* inode number to get data for */ 178 xfs_ino_t ino, /* inode number to get data for */
274 void __user *buffer, /* buffer to place output in */ 179 void __user *buffer, /* buffer to place output in */
275 int ubsize, /* size of buffer */ 180 int ubsize, /* size of buffer */
276 void *private_data, /* my private data */
277 xfs_daddr_t bno, /* starting bno of inode cluster */
278 int *ubused, /* bytes used by me */ 181 int *ubused, /* bytes used by me */
279 void *dibuff, /* on-disk inode buffer */
280 int *stat) /* BULKSTAT_RV_... */ 182 int *stat) /* BULKSTAT_RV_... */
281{ 183{
282 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 184 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
283 xfs_bulkstat_one_fmt, bno, 185 xfs_bulkstat_one_fmt, ubused, stat);
284 ubused, dibuff, stat);
285}
286
287/*
288 * Test to see whether we can use the ondisk inode directly, based
289 * on the given bulkstat flags, filling in dipp accordingly.
290 * Returns zero if the inode is dodgey.
291 */
292STATIC int
293xfs_bulkstat_use_dinode(
294 xfs_mount_t *mp,
295 int flags,
296 xfs_buf_t *bp,
297 int clustidx,
298 xfs_dinode_t **dipp)
299{
300 xfs_dinode_t *dip;
301 unsigned int aformat;
302
303 *dipp = NULL;
304 if (!bp || (flags & BULKSTAT_FG_IGET))
305 return 1;
306 dip = (xfs_dinode_t *)
307 xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
308 /*
309 * Check the buffer containing the on-disk inode for di_mode == 0.
310 * This is to prevent xfs_bulkstat from picking up just reclaimed
311 * inodes that have their in-core state initialized but not flushed
312 * to disk yet. This is a temporary hack that would require a proper
313 * fix in the future.
314 */
315 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
316 !XFS_DINODE_GOOD_VERSION(dip->di_version) ||
317 !dip->di_mode)
318 return 0;
319 if (flags & BULKSTAT_FG_QUICK) {
320 *dipp = dip;
321 return 1;
322 }
323 /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
324 aformat = dip->di_aformat;
325 if ((XFS_DFORK_Q(dip) == 0) ||
326 (aformat == XFS_DINODE_FMT_LOCAL) ||
327 (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) {
328 *dipp = dip;
329 return 1;
330 }
331 return 1;
332} 186}
333 187
334#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) 188#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
@@ -342,10 +196,8 @@ xfs_bulkstat(
342 xfs_ino_t *lastinop, /* last inode returned */ 196 xfs_ino_t *lastinop, /* last inode returned */
343 int *ubcountp, /* size of buffer/count returned */ 197 int *ubcountp, /* size of buffer/count returned */
344 bulkstat_one_pf formatter, /* func that'd fill a single buf */ 198 bulkstat_one_pf formatter, /* func that'd fill a single buf */
345 void *private_data,/* private data for formatter */
346 size_t statstruct_size, /* sizeof struct filling */ 199 size_t statstruct_size, /* sizeof struct filling */
347 char __user *ubuffer, /* buffer with inode stats */ 200 char __user *ubuffer, /* buffer with inode stats */
348 int flags, /* defined in xfs_itable.h */
349 int *done) /* 1 if there are more stats to get */ 201 int *done) /* 1 if there are more stats to get */
350{ 202{
351 xfs_agblock_t agbno=0;/* allocation group block number */ 203 xfs_agblock_t agbno=0;/* allocation group block number */
@@ -380,14 +232,12 @@ xfs_bulkstat(
380 int ubelem; /* spaces used in user's buffer */ 232 int ubelem; /* spaces used in user's buffer */
381 int ubused; /* bytes used by formatter */ 233 int ubused; /* bytes used by formatter */
382 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ 234 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */
383 xfs_dinode_t *dip; /* ptr into bp for specific inode */
384 235
385 /* 236 /*
386 * Get the last inode value, see if there's nothing to do. 237 * Get the last inode value, see if there's nothing to do.
387 */ 238 */
388 ino = (xfs_ino_t)*lastinop; 239 ino = (xfs_ino_t)*lastinop;
389 lastino = ino; 240 lastino = ino;
390 dip = NULL;
391 agno = XFS_INO_TO_AGNO(mp, ino); 241 agno = XFS_INO_TO_AGNO(mp, ino);
392 agino = XFS_INO_TO_AGINO(mp, ino); 242 agino = XFS_INO_TO_AGINO(mp, ino);
393 if (agno >= mp->m_sb.sb_agcount || 243 if (agno >= mp->m_sb.sb_agcount ||
@@ -612,37 +462,6 @@ xfs_bulkstat(
612 irbp->ir_startino) + 462 irbp->ir_startino) +
613 ((chunkidx & nimask) >> 463 ((chunkidx & nimask) >>
614 mp->m_sb.sb_inopblog); 464 mp->m_sb.sb_inopblog);
615
616 if (flags & (BULKSTAT_FG_QUICK |
617 BULKSTAT_FG_INLINE)) {
618 int offset;
619
620 ino = XFS_AGINO_TO_INO(mp, agno,
621 agino);
622 bno = XFS_AGB_TO_DADDR(mp, agno,
623 agbno);
624
625 /*
626 * Get the inode cluster buffer
627 */
628 if (bp)
629 xfs_buf_relse(bp);
630
631 error = xfs_inotobp(mp, NULL, ino, &dip,
632 &bp, &offset,
633 XFS_IGET_BULKSTAT);
634
635 if (!error)
636 clustidx = offset / mp->m_sb.sb_inodesize;
637 if (XFS_TEST_ERROR(error != 0,
638 mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
639 XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
640 bp = NULL;
641 ubleft = 0;
642 rval = error;
643 break;
644 }
645 }
646 } 465 }
647 ino = XFS_AGINO_TO_INO(mp, agno, agino); 466 ino = XFS_AGINO_TO_INO(mp, agno, agino);
648 bno = XFS_AGB_TO_DADDR(mp, agno, agbno); 467 bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
@@ -658,35 +477,13 @@ xfs_bulkstat(
658 * when the chunk is used up. 477 * when the chunk is used up.
659 */ 478 */
660 irbp->ir_freecount++; 479 irbp->ir_freecount++;
661 if (!xfs_bulkstat_use_dinode(mp, flags, bp,
662 clustidx, &dip)) {
663 lastino = ino;
664 continue;
665 }
666 /*
667 * If we need to do an iget, cannot hold bp.
668 * Drop it, until starting the next cluster.
669 */
670 if ((flags & BULKSTAT_FG_INLINE) && !dip) {
671 if (bp)
672 xfs_buf_relse(bp);
673 bp = NULL;
674 }
675 480
676 /* 481 /*
677 * Get the inode and fill in a single buffer. 482 * Get the inode and fill in a single buffer.
678 * BULKSTAT_FG_QUICK uses dip to fill it in.
679 * BULKSTAT_FG_IGET uses igets.
680 * BULKSTAT_FG_INLINE uses dip if we have an
681 * inline attr fork, else igets.
682 * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
683 * This is also used to count inodes/blks, etc
684 * in xfs_qm_quotacheck.
685 */ 483 */
686 ubused = statstruct_size; 484 ubused = statstruct_size;
687 error = formatter(mp, ino, ubufp, 485 error = formatter(mp, ino, ubufp, ubleft,
688 ubleft, private_data, 486 &ubused, &fmterror);
689 bno, &ubused, dip, &fmterror);
690 if (fmterror == BULKSTAT_RV_NOTHING) { 487 if (fmterror == BULKSTAT_RV_NOTHING) {
691 if (error && error != ENOENT && 488 if (error && error != ENOENT &&
692 error != EINVAL) { 489 error != EINVAL) {
@@ -778,8 +575,7 @@ xfs_bulkstat_single(
778 */ 575 */
779 576
780 ino = (xfs_ino_t)*lastinop; 577 ino = (xfs_ino_t)*lastinop;
781 error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 578 error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res);
782 NULL, 0, NULL, NULL, &res);
783 if (error) { 579 if (error) {
784 /* 580 /*
785 * Special case way failed, do it the "long" way 581 * Special case way failed, do it the "long" way
@@ -788,8 +584,7 @@ xfs_bulkstat_single(
788 (*lastinop)--; 584 (*lastinop)--;
789 count = 1; 585 count = 1;
790 if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one, 586 if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
791 NULL, sizeof(xfs_bstat_t), buffer, 587 sizeof(xfs_bstat_t), buffer, done))
792 BULKSTAT_FG_IGET, done))
793 return error; 588 return error;
794 if (count == 0 || (xfs_ino_t)*lastinop != ino) 589 if (count == 0 || (xfs_ino_t)*lastinop != ino)
795 return error == EFSCORRUPTED ? 590 return error == EFSCORRUPTED ?
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 20792bf45946..97295d91d170 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -27,10 +27,7 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
27 xfs_ino_t ino, 27 xfs_ino_t ino,
28 void __user *buffer, 28 void __user *buffer,
29 int ubsize, 29 int ubsize,
30 void *private_data,
31 xfs_daddr_t bno,
32 int *ubused, 30 int *ubused,
33 void *dip,
34 int *stat); 31 int *stat);
35 32
36/* 33/*
@@ -41,13 +38,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
41#define BULKSTAT_RV_GIVEUP 2 38#define BULKSTAT_RV_GIVEUP 2
42 39
43/* 40/*
44 * Values for bulkstat flag argument.
45 */
46#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */
47#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */
48#define BULKSTAT_FG_INLINE 0x4 /* No iget if inline attrs */
49
50/*
51 * Return stat information in bulk (by-inode) for the filesystem. 41 * Return stat information in bulk (by-inode) for the filesystem.
52 */ 42 */
53int /* error status */ 43int /* error status */
@@ -56,10 +46,8 @@ xfs_bulkstat(
56 xfs_ino_t *lastino, /* last inode returned */ 46 xfs_ino_t *lastino, /* last inode returned */
57 int *count, /* size of buffer/count returned */ 47 int *count, /* size of buffer/count returned */
58 bulkstat_one_pf formatter, /* func that'd fill a single buf */ 48 bulkstat_one_pf formatter, /* func that'd fill a single buf */
59 void *private_data, /* private data for formatter */
60 size_t statstruct_size,/* sizeof struct that we're filling */ 49 size_t statstruct_size,/* sizeof struct that we're filling */
61 char __user *ubuffer,/* buffer with inode stats */ 50 char __user *ubuffer,/* buffer with inode stats */
62 int flags, /* flag to control access method */
63 int *done); /* 1 if there are more stats to get */ 51 int *done); /* 1 if there are more stats to get */
64 52
65int 53int
@@ -82,9 +70,7 @@ xfs_bulkstat_one_int(
82 void __user *buffer, 70 void __user *buffer,
83 int ubsize, 71 int ubsize,
84 bulkstat_one_fmt_pf formatter, 72 bulkstat_one_fmt_pf formatter,
85 xfs_daddr_t bno,
86 int *ubused, 73 int *ubused,
87 void *dibuff,
88 int *stat); 74 int *stat);
89 75
90int 76int
@@ -93,10 +79,7 @@ xfs_bulkstat_one(
93 xfs_ino_t ino, 79 xfs_ino_t ino,
94 void __user *buffer, 80 void __user *buffer,
95 int ubsize, 81 int ubsize,
96 void *private_data,
97 xfs_daddr_t bno,
98 int *ubused, 82 int *ubused,
99 void *dibuff,
100 int *stat); 83 int *stat);
101 84
102typedef int (*inumbers_fmt_pf)( 85typedef int (*inumbers_fmt_pf)(
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 14a69aec2c0b..9ac5cfab27b9 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -132,15 +132,10 @@ xlog_align(
132 int nbblks, 132 int nbblks,
133 xfs_buf_t *bp) 133 xfs_buf_t *bp)
134{ 134{
135 xfs_daddr_t offset; 135 xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
136 xfs_caddr_t ptr;
137 136
138 offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1); 137 ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
139 ptr = XFS_BUF_PTR(bp) + BBTOB(offset); 138 return XFS_BUF_PTR(bp) + BBTOB(offset);
140
141 ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp));
142
143 return ptr;
144} 139}
145 140
146 141
@@ -3203,7 +3198,7 @@ xlog_recover_process_one_iunlink(
3203 int error; 3198 int error;
3204 3199
3205 ino = XFS_AGINO_TO_INO(mp, agno, agino); 3200 ino = XFS_AGINO_TO_INO(mp, agno, agino);
3206 error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); 3201 error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
3207 if (error) 3202 if (error)
3208 goto fail; 3203 goto fail;
3209 3204
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d7bf38c8cd1c..69f62d8b2816 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -268,10 +268,10 @@ xfs_sb_validate_fsb_count(
268 268
269#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ 269#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
270 if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX) 270 if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
271 return E2BIG; 271 return EFBIG;
272#else /* Limited by UINT_MAX of sectors */ 272#else /* Limited by UINT_MAX of sectors */
273 if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX) 273 if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
274 return E2BIG; 274 return EFBIG;
275#endif 275#endif
276 return 0; 276 return 0;
277} 277}
@@ -393,7 +393,7 @@ xfs_mount_validate_sb(
393 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { 393 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
394 xfs_fs_mount_cmn_err(flags, 394 xfs_fs_mount_cmn_err(flags,
395 "file system too large to be mounted on this system."); 395 "file system too large to be mounted on this system.");
396 return XFS_ERROR(E2BIG); 396 return XFS_ERROR(EFBIG);
397 } 397 }
398 398
399 if (unlikely(sbp->sb_inprogress)) { 399 if (unlikely(sbp->sb_inprogress)) {
@@ -413,17 +413,6 @@ xfs_mount_validate_sb(
413 return 0; 413 return 0;
414} 414}
415 415
416STATIC void
417xfs_initialize_perag_icache(
418 xfs_perag_t *pag)
419{
420 if (!pag->pag_ici_init) {
421 rwlock_init(&pag->pag_ici_lock);
422 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
423 pag->pag_ici_init = 1;
424 }
425}
426
427int 416int
428xfs_initialize_perag( 417xfs_initialize_perag(
429 xfs_mount_t *mp, 418 xfs_mount_t *mp,
@@ -436,13 +425,8 @@ xfs_initialize_perag(
436 xfs_agino_t agino; 425 xfs_agino_t agino;
437 xfs_ino_t ino; 426 xfs_ino_t ino;
438 xfs_sb_t *sbp = &mp->m_sb; 427 xfs_sb_t *sbp = &mp->m_sb;
439 xfs_ino_t max_inum = XFS_MAXINUMBER_32;
440 int error = -ENOMEM; 428 int error = -ENOMEM;
441 429
442 /* Check to see if the filesystem can overflow 32 bit inodes */
443 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
444 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
445
446 /* 430 /*
447 * Walk the current per-ag tree so we don't try to initialise AGs 431 * Walk the current per-ag tree so we don't try to initialise AGs
448 * that already exist (growfs case). Allocate and insert all the 432 * that already exist (growfs case). Allocate and insert all the
@@ -456,11 +440,18 @@ xfs_initialize_perag(
456 } 440 }
457 if (!first_initialised) 441 if (!first_initialised)
458 first_initialised = index; 442 first_initialised = index;
443
459 pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); 444 pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
460 if (!pag) 445 if (!pag)
461 goto out_unwind; 446 goto out_unwind;
447 pag->pag_agno = index;
448 pag->pag_mount = mp;
449 rwlock_init(&pag->pag_ici_lock);
450 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
451
462 if (radix_tree_preload(GFP_NOFS)) 452 if (radix_tree_preload(GFP_NOFS))
463 goto out_unwind; 453 goto out_unwind;
454
464 spin_lock(&mp->m_perag_lock); 455 spin_lock(&mp->m_perag_lock);
465 if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { 456 if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
466 BUG(); 457 BUG();
@@ -469,25 +460,26 @@ xfs_initialize_perag(
469 error = -EEXIST; 460 error = -EEXIST;
470 goto out_unwind; 461 goto out_unwind;
471 } 462 }
472 pag->pag_agno = index;
473 pag->pag_mount = mp;
474 spin_unlock(&mp->m_perag_lock); 463 spin_unlock(&mp->m_perag_lock);
475 radix_tree_preload_end(); 464 radix_tree_preload_end();
476 } 465 }
477 466
478 /* Clear the mount flag if no inode can overflow 32 bits 467 /*
479 * on this filesystem, or if specifically requested.. 468 * If we mount with the inode64 option, or no inode overflows
469 * the legacy 32-bit address space clear the inode32 option.
480 */ 470 */
481 if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > max_inum) { 471 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
472 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
473
474 if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
482 mp->m_flags |= XFS_MOUNT_32BITINODES; 475 mp->m_flags |= XFS_MOUNT_32BITINODES;
483 } else { 476 else
484 mp->m_flags &= ~XFS_MOUNT_32BITINODES; 477 mp->m_flags &= ~XFS_MOUNT_32BITINODES;
485 }
486 478
487 /* If we can overflow then setup the ag headers accordingly */
488 if (mp->m_flags & XFS_MOUNT_32BITINODES) { 479 if (mp->m_flags & XFS_MOUNT_32BITINODES) {
489 /* Calculate how much should be reserved for inodes to 480 /*
490 * meet the max inode percentage. 481 * Calculate how much should be reserved for inodes to meet
482 * the max inode percentage.
491 */ 483 */
492 if (mp->m_maxicount) { 484 if (mp->m_maxicount) {
493 __uint64_t icount; 485 __uint64_t icount;
@@ -500,30 +492,28 @@ xfs_initialize_perag(
500 } else { 492 } else {
501 max_metadata = agcount; 493 max_metadata = agcount;
502 } 494 }
495
503 for (index = 0; index < agcount; index++) { 496 for (index = 0; index < agcount; index++) {
504 ino = XFS_AGINO_TO_INO(mp, index, agino); 497 ino = XFS_AGINO_TO_INO(mp, index, agino);
505 if (ino > max_inum) { 498 if (ino > XFS_MAXINUMBER_32) {
506 index++; 499 index++;
507 break; 500 break;
508 } 501 }
509 502
510 /* This ag is preferred for inodes */
511 pag = xfs_perag_get(mp, index); 503 pag = xfs_perag_get(mp, index);
512 pag->pagi_inodeok = 1; 504 pag->pagi_inodeok = 1;
513 if (index < max_metadata) 505 if (index < max_metadata)
514 pag->pagf_metadata = 1; 506 pag->pagf_metadata = 1;
515 xfs_initialize_perag_icache(pag);
516 xfs_perag_put(pag); 507 xfs_perag_put(pag);
517 } 508 }
518 } else { 509 } else {
519 /* Setup default behavior for smaller filesystems */
520 for (index = 0; index < agcount; index++) { 510 for (index = 0; index < agcount; index++) {
521 pag = xfs_perag_get(mp, index); 511 pag = xfs_perag_get(mp, index);
522 pag->pagi_inodeok = 1; 512 pag->pagi_inodeok = 1;
523 xfs_initialize_perag_icache(pag);
524 xfs_perag_put(pag); 513 xfs_perag_put(pag);
525 } 514 }
526 } 515 }
516
527 if (maxagi) 517 if (maxagi)
528 *maxagi = index; 518 *maxagi = index;
529 return 0; 519 return 0;
@@ -1009,7 +999,7 @@ xfs_check_sizes(xfs_mount_t *mp)
1009 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 999 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
1010 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { 1000 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
1011 cmn_err(CE_WARN, "XFS: size check 1 failed"); 1001 cmn_err(CE_WARN, "XFS: size check 1 failed");
1012 return XFS_ERROR(E2BIG); 1002 return XFS_ERROR(EFBIG);
1013 } 1003 }
1014 error = xfs_read_buf(mp, mp->m_ddev_targp, 1004 error = xfs_read_buf(mp, mp->m_ddev_targp,
1015 d - XFS_FSS_TO_BB(mp, 1), 1005 d - XFS_FSS_TO_BB(mp, 1),
@@ -1019,7 +1009,7 @@ xfs_check_sizes(xfs_mount_t *mp)
1019 } else { 1009 } else {
1020 cmn_err(CE_WARN, "XFS: size check 2 failed"); 1010 cmn_err(CE_WARN, "XFS: size check 2 failed");
1021 if (error == ENOSPC) 1011 if (error == ENOSPC)
1022 error = XFS_ERROR(E2BIG); 1012 error = XFS_ERROR(EFBIG);
1023 return error; 1013 return error;
1024 } 1014 }
1025 1015
@@ -1027,7 +1017,7 @@ xfs_check_sizes(xfs_mount_t *mp)
1027 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 1017 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
1028 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { 1018 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
1029 cmn_err(CE_WARN, "XFS: size check 3 failed"); 1019 cmn_err(CE_WARN, "XFS: size check 3 failed");
1030 return XFS_ERROR(E2BIG); 1020 return XFS_ERROR(EFBIG);
1031 } 1021 }
1032 error = xfs_read_buf(mp, mp->m_logdev_targp, 1022 error = xfs_read_buf(mp, mp->m_logdev_targp,
1033 d - XFS_FSB_TO_BB(mp, 1), 1023 d - XFS_FSB_TO_BB(mp, 1),
@@ -1037,7 +1027,7 @@ xfs_check_sizes(xfs_mount_t *mp)
1037 } else { 1027 } else {
1038 cmn_err(CE_WARN, "XFS: size check 3 failed"); 1028 cmn_err(CE_WARN, "XFS: size check 3 failed");
1039 if (error == ENOSPC) 1029 if (error == ENOSPC)
1040 error = XFS_ERROR(E2BIG); 1030 error = XFS_ERROR(EFBIG);
1041 return error; 1031 return error;
1042 } 1032 }
1043 } 1033 }
@@ -1254,7 +1244,7 @@ xfs_mountfs(
1254 * Allocate and initialize the per-ag data. 1244 * Allocate and initialize the per-ag data.
1255 */ 1245 */
1256 spin_lock_init(&mp->m_perag_lock); 1246 spin_lock_init(&mp->m_perag_lock);
1257 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS); 1247 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
1258 error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); 1248 error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
1259 if (error) { 1249 if (error) {
1260 cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error); 1250 cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
@@ -1310,7 +1300,7 @@ xfs_mountfs(
1310 * Get and sanity-check the root inode. 1300 * Get and sanity-check the root inode.
1311 * Save the pointer to it in the mount structure. 1301 * Save the pointer to it in the mount structure.
1312 */ 1302 */
1313 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0); 1303 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip);
1314 if (error) { 1304 if (error) {
1315 cmn_err(CE_WARN, "XFS: failed to read root inode"); 1305 cmn_err(CE_WARN, "XFS: failed to read root inode");
1316 goto out_log_dealloc; 1306 goto out_log_dealloc;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6be05f756d59..a2d32ce335aa 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2247,7 +2247,7 @@ xfs_rtmount_init(
2247 cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu", 2247 cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu",
2248 (unsigned long long) XFS_BB_TO_FSB(mp, d), 2248 (unsigned long long) XFS_BB_TO_FSB(mp, d),
2249 (unsigned long long) mp->m_sb.sb_rblocks); 2249 (unsigned long long) mp->m_sb.sb_rblocks);
2250 return XFS_ERROR(E2BIG); 2250 return XFS_ERROR(EFBIG);
2251 } 2251 }
2252 error = xfs_read_buf(mp, mp->m_rtdev_targp, 2252 error = xfs_read_buf(mp, mp->m_rtdev_targp,
2253 d - XFS_FSB_TO_BB(mp, 1), 2253 d - XFS_FSB_TO_BB(mp, 1),
@@ -2256,7 +2256,7 @@ xfs_rtmount_init(
2256 cmn_err(CE_WARN, 2256 cmn_err(CE_WARN,
2257 "XFS: realtime mount -- xfs_read_buf failed, returned %d", error); 2257 "XFS: realtime mount -- xfs_read_buf failed, returned %d", error);
2258 if (error == ENOSPC) 2258 if (error == ENOSPC)
2259 return XFS_ERROR(E2BIG); 2259 return XFS_ERROR(EFBIG);
2260 return error; 2260 return error;
2261 } 2261 }
2262 xfs_buf_relse(bp); 2262 xfs_buf_relse(bp);
@@ -2277,12 +2277,12 @@ xfs_rtmount_inodes(
2277 sbp = &mp->m_sb; 2277 sbp = &mp->m_sb;
2278 if (sbp->sb_rbmino == NULLFSINO) 2278 if (sbp->sb_rbmino == NULLFSINO)
2279 return 0; 2279 return 0;
2280 error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip, 0); 2280 error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
2281 if (error) 2281 if (error)
2282 return error; 2282 return error;
2283 ASSERT(mp->m_rbmip != NULL); 2283 ASSERT(mp->m_rbmip != NULL);
2284 ASSERT(sbp->sb_rsumino != NULLFSINO); 2284 ASSERT(sbp->sb_rsumino != NULLFSINO);
2285 error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0); 2285 error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
2286 if (error) { 2286 if (error) {
2287 IRELE(mp->m_rbmip); 2287 IRELE(mp->m_rbmip);
2288 return error; 2288 return error;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index b2d67adb6a08..ff614c29b441 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -147,7 +147,16 @@ xfs_growfs_rt(
147# define xfs_rtfree_extent(t,b,l) (ENOSYS) 147# define xfs_rtfree_extent(t,b,l) (ENOSYS)
148# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) 148# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS)
149# define xfs_growfs_rt(mp,in) (ENOSYS) 149# define xfs_growfs_rt(mp,in) (ENOSYS)
150# define xfs_rtmount_init(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) 150static inline int /* error */
151xfs_rtmount_init(
152 xfs_mount_t *mp) /* file system mount structure */
153{
154 if (mp->m_sb.sb_rblocks == 0)
155 return 0;
156
157 cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT");
158 return ENOSYS;
159}
151# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) 160# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
152# define xfs_rtunmount_inodes(m) 161# define xfs_rtunmount_inodes(m)
153#endif /* CONFIG_XFS_RT */ 162#endif /* CONFIG_XFS_RT */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ce558efa2ea0..28547dfce037 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -48,134 +48,489 @@
48 48
49kmem_zone_t *xfs_trans_zone; 49kmem_zone_t *xfs_trans_zone;
50 50
51
51/* 52/*
52 * Reservation functions here avoid a huge stack in xfs_trans_init 53 * Various log reservation values.
53 * due to register overflow from temporaries in the calculations. 54 *
55 * These are based on the size of the file system block because that is what
56 * most transactions manipulate. Each adds in an additional 128 bytes per
57 * item logged to try to account for the overhead of the transaction mechanism.
58 *
59 * Note: Most of the reservations underestimate the number of allocation
60 * groups into which they could free extents in the xfs_bmap_finish() call.
61 * This is because the number in the worst case is quite high and quite
62 * unusual. In order to fix this we need to change xfs_bmap_finish() to free
63 * extents in only a single AG at a time. This will require changes to the
64 * EFI code as well, however, so that the EFI for the extents not freed is
65 * logged again in each transaction. See SGI PV #261917.
66 *
67 * Reservation functions here avoid a huge stack in xfs_trans_init due to
68 * register overflow from temporaries in the calculations.
69 */
70
71
72/*
73 * In a write transaction we can allocate a maximum of 2
74 * extents. This gives:
75 * the inode getting the new extents: inode size
76 * the inode's bmap btree: max depth * block size
77 * the agfs of the ags from which the extents are allocated: 2 * sector
78 * the superblock free block counter: sector size
79 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
80 * And the bmap_finish transaction can free bmap blocks in a join:
81 * the agfs of the ags containing the blocks: 2 * sector size
82 * the agfls of the ags containing the blocks: 2 * sector size
83 * the super block free block counter: sector size
84 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
54 */ 85 */
55STATIC uint 86STATIC uint
56xfs_calc_write_reservation(xfs_mount_t *mp) 87xfs_calc_write_reservation(
88 struct xfs_mount *mp)
57{ 89{
58 return XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 90 return XFS_DQUOT_LOGRES(mp) +
91 MAX((mp->m_sb.sb_inodesize +
92 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
93 2 * mp->m_sb.sb_sectsize +
94 mp->m_sb.sb_sectsize +
95 XFS_ALLOCFREE_LOG_RES(mp, 2) +
96 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
97 XFS_ALLOCFREE_LOG_COUNT(mp, 2))),
98 (2 * mp->m_sb.sb_sectsize +
99 2 * mp->m_sb.sb_sectsize +
100 mp->m_sb.sb_sectsize +
101 XFS_ALLOCFREE_LOG_RES(mp, 2) +
102 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
59} 103}
60 104
105/*
106 * In truncating a file we free up to two extents at once. We can modify:
107 * the inode being truncated: inode size
108 * the inode's bmap btree: (max depth + 1) * block size
109 * And the bmap_finish transaction can free the blocks and bmap blocks:
110 * the agf for each of the ags: 4 * sector size
111 * the agfl for each of the ags: 4 * sector size
112 * the super block to reflect the freed blocks: sector size
113 * worst case split in allocation btrees per extent assuming 4 extents:
114 * 4 exts * 2 trees * (2 * max depth - 1) * block size
115 * the inode btree: max depth * blocksize
116 * the allocation btrees: 2 trees * (max depth - 1) * block size
117 */
61STATIC uint 118STATIC uint
62xfs_calc_itruncate_reservation(xfs_mount_t *mp) 119xfs_calc_itruncate_reservation(
120 struct xfs_mount *mp)
63{ 121{
64 return XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 122 return XFS_DQUOT_LOGRES(mp) +
123 MAX((mp->m_sb.sb_inodesize +
124 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) +
125 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
126 (4 * mp->m_sb.sb_sectsize +
127 4 * mp->m_sb.sb_sectsize +
128 mp->m_sb.sb_sectsize +
129 XFS_ALLOCFREE_LOG_RES(mp, 4) +
130 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) +
131 128 * 5 +
132 XFS_ALLOCFREE_LOG_RES(mp, 1) +
133 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
134 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
65} 135}
66 136
137/*
138 * In renaming a files we can modify:
139 * the four inodes involved: 4 * inode size
140 * the two directory btrees: 2 * (max depth + v2) * dir block size
141 * the two directory bmap btrees: 2 * max depth * block size
142 * And the bmap_finish transaction can free dir and bmap blocks (two sets
143 * of bmap blocks) giving:
144 * the agf for the ags in which the blocks live: 3 * sector size
145 * the agfl for the ags in which the blocks live: 3 * sector size
146 * the superblock for the free block count: sector size
147 * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
148 */
67STATIC uint 149STATIC uint
68xfs_calc_rename_reservation(xfs_mount_t *mp) 150xfs_calc_rename_reservation(
151 struct xfs_mount *mp)
69{ 152{
70 return XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 153 return XFS_DQUOT_LOGRES(mp) +
154 MAX((4 * mp->m_sb.sb_inodesize +
155 2 * XFS_DIROP_LOG_RES(mp) +
156 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))),
157 (3 * mp->m_sb.sb_sectsize +
158 3 * mp->m_sb.sb_sectsize +
159 mp->m_sb.sb_sectsize +
160 XFS_ALLOCFREE_LOG_RES(mp, 3) +
161 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
71} 162}
72 163
164/*
165 * For creating a link to an inode:
166 * the parent directory inode: inode size
167 * the linked inode: inode size
168 * the directory btree could split: (max depth + v2) * dir block size
169 * the directory bmap btree could join or split: (max depth + v2) * blocksize
170 * And the bmap_finish transaction can free some bmap blocks giving:
171 * the agf for the ag in which the blocks live: sector size
172 * the agfl for the ag in which the blocks live: sector size
173 * the superblock for the free block count: sector size
174 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
175 */
73STATIC uint 176STATIC uint
74xfs_calc_link_reservation(xfs_mount_t *mp) 177xfs_calc_link_reservation(
178 struct xfs_mount *mp)
75{ 179{
76 return XFS_CALC_LINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 180 return XFS_DQUOT_LOGRES(mp) +
181 MAX((mp->m_sb.sb_inodesize +
182 mp->m_sb.sb_inodesize +
183 XFS_DIROP_LOG_RES(mp) +
184 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
185 (mp->m_sb.sb_sectsize +
186 mp->m_sb.sb_sectsize +
187 mp->m_sb.sb_sectsize +
188 XFS_ALLOCFREE_LOG_RES(mp, 1) +
189 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
77} 190}
78 191
192/*
193 * For removing a directory entry we can modify:
194 * the parent directory inode: inode size
195 * the removed inode: inode size
196 * the directory btree could join: (max depth + v2) * dir block size
197 * the directory bmap btree could join or split: (max depth + v2) * blocksize
198 * And the bmap_finish transaction can free the dir and bmap blocks giving:
199 * the agf for the ag in which the blocks live: 2 * sector size
200 * the agfl for the ag in which the blocks live: 2 * sector size
201 * the superblock for the free block count: sector size
202 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
203 */
79STATIC uint 204STATIC uint
80xfs_calc_remove_reservation(xfs_mount_t *mp) 205xfs_calc_remove_reservation(
206 struct xfs_mount *mp)
81{ 207{
82 return XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 208 return XFS_DQUOT_LOGRES(mp) +
209 MAX((mp->m_sb.sb_inodesize +
210 mp->m_sb.sb_inodesize +
211 XFS_DIROP_LOG_RES(mp) +
212 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
213 (2 * mp->m_sb.sb_sectsize +
214 2 * mp->m_sb.sb_sectsize +
215 mp->m_sb.sb_sectsize +
216 XFS_ALLOCFREE_LOG_RES(mp, 2) +
217 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
83} 218}
84 219
220/*
221 * For symlink we can modify:
222 * the parent directory inode: inode size
223 * the new inode: inode size
224 * the inode btree entry: 1 block
225 * the directory btree: (max depth + v2) * dir block size
226 * the directory inode's bmap btree: (max depth + v2) * block size
227 * the blocks for the symlink: 1 kB
228 * Or in the first xact we allocate some inodes giving:
229 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
230 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
231 * the inode btree: max depth * blocksize
232 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
233 */
85STATIC uint 234STATIC uint
86xfs_calc_symlink_reservation(xfs_mount_t *mp) 235xfs_calc_symlink_reservation(
236 struct xfs_mount *mp)
87{ 237{
88 return XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 238 return XFS_DQUOT_LOGRES(mp) +
239 MAX((mp->m_sb.sb_inodesize +
240 mp->m_sb.sb_inodesize +
241 XFS_FSB_TO_B(mp, 1) +
242 XFS_DIROP_LOG_RES(mp) +
243 1024 +
244 128 * (4 + XFS_DIROP_LOG_COUNT(mp))),
245 (2 * mp->m_sb.sb_sectsize +
246 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
247 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
248 XFS_ALLOCFREE_LOG_RES(mp, 1) +
249 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
250 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
89} 251}
90 252
253/*
254 * For create we can modify:
255 * the parent directory inode: inode size
256 * the new inode: inode size
257 * the inode btree entry: block size
258 * the superblock for the nlink flag: sector size
259 * the directory btree: (max depth + v2) * dir block size
260 * the directory inode's bmap btree: (max depth + v2) * block size
261 * Or in the first xact we allocate some inodes giving:
262 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
263 * the superblock for the nlink flag: sector size
264 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
265 * the inode btree: max depth * blocksize
266 * the allocation btrees: 2 trees * (max depth - 1) * block size
267 */
91STATIC uint 268STATIC uint
92xfs_calc_create_reservation(xfs_mount_t *mp) 269xfs_calc_create_reservation(
270 struct xfs_mount *mp)
93{ 271{
94 return XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 272 return XFS_DQUOT_LOGRES(mp) +
273 MAX((mp->m_sb.sb_inodesize +
274 mp->m_sb.sb_inodesize +
275 mp->m_sb.sb_sectsize +
276 XFS_FSB_TO_B(mp, 1) +
277 XFS_DIROP_LOG_RES(mp) +
278 128 * (3 + XFS_DIROP_LOG_COUNT(mp))),
279 (3 * mp->m_sb.sb_sectsize +
280 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
281 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
282 XFS_ALLOCFREE_LOG_RES(mp, 1) +
283 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
284 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
95} 285}
96 286
287/*
288 * Making a new directory is the same as creating a new file.
289 */
97STATIC uint 290STATIC uint
98xfs_calc_mkdir_reservation(xfs_mount_t *mp) 291xfs_calc_mkdir_reservation(
292 struct xfs_mount *mp)
99{ 293{
100 return XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 294 return xfs_calc_create_reservation(mp);
101} 295}
102 296
297/*
298 * In freeing an inode we can modify:
299 * the inode being freed: inode size
300 * the super block free inode counter: sector size
301 * the agi hash list and counters: sector size
302 * the inode btree entry: block size
303 * the on disk inode before ours in the agi hash list: inode cluster size
304 * the inode btree: max depth * blocksize
305 * the allocation btrees: 2 trees * (max depth - 1) * block size
306 */
103STATIC uint 307STATIC uint
104xfs_calc_ifree_reservation(xfs_mount_t *mp) 308xfs_calc_ifree_reservation(
309 struct xfs_mount *mp)
105{ 310{
106 return XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 311 return XFS_DQUOT_LOGRES(mp) +
312 mp->m_sb.sb_inodesize +
313 mp->m_sb.sb_sectsize +
314 mp->m_sb.sb_sectsize +
315 XFS_FSB_TO_B(mp, 1) +
316 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
317 XFS_INODE_CLUSTER_SIZE(mp)) +
318 128 * 5 +
319 XFS_ALLOCFREE_LOG_RES(mp, 1) +
320 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
321 XFS_ALLOCFREE_LOG_COUNT(mp, 1));
107} 322}
108 323
324/*
325 * When only changing the inode we log the inode and possibly the superblock
326 * We also add a bit of slop for the transaction stuff.
327 */
109STATIC uint 328STATIC uint
110xfs_calc_ichange_reservation(xfs_mount_t *mp) 329xfs_calc_ichange_reservation(
330 struct xfs_mount *mp)
111{ 331{
112 return XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 332 return XFS_DQUOT_LOGRES(mp) +
333 mp->m_sb.sb_inodesize +
334 mp->m_sb.sb_sectsize +
335 512;
336
113} 337}
114 338
339/*
340 * Growing the data section of the filesystem.
341 * superblock
342 * agi and agf
343 * allocation btrees
344 */
115STATIC uint 345STATIC uint
116xfs_calc_growdata_reservation(xfs_mount_t *mp) 346xfs_calc_growdata_reservation(
347 struct xfs_mount *mp)
117{ 348{
118 return XFS_CALC_GROWDATA_LOG_RES(mp); 349 return mp->m_sb.sb_sectsize * 3 +
350 XFS_ALLOCFREE_LOG_RES(mp, 1) +
351 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1));
119} 352}
120 353
354/*
355 * Growing the rt section of the filesystem.
356 * In the first set of transactions (ALLOC) we allocate space to the
357 * bitmap or summary files.
358 * superblock: sector size
359 * agf of the ag from which the extent is allocated: sector size
360 * bmap btree for bitmap/summary inode: max depth * blocksize
361 * bitmap/summary inode: inode size
362 * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
363 */
121STATIC uint 364STATIC uint
122xfs_calc_growrtalloc_reservation(xfs_mount_t *mp) 365xfs_calc_growrtalloc_reservation(
366 struct xfs_mount *mp)
123{ 367{
124 return XFS_CALC_GROWRTALLOC_LOG_RES(mp); 368 return 2 * mp->m_sb.sb_sectsize +
369 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
370 mp->m_sb.sb_inodesize +
371 XFS_ALLOCFREE_LOG_RES(mp, 1) +
372 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
373 XFS_ALLOCFREE_LOG_COUNT(mp, 1));
125} 374}
126 375
376/*
377 * Growing the rt section of the filesystem.
378 * In the second set of transactions (ZERO) we zero the new metadata blocks.
379 * one bitmap/summary block: blocksize
380 */
127STATIC uint 381STATIC uint
128xfs_calc_growrtzero_reservation(xfs_mount_t *mp) 382xfs_calc_growrtzero_reservation(
383 struct xfs_mount *mp)
129{ 384{
130 return XFS_CALC_GROWRTZERO_LOG_RES(mp); 385 return mp->m_sb.sb_blocksize + 128;
131} 386}
132 387
388/*
389 * Growing the rt section of the filesystem.
390 * In the third set of transactions (FREE) we update metadata without
391 * allocating any new blocks.
392 * superblock: sector size
393 * bitmap inode: inode size
394 * summary inode: inode size
395 * one bitmap block: blocksize
396 * summary blocks: new summary size
397 */
133STATIC uint 398STATIC uint
134xfs_calc_growrtfree_reservation(xfs_mount_t *mp) 399xfs_calc_growrtfree_reservation(
400 struct xfs_mount *mp)
135{ 401{
136 return XFS_CALC_GROWRTFREE_LOG_RES(mp); 402 return mp->m_sb.sb_sectsize +
403 2 * mp->m_sb.sb_inodesize +
404 mp->m_sb.sb_blocksize +
405 mp->m_rsumsize +
406 128 * 5;
137} 407}
138 408
409/*
410 * Logging the inode modification timestamp on a synchronous write.
411 * inode
412 */
139STATIC uint 413STATIC uint
140xfs_calc_swrite_reservation(xfs_mount_t *mp) 414xfs_calc_swrite_reservation(
415 struct xfs_mount *mp)
141{ 416{
142 return XFS_CALC_SWRITE_LOG_RES(mp); 417 return mp->m_sb.sb_inodesize + 128;
143} 418}
144 419
420/*
421 * Logging the inode mode bits when writing a setuid/setgid file
422 * inode
423 */
145STATIC uint 424STATIC uint
146xfs_calc_writeid_reservation(xfs_mount_t *mp) 425xfs_calc_writeid_reservation(xfs_mount_t *mp)
147{ 426{
148 return XFS_CALC_WRITEID_LOG_RES(mp); 427 return mp->m_sb.sb_inodesize + 128;
149} 428}
150 429
430/*
431 * Converting the inode from non-attributed to attributed.
432 * the inode being converted: inode size
433 * agf block and superblock (for block allocation)
434 * the new block (directory sized)
435 * bmap blocks for the new directory block
436 * allocation btrees
437 */
151STATIC uint 438STATIC uint
152xfs_calc_addafork_reservation(xfs_mount_t *mp) 439xfs_calc_addafork_reservation(
440 struct xfs_mount *mp)
153{ 441{
154 return XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 442 return XFS_DQUOT_LOGRES(mp) +
443 mp->m_sb.sb_inodesize +
444 mp->m_sb.sb_sectsize * 2 +
445 mp->m_dirblksize +
446 XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) +
447 XFS_ALLOCFREE_LOG_RES(mp, 1) +
448 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 +
449 XFS_ALLOCFREE_LOG_COUNT(mp, 1));
155} 450}
156 451
452/*
453 * Removing the attribute fork of a file
454 * the inode being truncated: inode size
455 * the inode's bmap btree: max depth * block size
456 * And the bmap_finish transaction can free the blocks and bmap blocks:
457 * the agf for each of the ags: 4 * sector size
458 * the agfl for each of the ags: 4 * sector size
459 * the super block to reflect the freed blocks: sector size
460 * worst case split in allocation btrees per extent assuming 4 extents:
461 * 4 exts * 2 trees * (2 * max depth - 1) * block size
462 */
157STATIC uint 463STATIC uint
158xfs_calc_attrinval_reservation(xfs_mount_t *mp) 464xfs_calc_attrinval_reservation(
465 struct xfs_mount *mp)
159{ 466{
160 return XFS_CALC_ATTRINVAL_LOG_RES(mp); 467 return MAX((mp->m_sb.sb_inodesize +
468 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
469 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))),
470 (4 * mp->m_sb.sb_sectsize +
471 4 * mp->m_sb.sb_sectsize +
472 mp->m_sb.sb_sectsize +
473 XFS_ALLOCFREE_LOG_RES(mp, 4) +
474 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
161} 475}
162 476
477/*
478 * Setting an attribute.
479 * the inode getting the attribute
480 * the superblock for allocations
481 * the agfs extents are allocated from
482 * the attribute btree * max depth
483 * the inode allocation btree
484 * Since attribute transaction space is dependent on the size of the attribute,
485 * the calculation is done partially at mount time and partially at runtime.
486 */
163STATIC uint 487STATIC uint
164xfs_calc_attrset_reservation(xfs_mount_t *mp) 488xfs_calc_attrset_reservation(
489 struct xfs_mount *mp)
165{ 490{
166 return XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 491 return XFS_DQUOT_LOGRES(mp) +
492 mp->m_sb.sb_inodesize +
493 mp->m_sb.sb_sectsize +
494 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
495 128 * (2 + XFS_DA_NODE_MAXDEPTH);
167} 496}
168 497
498/*
499 * Removing an attribute.
500 * the inode: inode size
501 * the attribute btree could join: max depth * block size
502 * the inode bmap btree could join or split: max depth * block size
503 * And the bmap_finish transaction can free the attr blocks freed giving:
504 * the agf for the ag in which the blocks live: 2 * sector size
505 * the agfl for the ag in which the blocks live: 2 * sector size
506 * the superblock for the free block count: sector size
507 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
508 */
169STATIC uint 509STATIC uint
170xfs_calc_attrrm_reservation(xfs_mount_t *mp) 510xfs_calc_attrrm_reservation(
511 struct xfs_mount *mp)
171{ 512{
172 return XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 513 return XFS_DQUOT_LOGRES(mp) +
514 MAX((mp->m_sb.sb_inodesize +
515 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
516 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
517 128 * (1 + XFS_DA_NODE_MAXDEPTH +
518 XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
519 (2 * mp->m_sb.sb_sectsize +
520 2 * mp->m_sb.sb_sectsize +
521 mp->m_sb.sb_sectsize +
522 XFS_ALLOCFREE_LOG_RES(mp, 2) +
523 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
173} 524}
174 525
526/*
527 * Clearing a bad agino number in an agi hash bucket.
528 */
175STATIC uint 529STATIC uint
176xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp) 530xfs_calc_clear_agi_bucket_reservation(
531 struct xfs_mount *mp)
177{ 532{
178 return XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp); 533 return mp->m_sb.sb_sectsize + 128;
179} 534}
180 535
181/* 536/*
@@ -184,11 +539,10 @@ xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
184 */ 539 */
185void 540void
186xfs_trans_init( 541xfs_trans_init(
187 xfs_mount_t *mp) 542 struct xfs_mount *mp)
188{ 543{
189 xfs_trans_reservations_t *resp; 544 struct xfs_trans_reservations *resp = &mp->m_reservations;
190 545
191 resp = &(mp->m_reservations);
192 resp->tr_write = xfs_calc_write_reservation(mp); 546 resp->tr_write = xfs_calc_write_reservation(mp);
193 resp->tr_itruncate = xfs_calc_itruncate_reservation(mp); 547 resp->tr_itruncate = xfs_calc_itruncate_reservation(mp);
194 resp->tr_rename = xfs_calc_rename_reservation(mp); 548 resp->tr_rename = xfs_calc_rename_reservation(mp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 8c69e7824f68..e639e8e9a2a9 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -300,24 +300,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
300 300
301 301
302/* 302/*
303 * Various log reservation values.
304 * These are based on the size of the file system block
305 * because that is what most transactions manipulate.
306 * Each adds in an additional 128 bytes per item logged to
307 * try to account for the overhead of the transaction mechanism.
308 *
309 * Note:
310 * Most of the reservations underestimate the number of allocation
311 * groups into which they could free extents in the xfs_bmap_finish()
312 * call. This is because the number in the worst case is quite high
313 * and quite unusual. In order to fix this we need to change
314 * xfs_bmap_finish() to free extents in only a single AG at a time.
315 * This will require changes to the EFI code as well, however, so that
316 * the EFI for the extents not freed is logged again in each transaction.
317 * See bug 261917.
318 */
319
320/*
321 * Per-extent log reservation for the allocation btree changes 303 * Per-extent log reservation for the allocation btree changes
322 * involved in freeing or allocating an extent. 304 * involved in freeing or allocating an extent.
323 * 2 trees * (2 blocks/level * max depth - 1) * block size 305 * 2 trees * (2 blocks/level * max depth - 1) * block size
@@ -341,429 +323,36 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
341 (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \ 323 (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
342 XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1) 324 XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
343 325
344/*
345 * In a write transaction we can allocate a maximum of 2
346 * extents. This gives:
347 * the inode getting the new extents: inode size
348 * the inode's bmap btree: max depth * block size
349 * the agfs of the ags from which the extents are allocated: 2 * sector
350 * the superblock free block counter: sector size
351 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
352 * And the bmap_finish transaction can free bmap blocks in a join:
353 * the agfs of the ags containing the blocks: 2 * sector size
354 * the agfls of the ags containing the blocks: 2 * sector size
355 * the super block free block counter: sector size
356 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
357 */
358#define XFS_CALC_WRITE_LOG_RES(mp) \
359 (MAX( \
360 ((mp)->m_sb.sb_inodesize + \
361 XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
362 (2 * (mp)->m_sb.sb_sectsize) + \
363 (mp)->m_sb.sb_sectsize + \
364 XFS_ALLOCFREE_LOG_RES(mp, 2) + \
365 (128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\
366 ((2 * (mp)->m_sb.sb_sectsize) + \
367 (2 * (mp)->m_sb.sb_sectsize) + \
368 (mp)->m_sb.sb_sectsize + \
369 XFS_ALLOCFREE_LOG_RES(mp, 2) + \
370 (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
371 326
372#define XFS_WRITE_LOG_RES(mp) ((mp)->m_reservations.tr_write) 327#define XFS_WRITE_LOG_RES(mp) ((mp)->m_reservations.tr_write)
373
374/*
375 * In truncating a file we free up to two extents at once. We can modify:
376 * the inode being truncated: inode size
377 * the inode's bmap btree: (max depth + 1) * block size
378 * And the bmap_finish transaction can free the blocks and bmap blocks:
379 * the agf for each of the ags: 4 * sector size
380 * the agfl for each of the ags: 4 * sector size
381 * the super block to reflect the freed blocks: sector size
382 * worst case split in allocation btrees per extent assuming 4 extents:
383 * 4 exts * 2 trees * (2 * max depth - 1) * block size
384 * the inode btree: max depth * blocksize
385 * the allocation btrees: 2 trees * (max depth - 1) * block size
386 */
387#define XFS_CALC_ITRUNCATE_LOG_RES(mp) \
388 (MAX( \
389 ((mp)->m_sb.sb_inodesize + \
390 XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \
391 (128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
392 ((4 * (mp)->m_sb.sb_sectsize) + \
393 (4 * (mp)->m_sb.sb_sectsize) + \
394 (mp)->m_sb.sb_sectsize + \
395 XFS_ALLOCFREE_LOG_RES(mp, 4) + \
396 (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \
397 (128 * 5) + \
398 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
399 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
400 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
401
402#define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate) 328#define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate)
403
404/*
405 * In renaming a files we can modify:
406 * the four inodes involved: 4 * inode size
407 * the two directory btrees: 2 * (max depth + v2) * dir block size
408 * the two directory bmap btrees: 2 * max depth * block size
409 * And the bmap_finish transaction can free dir and bmap blocks (two sets
410 * of bmap blocks) giving:
411 * the agf for the ags in which the blocks live: 3 * sector size
412 * the agfl for the ags in which the blocks live: 3 * sector size
413 * the superblock for the free block count: sector size
414 * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
415 */
416#define XFS_CALC_RENAME_LOG_RES(mp) \
417 (MAX( \
418 ((4 * (mp)->m_sb.sb_inodesize) + \
419 (2 * XFS_DIROP_LOG_RES(mp)) + \
420 (128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \
421 ((3 * (mp)->m_sb.sb_sectsize) + \
422 (3 * (mp)->m_sb.sb_sectsize) + \
423 (mp)->m_sb.sb_sectsize + \
424 XFS_ALLOCFREE_LOG_RES(mp, 3) + \
425 (128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))))))
426
427#define XFS_RENAME_LOG_RES(mp) ((mp)->m_reservations.tr_rename) 329#define XFS_RENAME_LOG_RES(mp) ((mp)->m_reservations.tr_rename)
428
429/*
430 * For creating a link to an inode:
431 * the parent directory inode: inode size
432 * the linked inode: inode size
433 * the directory btree could split: (max depth + v2) * dir block size
434 * the directory bmap btree could join or split: (max depth + v2) * blocksize
435 * And the bmap_finish transaction can free some bmap blocks giving:
436 * the agf for the ag in which the blocks live: sector size
437 * the agfl for the ag in which the blocks live: sector size
438 * the superblock for the free block count: sector size
439 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
440 */
441#define XFS_CALC_LINK_LOG_RES(mp) \
442 (MAX( \
443 ((mp)->m_sb.sb_inodesize + \
444 (mp)->m_sb.sb_inodesize + \
445 XFS_DIROP_LOG_RES(mp) + \
446 (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
447 ((mp)->m_sb.sb_sectsize + \
448 (mp)->m_sb.sb_sectsize + \
449 (mp)->m_sb.sb_sectsize + \
450 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
451 (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
452
453#define XFS_LINK_LOG_RES(mp) ((mp)->m_reservations.tr_link) 330#define XFS_LINK_LOG_RES(mp) ((mp)->m_reservations.tr_link)
454
455/*
456 * For removing a directory entry we can modify:
457 * the parent directory inode: inode size
458 * the removed inode: inode size
459 * the directory btree could join: (max depth + v2) * dir block size
460 * the directory bmap btree could join or split: (max depth + v2) * blocksize
461 * And the bmap_finish transaction can free the dir and bmap blocks giving:
462 * the agf for the ag in which the blocks live: 2 * sector size
463 * the agfl for the ag in which the blocks live: 2 * sector size
464 * the superblock for the free block count: sector size
465 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
466 */
467#define XFS_CALC_REMOVE_LOG_RES(mp) \
468 (MAX( \
469 ((mp)->m_sb.sb_inodesize + \
470 (mp)->m_sb.sb_inodesize + \
471 XFS_DIROP_LOG_RES(mp) + \
472 (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
473 ((2 * (mp)->m_sb.sb_sectsize) + \
474 (2 * (mp)->m_sb.sb_sectsize) + \
475 (mp)->m_sb.sb_sectsize + \
476 XFS_ALLOCFREE_LOG_RES(mp, 2) + \
477 (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
478
479#define XFS_REMOVE_LOG_RES(mp) ((mp)->m_reservations.tr_remove) 331#define XFS_REMOVE_LOG_RES(mp) ((mp)->m_reservations.tr_remove)
480
481/*
482 * For symlink we can modify:
483 * the parent directory inode: inode size
484 * the new inode: inode size
485 * the inode btree entry: 1 block
486 * the directory btree: (max depth + v2) * dir block size
487 * the directory inode's bmap btree: (max depth + v2) * block size
488 * the blocks for the symlink: 1 kB
489 * Or in the first xact we allocate some inodes giving:
490 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
491 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
492 * the inode btree: max depth * blocksize
493 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
494 */
495#define XFS_CALC_SYMLINK_LOG_RES(mp) \
496 (MAX( \
497 ((mp)->m_sb.sb_inodesize + \
498 (mp)->m_sb.sb_inodesize + \
499 XFS_FSB_TO_B(mp, 1) + \
500 XFS_DIROP_LOG_RES(mp) + \
501 1024 + \
502 (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \
503 (2 * (mp)->m_sb.sb_sectsize + \
504 XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
505 XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
506 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
507 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
508 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
509
510#define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink) 332#define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink)
511
512/*
513 * For create we can modify:
514 * the parent directory inode: inode size
515 * the new inode: inode size
516 * the inode btree entry: block size
517 * the superblock for the nlink flag: sector size
518 * the directory btree: (max depth + v2) * dir block size
519 * the directory inode's bmap btree: (max depth + v2) * block size
520 * Or in the first xact we allocate some inodes giving:
521 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
522 * the superblock for the nlink flag: sector size
523 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
524 * the inode btree: max depth * blocksize
525 * the allocation btrees: 2 trees * (max depth - 1) * block size
526 */
527#define XFS_CALC_CREATE_LOG_RES(mp) \
528 (MAX( \
529 ((mp)->m_sb.sb_inodesize + \
530 (mp)->m_sb.sb_inodesize + \
531 (mp)->m_sb.sb_sectsize + \
532 XFS_FSB_TO_B(mp, 1) + \
533 XFS_DIROP_LOG_RES(mp) + \
534 (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \
535 (3 * (mp)->m_sb.sb_sectsize + \
536 XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
537 XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
538 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
539 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
540 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
541
542#define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create) 333#define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create)
543
544/*
545 * Making a new directory is the same as creating a new file.
546 */
547#define XFS_CALC_MKDIR_LOG_RES(mp) XFS_CALC_CREATE_LOG_RES(mp)
548
549#define XFS_MKDIR_LOG_RES(mp) ((mp)->m_reservations.tr_mkdir) 334#define XFS_MKDIR_LOG_RES(mp) ((mp)->m_reservations.tr_mkdir)
550
551/*
552 * In freeing an inode we can modify:
553 * the inode being freed: inode size
554 * the super block free inode counter: sector size
555 * the agi hash list and counters: sector size
556 * the inode btree entry: block size
557 * the on disk inode before ours in the agi hash list: inode cluster size
558 * the inode btree: max depth * blocksize
559 * the allocation btrees: 2 trees * (max depth - 1) * block size
560 */
561#define XFS_CALC_IFREE_LOG_RES(mp) \
562 ((mp)->m_sb.sb_inodesize + \
563 (mp)->m_sb.sb_sectsize + \
564 (mp)->m_sb.sb_sectsize + \
565 XFS_FSB_TO_B((mp), 1) + \
566 MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
567 (128 * 5) + \
568 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
569 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
570 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
571
572
573#define XFS_IFREE_LOG_RES(mp) ((mp)->m_reservations.tr_ifree)
574
575/*
576 * When only changing the inode we log the inode and possibly the superblock
577 * We also add a bit of slop for the transaction stuff.
578 */
579#define XFS_CALC_ICHANGE_LOG_RES(mp) ((mp)->m_sb.sb_inodesize + \
580 (mp)->m_sb.sb_sectsize + 512)
581
582#define XFS_ICHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_ichange)
583
584/*
585 * Growing the data section of the filesystem.
586 * superblock
587 * agi and agf
588 * allocation btrees
589 */
590#define XFS_CALC_GROWDATA_LOG_RES(mp) \
591 ((mp)->m_sb.sb_sectsize * 3 + \
592 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
593 (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
594
595#define XFS_GROWDATA_LOG_RES(mp) ((mp)->m_reservations.tr_growdata)
596
597/*
598 * Growing the rt section of the filesystem.
599 * In the first set of transactions (ALLOC) we allocate space to the
600 * bitmap or summary files.
601 * superblock: sector size
602 * agf of the ag from which the extent is allocated: sector size
603 * bmap btree for bitmap/summary inode: max depth * blocksize
604 * bitmap/summary inode: inode size
605 * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
606 */
607#define XFS_CALC_GROWRTALLOC_LOG_RES(mp) \
608 (2 * (mp)->m_sb.sb_sectsize + \
609 XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
610 (mp)->m_sb.sb_inodesize + \
611 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
612 (128 * \
613 (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \
614 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
615
616#define XFS_GROWRTALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_growrtalloc)
617
618/*
619 * Growing the rt section of the filesystem.
620 * In the second set of transactions (ZERO) we zero the new metadata blocks.
621 * one bitmap/summary block: blocksize
622 */
623#define XFS_CALC_GROWRTZERO_LOG_RES(mp) \
624 ((mp)->m_sb.sb_blocksize + 128)
625
626#define XFS_GROWRTZERO_LOG_RES(mp) ((mp)->m_reservations.tr_growrtzero)
627
628/*
629 * Growing the rt section of the filesystem.
630 * In the third set of transactions (FREE) we update metadata without
631 * allocating any new blocks.
632 * superblock: sector size
633 * bitmap inode: inode size
634 * summary inode: inode size
635 * one bitmap block: blocksize
636 * summary blocks: new summary size
637 */
638#define XFS_CALC_GROWRTFREE_LOG_RES(mp) \
639 ((mp)->m_sb.sb_sectsize + \
640 2 * (mp)->m_sb.sb_inodesize + \
641 (mp)->m_sb.sb_blocksize + \
642 (mp)->m_rsumsize + \
643 (128 * 5))
644
645#define XFS_GROWRTFREE_LOG_RES(mp) ((mp)->m_reservations.tr_growrtfree)
646
647/*
648 * Logging the inode modification timestamp on a synchronous write.
649 * inode
650 */
651#define XFS_CALC_SWRITE_LOG_RES(mp) \
652 ((mp)->m_sb.sb_inodesize + 128)
653
654#define XFS_SWRITE_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
655
656/*
657 * Logging the inode timestamps on an fsync -- same as SWRITE
658 * as long as SWRITE logs the entire inode core
659 */
660#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
661
662/*
663 * Logging the inode mode bits when writing a setuid/setgid file
664 * inode
665 */
666#define XFS_CALC_WRITEID_LOG_RES(mp) \
667 ((mp)->m_sb.sb_inodesize + 128)
668
669#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
670
671/*
672 * Converting the inode from non-attributed to attributed.
673 * the inode being converted: inode size
674 * agf block and superblock (for block allocation)
675 * the new block (directory sized)
676 * bmap blocks for the new directory block
677 * allocation btrees
678 */
679#define XFS_CALC_ADDAFORK_LOG_RES(mp) \
680 ((mp)->m_sb.sb_inodesize + \
681 (mp)->m_sb.sb_sectsize * 2 + \
682 (mp)->m_dirblksize + \
683 XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \
684 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
685 (128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
686 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
687
688#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork)
689
690/*
691 * Removing the attribute fork of a file
692 * the inode being truncated: inode size
693 * the inode's bmap btree: max depth * block size
694 * And the bmap_finish transaction can free the blocks and bmap blocks:
695 * the agf for each of the ags: 4 * sector size
696 * the agfl for each of the ags: 4 * sector size
697 * the super block to reflect the freed blocks: sector size
698 * worst case split in allocation btrees per extent assuming 4 extents:
699 * 4 exts * 2 trees * (2 * max depth - 1) * block size
700 */
701#define XFS_CALC_ATTRINVAL_LOG_RES(mp) \
702 (MAX( \
703 ((mp)->m_sb.sb_inodesize + \
704 XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
705 (128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \
706 ((4 * (mp)->m_sb.sb_sectsize) + \
707 (4 * (mp)->m_sb.sb_sectsize) + \
708 (mp)->m_sb.sb_sectsize + \
709 XFS_ALLOCFREE_LOG_RES(mp, 4) + \
710 (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))))))
711
712#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval)
713
714/*
715 * Setting an attribute.
716 * the inode getting the attribute
717 * the superblock for allocations
718 * the agfs extents are allocated from
719 * the attribute btree * max depth
720 * the inode allocation btree
721 * Since attribute transaction space is dependent on the size of the attribute,
722 * the calculation is done partially at mount time and partially at runtime.
723 */
724#define XFS_CALC_ATTRSET_LOG_RES(mp) \
725 ((mp)->m_sb.sb_inodesize + \
726 (mp)->m_sb.sb_sectsize + \
727 XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
728 (128 * (2 + XFS_DA_NODE_MAXDEPTH)))
729
730#define XFS_ATTRSET_LOG_RES(mp, ext) \
731 ((mp)->m_reservations.tr_attrset + \
732 (ext * (mp)->m_sb.sb_sectsize) + \
733 (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
734 (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
735
736/*
737 * Removing an attribute.
738 * the inode: inode size
739 * the attribute btree could join: max depth * block size
740 * the inode bmap btree could join or split: max depth * block size
741 * And the bmap_finish transaction can free the attr blocks freed giving:
742 * the agf for the ag in which the blocks live: 2 * sector size
743 * the agfl for the ag in which the blocks live: 2 * sector size
744 * the superblock for the free block count: sector size
745 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
746 */
747#define XFS_CALC_ATTRRM_LOG_RES(mp) \
748 (MAX( \
749 ((mp)->m_sb.sb_inodesize + \
750 XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
751 XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
752 (128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
753 ((2 * (mp)->m_sb.sb_sectsize) + \
754 (2 * (mp)->m_sb.sb_sectsize) + \
755 (mp)->m_sb.sb_sectsize + \
756 XFS_ALLOCFREE_LOG_RES(mp, 2) + \
757 (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
758
759#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
760
761/*
762 * Clearing a bad agino number in an agi hash bucket.
763 */
764#define XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \
765 ((mp)->m_sb.sb_sectsize + 128)
766
767#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi)
768
769
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 785ff101da0a..2559dfec946b 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -62,7 +62,7 @@ xfs_trans_iget(
 {
 	int	error;
 
-	error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0);
+	error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
 	if (!error && tp)
 		xfs_trans_ijoin(tp, *ipp, lock_flags);
 	return error;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 9d376be0ea38..c1646838898f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -267,7 +267,7 @@ xfs_setattr(
 	if (code) {
 		ASSERT(tp == NULL);
 		lock_flags &= ~XFS_ILOCK_EXCL;
-		ASSERT(lock_flags == XFS_IOLOCK_EXCL);
+		ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock);
 		goto error_return;
 	}
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
@@ -1269,7 +1269,7 @@ xfs_lookup(
 	if (error)
 		goto out;
 
-	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
+	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
 	if (error)
 		goto out_free_name;
 