Diffstat (limited to 'fs')
-rw-r--r--  fs/afs/server.c                  |    5
-rw-r--r--  fs/afs/write.c                   |    1
-rw-r--r--  fs/binfmt_elf_fdpic.c            |   26
-rw-r--r--  fs/binfmt_flat.c                 |   27
-rw-r--r--  fs/block_dev.c                   |   72
-rw-r--r--  fs/btrfs/acl.c                   |    8
-rw-r--r--  fs/btrfs/ctree.c                 |  129
-rw-r--r--  fs/btrfs/disk-io.c               |   11
-rw-r--r--  fs/btrfs/extent-tree.c           |    3
-rw-r--r--  fs/btrfs/extent_io.c             |    2
-rw-r--r--  fs/btrfs/file.c                  |   12
-rw-r--r--  fs/btrfs/inode.c                 |    4
-rw-r--r--  fs/btrfs/ioctl.c                 |   24
-rw-r--r--  fs/btrfs/relocation.c            |    7
-rw-r--r--  fs/btrfs/root-tree.c             |    3
-rw-r--r--  fs/btrfs/super.c                 |    6
-rw-r--r--  fs/ceph/auth_x.c                 |    2
-rw-r--r--  fs/ceph/caps.c                   |  114
-rw-r--r--  fs/ceph/crush/mapper.c           |   41
-rw-r--r--  fs/ceph/debugfs.c                |    2
-rw-r--r--  fs/ceph/inode.c                  |   21
-rw-r--r--  fs/ceph/mds_client.c             |   34
-rw-r--r--  fs/ceph/mds_client.h             |    6
-rw-r--r--  fs/ceph/messenger.c              |    4
-rw-r--r--  fs/ceph/mon_client.c             |    5
-rw-r--r--  fs/ceph/osd_client.c             |    3
-rw-r--r--  fs/ceph/osdmap.c                 |    1
-rw-r--r--  fs/ceph/super.c                  |    4
-rw-r--r--  fs/cifs/cifsfs.c                 |   16
-rw-r--r--  fs/cifs/cifsproto.h              |    1
-rw-r--r--  fs/cifs/dir.c                    |   76
-rw-r--r--  fs/cifs/file.c                   |  101
-rw-r--r--  fs/cifs/inode.c                  |    4
-rw-r--r--  fs/cifs/sess.c                   |   10
-rw-r--r--  fs/compat.c                      |    2
-rw-r--r--  fs/configfs/inode.c              |    9
-rw-r--r--  fs/dcache.c                      |    4
-rw-r--r--  fs/ext2/acl.c                    |    1
-rw-r--r--  fs/ext2/inode.c                  |    2
-rw-r--r--  fs/ext3/acl.c                    |    1
-rw-r--r--  fs/ext4/inode.c                  |   40
-rw-r--r--  fs/ext4/move_extent.c            |    3
-rw-r--r--  fs/fcntl.c                       |   13
-rw-r--r--  fs/fs-writeback.c                |  509
-rw-r--r--  fs/fscache/page.c                |   36
-rw-r--r--  fs/gfs2/bmap.c                   |    1
-rw-r--r--  fs/gfs2/dir.c                    |    2
-rw-r--r--  fs/gfs2/glock.c                  |   12
-rw-r--r--  fs/gfs2/inode.c                  |   12
-rw-r--r--  fs/gfs2/quota.c                  |   10
-rw-r--r--  fs/gfs2/quota.h                  |    2
-rw-r--r--  fs/inode.c                       |    2
-rw-r--r--  fs/jbd2/journal.c                |   15
-rw-r--r--  fs/jbd2/transaction.c            |    9
-rw-r--r--  fs/jffs2/acl.c                   |    3
-rw-r--r--  fs/jffs2/dir.c                   |  127
-rw-r--r--  fs/jffs2/fs.c                    |    7
-rw-r--r--  fs/libfs.c                       |    3
-rw-r--r--  fs/mbcache.c                     |    5
-rw-r--r--  fs/minix/dir.c                   |    4
-rw-r--r--  fs/nfs/client.c                  |  122
-rw-r--r--  fs/nfs/dir.c                     |    2
-rw-r--r--  fs/nfs/getroot.c                 |    2
-rw-r--r--  fs/nfs/internal.h                |    3
-rw-r--r--  fs/nfs/nfs4xdr.c                 |    4
-rw-r--r--  fs/nfs/super.c                   |   22
-rw-r--r--  fs/nfsd/nfs4state.c              |    2
-rw-r--r--  fs/nfsd/vfs.c                    |    3
-rw-r--r--  fs/nilfs2/btree.h                |    2
-rw-r--r--  fs/nilfs2/segbuf.h               |    2
-rw-r--r--  fs/nilfs2/segment.h              |    2
-rw-r--r--  fs/nilfs2/super.c                |    8
-rw-r--r--  fs/ocfs2/aops.c                  |   94
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c         |    3
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c         |   22
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c       |    2
-rw-r--r--  fs/ocfs2/file.c                  |  309
-rw-r--r--  fs/ocfs2/file.h                  |    6
-rw-r--r--  fs/ocfs2/journal.c               |   30
-rw-r--r--  fs/ocfs2/localalloc.c            |    7
-rw-r--r--  fs/ocfs2/quota_global.c          |    2
-rw-r--r--  fs/ocfs2/quota_local.c           |    4
-rw-r--r--  fs/ocfs2/refcounttree.c          |   12
-rw-r--r--  fs/ocfs2/reservations.c          |    1
-rw-r--r--  fs/ocfs2/suballoc.c              |    2
-rw-r--r--  fs/ocfs2/xattr.c                 |  200
-rw-r--r--  fs/partitions/ibm.c              |   14
-rw-r--r--  fs/pipe.c                        |   93
-rw-r--r--  fs/proc/proc_devtree.c           |    3
-rw-r--r--  fs/proc/task_nommu.c             |   20
-rw-r--r--  fs/quota/dquot.c                 |    2
-rw-r--r--  fs/splice.c                      |   11
-rw-r--r--  fs/super.c                       |    6
-rw-r--r--  fs/sync.c                        |    2
-rw-r--r--  fs/sysfs/inode.c                 |    6
-rw-r--r--  fs/sysv/ialloc.c                 |    6
-rw-r--r--  fs/ubifs/budget.c                |    2
-rw-r--r--  fs/ubifs/shrinker.c              |    2
-rw-r--r--  fs/ubifs/ubifs.h                 |    2
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c      |   23
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c       |    5
-rw-r--r--  fs/xfs/linux-2.6/xfs_export.c    |   11
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c     |    7
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c   |   15
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c      |   16
-rw-r--r--  fs/xfs/linux-2.6/xfs_quotaops.c  |    1
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c     |    2
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c      |  139
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h      |    2
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.c     |    1
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h     |  359
-rw-r--r--  fs/xfs/quota/xfs_qm.c            |   29
-rw-r--r--  fs/xfs/quota/xfs_qm_syscalls.c   |   27
-rw-r--r--  fs/xfs/xfs_ag.h                  |    1
-rw-r--r--  fs/xfs/xfs_dfrag.c               |    5
-rw-r--r--  fs/xfs/xfs_ialloc.c              |  142
-rw-r--r--  fs/xfs/xfs_iget.c                |   39
-rw-r--r--  fs/xfs/xfs_inode.c               |  149
-rw-r--r--  fs/xfs/xfs_inode.h               |    6
-rw-r--r--  fs/xfs/xfs_itable.c              |  285
-rw-r--r--  fs/xfs/xfs_itable.h              |   17
-rw-r--r--  fs/xfs/xfs_log_recover.c         |   13
-rw-r--r--  fs/xfs/xfs_mount.c               |   70
-rw-r--r--  fs/xfs/xfs_mount.h               |    2
-rw-r--r--  fs/xfs/xfs_rtalloc.c             |    8
-rw-r--r--  fs/xfs/xfs_rtalloc.h             |   11
-rw-r--r--  fs/xfs/xfs_trans.c               |  446
-rw-r--r--  fs/xfs/xfs_trans.h               |  411
-rw-r--r--  fs/xfs/xfs_trans_inode.c         |    2
-rw-r--r--  fs/xfs/xfs_vnodeops.c            |    4
130 files changed, 2523 insertions(+), 2381 deletions(-)
diff --git a/fs/afs/server.c b/fs/afs/server.c
index f49099516675..9fdc7fe3a7bc 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -91,9 +91,10 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
 
 		memcpy(&server->addr, addr, sizeof(struct in_addr));
 		server->addr.s_addr = addr->s_addr;
+		_leave(" = %p{%d}", server, atomic_read(&server->usage));
+	} else {
+		_leave(" = NULL [nomem]");
 	}
-
-	_leave(" = %p{%d}", server, atomic_read(&server->usage));
 	return server;
 }
 
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3dab9e9948d0..722743b152d8 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
 {
 	struct address_space *mapping = vnode->vfs_inode.i_mapping;
 	struct writeback_control wbc = {
-		.bdi		= mapping->backing_dev_info,
 		.sync_mode	= WB_SYNC_ALL,
 		.nr_to_write	= LONG_MAX,
 		.range_cyclic	= 1,
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2c5f9a0e5d72..63039ed9576f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -990,10 +990,9 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
 
 	/* clear any space allocated but not loaded */
 	if (phdr->p_filesz < phdr->p_memsz) {
-		ret = clear_user((void *) (seg->addr + phdr->p_filesz),
-				 phdr->p_memsz - phdr->p_filesz);
-		if (ret)
-			return ret;
+		if (clear_user((void *) (seg->addr + phdr->p_filesz),
+			       phdr->p_memsz - phdr->p_filesz))
+			return -EFAULT;
 	}
 
 	if (mm) {
@@ -1027,7 +1026,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 	struct elf32_fdpic_loadseg *seg;
 	struct elf32_phdr *phdr;
 	unsigned long load_addr, delta_vaddr;
-	int loop, dvset, ret;
+	int loop, dvset;
 
 	load_addr = params->load_addr;
 	delta_vaddr = 0;
@@ -1127,9 +1126,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 			 * PT_LOAD */
 			if (prot & PROT_WRITE && disp > 0) {
 				kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp);
-				ret = clear_user((void __user *) maddr, disp);
-				if (ret)
-					return ret;
+				if (clear_user((void __user *) maddr, disp))
+					return -EFAULT;
 				maddr += disp;
 			}
 
@@ -1164,19 +1162,17 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		if (prot & PROT_WRITE && excess1 > 0) {
 			kdebug("clear[%d] ad=%lx sz=%lx",
 			       loop, maddr + phdr->p_filesz, excess1);
-			ret = clear_user((void __user *) maddr + phdr->p_filesz,
-					 excess1);
-			if (ret)
-				return ret;
+			if (clear_user((void __user *) maddr + phdr->p_filesz,
+				       excess1))
+				return -EFAULT;
 		}
 
 #else
 		if (excess > 0) {
 			kdebug("clear[%d] ad=%lx sz=%lx",
 			       loop, maddr + phdr->p_filesz, excess);
-			ret = clear_user((void *) maddr + phdr->p_filesz, excess);
-			if (ret)
-				return ret;
+			if (clear_user((void *) maddr + phdr->p_filesz, excess))
+				return -EFAULT;
 		}
 #endif
 
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 49566c1687d8..811384bec8de 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -56,16 +56,19 @@
 #endif
 
 /*
- * User data (stack, data section and bss) needs to be aligned
- * for the same reasons as SLAB memory is, and to the same amount.
- * Avoid duplicating architecture specific code by using the same
- * macro as with SLAB allocation:
+ * User data (data section and bss) needs to be aligned.
+ * We pick 0x20 here because it is the max value elf2flt has always
+ * used in producing FLAT files, and because it seems to be large
+ * enough to make all the gcc alignment related tests happy.
  */
-#ifdef ARCH_SLAB_MINALIGN
-#define FLAT_DATA_ALIGN	(ARCH_SLAB_MINALIGN)
-#else
-#define FLAT_DATA_ALIGN	(sizeof(void *))
-#endif
+#define FLAT_DATA_ALIGN	(0x20)
+
+/*
+ * User data (stack) also needs to be aligned.
+ * Here we can be a bit looser than the data sections since this
+ * needs to only meet arch ABI requirements.
+ */
+#define FLAT_STACK_ALIGN	max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
 
 #define RELOC_FAILED	0xff00ff01	/* Relocation incorrect somewhere */
 #define UNLOADED_LIB	0x7ff000ff	/* Placeholder for unused library */
@@ -129,7 +132,7 @@ static unsigned long create_flat_tables(
 
 	sp = (unsigned long *)p;
 	sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
-	sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
+	sp = (unsigned long *) ((unsigned long)sp & -FLAT_STACK_ALIGN);
 	argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
 	envp = argv + (argc + 1);
 
@@ -589,7 +592,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 	if (IS_ERR_VALUE(result)) {
 		printk("Unable to read data+bss, errno %d\n", (int)-result);
 		do_munmap(current->mm, textpos, text_len);
-		do_munmap(current->mm, realdatastart, data_len + extra);
+		do_munmap(current->mm, realdatastart, len);
 		ret = result;
 		goto err;
 	}
@@ -876,7 +879,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	stack_len = TOP_OF_ARGS - bprm->p;              /* the strings */
 	stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
 	stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
-	stack_len += FLAT_DATA_ALIGN - 1;  /* reserve for upcoming alignment */
+	stack_len += FLAT_STACK_ALIGN - 1;  /* reserve for upcoming alignment */
 
 	res = load_flat_file(bprm, &libinfo, 0, &stack_len);
 	if (IS_ERR_VALUE(res))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7346c96308a5..99d6af811747 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -706,8 +706,13 @@ retry:
  * @bdev is about to be opened exclusively.  Check @bdev can be opened
  * exclusively and mark that an exclusive open is in progress.  Each
  * successful call to this function must be matched with a call to
- * either bd_claim() or bd_abort_claiming().  If this function
- * succeeds, the matching bd_claim() is guaranteed to succeed.
+ * either bd_finish_claiming() or bd_abort_claiming() (which do not
+ * fail).
+ *
+ * This function is used to gain exclusive access to the block device
+ * without actually causing other exclusive open attempts to fail. It
+ * should be used when the open sequence itself requires exclusive
+ * access but may subsequently fail.
  *
  * CONTEXT:
  * Might sleep.
@@ -734,6 +739,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
 		return ERR_PTR(-ENXIO);
 
 	whole = bdget_disk(disk, 0);
+	module_put(disk->fops->owner);
 	put_disk(disk);
 	if (!whole)
 		return ERR_PTR(-ENOMEM);
@@ -782,15 +788,46 @@ static void bd_abort_claiming(struct block_device *whole, void *holder)
 	__bd_abort_claiming(whole, holder);	/* releases bdev_lock */
 }
 
+/* increment holders when we have a legitimate claim. requires bdev_lock */
+static void __bd_claim(struct block_device *bdev, struct block_device *whole,
+			void *holder)
+{
+	/* note that for a whole device bd_holders
+	 * will be incremented twice, and bd_holder will
+	 * be set to bd_claim before being set to holder
+	 */
+	whole->bd_holders++;
+	whole->bd_holder = bd_claim;
+	bdev->bd_holders++;
+	bdev->bd_holder = holder;
+}
+
+/**
+ * bd_finish_claiming - finish claiming a block device
+ * @bdev: block device of interest (passed to bd_start_claiming())
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Finish a claiming block started by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_finish_claiming(struct block_device *bdev,
+			       struct block_device *whole, void *holder)
+{
+	spin_lock(&bdev_lock);
+	BUG_ON(!bd_may_claim(bdev, whole, holder));
+	__bd_claim(bdev, whole, holder);
+	__bd_abort_claiming(whole, holder); /* not actually an abort */
+}
+
 /**
  * bd_claim - claim a block device
  * @bdev: block device to claim
  * @holder: holder trying to claim @bdev
  *
- * Try to claim @bdev which must have been opened successfully.  This
- * function may be called with or without preceding
- * blk_start_claiming().  In the former case, this function is always
- * successful and terminates the claiming block.
+ * Try to claim @bdev which must have been opened successfully.
  *
  * CONTEXT:
  * Might sleep.
@@ -806,23 +843,10 @@ int bd_claim(struct block_device *bdev, void *holder)
 	might_sleep();
 
 	spin_lock(&bdev_lock);
-
 	res = bd_prepare_to_claim(bdev, whole, holder);
-	if (res == 0) {
-		/* note that for a whole device bd_holders
-		 * will be incremented twice, and bd_holder will
-		 * be set to bd_claim before being set to holder
-		 */
-		whole->bd_holders++;
-		whole->bd_holder = bd_claim;
-		bdev->bd_holders++;
-		bdev->bd_holder = holder;
-	}
-
-	if (whole->bd_claiming)
-		__bd_abort_claiming(whole, holder); /* releases bdev_lock */
-	else
-		spin_unlock(&bdev_lock);
+	if (res == 0)
+		__bd_claim(bdev, whole, holder);
+	spin_unlock(&bdev_lock);
 
 	return res;
 }
@@ -1476,7 +1500,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 
 	if (whole) {
 		if (res == 0)
-			BUG_ON(bd_claim(bdev, filp) != 0);
+			bd_finish_claiming(bdev, whole, filp);
 		else
 			bd_abort_claiming(whole, filp);
 	}
@@ -1712,7 +1736,7 @@ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *h
 	if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
 		goto out_blkdev_put;
 
-	BUG_ON(bd_claim(bdev, holder) != 0);
+	bd_finish_claiming(bdev, whole, holder);
 	return bdev;
 
 out_blkdev_put:
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 8d432cd9d580..2222d161c7b6 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,6 +60,8 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 		size = __btrfs_getxattr(inode, name, value, size);
 		if (size > 0) {
 			acl = posix_acl_from_xattr(value, size);
+			if (IS_ERR(acl))
+				return acl;
 			set_cached_acl(inode, type, acl);
 		}
 		kfree(value);
@@ -160,6 +162,12 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 	int ret;
 	struct posix_acl *acl = NULL;
 
+	if (!is_owner_or_cap(dentry->d_inode))
+		return -EPERM;
+
+	if (!IS_POSIXACL(dentry->d_inode))
+		return -EOPNOTSUPP;
+
 	if (value) {
 		acl = posix_acl_from_xattr(value, size);
 		if (acl == NULL) {
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0d1d966b0fe4..c3df14ce2cc2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
 	return ret;
 }
 
+/*
+ * min slot controls the lowest index we're willing to push to the
+ * right.  We'll push up to and including min_slot, but no lower
+ */
 static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 				      struct btrfs_root *root,
 				      struct btrfs_path *path,
 				      int data_size, int empty,
 				      struct extent_buffer *right,
-				      int free_space, u32 left_nritems)
+				      int free_space, u32 left_nritems,
+				      u32 min_slot)
 {
 	struct extent_buffer *left = path->nodes[0];
 	struct extent_buffer *upper = path->nodes[1];
@@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 	if (empty)
 		nr = 0;
 	else
-		nr = 1;
+		nr = max_t(u32, 1, min_slot);
 
 	if (path->slots[0] >= left_nritems)
 		push_space += data_size;
@@ -2469,10 +2474,14 @@ out_unlock:
  *
  * returns 1 if the push failed because the other node didn't have enough
  * room, 0 if everything worked out and < 0 if there were major errors.
+ *
+ * this will push starting from min_slot to the end of the leaf.  It won't
+ * push any slot lower than min_slot
  */
 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
-			   *root, struct btrfs_path *path, int data_size,
-			   int empty)
+			   *root, struct btrfs_path *path,
+			   int min_data_size, int data_size,
+			   int empty, u32 min_slot)
 {
 	struct extent_buffer *left = path->nodes[0];
 	struct extent_buffer *right;
@@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (left_nritems == 0)
 		goto out_unlock;
 
-	return __push_leaf_right(trans, root, path, data_size, empty,
-				 right, free_space, left_nritems);
+	return __push_leaf_right(trans, root, path, min_data_size, empty,
+				 right, free_space, left_nritems, min_slot);
out_unlock:
 	btrfs_tree_unlock(right);
 	free_extent_buffer(right);
@@ -2525,12 +2534,17 @@ out_unlock:
 /*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items.  The
+ * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
+ * items
  */
 static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root,
 				     struct btrfs_path *path, int data_size,
 				     int empty, struct extent_buffer *left,
-				     int free_space, int right_nritems)
+				     int free_space, u32 right_nritems,
+				     u32 max_slot)
 {
 	struct btrfs_disk_key disk_key;
 	struct extent_buffer *right = path->nodes[0];
@@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	slot = path->slots[1];
 
 	if (empty)
-		nr = right_nritems;
+		nr = min(right_nritems, max_slot);
 	else
-		nr = right_nritems - 1;
+		nr = min(right_nritems - 1, max_slot);
 
 	for (i = 0; i < nr; i++) {
 		item = btrfs_item_nr(right, i);
@@ -2712,10 +2726,14 @@ out:
 /*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items.  The
+ * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
+ * items
  */
 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
-			  *root, struct btrfs_path *path, int data_size,
-			  int empty)
+			  *root, struct btrfs_path *path, int min_data_size,
+			  int data_size, int empty, u32 max_slot)
 {
 	struct extent_buffer *right = path->nodes[0];
 	struct extent_buffer *left;
@@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 		goto out;
 	}
 
-	return __push_leaf_left(trans, root, path, data_size,
-				empty, left, free_space, right_nritems);
+	return __push_leaf_left(trans, root, path, min_data_size,
+				empty, left, free_space, right_nritems,
+				max_slot);
out:
 	btrfs_tree_unlock(left);
 	free_extent_buffer(left);
@@ -2855,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
 }
 
 /*
+ * double splits happen when we need to insert a big item in the middle
+ * of a leaf.  A double split can leave us with 3 mostly empty leaves:
+ * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
+ *          A                 B                 C
+ *
+ * We avoid this by trying to push the items on either side of our target
+ * into the adjacent leaves.  If all goes well we can avoid the double split
+ * completely.
+ */
+static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  int data_size)
+{
+	int ret;
+	int progress = 0;
+	int slot;
+	u32 nritems;
+
+	slot = path->slots[0];
+
+	/*
+	 * try to push all the items after our slot into the
+	 * right leaf
+	 */
+	ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0)
+		progress++;
+
+	nritems = btrfs_header_nritems(path->nodes[0]);
+	/*
+	 * our goal is to get our slot at the start or end of a leaf.  If
+	 * we've done so we're done
+	 */
+	if (path->slots[0] == 0 || path->slots[0] == nritems)
+		return 0;
+
+	if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+		return 0;
+
+	/* try to push all the items before our slot into the next leaf */
+	slot = path->slots[0];
+	ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0)
+		progress++;
+
+	if (progress)
+		return 0;
+	return 1;
+}
+
+/*
  * split the path's leaf in two, making sure there is at least data_size
  * available for the resulting leaf level of the path.
  *
@@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 	int wret;
 	int split;
 	int num_doubles = 0;
+	int tried_avoid_double = 0;
 
 	l = path->nodes[0];
 	slot = path->slots[0];
@@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 		return -EOVERFLOW;
 
 	/* first try to make some room by pushing left and right */
-	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
-		wret = push_leaf_right(trans, root, path, data_size, 0);
+	if (data_size) {
+		wret = push_leaf_right(trans, root, path, data_size,
+				       data_size, 0, 0);
 		if (wret < 0)
 			return wret;
 		if (wret) {
-			wret = push_leaf_left(trans, root, path, data_size, 0);
+			wret = push_leaf_left(trans, root, path, data_size,
+					      data_size, 0, (u32)-1);
 			if (wret < 0)
 				return wret;
 		}
@@ -2923,6 +3003,8 @@ again:
 			if (mid != nritems &&
 			    leaf_space_used(l, mid, nritems - mid) +
 			    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+				if (data_size && !tried_avoid_double)
+					goto push_for_double;
 				split = 2;
 			}
 		}
@@ -2939,6 +3021,8 @@ again:
 				if (mid != nritems &&
 				    leaf_space_used(l, mid, nritems - mid) +
 				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+					if (data_size && !tried_avoid_double)
+						goto push_for_double;
 					split = 2 ;
 				}
 			}
@@ -3019,6 +3103,13 @@ again:
 	}
 
 	return ret;
+
+push_for_double:
+	push_for_double_split(trans, root, path, data_size);
+	tried_avoid_double = 1;
+	if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+		return 0;
+	goto again;
 }
 
 static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 			extent_buffer_get(leaf);
 
 			btrfs_set_path_blocking(path);
-			wret = push_leaf_left(trans, root, path, 1, 1);
+			wret = push_leaf_left(trans, root, path, 1, 1,
+					      1, (u32)-1);
 			if (wret < 0 && wret != -ENOSPC)
 				ret = wret;
 
 			if (path->nodes[0] == leaf &&
 			    btrfs_header_nritems(leaf)) {
-				wret = push_leaf_right(trans, root, path, 1, 1);
+				wret = push_leaf_right(trans, root, path, 1,
+						       1, 1, 0);
 				if (wret < 0 && wret != -ENOSPC)
 					ret = wret;
 			}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f3b287c22caf..34f7c375567e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1941,8 +1941,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 				     btrfs_level_size(tree_root,
 				     btrfs_super_log_root_level(disk_super));
 
-		log_tree_root = kzalloc(sizeof(struct btrfs_root),
-					GFP_NOFS);
+		log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+		if (!log_tree_root) {
+			err = -ENOMEM;
+			goto fail_trans_kthread;
+		}
 
 		__setup_root(nodesize, leafsize, sectorsize, stripesize,
 			     log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
@@ -1982,6 +1985,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
 	if (!fs_info->fs_root)
 		goto fail_trans_kthread;
+	if (IS_ERR(fs_info->fs_root)) {
+		err = PTR_ERR(fs_info->fs_root);
+		goto fail_trans_kthread;
+	}
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		down_read(&fs_info->cleanup_work_sem);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b9080d71991a..32d094002a57 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4360,7 +4360,8 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 
 	block_rsv = get_block_rsv(trans, root);
 	cache = btrfs_lookup_block_group(root->fs_info, buf->start);
-	BUG_ON(block_rsv->space_info != cache->space_info);
+	if (block_rsv->space_info != cache->space_info)
+		goto out;
 
 	if (btrfs_header_generation(buf) == trans->transid) {
 		if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec55..d74e6af9b53a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
 	struct writeback_control wbc_writepages = {
-		.bdi		= wbc->bdi,
 		.sync_mode	= wbc->sync_mode,
 		.older_than_this = NULL,
 		.nr_to_write	= 64,
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 		.sync_io = mode == WB_SYNC_ALL,
 	};
 	struct writeback_control wbc_writepages = {
-		.bdi		= inode->i_mapping->backing_dev_info,
 		.sync_mode	= mode,
 		.older_than_this = NULL,
 		.nr_to_write	= nr_pages * 2,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 787b50a16a14..e354c33df082 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1140,7 +1140,7 @@ int btrfs_sync_file(struct file *file, int datasync)
 	/*
 	 * ok we haven't committed the transaction yet, lets do a commit
 	 */
-	if (file && file->private_data)
+	if (file->private_data)
 		btrfs_ioctl_trans_end(file);
 
 	trans = btrfs_start_transaction(root, 0);
@@ -1190,14 +1190,22 @@ static const struct vm_operations_struct btrfs_file_vm_ops = {
 
 static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
 {
-	vma->vm_ops = &btrfs_file_vm_ops;
+	struct address_space *mapping = filp->f_mapping;
+
+	if (!mapping->a_ops->readpage)
+		return -ENOEXEC;
+
 	file_accessed(filp);
+	vma->vm_ops = &btrfs_file_vm_ops;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
+
 	return 0;
 }
 
 const struct file_operations btrfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
+	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,
 	.splice_read	= generic_file_splice_read,
 	.aio_write	= btrfs_file_aio_write,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fa6ccc1bfe2a..1bff92ad4744 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2673,7 +2673,7 @@ static int check_path_shared(struct btrfs_root *root,
 	struct extent_buffer *eb;
 	int level;
 	int ret;
-	u64 refs;
+	u64 refs = 1;
 
 	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
 		if (!path->nodes[level])
@@ -6884,7 +6884,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 		if (em->block_start == EXTENT_MAP_HOLE ||
 		    (cur_offset >= inode->i_size &&
 		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
-			ret = btrfs_prealloc_file_range(inode, 0, cur_offset,
+			ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
 							last_byte - cur_offset,
 							1 << inode->i_blkbits,
 							offset + len,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4cdb98cf26de..9254b3d58dbe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1280,7 +1280,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 	trans = btrfs_start_transaction(root, 0);
 	if (IS_ERR(trans)) {
 		err = PTR_ERR(trans);
-		goto out;
+		goto out_up_write;
 	}
 	trans->block_rsv = &root->fs_info->global_block_rsv;
 
@@ -1458,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 	 */
 
 	/* the destination must be opened for writing */
-	if (!(file->f_mode & FMODE_WRITE))
+	if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
 		return -EINVAL;
 
 	ret = mnt_want_write(file->f_path.mnt);
@@ -1511,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 
 	/* determine range to clone */
 	ret = -EINVAL;
-	if (off >= src->i_size || off + len > src->i_size)
+	if (off + len > src->i_size || off + len < off)
 		goto out_unlock;
 	if (len == 0)
 		olen = len = src->i_size - off;
@@ -1578,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 			u64 disko = 0, diskl = 0;
 			u64 datao = 0, datal = 0;
 			u8 comp;
+			u64 endoff;
 
 			size = btrfs_item_size_nr(leaf, slot);
 			read_extent_buffer(leaf, buf,
@@ -1712,9 +1713,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 			btrfs_release_path(root, path);
 
 			inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-			if (new_key.offset + datal > inode->i_size)
-				btrfs_i_size_write(inode,
-					new_key.offset + datal);
+
+			/*
+			 * we round up to the block size at eof when
+			 * determining which extents to clone above,
+			 * but shouldn't round up the file size
+			 */
+			endoff = new_key.offset + datal;
+			if (endoff > off+olen)
+				endoff = off+olen;
+			if (endoff > inode->i_size)
+				btrfs_i_size_write(inode, endoff);
+
 			BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
 			ret = btrfs_update_inode(trans, root, inode);
 			BUG_ON(ret);
@@ -1845,7 +1855,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
 	dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
 	di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
 				   dir_id, "default", 7, 1);
-	if (!di) {
+	if (IS_ERR_OR_NULL(di)) {
 		btrfs_free_path(path);
 		btrfs_end_transaction(trans, root);
 		printk(KERN_ERR "Umm, you don't have the default dir item, "
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 05d41e569236..b37d723b9d4a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -784,16 +784,17 @@ again:
 			struct btrfs_extent_ref_v0 *ref0;
 			ref0 = btrfs_item_ptr(eb, path1->slots[0],
 					struct btrfs_extent_ref_v0);
-			root = find_tree_root(rc, eb, ref0);
-			if (!root->ref_cows)
-				cur->cowonly = 1;
 			if (key.objectid == key.offset) {
+				root = find_tree_root(rc, eb, ref0);
 				if (root && !should_ignore_root(root))
 					cur->root = root;
 				else
 					list_add(&cur->list, &useless);
 				break;
 			}
+			if (is_cowonly_root(btrfs_ref_root_v0(eb,
+							      ref0)))
+				cur->cowonly = 1;
 		}
 #else
 		BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index b91ccd972644..2d958be761c8 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -330,7 +330,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 {
 	struct btrfs_path *path;
 	int ret;
-	u32 refs;
 	struct btrfs_root_item *ri;
 	struct extent_buffer *leaf;
 
@@ -344,8 +343,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	leaf = path->nodes[0];
 	ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item);
 
-	refs = btrfs_disk_root_refs(leaf, ri);
-	BUG_ON(refs != 0);
 	ret = btrfs_del_item(trans, root, path);
 out:
 	btrfs_free_path(path);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d34b2dfc9628..f2393b390318 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -360,6 +360,8 @@ static struct dentry *get_default_root(struct super_block *sb,
 	 */
 	dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
 	di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
+	if (IS_ERR(di))
+		return ERR_CAST(di);
 	if (!di) {
 		/*
 		 * Ok the default dir item isn't there.  This is weird since
@@ -390,8 +392,8 @@ setup_root:
 	location.offset = 0;
 
 	inode = btrfs_iget(sb, &location, new_root, &new);
-	if (!inode)
-		return ERR_PTR(-ENOMEM);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
 
 	/*
 	 * If we're just mounting the root most subvol put the inode and return
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 83d4d2785ffe..3fe49042d8ad 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -493,7 +493,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
 		return -EAGAIN;
 	}
 
-	op = le32_to_cpu(head->op);
+	op = le16_to_cpu(head->op);
 	result = le32_to_cpu(head->result);
 	dout("handle_reply op %d result %d\n", op, result);
 	switch (op) {
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ae3e3a306445..74144d6389f0 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -244,8 +244,14 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx)
 	struct ceph_cap *cap = NULL;
 
 	/* temporary, until we do something about cap import/export */
-	if (!ctx)
-		return kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+	if (!ctx) {
+		cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+		if (cap) {
+			caps_use_count++;
+			caps_total_count++;
+		}
+		return cap;
+	}
 
 	spin_lock(&caps_list_lock);
 	dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
@@ -981,6 +987,46 @@ static int send_cap_msg(struct ceph_mds_session *session,
 	return 0;
 }
 
+static void __queue_cap_release(struct ceph_mds_session *session,
+				u64 ino, u64 cap_id, u32 migrate_seq,
+				u32 issue_seq)
+{
+	struct ceph_msg *msg;
+	struct ceph_mds_cap_release *head;
+	struct ceph_mds_cap_item *item;
+
+	spin_lock(&session->s_cap_lock);
+	BUG_ON(!session->s_num_cap_releases);
+	msg = list_first_entry(&session->s_cap_releases,
+			       struct ceph_msg, list_head);
+
+	dout(" adding %llx release to mds%d msg %p (%d left)\n",
+	     ino, session->s_mds, msg, session->s_num_cap_releases);
+
+	BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
+	head = msg->front.iov_base;
+	head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
+	item = msg->front.iov_base + msg->front.iov_len;
+	item->ino = cpu_to_le64(ino);
+	item->cap_id = cpu_to_le64(cap_id);
+	item->migrate_seq = cpu_to_le32(migrate_seq);
+	item->seq = cpu_to_le32(issue_seq);
+
+	session->s_num_cap_releases--;
+
+	msg->front.iov_len += sizeof(*item);
+	if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
+		dout(" release msg %p full\n", msg);
+		list_move_tail(&msg->list_head, &session->s_cap_releases_done);
+	} else {
+		dout(" release msg %p at %d/%d (%d)\n", msg,
+		     (int)le32_to_cpu(head->num),
+		     (int)CEPH_CAPS_PER_RELEASE,
+		     (int)msg->front.iov_len);
+	}
+	spin_unlock(&session->s_cap_lock);
+}
+
 /*
  * Queue cap releases when an inode is dropped from our cache.  Since
  * inode is about to be destroyed, there is no need for i_lock.
@@ -994,41 +1040,9 @@ void ceph_queue_caps_release(struct inode *inode)
 	while (p) {
 		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
 		struct ceph_mds_session *session = cap->session;
-		struct ceph_msg *msg;
-		struct ceph_mds_cap_release *head;
-		struct ceph_mds_cap_item *item;
 
-		spin_lock(&session->s_cap_lock);
-		BUG_ON(!session->s_num_cap_releases);
-		msg = list_first_entry(&session->s_cap_releases,
-				       struct ceph_msg, list_head);
-
-		dout(" adding %p release to mds%d msg %p (%d left)\n",
-		     inode, session->s_mds, msg, session->s_num_cap_releases);
-
-		BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
-		head = msg->front.iov_base;
-		head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
-		item = msg->front.iov_base + msg->front.iov_len;
-		item->ino = cpu_to_le64(ceph_ino(inode));
-		item->cap_id = cpu_to_le64(cap->cap_id);
-		item->migrate_seq = cpu_to_le32(cap->mseq);
-		item->seq = cpu_to_le32(cap->issue_seq);
-
-		session->s_num_cap_releases--;
-
-		msg->front.iov_len += sizeof(*item);
-		if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
-			dout(" release msg %p full\n", msg);
-			list_move_tail(&msg->list_head,
-				       &session->s_cap_releases_done);
-		} else {
-			dout(" release msg %p at %d/%d (%d)\n", msg,
-			     (int)le32_to_cpu(head->num),
-			     (int)CEPH_CAPS_PER_RELEASE,
-			     (int)msg->front.iov_len);
-		}
-		spin_unlock(&session->s_cap_lock);
+		__queue_cap_release(session, ceph_ino(inode), cap->cap_id,
+				    cap->mseq, cap->issue_seq);
 		p = rb_next(p);
 		__ceph_remove_cap(cap);
 	}
@@ -2655,7 +2669,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	struct ceph_mds_caps *h;
 	int mds = session->s_mds;
 	int op;
-	u32 seq;
+	u32 seq, mseq;
 	struct ceph_vino vino;
 	u64 cap_id;
 	u64 size, max_size;
@@ -2675,6 +2689,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	vino.snap = CEPH_NOSNAP;
 	cap_id = le64_to_cpu(h->cap_id);
 	seq = le32_to_cpu(h->seq);
+	mseq = le32_to_cpu(h->migrate_seq);
 	size = le64_to_cpu(h->size);
 	max_size = le64_to_cpu(h->max_size);
 
@@ -2689,6 +2704,18 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	     vino.snap, inode);
 	if (!inode) {
 		dout(" i don't have ino %llx\n", vino.ino);
+
+		if (op == CEPH_CAP_OP_IMPORT)
+			__queue_cap_release(session, vino.ino, cap_id,
+					    mseq, seq);
+
+		/*
+		 * send any full release message to try to move things
+		 * along for the mds (who clearly thinks we still have this
+		 * cap).
+		 */
+		ceph_add_cap_releases(mdsc, session, -1);
+		ceph_send_cap_releases(mdsc, session);
 		goto done;
 	}
 
@@ -2714,7 +2741,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	spin_lock(&inode->i_lock);
 	cap = __get_cap_for_mds(ceph_inode(inode), mds);
 	if (!cap) {
-		dout("no cap on %p ino %llx.%llx from mds%d, releasing\n",
+		dout(" no cap on %p ino %llx.%llx from mds%d\n",
 		     inode, ceph_ino(inode), ceph_snap(inode), mds);
 		spin_unlock(&inode->i_lock);
 		goto done;
@@ -2865,18 +2892,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_cap *cap;
 	struct ceph_mds_request_release *rel = *p;
+	int used, dirty;
 	int ret = 0;
-	int used = 0;
 
 	spin_lock(&inode->i_lock);
 	used = __ceph_caps_used(ci);
+	dirty = __ceph_caps_dirty(ci);
 
-	dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
-	     mds, ceph_cap_string(used), ceph_cap_string(drop),
+	dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n",
+	     inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop),
 	     ceph_cap_string(unless));
 
-	/* only drop unused caps */
-	drop &= ~used;
+	/* only drop unused, clean caps */
+	drop &= ~(used | dirty);
 
 	cap = __get_cap_for_mds(ci, mds);
 	if (cap && __cap_is_valid(cap)) {
diff --git a/fs/ceph/crush/mapper.c b/fs/ceph/crush/mapper.c
index 9ba54efb6543..a4eec133258e 100644
--- a/fs/ceph/crush/mapper.c
+++ b/fs/ceph/crush/mapper.c
@@ -238,7 +238,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
 
 static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
 {
-	dprintk("choose %d x=%d r=%d\n", in->id, x, r);
+	dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
 	switch (in->alg) {
 	case CRUSH_BUCKET_UNIFORM:
 		return bucket_uniform_choose((struct crush_bucket_uniform *)in,
@@ -264,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
  */
 static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
 {
-	if (weight[item] >= 0x1000)
+	if (weight[item] >= 0x10000)
 		return 0;
 	if (weight[item] == 0)
 		return 1;
@@ -305,7 +305,9 @@ static int crush_choose(struct crush_map *map,
 	int itemtype;
 	int collide, reject;
 	const int orig_tries = 5; /* attempts before we fall back to search */
-	dprintk("choose bucket %d x %d outpos %d\n", bucket->id, x, outpos);
+
+	dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
+		bucket->id, x, outpos, numrep);
 
 	for (rep = outpos; rep < numrep; rep++) {
 		/* keep trying until we get a non-out, non-colliding item */
@@ -366,6 +368,7 @@ static int crush_choose(struct crush_map *map,
 				BUG_ON(item >= 0 ||
 				       (-1-item) >= map->max_buckets);
 				in = map->buckets[-1-item];
+				retry_bucket = 1;
 				continue;
 			}
 
@@ -377,15 +380,25 @@ static int crush_choose(struct crush_map *map,
 				}
 			}
 
-			if (recurse_to_leaf &&
-			    item < 0 &&
-			    crush_choose(map, map->buckets[-1-item],
-					 weight,
-					 x, outpos+1, 0,
-					 out2, outpos,
-					 firstn, 0, NULL) <= outpos) {
-				reject = 1;
-			} else {
+			reject = 0;
+			if (recurse_to_leaf) {
+				if (item < 0) {
+					if (crush_choose(map,
+						 map->buckets[-1-item],
+						 weight,
+						 x, outpos+1, 0,
+						 out2, outpos,
+						 firstn, 0,
+						 NULL) <= outpos)
+						/* didn't get leaf */
+						reject = 1;
+				} else {
+					/* we already have a leaf! */
+					out2[outpos] = item;
+				}
+			}
+
+			if (!reject) {
 				/* out? */
 				if (itemtype == 0)
 					reject = is_out(map, weight,
@@ -424,12 +437,12 @@ reject:
 					continue;
 				}
 
-		dprintk("choose got %d\n", item);
+		dprintk("CHOOSE got %d\n", item);
 		out[outpos] = item;
 		outpos++;
 	}
 
-	dprintk("choose returns %d\n", outpos);
+	dprintk("CHOOSE returns %d\n", outpos);
 	return outpos;
 }
 
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 3be33fb066cc..f2f5332ddbba 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -261,7 +261,7 @@ static int osdc_show(struct seq_file *s, void *pp)
 
 static int caps_show(struct seq_file *s, void *p)
 {
-	struct ceph_client *client = p;
+	struct ceph_client *client = s->private;
 	int total, avail, used, reserved, min;
 
 	ceph_reservation_status(client, &total, &avail, &used, &reserved, &min);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 226f5a50d362..8f9b9fe8ef9f 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -827,7 +827,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
827 827
828 spin_lock(&dcache_lock); 828 spin_lock(&dcache_lock);
829 spin_lock(&dn->d_lock); 829 spin_lock(&dn->d_lock);
830 list_move_tail(&dir->d_subdirs, &dn->d_u.d_child); 830 list_move(&dn->d_u.d_child, &dir->d_subdirs);
831 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, 831 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
832 dn->d_u.d_child.prev, dn->d_u.d_child.next); 832 dn->d_u.d_child.prev, dn->d_u.d_child.next);
833 spin_unlock(&dn->d_lock); 833 spin_unlock(&dn->d_lock);
@@ -854,8 +854,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
854 d_drop(dn); 854 d_drop(dn);
855 realdn = d_materialise_unique(dn, in); 855 realdn = d_materialise_unique(dn, in);
856 if (IS_ERR(realdn)) { 856 if (IS_ERR(realdn)) {
857 pr_err("splice_dentry error %p inode %p ino %llx.%llx\n", 857 pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
858 dn, in, ceph_vinop(in)); 858 PTR_ERR(realdn), dn, in, ceph_vinop(in));
859 if (prehash) 859 if (prehash)
860 *prehash = false; /* don't rehash on error */ 860 *prehash = false; /* don't rehash on error */
861 dn = realdn; /* note realdn contains the error */ 861 dn = realdn; /* note realdn contains the error */
@@ -1234,18 +1234,23 @@ retry_lookup:
1234 goto out; 1234 goto out;
1235 } 1235 }
1236 dn = splice_dentry(dn, in, NULL); 1236 dn = splice_dentry(dn, in, NULL);
1237 if (IS_ERR(dn))
1238 dn = NULL;
1237 } 1239 }
1238 1240
1239 if (fill_inode(in, &rinfo->dir_in[i], NULL, session, 1241 if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
1240 req->r_request_started, -1, 1242 req->r_request_started, -1,
1241 &req->r_caps_reservation) < 0) { 1243 &req->r_caps_reservation) < 0) {
1242 pr_err("fill_inode badness on %p\n", in); 1244 pr_err("fill_inode badness on %p\n", in);
1243 dput(dn); 1245 goto next_item;
1244 continue;
1245 } 1246 }
1246 update_dentry_lease(dn, rinfo->dir_dlease[i], 1247 if (dn)
1247 req->r_session, req->r_request_started); 1248 update_dentry_lease(dn, rinfo->dir_dlease[i],
1248 dput(dn); 1249 req->r_session,
1250 req->r_request_started);
1251next_item:
1252 if (dn)
1253 dput(dn);
1249 } 1254 }
1250 req->r_did_prepopulate = true; 1255 req->r_did_prepopulate = true;
1251 1256
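The list_move() fix in ceph_set_dentry_offset() restores the documented argument order: first the entry being moved, then the list head it joins; the old call spliced the parent's d_subdirs head onto the child instead. A self-contained model of the helper showing why the order matters (minimal re-implementation, not the kernel's list.h):

#include <stdio.h>

struct list_head {
        struct list_head *next, *prev;
};

static void list_init(struct list_head *h)
{
        h->next = h->prev = h;
}

static void list_del_entry(struct list_head *e)
{
        e->prev->next = e->next;
        e->next->prev = e->prev;
}

static void list_add_head(struct list_head *e, struct list_head *head)
{
        e->next = head->next;
        e->prev = head;
        head->next->prev = e;
        head->next = e;
}

/* like the kernel's list_move(): delete entry, add it to head's list */
static void list_move_entry(struct list_head *entry, struct list_head *head)
{
        list_del_entry(entry);
        list_add_head(entry, head);
}

int main(void)
{
        struct list_head subdirs, child;

        list_init(&subdirs);
        list_init(&child);
        /* correct order: the child dentry joins the parent's list */
        list_move_entry(&child, &subdirs);
        printf("child on subdirs list: %d\n", subdirs.next == &child);
        return 0;
}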
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b49f12822cbc..3ab79f6c4ce8 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1066,9 +1066,9 @@ static int trim_caps(struct ceph_mds_client *mdsc,
1066 * 1066 *
1067 * Called under s_mutex. 1067 * Called under s_mutex.
1068 */ 1068 */
1069static int add_cap_releases(struct ceph_mds_client *mdsc, 1069int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
1070 struct ceph_mds_session *session, 1070 struct ceph_mds_session *session,
1071 int extra) 1071 int extra)
1072{ 1072{
1073 struct ceph_msg *msg; 1073 struct ceph_msg *msg;
1074 struct ceph_mds_cap_release *head; 1074 struct ceph_mds_cap_release *head;
@@ -1176,8 +1176,8 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
1176/* 1176/*
1177 * called under s_mutex 1177 * called under s_mutex
1178 */ 1178 */
1179static void send_cap_releases(struct ceph_mds_client *mdsc, 1179void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
1180 struct ceph_mds_session *session) 1180 struct ceph_mds_session *session)
1181{ 1181{
1182 struct ceph_msg *msg; 1182 struct ceph_msg *msg;
1183 1183
@@ -1980,7 +1980,7 @@ out_err:
1980 } 1980 }
1981 mutex_unlock(&mdsc->mutex); 1981 mutex_unlock(&mdsc->mutex);
1982 1982
1983 add_cap_releases(mdsc, req->r_session, -1); 1983 ceph_add_cap_releases(mdsc, req->r_session, -1);
1984 mutex_unlock(&session->s_mutex); 1984 mutex_unlock(&session->s_mutex);
1985 1985
1986 /* kick calling process */ 1986 /* kick calling process */
@@ -2433,6 +2433,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2433 struct ceph_dentry_info *di; 2433 struct ceph_dentry_info *di;
2434 int mds = session->s_mds; 2434 int mds = session->s_mds;
2435 struct ceph_mds_lease *h = msg->front.iov_base; 2435 struct ceph_mds_lease *h = msg->front.iov_base;
2436 u32 seq;
2436 struct ceph_vino vino; 2437 struct ceph_vino vino;
2437 int mask; 2438 int mask;
2438 struct qstr dname; 2439 struct qstr dname;
@@ -2446,6 +2447,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2446 vino.ino = le64_to_cpu(h->ino); 2447 vino.ino = le64_to_cpu(h->ino);
2447 vino.snap = CEPH_NOSNAP; 2448 vino.snap = CEPH_NOSNAP;
2448 mask = le16_to_cpu(h->mask); 2449 mask = le16_to_cpu(h->mask);
2450 seq = le32_to_cpu(h->seq);
2449 dname.name = (void *)h + sizeof(*h) + sizeof(u32); 2451 dname.name = (void *)h + sizeof(*h) + sizeof(u32);
2450 dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32); 2452 dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
2451 if (dname.len != get_unaligned_le32(h+1)) 2453 if (dname.len != get_unaligned_le32(h+1))
@@ -2456,8 +2458,9 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2456 2458
2457 /* lookup inode */ 2459 /* lookup inode */
2458 inode = ceph_find_inode(sb, vino); 2460 inode = ceph_find_inode(sb, vino);
2459 dout("handle_lease '%s', mask %d, ino %llx %p\n", 2461 dout("handle_lease %s, mask %d, ino %llx %p %.*s\n",
2460 ceph_lease_op_name(h->action), mask, vino.ino, inode); 2462 ceph_lease_op_name(h->action), mask, vino.ino, inode,
2463 dname.len, dname.name);
2461 if (inode == NULL) { 2464 if (inode == NULL) {
2462 dout("handle_lease no inode %llx\n", vino.ino); 2465 dout("handle_lease no inode %llx\n", vino.ino);
2463 goto release; 2466 goto release;
@@ -2482,7 +2485,8 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2482 switch (h->action) { 2485 switch (h->action) {
2483 case CEPH_MDS_LEASE_REVOKE: 2486 case CEPH_MDS_LEASE_REVOKE:
2484 if (di && di->lease_session == session) { 2487 if (di && di->lease_session == session) {
2485 h->seq = cpu_to_le32(di->lease_seq); 2488 if (ceph_seq_cmp(di->lease_seq, seq) > 0)
2489 h->seq = cpu_to_le32(di->lease_seq);
2486 __ceph_mdsc_drop_dentry_lease(dentry); 2490 __ceph_mdsc_drop_dentry_lease(dentry);
2487 } 2491 }
2488 release = 1; 2492 release = 1;
@@ -2496,7 +2500,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2496 unsigned long duration = 2500 unsigned long duration =
2497 le32_to_cpu(h->duration_ms) * HZ / 1000; 2501 le32_to_cpu(h->duration_ms) * HZ / 1000;
2498 2502
2499 di->lease_seq = le32_to_cpu(h->seq); 2503 di->lease_seq = seq;
2500 dentry->d_time = di->lease_renew_from + duration; 2504 dentry->d_time = di->lease_renew_from + duration;
2501 di->lease_renew_after = di->lease_renew_from + 2505 di->lease_renew_after = di->lease_renew_from +
2502 (duration >> 1); 2506 (duration >> 1);
@@ -2686,10 +2690,10 @@ static void delayed_work(struct work_struct *work)
2686 send_renew_caps(mdsc, s); 2690 send_renew_caps(mdsc, s);
2687 else 2691 else
2688 ceph_con_keepalive(&s->s_con); 2692 ceph_con_keepalive(&s->s_con);
2689 add_cap_releases(mdsc, s, -1); 2693 ceph_add_cap_releases(mdsc, s, -1);
2690 if (s->s_state == CEPH_MDS_SESSION_OPEN || 2694 if (s->s_state == CEPH_MDS_SESSION_OPEN ||
2691 s->s_state == CEPH_MDS_SESSION_HUNG) 2695 s->s_state == CEPH_MDS_SESSION_HUNG)
2692 send_cap_releases(mdsc, s); 2696 ceph_send_cap_releases(mdsc, s);
2693 mutex_unlock(&s->s_mutex); 2697 mutex_unlock(&s->s_mutex);
2694 ceph_put_mds_session(s); 2698 ceph_put_mds_session(s);
2695 2699
@@ -2779,6 +2783,12 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
2779 drop_leases(mdsc); 2783 drop_leases(mdsc);
2780 ceph_flush_dirty_caps(mdsc); 2784 ceph_flush_dirty_caps(mdsc);
2781 wait_requests(mdsc); 2785 wait_requests(mdsc);
2786
2787 /*
2788 * wait for reply handlers to drop their request refs and
2789 * their inode/dcache refs
2790 */
2791 ceph_msgr_flush();
2782} 2792}
2783 2793
2784/* 2794/*
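The REVOKE hunk above only echoes back the client's lease_seq when it is strictly newer than the seq carried in the message, using a wraparound-safe compare. A sketch of the idiom ceph_seq_cmp() is assumed to implement (unsigned subtraction reinterpreted as signed):

#include <stdint.h>
#include <stdio.h>

/* >0: a is after b, <0: a is before b, 0: equal -- even across wrap */
static int32_t seq_cmp(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b);
}

int main(void)
{
        printf("1 after 0xffffffff: %d\n", seq_cmp(1u, 0xffffffffu) > 0);
        printf("5 after 9:          %d\n", seq_cmp(5u, 9u) > 0);
        return 0;
}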
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d9936c4f1212..b292fa42a66d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -322,6 +322,12 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
322 kref_put(&req->r_kref, ceph_mdsc_release_request); 322 kref_put(&req->r_kref, ceph_mdsc_release_request);
323} 323}
324 324
325extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
326 struct ceph_mds_session *session,
327 int extra);
328extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
329 struct ceph_mds_session *session);
330
325extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); 331extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
326 332
327extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, 333extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 64b8b1f7863d..9ad43a310a41 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -657,7 +657,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
657 dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, 657 dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
658 con->connect_seq, global_seq, proto); 658 con->connect_seq, global_seq, proto);
659 659
660 con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT; 660 con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT);
661 con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); 661 con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
662 con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); 662 con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
663 con->out_connect.global_seq = cpu_to_le32(global_seq); 663 con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1396,10 +1396,12 @@ static int read_partial_message(struct ceph_connection *con)
1396 if (!con->in_msg) { 1396 if (!con->in_msg) {
1397 dout("got hdr type %d front %d data %d\n", con->in_hdr.type, 1397 dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
1398 con->in_hdr.front_len, con->in_hdr.data_len); 1398 con->in_hdr.front_len, con->in_hdr.data_len);
1399 skip = 0;
1399 con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); 1400 con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
1400 if (skip) { 1401 if (skip) {
1401 /* skip this message */ 1402 /* skip this message */
1402 dout("alloc_msg said skip message\n"); 1403 dout("alloc_msg said skip message\n");
1404 BUG_ON(con->in_msg);
1403 con->in_base_pos = -front_len - middle_len - data_len - 1405 con->in_base_pos = -front_len - middle_len - data_len -
1404 sizeof(m->footer); 1406 sizeof(m->footer);
1405 con->in_tag = CEPH_MSGR_TAG_READY; 1407 con->in_tag = CEPH_MSGR_TAG_READY;
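The prepare_write_connect() fix wraps the features word in cpu_to_le64() so the wire bytes are identical on big- and little-endian hosts. A userspace sketch of the encoding rule, with glibc's htole64() standing in for the kernel helper (an assumption about the build environment):

#include <endian.h>     /* htole64() on glibc; adjust for other libcs */
#include <stdint.h>
#include <stdio.h>

struct wire_connect {
        uint64_t features;      /* stored little-endian on the wire */
};

int main(void)
{
        struct wire_connect c;
        const unsigned char *b;

        c.features = htole64(0xffULL);  /* the explicit conversion */
        b = (const unsigned char *)&c.features;
        printf("first wire byte: 0x%02x\n", b[0]);  /* 0xff on any host */
        return 0;
}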
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index 21c62e9b7d1d..cc115eafae11 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -400,6 +400,8 @@ static void release_generic_request(struct kref *kref)
400 ceph_msg_put(req->reply); 400 ceph_msg_put(req->reply);
401 if (req->request) 401 if (req->request)
402 ceph_msg_put(req->request); 402 ceph_msg_put(req->request);
403
404 kfree(req);
403} 405}
404 406
405static void put_generic_request(struct ceph_mon_generic_request *req) 407static void put_generic_request(struct ceph_mon_generic_request *req)
@@ -723,7 +725,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
723 dout("authenticated, starting session\n"); 725 dout("authenticated, starting session\n");
724 726
725 monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; 727 monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
726 monc->client->msgr->inst.name.num = monc->auth->global_id; 728 monc->client->msgr->inst.name.num =
729 cpu_to_le64(monc->auth->global_id);
727 730
728 __send_subscribe(monc); 731 __send_subscribe(monc);
729 __resend_generic_request(monc); 732 __resend_generic_request(monc);
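The release_generic_request() hunk adds the final kfree(): the release callback runs when the last reference is dropped, so it must free both the owned buffers and the request itself. A simplified refcount model of the pattern (plain counter instead of kref, illustrative names):

#include <stdio.h>
#include <stdlib.h>

struct generic_request {
        int refcount;
        char *reply;    /* owned buffer */
};

static void release_request(struct generic_request *req)
{
        free(req->reply);
        free(req);      /* the missing piece: free the request itself */
}

static void put_request(struct generic_request *req)
{
        if (--req->refcount == 0)
                release_request(req);
}

int main(void)
{
        struct generic_request *req = calloc(1, sizeof(*req));

        req->refcount = 1;
        req->reply = malloc(16);
        put_request(req);       /* last ref: reply and req both freed */
        puts("released");
        return 0;
}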
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index d25b4add85b4..92b7251a53f1 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -1344,7 +1344,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
1344 int type = le16_to_cpu(msg->hdr.type); 1344 int type = le16_to_cpu(msg->hdr.type);
1345 1345
1346 if (!osd) 1346 if (!osd)
1347 return; 1347 goto out;
1348 osdc = osd->o_osdc; 1348 osdc = osd->o_osdc;
1349 1349
1350 switch (type) { 1350 switch (type) {
@@ -1359,6 +1359,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
1359 pr_err("received unknown message type %d %s\n", type, 1359 pr_err("received unknown message type %d %s\n", type,
1360 ceph_msg_type_name(type)); 1360 ceph_msg_type_name(type));
1361 } 1361 }
1362out:
1362 ceph_msg_put(msg); 1363 ceph_msg_put(msg);
1363} 1364}
1364 1365
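The dispatch() fix replaces an early return with goto out so the ceph_msg_put() at the end runs on every path and the message reference is never leaked. A minimal model of the single-exit idiom (stub types):

#include <stdio.h>

struct msg {
        int refs;
};

static void msg_put(struct msg *m)
{
        m->refs--;
}

static void dispatch(struct msg *m, int have_osd)
{
        if (!have_osd)
                goto out;       /* a bare return here leaked the ref */

        /* ... handle the message ... */
out:
        msg_put(m);             /* reference dropped on every path */
}

int main(void)
{
        struct msg m = { .refs = 1 };

        dispatch(&m, 0);
        printf("refs after dispatch: %d\n", m.refs);    /* 0, not 1 */
        return 0;
}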
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index ddc656fb5c05..50ce64ebd330 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -707,6 +707,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
707 newcrush = crush_decode(*p, min(*p+len, end)); 707 newcrush = crush_decode(*p, min(*p+len, end));
708 if (IS_ERR(newcrush)) 708 if (IS_ERR(newcrush))
709 return ERR_CAST(newcrush); 709 return ERR_CAST(newcrush);
710 *p += len;
710 } 711 }
711 712
712 /* new flags? */ 713 /* new flags? */
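The osdmap_apply_incremental() fix advances the decode cursor past the CRUSH map it just consumed (*p += len); without it the following fields are parsed from the wrong offset. A sketch of the cursor pattern with a hypothetical decode_chunk() helper:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int decode_chunk(void **p, const void *end, void *out, size_t len)
{
        if ((const char *)*p + len > (const char *)end)
                return -1;              /* truncated input */
        memcpy(out, *p, len);
        *p = (char *)*p + len;          /* the fix: always advance */
        return 0;
}

int main(void)
{
        uint8_t buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
        void *p = buf;
        uint32_t a, b;

        decode_chunk(&p, buf + sizeof(buf), &a, sizeof(a));
        decode_chunk(&p, buf + sizeof(buf), &b, sizeof(b));
        /* b was decoded from bytes 4..7, not 0..3 again */
        printf("a=0x%08x b=0x%08x\n", a, b);
        return 0;
}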
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4e0bee240b9d..fa87f51e38e1 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -89,7 +89,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
89 89
90 buf->f_files = le64_to_cpu(st.num_objects); 90 buf->f_files = le64_to_cpu(st.num_objects);
91 buf->f_ffree = -1; 91 buf->f_ffree = -1;
92 buf->f_namelen = PATH_MAX; 92 buf->f_namelen = NAME_MAX;
93 buf->f_frsize = PAGE_CACHE_SIZE; 93 buf->f_frsize = PAGE_CACHE_SIZE;
94 94
95 /* leave fsid little-endian, regardless of host endianness */ 95 /* leave fsid little-endian, regardless of host endianness */
@@ -926,7 +926,7 @@ static int ceph_compare_super(struct super_block *sb, void *data)
926/* 926/*
927 * construct our own bdi so we can control readahead, etc. 927 * construct our own bdi so we can control readahead, etc.
928 */ 928 */
929static atomic_long_t bdi_seq = ATOMIC_INIT(0); 929static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
930 930
931static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) 931static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
932{ 932{
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 78c02eb4cb1f..484e52bb40bb 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -473,14 +473,24 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
473 return 0; 473 return 0;
474} 474}
475 475
476void cifs_drop_inode(struct inode *inode)
477{
478 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
479
480 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
481 return generic_drop_inode(inode);
482
483 return generic_delete_inode(inode);
484}
485
476static const struct super_operations cifs_super_ops = { 486static const struct super_operations cifs_super_ops = {
477 .put_super = cifs_put_super, 487 .put_super = cifs_put_super,
478 .statfs = cifs_statfs, 488 .statfs = cifs_statfs,
479 .alloc_inode = cifs_alloc_inode, 489 .alloc_inode = cifs_alloc_inode,
480 .destroy_inode = cifs_destroy_inode, 490 .destroy_inode = cifs_destroy_inode,
481/* .drop_inode = generic_delete_inode, 491 .drop_inode = cifs_drop_inode,
482 .delete_inode = cifs_delete_inode, */ /* Do not need above two 492/* .delete_inode = cifs_delete_inode, */ /* Do not need above
483 functions unless later we add lazy close of inodes or unless the 493 function unless later we add lazy close of inodes or unless the
484 kernel forgets to call us with the same number of releases (closes) 494 kernel forgets to call us with the same number of releases (closes)
485 as opens */ 495 as opens */
486 .show_options = cifs_show_options, 496 .show_options = cifs_show_options,
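The new cifs_drop_inode() picks a caching policy per mount: with server-assigned inode numbers (CIFS_MOUNT_SERVER_INUM) inodes are safe to keep cached, otherwise they are deleted at last use. A stub model of that decision (the flag value is invented for the demo, not the real mount-flag plumbing):

#include <stdio.h>

#define MNT_SERVER_INUM 0x1     /* invented flag value for the demo */

enum drop_policy {
        DROP_KEEP_CACHED,       /* generic_drop_inode() behaviour */
        DROP_DELETE             /* generic_delete_inode() behaviour */
};

static enum drop_policy drop_inode_policy(unsigned int mnt_flags)
{
        if (mnt_flags & MNT_SERVER_INUM)
                return DROP_KEEP_CACHED;  /* stable inode numbers */
        return DROP_DELETE;     /* noserverino: evict at last iput */
}

int main(void)
{
        printf("serverino: %d, noserverino: %d\n",
               drop_inode_policy(MNT_SERVER_INUM), drop_inode_policy(0));
        return 0;
}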
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fb1657e0fdb8..fb6318b81509 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -106,7 +106,6 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
106 __u16 fileHandle, struct file *file, 106 __u16 fileHandle, struct file *file,
107 struct vfsmount *mnt, unsigned int oflags); 107 struct vfsmount *mnt, unsigned int oflags);
108extern int cifs_posix_open(char *full_path, struct inode **pinode, 108extern int cifs_posix_open(char *full_path, struct inode **pinode,
109 struct vfsmount *mnt,
110 struct super_block *sb, 109 struct super_block *sb,
111 int mode, int oflags, 110 int mode, int oflags,
112 __u32 *poplock, __u16 *pnetfid, int xid); 111 __u32 *poplock, __u16 *pnetfid, int xid);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 391816b461ca..e7ae78b66fa1 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -25,6 +25,7 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/namei.h> 26#include <linux/namei.h>
27#include <linux/mount.h> 27#include <linux/mount.h>
28#include <linux/file.h>
28#include "cifsfs.h" 29#include "cifsfs.h"
29#include "cifspdu.h" 30#include "cifspdu.h"
30#include "cifsglob.h" 31#include "cifsglob.h"
@@ -184,12 +185,13 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
184 } 185 }
185 write_unlock(&GlobalSMBSeslock); 186 write_unlock(&GlobalSMBSeslock);
186 187
188 file->private_data = pCifsFile;
189
187 return pCifsFile; 190 return pCifsFile;
188} 191}
189 192
190int cifs_posix_open(char *full_path, struct inode **pinode, 193int cifs_posix_open(char *full_path, struct inode **pinode,
191 struct vfsmount *mnt, struct super_block *sb, 194 struct super_block *sb, int mode, int oflags,
192 int mode, int oflags,
193 __u32 *poplock, __u16 *pnetfid, int xid) 195 __u32 *poplock, __u16 *pnetfid, int xid)
194{ 196{
195 int rc; 197 int rc;
@@ -258,19 +260,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
258 cifs_fattr_to_inode(*pinode, &fattr); 260 cifs_fattr_to_inode(*pinode, &fattr);
259 } 261 }
260 262
261 /*
262 * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to
263 * file->private_data.
264 */
265 if (mnt) {
266 struct cifsFileInfo *pfile_info;
267
268 pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt,
269 oflags);
270 if (pfile_info == NULL)
271 rc = -ENOMEM;
272 }
273
274posix_open_ret: 263posix_open_ret:
275 kfree(presp_data); 264 kfree(presp_data);
276 return rc; 265 return rc;
@@ -298,7 +287,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
298 int create_options = CREATE_NOT_DIR; 287 int create_options = CREATE_NOT_DIR;
299 __u32 oplock = 0; 288 __u32 oplock = 0;
300 int oflags; 289 int oflags;
301 bool posix_create = false;
302 /* 290 /*
303 * BB below access is probably too much for mknod to request 291 * BB below access is probably too much for mknod to request
304 * but we have to do query and setpathinfo so requesting 292 * but we have to do query and setpathinfo so requesting
@@ -339,7 +327,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
339 (CIFS_UNIX_POSIX_PATH_OPS_CAP & 327 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
340 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 328 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
341 rc = cifs_posix_open(full_path, &newinode, 329 rc = cifs_posix_open(full_path, &newinode,
342 nd ? nd->path.mnt : NULL,
343 inode->i_sb, mode, oflags, &oplock, &fileHandle, xid); 330 inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
344 /* EIO could indicate that (posix open) operation is not 331 /* EIO could indicate that (posix open) operation is not
345 supported, despite what server claimed in capability 332 supported, despite what server claimed in capability
@@ -347,7 +334,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
347 handled in posix open */ 334 handled in posix open */
348 335
349 if (rc == 0) { 336 if (rc == 0) {
350 posix_create = true;
351 if (newinode == NULL) /* query inode info */ 337 if (newinode == NULL) /* query inode info */
352 goto cifs_create_get_file_info; 338 goto cifs_create_get_file_info;
353 else /* success, no need to query */ 339 else /* success, no need to query */
@@ -478,21 +464,28 @@ cifs_create_set_dentry:
478 else 464 else
479 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); 465 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
480 466
481 /* nfsd case - nfs srv does not set nd */ 467 if (newinode && nd && (nd->flags & LOOKUP_OPEN)) {
482 if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
483 /* mknod case - do not leave file open */
484 CIFSSMBClose(xid, tcon, fileHandle);
485 } else if (!(posix_create) && (newinode)) {
486 struct cifsFileInfo *pfile_info; 468 struct cifsFileInfo *pfile_info;
487 /* 469 struct file *filp;
488 * cifs_fill_filedata() takes care of setting cifsFileInfo 470
489 * pointer to file->private_data. 471 filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
490 */ 472 if (IS_ERR(filp)) {
491 pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL, 473 rc = PTR_ERR(filp);
474 CIFSSMBClose(xid, tcon, fileHandle);
475 goto cifs_create_out;
476 }
477
478 pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp,
492 nd->path.mnt, oflags); 479 nd->path.mnt, oflags);
493 if (pfile_info == NULL) 480 if (pfile_info == NULL) {
481 fput(filp);
482 CIFSSMBClose(xid, tcon, fileHandle);
494 rc = -ENOMEM; 483 rc = -ENOMEM;
484 }
485 } else {
486 CIFSSMBClose(xid, tcon, fileHandle);
495 } 487 }
488
496cifs_create_out: 489cifs_create_out:
497 kfree(buf); 490 kfree(buf);
498 kfree(full_path); 491 kfree(full_path);
@@ -636,6 +629,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
636 bool posix_open = false; 629 bool posix_open = false;
637 struct cifs_sb_info *cifs_sb; 630 struct cifs_sb_info *cifs_sb;
638 struct cifsTconInfo *pTcon; 631 struct cifsTconInfo *pTcon;
632 struct cifsFileInfo *cfile;
639 struct inode *newInode = NULL; 633 struct inode *newInode = NULL;
640 char *full_path = NULL; 634 char *full_path = NULL;
641 struct file *filp; 635 struct file *filp;
@@ -703,7 +697,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
703 if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && 697 if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
704 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && 698 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
705 (nd->intent.open.flags & O_CREAT)) { 699 (nd->intent.open.flags & O_CREAT)) {
706 rc = cifs_posix_open(full_path, &newInode, nd->path.mnt, 700 rc = cifs_posix_open(full_path, &newInode,
707 parent_dir_inode->i_sb, 701 parent_dir_inode->i_sb,
708 nd->intent.open.create_mode, 702 nd->intent.open.create_mode,
709 nd->intent.open.flags, &oplock, 703 nd->intent.open.flags, &oplock,
@@ -733,8 +727,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
733 else 727 else
734 direntry->d_op = &cifs_dentry_ops; 728 direntry->d_op = &cifs_dentry_ops;
735 d_add(direntry, newInode); 729 d_add(direntry, newInode);
736 if (posix_open) 730 if (posix_open) {
737 filp = lookup_instantiate_filp(nd, direntry, NULL); 731 filp = lookup_instantiate_filp(nd, direntry,
732 generic_file_open);
733 if (IS_ERR(filp)) {
734 rc = PTR_ERR(filp);
735 CIFSSMBClose(xid, pTcon, fileHandle);
736 goto lookup_out;
737 }
738
739 cfile = cifs_new_fileinfo(newInode, fileHandle, filp,
740 nd->path.mnt,
741 nd->intent.open.flags);
742 if (cfile == NULL) {
743 fput(filp);
744 CIFSSMBClose(xid, pTcon, fileHandle);
745 rc = -ENOMEM;
746 goto lookup_out;
747 }
748 }
738 /* since paths are not looked up by component - the parent 749 /* since paths are not looked up by component - the parent
739 directories are presumed to be good here */ 750 directories are presumed to be good here */
740 renew_parental_timestamps(direntry); 751 renew_parental_timestamps(direntry);
@@ -755,6 +766,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
755 is a common return code */ 766 is a common return code */
756 } 767 }
757 768
769lookup_out:
758 kfree(full_path); 770 kfree(full_path);
759 FreeXid(xid); 771 FreeXid(xid);
760 return ERR_PTR(rc); 772 return ERR_PTR(rc);
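Both open-intent paths above unwind in a fixed order when attaching the file info fails: drop the file object with fput(), close the server handle, then return the error. A compilable sketch of that unwinding, with stub types and a hypothetical finish_open() helper:

#include <stdio.h>
#include <stdlib.h>

struct file {
        void *private_data;
};

struct fileinfo {
        int netfid;
};

static void server_close(int netfid)
{
        printf("SMB close, fid %d\n", netfid);
}

static int finish_open(struct file *filp, int netfid, int simulate_oom)
{
        struct fileinfo *fi = simulate_oom ? NULL : malloc(sizeof(*fi));

        if (fi == NULL) {
                free(filp);             /* fput() in the real code */
                server_close(netfid);   /* don't leak the server handle */
                return -12;             /* -ENOMEM */
        }
        fi->netfid = netfid;
        filp->private_data = fi;  /* what cifs_new_fileinfo() now does */
        return 0;
}

int main(void)
{
        struct file *filp = malloc(sizeof(*filp));

        printf("rc=%d\n", finish_open(filp, 42, 1));
        return 0;
}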
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index f1ff785b2292..409e4f523e61 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -162,44 +162,12 @@ psx_client_can_cache:
162 return 0; 162 return 0;
163} 163}
164 164
165static struct cifsFileInfo *
166cifs_fill_filedata(struct file *file)
167{
168 struct list_head *tmp;
169 struct cifsFileInfo *pCifsFile = NULL;
170 struct cifsInodeInfo *pCifsInode = NULL;
171
172 /* search inode for this file and fill in file->private_data */
173 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
174 read_lock(&GlobalSMBSeslock);
175 list_for_each(tmp, &pCifsInode->openFileList) {
176 pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
177 if ((pCifsFile->pfile == NULL) &&
178 (pCifsFile->pid == current->tgid)) {
179 /* mode set in cifs_create */
180
181 /* needed for writepage */
182 pCifsFile->pfile = file;
183 file->private_data = pCifsFile;
184 break;
185 }
186 }
187 read_unlock(&GlobalSMBSeslock);
188
189 if (file->private_data != NULL) {
190 return pCifsFile;
191 } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
192 cERROR(1, "could not find file instance for "
193 "new file %p", file);
194 return NULL;
195}
196
197/* all arguments to this function must be checked for validity in caller */ 165/* all arguments to this function must be checked for validity in caller */
198static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, 166static inline int cifs_open_inode_helper(struct inode *inode,
199 struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
200 struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf, 167 struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
201 char *full_path, int xid) 168 char *full_path, int xid)
202{ 169{
170 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
203 struct timespec temp; 171 struct timespec temp;
204 int rc; 172 int rc;
205 173
@@ -213,36 +181,35 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
213 /* if not oplocked, invalidate inode pages if mtime or file 181 /* if not oplocked, invalidate inode pages if mtime or file
214 size changed */ 182 size changed */
215 temp = cifs_NTtimeToUnix(buf->LastWriteTime); 183 temp = cifs_NTtimeToUnix(buf->LastWriteTime);
216 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) && 184 if (timespec_equal(&inode->i_mtime, &temp) &&
217 (file->f_path.dentry->d_inode->i_size == 185 (inode->i_size ==
218 (loff_t)le64_to_cpu(buf->EndOfFile))) { 186 (loff_t)le64_to_cpu(buf->EndOfFile))) {
219 cFYI(1, "inode unchanged on server"); 187 cFYI(1, "inode unchanged on server");
220 } else { 188 } else {
221 if (file->f_path.dentry->d_inode->i_mapping) { 189 if (inode->i_mapping) {
222 /* BB no need to lock inode until after invalidate 190 /* BB no need to lock inode until after invalidate
223 since namei code should already have it locked? */ 191 since namei code should already have it locked? */
224 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping); 192 rc = filemap_write_and_wait(inode->i_mapping);
225 if (rc != 0) 193 if (rc != 0)
226 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc; 194 pCifsInode->write_behind_rc = rc;
227 } 195 }
228 cFYI(1, "invalidating remote inode since open detected it " 196 cFYI(1, "invalidating remote inode since open detected it "
229 "changed"); 197 "changed");
230 invalidate_remote_inode(file->f_path.dentry->d_inode); 198 invalidate_remote_inode(inode);
231 } 199 }
232 200
233client_can_cache: 201client_can_cache:
234 if (pTcon->unix_ext) 202 if (pTcon->unix_ext)
235 rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode, 203 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
236 full_path, inode->i_sb, xid); 204 xid);
237 else 205 else
238 rc = cifs_get_inode_info(&file->f_path.dentry->d_inode, 206 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
239 full_path, buf, inode->i_sb, xid, NULL); 207 xid, NULL);
240 208
241 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) { 209 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
242 pCifsInode->clientCanCacheAll = true; 210 pCifsInode->clientCanCacheAll = true;
243 pCifsInode->clientCanCacheRead = true; 211 pCifsInode->clientCanCacheRead = true;
244 cFYI(1, "Exclusive Oplock granted on inode %p", 212 cFYI(1, "Exclusive Oplock granted on inode %p", inode);
245 file->f_path.dentry->d_inode);
246 } else if ((*oplock & 0xF) == OPLOCK_READ) 213 } else if ((*oplock & 0xF) == OPLOCK_READ)
247 pCifsInode->clientCanCacheRead = true; 214 pCifsInode->clientCanCacheRead = true;
248 215
@@ -256,7 +223,7 @@ int cifs_open(struct inode *inode, struct file *file)
256 __u32 oplock; 223 __u32 oplock;
257 struct cifs_sb_info *cifs_sb; 224 struct cifs_sb_info *cifs_sb;
258 struct cifsTconInfo *tcon; 225 struct cifsTconInfo *tcon;
259 struct cifsFileInfo *pCifsFile; 226 struct cifsFileInfo *pCifsFile = NULL;
260 struct cifsInodeInfo *pCifsInode; 227 struct cifsInodeInfo *pCifsInode;
261 char *full_path = NULL; 228 char *full_path = NULL;
262 int desiredAccess; 229 int desiredAccess;
@@ -270,12 +237,6 @@ int cifs_open(struct inode *inode, struct file *file)
270 tcon = cifs_sb->tcon; 237 tcon = cifs_sb->tcon;
271 238
272 pCifsInode = CIFS_I(file->f_path.dentry->d_inode); 239 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
273 pCifsFile = cifs_fill_filedata(file);
274 if (pCifsFile) {
275 rc = 0;
276 FreeXid(xid);
277 return rc;
278 }
279 240
280 full_path = build_path_from_dentry(file->f_path.dentry); 241 full_path = build_path_from_dentry(file->f_path.dentry);
281 if (full_path == NULL) { 242 if (full_path == NULL) {
@@ -299,8 +260,7 @@ int cifs_open(struct inode *inode, struct file *file)
299 int oflags = (int) cifs_posix_convert_flags(file->f_flags); 260 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
300 oflags |= SMB_O_CREAT; 261 oflags |= SMB_O_CREAT;
301 /* can not refresh inode info since size could be stale */ 262 /* can not refresh inode info since size could be stale */
302 rc = cifs_posix_open(full_path, &inode, file->f_path.mnt, 263 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
303 inode->i_sb,
304 cifs_sb->mnt_file_mode /* ignored */, 264 cifs_sb->mnt_file_mode /* ignored */,
305 oflags, &oplock, &netfid, xid); 265 oflags, &oplock, &netfid, xid);
306 if (rc == 0) { 266 if (rc == 0) {
@@ -308,9 +268,20 @@ int cifs_open(struct inode *inode, struct file *file)
308 /* no need for special case handling of setting mode 268 /* no need for special case handling of setting mode
309 on read only files needed here */ 269 on read only files needed here */
310 270
311 pCifsFile = cifs_fill_filedata(file); 271 rc = cifs_posix_open_inode_helper(inode, file,
312 cifs_posix_open_inode_helper(inode, file, pCifsInode, 272 pCifsInode, oplock, netfid);
313 oplock, netfid); 273 if (rc != 0) {
274 CIFSSMBClose(xid, tcon, netfid);
275 goto out;
276 }
277
278 pCifsFile = cifs_new_fileinfo(inode, netfid, file,
279 file->f_path.mnt,
280 oflags);
281 if (pCifsFile == NULL) {
282 CIFSSMBClose(xid, tcon, netfid);
283 rc = -ENOMEM;
284 }
314 goto out; 285 goto out;
315 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 286 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
316 if (tcon->ses->serverNOS) 287 if (tcon->ses->serverNOS)
@@ -391,17 +362,17 @@ int cifs_open(struct inode *inode, struct file *file)
391 goto out; 362 goto out;
392 } 363 }
393 364
365 rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid);
366 if (rc != 0)
367 goto out;
368
394 pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt, 369 pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
395 file->f_flags); 370 file->f_flags);
396 file->private_data = pCifsFile; 371 if (pCifsFile == NULL) {
397 if (file->private_data == NULL) {
398 rc = -ENOMEM; 372 rc = -ENOMEM;
399 goto out; 373 goto out;
400 } 374 }
401 375
402 rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon,
403 &oplock, buf, full_path, xid);
404
405 if (oplock & CIFS_CREATE_ACTION) { 376 if (oplock & CIFS_CREATE_ACTION) {
406 /* time to set mode which we can not set earlier due to 377 /* time to set mode which we can not set earlier due to
407 problems creating new read-only files */ 378 problems creating new read-only files */
@@ -513,8 +484,7 @@ reopen_error_exit:
513 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 484 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
514 int oflags = (int) cifs_posix_convert_flags(file->f_flags); 485 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
515 /* can not refresh inode info since size could be stale */ 486 /* can not refresh inode info since size could be stale */
516 rc = cifs_posix_open(full_path, NULL, file->f_path.mnt, 487 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
517 inode->i_sb,
518 cifs_sb->mnt_file_mode /* ignored */, 488 cifs_sb->mnt_file_mode /* ignored */,
519 oflags, &oplock, &netfid, xid); 489 oflags, &oplock, &netfid, xid);
520 if (rc == 0) { 490 if (rc == 0) {
@@ -1952,6 +1922,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
1952 bytes_read -= PAGE_CACHE_SIZE; 1922 bytes_read -= PAGE_CACHE_SIZE;
1953 continue; 1923 continue;
1954 } 1924 }
1925 page_cache_release(page);
1955 1926
1956 target = kmap_atomic(page, KM_USER0); 1927 target = kmap_atomic(page, KM_USER0);
1957 1928
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 62b324f26a56..6f0683c68952 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1401,6 +1401,10 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
1401 if (rc == 0 || rc != -ETXTBSY) 1401 if (rc == 0 || rc != -ETXTBSY)
1402 return rc; 1402 return rc;
1403 1403
1404 /* open-file renames don't work across directories */
1405 if (to_dentry->d_parent != from_dentry->d_parent)
1406 return rc;
1407
1404 /* open the file to be renamed -- we need DELETE perms */ 1408 /* open the file to be renamed -- we need DELETE perms */
1405 rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, 1409 rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE,
1406 CREATE_NOT_DIR, &srcfid, &oplock, NULL, 1410 CREATE_NOT_DIR, &srcfid, &oplock, NULL,
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7707389bdf2c..0a57cb7db5dd 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -730,15 +730,7 @@ ssetup_ntlmssp_authenticate:
730 730
731 /* calculate session key */ 731 /* calculate session key */
732 setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); 732 setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
733 if (first_time) /* should this be moved into common code 733 /* FIXME: calculate MAC key */
734 with similar ntlmv2 path? */
735 /* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key,
736 response BB FIXME, v2_sess_key); */
737
738 /* copy session key */
739
740 /* memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE);
741 bcc_ptr += LM2_SESS_KEY_SIZE; */
742 memcpy(bcc_ptr, (char *)v2_sess_key, 734 memcpy(bcc_ptr, (char *)v2_sess_key,
743 sizeof(struct ntlmv2_resp)); 735 sizeof(struct ntlmv2_resp));
744 bcc_ptr += sizeof(struct ntlmv2_resp); 736 bcc_ptr += sizeof(struct ntlmv2_resp);
diff --git a/fs/compat.c b/fs/compat.c
index f0b391c50552..6490d2134ff3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -626,7 +626,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
626 tot_len += len; 626 tot_len += len;
627 if (tot_len < tmp) /* maths overflow on the compat_ssize_t */ 627 if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
628 goto out; 628 goto out;
629 if (!access_ok(vrfy_dir(type), buf, len)) { 629 if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
630 ret = -EFAULT; 630 ret = -EFAULT;
631 goto out; 631 goto out;
632 } 632 }
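The compat fix passes the user pointer through compat_ptr() before access_ok(): a 32-bit process supplies pointers as 32-bit integers, which a 64-bit kernel must widen explicitly. A userspace sketch of the widening rule (zero-extension through uintptr_t; names are illustrative):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t compat_uptr_t;   /* pointer as seen from 32-bit user */

static void *compat_ptr_demo(compat_uptr_t uptr)
{
        /* zero-extend through uintptr_t; never sign-extend */
        return (void *)(uintptr_t)uptr;
}

int main(void)
{
        compat_uptr_t u = 0x80000000u;

        printf("widened: %p\n", compat_ptr_demo(u));    /* 0x80000000 */
        return 0;
}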
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 41645142b88b..cf78d44a8d6a 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -72,10 +72,6 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
72 if (!sd) 72 if (!sd)
73 return -EINVAL; 73 return -EINVAL;
74 74
75 error = simple_setattr(dentry, iattr);
76 if (error)
77 return error;
78
79 sd_iattr = sd->s_iattr; 75 sd_iattr = sd->s_iattr;
80 if (!sd_iattr) { 76 if (!sd_iattr) {
81 /* setting attributes for the first time, allocate now */ 77 /* setting attributes for the first time, allocate now */
@@ -89,9 +85,12 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
89 sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; 85 sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
90 sd->s_iattr = sd_iattr; 86 sd->s_iattr = sd_iattr;
91 } 87 }
92
93 /* attributes were changed at least once in past */ 88 /* attributes were changed at least once in past */
94 89
90 error = simple_setattr(dentry, iattr);
91 if (error)
92 return error;
93
95 if (ia_valid & ATTR_UID) 94 if (ia_valid & ATTR_UID)
96 sd_iattr->ia_uid = iattr->ia_uid; 95 sd_iattr->ia_uid = iattr->ia_uid;
97 if (ia_valid & ATTR_GID) 96 if (ia_valid & ATTR_GID)
diff --git a/fs/dcache.c b/fs/dcache.c
index d96047b4a633..86d4db15473e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -590,6 +590,8 @@ static void prune_dcache(int count)
590 up_read(&sb->s_umount); 590 up_read(&sb->s_umount);
591 } 591 }
592 spin_lock(&sb_lock); 592 spin_lock(&sb_lock);
593 /* lock was dropped, must reset next */
594 list_safe_reset_next(sb, n, s_list);
593 count -= pruned; 595 count -= pruned;
594 __put_super(sb); 596 __put_super(sb);
595 /* more work left to do? */ 597 /* more work left to do? */
@@ -894,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent);
894 * 896 *
895 * In this case we return -1 to tell the caller that we baled. 897 * In this case we return -1 to tell the caller that we baled.
896 */ 898 */
897static int shrink_dcache_memory(int nr, gfp_t gfp_mask) 899static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
898{ 900{
899 if (nr) { 901 if (nr) {
900 if (!(gfp_mask & __GFP_FS)) 902 if (!(gfp_mask & __GFP_FS))
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index ca7e2a0ed98a..2bcc0431bada 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -200,6 +200,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
200 return error; 200 return error;
201 else { 201 else {
202 inode->i_mode = mode; 202 inode->i_mode = mode;
203 inode->i_ctime = CURRENT_TIME_SEC;
203 mark_inode_dirty(inode); 204 mark_inode_dirty(inode);
204 if (error == 0) 205 if (error == 0)
205 acl = NULL; 206 acl = NULL;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 19214435b752..3675088cb88c 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1552,7 +1552,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
1552 if (error) 1552 if (error)
1553 return error; 1553 return error;
1554 } 1554 }
1555 if (iattr->ia_valid & ATTR_SIZE) { 1555 if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size != inode->i_size) {
1556 error = ext2_setsize(inode, iattr->ia_size); 1556 error = ext2_setsize(inode, iattr->ia_size);
1557 if (error) 1557 if (error)
1558 return error; 1558 return error;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 01552abbca3c..8a11fe212183 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -205,6 +205,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
205 return error; 205 return error;
206 else { 206 else {
207 inode->i_mode = mode; 207 inode->i_mode = mode;
208 inode->i_ctime = CURRENT_TIME_SEC;
208 ext3_mark_inode_dirty(handle, inode); 209 ext3_mark_inode_dirty(handle, inode);
209 if (error == 0) 210 if (error == 0)
210 acl = NULL; 211 acl = NULL;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 19df61c321fd..42272d67955a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4942,20 +4942,26 @@ void ext4_set_inode_flags(struct inode *inode)
4942/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ 4942/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
4943void ext4_get_inode_flags(struct ext4_inode_info *ei) 4943void ext4_get_inode_flags(struct ext4_inode_info *ei)
4944{ 4944{
4945 unsigned int flags = ei->vfs_inode.i_flags; 4945 unsigned int vfs_fl;
4946 4946 unsigned long old_fl, new_fl;
4947 ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL| 4947
4948 EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL); 4948 do {
4949 if (flags & S_SYNC) 4949 vfs_fl = ei->vfs_inode.i_flags;
4950 ei->i_flags |= EXT4_SYNC_FL; 4950 old_fl = ei->i_flags;
4951 if (flags & S_APPEND) 4951 new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
4952 ei->i_flags |= EXT4_APPEND_FL; 4952 EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
4953 if (flags & S_IMMUTABLE) 4953 EXT4_DIRSYNC_FL);
4954 ei->i_flags |= EXT4_IMMUTABLE_FL; 4954 if (vfs_fl & S_SYNC)
4955 if (flags & S_NOATIME) 4955 new_fl |= EXT4_SYNC_FL;
4956 ei->i_flags |= EXT4_NOATIME_FL; 4956 if (vfs_fl & S_APPEND)
4957 if (flags & S_DIRSYNC) 4957 new_fl |= EXT4_APPEND_FL;
4958 ei->i_flags |= EXT4_DIRSYNC_FL; 4958 if (vfs_fl & S_IMMUTABLE)
4959 new_fl |= EXT4_IMMUTABLE_FL;
4960 if (vfs_fl & S_NOATIME)
4961 new_fl |= EXT4_NOATIME_FL;
4962 if (vfs_fl & S_DIRSYNC)
4963 new_fl |= EXT4_DIRSYNC_FL;
4964 } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
4959} 4965}
4960 4966
4961static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, 4967static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -5191,7 +5197,7 @@ static int ext4_inode_blocks_set(handle_t *handle,
5191 */ 5197 */
5192 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 5198 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
5193 raw_inode->i_blocks_high = 0; 5199 raw_inode->i_blocks_high = 0;
5194 ei->i_flags &= ~EXT4_HUGE_FILE_FL; 5200 ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
5195 return 0; 5201 return 0;
5196 } 5202 }
5197 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) 5203 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
@@ -5204,9 +5210,9 @@ static int ext4_inode_blocks_set(handle_t *handle,
5204 */ 5210 */
5205 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 5211 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
5206 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); 5212 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
5207 ei->i_flags &= ~EXT4_HUGE_FILE_FL; 5213 ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
5208 } else { 5214 } else {
5209 ei->i_flags |= EXT4_HUGE_FILE_FL; 5215 ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
5210 /* i_block is stored in file system block size */ 5216 /* i_block is stored in file system block size */
5211 i_blocks = i_blocks >> (inode->i_blkbits - 9); 5217 i_blocks = i_blocks >> (inode->i_blkbits - 9);
5212 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 5218 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
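The ext4_get_inode_flags() rewrite turns a plain read-modify-write of i_flags into a cmpxchg loop, so two concurrent updaters can no longer overwrite each other's bits. A userspace sketch of the loop using GCC/Clang atomic builtins in place of the kernel's cmpxchg():

#include <stdio.h>

static unsigned long i_flags;

/* clear `clear`, set `set`, without losing concurrent updates */
static void update_flags(unsigned long clear, unsigned long set)
{
        unsigned long old_fl, new_fl;

        do {
                old_fl = __atomic_load_n(&i_flags, __ATOMIC_RELAXED);
                new_fl = (old_fl & ~clear) | set;
        } while (!__atomic_compare_exchange_n(&i_flags, &old_fl, new_fl,
                                              0, __ATOMIC_RELAXED,
                                              __ATOMIC_RELAXED));
}

int main(void)
{
        i_flags = 0x0f;
        update_flags(0x03, 0x10);         /* clear bits 0-1, set bit 4 */
        printf("flags=0x%lx\n", i_flags); /* 0x1c */
        return 0;
}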
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 3a6c92ac131c..52abfa12762a 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -960,6 +960,9 @@ mext_check_arguments(struct inode *orig_inode,
960 return -EINVAL; 960 return -EINVAL;
961 } 961 }
962 962
963 if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
964 return -EPERM;
965
963 /* Ext4 move extent does not support swapfile */ 966 /* Ext4 move extent does not support swapfile */
964 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { 967 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
965 ext4_debug("ext4 move extent: The argument files should " 968 ext4_debug("ext4 move extent: The argument files should "
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f74d270ba155..9d175d623aab 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -274,7 +274,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
274 274
275 ret = copy_from_user(&owner, owner_p, sizeof(owner)); 275 ret = copy_from_user(&owner, owner_p, sizeof(owner));
276 if (ret) 276 if (ret)
277 return ret; 277 return -EFAULT;
278 278
279 switch (owner.type) { 279 switch (owner.type) {
280 case F_OWNER_TID: 280 case F_OWNER_TID:
@@ -332,8 +332,11 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
332 } 332 }
333 read_unlock(&filp->f_owner.lock); 333 read_unlock(&filp->f_owner.lock);
334 334
335 if (!ret) 335 if (!ret) {
336 ret = copy_to_user(owner_p, &owner, sizeof(owner)); 336 ret = copy_to_user(owner_p, &owner, sizeof(owner));
337 if (ret)
338 ret = -EFAULT;
339 }
337 return ret; 340 return ret;
338} 341}
339 342
@@ -730,12 +733,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
730{ 733{
731 while (fa) { 734 while (fa) {
732 struct fown_struct *fown; 735 struct fown_struct *fown;
736 unsigned long flags;
737
733 if (fa->magic != FASYNC_MAGIC) { 738 if (fa->magic != FASYNC_MAGIC) {
734 printk(KERN_ERR "kill_fasync: bad magic number in " 739 printk(KERN_ERR "kill_fasync: bad magic number in "
735 "fasync_struct!\n"); 740 "fasync_struct!\n");
736 return; 741 return;
737 } 742 }
738 spin_lock(&fa->fa_lock); 743 spin_lock_irqsave(&fa->fa_lock, flags);
739 if (fa->fa_file) { 744 if (fa->fa_file) {
740 fown = &fa->fa_file->f_owner; 745 fown = &fa->fa_file->f_owner;
741 /* Don't send SIGURG to processes which have not set a 746 /* Don't send SIGURG to processes which have not set a
@@ -744,7 +749,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
744 if (!(sig == SIGURG && fown->signum == 0)) 749 if (!(sig == SIGURG && fown->signum == 0))
745 send_sigio(fown, fa->fa_fd, band); 750 send_sigio(fown, fa->fa_fd, band);
746 } 751 }
747 spin_unlock(&fa->fa_lock); 752 spin_unlock_irqrestore(&fa->fa_lock, flags);
748 fa = rcu_dereference(fa->fa_next); 753 fa = rcu_dereference(fa->fa_next);
749 } 754 }
750} 755}
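Both fcntl hunks apply the copy_*_user() convention: the helpers return the number of bytes left uncopied, so a nonzero result must be translated to -EFAULT rather than returned as a positive count. A small model of that translation (the faulted parameter simulates a partial copy):

#include <stdio.h>
#include <string.h>

#define EFAULT 14

/* stand-in for copy_from_user(): returns bytes left uncopied */
static unsigned long copy_in(void *dst, const void *src, unsigned long n,
                             unsigned long faulted)
{
        memcpy(dst, src, n - faulted);
        return faulted;
}

static int setown_like(int *owner, const int *user, unsigned long faulted)
{
        if (copy_in(owner, user, sizeof(*owner), faulted))
                return -EFAULT;         /* the fix: not `return ret;` */
        return 0;
}

int main(void)
{
        int owner = 0, user = 5;

        printf("full copy: %d\n", setown_like(&owner, &user, 0));
        printf("faulted:   %d\n", setown_like(&owner, &user, 4));
        return 0;
}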
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ea8592b90696..d5be1693ac93 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,52 +38,18 @@ int nr_pdflush_threads;
38/* 38/*
39 * Passed into wb_writeback(), essentially a subset of writeback_control 39 * Passed into wb_writeback(), essentially a subset of writeback_control
40 */ 40 */
41struct wb_writeback_args { 41struct wb_writeback_work {
42 long nr_pages; 42 long nr_pages;
43 struct super_block *sb; 43 struct super_block *sb;
44 enum writeback_sync_modes sync_mode; 44 enum writeback_sync_modes sync_mode;
45 unsigned int for_kupdate:1; 45 unsigned int for_kupdate:1;
46 unsigned int range_cyclic:1; 46 unsigned int range_cyclic:1;
47 unsigned int for_background:1; 47 unsigned int for_background:1;
48 unsigned int sb_pinned:1;
49};
50 48
51/*
52 * Work items for the bdi_writeback threads
53 */
54struct bdi_work {
55 struct list_head list; /* pending work list */ 49 struct list_head list; /* pending work list */
56 struct rcu_head rcu_head; /* for RCU free/clear of work */ 50 struct completion *done; /* set if the caller waits */
57
58 unsigned long seen; /* threads that have seen this work */
59 atomic_t pending; /* number of threads still to do work */
60
61 struct wb_writeback_args args; /* writeback arguments */
62
63 unsigned long state; /* flag bits, see WS_* */
64}; 51};
65 52
66enum {
67 WS_USED_B = 0,
68 WS_ONSTACK_B,
69};
70
71#define WS_USED (1 << WS_USED_B)
72#define WS_ONSTACK (1 << WS_ONSTACK_B)
73
74static inline bool bdi_work_on_stack(struct bdi_work *work)
75{
76 return test_bit(WS_ONSTACK_B, &work->state);
77}
78
79static inline void bdi_work_init(struct bdi_work *work,
80 struct wb_writeback_args *args)
81{
82 INIT_RCU_HEAD(&work->rcu_head);
83 work->args = *args;
84 work->state = WS_USED;
85}
86
87/** 53/**
88 * writeback_in_progress - determine whether there is writeback in progress 54 * writeback_in_progress - determine whether there is writeback in progress
89 * @bdi: the device's backing_dev_info structure. 55 * @bdi: the device's backing_dev_info structure.
@@ -96,76 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
96 return !list_empty(&bdi->work_list); 62 return !list_empty(&bdi->work_list);
97} 63}
98 64
99static void bdi_work_clear(struct bdi_work *work) 65static void bdi_queue_work(struct backing_dev_info *bdi,
100{ 66 struct wb_writeback_work *work)
101 clear_bit(WS_USED_B, &work->state);
102 smp_mb__after_clear_bit();
103 /*
104 * work can have disappeared at this point. bit waitq functions
105 * should be able to tolerate this, provided bdi_sched_wait does
106 * not dereference it's pointer argument.
107 */
108 wake_up_bit(&work->state, WS_USED_B);
109}
110
111static void bdi_work_free(struct rcu_head *head)
112{
113 struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
114
115 if (!bdi_work_on_stack(work))
116 kfree(work);
117 else
118 bdi_work_clear(work);
119}
120
121static void wb_work_complete(struct bdi_work *work)
122{
123 const enum writeback_sync_modes sync_mode = work->args.sync_mode;
124 int onstack = bdi_work_on_stack(work);
125
126 /*
127 * For allocated work, we can clear the done/seen bit right here.
128 * For on-stack work, we need to postpone both the clear and free
129 * to after the RCU grace period, since the stack could be invalidated
130 * as soon as bdi_work_clear() has done the wakeup.
131 */
132 if (!onstack)
133 bdi_work_clear(work);
134 if (sync_mode == WB_SYNC_NONE || onstack)
135 call_rcu(&work->rcu_head, bdi_work_free);
136}
137
138static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
139{
140 /*
141 * The caller has retrieved the work arguments from this work,
142 * drop our reference. If this is the last ref, delete and free it
143 */
144 if (atomic_dec_and_test(&work->pending)) {
145 struct backing_dev_info *bdi = wb->bdi;
146
147 spin_lock(&bdi->wb_lock);
148 list_del_rcu(&work->list);
149 spin_unlock(&bdi->wb_lock);
150
151 wb_work_complete(work);
152 }
153}
154
155static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
156{ 67{
157 work->seen = bdi->wb_mask;
158 BUG_ON(!work->seen);
159 atomic_set(&work->pending, bdi->wb_cnt);
160 BUG_ON(!bdi->wb_cnt);
161
162 /*
163 * list_add_tail_rcu() contains the necessary barriers to
164 * make sure the above stores are seen before the item is
165 * noticed on the list
166 */
167 spin_lock(&bdi->wb_lock); 68 spin_lock(&bdi->wb_lock);
168 list_add_tail_rcu(&work->list, &bdi->work_list); 69 list_add_tail(&work->list, &bdi->work_list);
169 spin_unlock(&bdi->wb_lock); 70 spin_unlock(&bdi->wb_lock);
170 71
171 /* 72 /*
@@ -182,107 +83,59 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
182 } 83 }
183} 84}
184 85
185/* 86static void
186 * Used for on-stack allocated work items. The caller needs to wait until 87__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
187 * the wb threads have acked the work before it's safe to continue. 88 bool range_cyclic, bool for_background)
188 */
189static void bdi_wait_on_work_clear(struct bdi_work *work)
190{
191 wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
192 TASK_UNINTERRUPTIBLE);
193}
194
195static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
196 struct wb_writeback_args *args,
197 int wait)
198{ 89{
199 struct bdi_work *work; 90 struct wb_writeback_work *work;
200 91
201 /* 92 /*
202 * This is WB_SYNC_NONE writeback, so if allocation fails just 93 * This is WB_SYNC_NONE writeback, so if allocation fails just
203 * wakeup the thread for old dirty data writeback 94 * wakeup the thread for old dirty data writeback
204 */ 95 */
205 work = kmalloc(sizeof(*work), GFP_ATOMIC); 96 work = kzalloc(sizeof(*work), GFP_ATOMIC);
206 if (work) { 97 if (!work) {
207 bdi_work_init(work, args); 98 if (bdi->wb.task)
208 bdi_queue_work(bdi, work); 99 wake_up_process(bdi->wb.task);
209 if (wait) 100 return;
210 bdi_wait_on_work_clear(work);
211 } else {
212 struct bdi_writeback *wb = &bdi->wb;
213
214 if (wb->task)
215 wake_up_process(wb->task);
216 } 101 }
102
103 work->sync_mode = WB_SYNC_NONE;
104 work->nr_pages = nr_pages;
105 work->range_cyclic = range_cyclic;
106 work->for_background = for_background;
107
108 bdi_queue_work(bdi, work);
217} 109}
218 110
219/** 111/**
220 * bdi_sync_writeback - start and wait for writeback 112 * bdi_start_writeback - start writeback
221 * @bdi: the backing device to write from 113 * @bdi: the backing device to write from
222 * @sb: write inodes from this super_block 114 * @nr_pages: the number of pages to write
223 * 115 *
224 * Description: 116 * Description:
225 * This does WB_SYNC_ALL data integrity writeback and waits for the 117 * This does WB_SYNC_NONE opportunistic writeback. The IO is only
226 * IO to complete. Callers must hold the sb s_umount semaphore for 118 * started when this function returns, we make no guarantees on
227 * reading, to avoid having the super disappear before we are done. 119 * completion. Caller need not hold sb s_umount semaphore.
120 *
228 */ 121 */
229static void bdi_sync_writeback(struct backing_dev_info *bdi, 122void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
230 struct super_block *sb)
231{ 123{
232 struct wb_writeback_args args = { 124 __bdi_start_writeback(bdi, nr_pages, true, false);
233 .sb = sb,
234 .sync_mode = WB_SYNC_ALL,
235 .nr_pages = LONG_MAX,
236 .range_cyclic = 0,
237 /*
238 * Setting sb_pinned is not necessary for WB_SYNC_ALL, but
239 * lets make it explicitly clear.
240 */
241 .sb_pinned = 1,
242 };
243 struct bdi_work work;
244
245 bdi_work_init(&work, &args);
246 work.state |= WS_ONSTACK;
247
248 bdi_queue_work(bdi, &work);
249 bdi_wait_on_work_clear(&work);
250} 125}
251 126
252/** 127/**
253 * bdi_start_writeback - start writeback 128 * bdi_start_background_writeback - start background writeback
254 * @bdi: the backing device to write from 129 * @bdi: the backing device to write from
255 * @sb: write inodes from this super_block
256 * @nr_pages: the number of pages to write
257 * @sb_locked: caller already holds sb umount sem.
258 * 130 *
259 * Description: 131 * Description:
260 * This does WB_SYNC_NONE opportunistic writeback. The IO is only 132 * This does WB_SYNC_NONE background writeback. The IO is only
261 * started when this function returns, we make no guarantees on 133 * started when this function returns, we make no guarantees on
262 * completion. Caller specifies whether sb umount sem is held already or not. 134 * completion. Caller need not hold sb s_umount semaphore.
263 *
264 */ 135 */
265void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, 136void bdi_start_background_writeback(struct backing_dev_info *bdi)
266 long nr_pages, int sb_locked)
267{ 137{
268 struct wb_writeback_args args = { 138 __bdi_start_writeback(bdi, LONG_MAX, true, true);
269 .sb = sb,
270 .sync_mode = WB_SYNC_NONE,
271 .nr_pages = nr_pages,
272 .range_cyclic = 1,
273 .sb_pinned = sb_locked,
274 };
275
276 /*
277 * We treat @nr_pages=0 as the special case to do background writeback,
278 * ie. to sync pages until the background dirty threshold is reached.
279 */
280 if (!nr_pages) {
281 args.nr_pages = LONG_MAX;
282 args.for_background = 1;
283 }
284
285 bdi_alloc_queue_work(bdi, &args, sb_locked);
286} 139}
287 140
288/* 141/*
@@ -572,75 +425,69 @@ select_queue:
     return ret;
 }
 
-static void unpin_sb_for_writeback(struct super_block *sb)
-{
-    up_read(&sb->s_umount);
-    put_super(sb);
-}
-
-enum sb_pin_state {
-    SB_PINNED,
-    SB_NOT_PINNED,
-    SB_PIN_FAILED
-};
-
 /*
- * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * For background writeback the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
  * go away while we are writing inodes from it.
  */
-static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
-                                              struct super_block *sb)
+static bool pin_sb_for_writeback(struct super_block *sb)
 {
-    /*
-     * Caller must already hold the ref for this
-     */
-    if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) {
-        WARN_ON(!rwsem_is_locked(&sb->s_umount));
-        return SB_NOT_PINNED;
-    }
     spin_lock(&sb_lock);
+    if (list_empty(&sb->s_instances)) {
+        spin_unlock(&sb_lock);
+        return false;
+    }
+
     sb->s_count++;
+    spin_unlock(&sb_lock);
+
     if (down_read_trylock(&sb->s_umount)) {
-        if (sb->s_root) {
-            spin_unlock(&sb_lock);
-            return SB_PINNED;
-        }
-        /*
-         * umounted, drop rwsem again and fall through to failure
-         */
+        if (sb->s_root)
+            return true;
         up_read(&sb->s_umount);
     }
-    sb->s_count--;
-    spin_unlock(&sb_lock);
-    return SB_PIN_FAILED;
+
+    put_super(sb);
+    return false;
 }
 
 /*
  * Write a portion of b_io inodes which belong to @sb.
- * If @wbc->sb != NULL, then find and write all such
+ *
+ * If @only_this_sb is true, then find and write all such
  * inodes. Otherwise write only ones which go sequentially
  * in reverse order.
+ *
  * Return 1, if the caller writeback routine should be
  * interrupted. Otherwise return 0.
  */
-static int writeback_sb_inodes(struct super_block *sb,
-                               struct bdi_writeback *wb,
-                               struct writeback_control *wbc)
+static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
+                               struct writeback_control *wbc, bool only_this_sb)
 {
     while (!list_empty(&wb->b_io)) {
         long pages_skipped;
         struct inode *inode = list_entry(wb->b_io.prev,
                                          struct inode, i_list);
-        if (wbc->sb && sb != inode->i_sb) {
-            /* super block given and doesn't
-               match, skip this inode */
-            redirty_tail(inode);
-            continue;
-        }
-        if (sb != inode->i_sb)
-            /* finish with this superblock */
+
+        if (inode->i_sb != sb) {
+            if (only_this_sb) {
+                /*
+                 * We only want to write back data for this
+                 * superblock, move all inodes not belonging
+                 * to it back onto the dirty list.
+                 */
+                redirty_tail(inode);
+                continue;
+            }
+
+            /*
+             * The inode belongs to a different superblock.
+             * Bounce back to the caller to unpin this and
+             * pin the next superblock.
+             */
             return 0;
+        }
+
         if (inode->i_state & (I_NEW | I_WILL_FREE)) {
             requeue_io(inode);
             continue;
@@ -678,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb,
     return 1;
 }
 
-static void writeback_inodes_wb(struct bdi_writeback *wb,
-                                struct writeback_control *wbc)
+void writeback_inodes_wb(struct bdi_writeback *wb,
+                         struct writeback_control *wbc)
 {
     int ret = 0;
 
@@ -692,24 +539,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
         struct inode *inode = list_entry(wb->b_io.prev,
                                          struct inode, i_list);
         struct super_block *sb = inode->i_sb;
-        enum sb_pin_state state;
-
-        if (wbc->sb && sb != wbc->sb) {
-            /* super block given and doesn't
-               match, skip this inode */
-            redirty_tail(inode);
-            continue;
-        }
-        state = pin_sb_for_writeback(wbc, sb);
 
-        if (state == SB_PIN_FAILED) {
+        if (!pin_sb_for_writeback(sb)) {
             requeue_io(inode);
             continue;
         }
-        ret = writeback_sb_inodes(sb, wb, wbc);
+        ret = writeback_sb_inodes(sb, wb, wbc, false);
+        drop_super(sb);
 
-        if (state == SB_PINNED)
-            unpin_sb_for_writeback(sb);
         if (ret)
             break;
     }
@@ -717,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
     /* Leave any unwritten inodes on b_io */
 }
 
-void writeback_inodes_wbc(struct writeback_control *wbc)
+static void __writeback_inodes_sb(struct super_block *sb,
+        struct bdi_writeback *wb, struct writeback_control *wbc)
 {
-    struct backing_dev_info *bdi = wbc->bdi;
+    WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-    writeback_inodes_wb(&bdi->wb, wbc);
+    wbc->wb_start = jiffies; /* livelock avoidance */
+    spin_lock(&inode_lock);
+    if (!wbc->for_kupdate || list_empty(&wb->b_io))
+        queue_io(wb, wbc->older_than_this);
+    writeback_sb_inodes(sb, wb, wbc, true);
+    spin_unlock(&inode_lock);
 }
 
 /*
@@ -759,17 +602,14 @@ static inline bool over_bground_thresh(void)
  * all dirty pages if they are all attached to "old" mappings.
  */
 static long wb_writeback(struct bdi_writeback *wb,
-                         struct wb_writeback_args *args)
+                         struct wb_writeback_work *work)
 {
     struct writeback_control wbc = {
-        .bdi                = wb->bdi,
-        .sb                 = args->sb,
-        .sync_mode          = args->sync_mode,
+        .sync_mode          = work->sync_mode,
         .older_than_this    = NULL,
-        .for_kupdate        = args->for_kupdate,
-        .for_background     = args->for_background,
-        .range_cyclic       = args->range_cyclic,
-        .sb_pinned          = args->sb_pinned,
+        .for_kupdate        = work->for_kupdate,
+        .for_background     = work->for_background,
+        .range_cyclic       = work->range_cyclic,
     };
     unsigned long oldest_jif;
     long wrote = 0;
@@ -789,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb,
         /*
          * Stop writeback when nr_pages has been consumed
          */
-        if (args->nr_pages <= 0)
+        if (work->nr_pages <= 0)
             break;
 
         /*
          * For background writeout, stop when we are below the
          * background dirty threshold
          */
-        if (args->for_background && !over_bground_thresh())
+        if (work->for_background && !over_bground_thresh())
             break;
 
         wbc.more_io = 0;
         wbc.nr_to_write = MAX_WRITEBACK_PAGES;
         wbc.pages_skipped = 0;
-        writeback_inodes_wb(wb, &wbc);
-        args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+        if (work->sb)
+            __writeback_inodes_sb(work->sb, wb, &wbc);
+        else
+            writeback_inodes_wb(wb, &wbc);
+        work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
         wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
         /*
@@ -839,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb,
 }
 
 /*
- * Return the next bdi_work struct that hasn't been processed by this
- * wb thread yet. ->seen is initially set for each thread that exists
- * for this device, when a thread first notices a piece of work it
- * clears its bit. Depending on writeback type, the thread will notify
- * completion on either receiving the work (WB_SYNC_NONE) or after
- * it is done (WB_SYNC_ALL).
+ * Return the next wb_writeback_work struct that hasn't been processed yet.
  */
-static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
-                                           struct bdi_writeback *wb)
+static struct wb_writeback_work *
+get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
 {
-    struct bdi_work *work, *ret = NULL;
+    struct wb_writeback_work *work = NULL;
 
-    rcu_read_lock();
-
-    list_for_each_entry_rcu(work, &bdi->work_list, list) {
-        if (!test_bit(wb->nr, &work->seen))
-            continue;
-        clear_bit(wb->nr, &work->seen);
-
-        ret = work;
-        break;
+    spin_lock(&bdi->wb_lock);
+    if (!list_empty(&bdi->work_list)) {
+        work = list_entry(bdi->work_list.next,
+                          struct wb_writeback_work, list);
+        list_del_init(&work->list);
     }
-
-    rcu_read_unlock();
-    return ret;
+    spin_unlock(&bdi->wb_lock);
+    return work;
 }
 
 static long wb_check_old_data_flush(struct bdi_writeback *wb)
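The dequeue side above is now a plain FIFO pop under bdi->wb_lock. Its enqueue counterpart, bdi_queue_work(), is called by later hunks but not quoted here; a minimal sketch under the assumption that it is simply the mirror image of get_next_work_item():

static void bdi_queue_work(struct backing_dev_info *bdi,
                           struct wb_writeback_work *work)
{
    spin_lock(&bdi->wb_lock);
    list_add_tail(&work->list, &bdi->work_list);
    if (bdi->wb.task)
        wake_up_process(bdi->wb.task);    /* flusher will pop the work */
    spin_unlock(&bdi->wb_lock);
}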
@@ -888,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
                 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
     if (nr_pages) {
-        struct wb_writeback_args args = {
+        struct wb_writeback_work work = {
             .nr_pages       = nr_pages,
             .sync_mode      = WB_SYNC_NONE,
             .for_kupdate    = 1,
             .range_cyclic   = 1,
         };
 
-        return wb_writeback(wb, &args);
+        return wb_writeback(wb, &work);
     }
 
     return 0;
@@ -907,36 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 {
     struct backing_dev_info *bdi = wb->bdi;
-    struct bdi_work *work;
+    struct wb_writeback_work *work;
     long wrote = 0;
 
     while ((work = get_next_work_item(bdi, wb)) != NULL) {
-        struct wb_writeback_args args = work->args;
-        int post_clear;
-
         /*
          * Override sync mode, in case we must wait for completion
+         * because this thread is exiting now.
          */
         if (force_wait)
-            work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
-
-        post_clear = WB_SYNC_ALL || args.sb_pinned;
-
-        /*
-         * If this isn't a data integrity operation, just notify
-         * that we have seen this work and we are now starting it.
-         */
-        if (!post_clear)
-            wb_clear_pending(wb, work);
+            work->sync_mode = WB_SYNC_ALL;
 
-        wrote += wb_writeback(wb, &args);
+        wrote += wb_writeback(wb, work);
 
         /*
-         * This is a data integrity writeback, so only do the
-         * notification when we have completed the work.
+         * Notify the caller of completion if this is a synchronous
+         * work item, otherwise just free it.
          */
-        if (post_clear)
-            wb_clear_pending(wb, work);
+        if (work->done)
+            complete(work->done);
+        else
+            kfree(work);
     }
 
     /*
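The ownership rule implemented above is simple: a work item carrying a completion belongs to a caller sleeping on it and must be completed, while anything without one was heap-allocated by the kernel itself and is freed by the flusher. The waiting side, as the writeback_inodes_sb() and sync_inodes_sb() hunks below show, looks like this:

/* Synchronous caller: work lives on the stack, the flusher signals 'done'. */
DECLARE_COMPLETION_ONSTACK(done);
struct wb_writeback_work work = {
    .sb        = sb,
    .sync_mode = WB_SYNC_ALL,
    .done      = &done,
};

bdi_queue_work(sb->s_bdi, &work);
wait_for_completion(&done);    /* only now may the stack frame go away */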
@@ -993,42 +817,27 @@ int bdi_writeback_task(struct bdi_writeback *wb)
 }
 
 /*
- * Schedule writeback for all backing devices. This does WB_SYNC_NONE
- * writeback, for integrity writeback see bdi_sync_writeback().
+ * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
+ * the whole world.
  */
-static void bdi_writeback_all(struct super_block *sb, long nr_pages)
+void wakeup_flusher_threads(long nr_pages)
 {
-    struct wb_writeback_args args = {
-        .sb         = sb,
-        .nr_pages   = nr_pages,
-        .sync_mode  = WB_SYNC_NONE,
-    };
     struct backing_dev_info *bdi;
 
-    rcu_read_lock();
+    if (!nr_pages) {
+        nr_pages = global_page_state(NR_FILE_DIRTY) +
+                global_page_state(NR_UNSTABLE_NFS);
+    }
 
+    rcu_read_lock();
     list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
         if (!bdi_has_dirty_io(bdi))
             continue;
-
-        bdi_alloc_queue_work(bdi, &args, 0);
+        __bdi_start_writeback(bdi, nr_pages, false, false);
     }
-
     rcu_read_unlock();
 }
 
-/*
- * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
- * the whole world.
- */
-void wakeup_flusher_threads(long nr_pages)
-{
-    if (nr_pages == 0)
-        nr_pages = global_page_state(NR_FILE_DIRTY) +
-                global_page_state(NR_UNSTABLE_NFS);
-    bdi_writeback_all(NULL, nr_pages);
-}
-
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 {
     if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
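wakeup_flusher_threads() thus absorbs the old bdi_writeback_all(): passing zero asks for every dirty and unstable page in the system. Typical calls look like the following (illustrative only; the callers themselves are outside this patch):

/* e.g. the sync(2) path: kick everything, do not wait for completion */
wakeup_flusher_threads(0);

/* e.g. page reclaim: request a bounded amount of writeback */
wakeup_flusher_threads(1024);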
@@ -1220,18 +1029,6 @@ static void wait_sb_inodes(struct super_block *sb)
         iput(old_inode);
 }
 
-static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
-{
-    unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
-    unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
-    long nr_to_write;
-
-    nr_to_write = nr_dirty + nr_unstable +
-            (inodes_stat.nr_inodes - inodes_stat.nr_unused);
-
-    bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
-}
-
 /**
  * writeback_inodes_sb - writeback dirty inodes from given super_block
  * @sb: the superblock
@@ -1243,21 +1040,24 @@ static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
  */
 void writeback_inodes_sb(struct super_block *sb)
 {
-    __writeback_inodes_sb(sb, 0);
-}
-EXPORT_SYMBOL(writeback_inodes_sb);
+    unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
+    unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+    DECLARE_COMPLETION_ONSTACK(done);
+    struct wb_writeback_work work = {
+        .sb         = sb,
+        .sync_mode  = WB_SYNC_NONE,
+        .done       = &done,
+    };
 
-/**
- * writeback_inodes_sb_locked - writeback dirty inodes from given super_block
- * @sb: the superblock
- *
- * Like writeback_inodes_sb(), except the caller already holds the
- * sb umount sem.
- */
-void writeback_inodes_sb_locked(struct super_block *sb)
-{
-    __writeback_inodes_sb(sb, 1);
+    WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+    work.nr_pages = nr_dirty + nr_unstable +
+            (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+
+    bdi_queue_work(sb->s_bdi, &work);
+    wait_for_completion(&done);
 }
+EXPORT_SYMBOL(writeback_inodes_sb);
 
 /**
  * writeback_inodes_sb_if_idle - start writeback if none underway
@@ -1269,7 +1069,9 @@ void writeback_inodes_sb_locked(struct super_block *sb)
 int writeback_inodes_sb_if_idle(struct super_block *sb)
 {
     if (!writeback_in_progress(sb->s_bdi)) {
+        down_read(&sb->s_umount);
         writeback_inodes_sb(sb);
+        up_read(&sb->s_umount);
         return 1;
     } else
         return 0;
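Note the new locking contract: writeback_inodes_sb() now asserts that s_umount is held, so writeback_inodes_sb_if_idle() takes the semaphore itself and callers must not already hold it. An illustrative caller (hypothetical code, mirroring how a filesystem might kick writeback when free space runs low):

if (writeback_inodes_sb_if_idle(inode->i_sb)) {
    /* writeback was started; back off and retry the allocation */
}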
@@ -1285,7 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  */
 void sync_inodes_sb(struct super_block *sb)
 {
-    bdi_sync_writeback(sb->s_bdi, sb);
+    DECLARE_COMPLETION_ONSTACK(done);
+    struct wb_writeback_work work = {
+        .sb         = sb,
+        .sync_mode  = WB_SYNC_ALL,
+        .nr_pages   = LONG_MAX,
+        .range_cyclic   = 0,
+        .done       = &done,
+    };
+
+    WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+    bdi_queue_work(sb->s_bdi, &work);
+    wait_for_completion(&done);
+
     wait_sb_inodes(sb);
 }
 EXPORT_SYMBOL(sync_inodes_sb);
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 47aefd376e54..723b889fd219 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -710,30 +710,26 @@ static void fscache_write_op(struct fscache_operation *_op)
         goto superseded;
     }
 
-    if (page) {
-        radix_tree_tag_set(&cookie->stores, page->index,
-                           FSCACHE_COOKIE_STORING_TAG);
-        radix_tree_tag_clear(&cookie->stores, page->index,
-                             FSCACHE_COOKIE_PENDING_TAG);
-    }
+    radix_tree_tag_set(&cookie->stores, page->index,
+                       FSCACHE_COOKIE_STORING_TAG);
+    radix_tree_tag_clear(&cookie->stores, page->index,
+                         FSCACHE_COOKIE_PENDING_TAG);
 
     spin_unlock(&cookie->stores_lock);
     spin_unlock(&object->lock);
 
-    if (page) {
-        fscache_set_op_state(&op->op, "Store");
-        fscache_stat(&fscache_n_store_pages);
-        fscache_stat(&fscache_n_cop_write_page);
-        ret = object->cache->ops->write_page(op, page);
-        fscache_stat_d(&fscache_n_cop_write_page);
-        fscache_set_op_state(&op->op, "EndWrite");
-        fscache_end_page_write(object, page);
-        if (ret < 0) {
-            fscache_set_op_state(&op->op, "Abort");
-            fscache_abort_object(object);
-        } else {
-            fscache_enqueue_operation(&op->op);
-        }
-    }
+    fscache_set_op_state(&op->op, "Store");
+    fscache_stat(&fscache_n_store_pages);
+    fscache_stat(&fscache_n_cop_write_page);
+    ret = object->cache->ops->write_page(op, page);
+    fscache_stat_d(&fscache_n_cop_write_page);
+    fscache_set_op_state(&op->op, "EndWrite");
+    fscache_end_page_write(object, page);
+    if (ret < 0) {
+        fscache_set_op_state(&op->op, "Abort");
+        fscache_abort_object(object);
+    } else {
+        fscache_enqueue_operation(&op->op);
+    }
 
     _leave("");
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 4a48c0f4b402..84da64b551b2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1041,6 +1041,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 
     if (gfs2_is_stuffed(ip)) {
         u64 dsize = size + sizeof(struct gfs2_inode);
+        ip->i_disksize = size;
         ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
         gfs2_trans_add_bh(ip->i_gl, dibh, 1);
         gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8295c5b5d4a9..26ca3361a8bc 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -392,7 +392,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
     unsigned totlen = be16_to_cpu(dent->de_rec_len);
 
     if (gfs2_dirent_sentinel(dent))
-        actual = GFS2_DIRENT_SIZE(0);
+        actual = 0;
     if (totlen - actual >= required)
         return 1;
     return 0;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ddcdbf493536..0898f3ec8212 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -706,8 +706,18 @@ static void glock_work_func(struct work_struct *work)
 {
     unsigned long delay = 0;
     struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
+    struct gfs2_holder *gh;
     int drop_ref = 0;
 
+    if (unlikely(test_bit(GLF_FROZEN, &gl->gl_flags))) {
+        spin_lock(&gl->gl_spin);
+        gh = find_first_waiter(gl);
+        if (gh && (gh->gh_flags & LM_FLAG_NOEXP) &&
+            test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
+            set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+        spin_unlock(&gl->gl_spin);
+    }
+
     if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
         finish_xmote(gl, gl->gl_reply);
         drop_ref = 1;
@@ -1348,7 +1358,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
 }
 
 
-static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
+static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
     struct gfs2_glock *gl;
     int may_demote;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b5612cbb62a5..f03afd9c44bc 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
 {
     struct inode *inode;
     struct gfs2_inode *ip;
-    struct gfs2_glock *io_gl;
+    struct gfs2_glock *io_gl = NULL;
     int error;
 
     inode = gfs2_iget(sb, no_addr);
@@ -198,6 +198,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
     ip->i_iopen_gh.gh_gl->gl_object = ip;
 
     gfs2_glock_put(io_gl);
+    io_gl = NULL;
 
     if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
         goto gfs2_nfsbypass;
@@ -228,7 +229,8 @@ gfs2_nfsbypass:
 fail_glock:
     gfs2_glock_dq(&ip->i_iopen_gh);
 fail_iopen:
-    gfs2_glock_put(io_gl);
+    if (io_gl)
+        gfs2_glock_put(io_gl);
 fail_put:
     if (inode->i_state & I_NEW)
         ip->i_gl->gl_object = NULL;
@@ -256,7 +258,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
 {
     struct gfs2_sbd *sdp;
     struct gfs2_inode *ip;
-    struct gfs2_glock *io_gl;
+    struct gfs2_glock *io_gl = NULL;
     int error;
     struct gfs2_holder gh;
     struct inode *inode;
@@ -293,6 +295,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
 
     ip->i_iopen_gh.gh_gl->gl_object = ip;
     gfs2_glock_put(io_gl);
+    io_gl = NULL;
 
     inode->i_mode = DT2IF(DT_UNKNOWN);
 
@@ -319,7 +322,8 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
 fail_glock:
     gfs2_glock_dq(&ip->i_iopen_gh);
 fail_iopen:
-    gfs2_glock_put(io_gl);
+    if (io_gl)
+        gfs2_glock_put(io_gl);
 fail_put:
     ip->i_gl->gl_object = NULL;
     gfs2_glock_put(ip->i_gl);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 49667d68769e..8f02d3db8f42 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list);
 static atomic_t qd_lru_count = ATOMIC_INIT(0);
 static DEFINE_SPINLOCK(qd_lru_lock);
 
-int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
+int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
     struct gfs2_quota_data *qd;
     struct gfs2_sbd *sdp;
@@ -694,10 +694,8 @@ get_a_page:
         if (!buffer_mapped(bh))
             goto unlock_out;
         /* If it's a newly allocated disk block for quota, zero it */
-        if (buffer_new(bh)) {
-            memset(bh->b_data, 0, bh->b_size);
-            set_buffer_uptodate(bh);
-        }
+        if (buffer_new(bh))
+            zero_user(page, pos - blocksize, bh->b_size);
     }
 
     if (PageUptodate(page))
@@ -723,7 +721,7 @@ get_a_page:
 
     /* If quota straddles page boundary, we need to update the rest of the
      * quota at the beginning of the next page */
-    if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */
+    if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) {
         ptr = ptr + nbytes;
         nbytes = sizeof(struct gfs2_quota) - nbytes;
         offset = 0;
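zero_user() replaces the open-coded memset of bh->b_data and the explicit set_buffer_uptodate(): it zeroes a byte range of the page through a temporary kernel mapping, so it works correctly for highmem pages too. A rough sketch of what the helper expands to (the real one lives in include/linux/highmem.h):

#include <linux/highmem.h>

static inline void zero_user_sketch(struct page *page,
                                    unsigned start, unsigned size)
{
    void *kaddr = kmap_atomic(page, KM_USER0);
    memset(kaddr + start, 0, size);    /* zero [start, start + size) */
    flush_dcache_page(page);
    kunmap_atomic(kaddr, KM_USER0);
}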
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 195f60c8bd14..e7d236ca48bd 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
     return ret;
 }
 
-extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
+extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
 extern const struct quotactl_ops gfs2_quotactl_ops;
 
 #endif /* __QUOTA_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20ae3d65..722860b323a9 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan)
  * This function is passed the number of inodes to scan, and it returns the
  * total number of remaining possibly-reclaimable inodes.
  */
-static int shrink_icache_memory(int nr, gfp_t gfp_mask)
+static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
     if (nr) {
         /*
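This signature change repeats across the tree in this merge (gfs2, mbcache and nfs below get the same treatment): every ->shrink callback now receives the struct shrinker itself, so a shrinker with private state can recover it via container_of() instead of relying on globals. A minimal registration under the new API (my_cache and my_shrink are made-up names for illustration):

#include <linux/mm.h>

struct my_cache {
    struct shrinker shrinker;
    /* ... private state ... */
};

static int my_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
    struct my_cache *cache = container_of(shrink, struct my_cache, shrinker);

    if (nr_to_scan) {
        /* free up to nr_to_scan objects from 'cache' */
    }
    return 0;    /* report remaining object count here */
}

static void my_cache_init(struct my_cache *cache)
{
    cache->shrinker.shrink = my_shrink;
    cache->shrinker.seeks = DEFAULT_SEEKS;
    register_shrinker(&cache->shrinker);
}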
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index bc2ff5932769..036880895bfc 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
     struct page *new_page;
     unsigned int new_offset;
     struct buffer_head *bh_in = jh2bh(jh_in);
-    struct jbd2_buffer_trigger_type *triggers;
     journal_t *journal = transaction->t_journal;
 
     /*
@@ -328,21 +327,21 @@ repeat:
         done_copy_out = 1;
         new_page = virt_to_page(jh_in->b_frozen_data);
         new_offset = offset_in_page(jh_in->b_frozen_data);
-        triggers = jh_in->b_frozen_triggers;
     } else {
         new_page = jh2bh(jh_in)->b_page;
         new_offset = offset_in_page(jh2bh(jh_in)->b_data);
-        triggers = jh_in->b_triggers;
     }
 
     mapped_data = kmap_atomic(new_page, KM_USER0);
     /*
-     * Fire any commit trigger. Do this before checking for escaping,
-     * as the trigger may modify the magic offset. If a copy-out
-     * happens afterwards, it will have the correct data in the buffer.
+     * Fire data frozen trigger if data already wasn't frozen. Do this
+     * before checking for escaping, as the trigger may modify the magic
+     * offset. If a copy-out happens afterwards, it will have the correct
+     * data in the buffer.
      */
-    jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
-                               triggers);
+    if (!done_copy_out)
+        jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
+                                   jh_in->b_triggers);
 
     /*
      * Check for escaping
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e214d68620ac..b8e0806681bb 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -725,6 +725,9 @@ done:
     page = jh2bh(jh)->b_page;
     offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
     source = kmap_atomic(page, KM_USER0);
+    /* Fire data frozen trigger just before we copy the data */
+    jbd2_buffer_frozen_trigger(jh, source + offset,
+                               jh->b_triggers);
     memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
     kunmap_atomic(source, KM_USER0);
 
@@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
     jh->b_triggers = type;
 }
 
-void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
+void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
                 struct jbd2_buffer_trigger_type *triggers)
 {
     struct buffer_head *bh = jh2bh(jh);
 
-    if (!triggers || !triggers->t_commit)
+    if (!triggers || !triggers->t_frozen)
         return;
 
-    triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
+    triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
 }
 
 void jbd2_buffer_abort_trigger(struct journal_head *jh,
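The rename reflects when the callback now fires: t_frozen runs as the buffer contents are frozen for the journal (either at copy-out time, above, or just before write-out), so a client recomputing a block checksum covers exactly the bytes that will be logged. A simplified stand-in for how a trigger is wired up (ocfs2's metaecc triggers are the in-tree user; this is not its actual code):

#include <linux/jbd2.h>

static void my_frozen(struct jbd2_buffer_trigger_type *triggers,
                      struct buffer_head *bh, void *mapped_data, size_t size)
{
    /* recompute the checksum over mapped_data[0..size) */
}

static struct jbd2_buffer_trigger_type my_triggers = {
    .t_frozen = my_frozen,
};

/* attached once per metadata buffer, e.g. when it joins a transaction: */
/*   jbd2_journal_set_triggers(bh, &my_triggers); */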
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index a33aab6b5e68..54a92fd02bbd 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -234,8 +234,9 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
     if (inode->i_mode != mode) {
         struct iattr attr;
 
-        attr.ia_valid = ATTR_MODE;
+        attr.ia_valid = ATTR_MODE | ATTR_CTIME;
         attr.ia_mode = mode;
+        attr.ia_ctime = CURRENT_TIME_SEC;
         rc = jffs2_do_setattr(inode, &attr);
         if (rc < 0)
             return rc;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 7aa4417e085f..166062a68230 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -222,15 +222,18 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
     dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime));
 
     jffs2_free_raw_inode(ri);
-    d_instantiate(dentry, inode);
 
     D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
               inode->i_ino, inode->i_mode, inode->i_nlink,
               f->inocache->pino_nlink, inode->i_mapping->nrpages));
+
+    d_instantiate(dentry, inode);
+    unlock_new_inode(inode);
     return 0;
 
  fail:
     make_bad_inode(inode);
+    unlock_new_inode(inode);
     iput(inode);
     jffs2_free_raw_inode(ri);
     return ret;
@@ -360,8 +363,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
         /* Eeek. Wave bye bye */
         mutex_unlock(&f->sem);
         jffs2_complete_reservation(c);
-        jffs2_clear_inode(inode);
-        return PTR_ERR(fn);
+        ret = PTR_ERR(fn);
+        goto fail;
     }
 
     /* We use f->target field to store the target path. */
@@ -370,8 +373,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
         printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
         mutex_unlock(&f->sem);
         jffs2_complete_reservation(c);
-        jffs2_clear_inode(inode);
-        return -ENOMEM;
+        ret = -ENOMEM;
+        goto fail;
     }
 
     memcpy(f->target, target, targetlen + 1);
@@ -386,30 +389,24 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
     jffs2_complete_reservation(c);
 
     ret = jffs2_init_security(inode, dir_i);
-    if (ret) {
-        jffs2_clear_inode(inode);
-        return ret;
-    }
+    if (ret)
+        goto fail;
+
     ret = jffs2_init_acl_post(inode);
-    if (ret) {
-        jffs2_clear_inode(inode);
-        return ret;
-    }
+    if (ret)
+        goto fail;
 
     ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
                 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
-    if (ret) {
-        /* Eep. */
-        jffs2_clear_inode(inode);
-        return ret;
-    }
+    if (ret)
+        goto fail;
 
     rd = jffs2_alloc_raw_dirent();
     if (!rd) {
         /* Argh. Now we treat it like a normal delete */
         jffs2_complete_reservation(c);
-        jffs2_clear_inode(inode);
-        return -ENOMEM;
+        ret = -ENOMEM;
+        goto fail;
     }
 
     dir_f = JFFS2_INODE_INFO(dir_i);
@@ -437,8 +434,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
         jffs2_complete_reservation(c);
         jffs2_free_raw_dirent(rd);
         mutex_unlock(&dir_f->sem);
-        jffs2_clear_inode(inode);
-        return PTR_ERR(fd);
+        ret = PTR_ERR(fd);
+        goto fail;
     }
 
     dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -453,7 +450,14 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
     jffs2_complete_reservation(c);
 
     d_instantiate(dentry, inode);
+    unlock_new_inode(inode);
     return 0;
+
+ fail:
+    make_bad_inode(inode);
+    unlock_new_inode(inode);
+    iput(inode);
+    return ret;
 }
 
 
@@ -519,8 +523,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
         /* Eeek. Wave bye bye */
         mutex_unlock(&f->sem);
         jffs2_complete_reservation(c);
-        jffs2_clear_inode(inode);
-        return PTR_ERR(fn);
+        ret = PTR_ERR(fn);
+        goto fail;
     }
     /* No data here. Only a metadata node, which will be
        obsoleted by the first data write
@@ -531,30 +535,24 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
     jffs2_complete_reservation(c);
 
     ret = jffs2_init_security(inode, dir_i);
-    if (ret) {
-        jffs2_clear_inode(inode);
-        return ret;
-    }
+    if (ret)
+        goto fail;
+
     ret = jffs2_init_acl_post(inode);
-    if (ret) {
-        jffs2_clear_inode(inode);
-        return ret;
-    }
+    if (ret)
+        goto fail;
 
     ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
                 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
-    if (ret) {
-        /* Eep. */
-        jffs2_clear_inode(inode);
-        return ret;
-    }
+    if (ret)
+        goto fail;
 
     rd = jffs2_alloc_raw_dirent();
     if (!rd) {
         /* Argh. Now we treat it like a normal delete */
         jffs2_complete_reservation(c);
-        jffs2_clear_inode(inode);
-        return -ENOMEM;
+        ret = -ENOMEM;
+        goto fail;
     }
 
     dir_f = JFFS2_INODE_INFO(dir_i);
@@ -582,8 +580,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
         jffs2_complete_reservation(c);
         jffs2_free_raw_dirent(rd);
         mutex_unlock(&dir_f->sem);
-        jffs2_clear_inode(inode);
-        return PTR_ERR(fd);
+        ret = PTR_ERR(fd);
+        goto fail;
     }
 
     dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -599,7 +597,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
     jffs2_complete_reservation(c);
 
     d_instantiate(dentry, inode);
+    unlock_new_inode(inode);
     return 0;
+
+ fail:
+    make_bad_inode(inode);
+    unlock_new_inode(inode);
+    iput(inode);
+    return ret;
 }
 
 static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
@@ -693,8 +698,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
         /* Eeek. Wave bye bye */
         mutex_unlock(&f->sem);
         jffs2_complete_reservation(c);
-        jffs2_clear_inode(inode);
-        return PTR_ERR(fn);
+        ret = PTR_ERR(fn);
+        goto fail;
     }
     /* No data here. Only a metadata node, which will be
        obsoleted by the first data write
@@ -705,30 +710,24 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
     jffs2_complete_reservation(c);
 
     ret = jffs2_init_security(inode, dir_i);
-    if (ret) {
-        jffs2_clear_inode(inode);
-        return ret;
-    }
+    if (ret)
+        goto fail;
+
     ret = jffs2_init_acl_post(inode);
-    if (ret) {
-        jffs2_clear_inode(inode);
-        return ret;
-    }
+    if (ret)
+        goto fail;
 
     ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
                 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
-    if (ret) {
-        /* Eep. */
-        jffs2_clear_inode(inode);
-        return ret;
-    }
+    if (ret)
+        goto fail;
 
     rd = jffs2_alloc_raw_dirent();
     if (!rd) {
         /* Argh. Now we treat it like a normal delete */
         jffs2_complete_reservation(c);
-        jffs2_clear_inode(inode);
-        return -ENOMEM;
+        ret = -ENOMEM;
+        goto fail;
     }
 
     dir_f = JFFS2_INODE_INFO(dir_i);
@@ -759,8 +758,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
         jffs2_complete_reservation(c);
         jffs2_free_raw_dirent(rd);
         mutex_unlock(&dir_f->sem);
-        jffs2_clear_inode(inode);
-        return PTR_ERR(fd);
+        ret = PTR_ERR(fd);
+        goto fail;
     }
 
     dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -775,8 +774,14 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
     jffs2_complete_reservation(c);
 
     d_instantiate(dentry, inode);
-
+    unlock_new_inode(inode);
     return 0;
+
+ fail:
+    make_bad_inode(inode);
+    unlock_new_inode(inode);
+    iput(inode);
+    return ret;
 }
 
 static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
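All four creation paths (create, symlink, mkdir, mknod) now funnel their errors through a single label, with one invariant: the inode came back from jffs2_new_inode() locked and hashed, so every exit must unlock it exactly once. The shape of the protocol, with the intermediate steps elided (a sketch, not verbatim jffs2 code; some_step_failed stands in for any of the failure points above):

inode = jffs2_new_inode(dir_i, mode, ri);   /* returns a locked, new inode */
if (IS_ERR(inode))
    return PTR_ERR(inode);

if (some_step_failed) {
    ret = -ESOMETHING;
    make_bad_inode(inode);      /* poison the half-built inode */
    unlock_new_inode(inode);    /* pairs with insert_inode_locked() */
    iput(inode);                /* drop the last reference */
    return ret;
}

d_instantiate(dentry, inode);
unlock_new_inode(inode);        /* the success path unlocks too */
return 0;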
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 8bc2c80ab159..459d39d1ea0b 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -465,7 +465,12 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
     inode->i_blocks = 0;
     inode->i_size = 0;
 
-    insert_inode_hash(inode);
+    if (insert_inode_locked(inode) < 0) {
+        make_bad_inode(inode);
+        unlock_new_inode(inode);
+        iput(inode);
+        return ERR_PTR(-EINVAL);
+    }
 
     return inode;
 }
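Unlike insert_inode_hash(), insert_inode_locked() can fail when an inode with the same number is already hashed, which is why jffs2_new_inode() grows an error leg at all. The generic pattern, mirroring the hunk above (next_ino is a placeholder for the filesystem's own inode numbering):

inode = new_inode(sb);
if (!inode)
    return ERR_PTR(-ENOMEM);
inode->i_ino = next_ino;                /* filesystem-specific numbering */
if (insert_inode_locked(inode) < 0) {   /* lost the race: number in use */
    make_bad_inode(inode);
    unlock_new_inode(inode);
    iput(inode);
    return ERR_PTR(-EINVAL);
}
/* inode is hashed, locked and new; finish building it, then unlock */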
diff --git a/fs/libfs.c b/fs/libfs.c
index 09e1016eb774..dcaf972cbf1b 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -489,7 +489,8 @@ int simple_write_end(struct file *file, struct address_space *mapping,
  * unique inode values later for this filesystem, then you must take care
  * to pass it an appropriate max_reserved value to avoid collisions.
  */
-int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files)
+int simple_fill_super(struct super_block *s, unsigned long magic,
+                      struct tree_descr *files)
 {
     struct inode *inode;
     struct dentry *root;
diff --git a/fs/mbcache.c b/fs/mbcache.c
index ec88ff3d04a9..e28f21b95344 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache)
  * What the mbcache registers as to get shrunk dynamically.
  */
 
-static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
+static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
 
 static struct shrinker mb_cache_shrinker = {
     .shrink = mb_cache_shrink_fn,
@@ -191,13 +191,14 @@ forget:
  * This function is called by the kernel memory management when memory
  * gets low.
  *
+ * @shrink: (ignored)
  * @nr_to_scan: Number of objects to scan
  * @gfp_mask: (ignored)
  *
  * Returns the number of objects which are present in the cache.
  */
 static int
-mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
     LIST_HEAD(free_list);
     struct list_head *l, *ltmp;
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 91969589131c..1dbf921ca44b 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -75,10 +75,6 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n)
     if (!IS_ERR(page))
         kmap(page);
     return page;
-
-fail:
-    dir_put_page(page);
-    return ERR_PTR(-EIO);
 }
 
 static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7ec9b34a59f8..d25b5257b7a1 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1286,6 +1286,55 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
 #endif /* CONFIG_NFS_V4_1 */
 }
 
+static int nfs4_server_common_setup(struct nfs_server *server,
+        struct nfs_fh *mntfh)
+{
+    struct nfs_fattr *fattr;
+    int error;
+
+    BUG_ON(!server->nfs_client);
+    BUG_ON(!server->nfs_client->rpc_ops);
+    BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+    fattr = nfs_alloc_fattr();
+    if (fattr == NULL)
+        return -ENOMEM;
+
+    /* We must ensure the session is initialised first */
+    error = nfs4_init_session(server);
+    if (error < 0)
+        goto out;
+
+    /* Probe the root fh to retrieve its FSID and filehandle */
+    error = nfs4_get_rootfh(server, mntfh);
+    if (error < 0)
+        goto out;
+
+    dprintk("Server FSID: %llx:%llx\n",
+            (unsigned long long) server->fsid.major,
+            (unsigned long long) server->fsid.minor);
+    dprintk("Mount FH: %d\n", mntfh->size);
+
+    nfs4_session_set_rwsize(server);
+
+    error = nfs_probe_fsinfo(server, mntfh, fattr);
+    if (error < 0)
+        goto out;
+
+    if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
+        server->namelen = NFS4_MAXNAMLEN;
+
+    spin_lock(&nfs_client_lock);
+    list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+    list_add_tail(&server->master_link, &nfs_volume_list);
+    spin_unlock(&nfs_client_lock);
+
+    server->mount_time = jiffies;
+out:
+    nfs_free_fattr(fattr);
+    return error;
+}
+
 /*
  * Create a version 4 volume record
  */
@@ -1346,7 +1395,6 @@ error:
 struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
                                       struct nfs_fh *mntfh)
 {
-    struct nfs_fattr *fattr;
     struct nfs_server *server;
     int error;
 
@@ -1356,55 +1404,19 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
     if (!server)
         return ERR_PTR(-ENOMEM);
 
-    error = -ENOMEM;
-    fattr = nfs_alloc_fattr();
-    if (fattr == NULL)
-        goto error;
-
     /* set up the general RPC client */
     error = nfs4_init_server(server, data);
     if (error < 0)
         goto error;
 
-    BUG_ON(!server->nfs_client);
-    BUG_ON(!server->nfs_client->rpc_ops);
-    BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
-    error = nfs4_init_session(server);
-    if (error < 0)
-        goto error;
-
-    /* Probe the root fh to retrieve its FSID */
-    error = nfs4_get_rootfh(server, mntfh);
+    error = nfs4_server_common_setup(server, mntfh);
     if (error < 0)
         goto error;
 
-    dprintk("Server FSID: %llx:%llx\n",
-            (unsigned long long) server->fsid.major,
-            (unsigned long long) server->fsid.minor);
-    dprintk("Mount FH: %d\n", mntfh->size);
-
-    nfs4_session_set_rwsize(server);
-
-    error = nfs_probe_fsinfo(server, mntfh, fattr);
-    if (error < 0)
-        goto error;
-
-    if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
-        server->namelen = NFS4_MAXNAMLEN;
-
-    spin_lock(&nfs_client_lock);
-    list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
-    list_add_tail(&server->master_link, &nfs_volume_list);
-    spin_unlock(&nfs_client_lock);
-
-    server->mount_time = jiffies;
     dprintk("<-- nfs4_create_server() = %p\n", server);
-    nfs_free_fattr(fattr);
     return server;
 
 error:
-    nfs_free_fattr(fattr);
     nfs_free_server(server);
     dprintk("<-- nfs4_create_server() = error %d\n", error);
     return ERR_PTR(error);
@@ -1418,7 +1430,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 {
     struct nfs_client *parent_client;
     struct nfs_server *server, *parent_server;
-    struct nfs_fattr *fattr;
     int error;
 
     dprintk("--> nfs4_create_referral_server()\n");
@@ -1427,11 +1438,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
     if (!server)
         return ERR_PTR(-ENOMEM);
 
-    error = -ENOMEM;
-    fattr = nfs_alloc_fattr();
-    if (fattr == NULL)
-        goto error;
-
     parent_server = NFS_SB(data->sb);
     parent_client = parent_server->nfs_client;
 
@@ -1456,40 +1462,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
     if (error < 0)
         goto error;
 
-    BUG_ON(!server->nfs_client);
-    BUG_ON(!server->nfs_client->rpc_ops);
-    BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
-    /* Probe the root fh to retrieve its FSID and filehandle */
-    error = nfs4_get_rootfh(server, mntfh);
-    if (error < 0)
-        goto error;
-
-    /* probe the filesystem info for this server filesystem */
-    error = nfs_probe_fsinfo(server, mntfh, fattr);
+    error = nfs4_server_common_setup(server, mntfh);
     if (error < 0)
         goto error;
 
-    if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
-        server->namelen = NFS4_MAXNAMLEN;
-
-    dprintk("Referral FSID: %llx:%llx\n",
-            (unsigned long long) server->fsid.major,
-            (unsigned long long) server->fsid.minor);
-
-    spin_lock(&nfs_client_lock);
-    list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
-    list_add_tail(&server->master_link, &nfs_volume_list);
-    spin_unlock(&nfs_client_lock);
-
-    server->mount_time = jiffies;
-
-    nfs_free_fattr(fattr);
     dprintk("<-- nfs_create_referral_server() = %p\n", server);
     return server;
 
 error:
-    nfs_free_fattr(fattr);
     nfs_free_server(server);
     dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
     return ERR_PTR(error);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 782b431ef91c..e60416d3f818 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head)
     }
 }
 
-int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
     LIST_HEAD(head);
     struct nfs_inode *nfsi;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 7428f7d6273b..a70e446e1605 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -146,7 +146,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
         goto out;
     }
 
-    if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE)
+    if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
         || !S_ISDIR(fsinfo.fattr->mode)) {
         printk(KERN_ERR "nfs4_get_rootfh:"
                " getroot encountered non-directory\n");
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d8bd619e386c..e70f44b9b3f4 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[];
 void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
 
 /* dir.c */
-extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+extern int nfs_access_cache_shrinker(struct shrinker *shrink,
+                                     int nr_to_scan, gfp_t gfp_mask);
 
 /* inode.c */
 extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6bdef28efa33..65c8dae4b267 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -862,8 +862,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
         bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
         *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
         *p++ = cpu_to_be32(0);
-        *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
-        *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
+        *p++ = cpu_to_be32(iap->ia_atime.tv_sec);
+        *p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
     }
     else if (iap->ia_valid & ATTR_ATIME) {
         bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 04214fc5c304..f9df16de4a56 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -570,6 +570,22 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
     nfs_show_mountd_netid(m, nfss, showdefaults);
 }
 
+#ifdef CONFIG_NFS_V4
+static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
+                                   int showdefaults)
+{
+    struct nfs_client *clp = nfss->nfs_client;
+
+    seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
+    seq_printf(m, ",minorversion=%u", clp->cl_minorversion);
+}
+#else
+static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
+                                   int showdefaults)
+{
+}
+#endif
+
 /*
  * Describe the mount options in force on this server representation
  */
@@ -631,11 +647,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 
     if (version != 4)
         nfs_show_mountd_options(m, nfss, showdefaults);
+    else
+        nfs_show_nfsv4_options(m, nfss, showdefaults);
 
-#ifdef CONFIG_NFS_V4
-    if (clp->rpc_ops->version == 4)
-        seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
-#endif
     if (nfss->options & NFS_OPTION_FSCACHE)
         seq_printf(m, ",fsc");
 }
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 12f7109720c2..4a2734758778 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4122,8 +4122,8 @@ nfs4_state_shutdown(void)
     nfs4_lock_state();
     nfs4_release_reclaim();
     __nfs4_state_shutdown();
-    nfsd4_destroy_callback_queue();
     nfs4_unlock_state();
+    nfsd4_destroy_callback_queue();
 }
 
 /*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ebbf3b6b2457..3c111120b619 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -443,8 +443,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
     if (size_change)
         put_write_access(inode);
     if (!err)
-        if (EX_ISSYNC(fhp->fh_export))
-            write_inode_now(inode, 1);
+        commit_metadata(fhp);
 out:
     return err;
 
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index af638d59e3bf..43c8c5b541fd 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -75,8 +75,6 @@ struct nilfs_btree_path {
 
 extern struct kmem_cache *nilfs_btree_path_cache;
 
-int nilfs_btree_path_cache_init(void);
-void nilfs_btree_path_cache_destroy(void);
 int nilfs_btree_init(struct nilfs_bmap *);
 int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64,
                                    const __u64 *, const __u64 *, int);
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index fdf1c3b6d673..85fbb66455e2 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -127,8 +127,6 @@ struct nilfs_segment_buffer {
 
 extern struct kmem_cache *nilfs_segbuf_cachep;
 
-int __init nilfs_init_segbuf_cache(void);
-void nilfs_destroy_segbuf_cache(void);
 struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *);
 void nilfs_segbuf_free(struct nilfs_segment_buffer *);
 void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long,
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index dca142361ccf..01e20dbb217d 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -221,8 +221,6 @@ enum {
221extern struct kmem_cache *nilfs_transaction_cachep; 221extern struct kmem_cache *nilfs_transaction_cachep;
222 222
223/* segment.c */ 223/* segment.c */
224extern int nilfs_init_transaction_cache(void);
225extern void nilfs_destroy_transaction_cache(void);
226extern void nilfs_relax_pressure_in_lock(struct super_block *); 224extern void nilfs_relax_pressure_in_lock(struct super_block *);
227 225
228extern int nilfs_construct_segment(struct super_block *); 226extern int nilfs_construct_segment(struct super_block *);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 03b34b738993..414ef68931cf 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1130,13 +1130,13 @@ static void nilfs_segbuf_init_once(void *obj)
1130 1130
1131static void nilfs_destroy_cachep(void) 1131static void nilfs_destroy_cachep(void)
1132{ 1132{
1133 if (nilfs_inode_cachep) 1133 if (nilfs_inode_cachep)
1134 kmem_cache_destroy(nilfs_inode_cachep); 1134 kmem_cache_destroy(nilfs_inode_cachep);
1135 if (nilfs_transaction_cachep) 1135 if (nilfs_transaction_cachep)
1136 kmem_cache_destroy(nilfs_transaction_cachep); 1136 kmem_cache_destroy(nilfs_transaction_cachep);
1137 if (nilfs_segbuf_cachep) 1137 if (nilfs_segbuf_cachep)
1138 kmem_cache_destroy(nilfs_segbuf_cachep); 1138 kmem_cache_destroy(nilfs_segbuf_cachep);
1139 if (nilfs_btree_path_cache) 1139 if (nilfs_btree_path_cache)
1140 kmem_cache_destroy(nilfs_btree_path_cache); 1140 kmem_cache_destroy(nilfs_btree_path_cache);
1141} 1141}
1142 1142
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3623ca20cc18..356e976772bf 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
196 dump_stack(); 196 dump_stack();
197 goto bail; 197 goto bail;
198 } 198 }
199
200 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
201 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
202 (unsigned long long)past_eof);
203
204 if (create && (iblock >= past_eof))
205 set_buffer_new(bh_result);
206 } 199 }
207 200
201 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
202 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
203 (unsigned long long)past_eof);
204 if (create && (iblock >= past_eof))
205 set_buffer_new(bh_result);
206
208bail: 207bail:
209 if (err < 0) 208 if (err < 0)
210 err = -EIO; 209 err = -EIO;
@@ -459,36 +458,6 @@ int walk_page_buffers( handle_t *handle,
459 return ret; 458 return ret;
460} 459}
461 460
462handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
463 struct page *page,
464 unsigned from,
465 unsigned to)
466{
467 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
468 handle_t *handle;
469 int ret = 0;
470
471 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
472 if (IS_ERR(handle)) {
473 ret = -ENOMEM;
474 mlog_errno(ret);
475 goto out;
476 }
477
478 if (ocfs2_should_order_data(inode)) {
479 ret = ocfs2_jbd2_file_inode(handle, inode);
480 if (ret < 0)
481 mlog_errno(ret);
482 }
483out:
484 if (ret) {
485 if (!IS_ERR(handle))
486 ocfs2_commit_trans(osb, handle);
487 handle = ERR_PTR(ret);
488 }
489 return handle;
490}
491
492static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) 461static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
493{ 462{
494 sector_t status; 463 sector_t status;
@@ -1131,23 +1100,37 @@ out:
1131 */ 1100 */
1132static int ocfs2_grab_pages_for_write(struct address_space *mapping, 1101static int ocfs2_grab_pages_for_write(struct address_space *mapping,
1133 struct ocfs2_write_ctxt *wc, 1102 struct ocfs2_write_ctxt *wc,
1134 u32 cpos, loff_t user_pos, int new, 1103 u32 cpos, loff_t user_pos,
1104 unsigned user_len, int new,
1135 struct page *mmap_page) 1105 struct page *mmap_page)
1136{ 1106{
1137 int ret = 0, i; 1107 int ret = 0, i;
1138 unsigned long start, target_index, index; 1108 unsigned long start, target_index, end_index, index;
1139 struct inode *inode = mapping->host; 1109 struct inode *inode = mapping->host;
1110 loff_t last_byte;
1140 1111
1141 target_index = user_pos >> PAGE_CACHE_SHIFT; 1112 target_index = user_pos >> PAGE_CACHE_SHIFT;
1142 1113
1143 /* 1114 /*
1144 * Figure out how many pages we'll be manipulating here. For 1115 * Figure out how many pages we'll be manipulating here. For
1145 * a non-allocating write, we just change the one 1116 * a non-allocating write, we just change the one
1146 * page. Otherwise, we'll need a whole cluster's worth. 1117 * page. Otherwise, we'll need a whole cluster's worth. If we're
1118 * writing past i_size, we only need enough pages to cover the
1119 * last page of the write.
1147 */ 1120 */
1148 if (new) { 1121 if (new) {
1149 wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); 1122 wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
1150 start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); 1123 start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
1124 /*
1125 * We need the index *past* the last page we could possibly
1126 * touch. This is the page past the end of the write or
1127 * i_size, whichever is greater.
1128 */
1129 last_byte = max(user_pos + user_len, i_size_read(inode));
1130 BUG_ON(last_byte < 1);
1131 end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
1132 if ((start + wc->w_num_pages) > end_index)
1133 wc->w_num_pages = end_index - start;
1151 } else { 1134 } else {
1152 wc->w_num_pages = 1; 1135 wc->w_num_pages = 1;
1153 start = target_index; 1136 start = target_index;
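
The hunk above caps an allocating write at the page past max(write end, i_size). A minimal sketch of that arithmetic, with illustrative names standing in for the ocfs2 helpers (not the real API):

	/* Pages an allocating write touches: a full cluster's worth,
	 * clamped so we never grab pages past the end of the write or
	 * past i_size, whichever is greater. */
	static unsigned long pages_for_write(unsigned long start_index,
					     unsigned long pages_per_cluster,
					     loff_t user_pos, unsigned user_len,
					     loff_t i_size)
	{
		loff_t last_byte = max(user_pos + user_len, i_size);
		unsigned long end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
		unsigned long num_pages = pages_per_cluster;

		if (start_index + num_pages > end_index)
			num_pages = end_index - start_index;
		return num_pages;
	}
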
@@ -1620,21 +1603,20 @@ out:
1620 * write path can treat it as a non-allocating write, which has no 1603 * write path can treat it as a non-allocating write, which has no
1621 * special case code for sparse/nonsparse files. 1604 * special case code for sparse/nonsparse files.
1622 */ 1605 */
1623static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos, 1606static int ocfs2_expand_nonsparse_inode(struct inode *inode,
1624 unsigned len, 1607 struct buffer_head *di_bh,
1608 loff_t pos, unsigned len,
1625 struct ocfs2_write_ctxt *wc) 1609 struct ocfs2_write_ctxt *wc)
1626{ 1610{
1627 int ret; 1611 int ret;
1628 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1629 loff_t newsize = pos + len; 1612 loff_t newsize = pos + len;
1630 1613
1631 if (ocfs2_sparse_alloc(osb)) 1614 BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
1632 return 0;
1633 1615
1634 if (newsize <= i_size_read(inode)) 1616 if (newsize <= i_size_read(inode))
1635 return 0; 1617 return 0;
1636 1618
1637 ret = ocfs2_extend_no_holes(inode, newsize, pos); 1619 ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos);
1638 if (ret) 1620 if (ret)
1639 mlog_errno(ret); 1621 mlog_errno(ret);
1640 1622
@@ -1644,6 +1626,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
1644 return ret; 1626 return ret;
1645} 1627}
1646 1628
1629static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
1630 loff_t pos)
1631{
1632 int ret = 0;
1633
1634 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
1635 if (pos > i_size_read(inode))
1636 ret = ocfs2_zero_extend(inode, di_bh, pos);
1637
1638 return ret;
1639}
1640
1647int ocfs2_write_begin_nolock(struct address_space *mapping, 1641int ocfs2_write_begin_nolock(struct address_space *mapping,
1648 loff_t pos, unsigned len, unsigned flags, 1642 loff_t pos, unsigned len, unsigned flags,
1649 struct page **pagep, void **fsdata, 1643 struct page **pagep, void **fsdata,
@@ -1679,7 +1673,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1679 } 1673 }
1680 } 1674 }
1681 1675
1682 ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc); 1676 if (ocfs2_sparse_alloc(osb))
1677 ret = ocfs2_zero_tail(inode, di_bh, pos);
1678 else
1679 ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
1680 wc);
1683 if (ret) { 1681 if (ret) {
1684 mlog_errno(ret); 1682 mlog_errno(ret);
1685 goto out; 1683 goto out;
@@ -1789,7 +1787,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1789 * that we can zero and flush if we error after adding the 1787 * that we can zero and flush if we error after adding the
1790 * extent. 1788 * extent.
1791 */ 1789 */
1792 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, 1790 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
1793 cluster_of_pages, mmap_page); 1791 cluster_of_pages, mmap_page);
1794 if (ret) { 1792 if (ret) {
1795 mlog_errno(ret); 1793 mlog_errno(ret);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 6b5a492e1749..153abb5abef0 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
1671 struct dlm_ctxt *dlm = NULL; 1671 struct dlm_ctxt *dlm = NULL;
1672 struct dlm_ctxt *new_ctxt = NULL; 1672 struct dlm_ctxt *new_ctxt = NULL;
1673 1673
1674 if (strlen(domain) > O2NM_MAX_NAME_LEN) { 1674 if (strlen(domain) >= O2NM_MAX_NAME_LEN) {
1675 ret = -ENAMETOOLONG; 1675 ret = -ENAMETOOLONG;
1676 mlog(ML_ERROR, "domain name length too long\n"); 1676 mlog(ML_ERROR, "domain name length too long\n");
1677 goto leave; 1677 goto leave;
@@ -1709,6 +1709,7 @@ retry:
1709 } 1709 }
1710 1710
1711 if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) { 1711 if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) {
1712 spin_unlock(&dlm_domain_lock);
1712 mlog(ML_ERROR, 1713 mlog(ML_ERROR,
1713 "Requested locking protocol version is not " 1714 "Requested locking protocol version is not "
1714 "compatible with already registered domain " 1715 "compatible with already registered domain "
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4a7506a4e314..94b97fc6a88e 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2808,14 +2808,8 @@ again:
2808 mlog(0, "trying again...\n"); 2808 mlog(0, "trying again...\n");
2809 goto again; 2809 goto again;
2810 } 2810 }
2811 /* now that we are sure the MIGRATING state is there, drop
2812 * the unneeded state which blocked threads trying to DIRTY */
2813 spin_lock(&res->spinlock);
2814 BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
2815 BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
2816 res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
2817 spin_unlock(&res->spinlock);
2818 2811
2812 ret = 0;
2819 /* did the target go down or die? */ 2813 /* did the target go down or die? */
2820 spin_lock(&dlm->spinlock); 2814 spin_lock(&dlm->spinlock);
2821 if (!test_bit(target, dlm->domain_map)) { 2815 if (!test_bit(target, dlm->domain_map)) {
@@ -2826,9 +2820,21 @@ again:
2826 spin_unlock(&dlm->spinlock); 2820 spin_unlock(&dlm->spinlock);
2827 2821
2828 /* 2822 /*
2823 * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
2824 * another try; otherwise, we are sure the MIGRATING state is there,
2825 * drop the unneeded state which blocked threads trying to DIRTY
2826 */
2827 spin_lock(&res->spinlock);
2828 BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
2829 res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
2830 if (!ret)
2831 BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
2832 spin_unlock(&res->spinlock);
2833
2834 /*
2829 * at this point: 2835 * at this point:
2830 * 2836 *
2831 * o the DLM_LOCK_RES_MIGRATING flag is set 2837 * o the DLM_LOCK_RES_MIGRATING flag is set if target not down
2832 * o there are no pending asts on this lockres 2838 * o there are no pending asts on this lockres
2833 * o all processes trying to reserve an ast on this 2839 * o all processes trying to reserve an ast on this
2834 * lockres must wait for the MIGRATING flag to clear 2840 * lockres must wait for the MIGRATING flag to clear
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index f8b75ce4be70..9dfaac73b36d 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
463 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { 463 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
464 int bit; 464 int bit;
465 465
466 bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0); 466 bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
467 if (bit >= O2NM_MAX_NODES || bit < 0) 467 if (bit >= O2NM_MAX_NODES || bit < 0)
468 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); 468 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
469 else 469 else
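
The bound change above matters because find_next_bit(map, size, offset) scans the half-open bit range [offset, size) and returns size when nothing is set; passing O2NM_MAX_NODES + 1 could inspect one bit past recovery_map. A hedged illustration of the contract:

	DECLARE_BITMAP(map, O2NM_MAX_NODES);

	bitmap_zero(map, O2NM_MAX_NODES);
	set_bit(3, map);
	/* scans bits [0, O2NM_MAX_NODES) only */
	WARN_ON(find_next_bit(map, O2NM_MAX_NODES, 0) != 3);
	/* empty remainder: returns the size, never a bit past it */
	WARN_ON(find_next_bit(map, O2NM_MAX_NODES, 4) != O2NM_MAX_NODES);
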
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6a13ea64c447..2b10b36d1577 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -724,28 +724,55 @@ leave:
724 return status; 724 return status;
725} 725}
726 726
727/*
728 * While a write will already be ordering the data, a truncate will not.
729 * Thus, we need to explicitly order the zeroed pages.
730 */
731static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
732{
733 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
734 handle_t *handle = NULL;
735 int ret = 0;
736
737 if (!ocfs2_should_order_data(inode))
738 goto out;
739
740 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
741 if (IS_ERR(handle)) {
742 ret = -ENOMEM;
743 mlog_errno(ret);
744 goto out;
745 }
746
747 ret = ocfs2_jbd2_file_inode(handle, inode);
748 if (ret < 0)
749 mlog_errno(ret);
750
751out:
752 if (ret) {
753 if (!IS_ERR(handle))
754 ocfs2_commit_trans(osb, handle);
755 handle = ERR_PTR(ret);
756 }
757 return handle;
758}
759
727/* Some parts of this taken from generic_cont_expand, which turned out 760/* Some parts of this taken from generic_cont_expand, which turned out
728 * to be too fragile to do exactly what we need without us having to 761 * to be too fragile to do exactly what we need without us having to
729 * worry about recursive locking in ->write_begin() and ->write_end(). */ 762 * worry about recursive locking in ->write_begin() and ->write_end(). */
730static int ocfs2_write_zero_page(struct inode *inode, 763static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
731 u64 size) 764 u64 abs_to)
732{ 765{
733 struct address_space *mapping = inode->i_mapping; 766 struct address_space *mapping = inode->i_mapping;
734 struct page *page; 767 struct page *page;
735 unsigned long index; 768 unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
736 unsigned int offset;
737 handle_t *handle = NULL; 769 handle_t *handle = NULL;
738 int ret; 770 int ret = 0;
771 unsigned zero_from, zero_to, block_start, block_end;
739 772
740 offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ 773 BUG_ON(abs_from >= abs_to);
741 /* ugh. in prepare/commit_write, if from==to==start of block, we 774 BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
742 ** skip the prepare. make sure we never send an offset for the start 775 BUG_ON(abs_from & (inode->i_blkbits - 1));
743 ** of a block
744 */
745 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
746 offset++;
747 }
748 index = size >> PAGE_CACHE_SHIFT;
749 776
750 page = grab_cache_page(mapping, index); 777 page = grab_cache_page(mapping, index);
751 if (!page) { 778 if (!page) {
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
754 goto out; 781 goto out;
755 } 782 }
756 783
757 ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); 784 /* Get the offsets within the page that we want to zero */
758 if (ret < 0) { 785 zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
759 mlog_errno(ret); 786 zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
760 goto out_unlock; 787 if (!zero_to)
761 } 788 zero_to = PAGE_CACHE_SIZE;
762 789
763 if (ocfs2_should_order_data(inode)) { 790 mlog(0,
764 handle = ocfs2_start_walk_page_trans(inode, page, offset, 791 "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
765 offset); 792 (unsigned long long)abs_from, (unsigned long long)abs_to,
766 if (IS_ERR(handle)) { 793 index, zero_from, zero_to);
767 ret = PTR_ERR(handle); 794
768 handle = NULL; 795 /* We know that zero_from is block aligned */
796 for (block_start = zero_from; block_start < zero_to;
797 block_start = block_end) {
798 block_end = block_start + (1 << inode->i_blkbits);
799
800 /*
801 * block_start is block-aligned. Bump it by one to
802 * force ocfs2_{prepare,commit}_write() to zero the
803 * whole block.
804 */
805 ret = ocfs2_prepare_write_nolock(inode, page,
806 block_start + 1,
807 block_start + 1);
808 if (ret < 0) {
809 mlog_errno(ret);
769 goto out_unlock; 810 goto out_unlock;
770 } 811 }
771 }
772 812
773 /* must not update i_size! */ 813 if (!handle) {
774 ret = block_commit_write(page, offset, offset); 814 handle = ocfs2_zero_start_ordered_transaction(inode);
775 if (ret < 0) 815 if (IS_ERR(handle)) {
776 mlog_errno(ret); 816 ret = PTR_ERR(handle);
777 else 817 handle = NULL;
778 ret = 0; 818 break;
819 }
820 }
821
822 /* must not update i_size! */
823 ret = block_commit_write(page, block_start + 1,
824 block_start + 1);
825 if (ret < 0)
826 mlog_errno(ret);
827 else
828 ret = 0;
829 }
779 830
780 if (handle) 831 if (handle)
781 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 832 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
833
782out_unlock: 834out_unlock:
783 unlock_page(page); 835 unlock_page(page);
784 page_cache_release(page); 836 page_cache_release(page);
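
The loop above leans on a quirk of the prepare/commit helpers: a zero-length write (from == to) at the start of a block is skipped outright, while any offset strictly inside a block forces the whole block to be mapped and zeroed. A condensed sketch of the per-block zeroing, with prepare()/commit() standing in for ocfs2_prepare_write_nolock() and block_commit_write():

	/* zero_from is block-aligned; walk the page one block at a time
	 * and pass block_start + 1 so each block is treated as partially
	 * written and therefore zeroed in full. */
	for (block_start = zero_from; block_start < zero_to;
	     block_start = block_end) {
		block_end = block_start + blocksize;
		prepare(page, block_start + 1, block_start + 1);
		commit(page, block_start + 1, block_start + 1);
	}
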
@@ -786,22 +838,114 @@ out:
786 return ret; 838 return ret;
787} 839}
788 840
789static int ocfs2_zero_extend(struct inode *inode, 841/*
790 u64 zero_to_size) 842 * Find the next range to zero. We do this in terms of bytes because
843 * that's what ocfs2_zero_extend() wants, and it is dealing with the
844 * pagecache. The returned range may span multiple extents.
845 *
846 * zero_start and zero_end are ocfs2_zero_extend()'s current idea of what
847 * needs to be zeroed. range_start and range_end return the next zeroing
848 * range. A subsequent call should pass the previous range_end as its
849 * zero_start. If range_end is 0, there's nothing to do.
850 *
851 * Unwritten extents are skipped over. Refcounted extents are CoW'd.
852 */
853static int ocfs2_zero_extend_get_range(struct inode *inode,
854 struct buffer_head *di_bh,
855 u64 zero_start, u64 zero_end,
856 u64 *range_start, u64 *range_end)
791{ 857{
792 int ret = 0; 858 int rc = 0, needs_cow = 0;
793 u64 start_off; 859 u32 p_cpos, zero_clusters = 0;
794 struct super_block *sb = inode->i_sb; 860 u32 zero_cpos =
861 zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
862 u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
863 unsigned int num_clusters = 0;
864 unsigned int ext_flags = 0;
795 865
796 start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); 866 while (zero_cpos < last_cpos) {
797 while (start_off < zero_to_size) { 867 rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
798 ret = ocfs2_write_zero_page(inode, start_off); 868 &num_clusters, &ext_flags);
799 if (ret < 0) { 869 if (rc) {
800 mlog_errno(ret); 870 mlog_errno(rc);
871 goto out;
872 }
873
874 if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
875 zero_clusters = num_clusters;
876 if (ext_flags & OCFS2_EXT_REFCOUNTED)
877 needs_cow = 1;
878 break;
879 }
880
881 zero_cpos += num_clusters;
882 }
883 if (!zero_clusters) {
884 *range_end = 0;
885 goto out;
886 }
887
888 while ((zero_cpos + zero_clusters) < last_cpos) {
889 rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
890 &p_cpos, &num_clusters,
891 &ext_flags);
892 if (rc) {
893 mlog_errno(rc);
801 goto out; 894 goto out;
802 } 895 }
803 896
804 start_off += sb->s_blocksize; 897 if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
898 break;
899 if (ext_flags & OCFS2_EXT_REFCOUNTED)
900 needs_cow = 1;
901 zero_clusters += num_clusters;
902 }
903 if ((zero_cpos + zero_clusters) > last_cpos)
904 zero_clusters = last_cpos - zero_cpos;
905
906 if (needs_cow) {
907 rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
908 UINT_MAX);
909 if (rc) {
910 mlog_errno(rc);
911 goto out;
912 }
913 }
914
915 *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
916 *range_end = ocfs2_clusters_to_bytes(inode->i_sb,
917 zero_cpos + zero_clusters);
918
919out:
920 return rc;
921}
922
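
A usage sketch of the contract documented above, mirroring the ocfs2_zero_extend() caller added later in this patch (error paths trimmed):

	u64 start = zero_start, rs = 0, re = 0;

	while (start < zero_to_size) {
		if (ocfs2_zero_extend_get_range(inode, di_bh, start,
						zero_to_size, &rs, &re))
			break;			/* error */
		if (!re)
			break;			/* nothing left needs zeroing */
		rs = max(rs, start);		/* trim to the span of interest */
		re = min_t(u64, re, zero_to_size);
		if (ocfs2_zero_extend_range(inode, rs, re))
			break;
		start = re;			/* next call resumes here */
	}
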
923/*
924 * Zero one range returned from ocfs2_zero_extend_get_range(). The caller
925 * has made sure that the entire range needs zeroing.
926 */
927static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
928 u64 range_end)
929{
930 int rc = 0;
931 u64 next_pos;
932 u64 zero_pos = range_start;
933
934 mlog(0, "range_start = %llu, range_end = %llu\n",
935 (unsigned long long)range_start,
936 (unsigned long long)range_end);
937 BUG_ON(range_start >= range_end);
938
939 while (zero_pos < range_end) {
940 next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
941 if (next_pos > range_end)
942 next_pos = range_end;
943 rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
944 if (rc < 0) {
945 mlog_errno(rc);
946 break;
947 }
948 zero_pos = next_pos;
805 949
806 /* 950 /*
807 * Very large extends have the potential to lock up 951 * Very large extends have the potential to lock up
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
810 cond_resched(); 954 cond_resched();
811 } 955 }
812 956
813out: 957 return rc;
958}
959
960int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
961 loff_t zero_to_size)
962{
963 int ret = 0;
964 u64 zero_start, range_start = 0, range_end = 0;
965 struct super_block *sb = inode->i_sb;
966
967 zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
968 mlog(0, "zero_start %llu for i_size %llu\n",
969 (unsigned long long)zero_start,
970 (unsigned long long)i_size_read(inode));
971 while (zero_start < zero_to_size) {
972 ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
973 zero_to_size,
974 &range_start,
975 &range_end);
976 if (ret) {
977 mlog_errno(ret);
978 break;
979 }
980 if (!range_end)
981 break;
982 /* Trim the ends */
983 if (range_start < zero_start)
984 range_start = zero_start;
985 if (range_end > zero_to_size)
986 range_end = zero_to_size;
987
988 ret = ocfs2_zero_extend_range(inode, range_start,
989 range_end);
990 if (ret) {
991 mlog_errno(ret);
992 break;
993 }
994 zero_start = range_end;
995 }
996
814 return ret; 997 return ret;
815} 998}
816 999
817int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) 1000int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
1001 u64 new_i_size, u64 zero_to)
818{ 1002{
819 int ret; 1003 int ret;
820 u32 clusters_to_add; 1004 u32 clusters_to_add;
821 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1005 struct ocfs2_inode_info *oi = OCFS2_I(inode);
822 1006
1007 /*
1008 * Only quota files call this without a bh, and they can't be
1009 * refcounted.
1010 */
1011 BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
1012 BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
1013
823 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); 1014 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
824 if (clusters_to_add < oi->ip_clusters) 1015 if (clusters_to_add < oi->ip_clusters)
825 clusters_to_add = 0; 1016 clusters_to_add = 0;
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
840 * still need to zero the area between the old i_size and the 1031 * still need to zero the area between the old i_size and the
841 * new i_size. 1032 * new i_size.
842 */ 1033 */
843 ret = ocfs2_zero_extend(inode, zero_to); 1034 ret = ocfs2_zero_extend(inode, di_bh, zero_to);
844 if (ret < 0) 1035 if (ret < 0)
845 mlog_errno(ret); 1036 mlog_errno(ret);
846 1037
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
862 goto out; 1053 goto out;
863 1054
864 if (i_size_read(inode) == new_i_size) 1055 if (i_size_read(inode) == new_i_size)
865 goto out; 1056 goto out;
866 BUG_ON(new_i_size < i_size_read(inode)); 1057 BUG_ON(new_i_size < i_size_read(inode));
867 1058
868 /* 1059 /*
869 * Fall through for converting inline data, even if the fs
870 * supports sparse files.
871 *
872 * The check for inline data here is legal - nobody can add
873 * the feature since we have i_mutex. We must check it again
874 * after acquiring ip_alloc_sem though, as paths like mmap
875 * might have raced us to converting the inode to extents.
876 */
877 if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
878 && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
879 goto out_update_size;
880
881 /*
882 * The alloc sem blocks people in read/write from reading our 1060 * The alloc sem blocks people in read/write from reading our
883 * allocation until we're done changing it. We depend on 1061 * allocation until we're done changing it. We depend on
884 * i_mutex to block other extend/truncate calls while we're 1062 * i_mutex to block other extend/truncate calls while we're
885 * here. 1063 * here. We even have to hold it for sparse files because there
1064 * might be some tail zeroing.
886 */ 1065 */
887 down_write(&oi->ip_alloc_sem); 1066 down_write(&oi->ip_alloc_sem);
888 1067
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
899 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh); 1078 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
900 if (ret) { 1079 if (ret) {
901 up_write(&oi->ip_alloc_sem); 1080 up_write(&oi->ip_alloc_sem);
902
903 mlog_errno(ret); 1081 mlog_errno(ret);
904 goto out; 1082 goto out;
905 } 1083 }
906 } 1084 }
907 1085
908 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 1086 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
909 ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size); 1087 ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
1088 else
1089 ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
1090 new_i_size);
910 1091
911 up_write(&oi->ip_alloc_sem); 1092 up_write(&oi->ip_alloc_sem);
912 1093
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index d66cf4f7c70e..97bf761c9e7c 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
54int ocfs2_simple_size_update(struct inode *inode, 54int ocfs2_simple_size_update(struct inode *inode,
55 struct buffer_head *di_bh, 55 struct buffer_head *di_bh,
56 u64 new_i_size); 56 u64 new_i_size);
57int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, 57int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
58 u64 zero_to); 58 u64 new_i_size, u64 zero_to);
59int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
60 loff_t zero_to);
59int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
60int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
61 struct kstat *stat); 63 struct kstat *stat);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 47878cf16418..625de9d7088c 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger
472 return container_of(triggers, struct ocfs2_triggers, ot_triggers); 472 return container_of(triggers, struct ocfs2_triggers, ot_triggers);
473} 473}
474 474
475static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, 475static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
476 struct buffer_head *bh, 476 struct buffer_head *bh,
477 void *data, size_t size) 477 void *data, size_t size)
478{ 478{
@@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
491 * Quota blocks have their own trigger because the struct ocfs2_block_check 491 * Quota blocks have their own trigger because the struct ocfs2_block_check
492 * offset depends on the blocksize. 492 * offset depends on the blocksize.
493 */ 493 */
494static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, 494static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
495 struct buffer_head *bh, 495 struct buffer_head *bh,
496 void *data, size_t size) 496 void *data, size_t size)
497{ 497{
@@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
511 * Directory blocks also have their own trigger because the 511 * Directory blocks also have their own trigger because the
512 * struct ocfs2_block_check offset depends on the blocksize. 512 * struct ocfs2_block_check offset depends on the blocksize.
513 */ 513 */
514static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers, 514static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
515 struct buffer_head *bh, 515 struct buffer_head *bh,
516 void *data, size_t size) 516 void *data, size_t size)
517{ 517{
@@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
544 544
545static struct ocfs2_triggers di_triggers = { 545static struct ocfs2_triggers di_triggers = {
546 .ot_triggers = { 546 .ot_triggers = {
547 .t_commit = ocfs2_commit_trigger, 547 .t_frozen = ocfs2_frozen_trigger,
548 .t_abort = ocfs2_abort_trigger, 548 .t_abort = ocfs2_abort_trigger,
549 }, 549 },
550 .ot_offset = offsetof(struct ocfs2_dinode, i_check), 550 .ot_offset = offsetof(struct ocfs2_dinode, i_check),
@@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = {
552 552
553static struct ocfs2_triggers eb_triggers = { 553static struct ocfs2_triggers eb_triggers = {
554 .ot_triggers = { 554 .ot_triggers = {
555 .t_commit = ocfs2_commit_trigger, 555 .t_frozen = ocfs2_frozen_trigger,
556 .t_abort = ocfs2_abort_trigger, 556 .t_abort = ocfs2_abort_trigger,
557 }, 557 },
558 .ot_offset = offsetof(struct ocfs2_extent_block, h_check), 558 .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
@@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = {
560 560
561static struct ocfs2_triggers rb_triggers = { 561static struct ocfs2_triggers rb_triggers = {
562 .ot_triggers = { 562 .ot_triggers = {
563 .t_commit = ocfs2_commit_trigger, 563 .t_frozen = ocfs2_frozen_trigger,
564 .t_abort = ocfs2_abort_trigger, 564 .t_abort = ocfs2_abort_trigger,
565 }, 565 },
566 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), 566 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
@@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = {
568 568
569static struct ocfs2_triggers gd_triggers = { 569static struct ocfs2_triggers gd_triggers = {
570 .ot_triggers = { 570 .ot_triggers = {
571 .t_commit = ocfs2_commit_trigger, 571 .t_frozen = ocfs2_frozen_trigger,
572 .t_abort = ocfs2_abort_trigger, 572 .t_abort = ocfs2_abort_trigger,
573 }, 573 },
574 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), 574 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
@@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = {
576 576
577static struct ocfs2_triggers db_triggers = { 577static struct ocfs2_triggers db_triggers = {
578 .ot_triggers = { 578 .ot_triggers = {
579 .t_commit = ocfs2_db_commit_trigger, 579 .t_frozen = ocfs2_db_frozen_trigger,
580 .t_abort = ocfs2_abort_trigger, 580 .t_abort = ocfs2_abort_trigger,
581 }, 581 },
582}; 582};
583 583
584static struct ocfs2_triggers xb_triggers = { 584static struct ocfs2_triggers xb_triggers = {
585 .ot_triggers = { 585 .ot_triggers = {
586 .t_commit = ocfs2_commit_trigger, 586 .t_frozen = ocfs2_frozen_trigger,
587 .t_abort = ocfs2_abort_trigger, 587 .t_abort = ocfs2_abort_trigger,
588 }, 588 },
589 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), 589 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
@@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = {
591 591
592static struct ocfs2_triggers dq_triggers = { 592static struct ocfs2_triggers dq_triggers = {
593 .ot_triggers = { 593 .ot_triggers = {
594 .t_commit = ocfs2_dq_commit_trigger, 594 .t_frozen = ocfs2_dq_frozen_trigger,
595 .t_abort = ocfs2_abort_trigger, 595 .t_abort = ocfs2_abort_trigger,
596 }, 596 },
597}; 597};
598 598
599static struct ocfs2_triggers dr_triggers = { 599static struct ocfs2_triggers dr_triggers = {
600 .ot_triggers = { 600 .ot_triggers = {
601 .t_commit = ocfs2_commit_trigger, 601 .t_frozen = ocfs2_frozen_trigger,
602 .t_abort = ocfs2_abort_trigger, 602 .t_abort = ocfs2_abort_trigger,
603 }, 603 },
604 .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), 604 .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
@@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = {
606 606
607static struct ocfs2_triggers dl_triggers = { 607static struct ocfs2_triggers dl_triggers = {
608 .ot_triggers = { 608 .ot_triggers = {
609 .t_commit = ocfs2_commit_trigger, 609 .t_frozen = ocfs2_frozen_trigger,
610 .t_abort = ocfs2_abort_trigger, 610 .t_abort = ocfs2_abort_trigger,
611 }, 611 },
612 .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), 612 .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
@@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
1936 mutex_lock(&os->os_lock); 1936 mutex_lock(&os->os_lock);
1937 ocfs2_queue_orphan_scan(osb); 1937 ocfs2_queue_orphan_scan(osb);
1938 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) 1938 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
1939 schedule_delayed_work(&os->os_orphan_scan_work, 1939 queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
1940 ocfs2_orphan_scan_timeout()); 1940 ocfs2_orphan_scan_timeout());
1941 mutex_unlock(&os->os_lock); 1941 mutex_unlock(&os->os_lock);
1942} 1942}
@@ -1976,8 +1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
1976 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 1976 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1977 else { 1977 else {
1978 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); 1978 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
1979 schedule_delayed_work(&os->os_orphan_scan_work, 1979 queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
1980 ocfs2_orphan_scan_timeout()); 1980 ocfs2_orphan_scan_timeout());
1981 } 1981 }
1982} 1982}
1983 1983
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 3d7419682dc0..ec6adbf8f551 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
118{ 118{
119 unsigned int la_mb; 119 unsigned int la_mb;
120 unsigned int gd_mb; 120 unsigned int gd_mb;
121 unsigned int la_max_mb;
121 unsigned int megs_per_slot; 122 unsigned int megs_per_slot;
122 struct super_block *sb = osb->sb; 123 struct super_block *sb = osb->sb;
123 124
@@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
182 if (megs_per_slot < la_mb) 183 if (megs_per_slot < la_mb)
183 la_mb = megs_per_slot; 184 la_mb = megs_per_slot;
184 185
186 /* We can't store more bits than we can in a block. */
187 la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
188 ocfs2_local_alloc_size(sb) * 8);
189 if (la_mb > la_max_mb)
190 la_mb = la_max_mb;
191
185 return la_mb; 192 return la_mb;
186} 193}
187 194
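
The clamp above exists because the local alloc window is tracked by a fixed bitmap, one bit per cluster, so it can never cover more than ocfs2_local_alloc_size(sb) * 8 clusters. A back-of-the-envelope helper with illustrative names (the real code goes through ocfs2_clusters_to_megabytes()):

	static unsigned int la_cap_mb(unsigned int bitmap_bytes,
				      unsigned int clustersize)
	{
		u64 max_bytes = (u64)bitmap_bytes * 8 * clustersize;

		return (unsigned int)(max_bytes >> 20);	/* bytes -> MB */
	}
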
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 2bb35fe00511..4607923eb24c 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
775 * locking allocators ranks above a transaction start 775 * locking allocators ranks above a transaction start
776 */ 776 */
777 WARN_ON(journal_current_handle()); 777 WARN_ON(journal_current_handle());
778 status = ocfs2_extend_no_holes(gqinode, 778 status = ocfs2_extend_no_holes(gqinode, NULL,
779 gqinode->i_size + (need_alloc << sb->s_blocksize_bits), 779 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
780 gqinode->i_size); 780 gqinode->i_size);
781 if (status < 0) 781 if (status < 0)
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 8bd70d4d184d..dc78764ccc4c 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
971 u64 p_blkno; 971 u64 p_blkno;
972 972
973 /* We are protected by dqio_sem so no locking needed */ 973 /* We are protected by dqio_sem so no locking needed */
974 status = ocfs2_extend_no_holes(lqinode, 974 status = ocfs2_extend_no_holes(lqinode, NULL,
975 lqinode->i_size + 2 * sb->s_blocksize, 975 lqinode->i_size + 2 * sb->s_blocksize,
976 lqinode->i_size); 976 lqinode->i_size);
977 if (status < 0) { 977 if (status < 0) {
@@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1114 return ocfs2_local_quota_add_chunk(sb, type, offset); 1114 return ocfs2_local_quota_add_chunk(sb, type, offset);
1115 1115
1116 /* We are protected by dqio_sem so no locking needed */ 1116 /* We are protected by dqio_sem so no locking needed */
1117 status = ocfs2_extend_no_holes(lqinode, 1117 status = ocfs2_extend_no_holes(lqinode, NULL,
1118 lqinode->i_size + sb->s_blocksize, 1118 lqinode->i_size + sb->s_blocksize,
1119 lqinode->i_size); 1119 lqinode->i_size);
1120 if (status < 0) { 1120 if (status < 0) {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 4793f36f6518..3ac5aa733e9c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2931,6 +2931,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2931 2931
2932 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; 2932 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
2933 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); 2933 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
2934 /*
2935 * We only duplicate pages until we reach the page that contains i_size - 1.
2936 * So trim 'end' to i_size.
2937 */
2938 if (end > i_size_read(context->inode))
2939 end = i_size_read(context->inode);
2934 2940
2935 while (offset < end) { 2941 while (offset < end) {
2936 page_index = offset >> PAGE_CACHE_SHIFT; 2942 page_index = offset >> PAGE_CACHE_SHIFT;
@@ -4166,6 +4172,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
4166 struct inode *inode = old_dentry->d_inode; 4172 struct inode *inode = old_dentry->d_inode;
4167 struct buffer_head *new_bh = NULL; 4173 struct buffer_head *new_bh = NULL;
4168 4174
4175 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
4176 ret = -EINVAL;
4177 mlog_errno(ret);
4178 goto out;
4179 }
4180
4169 ret = filemap_fdatawrite(inode->i_mapping); 4181 ret = filemap_fdatawrite(inode->i_mapping);
4170 if (ret) { 4182 if (ret) {
4171 mlog_errno(ret); 4183 mlog_errno(ret);
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index 40650021fc24..d8b6e4259b80 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -26,7 +26,6 @@
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/slab.h>
30#include <linux/highmem.h> 29#include <linux/highmem.h>
31#include <linux/bitops.h> 30#include <linux/bitops.h>
32#include <linux/list.h> 31#include <linux/list.h>
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f4c2a9eb8c4d..a8e6a95a353f 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
741 le16_to_cpu(bg->bg_free_bits_count)); 741 le16_to_cpu(bg->bg_free_bits_count));
742 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, 742 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
743 le16_to_cpu(bg->bg_bits)); 743 le16_to_cpu(bg->bg_bits));
744 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno); 744 cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno;
745 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) 745 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
746 le16_add_cpu(&cl->cl_next_free_rec, 1); 746 le16_add_cpu(&cl->cl_next_free_rec, 1);
747 747
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e97b34842cfe..d03469f61801 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
709 struct ocfs2_xattr_value_buf *vb, 709 struct ocfs2_xattr_value_buf *vb,
710 struct ocfs2_xattr_set_ctxt *ctxt) 710 struct ocfs2_xattr_set_ctxt *ctxt)
711{ 711{
712 int status = 0; 712 int status = 0, credits;
713 handle_t *handle = ctxt->handle; 713 handle_t *handle = ctxt->handle;
714 enum ocfs2_alloc_restarted why; 714 enum ocfs2_alloc_restarted why;
715 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 715 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
@@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
719 719
720 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 720 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
721 721
722 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 722 while (clusters_to_add) {
723 OCFS2_JOURNAL_ACCESS_WRITE); 723 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
724 if (status < 0) { 724 OCFS2_JOURNAL_ACCESS_WRITE);
725 mlog_errno(status); 725 if (status < 0) {
726 goto leave; 726 mlog_errno(status);
727 } 727 break;
728 }
728 729
729 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 730 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
730 status = ocfs2_add_clusters_in_btree(handle, 731 status = ocfs2_add_clusters_in_btree(handle,
731 &et, 732 &et,
732 &logical_start, 733 &logical_start,
733 clusters_to_add, 734 clusters_to_add,
734 0, 735 0,
735 ctxt->data_ac, 736 ctxt->data_ac,
736 ctxt->meta_ac, 737 ctxt->meta_ac,
737 &why); 738 &why);
738 if (status < 0) { 739 if ((status < 0) && (status != -EAGAIN)) {
739 mlog_errno(status); 740 if (status != -ENOSPC)
740 goto leave; 741 mlog_errno(status);
741 } 742 break;
743 }
742 744
743 ocfs2_journal_dirty(handle, vb->vb_bh); 745 ocfs2_journal_dirty(handle, vb->vb_bh);
744 746
745 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters; 747 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
748 prev_clusters;
746 749
747 /* 750 if (why != RESTART_NONE && clusters_to_add) {
748 * We should have already allocated enough space before the transaction, 751 /*
749 * so no need to restart. 752 * We can only fail in case the alloc file doesn't give
750 */ 753 * up enough clusters.
751 BUG_ON(why != RESTART_NONE || clusters_to_add); 754 */
752 755 BUG_ON(why == RESTART_META);
753leave: 756
757 mlog(0, "restarting xattr value extension for %u"
758 " clusters,.\n", clusters_to_add);
759 credits = ocfs2_calc_extend_credits(inode->i_sb,
760 &vb->vb_xv->xr_list,
761 clusters_to_add);
762 status = ocfs2_extend_trans(handle, credits);
763 if (status < 0) {
764 status = -ENOMEM;
765 mlog_errno(status);
766 break;
767 }
768 }
769 }
754 770
755 return status; 771 return status;
756} 772}
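
The loop above adopts the usual ocfs2 restart pattern: when ocfs2_add_clusters_in_btree() stops early (why != RESTART_NONE), the handle is topped up with enough credits for the remainder and the allocation retried. The skeleton of the pattern, with extend_step() as a hypothetical stand-in for the journal-access-plus-btree-add body:

	while (clusters_to_add) {
		status = extend_step(handle, &clusters_to_add, &why);
		if (status < 0 && status != -EAGAIN)
			break;				/* hard error */
		if (why != RESTART_NONE && clusters_to_add) {
			credits = ocfs2_calc_extend_credits(inode->i_sb,
							    &vb->vb_xv->xr_list,
							    clusters_to_add);
			if (ocfs2_extend_trans(handle, credits) < 0)
				break;			/* can't grow handle */
		}
	}
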
@@ -6788,16 +6804,15 @@ out:
6788 return ret; 6804 return ret;
6789} 6805}
6790 6806
6791static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6807static int ocfs2_reflink_xattr_bucket(handle_t *handle,
6792 u64 blkno, u64 new_blkno, u32 clusters, 6808 u64 blkno, u64 new_blkno, u32 clusters,
6809 u32 *cpos, int num_buckets,
6793 struct ocfs2_alloc_context *meta_ac, 6810 struct ocfs2_alloc_context *meta_ac,
6794 struct ocfs2_alloc_context *data_ac, 6811 struct ocfs2_alloc_context *data_ac,
6795 struct ocfs2_reflink_xattr_tree_args *args) 6812 struct ocfs2_reflink_xattr_tree_args *args)
6796{ 6813{
6797 int i, j, ret = 0; 6814 int i, j, ret = 0;
6798 struct super_block *sb = args->reflink->old_inode->i_sb; 6815 struct super_block *sb = args->reflink->old_inode->i_sb;
6799 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
6800 u32 num_buckets = clusters * bpc;
6801 int bpb = args->old_bucket->bu_blocks; 6816 int bpb = args->old_bucket->bu_blocks;
6802 struct ocfs2_xattr_value_buf vb = { 6817 struct ocfs2_xattr_value_buf vb = {
6803 .vb_access = ocfs2_journal_access, 6818 .vb_access = ocfs2_journal_access,
@@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6816 break; 6831 break;
6817 } 6832 }
6818 6833
6819 /*
6820 * The real bucket num in this series of blocks is stored
6821 * in the 1st bucket.
6822 */
6823 if (i == 0)
6824 num_buckets = le16_to_cpu(
6825 bucket_xh(args->old_bucket)->xh_num_buckets);
6826
6827 ret = ocfs2_xattr_bucket_journal_access(handle, 6834 ret = ocfs2_xattr_bucket_journal_access(handle,
6828 args->new_bucket, 6835 args->new_bucket,
6829 OCFS2_JOURNAL_ACCESS_CREATE); 6836 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6837 bucket_block(args->old_bucket, j), 6844 bucket_block(args->old_bucket, j),
6838 sb->s_blocksize); 6845 sb->s_blocksize);
6839 6846
6847 /*
6848 * Record the start cpos so that we can use it to initialize
6849 * our xattr tree; we also set xh_num_buckets for the new
6850 * bucket.
6851 */
6852 if (i == 0) {
6853 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
6854 xh_entries[0].xe_name_hash);
6855 bucket_xh(args->new_bucket)->xh_num_buckets =
6856 cpu_to_le16(num_buckets);
6857 }
6858
6840 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6859 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6841 6860
6842 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6861 ret = ocfs2_reflink_xattr_header(handle, args->reflink,
@@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6866 } 6885 }
6867 6886
6868 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6887 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6888
6869 ocfs2_xattr_bucket_relse(args->old_bucket); 6889 ocfs2_xattr_bucket_relse(args->old_bucket);
6870 ocfs2_xattr_bucket_relse(args->new_bucket); 6890 ocfs2_xattr_bucket_relse(args->new_bucket);
6871 } 6891 }
@@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6874 ocfs2_xattr_bucket_relse(args->new_bucket); 6894 ocfs2_xattr_bucket_relse(args->new_bucket);
6875 return ret; 6895 return ret;
6876} 6896}
6897
6898static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6899 struct inode *inode,
6900 struct ocfs2_reflink_xattr_tree_args *args,
6901 struct ocfs2_extent_tree *et,
6902 struct ocfs2_alloc_context *meta_ac,
6903 struct ocfs2_alloc_context *data_ac,
6904 u64 blkno, u32 cpos, u32 len)
6905{
6906 int ret, first_inserted = 0;
6907 u32 p_cluster, num_clusters, reflink_cpos = 0;
6908 u64 new_blkno;
6909 unsigned int num_buckets, reflink_buckets;
6910 unsigned int bpc =
6911 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
6912
6913 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6914 if (ret) {
6915 mlog_errno(ret);
6916 goto out;
6917 }
6918 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
6919 ocfs2_xattr_bucket_relse(args->old_bucket);
6920
6921 while (len && num_buckets) {
6922 ret = ocfs2_claim_clusters(handle, data_ac,
6923 1, &p_cluster, &num_clusters);
6924 if (ret) {
6925 mlog_errno(ret);
6926 goto out;
6927 }
6928
6929 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
6930 reflink_buckets = min(num_buckets, bpc * num_clusters);
6931
6932 ret = ocfs2_reflink_xattr_bucket(handle, blkno,
6933 new_blkno, num_clusters,
6934 &reflink_cpos, reflink_buckets,
6935 meta_ac, data_ac, args);
6936 if (ret) {
6937 mlog_errno(ret);
6938 goto out;
6939 }
6940
6941 /*
6942 * For the 1st allocated cluster, we make it use the same cpos
6943 * so that the xattr tree looks the same as the original one
6944 * in the common case.
6945 */
6946 if (!first_inserted) {
6947 reflink_cpos = cpos;
6948 first_inserted = 1;
6949 }
6950 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
6951 num_clusters, 0, meta_ac);
6952 if (ret)
6953 mlog_errno(ret);
6954
6955 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6956 (unsigned long long)new_blkno, num_clusters, reflink_cpos);
6957
6958 len -= num_clusters;
6959 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
6960 num_buckets -= reflink_buckets;
6961 }
6962out:
6963 return ret;
6964}
6965
6877/* 6966/*
6878 * Create the same xattr extent record in the new inode's xattr tree. 6967 * Create the same xattr extent record in the new inode's xattr tree.
6879 */ 6968 */
@@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6885 void *para) 6974 void *para)
6886{ 6975{
6887 int ret, credits = 0; 6976 int ret, credits = 0;
6888 u32 p_cluster, num_clusters;
6889 u64 new_blkno;
6890 handle_t *handle; 6977 handle_t *handle;
6891 struct ocfs2_reflink_xattr_tree_args *args = 6978 struct ocfs2_reflink_xattr_tree_args *args =
6892 (struct ocfs2_reflink_xattr_tree_args *)para; 6979 (struct ocfs2_reflink_xattr_tree_args *)para;
@@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6895 struct ocfs2_alloc_context *data_ac = NULL; 6982 struct ocfs2_alloc_context *data_ac = NULL;
6896 struct ocfs2_extent_tree et; 6983 struct ocfs2_extent_tree et;
6897 6984
6985 mlog(0, "reflink xattr buckets %llu len %u\n",
6986 (unsigned long long)blkno, len);
6987
6898 ocfs2_init_xattr_tree_extent_tree(&et, 6988 ocfs2_init_xattr_tree_extent_tree(&et,
6899 INODE_CACHE(args->reflink->new_inode), 6989 INODE_CACHE(args->reflink->new_inode),
6900 args->new_blk_bh); 6990 args->new_blk_bh);
@@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6914 goto out; 7004 goto out;
6915 } 7005 }
6916 7006
6917 ret = ocfs2_claim_clusters(handle, data_ac, 7007 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
6918 len, &p_cluster, &num_clusters); 7008 meta_ac, data_ac,
6919 if (ret) { 7009 blkno, cpos, len);
6920 mlog_errno(ret);
6921 goto out_commit;
6922 }
6923
6924 new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6925
6926 mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6927 (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6928 ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6929 meta_ac, data_ac, args);
6930 if (ret) {
6931 mlog_errno(ret);
6932 goto out_commit;
6933 }
6934
6935 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6936 (unsigned long long)new_blkno, len, cpos);
6937 ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
6938 len, 0, meta_ac);
6939 if (ret) 7010 if (ret)
6940 mlog_errno(ret); 7011 mlog_errno(ret);
6941 7012
6942out_commit:
6943 ocfs2_commit_trans(osb, handle); 7013 ocfs2_commit_trans(osb, handle);
6944 7014
6945out: 7015out:
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 3e73de5967ff..fc8497643fd0 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -74,6 +74,7 @@ int ibm_partition(struct parsed_partitions *state)
74 } *label; 74 } *label;
75 unsigned char *data; 75 unsigned char *data;
76 Sector sect; 76 Sector sect;
77 sector_t labelsect;
77 78
78 res = 0; 79 res = 0;
79 blocksize = bdev_logical_block_size(bdev); 80 blocksize = bdev_logical_block_size(bdev);
@@ -98,10 +99,19 @@ int ibm_partition(struct parsed_partitions *state)
98 goto out_freeall; 99 goto out_freeall;
99 100
100 /* 101 /*
102 * Special case for FBA disks: label sector does not depend on
103 * blocksize.
104 */
105 if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) ||
106 (info->cu_type == 0x3880 && info->dev_type == 0x3370))
107 labelsect = info->label_block;
108 else
109 labelsect = info->label_block * (blocksize >> 9);
110
111 /*
101 * Get volume label, extract name and type. 112 * Get volume label, extract name and type.
102 */ 113 */
103 data = read_part_sector(state, info->label_block*(blocksize/512), 114 data = read_part_sector(state, labelsect, &sect);
104 &sect);
105 if (data == NULL) 115 if (data == NULL)
106 goto out_readerr; 116 goto out_readerr;
107 117
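
The conversion being fixed, in isolation: on CKD devices the label block number is expressed in device-blocksize units and must be scaled to the 512-byte sectors read_part_sector() expects, while on the two FBA models it already is a sector number. blocksize is a power of two >= 512, so the scale factor is blocksize >> 9 (e.g. 4096 -> 8). A hedged helper:

	static sector_t label_sector(sector_t label_block, int blocksize, int fba)
	{
		return fba ? label_block : label_block * (blocksize >> 9);
	}
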
diff --git a/fs/pipe.c b/fs/pipe.c
index db6eaaba0dd8..279eef96c51c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -26,9 +26,14 @@
26 26
27/* 27/*
28 * The max size that a non-root user is allowed to grow the pipe. Can 28 * The max size that a non-root user is allowed to grow the pipe. Can
29 * be set by root in /proc/sys/fs/pipe-max-pages 29 * be set by root in /proc/sys/fs/pipe-max-size
30 */ 30 */
31unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16; 31unsigned int pipe_max_size = 1048576;
32
33/*
34 * Minimum pipe size, as required by POSIX
35 */
36unsigned int pipe_min_size = PAGE_SIZE;
32 37
33/* 38/*
34 * We use a start+len construction, which provides full use of the 39 * We use a start+len construction, which provides full use of the
@@ -1118,26 +1123,20 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
1118 * Allocate a new array of pipe buffers and copy the info over. Returns the 1123 * Allocate a new array of pipe buffers and copy the info over. Returns the
1119 * pipe size if successful, or return -ERROR on error. 1124 * pipe size if successful, or return -ERROR on error.
1120 */ 1125 */
1121static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) 1126static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
1122{ 1127{
1123 struct pipe_buffer *bufs; 1128 struct pipe_buffer *bufs;
1124 1129
1125 /* 1130 /*
1126 * Must be a power-of-2 currently
1127 */
1128 if (!is_power_of_2(arg))
1129 return -EINVAL;
1130
1131 /*
1132 * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't 1131 * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
1133 * expect a lot of shrink+grow operations, just free and allocate 1132 * expect a lot of shrink+grow operations, just free and allocate
1134 * again like we would do for growing. If the pipe currently 1133 * again like we would do for growing. If the pipe currently
1135 * contains more buffers than arg, then return busy. 1134 * contains more buffers than arg, then return busy.
1136 */ 1135 */
1137 if (arg < pipe->nrbufs) 1136 if (nr_pages < pipe->nrbufs)
1138 return -EBUSY; 1137 return -EBUSY;
1139 1138
1140 bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL); 1139 bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL);
1141 if (unlikely(!bufs)) 1140 if (unlikely(!bufs))
1142 return -ENOMEM; 1141 return -ENOMEM;
1143 1142
@@ -1146,20 +1145,56 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
1146 * and adjust the indexes. 1145 * and adjust the indexes.
1147 */ 1146 */
1148 if (pipe->nrbufs) { 1147 if (pipe->nrbufs) {
1149 const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1); 1148 unsigned int tail;
1150 const unsigned int head = pipe->nrbufs - tail; 1149 unsigned int head;
1150
1151 tail = pipe->curbuf + pipe->nrbufs;
1152 if (tail < pipe->buffers)
1153 tail = 0;
1154 else
1155 tail &= (pipe->buffers - 1);
1151 1156
1157 head = pipe->nrbufs - tail;
1152 if (head) 1158 if (head)
1153 memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer)); 1159 memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
1154 if (tail) 1160 if (tail)
1155 memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer)); 1161 memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
1156 } 1162 }
1157 1163
1158 pipe->curbuf = 0; 1164 pipe->curbuf = 0;
1159 kfree(pipe->bufs); 1165 kfree(pipe->bufs);
1160 pipe->bufs = bufs; 1166 pipe->bufs = bufs;
1161 pipe->buffers = arg; 1167 pipe->buffers = nr_pages;
1162 return arg; 1168 return nr_pages * PAGE_SIZE;
1169}
1170
1171/*
1172 * Currently we rely on the pipe array holding a power-of-2 number
1173 * of pages.
1174 */
1175static inline unsigned int round_pipe_size(unsigned int size)
1176{
1177 unsigned long nr_pages;
1178
1179 nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
1180 return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
1181}
1182
1183/*
1184 * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax
1185 * will return an error.
1186 */
1187int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
1188 size_t *lenp, loff_t *ppos)
1189{
1190 int ret;
1191
1192 ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
1193 if (ret < 0 || !write)
1194 return ret;
1195
1196 pipe_max_size = round_pipe_size(pipe_max_size);
1197 return ret;
1163} 1198}
1164 1199
1165long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) 1200long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
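
Worked through, round_pipe_size() converts the byte count to pages, rounds the page count up to a power of two, and converts back: 5000 bytes -> 2 pages -> 2 (already a power of two) -> 8192; 12288 bytes -> 3 pages -> 4 -> 16384. A sketch of those checks, assuming PAGE_SIZE == 4096:

	static void round_pipe_size_examples(void)
	{
		BUG_ON(round_pipe_size(5000)  != 8192);
		BUG_ON(round_pipe_size(12288) != 16384);
	}
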
@@ -1174,23 +1209,25 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1174 mutex_lock(&pipe->inode->i_mutex); 1209 mutex_lock(&pipe->inode->i_mutex);
1175 1210
1176 switch (cmd) { 1211 switch (cmd) {
1177 case F_SETPIPE_SZ: 1212 case F_SETPIPE_SZ: {
1178 if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) { 1213 unsigned int size, nr_pages;
1179 ret = -EINVAL; 1214
1215 size = round_pipe_size(arg);
1216 nr_pages = size >> PAGE_SHIFT;
1217
1218 ret = -EINVAL;
1219 if (!nr_pages)
1180 goto out; 1220 goto out;
1181 } 1221
1182 /* 1222 if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
1183 * The pipe needs to be at least 2 pages large to 1223 ret = -EPERM;
1184 * guarantee POSIX behaviour.
1185 */
1186 if (arg < 2) {
1187 ret = -EINVAL;
1188 goto out; 1224 goto out;
1189 } 1225 }
1190 ret = pipe_set_size(pipe, arg); 1226 ret = pipe_set_size(pipe, nr_pages);
1191 break; 1227 break;
1228 }
1192 case F_GETPIPE_SZ: 1229 case F_GETPIPE_SZ:
1193 ret = pipe->buffers; 1230 ret = pipe->buffers * PAGE_SIZE;
1194 break; 1231 break;
1195 default: 1232 default:
1196 ret = -EINVAL; 1233 ret = -EINVAL;
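
Taken together, the pipe.c hunks above change F_SETPIPE_SZ from a raw slot count to a byte count that is rounded up to a power-of-two number of pages, make F_GETPIPE_SZ report bytes, and gate over-limit requests on CAP_SYS_RESOURCE against pipe_max_size. A minimal user-space sketch of the new semantics (assumes a kernel carrying this change, glibc exposing F_SETPIPE_SZ under _GNU_SOURCE, and 4 KiB pages):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fds[2];
        int ret;

        if (pipe(fds) < 0) {
                perror("pipe");
                return 1;
        }

        /* Ask for 5000 bytes; round_pipe_size() rounds up to two pages. */
        ret = fcntl(fds[0], F_SETPIPE_SZ, 5000);
        printf("F_SETPIPE_SZ returned %d\n", ret);      /* expect 8192 */

        /* F_GETPIPE_SZ now reports bytes, not pipe_buffer slots. */
        printf("capacity: %d bytes\n", fcntl(fds[0], F_GETPIPE_SZ));

        close(fds[0]);
        close(fds[1]);
        return 0;
}
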
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index ce94801f48ca..d9396a4fc7ff 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -209,6 +209,9 @@ void proc_device_tree_add_node(struct device_node *np,
209 for (pp = np->properties; pp != NULL; pp = pp->next) { 209 for (pp = np->properties; pp != NULL; pp = pp->next) {
210 p = pp->name; 210 p = pp->name;
211 211
212 if (strchr(p, '/'))
213 continue;
214
212 if (duplicate_name(de, p)) 215 if (duplicate_name(de, p))
213 p = fixup_name(np, de, p); 216 p = fixup_name(np, de, p);
214 217
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 46d4b5d72bd3..cb6306e63843 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -122,11 +122,20 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
122 return size; 122 return size;
123} 123}
124 124
125static void pad_len_spaces(struct seq_file *m, int len)
126{
127 len = 25 + sizeof(void*) * 6 - len;
128 if (len < 1)
129 len = 1;
130 seq_printf(m, "%*c", len, ' ');
131}
132
125/* 133/*
126 * display a single VMA to a sequenced file 134 * display a single VMA to a sequenced file
127 */ 135 */
128static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) 136static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
129{ 137{
138 struct mm_struct *mm = vma->vm_mm;
130 unsigned long ino = 0; 139 unsigned long ino = 0;
131 struct file *file; 140 struct file *file;
132 dev_t dev = 0; 141 dev_t dev = 0;
@@ -155,11 +164,14 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
155 MAJOR(dev), MINOR(dev), ino, &len); 164 MAJOR(dev), MINOR(dev), ino, &len);
156 165
157 if (file) { 166 if (file) {
158 len = 25 + sizeof(void *) * 6 - len; 167 pad_len_spaces(m, len);
159 if (len < 1)
160 len = 1;
161 seq_printf(m, "%*c", len, ' ');
162 seq_path(m, &file->f_path, ""); 168 seq_path(m, &file->f_path, "");
169 } else if (mm) {
170 if (vma->vm_start <= mm->start_stack &&
171 vma->vm_end >= mm->start_stack) {
172 pad_len_spaces(m, len);
173 seq_puts(m, "[stack]");
174 }
163 } 175 }
164 176
165 seq_putc(m, '\n'); 177 seq_putc(m, '\n');
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 12c233da1b6b..437d2ca2de97 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -676,7 +676,7 @@ static void prune_dqcache(int count)
676 * This is called from kswapd when we think we need some 676 * This is called from kswapd when we think we need some
677 * more memory 677 * more memory
678 */ 678 */
679static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) 679static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
680{ 680{
681 if (nr) { 681 if (nr) {
682 spin_lock(&dq_list_lock); 682 spin_lock(&dq_list_lock);
diff --git a/fs/splice.c b/fs/splice.c
index ac22b00d86c3..efdbfece9932 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -354,7 +354,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
354 break; 354 break;
355 355
356 error = add_to_page_cache_lru(page, mapping, index, 356 error = add_to_page_cache_lru(page, mapping, index,
357 mapping_gfp_mask(mapping)); 357 GFP_KERNEL);
358 if (unlikely(error)) { 358 if (unlikely(error)) {
359 page_cache_release(page); 359 page_cache_release(page);
360 if (error == -EEXIST) 360 if (error == -EEXIST)
@@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
1282{ 1282{
1283 struct file *file = sd->u.file; 1283 struct file *file = sd->u.file;
1284 1284
1285 return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); 1285 return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
1286 sd->flags);
1286} 1287}
1287 1288
1288/** 1289/**
@@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1371 if (off_in) 1372 if (off_in)
1372 return -ESPIPE; 1373 return -ESPIPE;
1373 if (off_out) { 1374 if (off_out) {
1374 if (!out->f_op || !out->f_op->llseek || 1375 if (!(out->f_mode & FMODE_PWRITE))
1375 out->f_op->llseek == no_llseek)
1376 return -EINVAL; 1376 return -EINVAL;
1377 if (copy_from_user(&offset, off_out, sizeof(loff_t))) 1377 if (copy_from_user(&offset, off_out, sizeof(loff_t)))
1378 return -EFAULT; 1378 return -EFAULT;
@@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1392 if (off_out) 1392 if (off_out)
1393 return -ESPIPE; 1393 return -ESPIPE;
1394 if (off_in) { 1394 if (off_in) {
1395 if (!in->f_op || !in->f_op->llseek || 1395 if (!(in->f_mode & FMODE_PREAD))
1396 in->f_op->llseek == no_llseek)
1397 return -EINVAL; 1396 return -EINVAL;
1398 if (copy_from_user(&offset, off_in, sizeof(loff_t))) 1397 if (copy_from_user(&offset, off_in, sizeof(loff_t)))
1399 return -EFAULT; 1398 return -EFAULT;
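
The two do_splice() hunks above swap the ->llseek test for FMODE_PREAD/FMODE_PWRITE: splicing at an explicit offset is positional I/O, not seeking, so a file that supports pread()/pwrite() but cannot seek should be accepted. A hedged kernel-context sketch of the capability test (splice_offset_allowed() is a hypothetical helper, not an in-tree function):

static int splice_offset_allowed(struct file *f, int writing)
{
        /* FMODE_PREAD/FMODE_PWRITE are set by the VFS at open time. */
        return writing ? (f->f_mode & FMODE_PWRITE)
                       : (f->f_mode & FMODE_PREAD);
}
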
diff --git a/fs/super.c b/fs/super.c
index 5c35bc7a499e..938119ab8dcb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -374,6 +374,8 @@ void sync_supers(void)
374 up_read(&sb->s_umount); 374 up_read(&sb->s_umount);
375 375
376 spin_lock(&sb_lock); 376 spin_lock(&sb_lock);
377 /* lock was dropped, must reset next */
378 list_safe_reset_next(sb, n, s_list);
377 __put_super(sb); 379 __put_super(sb);
378 } 380 }
379 } 381 }
@@ -405,6 +407,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
405 up_read(&sb->s_umount); 407 up_read(&sb->s_umount);
406 408
407 spin_lock(&sb_lock); 409 spin_lock(&sb_lock);
410 /* lock was dropped, must reset next */
411 list_safe_reset_next(sb, n, s_list);
408 __put_super(sb); 412 __put_super(sb);
409 } 413 }
410 spin_unlock(&sb_lock); 414 spin_unlock(&sb_lock);
@@ -585,6 +589,8 @@ static void do_emergency_remount(struct work_struct *work)
585 } 589 }
586 up_write(&sb->s_umount); 590 up_write(&sb->s_umount);
587 spin_lock(&sb_lock); 591 spin_lock(&sb_lock);
592 /* lock was dropped, must reset next */
593 list_safe_reset_next(sb, n, s_list);
588 __put_super(sb); 594 __put_super(sb);
589 } 595 }
590 spin_unlock(&sb_lock); 596 spin_unlock(&sb_lock);
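
All three super.c hunks apply the same fix: a list_for_each_entry_safe() walk whose saved next pointer can go stale while sb_lock is dropped, repaired with list_safe_reset_next() once the lock is retaken. A minimal kernel-context sketch of the pattern, modelled on iterate_supers() above (process() is a hypothetical stand-in for the per-superblock work):

static void walk_supers(void)
{
        struct super_block *sb, *n;

        spin_lock(&sb_lock);
        list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
                sb->s_count++;
                spin_unlock(&sb_lock);

                process(sb);            /* may sleep; list can change */

                spin_lock(&sb_lock);
                /* lock was dropped, so the saved next pointer may be stale */
                list_safe_reset_next(sb, n, s_list);
                __put_super(sb);
        }
        spin_unlock(&sb_lock);
}
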
diff --git a/fs/sync.c b/fs/sync.c
index c9f83f480ec5..15aa6f03b2da 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
42 if (wait) 42 if (wait)
43 sync_inodes_sb(sb); 43 sync_inodes_sb(sb);
44 else 44 else
45 writeback_inodes_sb_locked(sb); 45 writeback_inodes_sb(sb);
46 46
47 if (sb->s_op->sync_fs) 47 if (sb->s_op->sync_fs)
48 sb->s_op->sync_fs(sb, wait); 48 sb->s_op->sync_fs(sb, wait);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index bde1a4c3679a..0835a3b70e03 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -117,11 +117,13 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
117 if (error) 117 if (error)
118 goto out; 118 goto out;
119 119
120 error = sysfs_sd_setattr(sd, iattr);
121 if (error)
122 goto out;
123
120 /* this ignores size changes */ 124 /* this ignores size changes */
121 generic_setattr(inode, iattr); 125 generic_setattr(inode, iattr);
122 126
123 error = sysfs_sd_setattr(sd, iattr);
124
125out: 127out:
126 mutex_unlock(&sysfs_mutex); 128 mutex_unlock(&sysfs_mutex);
127 return error; 129 return error;
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index bbd69bdb0fa8..fcc498ec9b33 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -25,6 +25,7 @@
25#include <linux/stat.h> 25#include <linux/stat.h>
26#include <linux/string.h> 26#include <linux/string.h>
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/writeback.h>
28#include "sysv.h" 29#include "sysv.h"
29 30
30/* We don't trust the value of 31/* We don't trust the value of
@@ -139,6 +140,9 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
139 struct inode *inode; 140 struct inode *inode;
140 sysv_ino_t ino; 141 sysv_ino_t ino;
141 unsigned count; 142 unsigned count;
143 struct writeback_control wbc = {
144 .sync_mode = WB_SYNC_NONE
145 };
142 146
143 inode = new_inode(sb); 147 inode = new_inode(sb);
144 if (!inode) 148 if (!inode)
@@ -168,7 +172,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
168 insert_inode_hash(inode); 172 insert_inode_hash(inode);
169 mark_inode_dirty(inode); 173 mark_inode_dirty(inode);
170 174
171 sysv_write_inode(inode, 0); /* ensure inode not allocated again */ 175 sysv_write_inode(inode, &wbc); /* ensure inode not allocated again */
172 mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ 176 mark_inode_dirty(inode); /* cleared by sysv_write_inode() */
173 /* That's it. */ 177 /* That's it. */
174 unlock_super(sb); 178 unlock_super(sb);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 076ca50e9933..c8ff0d1ae5d3 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -62,7 +62,9 @@
62 */ 62 */
63static void shrink_liability(struct ubifs_info *c, int nr_to_write) 63static void shrink_liability(struct ubifs_info *c, int nr_to_write)
64{ 64{
65 down_read(&c->vfs_sb->s_umount);
65 writeback_inodes_sb(c->vfs_sb); 66 writeback_inodes_sb(c->vfs_sb);
67 up_read(&c->vfs_sb->s_umount);
66} 68}
67 69
68/** 70/**
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 02feb59cefca..0b201114a5ad 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -277,7 +277,7 @@ static int kick_a_thread(void)
277 return 0; 277 return 0;
278} 278}
279 279
280int ubifs_shrinker(int nr, gfp_t gfp_mask) 280int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
281{ 281{
282 int freed, contention = 0; 282 int freed, contention = 0;
283 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 283 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 2eef553d50c8..04310878f449 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
1575int ubifs_tnc_end_commit(struct ubifs_info *c); 1575int ubifs_tnc_end_commit(struct ubifs_info *c);
1576 1576
1577/* shrinker.c */ 1577/* shrinker.c */
1578int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); 1578int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
1579 1579
1580/* commit.c */ 1580/* commit.c */
1581int ubifs_bg_thread(void *info); 1581int ubifs_bg_thread(void *info);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 089eaca860b4..34640d6dbdcb 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1333,6 +1333,21 @@ xfs_vm_writepage(
1333 trace_xfs_writepage(inode, page, 0); 1333 trace_xfs_writepage(inode, page, 0);
1334 1334
1335 /* 1335 /*
1336 * Refuse to write the page out if we are called from reclaim context.
1337 *
1338 * This is primarily to avoid stack overflows when called with an
1339 * already deep stack from random direct-reclaim callers, but disabling
1340 * writeback from kswapd is a nice side-effect too, as kswapd tends to
1341 * issue rather suboptimal I/O patterns.
1342 *
1343 * This should really be done by the core VM, but until that happens
1344 * filesystems like XFS, btrfs and ext4 have to take care of this
1345 * by themselves.
1346 */
1347 if (current->flags & PF_MEMALLOC)
1348 goto out_fail;
1349
1350 /*
1336 * We need a transaction if: 1351 * We need a transaction if:
1337 * 1. There are delalloc buffers on the page 1352 * 1. There are delalloc buffers on the page
1338 * 2. The page is uptodate and we have unmapped buffers 1353 * 2. The page is uptodate and we have unmapped buffers
@@ -1366,14 +1381,6 @@ xfs_vm_writepage(
1366 if (!page_has_buffers(page)) 1381 if (!page_has_buffers(page))
1367 create_empty_buffers(page, 1 << inode->i_blkbits, 0); 1382 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
1368 1383
1369
1370 /*
1371 * VM calculation for nr_to_write seems off. Bump it way
1372 * up, this gets simple streaming writes zippy again.
1373 * To be reviewed again after Jens' writeback changes.
1374 */
1375 wbc->nr_to_write *= 4;
1376
1377 /* 1384 /*
1378 * Convert delayed allocate, unwritten or unmapped space 1385 * Convert delayed allocate, unwritten or unmapped space
1379 * to real space and flush out to disk. 1386 * to real space and flush out to disk.
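
The guard added to xfs_vm_writepage() above applies to any ->writepage that is too stack-hungry to run from reclaim. A hedged kernel-context sketch of the same pattern in isolation (my_writepage() is hypothetical, modelled on the redirty-and-unlock fallback such implementations use):

static int my_writepage(struct page *page, struct writeback_control *wbc)
{
        /*
         * Direct reclaim and kswapd both run with PF_MEMALLOC set, so
         * this refuses writeback from either; the page stays dirty and
         * regular writeback retries it later.
         */
        if (current->flags & PF_MEMALLOC) {
                redirty_page_for_writepage(wbc, page);
                unlock_page(page);
                return 0;
        }

        /* ... the normal, stack-heavy writeback path ... */
        return 0;
}
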
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 649ade8ef598..2ee3f7a60163 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -45,7 +45,7 @@
45 45
46static kmem_zone_t *xfs_buf_zone; 46static kmem_zone_t *xfs_buf_zone;
47STATIC int xfsbufd(void *); 47STATIC int xfsbufd(void *);
48STATIC int xfsbufd_wakeup(int, gfp_t); 48STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
49STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); 49STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
50static struct shrinker xfs_buf_shake = { 50static struct shrinker xfs_buf_shake = {
51 .shrink = xfsbufd_wakeup, 51 .shrink = xfsbufd_wakeup,
@@ -340,7 +340,7 @@ _xfs_buf_lookup_pages(
340 __func__, gfp_mask); 340 __func__, gfp_mask);
341 341
342 XFS_STATS_INC(xb_page_retries); 342 XFS_STATS_INC(xb_page_retries);
343 xfsbufd_wakeup(0, gfp_mask); 343 xfsbufd_wakeup(NULL, 0, gfp_mask);
344 congestion_wait(BLK_RW_ASYNC, HZ/50); 344 congestion_wait(BLK_RW_ASYNC, HZ/50);
345 goto retry; 345 goto retry;
346 } 346 }
@@ -1762,6 +1762,7 @@ xfs_buf_runall_queues(
1762 1762
1763STATIC int 1763STATIC int
1764xfsbufd_wakeup( 1764xfsbufd_wakeup(
1765 struct shrinker *shrink,
1765 int priority, 1766 int priority,
1766 gfp_t mask) 1767 gfp_t mask)
1767{ 1768{
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 846b75aeb2ab..e7839ee49e43 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -128,13 +128,12 @@ xfs_nfs_get_inode(
128 return ERR_PTR(-ESTALE); 128 return ERR_PTR(-ESTALE);
129 129
130 /* 130 /*
131 * The XFS_IGET_BULKSTAT means that an invalid inode number is just 131 * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
132 * fine and not an indication of a corrupted filesystem. Because 132 * fine and not an indication of a corrupted filesystem as clients can
133 * clients can send any kind of invalid file handle, e.g. after 133 * send invalid file handles and we have to handle them gracefully.
134 * a restore on the server we have to deal with this case gracefully.
135 */ 134 */
136 error = xfs_iget(mp, NULL, ino, XFS_IGET_BULKSTAT, 135 error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED,
137 XFS_ILOCK_SHARED, &ip, 0); 136 XFS_ILOCK_SHARED, &ip);
138 if (error) { 137 if (error) {
139 /* 138 /*
140 * EINVAL means the inode cluster doesn't exist anymore. 139 * EINVAL means the inode cluster doesn't exist anymore.
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 699b60cbab9c..e59a81062830 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -679,10 +679,9 @@ xfs_ioc_bulkstat(
679 error = xfs_bulkstat_single(mp, &inlast, 679 error = xfs_bulkstat_single(mp, &inlast,
680 bulkreq.ubuffer, &done); 680 bulkreq.ubuffer, &done);
681 else /* XFS_IOC_FSBULKSTAT */ 681 else /* XFS_IOC_FSBULKSTAT */
682 error = xfs_bulkstat(mp, &inlast, &count, 682 error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
683 (bulkstat_one_pf)xfs_bulkstat_one, NULL, 683 sizeof(xfs_bstat_t), bulkreq.ubuffer,
684 sizeof(xfs_bstat_t), bulkreq.ubuffer, 684 &done);
685 BULKSTAT_FG_QUICK, &done);
686 685
687 if (error) 686 if (error)
688 return -error; 687 return -error;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 9287135e9bfc..52ed49e6465c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -237,15 +237,12 @@ xfs_bulkstat_one_compat(
237 xfs_ino_t ino, /* inode number to get data for */ 237 xfs_ino_t ino, /* inode number to get data for */
238 void __user *buffer, /* buffer to place output in */ 238 void __user *buffer, /* buffer to place output in */
239 int ubsize, /* size of buffer */ 239 int ubsize, /* size of buffer */
240 void *private_data, /* my private data */
241 xfs_daddr_t bno, /* starting bno of inode cluster */
242 int *ubused, /* bytes used by me */ 240 int *ubused, /* bytes used by me */
243 void *dibuff, /* on-disk inode buffer */
244 int *stat) /* BULKSTAT_RV_... */ 241 int *stat) /* BULKSTAT_RV_... */
245{ 242{
246 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 243 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
247 xfs_bulkstat_one_fmt_compat, bno, 244 xfs_bulkstat_one_fmt_compat,
248 ubused, dibuff, stat); 245 ubused, stat);
249} 246}
250 247
251/* copied from xfs_ioctl.c */ 248/* copied from xfs_ioctl.c */
@@ -298,13 +295,11 @@ xfs_compat_ioc_bulkstat(
298 int res; 295 int res;
299 296
300 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, 297 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
301 sizeof(compat_xfs_bstat_t), 298 sizeof(compat_xfs_bstat_t), 0, &res);
302 NULL, 0, NULL, NULL, &res);
303 } else if (cmd == XFS_IOC_FSBULKSTAT_32) { 299 } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
304 error = xfs_bulkstat(mp, &inlast, &count, 300 error = xfs_bulkstat(mp, &inlast, &count,
305 xfs_bulkstat_one_compat, NULL, 301 xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
306 sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, 302 bulkreq.ubuffer, &done);
307 BULKSTAT_FG_QUICK, &done);
308 } else 303 } else
309 error = XFS_ERROR(EINVAL); 304 error = XFS_ERROR(EINVAL);
310 if (error) 305 if (error)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9c8019c78c92..44f0b2de153e 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -585,11 +585,20 @@ xfs_vn_fallocate(
585 bf.l_len = len; 585 bf.l_len = len;
586 586
587 xfs_ilock(ip, XFS_IOLOCK_EXCL); 587 xfs_ilock(ip, XFS_IOLOCK_EXCL);
588
589 /* check the new inode size is valid before allocating */
590 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
591 offset + len > i_size_read(inode)) {
592 new_size = offset + len;
593 error = inode_newsize_ok(inode, new_size);
594 if (error)
595 goto out_unlock;
596 }
597
588 error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 598 error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
589 0, XFS_ATTR_NOLOCK); 599 0, XFS_ATTR_NOLOCK);
590 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && 600 if (error)
591 offset + len > i_size_read(inode)) 601 goto out_unlock;
592 new_size = offset + len;
593 602
594 /* Change file size if needed */ 603 /* Change file size if needed */
595 if (new_size) { 604 if (new_size) {
@@ -600,6 +609,7 @@ xfs_vn_fallocate(
600 error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); 609 error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
601 } 610 }
602 611
612out_unlock:
603 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 613 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
604out_error: 614out_error:
605 return error; 615 return error;
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 9ac8aea91529..067cafbfc635 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -23,7 +23,6 @@
23#include "xfs_ag.h" 23#include "xfs_ag.h"
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_quota.h" 25#include "xfs_quota.h"
26#include "xfs_log.h"
27#include "xfs_trans.h" 26#include "xfs_trans.h"
28#include "xfs_bmap_btree.h" 27#include "xfs_bmap_btree.h"
29#include "xfs_inode.h" 28#include "xfs_inode.h"
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index f2d1718c9165..80938c736c27 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1883,7 +1883,6 @@ init_xfs_fs(void)
1883 goto out_cleanup_procfs; 1883 goto out_cleanup_procfs;
1884 1884
1885 vfs_initquota(); 1885 vfs_initquota();
1886 xfs_inode_shrinker_init();
1887 1886
1888 error = register_filesystem(&xfs_fs_type); 1887 error = register_filesystem(&xfs_fs_type);
1889 if (error) 1888 if (error)
@@ -1911,7 +1910,6 @@ exit_xfs_fs(void)
1911{ 1910{
1912 vfs_exitquota(); 1911 vfs_exitquota();
1913 unregister_filesystem(&xfs_fs_type); 1912 unregister_filesystem(&xfs_fs_type);
1914 xfs_inode_shrinker_destroy();
1915 xfs_sysctl_unregister(); 1913 xfs_sysctl_unregister();
1916 xfs_cleanup_procfs(); 1914 xfs_cleanup_procfs();
1917 xfs_buf_terminate(); 1915 xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 3884e20bc14e..a51a07c3a70c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -144,6 +144,41 @@ restart:
144 return last_error; 144 return last_error;
145} 145}
146 146
147/*
148 * Select the next per-ag structure to iterate during the walk. The reclaim
150 * walk is optimised to walk only those AGs with reclaimable inodes.
150 */
151static struct xfs_perag *
152xfs_inode_ag_iter_next_pag(
153 struct xfs_mount *mp,
154 xfs_agnumber_t *first,
155 int tag)
156{
157 struct xfs_perag *pag = NULL;
158
159 if (tag == XFS_ICI_RECLAIM_TAG) {
160 int found;
161 int ref;
162
163 spin_lock(&mp->m_perag_lock);
164 found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
165 (void **)&pag, *first, 1, tag);
166 if (found <= 0) {
167 spin_unlock(&mp->m_perag_lock);
168 return NULL;
169 }
170 *first = pag->pag_agno + 1;
171 /* open coded pag reference increment */
172 ref = atomic_inc_return(&pag->pag_ref);
173 spin_unlock(&mp->m_perag_lock);
174 trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
175 } else {
176 pag = xfs_perag_get(mp, *first);
177 (*first)++;
178 }
179 return pag;
180}
181
147int 182int
148xfs_inode_ag_iterator( 183xfs_inode_ag_iterator(
149 struct xfs_mount *mp, 184 struct xfs_mount *mp,
@@ -154,20 +189,15 @@ xfs_inode_ag_iterator(
154 int exclusive, 189 int exclusive,
155 int *nr_to_scan) 190 int *nr_to_scan)
156{ 191{
192 struct xfs_perag *pag;
157 int error = 0; 193 int error = 0;
158 int last_error = 0; 194 int last_error = 0;
159 xfs_agnumber_t ag; 195 xfs_agnumber_t ag;
160 int nr; 196 int nr;
161 197
162 nr = nr_to_scan ? *nr_to_scan : INT_MAX; 198 nr = nr_to_scan ? *nr_to_scan : INT_MAX;
163 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 199 ag = 0;
164 struct xfs_perag *pag; 200 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
165
166 pag = xfs_perag_get(mp, ag);
167 if (!pag->pag_ici_init) {
168 xfs_perag_put(pag);
169 continue;
170 }
171 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, 201 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
172 exclusive, &nr); 202 exclusive, &nr);
173 xfs_perag_put(pag); 203 xfs_perag_put(pag);
@@ -644,6 +674,17 @@ __xfs_inode_set_reclaim_tag(
644 radix_tree_tag_set(&pag->pag_ici_root, 674 radix_tree_tag_set(&pag->pag_ici_root,
645 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), 675 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
646 XFS_ICI_RECLAIM_TAG); 676 XFS_ICI_RECLAIM_TAG);
677
678 if (!pag->pag_ici_reclaimable) {
679 /* propagate the reclaim tag up into the perag radix tree */
680 spin_lock(&ip->i_mount->m_perag_lock);
681 radix_tree_tag_set(&ip->i_mount->m_perag_tree,
682 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
683 XFS_ICI_RECLAIM_TAG);
684 spin_unlock(&ip->i_mount->m_perag_lock);
685 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
686 -1, _RET_IP_);
687 }
647 pag->pag_ici_reclaimable++; 688 pag->pag_ici_reclaimable++;
648} 689}
649 690
@@ -678,6 +719,16 @@ __xfs_inode_clear_reclaim_tag(
678 radix_tree_tag_clear(&pag->pag_ici_root, 719 radix_tree_tag_clear(&pag->pag_ici_root,
679 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 720 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
680 pag->pag_ici_reclaimable--; 721 pag->pag_ici_reclaimable--;
722 if (!pag->pag_ici_reclaimable) {
723 /* clear the reclaim tag from the perag radix tree */
724 spin_lock(&ip->i_mount->m_perag_lock);
725 radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
726 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
727 XFS_ICI_RECLAIM_TAG);
728 spin_unlock(&ip->i_mount->m_perag_lock);
729 trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
730 -1, _RET_IP_);
731 }
681} 732}
682 733
683/* 734/*
@@ -832,88 +883,52 @@ xfs_reclaim_inodes(
832 883
833/* 884/*
834 * Shrinker infrastructure. 885 * Shrinker infrastructure.
835 *
836 * This is all far more complex than it needs to be. It adds a global list of
837 * mounts because the shrinkers can only call a global context. We need to make
838 * the shrinkers pass a context to avoid the need for global state.
839 */ 886 */
840static LIST_HEAD(xfs_mount_list);
841static struct rw_semaphore xfs_mount_list_lock;
842
843static int 887static int
844xfs_reclaim_inode_shrink( 888xfs_reclaim_inode_shrink(
889 struct shrinker *shrink,
845 int nr_to_scan, 890 int nr_to_scan,
846 gfp_t gfp_mask) 891 gfp_t gfp_mask)
847{ 892{
848 struct xfs_mount *mp; 893 struct xfs_mount *mp;
849 struct xfs_perag *pag; 894 struct xfs_perag *pag;
850 xfs_agnumber_t ag; 895 xfs_agnumber_t ag;
851 int reclaimable = 0; 896 int reclaimable;
852 897
898 mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
853 if (nr_to_scan) { 899 if (nr_to_scan) {
854 if (!(gfp_mask & __GFP_FS)) 900 if (!(gfp_mask & __GFP_FS))
855 return -1; 901 return -1;
856 902
857 down_read(&xfs_mount_list_lock); 903 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
858 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
859 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
860 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); 904 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
861 if (nr_to_scan <= 0) 905 /* if we don't exhaust the scan, don't bother coming back */
862 break; 906 if (nr_to_scan > 0)
863 } 907 return -1;
864 up_read(&xfs_mount_list_lock); 908 }
865 }
866
867 down_read(&xfs_mount_list_lock);
868 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
869 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
870 909
871 pag = xfs_perag_get(mp, ag); 910 reclaimable = 0;
872 if (!pag->pag_ici_init) { 911 ag = 0;
873 xfs_perag_put(pag); 912 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
874 continue; 913 XFS_ICI_RECLAIM_TAG))) {
875 } 914 reclaimable += pag->pag_ici_reclaimable;
876 reclaimable += pag->pag_ici_reclaimable; 915 xfs_perag_put(pag);
877 xfs_perag_put(pag);
878 }
879 } 916 }
880 up_read(&xfs_mount_list_lock);
881 return reclaimable; 917 return reclaimable;
882} 918}
883 919
884static struct shrinker xfs_inode_shrinker = {
885 .shrink = xfs_reclaim_inode_shrink,
886 .seeks = DEFAULT_SEEKS,
887};
888
889void __init
890xfs_inode_shrinker_init(void)
891{
892 init_rwsem(&xfs_mount_list_lock);
893 register_shrinker(&xfs_inode_shrinker);
894}
895
896void
897xfs_inode_shrinker_destroy(void)
898{
899 ASSERT(list_empty(&xfs_mount_list));
900 unregister_shrinker(&xfs_inode_shrinker);
901}
902
903void 920void
904xfs_inode_shrinker_register( 921xfs_inode_shrinker_register(
905 struct xfs_mount *mp) 922 struct xfs_mount *mp)
906{ 923{
907 down_write(&xfs_mount_list_lock); 924 mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
908 list_add_tail(&mp->m_mplist, &xfs_mount_list); 925 mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
909 up_write(&xfs_mount_list_lock); 926 register_shrinker(&mp->m_inode_shrink);
910} 927}
911 928
912void 929void
913xfs_inode_shrinker_unregister( 930xfs_inode_shrinker_unregister(
914 struct xfs_mount *mp) 931 struct xfs_mount *mp)
915{ 932{
916 down_write(&xfs_mount_list_lock); 933 unregister_shrinker(&mp->m_inode_shrink);
917 list_del(&mp->m_mplist);
918 up_write(&xfs_mount_list_lock);
919} 934}
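
The rework above drops the global xfs_mount_list in favour of one shrinker per mount, recovering the owning mount in the callback via container_of() on the new struct shrinker argument. A hedged sketch of the general pattern for any per-instance cache (struct my_mount and my_mount_reclaim() are hypothetical):

struct my_mount {
        struct shrinker shrink;
        /* ... per-mount caches ... */
};

static int my_mount_shrink(struct shrinker *shrink, int nr, gfp_t gfp)
{
        struct my_mount *m = container_of(shrink, struct my_mount, shrink);

        return my_mount_reclaim(m, nr, gfp);    /* hypothetical */
}

static void my_mount_shrinker_register(struct my_mount *m)
{
        m->shrink.shrink = my_mount_shrink;
        m->shrink.seeks = DEFAULT_SEEKS;
        register_shrinker(&m->shrink);
        /* unregister_shrinker(&m->shrink) at unmount */
}
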
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index cdcbaaca9880..e28139aaa4aa 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -55,8 +55,6 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp,
55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
56 int flags, int tag, int write_lock, int *nr_to_scan); 56 int flags, int tag, int write_lock, int *nr_to_scan);
57 57
58void xfs_inode_shrinker_init(void);
59void xfs_inode_shrinker_destroy(void);
60void xfs_inode_shrinker_register(struct xfs_mount *mp); 58void xfs_inode_shrinker_register(struct xfs_mount *mp);
61void xfs_inode_shrinker_unregister(struct xfs_mount *mp); 59void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
62 60
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 207fa77f63ae..d12be8470cba 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -50,7 +50,6 @@
50#include "quota/xfs_dquot_item.h" 50#include "quota/xfs_dquot_item.h"
51#include "quota/xfs_dquot.h" 51#include "quota/xfs_dquot.h"
52#include "xfs_log_recover.h" 52#include "xfs_log_recover.h"
53#include "xfs_buf_item.h"
54#include "xfs_inode_item.h" 53#include "xfs_inode_item.h"
55 54
56/* 55/*
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index ff6bc797baf2..302820690904 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -82,33 +82,6 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class,
82 ) 82 )
83) 83)
84 84
85#define DEFINE_PERAG_REF_EVENT(name) \
86TRACE_EVENT(name, \
87 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
88 unsigned long caller_ip), \
89 TP_ARGS(mp, agno, refcount, caller_ip), \
90 TP_STRUCT__entry( \
91 __field(dev_t, dev) \
92 __field(xfs_agnumber_t, agno) \
93 __field(int, refcount) \
94 __field(unsigned long, caller_ip) \
95 ), \
96 TP_fast_assign( \
97 __entry->dev = mp->m_super->s_dev; \
98 __entry->agno = agno; \
99 __entry->refcount = refcount; \
100 __entry->caller_ip = caller_ip; \
101 ), \
102 TP_printk("dev %d:%d agno %u refcount %d caller %pf", \
103 MAJOR(__entry->dev), MINOR(__entry->dev), \
104 __entry->agno, \
105 __entry->refcount, \
106 (char *)__entry->caller_ip) \
107);
108
109DEFINE_PERAG_REF_EVENT(xfs_perag_get)
110DEFINE_PERAG_REF_EVENT(xfs_perag_put)
111
112#define DEFINE_ATTR_LIST_EVENT(name) \ 85#define DEFINE_ATTR_LIST_EVENT(name) \
113DEFINE_EVENT(xfs_attr_list_class, name, \ 86DEFINE_EVENT(xfs_attr_list_class, name, \
114 TP_PROTO(struct xfs_attr_list_context *ctx), \ 87 TP_PROTO(struct xfs_attr_list_context *ctx), \
@@ -122,6 +95,40 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
122DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk); 95DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
123DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); 96DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
124 97
98DECLARE_EVENT_CLASS(xfs_perag_class,
99 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
100 unsigned long caller_ip),
101 TP_ARGS(mp, agno, refcount, caller_ip),
102 TP_STRUCT__entry(
103 __field(dev_t, dev)
104 __field(xfs_agnumber_t, agno)
105 __field(int, refcount)
106 __field(unsigned long, caller_ip)
107 ),
108 TP_fast_assign(
109 __entry->dev = mp->m_super->s_dev;
110 __entry->agno = agno;
111 __entry->refcount = refcount;
112 __entry->caller_ip = caller_ip;
113 ),
114 TP_printk("dev %d:%d agno %u refcount %d caller %pf",
115 MAJOR(__entry->dev), MINOR(__entry->dev),
116 __entry->agno,
117 __entry->refcount,
118 (char *)__entry->caller_ip)
119);
120
121#define DEFINE_PERAG_REF_EVENT(name) \
122DEFINE_EVENT(xfs_perag_class, name, \
123 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
124 unsigned long caller_ip), \
125 TP_ARGS(mp, agno, refcount, caller_ip))
126DEFINE_PERAG_REF_EVENT(xfs_perag_get);
127DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
128DEFINE_PERAG_REF_EVENT(xfs_perag_put);
129DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
130DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
131
125TRACE_EVENT(xfs_attr_list_node_descend, 132TRACE_EVENT(xfs_attr_list_node_descend,
126 TP_PROTO(struct xfs_attr_list_context *ctx, 133 TP_PROTO(struct xfs_attr_list_context *ctx,
127 struct xfs_da_node_entry *btree), 134 struct xfs_da_node_entry *btree),
@@ -775,165 +782,181 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
775DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); 782DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
776DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); 783DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
777 784
778#define DEFINE_RW_EVENT(name) \ 785DECLARE_EVENT_CLASS(xfs_file_class,
779TRACE_EVENT(name, \ 786 TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
780 TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ 787 TP_ARGS(ip, count, offset, flags),
781 TP_ARGS(ip, count, offset, flags), \ 788 TP_STRUCT__entry(
782 TP_STRUCT__entry( \ 789 __field(dev_t, dev)
783 __field(dev_t, dev) \ 790 __field(xfs_ino_t, ino)
784 __field(xfs_ino_t, ino) \ 791 __field(xfs_fsize_t, size)
785 __field(xfs_fsize_t, size) \ 792 __field(xfs_fsize_t, new_size)
786 __field(xfs_fsize_t, new_size) \ 793 __field(loff_t, offset)
787 __field(loff_t, offset) \ 794 __field(size_t, count)
788 __field(size_t, count) \ 795 __field(int, flags)
789 __field(int, flags) \ 796 ),
790 ), \ 797 TP_fast_assign(
791 TP_fast_assign( \ 798 __entry->dev = VFS_I(ip)->i_sb->s_dev;
792 __entry->dev = VFS_I(ip)->i_sb->s_dev; \ 799 __entry->ino = ip->i_ino;
793 __entry->ino = ip->i_ino; \ 800 __entry->size = ip->i_d.di_size;
794 __entry->size = ip->i_d.di_size; \ 801 __entry->new_size = ip->i_new_size;
795 __entry->new_size = ip->i_new_size; \ 802 __entry->offset = offset;
796 __entry->offset = offset; \ 803 __entry->count = count;
797 __entry->count = count; \ 804 __entry->flags = flags;
798 __entry->flags = flags; \ 805 ),
799 ), \ 806 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
800 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 807 "offset 0x%llx count 0x%zx ioflags %s",
801 "offset 0x%llx count 0x%zx ioflags %s", \ 808 MAJOR(__entry->dev), MINOR(__entry->dev),
802 MAJOR(__entry->dev), MINOR(__entry->dev), \ 809 __entry->ino,
803 __entry->ino, \ 810 __entry->size,
804 __entry->size, \ 811 __entry->new_size,
805 __entry->new_size, \ 812 __entry->offset,
806 __entry->offset, \ 813 __entry->count,
807 __entry->count, \ 814 __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
808 __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \
809) 815)
816
817#define DEFINE_RW_EVENT(name) \
818DEFINE_EVENT(xfs_file_class, name, \
819 TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
820 TP_ARGS(ip, count, offset, flags))
810DEFINE_RW_EVENT(xfs_file_read); 821DEFINE_RW_EVENT(xfs_file_read);
811DEFINE_RW_EVENT(xfs_file_buffered_write); 822DEFINE_RW_EVENT(xfs_file_buffered_write);
812DEFINE_RW_EVENT(xfs_file_direct_write); 823DEFINE_RW_EVENT(xfs_file_direct_write);
813DEFINE_RW_EVENT(xfs_file_splice_read); 824DEFINE_RW_EVENT(xfs_file_splice_read);
814DEFINE_RW_EVENT(xfs_file_splice_write); 825DEFINE_RW_EVENT(xfs_file_splice_write);
815 826
816 827DECLARE_EVENT_CLASS(xfs_page_class,
817#define DEFINE_PAGE_EVENT(name) \ 828 TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
818TRACE_EVENT(name, \ 829 TP_ARGS(inode, page, off),
819 TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ 830 TP_STRUCT__entry(
820 TP_ARGS(inode, page, off), \ 831 __field(dev_t, dev)
821 TP_STRUCT__entry( \ 832 __field(xfs_ino_t, ino)
822 __field(dev_t, dev) \ 833 __field(pgoff_t, pgoff)
823 __field(xfs_ino_t, ino) \ 834 __field(loff_t, size)
824 __field(pgoff_t, pgoff) \ 835 __field(unsigned long, offset)
825 __field(loff_t, size) \ 836 __field(int, delalloc)
826 __field(unsigned long, offset) \ 837 __field(int, unmapped)
827 __field(int, delalloc) \ 838 __field(int, unwritten)
828 __field(int, unmapped) \ 839 ),
829 __field(int, unwritten) \ 840 TP_fast_assign(
830 ), \ 841 int delalloc = -1, unmapped = -1, unwritten = -1;
831 TP_fast_assign( \ 842
832 int delalloc = -1, unmapped = -1, unwritten = -1; \ 843 if (page_has_buffers(page))
833 \ 844 xfs_count_page_state(page, &delalloc,
834 if (page_has_buffers(page)) \ 845 &unmapped, &unwritten);
835 xfs_count_page_state(page, &delalloc, \ 846 __entry->dev = inode->i_sb->s_dev;
836 &unmapped, &unwritten); \ 847 __entry->ino = XFS_I(inode)->i_ino;
837 __entry->dev = inode->i_sb->s_dev; \ 848 __entry->pgoff = page_offset(page);
838 __entry->ino = XFS_I(inode)->i_ino; \ 849 __entry->size = i_size_read(inode);
839 __entry->pgoff = page_offset(page); \ 850 __entry->offset = off;
840 __entry->size = i_size_read(inode); \ 851 __entry->delalloc = delalloc;
841 __entry->offset = off; \ 852 __entry->unmapped = unmapped;
842 __entry->delalloc = delalloc; \ 853 __entry->unwritten = unwritten;
843 __entry->unmapped = unmapped; \ 854 ),
844 __entry->unwritten = unwritten; \ 855 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
845 ), \ 856 "delalloc %d unmapped %d unwritten %d",
846 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \ 857 MAJOR(__entry->dev), MINOR(__entry->dev),
847 "delalloc %d unmapped %d unwritten %d", \ 858 __entry->ino,
848 MAJOR(__entry->dev), MINOR(__entry->dev), \ 859 __entry->pgoff,
849 __entry->ino, \ 860 __entry->size,
850 __entry->pgoff, \ 861 __entry->offset,
851 __entry->size, \ 862 __entry->delalloc,
852 __entry->offset, \ 863 __entry->unmapped,
853 __entry->delalloc, \ 864 __entry->unwritten)
854 __entry->unmapped, \
855 __entry->unwritten) \
856) 865)
866
867#define DEFINE_PAGE_EVENT(name) \
868DEFINE_EVENT(xfs_page_class, name, \
869 TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
870 TP_ARGS(inode, page, off))
857DEFINE_PAGE_EVENT(xfs_writepage); 871DEFINE_PAGE_EVENT(xfs_writepage);
858DEFINE_PAGE_EVENT(xfs_releasepage); 872DEFINE_PAGE_EVENT(xfs_releasepage);
859DEFINE_PAGE_EVENT(xfs_invalidatepage); 873DEFINE_PAGE_EVENT(xfs_invalidatepage);
860 874
861#define DEFINE_IOMAP_EVENT(name) \ 875DECLARE_EVENT_CLASS(xfs_iomap_class,
862TRACE_EVENT(name, \ 876 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
863 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ 877 int flags, struct xfs_bmbt_irec *irec),
864 int flags, struct xfs_bmbt_irec *irec), \ 878 TP_ARGS(ip, offset, count, flags, irec),
865 TP_ARGS(ip, offset, count, flags, irec), \ 879 TP_STRUCT__entry(
866 TP_STRUCT__entry( \ 880 __field(dev_t, dev)
867 __field(dev_t, dev) \ 881 __field(xfs_ino_t, ino)
868 __field(xfs_ino_t, ino) \ 882 __field(loff_t, size)
869 __field(loff_t, size) \ 883 __field(loff_t, new_size)
870 __field(loff_t, new_size) \ 884 __field(loff_t, offset)
871 __field(loff_t, offset) \ 885 __field(size_t, count)
872 __field(size_t, count) \ 886 __field(int, flags)
873 __field(int, flags) \ 887 __field(xfs_fileoff_t, startoff)
874 __field(xfs_fileoff_t, startoff) \ 888 __field(xfs_fsblock_t, startblock)
875 __field(xfs_fsblock_t, startblock) \ 889 __field(xfs_filblks_t, blockcount)
876 __field(xfs_filblks_t, blockcount) \ 890 ),
877 ), \ 891 TP_fast_assign(
878 TP_fast_assign( \ 892 __entry->dev = VFS_I(ip)->i_sb->s_dev;
879 __entry->dev = VFS_I(ip)->i_sb->s_dev; \ 893 __entry->ino = ip->i_ino;
880 __entry->ino = ip->i_ino; \ 894 __entry->size = ip->i_d.di_size;
881 __entry->size = ip->i_d.di_size; \ 895 __entry->new_size = ip->i_new_size;
882 __entry->new_size = ip->i_new_size; \ 896 __entry->offset = offset;
883 __entry->offset = offset; \ 897 __entry->count = count;
884 __entry->count = count; \ 898 __entry->flags = flags;
885 __entry->flags = flags; \ 899 __entry->startoff = irec ? irec->br_startoff : 0;
886 __entry->startoff = irec ? irec->br_startoff : 0; \ 900 __entry->startblock = irec ? irec->br_startblock : 0;
887 __entry->startblock = irec ? irec->br_startblock : 0; \ 901 __entry->blockcount = irec ? irec->br_blockcount : 0;
888 __entry->blockcount = irec ? irec->br_blockcount : 0; \ 902 ),
889 ), \ 903 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
890 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 904 "offset 0x%llx count %zd flags %s "
891 "offset 0x%llx count %zd flags %s " \ 905 "startoff 0x%llx startblock %lld blockcount 0x%llx",
892 "startoff 0x%llx startblock %lld blockcount 0x%llx", \ 906 MAJOR(__entry->dev), MINOR(__entry->dev),
893 MAJOR(__entry->dev), MINOR(__entry->dev), \ 907 __entry->ino,
894 __entry->ino, \ 908 __entry->size,
895 __entry->size, \ 909 __entry->new_size,
896 __entry->new_size, \ 910 __entry->offset,
897 __entry->offset, \ 911 __entry->count,
898 __entry->count, \ 912 __print_flags(__entry->flags, "|", BMAPI_FLAGS),
899 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \ 913 __entry->startoff,
900 __entry->startoff, \ 914 (__int64_t)__entry->startblock,
901 (__int64_t)__entry->startblock, \ 915 __entry->blockcount)
902 __entry->blockcount) \
903) 916)
917
918#define DEFINE_IOMAP_EVENT(name) \
919DEFINE_EVENT(xfs_iomap_class, name, \
920 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
921 int flags, struct xfs_bmbt_irec *irec), \
922 TP_ARGS(ip, offset, count, flags, irec))
904DEFINE_IOMAP_EVENT(xfs_iomap_enter); 923DEFINE_IOMAP_EVENT(xfs_iomap_enter);
905DEFINE_IOMAP_EVENT(xfs_iomap_found); 924DEFINE_IOMAP_EVENT(xfs_iomap_found);
906DEFINE_IOMAP_EVENT(xfs_iomap_alloc); 925DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
907 926
908#define DEFINE_SIMPLE_IO_EVENT(name) \ 927DECLARE_EVENT_CLASS(xfs_simple_io_class,
909TRACE_EVENT(name, \ 928 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
910 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \ 929 TP_ARGS(ip, offset, count),
911 TP_ARGS(ip, offset, count), \ 930 TP_STRUCT__entry(
912 TP_STRUCT__entry( \ 931 __field(dev_t, dev)
913 __field(dev_t, dev) \ 932 __field(xfs_ino_t, ino)
914 __field(xfs_ino_t, ino) \ 933 __field(loff_t, size)
915 __field(loff_t, size) \ 934 __field(loff_t, new_size)
916 __field(loff_t, new_size) \ 935 __field(loff_t, offset)
917 __field(loff_t, offset) \ 936 __field(size_t, count)
918 __field(size_t, count) \ 937 ),
919 ), \ 938 TP_fast_assign(
920 TP_fast_assign( \ 939 __entry->dev = VFS_I(ip)->i_sb->s_dev;
921 __entry->dev = VFS_I(ip)->i_sb->s_dev; \ 940 __entry->ino = ip->i_ino;
922 __entry->ino = ip->i_ino; \ 941 __entry->size = ip->i_d.di_size;
923 __entry->size = ip->i_d.di_size; \ 942 __entry->new_size = ip->i_new_size;
924 __entry->new_size = ip->i_new_size; \ 943 __entry->offset = offset;
925 __entry->offset = offset; \ 944 __entry->count = count;
926 __entry->count = count; \ 945 ),
927 ), \ 946 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
928 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 947 "offset 0x%llx count %zd",
929 "offset 0x%llx count %zd", \ 948 MAJOR(__entry->dev), MINOR(__entry->dev),
930 MAJOR(__entry->dev), MINOR(__entry->dev), \ 949 __entry->ino,
931 __entry->ino, \ 950 __entry->size,
932 __entry->size, \ 951 __entry->new_size,
933 __entry->new_size, \ 952 __entry->offset,
934 __entry->offset, \ 953 __entry->count)
935 __entry->count) \
936); 954);
955
956#define DEFINE_SIMPLE_IO_EVENT(name) \
957DEFINE_EVENT(xfs_simple_io_class, name, \
958 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
959 TP_ARGS(ip, offset, count))
937DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); 960DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
938DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); 961DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
939 962
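
The rest of this file repeats the conversion shown here: TRACE_EVENT() duplicated the assign/print machinery per event, while DECLARE_EVENT_CLASS() emits it once and each DEFINE_EVENT() adds a named tracepoint almost for free, which is why the rewrite shrinks the generated object code. A hedged sketch of the idiom with hypothetical names (my_io_class and its two events), modelled on xfs_simple_io_class above:

DECLARE_EVENT_CLASS(my_io_class,
        TP_PROTO(struct inode *inode, loff_t offset, size_t count),
        TP_ARGS(inode, offset, count),
        TP_STRUCT__entry(
                __field(dev_t, dev)
                __field(loff_t, offset)
                __field(size_t, count)
        ),
        TP_fast_assign(
                __entry->dev = inode->i_sb->s_dev;
                __entry->offset = offset;
                __entry->count = count;
        ),
        TP_printk("dev %d:%d offset 0x%llx count %zd",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->offset,
                  __entry->count)
);

#define DEFINE_MY_IO_EVENT(name) \
DEFINE_EVENT(my_io_class, name, \
        TP_PROTO(struct inode *inode, loff_t offset, size_t count), \
        TP_ARGS(inode, offset, count))
DEFINE_MY_IO_EVENT(my_io_start);
DEFINE_MY_IO_EVENT(my_io_done);
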
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 38e764146644..67c018392d62 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -69,7 +69,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
69 69
70STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 70STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
71STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 71STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
72STATIC int xfs_qm_shake(int, gfp_t); 72STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t);
73 73
74static struct shrinker xfs_qm_shaker = { 74static struct shrinker xfs_qm_shaker = {
75 .shrink = xfs_qm_shake, 75 .shrink = xfs_qm_shake,
@@ -249,8 +249,10 @@ xfs_qm_hold_quotafs_ref(
249 249
250 if (!xfs_Gqm) { 250 if (!xfs_Gqm) {
251 xfs_Gqm = xfs_Gqm_init(); 251 xfs_Gqm = xfs_Gqm_init();
252 if (!xfs_Gqm) 252 if (!xfs_Gqm) {
253 mutex_unlock(&xfs_Gqm_lock);
253 return ENOMEM; 254 return ENOMEM;
255 }
254 } 256 }
255 257
256 /* 258 /*
@@ -1630,10 +1632,7 @@ xfs_qm_dqusage_adjust(
1630 xfs_ino_t ino, /* inode number to get data for */ 1632 xfs_ino_t ino, /* inode number to get data for */
1631 void __user *buffer, /* not used */ 1633 void __user *buffer, /* not used */
1632 int ubsize, /* not used */ 1634 int ubsize, /* not used */
1633 void *private_data, /* not used */
1634 xfs_daddr_t bno, /* starting block of inode cluster */
1635 int *ubused, /* not used */ 1635 int *ubused, /* not used */
1636 void *dip, /* on-disk inode pointer (not used) */
1637 int *res) /* result code value */ 1636 int *res) /* result code value */
1638{ 1637{
1639 xfs_inode_t *ip; 1638 xfs_inode_t *ip;
@@ -1658,7 +1657,7 @@ xfs_qm_dqusage_adjust(
1658 * the case in all other instances. It's OK that we do this because 1657 * the case in all other instances. It's OK that we do this because
1659 * quotacheck is done only at mount time. 1658 * quotacheck is done only at mount time.
1660 */ 1659 */
1661 if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) { 1660 if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) {
1662 *res = BULKSTAT_RV_NOTHING; 1661 *res = BULKSTAT_RV_NOTHING;
1663 return error; 1662 return error;
1664 } 1663 }
@@ -1794,12 +1793,13 @@ xfs_qm_quotacheck(
1794 * Iterate thru all the inodes in the file system, 1793 * Iterate thru all the inodes in the file system,
1795 * adjusting the corresponding dquot counters in core. 1794 * adjusting the corresponding dquot counters in core.
1796 */ 1795 */
1797 if ((error = xfs_bulkstat(mp, &lastino, &count, 1796 error = xfs_bulkstat(mp, &lastino, &count,
1798 xfs_qm_dqusage_adjust, NULL, 1797 xfs_qm_dqusage_adjust,
1799 structsz, NULL, BULKSTAT_FG_IGET, &done))) 1798 structsz, NULL, &done);
1799 if (error)
1800 break; 1800 break;
1801 1801
1802 } while (! done); 1802 } while (!done);
1803 1803
1804 /* 1804 /*
1805 * We've made all the changes that we need to make incore. 1805 * We've made all the changes that we need to make incore.
@@ -1887,14 +1887,14 @@ xfs_qm_init_quotainos(
1887 mp->m_sb.sb_uquotino != NULLFSINO) { 1887 mp->m_sb.sb_uquotino != NULLFSINO) {
1888 ASSERT(mp->m_sb.sb_uquotino > 0); 1888 ASSERT(mp->m_sb.sb_uquotino > 0);
1889 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 1889 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1890 0, 0, &uip, 0))) 1890 0, 0, &uip)))
1891 return XFS_ERROR(error); 1891 return XFS_ERROR(error);
1892 } 1892 }
1893 if (XFS_IS_OQUOTA_ON(mp) && 1893 if (XFS_IS_OQUOTA_ON(mp) &&
1894 mp->m_sb.sb_gquotino != NULLFSINO) { 1894 mp->m_sb.sb_gquotino != NULLFSINO) {
1895 ASSERT(mp->m_sb.sb_gquotino > 0); 1895 ASSERT(mp->m_sb.sb_gquotino > 0);
1896 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 1896 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1897 0, 0, &gip, 0))) { 1897 0, 0, &gip))) {
1898 if (uip) 1898 if (uip)
1899 IRELE(uip); 1899 IRELE(uip);
1900 return XFS_ERROR(error); 1900 return XFS_ERROR(error);
@@ -2117,7 +2117,10 @@ xfs_qm_shake_freelist(
2117 */ 2117 */
2118/* ARGSUSED */ 2118/* ARGSUSED */
2119STATIC int 2119STATIC int
2120xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask) 2120xfs_qm_shake(
2121 struct shrinker *shrink,
2122 int nr_to_scan,
2123 gfp_t gfp_mask)
2121{ 2124{
2122 int ndqused, nfree, n; 2125 int ndqused, nfree, n;
2123 2126
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 92b002f1805f..b4487764e923 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -262,7 +262,7 @@ xfs_qm_scall_trunc_qfiles(
262 } 262 }
263 263
264 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { 264 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
265 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); 265 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip);
266 if (!error) { 266 if (!error) {
267 error = xfs_truncate_file(mp, qip); 267 error = xfs_truncate_file(mp, qip);
268 IRELE(qip); 268 IRELE(qip);
@@ -271,7 +271,7 @@ xfs_qm_scall_trunc_qfiles(
271 271
272 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && 272 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
273 mp->m_sb.sb_gquotino != NULLFSINO) { 273 mp->m_sb.sb_gquotino != NULLFSINO) {
274 error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); 274 error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip);
275 if (!error2) { 275 if (!error2) {
276 error2 = xfs_truncate_file(mp, qip); 276 error2 = xfs_truncate_file(mp, qip);
277 IRELE(qip); 277 IRELE(qip);
@@ -417,12 +417,12 @@ xfs_qm_scall_getqstat(
417 } 417 }
418 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { 418 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
419 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 419 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
420 0, 0, &uip, 0) == 0) 420 0, 0, &uip) == 0)
421 tempuqip = B_TRUE; 421 tempuqip = B_TRUE;
422 } 422 }
423 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { 423 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
424 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 424 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
425 0, 0, &gip, 0) == 0) 425 0, 0, &gip) == 0)
426 tempgqip = B_TRUE; 426 tempgqip = B_TRUE;
427 } 427 }
428 if (uip) { 428 if (uip) {
@@ -1109,10 +1109,7 @@ xfs_qm_internalqcheck_adjust(
1109 xfs_ino_t ino, /* inode number to get data for */ 1109 xfs_ino_t ino, /* inode number to get data for */
1110 void __user *buffer, /* not used */ 1110 void __user *buffer, /* not used */
1111 int ubsize, /* not used */ 1111 int ubsize, /* not used */
1112 void *private_data, /* not used */
1113 xfs_daddr_t bno, /* starting block of inode cluster */
1114 int *ubused, /* not used */ 1112 int *ubused, /* not used */
1115 void *dip, /* not used */
1116 int *res) /* bulkstat result code */ 1113 int *res) /* bulkstat result code */
1117{ 1114{
1118 xfs_inode_t *ip; 1115 xfs_inode_t *ip;
@@ -1134,7 +1131,7 @@ xfs_qm_internalqcheck_adjust(
1134 ipreleased = B_FALSE; 1131 ipreleased = B_FALSE;
1135 again: 1132 again:
1136 lock_flags = XFS_ILOCK_SHARED; 1133 lock_flags = XFS_ILOCK_SHARED;
1137 if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) { 1134 if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) {
1138 *res = BULKSTAT_RV_NOTHING; 1135 *res = BULKSTAT_RV_NOTHING;
1139 return (error); 1136 return (error);
1140 } 1137 }
@@ -1205,15 +1202,15 @@ xfs_qm_internalqcheck(
1205 * Iterate thru all the inodes in the file system, 1202 * Iterate thru all the inodes in the file system,
1206 * adjusting the corresponding dquot counters 1203 * adjusting the corresponding dquot counters
1207 */ 1204 */
1208 if ((error = xfs_bulkstat(mp, &lastino, &count, 1205 error = xfs_bulkstat(mp, &lastino, &count,
1209 xfs_qm_internalqcheck_adjust, NULL, 1206 xfs_qm_internalqcheck_adjust,
1210 0, NULL, BULKSTAT_FG_IGET, &done))) { 1207 0, NULL, &done);
1208 if (error) {
1209 cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
1211 break; 1210 break;
1212 } 1211 }
1213 } while (! done); 1212 } while (!done);
1214 if (error) { 1213
1215 cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
1216 }
1217 cmn_err(CE_DEBUG, "Checking results against system dquots"); 1214 cmn_err(CE_DEBUG, "Checking results against system dquots");
1218 for (i = 0; i < qmtest_hashmask; i++) { 1215 for (i = 0; i < qmtest_hashmask; i++) {
1219 xfs_dqtest_t *d, *n; 1216 xfs_dqtest_t *d, *n;
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 401f364ad36c..4917d4eed4ed 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -227,7 +227,6 @@ typedef struct xfs_perag {
227 227
228 atomic_t pagf_fstrms; /* # of filestreams active in this AG */ 228 atomic_t pagf_fstrms; /* # of filestreams active in this AG */
229 229
230 int pag_ici_init; /* incore inode cache initialised */
231 rwlock_t pag_ici_lock; /* incore inode lock */ 230 rwlock_t pag_ici_lock; /* incore inode lock */
232 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 231 struct radix_tree_root pag_ici_root; /* incore inode cache root */
233 int pag_ici_reclaimable; /* reclaimable inodes */ 232 int pag_ici_reclaimable; /* reclaimable inodes */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5bba29a07812..7f159d2a429a 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -69,7 +69,9 @@ xfs_swapext(
69 goto out; 69 goto out;
70 } 70 }
71 71
72 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) { 72 if (!(file->f_mode & FMODE_WRITE) ||
73 !(file->f_mode & FMODE_READ) ||
74 (file->f_flags & O_APPEND)) {
73 error = XFS_ERROR(EBADF); 75 error = XFS_ERROR(EBADF);
74 goto out_put_file; 76 goto out_put_file;
75 } 77 }
@@ -81,6 +83,7 @@ xfs_swapext(
81 } 83 }
82 84
83 if (!(tmp_file->f_mode & FMODE_WRITE) || 85 if (!(tmp_file->f_mode & FMODE_WRITE) ||
86 !(tmp_file->f_mode & FMODE_READ) ||
84 (tmp_file->f_flags & O_APPEND)) { 87 (tmp_file->f_flags & O_APPEND)) {
85 error = XFS_ERROR(EBADF); 88 error = XFS_ERROR(EBADF);
86 goto out_put_tmp_file; 89 goto out_put_tmp_file;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 9d884c127bb9..c7142a064c48 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1203,6 +1203,63 @@ error0:
1203 return error; 1203 return error;
1204} 1204}
1205 1205
1206STATIC int
1207xfs_imap_lookup(
1208 struct xfs_mount *mp,
1209 struct xfs_trans *tp,
1210 xfs_agnumber_t agno,
1211 xfs_agino_t agino,
1212 xfs_agblock_t agbno,
1213 xfs_agblock_t *chunk_agbno,
1214 xfs_agblock_t *offset_agbno,
1215 int flags)
1216{
1217 struct xfs_inobt_rec_incore rec;
1218 struct xfs_btree_cur *cur;
1219 struct xfs_buf *agbp;
1220 xfs_agino_t startino;
1221 int error;
1222 int i;
1223
1224 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1225 if (error) {
1226 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1227 "xfs_ialloc_read_agi() returned "
1228 "error %d, agno %d",
1229 error, agno);
1230 return error;
1231 }
1232
1233 /*
1234 * Derive and look up the exact inode record for the given agino. If the
1235 * record cannot be found, then it's an invalid inode number and we
1236 * should abort.
1237 */
1238 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1239 startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
1240 error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
1241 if (!error) {
1242 if (i)
1243 error = xfs_inobt_get_rec(cur, &rec, &i);
1244 if (!error && i == 0)
1245 error = EINVAL;
1246 }
1247
1248 xfs_trans_brelse(tp, agbp);
1249 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1250 if (error)
1251 return error;
1252
1253 /* for untrusted inodes, check that it is allocated first */
1254 if ((flags & XFS_IGET_UNTRUSTED) &&
1255 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
1256 return EINVAL;
1257
1258 *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
1259 *offset_agbno = agbno - *chunk_agbno;
1260 return 0;
1261}
1262
1206/* 1263/*
1207 * Return the location of the inode in imap, for mapping it into a buffer. 1264 * Return the location of the inode in imap, for mapping it into a buffer.
1208 */ 1265 */
@@ -1235,8 +1292,11 @@ xfs_imap(
1235 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || 1292 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
1236 ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1293 ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1237#ifdef DEBUG 1294#ifdef DEBUG
1238 /* no diagnostics for bulkstat, ino comes from userspace */ 1295 /*
1239 if (flags & XFS_IGET_BULKSTAT) 1296 * Don't output diagnostic information for untrusted inodes
1297 * as they can be invalid without implying corruption.
1298 */
1299 if (flags & XFS_IGET_UNTRUSTED)
1240 return XFS_ERROR(EINVAL); 1300 return XFS_ERROR(EINVAL);
1241 if (agno >= mp->m_sb.sb_agcount) { 1301 if (agno >= mp->m_sb.sb_agcount) {
1242 xfs_fs_cmn_err(CE_ALERT, mp, 1302 xfs_fs_cmn_err(CE_ALERT, mp,
@@ -1263,6 +1323,23 @@ xfs_imap(
1263 return XFS_ERROR(EINVAL); 1323 return XFS_ERROR(EINVAL);
1264 } 1324 }
1265 1325
1326 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1327
1328 /*
1329 * For bulkstat and handle lookups, we have an untrusted inode number
1330 * that we have to verify is valid. We cannot do this just by reading
1331 * the inode buffer as it may have been unlinked and removed, leaving
1332 * inodes in a stale state on disk. Hence we have to do a btree lookup
1333 * in all cases where an untrusted inode number is passed.
1334 */
1335 if (flags & XFS_IGET_UNTRUSTED) {
1336 error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
1337 &chunk_agbno, &offset_agbno, flags);
1338 if (error)
1339 return error;
1340 goto out_map;
1341 }
1342
1266 /* 1343 /*
1267 * If the inode cluster size is the same as the blocksize or 1344 * If the inode cluster size is the same as the blocksize or
1268 * smaller we get to the buffer by simple arithmetics. 1345 * smaller we get to the buffer by simple arithmetics.
@@ -1277,24 +1354,6 @@ xfs_imap(
 		return 0;
 	}

-	blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
-
-	/*
-	 * If we get a block number passed from bulkstat we can use it to
-	 * find the buffer easily.
-	 */
-	if (imap->im_blkno) {
-		offset = XFS_INO_TO_OFFSET(mp, ino);
-		ASSERT(offset < mp->m_sb.sb_inopblock);
-
-		cluster_agbno = xfs_daddr_to_agbno(mp, imap->im_blkno);
-		offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock;
-
-		imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
-		imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
-		return 0;
-	}
-
 	/*
 	 * If the inode chunks are aligned then use simple maths to
 	 * find the location. Otherwise we have to do a btree
@@ -1304,50 +1363,13 @@ xfs_imap(
 		offset_agbno = agbno & mp->m_inoalign_mask;
 		chunk_agbno = agbno - offset_agbno;
 	} else {
-		xfs_btree_cur_t	*cur;	/* inode btree cursor */
-		xfs_inobt_rec_incore_t chunk_rec;
-		xfs_buf_t	*agbp;	/* agi buffer */
-		int		i;	/* temp state */
-
-		error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
-		if (error) {
-			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
-					"xfs_ialloc_read_agi() returned "
-					"error %d, agno %d",
-					error, agno);
-			return error;
-		}
-
-		cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
-		error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
-		if (error) {
-			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
-					"xfs_inobt_lookup() failed");
-			goto error0;
-		}
-
-		error = xfs_inobt_get_rec(cur, &chunk_rec, &i);
-		if (error) {
-			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
-					"xfs_inobt_get_rec() failed");
-			goto error0;
-		}
-		if (i == 0) {
-#ifdef DEBUG
-			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
-					"xfs_inobt_get_rec() failed");
-#endif /* DEBUG */
-			error = XFS_ERROR(EINVAL);
-		}
- error0:
-		xfs_trans_brelse(tp, agbp);
-		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+		error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
+					&chunk_agbno, &offset_agbno, flags);
 		if (error)
 			return error;
-		chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino);
-		offset_agbno = agbno - chunk_agbno;
 	}

+out_map:
 	ASSERT(agbno >= chunk_agbno);
 	cluster_agbno = chunk_agbno +
 		((offset_agbno / blks_per_cluster) * blks_per_cluster);
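
The three hunks above make xfs_imap_lookup() the single validation path for inode numbers that arrive from userspace. A rough sketch of the resulting xfs_imap() control flow — an illustration only, with the cluster-size and chunk-alignment fast paths elided — looks like this:

	/* Sketch of the post-patch xfs_imap() flow; not the literal source. */
	int
	xfs_imap_flow(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
		      struct xfs_imap *imap, uint flags)
	{
		xfs_agnumber_t	agno = XFS_INO_TO_AGNO(mp, ino);
		xfs_agino_t	agino = XFS_INO_TO_AGINO(mp, ino);
		xfs_agblock_t	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
		xfs_agblock_t	chunk_agbno, offset_agbno;
		int		error;

		if (flags & XFS_IGET_UNTRUSTED) {
			/* btree lookup proves the inode chunk really exists */
			error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
						&chunk_agbno, &offset_agbno, flags);
			if (error)
				return error;
			goto out_map;
		}
		/* trusted callers keep the cheap arithmetic paths */
	out_map:
		/* ... fill in imap->im_blkno, im_len and im_boffset ... */
		return 0;
	}

The point of the reordering is that im_blkno is no longer seeded from a caller-supplied bno (see the xfs_iread() hunk below), so a stale bulkstat block number can never short-circuit validation.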
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 6845db90818f..8f8b91be2c99 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -259,7 +259,6 @@ xfs_iget_cache_miss(
 	xfs_trans_t		*tp,
 	xfs_ino_t		ino,
 	struct xfs_inode	**ipp,
-	xfs_daddr_t		bno,
 	int			flags,
 	int			lock_flags)
 {
@@ -272,7 +271,7 @@ xfs_iget_cache_miss(
 	if (!ip)
 		return ENOMEM;

-	error = xfs_iread(mp, tp, ip, bno, flags);
+	error = xfs_iread(mp, tp, ip, flags);
 	if (error)
 		goto out_destroy;

@@ -358,8 +357,6 @@ out_destroy:
  *	   within the file system for the inode being requested.
  * lock_flags -- flags indicating how to lock the inode.  See the comment
  *		 for xfs_ilock() for a list of valid values.
- * bno -- the block number starting the buffer containing the inode,
- *	  if known (as by bulkstat), else 0.
  */
 int
 xfs_iget(
@@ -368,8 +365,7 @@ xfs_iget(
 	xfs_ino_t	ino,
 	uint		flags,
 	uint		lock_flags,
-	xfs_inode_t	**ipp,
-	xfs_daddr_t	bno)
+	xfs_inode_t	**ipp)
 {
 	xfs_inode_t	*ip;
 	int		error;
@@ -382,9 +378,6 @@ xfs_iget(
 
 	/* get the perag structure and ensure that it's inode capable */
 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
-	if (!pag->pagi_inodeok)
-		return EINVAL;
-	ASSERT(pag->pag_ici_init);
 	agino = XFS_INO_TO_AGINO(mp, ino);

 again:
@@ -400,7 +393,7 @@ again:
 		read_unlock(&pag->pag_ici_lock);
 		XFS_STATS_INC(xs_ig_missed);

-		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno,
+		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
 					flags, lock_flags);
 		if (error)
 			goto out_error_or_again;
@@ -744,30 +737,24 @@ xfs_ilock_demote(
 }

 #ifdef DEBUG
-/*
- * Debug-only routine, without additional rw_semaphore APIs, we can
- * now only answer requests regarding whether we hold the lock for write
- * (reader state is outside our visibility, we only track writer state).
- *
- * Note: this means !xfs_isilocked would give false positives, so don't do that.
- */
 int
 xfs_isilocked(
 	xfs_inode_t		*ip,
 	uint			lock_flags)
 {
-	if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) ==
-	    XFS_ILOCK_EXCL) {
-		if (!ip->i_lock.mr_writer)
-			return 0;
+	if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
+		if (!(lock_flags & XFS_ILOCK_SHARED))
+			return !!ip->i_lock.mr_writer;
+		return rwsem_is_locked(&ip->i_lock.mr_lock);
 	}

-	if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) ==
-	    XFS_IOLOCK_EXCL) {
-		if (!ip->i_iolock.mr_writer)
-			return 0;
+	if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
+		if (!(lock_flags & XFS_IOLOCK_SHARED))
+			return !!ip->i_iolock.mr_writer;
+		return rwsem_is_locked(&ip->i_iolock.mr_lock);
 	}

-	return 1;
+	ASSERT(0);
+	return 0;
 }
 #endif
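
The xfs_isilocked() rewrite changes what a shared-lock query means. Previously only the exclusive case was answerable (anything else quietly returned 1); now a query that includes a SHARED flag falls back to rwsem_is_locked(), and flag combinations naming neither lock trip ASSERT(0). Typical assertion usage after the change, for illustration:

	/* Exclusive query: still precise, based on the tracked writer state. */
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	/* Shared-or-exclusive query: now meaningful, but only proves that
	 * *someone* holds the rwsem, so it stays a debug-only heuristic. */
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));

Because rwsem_is_locked() cannot distinguish the current task from any other holder, !xfs_isilocked() can still yield false positives — which is why the function remains under #ifdef DEBUG.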
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8cd6e8d8fe9c..b76a829d7e20 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -177,7 +177,7 @@ xfs_imap_to_bp(
 	if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
 				XFS_ERRTAG_ITOBP_INOTOBP,
 				XFS_RANDOM_ITOBP_INOTOBP))) {
-		if (iget_flags & XFS_IGET_BULKSTAT) {
+		if (iget_flags & XFS_IGET_UNTRUSTED) {
 			xfs_trans_brelse(tp, bp);
 			return XFS_ERROR(EINVAL);
 		}
@@ -787,7 +787,6 @@ xfs_iread(
 	xfs_mount_t	*mp,
 	xfs_trans_t	*tp,
 	xfs_inode_t	*ip,
-	xfs_daddr_t	bno,
 	uint		iget_flags)
 {
 	xfs_buf_t	*bp;
@@ -797,11 +796,9 @@ xfs_iread(
 	/*
 	 * Fill in the location information in the in-core inode.
 	 */
-	ip->i_imap.im_blkno = bno;
 	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
 	if (error)
 		return error;
-	ASSERT(bno == 0 || bno == ip->i_imap.im_blkno);

 	/*
 	 * Get pointers to the on-disk inode and the buffer containing it.
@@ -1940,10 +1937,10 @@ xfs_ifree_cluster(
 	int			blks_per_cluster;
 	int			nbufs;
 	int			ninodes;
-	int			i, j, found, pre_flushed;
+	int			i, j;
 	xfs_daddr_t		blkno;
 	xfs_buf_t		*bp;
-	xfs_inode_t		*ip, **ip_found;
+	xfs_inode_t		*ip;
 	xfs_inode_log_item_t	*iip;
 	xfs_log_item_t		*lip;
 	struct xfs_perag	*pag;
@@ -1960,114 +1957,97 @@ xfs_ifree_cluster(
 		nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
 	}

-	ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS);
-
 	for (j = 0; j < nbufs; j++, inum += ninodes) {
+		int	found = 0;
+
 		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
 					 XFS_INO_TO_AGBNO(mp, inum));

+		/*
+		 * We obtain and lock the backing buffer first in the process
+		 * here, as we have to ensure that any dirty inode that we
+		 * can't get the flush lock on is attached to the buffer.
+		 * If we scan the in-memory inodes first, then buffer IO can
+		 * complete before we get a lock on it, and hence we may fail
+		 * to mark all the active inodes on the buffer stale.
+		 */
+		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
+					mp->m_bsize * blks_per_cluster,
+					XBF_LOCK);
+
+		/*
+		 * Walk the inodes already attached to the buffer and mark them
+		 * stale. These will all have the flush locks held, so an
+		 * in-memory inode walk can't lock them.
+		 */
+		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+		while (lip) {
+			if (lip->li_type == XFS_LI_INODE) {
+				iip = (xfs_inode_log_item_t *)lip;
+				ASSERT(iip->ili_logged == 1);
+				lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
+				xfs_trans_ail_copy_lsn(mp->m_ail,
+							&iip->ili_flush_lsn,
+							&iip->ili_item.li_lsn);
+				xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
+				found++;
+			}
+			lip = lip->li_bio_list;
+		}

 		/*
-		 * Look for each inode in memory and attempt to lock it,
-		 * we can be racing with flush and tail pushing here.
-		 * any inode we get the locks on, add to an array of
-		 * inode items to process later.
+		 * For each inode in memory attempt to add it to the inode
+		 * buffer and set it up for being staled on buffer IO
+		 * completion. This is safe as we've locked out tail pushing
+		 * and flushing by locking the buffer.
 		 *
-		 * The get the buffer lock, we could beat a flush
-		 * or tail pushing thread to the lock here, in which
-		 * case they will go looking for the inode buffer
-		 * and fail, we need some other form of interlock
-		 * here.
+		 * We have already marked every inode that was part of a
+		 * transaction stale above, which means there is no point in
+		 * even trying to lock them.
 		 */
-		found = 0;
 		for (i = 0; i < ninodes; i++) {
 			read_lock(&pag->pag_ici_lock);
 			ip = radix_tree_lookup(&pag->pag_ici_root,
 					XFS_INO_TO_AGINO(mp, (inum + i)));

-			/* Inode not in memory or we found it already,
-			 * nothing to do
-			 */
+			/* Inode not in memory or stale, nothing to do */
 			if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
 				read_unlock(&pag->pag_ici_lock);
 				continue;
 			}

-			if (xfs_inode_clean(ip)) {
-				read_unlock(&pag->pag_ici_lock);
-				continue;
-			}
-
-			/* If we can get the locks then add it to the
-			 * list, otherwise by the time we get the bp lock
-			 * below it will already be attached to the
-			 * inode buffer.
-			 */
-
-			/* This inode will already be locked - by us, lets
-			 * keep it that way.
-			 */
-
-			if (ip == free_ip) {
-				if (xfs_iflock_nowait(ip)) {
-					xfs_iflags_set(ip, XFS_ISTALE);
-					if (xfs_inode_clean(ip)) {
-						xfs_ifunlock(ip);
-					} else {
-						ip_found[found++] = ip;
-					}
-				}
+			/* don't try to lock/unlock the current inode */
+			if (ip != free_ip &&
+			    !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
 				read_unlock(&pag->pag_ici_lock);
 				continue;
 			}
+			read_unlock(&pag->pag_ici_lock);

-			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
-				if (xfs_iflock_nowait(ip)) {
-					xfs_iflags_set(ip, XFS_ISTALE);
-
-					if (xfs_inode_clean(ip)) {
-						xfs_ifunlock(ip);
-						xfs_iunlock(ip, XFS_ILOCK_EXCL);
-					} else {
-						ip_found[found++] = ip;
-					}
-				} else {
+			if (!xfs_iflock_nowait(ip)) {
+				if (ip != free_ip)
 					xfs_iunlock(ip, XFS_ILOCK_EXCL);
-				}
+				continue;
 			}
-			read_unlock(&pag->pag_ici_lock);
-		}

-		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
-					mp->m_bsize * blks_per_cluster,
-					XBF_LOCK);
-
-		pre_flushed = 0;
-		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
-		while (lip) {
-			if (lip->li_type == XFS_LI_INODE) {
-				iip = (xfs_inode_log_item_t *)lip;
-				ASSERT(iip->ili_logged == 1);
-				lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
-				xfs_trans_ail_copy_lsn(mp->m_ail,
-							&iip->ili_flush_lsn,
-							&iip->ili_item.li_lsn);
-				xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
-				pre_flushed++;
+			xfs_iflags_set(ip, XFS_ISTALE);
+			if (xfs_inode_clean(ip)) {
+				ASSERT(ip != free_ip);
+				xfs_ifunlock(ip);
+				xfs_iunlock(ip, XFS_ILOCK_EXCL);
+				continue;
 			}
-			lip = lip->li_bio_list;
-		}

-		for (i = 0; i < found; i++) {
-			ip = ip_found[i];
 			iip = ip->i_itemp;
-
 			if (!iip) {
+				/* inode with unlogged changes only */
+				ASSERT(ip != free_ip);
 				ip->i_update_core = 0;
 				xfs_ifunlock(ip);
 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
 				continue;
 			}
+			found++;

 			iip->ili_last_fields = iip->ili_format.ilf_fields;
 			iip->ili_format.ilf_fields = 0;
@@ -2078,17 +2058,16 @@ xfs_ifree_cluster(
 			xfs_buf_attach_iodone(bp,
 				(void(*)(xfs_buf_t*,xfs_log_item_t*))
 				xfs_istale_done, (xfs_log_item_t *)iip);
-			if (ip != free_ip) {
+
+			if (ip != free_ip)
 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
-			}
 		}

-		if (found || pre_flushed)
+		if (found)
 			xfs_trans_stale_inode_buf(tp, bp);
 		xfs_trans_binval(tp, bp);
 	}

-	kmem_free(ip_found);
 	xfs_perag_put(pag);
 }

@@ -2649,8 +2628,6 @@ xfs_iflush_cluster(
 	int			i;

 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-	ASSERT(pag->pagi_inodeok);
-	ASSERT(pag->pag_ici_init);

 	inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
 	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
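
The xfs_ifree_cluster() rewrite is an ordering fix as much as a cleanup: the cluster buffer is now locked before the in-memory inode walk, so buffer IO completion cannot slip in between and leave active inodes unstaled. In pseudocode, with hypothetical helper names standing in for the inlined loops above:

	/*
	 * Ordering invariant enforced by the new code (hypothetical helper
	 * names, illustration only):
	 */
	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
			       mp->m_bsize * blks_per_cluster, XBF_LOCK);
	mark_attached_inode_items_stale(bp);		/* 1: logged, flush-locked inodes */
	stale_remaining_incore_inodes(pag, inum, ninodes);	/* 2: everything else */

The ip_found[] array and the separate pre_flushed accounting disappear because every inode is now staled and attached in a single pass while the buffer lock excludes flushing and tail pushing.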
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 9965e40a4615..78550df13cd6 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -442,7 +442,7 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
  * xfs_iget.c prototypes.
  */
 int		xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
-			 uint, uint, xfs_inode_t **, xfs_daddr_t);
+			 uint, uint, xfs_inode_t **);
 void		xfs_iput(xfs_inode_t *, uint);
 void		xfs_iput_new(xfs_inode_t *, uint);
 void		xfs_ilock(xfs_inode_t *, uint);
@@ -500,7 +500,7 @@ do { \
  * Flags for xfs_iget()
  */
 #define XFS_IGET_CREATE		0x1
-#define XFS_IGET_BULKSTAT	0x2
+#define XFS_IGET_UNTRUSTED	0x2

 int		xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
 			    xfs_ino_t, struct xfs_dinode **,
@@ -509,7 +509,7 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
 		  struct xfs_inode *, struct xfs_dinode **,
 		  struct xfs_buf **, uint);
 int		xfs_iread(struct xfs_mount *, struct xfs_trans *,
-		  struct xfs_inode *, xfs_daddr_t, uint);
+		  struct xfs_inode *, uint);
 void		xfs_dinode_to_disk(struct xfs_dinode *,
 			   struct xfs_icdinode *);
 void		xfs_idestroy_fork(struct xfs_inode *, int);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index b1b801e4a28e..2b86f8610512 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -49,24 +49,40 @@ xfs_internal_inum(
 	    (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
 }

-STATIC int
-xfs_bulkstat_one_iget(
-	xfs_mount_t	*mp,		/* mount point for filesystem */
-	xfs_ino_t	ino,		/* inode number to get data for */
-	xfs_daddr_t	bno,		/* starting bno of inode cluster */
-	xfs_bstat_t	*buf,		/* return buffer */
-	int		*stat)		/* BULKSTAT_RV_... */
+/*
+ * Return stat information for one inode.
+ * Return 0 if ok, else errno.
+ */
+int
+xfs_bulkstat_one_int(
+	struct xfs_mount	*mp,		/* mount point for filesystem */
+	xfs_ino_t		ino,		/* inode to get data for */
+	void __user		*buffer,	/* buffer to place output in */
+	int			ubsize,		/* size of buffer */
+	bulkstat_one_fmt_pf	formatter,	/* formatter, copy to user */
+	int			*ubused,	/* bytes used by me */
+	int			*stat)		/* BULKSTAT_RV_... */
 {
-	xfs_icdinode_t	*dic;	/* dinode core info pointer */
-	xfs_inode_t	*ip;	/* incore inode pointer */
+	struct xfs_icdinode	*dic;		/* dinode core info pointer */
+	struct xfs_inode	*ip;		/* incore inode pointer */
 	struct inode	*inode;
-	int		error;
+	struct xfs_bstat	*buf;		/* return buffer */
+	int			error = 0;	/* error value */
+
+	*stat = BULKSTAT_RV_NOTHING;
+
+	if (!buffer || xfs_internal_inum(mp, ino))
+		return XFS_ERROR(EINVAL);
+
+	buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
+	if (!buf)
+		return XFS_ERROR(ENOMEM);

 	error = xfs_iget(mp, NULL, ino,
-			 XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno);
+			 XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip);
 	if (error) {
 		*stat = BULKSTAT_RV_NOTHING;
-		return error;
+		goto out_free;
 	}

 	ASSERT(ip != NULL);
@@ -127,77 +143,16 @@ xfs_bulkstat_one_iget(
 		buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
 		break;
 	}
-
 	xfs_iput(ip, XFS_ILOCK_SHARED);
-	return error;
-}

-STATIC void
-xfs_bulkstat_one_dinode(
-	xfs_mount_t	*mp,		/* mount point for filesystem */
-	xfs_ino_t	ino,		/* inode number to get data for */
-	xfs_dinode_t	*dic,		/* dinode inode pointer */
-	xfs_bstat_t	*buf)		/* return buffer */
-{
-	/*
-	 * The inode format changed when we moved the link count and
-	 * made it 32 bits long.  If this is an old format inode,
-	 * convert it in memory to look like a new one.  If it gets
-	 * flushed to disk we will convert back before flushing or
-	 * logging it.  We zero out the new projid field and the old link
-	 * count field.  We'll handle clearing the pad field (the remains
-	 * of the old uuid field) when we actually convert the inode to
-	 * the new format. We don't change the version number so that we
-	 * can distinguish this from a real new format inode.
-	 */
-	if (dic->di_version == 1) {
-		buf->bs_nlink = be16_to_cpu(dic->di_onlink);
-		buf->bs_projid = 0;
-	} else {
-		buf->bs_nlink = be32_to_cpu(dic->di_nlink);
-		buf->bs_projid = be16_to_cpu(dic->di_projid);
-	}
+	error = formatter(buffer, ubsize, ubused, buf);

-	buf->bs_ino = ino;
-	buf->bs_mode = be16_to_cpu(dic->di_mode);
-	buf->bs_uid = be32_to_cpu(dic->di_uid);
-	buf->bs_gid = be32_to_cpu(dic->di_gid);
-	buf->bs_size = be64_to_cpu(dic->di_size);
-	buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec);
-	buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec);
-	buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec);
-	buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec);
-	buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec);
-	buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec);
-	buf->bs_xflags = xfs_dic2xflags(dic);
-	buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
-	buf->bs_extents = be32_to_cpu(dic->di_nextents);
-	buf->bs_gen = be32_to_cpu(dic->di_gen);
-	memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
-	buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
-	buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
-	buf->bs_aextents = be16_to_cpu(dic->di_anextents);
-	buf->bs_forkoff = XFS_DFORK_BOFF(dic);
+	if (!error)
+		*stat = BULKSTAT_RV_DIDONE;

-	switch (dic->di_format) {
-	case XFS_DINODE_FMT_DEV:
-		buf->bs_rdev = xfs_dinode_get_rdev(dic);
-		buf->bs_blksize = BLKDEV_IOSIZE;
-		buf->bs_blocks = 0;
-		break;
-	case XFS_DINODE_FMT_LOCAL:
-	case XFS_DINODE_FMT_UUID:
-		buf->bs_rdev = 0;
-		buf->bs_blksize = mp->m_sb.sb_blocksize;
-		buf->bs_blocks = 0;
-		break;
-	case XFS_DINODE_FMT_EXTENTS:
-	case XFS_DINODE_FMT_BTREE:
-		buf->bs_rdev = 0;
-		buf->bs_blksize = mp->m_sb.sb_blocksize;
-		buf->bs_blocks = be64_to_cpu(dic->di_nblocks);
-		break;
-	}
+ out_free:
+	kmem_free(buf);
+	return error;
 }

 /* Return 0 on success or positive error */
@@ -217,118 +172,17 @@ xfs_bulkstat_one_fmt(
 	return 0;
 }

-/*
- * Return stat information for one inode.
- * Return 0 if ok, else errno.
- */
-int			/* error status */
-xfs_bulkstat_one_int(
-	xfs_mount_t	*mp,		/* mount point for filesystem */
-	xfs_ino_t	ino,		/* inode number to get data for */
-	void __user	*buffer,	/* buffer to place output in */
-	int		ubsize,		/* size of buffer */
-	bulkstat_one_fmt_pf formatter,	/* formatter, copy to user */
-	xfs_daddr_t	bno,		/* starting bno of inode cluster */
-	int		*ubused,	/* bytes used by me */
-	void		*dibuff,	/* on-disk inode buffer */
-	int		*stat)		/* BULKSTAT_RV_... */
-{
-	xfs_bstat_t	*buf;		/* return buffer */
-	int		error = 0;	/* error value */
-	xfs_dinode_t	*dip;		/* dinode inode pointer */
-
-	dip = (xfs_dinode_t *)dibuff;
-	*stat = BULKSTAT_RV_NOTHING;
-
-	if (!buffer || xfs_internal_inum(mp, ino))
-		return XFS_ERROR(EINVAL);
-
-	buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
-
-	if (dip == NULL) {
-		/* We're not being passed a pointer to a dinode.  This happens
-		 * if BULKSTAT_FG_IGET is selected.  Do the iget.
-		 */
-		error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat);
-		if (error)
-			goto out_free;
-	} else {
-		xfs_bulkstat_one_dinode(mp, ino, dip, buf);
-	}
-
-	error = formatter(buffer, ubsize, ubused, buf);
-	if (error)
-		goto out_free;
-
-	*stat = BULKSTAT_RV_DIDONE;
-
- out_free:
-	kmem_free(buf);
-	return error;
-}
-
 int
 xfs_bulkstat_one(
 	xfs_mount_t	*mp,		/* mount point for filesystem */
 	xfs_ino_t	ino,		/* inode number to get data for */
 	void __user	*buffer,	/* buffer to place output in */
 	int		ubsize,		/* size of buffer */
-	void		*private_data,	/* my private data */
-	xfs_daddr_t	bno,		/* starting bno of inode cluster */
 	int		*ubused,	/* bytes used by me */
-	void		*dibuff,	/* on-disk inode buffer */
 	int		*stat)		/* BULKSTAT_RV_... */
 {
 	return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
-				    xfs_bulkstat_one_fmt, bno,
-				    ubused, dibuff, stat);
+				    xfs_bulkstat_one_fmt, ubused, stat);
 }
-
-/*
- * Test to see whether we can use the ondisk inode directly, based
- * on the given bulkstat flags, filling in dipp accordingly.
- * Returns zero if the inode is dodgey.
- */
-STATIC int
-xfs_bulkstat_use_dinode(
-	xfs_mount_t	*mp,
-	int		flags,
-	xfs_buf_t	*bp,
-	int		clustidx,
-	xfs_dinode_t	**dipp)
-{
-	xfs_dinode_t	*dip;
-	unsigned int	aformat;
-
-	*dipp = NULL;
-	if (!bp || (flags & BULKSTAT_FG_IGET))
-		return 1;
-	dip = (xfs_dinode_t *)
-		xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
-	/*
-	 * Check the buffer containing the on-disk inode for di_mode == 0.
-	 * This is to prevent xfs_bulkstat from picking up just reclaimed
-	 * inodes that have their in-core state initialized but not flushed
-	 * to disk yet. This is a temporary hack that would require a proper
-	 * fix in the future.
-	 */
-	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
-	    !XFS_DINODE_GOOD_VERSION(dip->di_version) ||
-	    !dip->di_mode)
-		return 0;
-	if (flags & BULKSTAT_FG_QUICK) {
-		*dipp = dip;
-		return 1;
-	}
-	/* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
-	aformat = dip->di_aformat;
-	if ((XFS_DFORK_Q(dip) == 0) ||
-	    (aformat == XFS_DINODE_FMT_LOCAL) ||
-	    (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) {
-		*dipp = dip;
-		return 1;
-	}
-	return 1;
-}

 #define XFS_BULKSTAT_UBLEFT(ubleft)	((ubleft) >= statstruct_size)
@@ -342,10 +196,8 @@ xfs_bulkstat(
 	xfs_ino_t		*lastinop, /* last inode returned */
 	int			*ubcountp, /* size of buffer/count returned */
 	bulkstat_one_pf		formatter, /* func that'd fill a single buf */
-	void			*private_data,/* private data for formatter */
 	size_t			statstruct_size, /* sizeof struct filling */
 	char			__user *ubuffer, /* buffer with inode stats */
-	int			flags,	/* defined in xfs_itable.h */
 	int			*done)	/* 1 if there are more stats to get */
 {
 	xfs_agblock_t		agbno=0;/* allocation group block number */
@@ -380,14 +232,12 @@ xfs_bulkstat(
 	int			ubelem;	/* spaces used in user's buffer */
 	int			ubused;	/* bytes used by formatter */
 	xfs_buf_t		*bp;	/* ptr to on-disk inode cluster buf */
-	xfs_dinode_t		*dip;	/* ptr into bp for specific inode */

 	/*
 	 * Get the last inode value, see if there's nothing to do.
 	 */
 	ino = (xfs_ino_t)*lastinop;
 	lastino = ino;
-	dip = NULL;
 	agno = XFS_INO_TO_AGNO(mp, ino);
 	agino = XFS_INO_TO_AGINO(mp, ino);
 	if (agno >= mp->m_sb.sb_agcount ||
@@ -612,37 +462,6 @@ xfs_bulkstat(
 					irbp->ir_startino) +
 					((chunkidx & nimask) >>
 					 mp->m_sb.sb_inopblog);
-
-				if (flags & (BULKSTAT_FG_QUICK |
-					     BULKSTAT_FG_INLINE)) {
-					int offset;
-
-					ino = XFS_AGINO_TO_INO(mp, agno,
-							       agino);
-					bno = XFS_AGB_TO_DADDR(mp, agno,
-							       agbno);
-
-					/*
-					 * Get the inode cluster buffer
-					 */
-					if (bp)
-						xfs_buf_relse(bp);
-
-					error = xfs_inotobp(mp, NULL, ino, &dip,
-							    &bp, &offset,
-							    XFS_IGET_BULKSTAT);
-
-					if (!error)
-						clustidx = offset / mp->m_sb.sb_inodesize;
-					if (XFS_TEST_ERROR(error != 0,
-							   mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
-							   XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
-						bp = NULL;
-						ubleft = 0;
-						rval = error;
-						break;
-					}
-				}
 			}
 			ino = XFS_AGINO_TO_INO(mp, agno, agino);
 			bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
@@ -658,35 +477,13 @@ xfs_bulkstat(
 			 * when the chunk is used up.
 			 */
 			irbp->ir_freecount++;
-			if (!xfs_bulkstat_use_dinode(mp, flags, bp,
-						     clustidx, &dip)) {
-				lastino = ino;
-				continue;
-			}
-			/*
-			 * If we need to do an iget, cannot hold bp.
-			 * Drop it, until starting the next cluster.
-			 */
-			if ((flags & BULKSTAT_FG_INLINE) && !dip) {
-				if (bp)
-					xfs_buf_relse(bp);
-				bp = NULL;
-			}

 			/*
 			 * Get the inode and fill in a single buffer.
-			 * BULKSTAT_FG_QUICK uses dip to fill it in.
-			 * BULKSTAT_FG_IGET uses igets.
-			 * BULKSTAT_FG_INLINE uses dip if we have an
-			 * inline attr fork, else igets.
-			 * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
-			 * This is also used to count inodes/blks, etc
-			 * in xfs_qm_quotacheck.
 			 */
 			ubused = statstruct_size;
-			error = formatter(mp, ino, ubufp,
-					  ubleft, private_data,
-					  bno, &ubused, dip, &fmterror);
+			error = formatter(mp, ino, ubufp, ubleft,
+					  &ubused, &fmterror);
 			if (fmterror == BULKSTAT_RV_NOTHING) {
 				if (error && error != ENOENT &&
 				     error != EINVAL) {
@@ -778,8 +575,7 @@ xfs_bulkstat_single(
 	 */

 	ino = (xfs_ino_t)*lastinop;
-	error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
-				 NULL, 0, NULL, NULL, &res);
+	error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res);
 	if (error) {
 		/*
 		 * Special case way failed, do it the "long" way
@@ -788,8 +584,7 @@ xfs_bulkstat_single(
 		(*lastinop)--;
 		count = 1;
 		if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
-				 NULL, sizeof(xfs_bstat_t), buffer,
-				 BULKSTAT_FG_IGET, done))
+				 sizeof(xfs_bstat_t), buffer, done))
 			return error;
 		if (count == 0 || (xfs_ino_t)*lastinop != ino)
 			return error == EFSCORRUPTED ?
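
With BULKSTAT_FG_QUICK and BULKSTAT_FG_INLINE gone, every bulkstat row is produced by a real xfs_iget() of an XFS_IGET_UNTRUSTED inode number, and the formatter callback loses its private_data, bno and on-disk-buffer arguments. A sketch of a caller after the change (the local variable names here are assumed, not from the patch):

	int	count = ubcount;	/* entries requested by userspace */
	int	done = 0;
	int	error;

	error = xfs_bulkstat(mp, &lastino, &count,
			     xfs_bulkstat_one,		/* formatter */
			     sizeof(xfs_bstat_t),	/* statstruct_size */
			     ubuffer,			/* user buffer */
			     &done);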
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 20792bf45946..97295d91d170 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -27,10 +27,7 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
 	xfs_ino_t	ino,
 	void		__user *buffer,
 	int		ubsize,
-	void		*private_data,
-	xfs_daddr_t	bno,
 	int		*ubused,
-	void		*dip,
 	int		*stat);

 /*
@@ -41,13 +38,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
 #define BULKSTAT_RV_GIVEUP	2

 /*
- * Values for bulkstat flag argument.
- */
-#define BULKSTAT_FG_IGET	0x1	/* Go through the buffer cache */
-#define BULKSTAT_FG_QUICK	0x2	/* No iget, walk the dinode cluster */
-#define BULKSTAT_FG_INLINE	0x4	/* No iget if inline attrs */
-
-/*
  * Return stat information in bulk (by-inode) for the filesystem.
  */
 int					/* error status */
@@ -56,10 +46,8 @@ xfs_bulkstat(
 	xfs_ino_t	*lastino,	/* last inode returned */
 	int		*count,		/* size of buffer/count returned */
 	bulkstat_one_pf formatter,	/* func that'd fill a single buf */
-	void		*private_data,	/* private data for formatter */
 	size_t		statstruct_size,/* sizeof struct that we're filling */
 	char		__user *ubuffer,/* buffer with inode stats */
-	int		flags,		/* flag to control access method */
 	int		*done);		/* 1 if there are more stats to get */

 int
@@ -82,9 +70,7 @@ xfs_bulkstat_one_int(
 	void			__user *buffer,
 	int			ubsize,
 	bulkstat_one_fmt_pf	formatter,
-	xfs_daddr_t		bno,
 	int			*ubused,
-	void			*dibuff,
 	int			*stat);

90int 76int
@@ -93,10 +79,7 @@ xfs_bulkstat_one(
93 xfs_ino_t ino, 79 xfs_ino_t ino,
94 void __user *buffer, 80 void __user *buffer,
95 int ubsize, 81 int ubsize,
96 void *private_data,
97 xfs_daddr_t bno,
98 int *ubused, 82 int *ubused,
99 void *dibuff,
100 int *stat); 83 int *stat);
101 84
102typedef int (*inumbers_fmt_pf)( 85typedef int (*inumbers_fmt_pf)(
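
For out-of-tree callers, the slimmed-down bulkstat_one_pf means a custom formatter reduces to a thin wrapper around xfs_bulkstat_one_int(), mirroring xfs_bulkstat_one() itself. A hypothetical example (my_bulkstat_one and my_bulkstat_fmt are invented names):

	/* Hypothetical formatter matching the new bulkstat_one_pf signature. */
	STATIC int
	my_bulkstat_one(
		struct xfs_mount	*mp,
		xfs_ino_t		ino,
		void __user		*buffer,
		int			ubsize,
		int			*ubused,
		int			*stat)
	{
		return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
					    my_bulkstat_fmt, ubused, stat);
	}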
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 14a69aec2c0b..9ac5cfab27b9 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -132,15 +132,10 @@ xlog_align(
 	int		nbblks,
 	xfs_buf_t	*bp)
 {
-	xfs_daddr_t	offset;
-	xfs_caddr_t	ptr;
+	xfs_daddr_t	offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);

-	offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1);
-	ptr = XFS_BUF_PTR(bp) + BBTOB(offset);
-
-	ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp));
-
-	return ptr;
+	ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
+	return XFS_BUF_PTR(bp) + BBTOB(offset);
 }


@@ -3203,7 +3198,7 @@ xlog_recover_process_one_iunlink(
 	int	error;

 	ino = XFS_AGINO_TO_INO(mp, agno, agino);
-	error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0);
+	error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
 	if (error)
 		goto fail;

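
The xlog_align() shrink also tightens the ASSERT: it now bounds the whole span (offset + nbblks) against the buffer size instead of comparing derived pointers. A worked example, assuming 4 KiB log sectors (l_sectBBsize == 8):

	/* blk_no = 21, sector size = 8 basic blocks (assumed values) */
	xfs_daddr_t offset = 21 & (8 - 1);	/* = 5 basic blocks */
	/* the returned pointer is BBTOB(5) = 5 * 512 = 2560 bytes into the
	 * buffer, and the ASSERT requires BBTOB(5 + nbblks) <= XFS_BUF_SIZE(bp) */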
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d7bf38c8cd1c..69f62d8b2816 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -268,10 +268,10 @@ xfs_sb_validate_fsb_count(
 
 #if XFS_BIG_BLKNOS     /* Limited by ULONG_MAX of page cache index */
 	if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
-		return E2BIG;
+		return EFBIG;
 #else                  /* Limited by UINT_MAX of sectors */
 	if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
-		return E2BIG;
+		return EFBIG;
 #endif
 	return 0;
 }
@@ -393,7 +393,7 @@ xfs_mount_validate_sb(
 	    xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
 		xfs_fs_mount_cmn_err(flags,
 			"file system too large to be mounted on this system.");
-		return XFS_ERROR(E2BIG);
+		return XFS_ERROR(EFBIG);
 	}

 	if (unlikely(sbp->sb_inprogress)) {
@@ -413,17 +413,6 @@ xfs_mount_validate_sb(
 	return 0;
 }

-STATIC void
-xfs_initialize_perag_icache(
-	xfs_perag_t	*pag)
-{
-	if (!pag->pag_ici_init) {
-		rwlock_init(&pag->pag_ici_lock);
-		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
-		pag->pag_ici_init = 1;
-	}
-}
-
 int
 xfs_initialize_perag(
 	xfs_mount_t	*mp,
@@ -436,13 +425,8 @@ xfs_initialize_perag(
 	xfs_agino_t	agino;
 	xfs_ino_t	ino;
 	xfs_sb_t	*sbp = &mp->m_sb;
-	xfs_ino_t	max_inum = XFS_MAXINUMBER_32;
 	int		error = -ENOMEM;

-	/* Check to see if the filesystem can overflow 32 bit inodes */
-	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
-	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
-
 	/*
 	 * Walk the current per-ag tree so we don't try to initialise AGs
 	 * that already exist (growfs case). Allocate and insert all the
@@ -456,11 +440,18 @@ xfs_initialize_perag(
 		}
 		if (!first_initialised)
 			first_initialised = index;
+
 		pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
 		if (!pag)
 			goto out_unwind;
+		pag->pag_agno = index;
+		pag->pag_mount = mp;
+		rwlock_init(&pag->pag_ici_lock);
+		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+
 		if (radix_tree_preload(GFP_NOFS))
 			goto out_unwind;
+
 		spin_lock(&mp->m_perag_lock);
 		if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
 			BUG();
@@ -469,25 +460,26 @@ xfs_initialize_perag(
 			error = -EEXIST;
 			goto out_unwind;
 		}
-		pag->pag_agno = index;
-		pag->pag_mount = mp;
 		spin_unlock(&mp->m_perag_lock);
 		radix_tree_preload_end();
 	}

-	/* Clear the mount flag if no inode can overflow 32 bits
-	 * on this filesystem, or if specifically requested..
+	/*
+	 * If we mount with the inode64 option, or no inode overflows
+	 * the legacy 32-bit address space clear the inode32 option.
 	 */
-	if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > max_inum) {
+	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
+	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
+
+	if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
 		mp->m_flags |= XFS_MOUNT_32BITINODES;
-	} else {
+	else
 		mp->m_flags &= ~XFS_MOUNT_32BITINODES;
-	}

-	/* If we can overflow then setup the ag headers accordingly */
 	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
-		/* Calculate how much should be reserved for inodes to
-		 * meet the max inode percentage.
+		/*
+		 * Calculate how much should be reserved for inodes to meet
+		 * the max inode percentage.
 		 */
 		if (mp->m_maxicount) {
 			__uint64_t	icount;
@@ -500,30 +492,28 @@ xfs_initialize_perag(
 		} else {
 			max_metadata = agcount;
 		}
+
 		for (index = 0; index < agcount; index++) {
 			ino = XFS_AGINO_TO_INO(mp, index, agino);
-			if (ino > max_inum) {
+			if (ino > XFS_MAXINUMBER_32) {
 				index++;
 				break;
 			}

-			/* This ag is preferred for inodes */
 			pag = xfs_perag_get(mp, index);
 			pag->pagi_inodeok = 1;
 			if (index < max_metadata)
 				pag->pagf_metadata = 1;
-			xfs_initialize_perag_icache(pag);
 			xfs_perag_put(pag);
 		}
 	} else {
-		/* Setup default behavior for smaller filesystems */
 		for (index = 0; index < agcount; index++) {
 			pag = xfs_perag_get(mp, index);
 			pag->pagi_inodeok = 1;
-			xfs_initialize_perag_icache(pag);
 			xfs_perag_put(pag);
 		}
 	}
+
 	if (maxagi)
 		*maxagi = index;
 	return 0;
@@ -1009,7 +999,7 @@ xfs_check_sizes(xfs_mount_t *mp)
 	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
 	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
 		cmn_err(CE_WARN, "XFS: size check 1 failed");
-		return XFS_ERROR(E2BIG);
+		return XFS_ERROR(EFBIG);
 	}
 	error = xfs_read_buf(mp, mp->m_ddev_targp,
 					d - XFS_FSS_TO_BB(mp, 1),
@@ -1019,7 +1009,7 @@ xfs_check_sizes(xfs_mount_t *mp)
 	} else {
 		cmn_err(CE_WARN, "XFS: size check 2 failed");
 		if (error == ENOSPC)
-			error = XFS_ERROR(E2BIG);
+			error = XFS_ERROR(EFBIG);
 		return error;
 	}

@@ -1027,7 +1017,7 @@ xfs_check_sizes(xfs_mount_t *mp)
 		d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
 		if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
 			cmn_err(CE_WARN, "XFS: size check 3 failed");
-			return XFS_ERROR(E2BIG);
+			return XFS_ERROR(EFBIG);
 		}
 		error = xfs_read_buf(mp, mp->m_logdev_targp,
 					d - XFS_FSB_TO_BB(mp, 1),
@@ -1037,7 +1027,7 @@ xfs_check_sizes(xfs_mount_t *mp)
 		} else {
 			cmn_err(CE_WARN, "XFS: size check 3 failed");
 			if (error == ENOSPC)
-				error = XFS_ERROR(E2BIG);
+				error = XFS_ERROR(EFBIG);
 			return error;
 		}
 	}
@@ -1254,7 +1244,7 @@ xfs_mountfs(
 	 * Allocate and initialize the per-ag data.
 	 */
 	spin_lock_init(&mp->m_perag_lock);
-	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS);
+	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
 	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
 	if (error) {
 		cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
@@ -1310,7 +1300,7 @@ xfs_mountfs(
 	 * Get and sanity-check the root inode.
 	 * Save the pointer to it in the mount structure.
 	 */
-	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
+	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip);
 	if (error) {
 		cmn_err(CE_WARN, "XFS: failed to read root inode");
 		goto out_log_dealloc;
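
The E2BIG -> EFBIG conversions in this file (and in xfs_rtalloc.c below) are purely about reporting a sensible errno: E2BIG is "argument list too long", an exec()-time error, while EFBIG is "file too large", which is what an oversized device or block count actually means. A trivial userspace check of the two strings:

	#include <errno.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		printf("E2BIG: %s\n", strerror(E2BIG));	/* Argument list too long */
		printf("EFBIG: %s\n", strerror(EFBIG));	/* File too large */
		return 0;
	}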
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1d2c7eed4eda..5761087ee8ea 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,7 +259,7 @@ typedef struct xfs_mount {
259 wait_queue_head_t m_wait_single_sync_task; 259 wait_queue_head_t m_wait_single_sync_task;
260 __int64_t m_update_flags; /* sb flags we need to update 260 __int64_t m_update_flags; /* sb flags we need to update
261 on the next remount,rw */ 261 on the next remount,rw */
262 struct list_head m_mplist; /* inode shrinker mount list */ 262 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
263} xfs_mount_t; 263} xfs_mount_t;
264 264
265/* 265/*
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6be05f756d59..a2d32ce335aa 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2247,7 +2247,7 @@ xfs_rtmount_init(
 		cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu",
 			(unsigned long long) XFS_BB_TO_FSB(mp, d),
 			(unsigned long long) mp->m_sb.sb_rblocks);
-		return XFS_ERROR(E2BIG);
+		return XFS_ERROR(EFBIG);
 	}
 	error = xfs_read_buf(mp, mp->m_rtdev_targp,
 				d - XFS_FSB_TO_BB(mp, 1),
@@ -2256,7 +2256,7 @@ xfs_rtmount_init(
 		cmn_err(CE_WARN,
 	"XFS: realtime mount -- xfs_read_buf failed, returned %d", error);
 		if (error == ENOSPC)
-			return XFS_ERROR(E2BIG);
+			return XFS_ERROR(EFBIG);
 		return error;
 	}
 	xfs_buf_relse(bp);
@@ -2277,12 +2277,12 @@ xfs_rtmount_inodes(
 	sbp = &mp->m_sb;
 	if (sbp->sb_rbmino == NULLFSINO)
 		return 0;
-	error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip, 0);
+	error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
 	if (error)
 		return error;
 	ASSERT(mp->m_rbmip != NULL);
 	ASSERT(sbp->sb_rsumino != NULLFSINO);
-	error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0);
+	error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
 	if (error) {
 		IRELE(mp->m_rbmip);
 		return error;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index b2d67adb6a08..ff614c29b441 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -147,7 +147,16 @@ xfs_growfs_rt(
 # define xfs_rtfree_extent(t,b,l)                       (ENOSYS)
 # define xfs_rtpick_extent(m,t,l,rb)                    (ENOSYS)
 # define xfs_growfs_rt(mp,in)                           (ENOSYS)
-# define xfs_rtmount_init(m)    (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
+static inline int		/* error */
+xfs_rtmount_init(
+	xfs_mount_t	*mp)	/* file system mount structure */
+{
+	if (mp->m_sb.sb_rblocks == 0)
+		return 0;
+
+	cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT");
+	return ENOSYS;
+}
 # define xfs_rtmount_inodes(m)  (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
 # define xfs_rtunmount_inodes(m)
 #endif	/* CONFIG_XFS_RT */
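
The macro being replaced only compiled because every caller happened to pass a variable literally named mp: its parameter is m, but the body expands (mp). The static inline removes that trap and adds a diagnostic at mount time. A minimal illustration of the old hazard (hypothetical caller):

	#define xfs_rtmount_init(m)	(((mp)->m_sb.sb_rblocks == 0) ? 0 : (ENOSYS))

	int try_mount(struct xfs_mount *fsmp)	/* hypothetical */
	{
		return xfs_rtmount_init(fsmp);	/* breaks: no "mp" in scope */
	}

Note that the sibling xfs_rtmount_inodes() macro on the following line keeps the same m-versus-mp quirk.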
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ce558efa2ea0..28547dfce037 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -48,134 +48,489 @@
48 48
49kmem_zone_t *xfs_trans_zone; 49kmem_zone_t *xfs_trans_zone;
50 50
51
51/* 52/*
52 * Reservation functions here avoid a huge stack in xfs_trans_init 53 * Various log reservation values.
53 * due to register overflow from temporaries in the calculations. 54 *
55 * These are based on the size of the file system block because that is what
56 * most transactions manipulate. Each adds in an additional 128 bytes per
57 * item logged to try to account for the overhead of the transaction mechanism.
58 *
59 * Note: Most of the reservations underestimate the number of allocation
60 * groups into which they could free extents in the xfs_bmap_finish() call.
61 * This is because the number in the worst case is quite high and quite
62 * unusual. In order to fix this we need to change xfs_bmap_finish() to free
63 * extents in only a single AG at a time. This will require changes to the
64 * EFI code as well, however, so that the EFI for the extents not freed is
65 * logged again in each transaction. See SGI PV #261917.
66 *
67 * Reservation functions here avoid a huge stack in xfs_trans_init due to
68 * register overflow from temporaries in the calculations.
69 */
70
71
72/*
73 * In a write transaction we can allocate a maximum of 2
74 * extents. This gives:
75 * the inode getting the new extents: inode size
76 * the inode's bmap btree: max depth * block size
77 * the agfs of the ags from which the extents are allocated: 2 * sector
78 * the superblock free block counter: sector size
79 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
80 * And the bmap_finish transaction can free bmap blocks in a join:
81 * the agfs of the ags containing the blocks: 2 * sector size
82 * the agfls of the ags containing the blocks: 2 * sector size
83 * the super block free block counter: sector size
84 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
54 */ 85 */
55STATIC uint 86STATIC uint
56xfs_calc_write_reservation(xfs_mount_t *mp) 87xfs_calc_write_reservation(
88 struct xfs_mount *mp)
57{ 89{
58 return XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 90 return XFS_DQUOT_LOGRES(mp) +
91 MAX((mp->m_sb.sb_inodesize +
92 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
93 2 * mp->m_sb.sb_sectsize +
94 mp->m_sb.sb_sectsize +
95 XFS_ALLOCFREE_LOG_RES(mp, 2) +
96 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
97 XFS_ALLOCFREE_LOG_COUNT(mp, 2))),
98 (2 * mp->m_sb.sb_sectsize +
99 2 * mp->m_sb.sb_sectsize +
100 mp->m_sb.sb_sectsize +
101 XFS_ALLOCFREE_LOG_RES(mp, 2) +
102 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
59} 103}
60 104
105/*
106 * In truncating a file we free up to two extents at once. We can modify:
107 * the inode being truncated: inode size
108 * the inode's bmap btree: (max depth + 1) * block size
109 * And the bmap_finish transaction can free the blocks and bmap blocks:
110 * the agf for each of the ags: 4 * sector size
111 * the agfl for each of the ags: 4 * sector size
112 * the super block to reflect the freed blocks: sector size
113 * worst case split in allocation btrees per extent assuming 4 extents:
114 * 4 exts * 2 trees * (2 * max depth - 1) * block size
115 * the inode btree: max depth * blocksize
116 * the allocation btrees: 2 trees * (max depth - 1) * block size
117 */
61STATIC uint 118STATIC uint
62xfs_calc_itruncate_reservation(xfs_mount_t *mp) 119xfs_calc_itruncate_reservation(
120 struct xfs_mount *mp)
63{ 121{
64 return XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 122 return XFS_DQUOT_LOGRES(mp) +
123 MAX((mp->m_sb.sb_inodesize +
124 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) +
125 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
126 (4 * mp->m_sb.sb_sectsize +
127 4 * mp->m_sb.sb_sectsize +
128 mp->m_sb.sb_sectsize +
129 XFS_ALLOCFREE_LOG_RES(mp, 4) +
130 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) +
131 128 * 5 +
132 XFS_ALLOCFREE_LOG_RES(mp, 1) +
133 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
134 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
65} 135}
66 136
137/*
138 * In renaming a files we can modify:
139 * the four inodes involved: 4 * inode size
140 * the two directory btrees: 2 * (max depth + v2) * dir block size
141 * the two directory bmap btrees: 2 * max depth * block size
142 * And the bmap_finish transaction can free dir and bmap blocks (two sets
143 * of bmap blocks) giving:
144 * the agf for the ags in which the blocks live: 3 * sector size
145 * the agfl for the ags in which the blocks live: 3 * sector size
146 * the superblock for the free block count: sector size
147 * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
148 */
67STATIC uint 149STATIC uint
68xfs_calc_rename_reservation(xfs_mount_t *mp) 150xfs_calc_rename_reservation(
151 struct xfs_mount *mp)
69{ 152{
70 return XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 153 return XFS_DQUOT_LOGRES(mp) +
154 MAX((4 * mp->m_sb.sb_inodesize +
155 2 * XFS_DIROP_LOG_RES(mp) +
156 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))),
157 (3 * mp->m_sb.sb_sectsize +
158 3 * mp->m_sb.sb_sectsize +
159 mp->m_sb.sb_sectsize +
160 XFS_ALLOCFREE_LOG_RES(mp, 3) +
161 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
71} 162}
72 163
164/*
165 * For creating a link to an inode:
166 * the parent directory inode: inode size
167 * the linked inode: inode size
168 * the directory btree could split: (max depth + v2) * dir block size
169 * the directory bmap btree could join or split: (max depth + v2) * blocksize
170 * And the bmap_finish transaction can free some bmap blocks giving:
171 * the agf for the ag in which the blocks live: sector size
172 * the agfl for the ag in which the blocks live: sector size
173 * the superblock for the free block count: sector size
174 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
175 */
73STATIC uint 176STATIC uint
74xfs_calc_link_reservation(xfs_mount_t *mp) 177xfs_calc_link_reservation(
178 struct xfs_mount *mp)
75{ 179{
76 return XFS_CALC_LINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 180 return XFS_DQUOT_LOGRES(mp) +
181 MAX((mp->m_sb.sb_inodesize +
182 mp->m_sb.sb_inodesize +
183 XFS_DIROP_LOG_RES(mp) +
184 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
185 (mp->m_sb.sb_sectsize +
186 mp->m_sb.sb_sectsize +
187 mp->m_sb.sb_sectsize +
188 XFS_ALLOCFREE_LOG_RES(mp, 1) +
189 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
77} 190}
78 191
+/*
+ * For removing a directory entry we can modify:
+ * the parent directory inode: inode size
+ * the removed inode: inode size
+ * the directory btree could join: (max depth + v2) * dir block size
+ * the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free the dir and bmap blocks giving:
+ * the agf for the ag in which the blocks live: 2 * sector size
+ * the agfl for the ag in which the blocks live: 2 * sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_remove_reservation(xfs_mount_t *mp)
+xfs_calc_remove_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+	return XFS_DQUOT_LOGRES(mp) +
+		MAX((mp->m_sb.sb_inodesize +
+		     mp->m_sb.sb_inodesize +
+		     XFS_DIROP_LOG_RES(mp) +
+		     128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
+		    (2 * mp->m_sb.sb_sectsize +
+		     2 * mp->m_sb.sb_sectsize +
+		     mp->m_sb.sb_sectsize +
+		     XFS_ALLOCFREE_LOG_RES(mp, 2) +
+		     128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
 }
 
+/*
+ * For symlink we can modify:
+ * the parent directory inode: inode size
+ * the new inode: inode size
+ * the inode btree entry: 1 block
+ * the directory btree: (max depth + v2) * dir block size
+ * the directory inode's bmap btree: (max depth + v2) * block size
+ * the blocks for the symlink: 1 kB
+ * Or in the first xact we allocate some inodes giving:
+ * the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_symlink_reservation(xfs_mount_t *mp)
+xfs_calc_symlink_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+	return XFS_DQUOT_LOGRES(mp) +
+		MAX((mp->m_sb.sb_inodesize +
+		     mp->m_sb.sb_inodesize +
+		     XFS_FSB_TO_B(mp, 1) +
+		     XFS_DIROP_LOG_RES(mp) +
+		     1024 +
+		     128 * (4 + XFS_DIROP_LOG_COUNT(mp))),
+		    (2 * mp->m_sb.sb_sectsize +
+		     XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
+		     XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
+		     XFS_ALLOCFREE_LOG_RES(mp, 1) +
+		     128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+			    XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
 }
 
+/*
+ * For create we can modify:
+ * the parent directory inode: inode size
+ * the new inode: inode size
+ * the inode btree entry: block size
+ * the superblock for the nlink flag: sector size
+ * the directory btree: (max depth + v2) * dir block size
+ * the directory inode's bmap btree: (max depth + v2) * block size
+ * Or in the first xact we allocate some inodes giving:
+ * the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ * the superblock for the nlink flag: sector size
+ * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_create_reservation(xfs_mount_t *mp)
+xfs_calc_create_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+	return XFS_DQUOT_LOGRES(mp) +
+		MAX((mp->m_sb.sb_inodesize +
+		     mp->m_sb.sb_inodesize +
+		     mp->m_sb.sb_sectsize +
+		     XFS_FSB_TO_B(mp, 1) +
+		     XFS_DIROP_LOG_RES(mp) +
+		     128 * (3 + XFS_DIROP_LOG_COUNT(mp))),
+		    (3 * mp->m_sb.sb_sectsize +
+		     XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
+		     XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
+		     XFS_ALLOCFREE_LOG_RES(mp, 1) +
+		     128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+			    XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
 }
 
+/*
+ * Making a new directory is the same as creating a new file.
+ */
 STATIC uint
-xfs_calc_mkdir_reservation(xfs_mount_t *mp)
+xfs_calc_mkdir_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+	return xfs_calc_create_reservation(mp);
 }
 
+/*
+ * In freeing an inode we can modify:
+ * the inode being freed: inode size
+ * the super block free inode counter: sector size
+ * the agi hash list and counters: sector size
+ * the inode btree entry: block size
+ * the on disk inode before ours in the agi hash list: inode cluster size
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_ifree_reservation(xfs_mount_t *mp)
+xfs_calc_ifree_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+	return XFS_DQUOT_LOGRES(mp) +
+		mp->m_sb.sb_inodesize +
+		mp->m_sb.sb_sectsize +
+		mp->m_sb.sb_sectsize +
+		XFS_FSB_TO_B(mp, 1) +
+		MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
+		    XFS_INODE_CLUSTER_SIZE(mp)) +
+		128 * 5 +
+		XFS_ALLOCFREE_LOG_RES(mp, 1) +
+		128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+		       XFS_ALLOCFREE_LOG_COUNT(mp, 1));
 }
 
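[Annotation: note the (__uint16_t) cast carried over verbatim from the old macro: the one-block term is truncated to 16 bits before being compared with the inode cluster size, so on very-large-block filesystems the MAX() is evaluated on a truncated value. The patch preserves the macro's behaviour rather than revisiting it.]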
+/*
+ * When only changing the inode we log the inode and possibly the superblock.
+ * We also add a bit of slop for the transaction stuff.
+ */
 STATIC uint
-xfs_calc_ichange_reservation(xfs_mount_t *mp)
+xfs_calc_ichange_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+	return XFS_DQUOT_LOGRES(mp) +
+		mp->m_sb.sb_inodesize +
+		mp->m_sb.sb_sectsize +
+		512;
+
 }
 
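[Annotation: worked example under an assumed geometry of 256-byte inodes and 512-byte sectors: 256 + 512 + 512 bytes of slop = 1280 bytes, plus whatever XFS_DQUOT_LOGRES(mp) contributes for quota items.]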
+/*
+ * Growing the data section of the filesystem.
+ * superblock
+ * agi and agf
+ * allocation btrees
+ */
 STATIC uint
-xfs_calc_growdata_reservation(xfs_mount_t *mp)
+xfs_calc_growdata_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_GROWDATA_LOG_RES(mp);
+	return mp->m_sb.sb_sectsize * 3 +
+		XFS_ALLOCFREE_LOG_RES(mp, 1) +
+		128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1));
 }
 
+/*
+ * Growing the rt section of the filesystem.
+ * In the first set of transactions (ALLOC) we allocate space to the
+ * bitmap or summary files.
+ * superblock: sector size
+ * agf of the ag from which the extent is allocated: sector size
+ * bmap btree for bitmap/summary inode: max depth * blocksize
+ * bitmap/summary inode: inode size
+ * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
+ */
 STATIC uint
-xfs_calc_growrtalloc_reservation(xfs_mount_t *mp)
+xfs_calc_growrtalloc_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_GROWRTALLOC_LOG_RES(mp);
+	return 2 * mp->m_sb.sb_sectsize +
+		XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
+		mp->m_sb.sb_inodesize +
+		XFS_ALLOCFREE_LOG_RES(mp, 1) +
+		128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
+		       XFS_ALLOCFREE_LOG_COUNT(mp, 1));
 }
 
+/*
+ * Growing the rt section of the filesystem.
+ * In the second set of transactions (ZERO) we zero the new metadata blocks.
+ * one bitmap/summary block: blocksize
+ */
 STATIC uint
-xfs_calc_growrtzero_reservation(xfs_mount_t *mp)
+xfs_calc_growrtzero_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_GROWRTZERO_LOG_RES(mp);
+	return mp->m_sb.sb_blocksize + 128;
 }
 
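[Annotation: on an assumed 4 kB-block filesystem this comes to 4096 + 128 = 4224 bytes, since the transaction logs a single freshly zeroed block.]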
+/*
+ * Growing the rt section of the filesystem.
+ * In the third set of transactions (FREE) we update metadata without
+ * allocating any new blocks.
+ * superblock: sector size
+ * bitmap inode: inode size
+ * summary inode: inode size
+ * one bitmap block: blocksize
+ * summary blocks: new summary size
+ */
 STATIC uint
-xfs_calc_growrtfree_reservation(xfs_mount_t *mp)
+xfs_calc_growrtfree_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_GROWRTFREE_LOG_RES(mp);
+	return mp->m_sb.sb_sectsize +
+		2 * mp->m_sb.sb_inodesize +
+		mp->m_sb.sb_blocksize +
+		mp->m_rsumsize +
+		128 * 5;
 }
 
+/*
+ * Logging the inode modification timestamp on a synchronous write.
+ * inode
+ */
 STATIC uint
-xfs_calc_swrite_reservation(xfs_mount_t *mp)
+xfs_calc_swrite_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_SWRITE_LOG_RES(mp);
+	return mp->m_sb.sb_inodesize + 128;
 }
 
+/*
+ * Logging the inode mode bits when writing a setuid/setgid file
+ * inode
+ */
 STATIC uint
 xfs_calc_writeid_reservation(xfs_mount_t *mp)
 {
-	return XFS_CALC_WRITEID_LOG_RES(mp);
+	return mp->m_sb.sb_inodesize + 128;
 }
 
+/*
+ * Converting the inode from non-attributed to attributed.
+ * the inode being converted: inode size
+ * agf block and superblock (for block allocation)
+ * the new block (directory sized)
+ * bmap blocks for the new directory block
+ * allocation btrees
+ */
 STATIC uint
-xfs_calc_addafork_reservation(xfs_mount_t *mp)
+xfs_calc_addafork_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+	return XFS_DQUOT_LOGRES(mp) +
+		mp->m_sb.sb_inodesize +
+		mp->m_sb.sb_sectsize * 2 +
+		mp->m_dirblksize +
+		XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) +
+		XFS_ALLOCFREE_LOG_RES(mp, 1) +
+		128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 +
+		       XFS_ALLOCFREE_LOG_COUNT(mp, 1));
 }
 
+/*
+ * Removing the attribute fork of a file
+ * the inode being truncated: inode size
+ * the inode's bmap btree: max depth * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ * the agf for each of the ags: 4 * sector size
+ * the agfl for each of the ags: 4 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * worst case split in allocation btrees per extent assuming 4 extents:
+ * 4 exts * 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_attrinval_reservation(xfs_mount_t *mp)
+xfs_calc_attrinval_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_ATTRINVAL_LOG_RES(mp);
+	return MAX((mp->m_sb.sb_inodesize +
+		    XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+		    128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))),
+		   (4 * mp->m_sb.sb_sectsize +
+		    4 * mp->m_sb.sb_sectsize +
+		    mp->m_sb.sb_sectsize +
+		    XFS_ALLOCFREE_LOG_RES(mp, 4) +
+		    128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
 }
 
+/*
+ * Setting an attribute.
+ * the inode getting the attribute
+ * the superblock for allocations
+ * the agfs extents are allocated from
+ * the attribute btree * max depth
+ * the inode allocation btree
+ * Since attribute transaction space is dependent on the size of the attribute,
+ * the calculation is done partially at mount time and partially at runtime.
+ */
 STATIC uint
-xfs_calc_attrset_reservation(xfs_mount_t *mp)
+xfs_calc_attrset_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+	return XFS_DQUOT_LOGRES(mp) +
+		mp->m_sb.sb_inodesize +
+		mp->m_sb.sb_sectsize +
+		XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
+		128 * (2 + XFS_DA_NODE_MAXDEPTH);
 }
 
+/*
+ * Removing an attribute.
+ * the inode: inode size
+ * the attribute btree could join: max depth * block size
+ * the inode bmap btree could join or split: max depth * block size
+ * And the bmap_finish transaction can free the attr blocks freed giving:
+ * the agf for the ag in which the blocks live: 2 * sector size
+ * the agfl for the ag in which the blocks live: 2 * sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_attrrm_reservation(xfs_mount_t *mp)
+xfs_calc_attrrm_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+	return XFS_DQUOT_LOGRES(mp) +
+		MAX((mp->m_sb.sb_inodesize +
+		     XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
+		     XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+		     128 * (1 + XFS_DA_NODE_MAXDEPTH +
+			    XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
+		    (2 * mp->m_sb.sb_sectsize +
+		     2 * mp->m_sb.sb_sectsize +
+		     mp->m_sb.sb_sectsize +
+		     XFS_ALLOCFREE_LOG_RES(mp, 2) +
+		     128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
 }
 
+/*
+ * Clearing a bad agino number in an agi hash bucket.
+ */
 STATIC uint
-xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
+xfs_calc_clear_agi_bucket_reservation(
+	struct xfs_mount	*mp)
 {
-	return XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp);
+	return mp->m_sb.sb_sectsize + 128;
 }
 
 /*
@@ -184,11 +539,10 @@ xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
  */
 void
 xfs_trans_init(
-	xfs_mount_t	*mp)
+	struct xfs_mount	*mp)
 {
-	xfs_trans_reservations_t *resp;
+	struct xfs_trans_reservations *resp = &mp->m_reservations;
 
-	resp = &(mp->m_reservations);
 	resp->tr_write = xfs_calc_write_reservation(mp);
 	resp->tr_itruncate = xfs_calc_itruncate_reservation(mp);
 	resp->tr_rename = xfs_calc_rename_reservation(mp);
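[Annotation: with every value now precomputed into mp->m_reservations at mount time, callers keep using the accessor macros unchanged. A sketch of the consuming side from that kernel era; the transaction type and log count shown are illustrative, and error handling is elided:]

	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
	error = xfs_trans_reserve(tp, 0,			/* disk blocks */
				  XFS_ITRUNCATE_LOG_RES(mp),	/* log space */
				  0,				/* rt extents */
				  XFS_TRANS_PERM_LOG_RES,
				  XFS_ITRUNCATE_LOG_COUNT);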
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 8c69e7824f68..e639e8e9a2a9 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -300,24 +300,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
 
 
 /*
- * Various log reservation values.
- * These are based on the size of the file system block
- * because that is what most transactions manipulate.
- * Each adds in an additional 128 bytes per item logged to
- * try to account for the overhead of the transaction mechanism.
- *
- * Note:
- * Most of the reservations underestimate the number of allocation
- * groups into which they could free extents in the xfs_bmap_finish()
- * call. This is because the number in the worst case is quite high
- * and quite unusual. In order to fix this we need to change
- * xfs_bmap_finish() to free extents in only a single AG at a time.
- * This will require changes to the EFI code as well, however, so that
- * the EFI for the extents not freed is logged again in each transaction.
- * See bug 261917.
- */
-
-/*
  * Per-extent log reservation for the allocation btree changes
  * involved in freeing or allocating an extent.
  * 2 trees * (2 blocks/level * max depth - 1) * block size
@@ -341,429 +323,36 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
 	(XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
 	 XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
 
-/*
- * In a write transaction we can allocate a maximum of 2
- * extents. This gives:
- * the inode getting the new extents: inode size
- * the inode's bmap btree: max depth * block size
- * the agfs of the ags from which the extents are allocated: 2 * sector
- * the superblock free block counter: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
- * the agfs of the ags containing the blocks: 2 * sector size
- * the agfls of the ags containing the blocks: 2 * sector size
- * the super block free block counter: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_WRITE_LOG_RES(mp) \
-	(MAX( \
-	 ((mp)->m_sb.sb_inodesize + \
-	  XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
-	  (2 * (mp)->m_sb.sb_sectsize) + \
-	  (mp)->m_sb.sb_sectsize + \
-	  XFS_ALLOCFREE_LOG_RES(mp, 2) + \
-	  (128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\
-	 ((2 * (mp)->m_sb.sb_sectsize) + \
-	  (2 * (mp)->m_sb.sb_sectsize) + \
-	  (mp)->m_sb.sb_sectsize + \
-	  XFS_ALLOCFREE_LOG_RES(mp, 2) + \
-	  (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
 
 #define	XFS_WRITE_LOG_RES(mp)	((mp)->m_reservations.tr_write)
-
-/*
- * In truncating a file we free up to two extents at once. We can modify:
- * the inode being truncated: inode size
- * the inode's bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- * the agf for each of the ags: 4 * sector size
- * the agfl for each of the ags: 4 * sector size
- * the super block to reflect the freed blocks: sector size
- * worst case split in allocation btrees per extent assuming 4 extents:
- * 4 exts * 2 trees * (2 * max depth - 1) * block size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define	XFS_CALC_ITRUNCATE_LOG_RES(mp) \
-	(MAX( \
-	 ((mp)->m_sb.sb_inodesize + \
-	  XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \
-	  (128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
-	 ((4 * (mp)->m_sb.sb_sectsize) + \
-	  (4 * (mp)->m_sb.sb_sectsize) + \
-	  (mp)->m_sb.sb_sectsize + \
-	  XFS_ALLOCFREE_LOG_RES(mp, 4) + \
-	  (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \
-	  (128 * 5) + \
-	  XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-	  (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
-	   XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
 #define	XFS_ITRUNCATE_LOG_RES(mp)	((mp)->m_reservations.tr_itruncate)
-
-/*
- * In renaming a files we can modify:
- * the four inodes involved: 4 * inode size
- * the two directory btrees: 2 * (max depth + v2) * dir block size
- * the two directory bmap btrees: 2 * max depth * block size
- * And the bmap_finish transaction can free dir and bmap blocks (two sets
- * of bmap blocks) giving:
- * the agf for the ags in which the blocks live: 3 * sector size
- * the agfl for the ags in which the blocks live: 3 * sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define	XFS_CALC_RENAME_LOG_RES(mp) \
-	(MAX( \
-	 ((4 * (mp)->m_sb.sb_inodesize) + \
-	  (2 * XFS_DIROP_LOG_RES(mp)) + \
-	  (128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \
-	 ((3 * (mp)->m_sb.sb_sectsize) + \
-	  (3 * (mp)->m_sb.sb_sectsize) + \
-	  (mp)->m_sb.sb_sectsize + \
-	  XFS_ALLOCFREE_LOG_RES(mp, 3) + \
-	  (128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))))))
-
 #define	XFS_RENAME_LOG_RES(mp)	((mp)->m_reservations.tr_rename)
-
-/*
- * For creating a link to an inode:
- * the parent directory inode: inode size
- * the linked inode: inode size
- * the directory btree could split: (max depth + v2) * dir block size
- * the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free some bmap blocks giving:
- * the agf for the ag in which the blocks live: sector size
- * the agfl for the ag in which the blocks live: sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-#define	XFS_CALC_LINK_LOG_RES(mp) \
-	(MAX( \
-	 ((mp)->m_sb.sb_inodesize + \
-	  (mp)->m_sb.sb_inodesize + \
-	  XFS_DIROP_LOG_RES(mp) + \
-	  (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
-	 ((mp)->m_sb.sb_sectsize + \
-	  (mp)->m_sb.sb_sectsize + \
-	  (mp)->m_sb.sb_sectsize + \
-	  XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-	  (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
 #define	XFS_LINK_LOG_RES(mp)	((mp)->m_reservations.tr_link)
-
-/*
- * For removing a directory entry we can modify:
- * the parent directory inode: inode size
- * the removed inode: inode size
- * the directory btree could join: (max depth + v2) * dir block size
- * the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free the dir and bmap blocks giving:
- * the agf for the ag in which the blocks live: 2 * sector size
- * the agfl for the ag in which the blocks live: 2 * sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define	XFS_CALC_REMOVE_LOG_RES(mp) \
-	(MAX( \
-	 ((mp)->m_sb.sb_inodesize + \
-	  (mp)->m_sb.sb_inodesize + \
-	  XFS_DIROP_LOG_RES(mp) + \
-	  (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
-	 ((2 * (mp)->m_sb.sb_sectsize) + \
-	  (2 * (mp)->m_sb.sb_sectsize) + \
-	  (mp)->m_sb.sb_sectsize + \
-	  XFS_ALLOCFREE_LOG_RES(mp, 2) + \
-	  (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
-
 #define	XFS_REMOVE_LOG_RES(mp)	((mp)->m_reservations.tr_remove)
-
-/*
- * For symlink we can modify:
- * the parent directory inode: inode size
- * the new inode: inode size
- * the inode btree entry: 1 block
- * the directory btree: (max depth + v2) * dir block size
- * the directory inode's bmap btree: (max depth + v2) * block size
- * the blocks for the symlink: 1 kB
- * Or in the first xact we allocate some inodes giving:
- * the agi and agf of the ag getting the new inodes: 2 * sectorsize
- * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-#define	XFS_CALC_SYMLINK_LOG_RES(mp) \
-	(MAX( \
-	 ((mp)->m_sb.sb_inodesize + \
-	  (mp)->m_sb.sb_inodesize + \
-	  XFS_FSB_TO_B(mp, 1) + \
-	  XFS_DIROP_LOG_RES(mp) + \
-	  1024 + \
-	  (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \
-	 (2 * (mp)->m_sb.sb_sectsize + \
-	  XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
-	  XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
-	  XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-	  (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
-	   XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
 #define	XFS_SYMLINK_LOG_RES(mp)	((mp)->m_reservations.tr_symlink)
-
-/*
- * For create we can modify:
- * the parent directory inode: inode size
- * the new inode: inode size
- * the inode btree entry: block size
- * the superblock for the nlink flag: sector size
- * the directory btree: (max depth + v2) * dir block size
- * the directory inode's bmap btree: (max depth + v2) * block size
- * Or in the first xact we allocate some inodes giving:
- * the agi and agf of the ag getting the new inodes: 2 * sectorsize
- * the superblock for the nlink flag: sector size
- * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define	XFS_CALC_CREATE_LOG_RES(mp) \
-	(MAX( \
-	 ((mp)->m_sb.sb_inodesize + \
-	  (mp)->m_sb.sb_inodesize + \
-	  (mp)->m_sb.sb_sectsize + \
-	  XFS_FSB_TO_B(mp, 1) + \
-	  XFS_DIROP_LOG_RES(mp) + \
-	  (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \
-	 (3 * (mp)->m_sb.sb_sectsize + \
-	  XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
-	  XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
-	  XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-	  (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
-	   XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
 #define	XFS_CREATE_LOG_RES(mp)	((mp)->m_reservations.tr_create)
-
-/*
- * Making a new directory is the same as creating a new file.
- */
-#define	XFS_CALC_MKDIR_LOG_RES(mp)	XFS_CALC_CREATE_LOG_RES(mp)
-
 #define	XFS_MKDIR_LOG_RES(mp)	((mp)->m_reservations.tr_mkdir)
-
-/*
- * In freeing an inode we can modify:
- * the inode being freed: inode size
- * the super block free inode counter: sector size
- * the agi hash list and counters: sector size
- * the inode btree entry: block size
- * the on disk inode before ours in the agi hash list: inode cluster size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define	XFS_CALC_IFREE_LOG_RES(mp) \
-	((mp)->m_sb.sb_inodesize + \
-	 (mp)->m_sb.sb_sectsize + \
-	 (mp)->m_sb.sb_sectsize + \
-	 XFS_FSB_TO_B((mp), 1) + \
-	 MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
-	 (128 * 5) + \
-	 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-	 (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
-	  XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
-
 #define	XFS_IFREE_LOG_RES(mp)	((mp)->m_reservations.tr_ifree)
-
-/*
- * When only changing the inode we log the inode and possibly the superblock
- * We also add a bit of slop for the transaction stuff.
- */
-#define	XFS_CALC_ICHANGE_LOG_RES(mp)	((mp)->m_sb.sb_inodesize + \
-					 (mp)->m_sb.sb_sectsize + 512)
-
 #define	XFS_ICHANGE_LOG_RES(mp)	((mp)->m_reservations.tr_ichange)
-
-/*
- * Growing the data section of the filesystem.
- * superblock
- * agi and agf
- * allocation btrees
- */
-#define	XFS_CALC_GROWDATA_LOG_RES(mp) \
-	((mp)->m_sb.sb_sectsize * 3 + \
-	 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-	 (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
 #define	XFS_GROWDATA_LOG_RES(mp)	((mp)->m_reservations.tr_growdata)
-
-/*
- * Growing the rt section of the filesystem.
- * In the first set of transactions (ALLOC) we allocate space to the
- * bitmap or summary files.
- * superblock: sector size
- * agf of the ag from which the extent is allocated: sector size
- * bmap btree for bitmap/summary inode: max depth * blocksize
- * bitmap/summary inode: inode size
- * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
- */
-#define	XFS_CALC_GROWRTALLOC_LOG_RES(mp) \
-	(2 * (mp)->m_sb.sb_sectsize + \
-	 XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
-	 (mp)->m_sb.sb_inodesize + \
-	 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-	 (128 * \
-	  (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \
-	   XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
 #define	XFS_GROWRTALLOC_LOG_RES(mp)	((mp)->m_reservations.tr_growrtalloc)
-
-/*
- * Growing the rt section of the filesystem.
- * In the second set of transactions (ZERO) we zero the new metadata blocks.
- * one bitmap/summary block: blocksize
- */
-#define	XFS_CALC_GROWRTZERO_LOG_RES(mp) \
-	((mp)->m_sb.sb_blocksize + 128)
-
 #define	XFS_GROWRTZERO_LOG_RES(mp)	((mp)->m_reservations.tr_growrtzero)
-
-/*
- * Growing the rt section of the filesystem.
- * In the third set of transactions (FREE) we update metadata without
- * allocating any new blocks.
- * superblock: sector size
- * bitmap inode: inode size
- * summary inode: inode size
- * one bitmap block: blocksize
- * summary blocks: new summary size
- */
-#define	XFS_CALC_GROWRTFREE_LOG_RES(mp) \
-	((mp)->m_sb.sb_sectsize + \
-	 2 * (mp)->m_sb.sb_inodesize + \
-	 (mp)->m_sb.sb_blocksize + \
-	 (mp)->m_rsumsize + \
-	 (128 * 5))
-
 #define	XFS_GROWRTFREE_LOG_RES(mp)	((mp)->m_reservations.tr_growrtfree)
-
-/*
- * Logging the inode modification timestamp on a synchronous write.
- * inode
- */
-#define	XFS_CALC_SWRITE_LOG_RES(mp) \
-	((mp)->m_sb.sb_inodesize + 128)
-
 #define	XFS_SWRITE_LOG_RES(mp)	((mp)->m_reservations.tr_swrite)
-
 /*
  * Logging the inode timestamps on an fsync -- same as SWRITE
  * as long as SWRITE logs the entire inode core
  */
 #define	XFS_FSYNC_TS_LOG_RES(mp)	((mp)->m_reservations.tr_swrite)
-
-/*
- * Logging the inode mode bits when writing a setuid/setgid file
- * inode
- */
-#define	XFS_CALC_WRITEID_LOG_RES(mp) \
-	((mp)->m_sb.sb_inodesize + 128)
-
 #define	XFS_WRITEID_LOG_RES(mp)	((mp)->m_reservations.tr_swrite)
-
-/*
- * Converting the inode from non-attributed to attributed.
- * the inode being converted: inode size
- * agf block and superblock (for block allocation)
- * the new block (directory sized)
- * bmap blocks for the new directory block
- * allocation btrees
- */
-#define	XFS_CALC_ADDAFORK_LOG_RES(mp) \
-	((mp)->m_sb.sb_inodesize + \
-	 (mp)->m_sb.sb_sectsize * 2 + \
-	 (mp)->m_dirblksize + \
-	 XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \
-	 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-	 (128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
-	  XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
 #define	XFS_ADDAFORK_LOG_RES(mp)	((mp)->m_reservations.tr_addafork)
-
-/*
- * Removing the attribute fork of a file
- * the inode being truncated: inode size
- * the inode's bmap btree: max depth * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- * the agf for each of the ags: 4 * sector size
- * the agfl for each of the ags: 4 * sector size
- * the super block to reflect the freed blocks: sector size
- * worst case split in allocation btrees per extent assuming 4 extents:
- * 4 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define	XFS_CALC_ATTRINVAL_LOG_RES(mp) \
-	(MAX( \
-	 ((mp)->m_sb.sb_inodesize + \
-	  XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
-	  (128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \
-	 ((4 * (mp)->m_sb.sb_sectsize) + \
-	  (4 * (mp)->m_sb.sb_sectsize) + \
-	  (mp)->m_sb.sb_sectsize + \
-	  XFS_ALLOCFREE_LOG_RES(mp, 4) + \
-	  (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))))))
-
 #define	XFS_ATTRINVAL_LOG_RES(mp)	((mp)->m_reservations.tr_attrinval)
-
-/*
- * Setting an attribute.
- * the inode getting the attribute
- * the superblock for allocations
- * the agfs extents are allocated from
- * the attribute btree * max depth
- * the inode allocation btree
- * Since attribute transaction space is dependent on the size of the attribute,
- * the calculation is done partially at mount time and partially at runtime.
- */
-#define	XFS_CALC_ATTRSET_LOG_RES(mp) \
-	((mp)->m_sb.sb_inodesize + \
-	 (mp)->m_sb.sb_sectsize + \
-	 XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
-	 (128 * (2 + XFS_DA_NODE_MAXDEPTH)))
-
 #define	XFS_ATTRSET_LOG_RES(mp, ext) \
 	((mp)->m_reservations.tr_attrset + \
 	 (ext * (mp)->m_sb.sb_sectsize) + \
 	 (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
 	 (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
-
-/*
- * Removing an attribute.
- * the inode: inode size
- * the attribute btree could join: max depth * block size
- * the inode bmap btree could join or split: max depth * block size
- * And the bmap_finish transaction can free the attr blocks freed giving:
- * the agf for the ag in which the blocks live: 2 * sector size
- * the agfl for the ag in which the blocks live: 2 * sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define	XFS_CALC_ATTRRM_LOG_RES(mp) \
-	(MAX( \
-	 ((mp)->m_sb.sb_inodesize + \
-	  XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
-	  XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
-	  (128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
-	 ((2 * (mp)->m_sb.sb_sectsize) + \
-	  (2 * (mp)->m_sb.sb_sectsize) + \
-	  (mp)->m_sb.sb_sectsize + \
-	  XFS_ALLOCFREE_LOG_RES(mp, 2) + \
-	  (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
-
 #define	XFS_ATTRRM_LOG_RES(mp)	((mp)->m_reservations.tr_attrrm)
-
-/*
- * Clearing a bad agino number in an agi hash bucket.
- */
-#define	XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \
-	((mp)->m_sb.sb_sectsize + 128)
-
 #define	XFS_CLEAR_AGI_BUCKET_LOG_RES(mp)	((mp)->m_reservations.tr_clearagi)
 
 
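[Annotation: XFS_ATTRSET_LOG_RES() survives above as the one reservation assembled partly at runtime: the mount-time tr_attrset value plus terms that scale with the number of extents. A self-contained sketch of the per-extent part, with the geometry constants as assumptions rather than values from any real filesystem:]

#include <stdio.h>

/* Assumed geometry, for illustration only. */
#define SECTSIZE	512u
#define BLOCKSIZE	4096u
#define ATTR_BM_LEVELS	3u	/* stands in for XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK) */

/* The runtime terms XFS_ATTRSET_LOG_RES(mp, ext) adds on top of tr_attrset. */
static unsigned int attrset_runtime(unsigned int ext)
{
	return ext * SECTSIZE +
	       ext * BLOCKSIZE * ATTR_BM_LEVELS +
	       128 * (ext + ext * ATTR_BM_LEVELS);
}

int main(void)
{
	unsigned int ext;

	for (ext = 1; ext <= 4; ext++)
		printf("ext=%u -> +%u bytes over tr_attrset\n",
		       ext, attrset_runtime(ext));
	return 0;
}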
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 785ff101da0a..2559dfec946b 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -62,7 +62,7 @@ xfs_trans_iget(
 {
 	int			error;
 
-	error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0);
+	error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
 	if (!error && tp)
 		xfs_trans_ijoin(tp, *ipp, lock_flags);
 	return error;
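[Annotation: the trailing 0 dropped from this xfs_iget() call, and from the one in xfs_lookup() below, is presumably the on-disk block-number hint parameter removed from the inode lookup path elsewhere in this series; the remaining six arguments are unchanged.]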
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 9d376be0ea38..c1646838898f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -267,7 +267,7 @@ xfs_setattr(
 	if (code) {
 		ASSERT(tp == NULL);
 		lock_flags &= ~XFS_ILOCK_EXCL;
-		ASSERT(lock_flags == XFS_IOLOCK_EXCL);
+		ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock);
 		goto error_return;
 	}
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
@@ -1269,7 +1269,7 @@ xfs_lookup(
 	if (error)
 		goto out;
 
-	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
+	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
 	if (error)
 		goto out_free_name;
 