aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_dir.c1
-rw-r--r--fs/Kconfig21
-rw-r--r--fs/adfs/dir.c1
-rw-r--r--fs/affs/dir.c1
-rw-r--r--fs/afs/mntpt.c2
-rw-r--r--fs/afs/write.c2
-rw-r--r--fs/autofs4/root.c2
-rw-r--r--fs/befs/linuxvfs.c1
-rw-r--r--fs/binfmt_flat.c4
-rw-r--r--fs/binfmt_misc.c4
-rw-r--r--fs/bio.c57
-rw-r--r--fs/block_dev.c5
-rw-r--r--fs/buffer.c19
-rw-r--r--fs/cifs/CHANGES16
-rw-r--r--fs/cifs/README44
-rw-r--r--fs/cifs/asn1.c263
-rw-r--r--fs/cifs/cifs_debug.c53
-rw-r--r--fs/cifs/cifs_dfs_ref.c2
-rw-r--r--fs/cifs/cifs_spnego.c22
-rw-r--r--fs/cifs/cifs_spnego.h2
-rw-r--r--fs/cifs/cifsencrypt.c1
-rw-r--r--fs/cifs/cifsfs.c73
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h3
-rw-r--r--fs/cifs/cifspdu.h2
-rw-r--r--fs/cifs/cifsproto.h24
-rw-r--r--fs/cifs/cifssmb.c46
-rw-r--r--fs/cifs/connect.c186
-rw-r--r--fs/cifs/dir.c67
-rw-r--r--fs/cifs/dns_resolve.c7
-rw-r--r--fs/cifs/file.c25
-rw-r--r--fs/cifs/inode.c423
-rw-r--r--fs/cifs/sess.c13
-rw-r--r--fs/cifs/transport.c1
-rw-r--r--fs/compat.c8
-rw-r--r--fs/configfs/configfs_internal.h3
-rw-r--r--fs/configfs/dir.c219
-rw-r--r--fs/configfs/symlink.c26
-rw-r--r--fs/cramfs/inode.c84
-rw-r--r--fs/dcache.c102
-rw-r--r--fs/devpts/inode.c16
-rw-r--r--fs/dlm/config.c203
-rw-r--r--fs/dlm/user.c10
-rw-r--r--fs/dquot.c33
-rw-r--r--fs/efs/namei.c3
-rw-r--r--fs/eventpoll.c5
-rw-r--r--fs/ext3/super.c3
-rw-r--r--fs/ext4/acl.c188
-rw-r--r--fs/ext4/balloc.c14
-rw-r--r--fs/ext4/dir.c20
-rw-r--r--fs/ext4/ext4.h5
-rw-r--r--fs/ext4/ext4_extents.h4
-rw-r--r--fs/ext4/ext4_jbd2.h8
-rw-r--r--fs/ext4/extents.c168
-rw-r--r--fs/ext4/ialloc.c60
-rw-r--r--fs/ext4/inode.c634
-rw-r--r--fs/ext4/mballoc.c293
-rw-r--r--fs/ext4/mballoc.h10
-rw-r--r--fs/ext4/migrate.c3
-rw-r--r--fs/ext4/resize.c82
-rw-r--r--fs/ext4/super.c320
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/fat/file.c6
-rw-r--r--fs/fat/inode.c10
-rw-r--r--fs/fcntl.c140
-rw-r--r--fs/file.c61
-rw-r--r--fs/inode.c1
-rw-r--r--fs/ioprio.c8
-rw-r--r--fs/jbd/commit.c6
-rw-r--r--fs/jbd/transaction.c4
-rw-r--r--fs/jbd2/commit.c26
-rw-r--r--fs/jbd2/journal.c1
-rw-r--r--fs/jbd2/transaction.c4
-rw-r--r--fs/jffs2/jffs2_fs_i.h1
-rw-r--r--fs/jffs2/summary.c40
-rw-r--r--fs/jffs2/summary.h6
-rw-r--r--fs/libfs.c4
-rw-r--r--fs/lockd/svc4proc.c4
-rw-r--r--fs/lockd/svcproc.c4
-rw-r--r--fs/namei.c17
-rw-r--r--fs/namespace.c16
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfsd/export.c6
-rw-r--r--fs/nfsd/nfs4acl.c2
-rw-r--r--fs/nfsd/nfs4proc.c17
-rw-r--r--fs/ntfs/aops.c2
-rw-r--r--fs/ntfs/compress.c2
-rw-r--r--fs/ntfs/mft.c4
-rw-r--r--fs/ntfs/namei.c89
-rw-r--r--fs/ntfs/usnjrnl.h4
-rw-r--r--fs/ocfs2/aops.c29
-rw-r--r--fs/ocfs2/cluster/netdebug.c26
-rw-r--r--fs/ocfs2/cluster/tcp.c44
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h32
-rw-r--r--fs/ocfs2/dir.c11
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/journal.c192
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/ocfs2.h2
-rw-r--r--fs/ocfs2/ocfs2_fs.h5
-rw-r--r--fs/ocfs2/stackglue.c7
-rw-r--r--fs/ocfs2/super.c12
-rw-r--r--fs/omfs/bitmap.c11
-rw-r--r--fs/omfs/dir.c2
-rw-r--r--fs/omfs/file.c39
-rw-r--r--fs/omfs/inode.c6
-rw-r--r--fs/open.c56
-rw-r--r--fs/proc/array.c59
-rw-r--r--fs/proc/base.c11
-rw-r--r--fs/proc/generic.c28
-rw-r--r--fs/proc/nommu.c4
-rw-r--r--fs/proc/proc_misc.c7
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/readdir.c8
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/journal.c6
-rw-r--r--fs/reiserfs/super.c17
-rw-r--r--fs/romfs/inode.c37
-rw-r--r--fs/seq_file.c25
-rw-r--r--fs/splice.c2
-rw-r--r--fs/ubifs/budget.c33
-rw-r--r--fs/ubifs/commit.c3
-rw-r--r--fs/ubifs/debug.c27
-rw-r--r--fs/ubifs/debug.h143
-rw-r--r--fs/ubifs/dir.c24
-rw-r--r--fs/ubifs/file.c8
-rw-r--r--fs/ubifs/find.c9
-rw-r--r--fs/ubifs/io.c14
-rw-r--r--fs/ubifs/journal.c110
-rw-r--r--fs/ubifs/log.c4
-rw-r--r--fs/ubifs/misc.h16
-rw-r--r--fs/ubifs/orphan.c4
-rw-r--r--fs/ubifs/super.c48
-rw-r--r--fs/ubifs/tnc_commit.c37
-rw-r--r--fs/ubifs/ubifs-media.h4
-rw-r--r--fs/ubifs/ubifs.h33
-rw-r--r--fs/ubifs/xattr.c54
-rw-r--r--fs/ufs/super.c2
-rw-r--r--fs/xfs/Makefile3
-rw-r--r--fs/xfs/linux-2.6/kmem.c6
-rw-r--r--fs/xfs/linux-2.6/kmem.h4
-rw-r--r--fs/xfs/linux-2.6/sema.h52
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c30
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c378
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c531
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.h15
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h14
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.h12
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c1087
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c72
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h154
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c330
-rw-r--r--fs/xfs/quota/xfs_dquot.c41
-rw-r--r--fs/xfs/quota/xfs_dquot.h31
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c12
-rw-r--r--fs/xfs/quota/xfs_qm.c38
-rw-r--r--fs/xfs/quota/xfs_qm.h2
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c7
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c16
-rw-r--r--fs/xfs/quota/xfs_quota_priv.h3
-rw-r--r--fs/xfs/support/ktrace.c4
-rw-r--r--fs/xfs/support/uuid.c8
-rw-r--r--fs/xfs/support/uuid.h1
-rw-r--r--fs/xfs/xfs_acl.c69
-rw-r--r--fs/xfs/xfs_acl.h18
-rw-r--r--fs/xfs/xfs_arch.h68
-rw-r--r--fs/xfs/xfs_attr.c718
-rw-r--r--fs/xfs/xfs_attr.h91
-rw-r--r--fs/xfs/xfs_attr_leaf.c174
-rw-r--r--fs/xfs/xfs_attr_leaf.h31
-rw-r--r--fs/xfs/xfs_attr_sf.h10
-rw-r--r--fs/xfs/xfs_bit.c103
-rw-r--r--fs/xfs/xfs_bit.h34
-rw-r--r--fs/xfs/xfs_bmap.c152
-rw-r--r--fs/xfs/xfs_bmap.h13
-rw-r--r--fs/xfs/xfs_bmap_btree.c76
-rw-r--r--fs/xfs/xfs_btree.c105
-rw-r--r--fs/xfs/xfs_btree.h8
-rw-r--r--fs/xfs/xfs_buf_item.c12
-rw-r--r--fs/xfs/xfs_clnt.h1
-rw-r--r--fs/xfs/xfs_da_btree.c48
-rw-r--r--fs/xfs/xfs_da_btree.h36
-rw-r--r--fs/xfs/xfs_dfrag.c37
-rw-r--r--fs/xfs/xfs_dir2.c125
-rw-r--r--fs/xfs/xfs_dir2.h6
-rw-r--r--fs/xfs/xfs_dir2_block.c56
-rw-r--r--fs/xfs/xfs_dir2_data.c5
-rw-r--r--fs/xfs/xfs_dir2_leaf.c93
-rw-r--r--fs/xfs/xfs_dir2_node.c402
-rw-r--r--fs/xfs/xfs_dir2_sf.c83
-rw-r--r--fs/xfs/xfs_dir2_sf.h6
-rw-r--r--fs/xfs/xfs_dir2_trace.c20
-rw-r--r--fs/xfs/xfs_dmapi.h3
-rw-r--r--fs/xfs/xfs_error.c18
-rw-r--r--fs/xfs/xfs_error.h13
-rw-r--r--fs/xfs/xfs_extfree_item.c6
-rw-r--r--fs/xfs/xfs_filestream.c6
-rw-r--r--fs/xfs/xfs_fs.h4
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_ialloc_btree.c30
-rw-r--r--fs/xfs/xfs_iget.c48
-rw-r--r--fs/xfs/xfs_inode.c235
-rw-r--r--fs/xfs/xfs_inode.h49
-rw-r--r--fs/xfs/xfs_inode_item.c18
-rw-r--r--fs/xfs/xfs_iomap.c10
-rw-r--r--fs/xfs/xfs_itable.c10
-rw-r--r--fs/xfs/xfs_log.c135
-rw-r--r--fs/xfs/xfs_log.h2
-rw-r--r--fs/xfs/xfs_log_priv.h20
-rw-r--r--fs/xfs/xfs_log_recover.c28
-rw-r--r--fs/xfs/xfs_mount.c188
-rw-r--r--fs/xfs/xfs_mount.h32
-rw-r--r--fs/xfs/xfs_mru_cache.c21
-rw-r--r--fs/xfs/xfs_rename.c22
-rw-r--r--fs/xfs/xfs_rtalloc.c21
-rw-r--r--fs/xfs/xfs_rw.c2
-rw-r--r--fs/xfs/xfs_sb.h17
-rw-r--r--fs/xfs/xfs_trans.c79
-rw-r--r--fs/xfs/xfs_trans.h12
-rw-r--r--fs/xfs/xfs_trans_buf.c12
-rw-r--r--fs/xfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/xfs_trans_item.c74
-rw-r--r--fs/xfs/xfs_utils.c4
-rw-r--r--fs/xfs/xfs_utils.h3
-rw-r--r--fs/xfs/xfs_vfsops.c623
-rw-r--r--fs/xfs/xfs_vfsops.h5
-rw-r--r--fs/xfs/xfs_vnodeops.c908
-rw-r--r--fs/xfs/xfs_vnodeops.h12
239 files changed, 7202 insertions, 6676 deletions
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 88e3787c6ea9..e298fe194093 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -119,6 +119,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
119 119
120const struct file_operations v9fs_dir_operations = { 120const struct file_operations v9fs_dir_operations = {
121 .read = generic_read_dir, 121 .read = generic_read_dir,
122 .llseek = generic_file_llseek,
122 .readdir = v9fs_dir_readdir, 123 .readdir = v9fs_dir_readdir,
123 .open = v9fs_file_open, 124 .open = v9fs_file_open,
124 .release = v9fs_dir_release, 125 .release = v9fs_dir_release,
diff --git a/fs/Kconfig b/fs/Kconfig
index d3873583360b..abccb5dab9a8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1930,6 +1930,16 @@ config CIFS_WEAK_PW_HASH
1930 1930
1931 If unsure, say N. 1931 If unsure, say N.
1932 1932
1933config CIFS_UPCALL
1934 bool "Kerberos/SPNEGO advanced session setup"
1935 depends on CIFS && KEYS
1936 help
1937 Enables an upcall mechanism for CIFS which accesses
1938 userspace helper utilities to provide SPNEGO packaged (RFC 4178)
1939 Kerberos tickets which are needed to mount to certain secure servers
1940 (for which more secure Kerberos authentication is required). If
1941 unsure, say N.
1942
1933config CIFS_XATTR 1943config CIFS_XATTR
1934 bool "CIFS extended attributes" 1944 bool "CIFS extended attributes"
1935 depends on CIFS 1945 depends on CIFS
@@ -1982,17 +1992,6 @@ config CIFS_EXPERIMENTAL
1982 (which is disabled by default). See the file fs/cifs/README 1992 (which is disabled by default). See the file fs/cifs/README
1983 for more details. If unsure, say N. 1993 for more details. If unsure, say N.
1984 1994
1985config CIFS_UPCALL
1986 bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
1987 depends on CIFS_EXPERIMENTAL
1988 depends on KEYS
1989 help
1990 Enables an upcall mechanism for CIFS which accesses
1991 userspace helper utilities to provide SPNEGO packaged (RFC 4178)
1992 Kerberos tickets which are needed to mount to certain secure servers
1993 (for which more secure Kerberos authentication is required). If
1994 unsure, say N.
1995
1996config CIFS_DFS_UPCALL 1995config CIFS_DFS_UPCALL
1997 bool "DFS feature support (EXPERIMENTAL)" 1996 bool "DFS feature support (EXPERIMENTAL)"
1998 depends on CIFS_EXPERIMENTAL 1997 depends on CIFS_EXPERIMENTAL
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index fc1a8dc64d78..85a30e929800 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -197,6 +197,7 @@ out:
197 197
198const struct file_operations adfs_dir_operations = { 198const struct file_operations adfs_dir_operations = {
199 .read = generic_read_dir, 199 .read = generic_read_dir,
200 .llseek = generic_file_llseek,
200 .readdir = adfs_readdir, 201 .readdir = adfs_readdir,
201 .fsync = file_fsync, 202 .fsync = file_fsync,
202}; 203};
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 6e3f282424b0..7b36904dbeac 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -19,6 +19,7 @@ static int affs_readdir(struct file *, void *, filldir_t);
19 19
20const struct file_operations affs_dir_operations = { 20const struct file_operations affs_dir_operations = {
21 .read = generic_read_dir, 21 .read = generic_read_dir,
22 .llseek = generic_file_llseek,
22 .readdir = affs_readdir, 23 .readdir = affs_readdir,
23 .fsync = file_fsync, 24 .fsync = file_fsync,
24}; 25};
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2f5503902c37..78db4953a800 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -232,7 +232,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
232 } 232 }
233 233
234 mntget(newmnt); 234 mntget(newmnt);
235 err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts); 235 err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts);
236 switch (err) { 236 switch (err) {
237 case 0: 237 case 0:
238 path_put(&nd->path); 238 path_put(&nd->path);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9a849ad3c489..065b4e10681a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -404,7 +404,7 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
404 page = pages[loop]; 404 page = pages[loop];
405 if (page->index > wb->last) 405 if (page->index > wb->last)
406 break; 406 break;
407 if (TestSetPageLocked(page)) 407 if (!trylock_page(page))
408 break; 408 break;
409 if (!PageDirty(page) || 409 if (!PageDirty(page) ||
410 page_private(page) != (unsigned long) wb) { 410 page_private(page) != (unsigned long) wb) {
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index bcfb2dc0a61b..2a41c2a7fc52 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -36,6 +36,7 @@ const struct file_operations autofs4_root_operations = {
36 .release = dcache_dir_close, 36 .release = dcache_dir_close,
37 .read = generic_read_dir, 37 .read = generic_read_dir,
38 .readdir = dcache_readdir, 38 .readdir = dcache_readdir,
39 .llseek = dcache_dir_lseek,
39 .ioctl = autofs4_root_ioctl, 40 .ioctl = autofs4_root_ioctl,
40}; 41};
41 42
@@ -44,6 +45,7 @@ const struct file_operations autofs4_dir_operations = {
44 .release = dcache_dir_close, 45 .release = dcache_dir_close,
45 .read = generic_read_dir, 46 .read = generic_read_dir,
46 .readdir = dcache_readdir, 47 .readdir = dcache_readdir,
48 .llseek = dcache_dir_lseek,
47}; 49};
48 50
49const struct inode_operations autofs4_indirect_root_inode_operations = { 51const struct inode_operations autofs4_indirect_root_inode_operations = {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 02c6e62b72f8..740f53672a8a 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -66,6 +66,7 @@ static struct kmem_cache *befs_inode_cachep;
66static const struct file_operations befs_dir_operations = { 66static const struct file_operations befs_dir_operations = {
67 .read = generic_read_dir, 67 .read = generic_read_dir,
68 .readdir = befs_readdir, 68 .readdir = befs_readdir,
69 .llseek = generic_file_llseek,
69}; 70};
70 71
71static const struct inode_operations befs_dir_inode_operations = { 72static const struct inode_operations befs_dir_inode_operations = {
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 56372ecf1690..dfc0197905ca 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -914,7 +914,9 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
914 /* Stash our initial stack pointer into the mm structure */ 914 /* Stash our initial stack pointer into the mm structure */
915 current->mm->start_stack = (unsigned long )sp; 915 current->mm->start_stack = (unsigned long )sp;
916 916
917 917#ifdef FLAT_PLAT_INIT
918 FLAT_PLAT_INIT(regs);
919#endif
918 DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n", 920 DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n",
919 (int)regs, (int)start_addr, (int)current->mm->start_stack); 921 (int)regs, (int)start_addr, (int)current->mm->start_stack);
920 922
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 756205314c24..8d7e88e02e0f 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -120,8 +120,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
120 if (bprm->misc_bang) 120 if (bprm->misc_bang)
121 goto _ret; 121 goto _ret;
122 122
123 bprm->misc_bang = 1;
124
125 /* to keep locking time low, we copy the interpreter string */ 123 /* to keep locking time low, we copy the interpreter string */
126 read_lock(&entries_lock); 124 read_lock(&entries_lock);
127 fmt = check_file(bprm); 125 fmt = check_file(bprm);
@@ -199,6 +197,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
199 if (retval < 0) 197 if (retval < 0)
200 goto _error; 198 goto _error;
201 199
200 bprm->misc_bang = 1;
201
202 retval = search_binary_handler (bprm, regs); 202 retval = search_binary_handler (bprm, regs);
203 if (retval < 0) 203 if (retval < 0)
204 goto _error; 204 goto _error;
diff --git a/fs/bio.c b/fs/bio.c
index 25f1af0d81e5..3cba7ae34d75 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -77,11 +77,8 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
77 */ 77 */
78 78
79 bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); 79 bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
80 if (bvl) { 80 if (bvl)
81 struct biovec_slab *bp = bvec_slabs + *idx; 81 memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
82
83 memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
84 }
85 82
86 return bvl; 83 return bvl;
87} 84}
@@ -149,7 +146,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
149 goto out; 146 goto out;
150 } 147 }
151 bio->bi_flags |= idx << BIO_POOL_OFFSET; 148 bio->bi_flags |= idx << BIO_POOL_OFFSET;
152 bio->bi_max_vecs = bvec_slabs[idx].nr_vecs; 149 bio->bi_max_vecs = bvec_nr_vecs(idx);
153 } 150 }
154 bio->bi_io_vec = bvl; 151 bio->bi_io_vec = bvl;
155 } 152 }
@@ -472,20 +469,21 @@ static void bio_free_map_data(struct bio_map_data *bmd)
472 kfree(bmd); 469 kfree(bmd);
473} 470}
474 471
475static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) 472static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
473 gfp_t gfp_mask)
476{ 474{
477 struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL); 475 struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask);
478 476
479 if (!bmd) 477 if (!bmd)
480 return NULL; 478 return NULL;
481 479
482 bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL); 480 bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
483 if (!bmd->iovecs) { 481 if (!bmd->iovecs) {
484 kfree(bmd); 482 kfree(bmd);
485 return NULL; 483 return NULL;
486 } 484 }
487 485
488 bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL); 486 bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
489 if (bmd->sgvecs) 487 if (bmd->sgvecs)
490 return bmd; 488 return bmd;
491 489
@@ -494,8 +492,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count)
494 return NULL; 492 return NULL;
495} 493}
496 494
497static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, 495static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
498 int uncopy) 496 struct sg_iovec *iov, int iov_count, int uncopy)
499{ 497{
500 int ret = 0, i; 498 int ret = 0, i;
501 struct bio_vec *bvec; 499 struct bio_vec *bvec;
@@ -505,7 +503,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
505 503
506 __bio_for_each_segment(bvec, bio, i, 0) { 504 __bio_for_each_segment(bvec, bio, i, 0) {
507 char *bv_addr = page_address(bvec->bv_page); 505 char *bv_addr = page_address(bvec->bv_page);
508 unsigned int bv_len = bvec->bv_len; 506 unsigned int bv_len = iovecs[i].bv_len;
509 507
510 while (bv_len && iov_idx < iov_count) { 508 while (bv_len && iov_idx < iov_count) {
511 unsigned int bytes; 509 unsigned int bytes;
@@ -557,7 +555,7 @@ int bio_uncopy_user(struct bio *bio)
557 struct bio_map_data *bmd = bio->bi_private; 555 struct bio_map_data *bmd = bio->bi_private;
558 int ret; 556 int ret;
559 557
560 ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1); 558 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1);
561 559
562 bio_free_map_data(bmd); 560 bio_free_map_data(bmd);
563 bio_put(bio); 561 bio_put(bio);
@@ -599,7 +597,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
599 len += iov[i].iov_len; 597 len += iov[i].iov_len;
600 } 598 }
601 599
602 bmd = bio_alloc_map_data(nr_pages, iov_count); 600 bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL);
603 if (!bmd) 601 if (!bmd)
604 return ERR_PTR(-ENOMEM); 602 return ERR_PTR(-ENOMEM);
605 603
@@ -636,7 +634,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
636 * success 634 * success
637 */ 635 */
638 if (!write_to_vm) { 636 if (!write_to_vm) {
639 ret = __bio_copy_iov(bio, iov, iov_count, 0); 637 ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0);
640 if (ret) 638 if (ret)
641 goto cleanup; 639 goto cleanup;
642 } 640 }
@@ -945,19 +943,22 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
945{ 943{
946 struct bio_vec *bvec; 944 struct bio_vec *bvec;
947 const int read = bio_data_dir(bio) == READ; 945 const int read = bio_data_dir(bio) == READ;
948 char *p = bio->bi_private; 946 struct bio_map_data *bmd = bio->bi_private;
949 int i; 947 int i;
948 char *p = bmd->sgvecs[0].iov_base;
950 949
951 __bio_for_each_segment(bvec, bio, i, 0) { 950 __bio_for_each_segment(bvec, bio, i, 0) {
952 char *addr = page_address(bvec->bv_page); 951 char *addr = page_address(bvec->bv_page);
952 int len = bmd->iovecs[i].bv_len;
953 953
954 if (read && !err) 954 if (read && !err)
955 memcpy(p, addr, bvec->bv_len); 955 memcpy(p, addr, len);
956 956
957 __free_page(bvec->bv_page); 957 __free_page(bvec->bv_page);
958 p += bvec->bv_len; 958 p += len;
959 } 959 }
960 960
961 bio_free_map_data(bmd);
961 bio_put(bio); 962 bio_put(bio);
962} 963}
963 964
@@ -981,11 +982,21 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
981 const int nr_pages = end - start; 982 const int nr_pages = end - start;
982 struct bio *bio; 983 struct bio *bio;
983 struct bio_vec *bvec; 984 struct bio_vec *bvec;
985 struct bio_map_data *bmd;
984 int i, ret; 986 int i, ret;
987 struct sg_iovec iov;
988
989 iov.iov_base = data;
990 iov.iov_len = len;
991
992 bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask);
993 if (!bmd)
994 return ERR_PTR(-ENOMEM);
985 995
996 ret = -ENOMEM;
986 bio = bio_alloc(gfp_mask, nr_pages); 997 bio = bio_alloc(gfp_mask, nr_pages);
987 if (!bio) 998 if (!bio)
988 return ERR_PTR(-ENOMEM); 999 goto out_bmd;
989 1000
990 while (len) { 1001 while (len) {
991 struct page *page; 1002 struct page *page;
@@ -1019,14 +1030,18 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
1019 } 1030 }
1020 } 1031 }
1021 1032
1022 bio->bi_private = data; 1033 bio->bi_private = bmd;
1023 bio->bi_end_io = bio_copy_kern_endio; 1034 bio->bi_end_io = bio_copy_kern_endio;
1035
1036 bio_set_map_data(bmd, bio, &iov, 1);
1024 return bio; 1037 return bio;
1025cleanup: 1038cleanup:
1026 bio_for_each_segment(bvec, bio, i) 1039 bio_for_each_segment(bvec, bio, i)
1027 __free_page(bvec->bv_page); 1040 __free_page(bvec->bv_page);
1028 1041
1029 bio_put(bio); 1042 bio_put(bio);
1043out_bmd:
1044 bio_free_map_data(bmd);
1030 1045
1031 return ERR_PTR(ret); 1046 return ERR_PTR(ret);
1032} 1047}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index dcf37cada369..aff54219e049 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -941,8 +941,10 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
941 * hooks: /n/, see "layering violations". 941 * hooks: /n/, see "layering violations".
942 */ 942 */
943 ret = devcgroup_inode_permission(bdev->bd_inode, perm); 943 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
944 if (ret != 0) 944 if (ret != 0) {
945 bdput(bdev);
945 return ret; 946 return ret;
947 }
946 948
947 ret = -ENXIO; 949 ret = -ENXIO;
948 file->f_mapping = bdev->bd_inode->i_mapping; 950 file->f_mapping = bdev->bd_inode->i_mapping;
@@ -1234,6 +1236,7 @@ fail:
1234 bdev = ERR_PTR(error); 1236 bdev = ERR_PTR(error);
1235 goto out; 1237 goto out;
1236} 1238}
1239EXPORT_SYMBOL(lookup_bdev);
1237 1240
1238/** 1241/**
1239 * open_bdev_excl - open a block device by name and set it up for use 1242 * open_bdev_excl - open a block device by name and set it up for use
diff --git a/fs/buffer.c b/fs/buffer.c
index ca12a6bb82b1..ac78d4c19b3b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -580,7 +580,7 @@ EXPORT_SYMBOL(mark_buffer_async_write);
580/* 580/*
581 * The buffer's backing address_space's private_lock must be held 581 * The buffer's backing address_space's private_lock must be held
582 */ 582 */
583static inline void __remove_assoc_queue(struct buffer_head *bh) 583static void __remove_assoc_queue(struct buffer_head *bh)
584{ 584{
585 list_del_init(&bh->b_assoc_buffers); 585 list_del_init(&bh->b_assoc_buffers);
586 WARN_ON(!bh->b_assoc_map); 586 WARN_ON(!bh->b_assoc_map);
@@ -1720,7 +1720,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1720 */ 1720 */
1721 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 1721 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1722 lock_buffer(bh); 1722 lock_buffer(bh);
1723 } else if (test_set_buffer_locked(bh)) { 1723 } else if (!trylock_buffer(bh)) {
1724 redirty_page_for_writepage(wbc, page); 1724 redirty_page_for_writepage(wbc, page);
1725 continue; 1725 continue;
1726 } 1726 }
@@ -2926,14 +2926,17 @@ int submit_bh(int rw, struct buffer_head * bh)
2926 BUG_ON(!buffer_mapped(bh)); 2926 BUG_ON(!buffer_mapped(bh));
2927 BUG_ON(!bh->b_end_io); 2927 BUG_ON(!bh->b_end_io);
2928 2928
2929 if (buffer_ordered(bh) && (rw == WRITE)) 2929 /*
2930 rw = WRITE_BARRIER; 2930 * Mask in barrier bit for a write (could be either a WRITE or a
2931 * WRITE_SYNC
2932 */
2933 if (buffer_ordered(bh) && (rw & WRITE))
2934 rw |= WRITE_BARRIER;
2931 2935
2932 /* 2936 /*
2933 * Only clear out a write error when rewriting, should this 2937 * Only clear out a write error when rewriting
2934 * include WRITE_SYNC as well?
2935 */ 2938 */
2936 if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER)) 2939 if (test_set_buffer_req(bh) && (rw & WRITE))
2937 clear_buffer_write_io_error(bh); 2940 clear_buffer_write_io_error(bh);
2938 2941
2939 /* 2942 /*
@@ -3000,7 +3003,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
3000 3003
3001 if (rw == SWRITE || rw == SWRITE_SYNC) 3004 if (rw == SWRITE || rw == SWRITE_SYNC)
3002 lock_buffer(bh); 3005 lock_buffer(bh);
3003 else if (test_set_buffer_locked(bh)) 3006 else if (!trylock_buffer(bh))
3004 continue; 3007 continue;
3005 3008
3006 if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { 3009 if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 1f3465201fdf..06e521a945c3 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,19 @@
1Version 1.54
2------------
3Fix premature write failure on congested networks (we would give up
4on EAGAIN from the socket too quickly on large writes).
5Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
6Fix endian problems in acl (mode from/to cifs acl) on bigendian
7architectures. Fix problems with preserving timestamps on copying open
8files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit
9on parent directory when server supports Unix Extensions but not POSIX
10create. Update cifs.upcall version to handle new Kerberos sec flags
11(this requires update of cifs.upcall program from Samba). Fix memory leak
12on dns_upcall (resolving DFS referralls). Fix plain text password
13authentication (requires setting SecurityFlags to 0x30030 to enable
14lanman and plain text though). Fix writes to be at correct offset when
15file is open with O_APPEND and file is on a directio (forcediretio) mount.
16
1Version 1.53 17Version 1.53
2------------ 18------------
3DFS support added (Microsoft Distributed File System client support needed 19DFS support added (Microsoft Distributed File System client support needed
diff --git a/fs/cifs/README b/fs/cifs/README
index 2bd6fe556f88..bd2343d4c6a6 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -542,10 +542,20 @@ SecurityFlags Flags which control security negotiation and
542 hashing mechanisms (as "must use") on the other hand 542 hashing mechanisms (as "must use") on the other hand
543 does not make much sense. Default flags are 543 does not make much sense. Default flags are
544 0x07007 544 0x07007
545 (NTLM, NTLMv2 and packet signing allowed). Maximum 545 (NTLM, NTLMv2 and packet signing allowed). The maximum
546 allowable flags if you want to allow mounts to servers 546 allowable flags if you want to allow mounts to servers
547 using weaker password hashes is 0x37037 (lanman, 547 using weaker password hashes is 0x37037 (lanman,
548 plaintext, ntlm, ntlmv2, signing allowed): 548 plaintext, ntlm, ntlmv2, signing allowed). Some
549 SecurityFlags require the corresponding menuconfig
550 options to be enabled (lanman and plaintext require
551 CONFIG_CIFS_WEAK_PW_HASH for example). Enabling
552 plaintext authentication currently requires also
553 enabling lanman authentication in the security flags
554 because the cifs module only supports sending
555 laintext passwords using the older lanman dialect
556 form of the session setup SMB. (e.g. for authentication
557 using plain text passwords, set the SecurityFlags
558 to 0x30030):
549 559
550 may use packet signing 0x00001 560 may use packet signing 0x00001
551 must use packet signing 0x01001 561 must use packet signing 0x01001
@@ -642,8 +652,30 @@ The statistics for the number of total SMBs and oplock breaks are different in
642that they represent all for that share, not just those for which the server 652that they represent all for that share, not just those for which the server
643returned success. 653returned success.
644 654
645Also note that "cat /proc/fs/cifs/DebugData" will display information about 655Also note that "cat /proc/fs/cifs/DebugData" will display information about
646the active sessions and the shares that are mounted. 656the active sessions and the shares that are mounted.
647Enabling Kerberos (extended security) works when CONFIG_CIFS_EXPERIMENTAL is 657
648on but requires a user space helper (from the Samba project). NTLM and NTLMv2 and 658Enabling Kerberos (extended security) works but requires version 1.2 or later
649LANMAN support do not require this helper. 659of the helper program cifs.upcall to be present and to be configured in the
660/etc/request-key.conf file. The cifs.upcall helper program is from the Samba
661project(http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not
662require this helper. Note that NTLMv2 security (which does not require the
663cifs.upcall helper program), instead of using Kerberos, is sufficient for
664some use cases.
665
666Enabling DFS support (used to access shares transparently in an MS-DFS
667global name space) requires that CONFIG_CIFS_EXPERIMENTAL be enabled. In
668addition, DFS support for target shares which are specified as UNC
669names which begin with host names (rather than IP addresses) requires
670a user space helper (such as cifs.upcall) to be present in order to
671translate host names to ip address, and the user space helper must also
672be configured in the file /etc/request-key.conf
673
674To use cifs Kerberos and DFS support, the Linux keyutils package should be
675installed and something like the following lines should be added to the
676/etc/request-key.conf file:
677
678create cifs.spnego * * /usr/local/sbin/cifs.upcall %k
679create dns_resolver * * /usr/local/sbin/cifs.upcall %k
680
681
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 6bb440b257b0..1b09f1670061 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -476,6 +476,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
476 unsigned int cls, con, tag, oidlen, rc; 476 unsigned int cls, con, tag, oidlen, rc;
477 bool use_ntlmssp = false; 477 bool use_ntlmssp = false;
478 bool use_kerberos = false; 478 bool use_kerberos = false;
479 bool use_mskerberos = false;
479 480
480 *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ 481 *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/
481 482
@@ -483,6 +484,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
483 484
484 asn1_open(&ctx, security_blob, length); 485 asn1_open(&ctx, security_blob, length);
485 486
487 /* GSSAPI header */
486 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 488 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
487 cFYI(1, ("Error decoding negTokenInit header")); 489 cFYI(1, ("Error decoding negTokenInit header"));
488 return 0; 490 return 0;
@@ -490,156 +492,149 @@ decode_negTokenInit(unsigned char *security_blob, int length,
490 || (tag != ASN1_EOC)) { 492 || (tag != ASN1_EOC)) {
491 cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag)); 493 cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag));
492 return 0; 494 return 0;
493 } else { 495 }
494 /* remember to free obj->oid */
495 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
496 if (rc) {
497 if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
498 rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
499 if (rc) {
500 rc = compare_oid(oid, oidlen,
501 SPNEGO_OID,
502 SPNEGO_OID_LEN);
503 kfree(oid);
504 }
505 } else
506 rc = 0;
507 }
508 496
509 if (!rc) { 497 /* Check for SPNEGO OID -- remember to free obj->oid */
510 cFYI(1, ("Error decoding negTokenInit header")); 498 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
511 return 0; 499 if (rc) {
512 } 500 if ((tag == ASN1_OJI) && (con == ASN1_PRI) &&
501 (cls == ASN1_UNI)) {
502 rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
503 if (rc) {
504 rc = compare_oid(oid, oidlen, SPNEGO_OID,
505 SPNEGO_OID_LEN);
506 kfree(oid);
507 }
508 } else
509 rc = 0;
510 }
513 511
514 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 512 /* SPNEGO OID not present or garbled -- bail out */
515 cFYI(1, ("Error decoding negTokenInit")); 513 if (!rc) {
516 return 0; 514 cFYI(1, ("Error decoding negTokenInit header"));
517 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 515 return 0;
518 || (tag != ASN1_EOC)) { 516 }
519 cFYI(1,
520 ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
521 cls, con, tag, end, *end));
522 return 0;
523 }
524 517
525 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 518 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
526 cFYI(1, ("Error decoding negTokenInit")); 519 cFYI(1, ("Error decoding negTokenInit"));
527 return 0; 520 return 0;
528 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 521 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
529 || (tag != ASN1_SEQ)) { 522 || (tag != ASN1_EOC)) {
530 cFYI(1, 523 cFYI(1,
531 ("cls = %d con = %d tag = %d end = %p (%d) exit 1", 524 ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
532 cls, con, tag, end, *end)); 525 cls, con, tag, end, *end));
533 return 0; 526 return 0;
534 } 527 }
535 528
536 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 529 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
537 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 530 cFYI(1, ("Error decoding negTokenInit"));
538 return 0; 531 return 0;
539 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 532 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
540 || (tag != ASN1_EOC)) { 533 || (tag != ASN1_SEQ)) {
541 cFYI(1, 534 cFYI(1,
542 ("cls = %d con = %d tag = %d end = %p (%d) exit 0", 535 ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
543 cls, con, tag, end, *end)); 536 cls, con, tag, end, *end));
544 return 0; 537 return 0;
545 } 538 }
546 539
547 if (asn1_header_decode 540 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
548 (&ctx, &sequence_end, &cls, &con, &tag) == 0) { 541 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
549 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 542 return 0;
550 return 0; 543 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
551 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 544 || (tag != ASN1_EOC)) {
552 || (tag != ASN1_SEQ)) { 545 cFYI(1,
553 cFYI(1, 546 ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
554 ("cls = %d con = %d tag = %d end = %p (%d) exit 1", 547 cls, con, tag, end, *end));
555 cls, con, tag, end, *end)); 548 return 0;
556 return 0; 549 }
557 }
558 550
559 while (!asn1_eoc_decode(&ctx, sequence_end)) { 551 if (asn1_header_decode
560 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); 552 (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
561 if (!rc) { 553 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
562 cFYI(1, 554 return 0;
563 ("Error decoding negTokenInit hdr exit2")); 555 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
564 return 0; 556 || (tag != ASN1_SEQ)) {
565 } 557 cFYI(1,
566 if ((tag == ASN1_OJI) && (con == ASN1_PRI)) { 558 ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
567 if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) { 559 cls, con, tag, end, *end));
568 560 return 0;
569 cFYI(1, 561 }
570 ("OID len = %d oid = 0x%lx 0x%lx "
571 "0x%lx 0x%lx",
572 oidlen, *oid, *(oid + 1),
573 *(oid + 2), *(oid + 3)));
574
575 if (compare_oid(oid, oidlen,
576 MSKRB5_OID,
577 MSKRB5_OID_LEN))
578 use_kerberos = true;
579 else if (compare_oid(oid, oidlen,
580 KRB5_OID,
581 KRB5_OID_LEN))
582 use_kerberos = true;
583 else if (compare_oid(oid, oidlen,
584 NTLMSSP_OID,
585 NTLMSSP_OID_LEN))
586 use_ntlmssp = true;
587
588 kfree(oid);
589 }
590 } else {
591 cFYI(1, ("Should be an oid what is going on?"));
592 }
593 }
594 562
595 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 563 while (!asn1_eoc_decode(&ctx, sequence_end)) {
596 cFYI(1, 564 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
597 ("Error decoding last part negTokenInit exit3")); 565 if (!rc) {
598 return 0;
599 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
600 /* tag = 3 indicating mechListMIC */
601 cFYI(1, 566 cFYI(1,
602 ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)", 567 ("Error decoding negTokenInit hdr exit2"));
603 cls, con, tag, end, *end));
604 return 0; 568 return 0;
605 } 569 }
606 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 570 if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
607 cFYI(1, 571 if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
608 ("Error decoding last part negTokenInit exit5")); 572
609 return 0; 573 cFYI(1, ("OID len = %d oid = 0x%lx 0x%lx "
610 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 574 "0x%lx 0x%lx", oidlen, *oid,
611 || (tag != ASN1_SEQ)) { 575 *(oid + 1), *(oid + 2), *(oid + 3)));
612 cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)", 576
613 cls, con, tag, end, *end)); 577 if (compare_oid(oid, oidlen, MSKRB5_OID,
578 MSKRB5_OID_LEN) &&
579 !use_kerberos)
580 use_mskerberos = true;
581 else if (compare_oid(oid, oidlen, KRB5_OID,
582 KRB5_OID_LEN) &&
583 !use_mskerberos)
584 use_kerberos = true;
585 else if (compare_oid(oid, oidlen, NTLMSSP_OID,
586 NTLMSSP_OID_LEN))
587 use_ntlmssp = true;
588
589 kfree(oid);
590 }
591 } else {
592 cFYI(1, ("Should be an oid what is going on?"));
614 } 593 }
594 }
615 595
616 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 596 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
617 cFYI(1, 597 cFYI(1, ("Error decoding last part negTokenInit exit3"));
618 ("Error decoding last part negTokenInit exit 7")); 598 return 0;
619 return 0; 599 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
620 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { 600 /* tag = 3 indicating mechListMIC */
621 cFYI(1, 601 cFYI(1, ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
622 ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)", 602 cls, con, tag, end, *end));
623 cls, con, tag, end, *end)); 603 return 0;
624 return 0; 604 }
625 } 605 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
626 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 606 cFYI(1, ("Error decoding last part negTokenInit exit5"));
627 cFYI(1, 607 return 0;
628 ("Error decoding last part negTokenInit exit9")); 608 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
629 return 0; 609 || (tag != ASN1_SEQ)) {
630 } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) 610 cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
631 || (tag != ASN1_GENSTR)) { 611 cls, con, tag, end, *end));
632 cFYI(1, 612 }
633 ("Exit10 cls = %d con = %d tag = %d end = %p (%d)", 613
634 cls, con, tag, end, *end)); 614 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
635 return 0; 615 cFYI(1, ("Error decoding last part negTokenInit exit 7"));
636 } 616 return 0;
637 cFYI(1, ("Need to call asn1_octets_decode() function for %s", 617 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
638 ctx.pointer)); /* is this UTF-8 or ASCII? */ 618 cFYI(1, ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
619 cls, con, tag, end, *end));
620 return 0;
621 }
622 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
623 cFYI(1, ("Error decoding last part negTokenInit exit9"));
624 return 0;
625 } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
626 || (tag != ASN1_GENSTR)) {
627 cFYI(1, ("Exit10 cls = %d con = %d tag = %d end = %p (%d)",
628 cls, con, tag, end, *end));
629 return 0;
639 } 630 }
631 cFYI(1, ("Need to call asn1_octets_decode() function for %s",
632 ctx.pointer)); /* is this UTF-8 or ASCII? */
640 633
641 if (use_kerberos) 634 if (use_kerberos)
642 *secType = Kerberos; 635 *secType = Kerberos;
636 else if (use_mskerberos)
637 *secType = MSKerberos;
643 else if (use_ntlmssp) 638 else if (use_ntlmssp)
644 *secType = NTLMSSP; 639 *secType = NTLMSSP;
645 640
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 688a2d42153f..69a12aae91d3 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -79,27 +79,25 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
79 spin_lock(&GlobalMid_Lock); 79 spin_lock(&GlobalMid_Lock);
80 list_for_each(tmp, &server->pending_mid_q) { 80 list_for_each(tmp, &server->pending_mid_q) {
81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
82 if (mid_entry) { 82 cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
83 cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", 83 mid_entry->midState,
84 mid_entry->midState, 84 (int)mid_entry->command,
85 (int)mid_entry->command, 85 mid_entry->pid,
86 mid_entry->pid, 86 mid_entry->tsk,
87 mid_entry->tsk, 87 mid_entry->mid));
88 mid_entry->mid));
89#ifdef CONFIG_CIFS_STATS2 88#ifdef CONFIG_CIFS_STATS2
90 cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld", 89 cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
91 mid_entry->largeBuf, 90 mid_entry->largeBuf,
92 mid_entry->resp_buf, 91 mid_entry->resp_buf,
93 mid_entry->when_received, 92 mid_entry->when_received,
94 jiffies)); 93 jiffies));
95#endif /* STATS2 */ 94#endif /* STATS2 */
96 cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp, 95 cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
97 mid_entry->multiEnd)); 96 mid_entry->multiEnd));
98 if (mid_entry->resp_buf) { 97 if (mid_entry->resp_buf) {
99 cifs_dump_detail(mid_entry->resp_buf); 98 cifs_dump_detail(mid_entry->resp_buf);
100 cifs_dump_mem("existing buf: ", 99 cifs_dump_mem("existing buf: ",
101 mid_entry->resp_buf, 62); 100 mid_entry->resp_buf, 62);
102 }
103 } 101 }
104 } 102 }
105 spin_unlock(&GlobalMid_Lock); 103 spin_unlock(&GlobalMid_Lock);
@@ -163,16 +161,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
163 mid_entry = list_entry(tmp1, struct 161 mid_entry = list_entry(tmp1, struct
164 mid_q_entry, 162 mid_q_entry,
165 qhead); 163 qhead);
166 if (mid_entry) { 164 seq_printf(m, "State: %d com: %d pid:"
167 seq_printf(m, 165 " %d tsk: %p mid %d\n",
168 "State: %d com: %d pid:" 166 mid_entry->midState,
169 " %d tsk: %p mid %d\n", 167 (int)mid_entry->command,
170 mid_entry->midState, 168 mid_entry->pid,
171 (int)mid_entry->command, 169 mid_entry->tsk,
172 mid_entry->pid, 170 mid_entry->mid);
173 mid_entry->tsk,
174 mid_entry->mid);
175 }
176 } 171 }
177 spin_unlock(&GlobalMid_Lock); 172 spin_unlock(&GlobalMid_Lock);
178 } 173 }
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index d82374c9e329..d2c8eef84f3c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,7 +226,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
226 int err; 226 int err;
227 227
228 mntget(newmnt); 228 mntget(newmnt);
229 err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist); 229 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist);
230 switch (err) { 230 switch (err) {
231 case 0: 231 case 0:
232 path_put(&nd->path); 232 path_put(&nd->path);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 7013aaff6aed..117ef4bba68e 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -66,8 +66,8 @@ struct key_type cifs_spnego_key_type = {
66 .describe = user_describe, 66 .describe = user_describe,
67}; 67};
68 68
69#define MAX_VER_STR_LEN 9 /* length of longest version string e.g. 69#define MAX_VER_STR_LEN 8 /* length of longest version string e.g.
70 strlen(";ver=0xFF") */ 70 strlen("ver=0xFF") */
71#define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg 71#define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg
72 in future could have strlen(";sec=ntlmsspi") */ 72 in future could have strlen(";sec=ntlmsspi") */
73#define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ 73#define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */
@@ -81,11 +81,15 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
81 struct key *spnego_key; 81 struct key *spnego_key;
82 const char *hostname = server->hostname; 82 const char *hostname = server->hostname;
83 83
84 /* BB: come up with better scheme for determining length */ 84 /* length of fields (with semicolons): ver=0xyz ip4=ipaddress
85 /* length of fields (with semicolons): ver=0xyz ipv4= ipaddress host= 85 host=hostname sec=mechanism uid=0xFF user=username */
86 hostname sec=mechanism uid=0x uid */ 86 desc_len = MAX_VER_STR_LEN +
87 desc_len = MAX_VER_STR_LEN + 5 + MAX_IPV6_ADDR_LEN + 1 + 6 + 87 6 /* len of "host=" */ + strlen(hostname) +
88 strlen(hostname) + MAX_MECH_STR_LEN + 8 + (sizeof(uid_t) * 2); 88 5 /* len of ";ipv4=" */ + MAX_IPV6_ADDR_LEN +
89 MAX_MECH_STR_LEN +
90 7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) +
91 6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1;
92
89 spnego_key = ERR_PTR(-ENOMEM); 93 spnego_key = ERR_PTR(-ENOMEM);
90 description = kzalloc(desc_len, GFP_KERNEL); 94 description = kzalloc(desc_len, GFP_KERNEL);
91 if (description == NULL) 95 if (description == NULL)
@@ -110,9 +114,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
110 114
111 dp = description + strlen(description); 115 dp = description + strlen(description);
112 116
113 /* for now, only sec=krb5 is valid */ 117 /* for now, only sec=krb5 and sec=mskrb5 are valid */
114 if (server->secType == Kerberos) 118 if (server->secType == Kerberos)
115 sprintf(dp, ";sec=krb5"); 119 sprintf(dp, ";sec=krb5");
120 else if (server->secType == MSKerberos)
121 sprintf(dp, ";sec=mskrb5");
116 else 122 else
117 goto out; 123 goto out;
118 124
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h
index 05a34b17a1ab..e4041ec4d712 100644
--- a/fs/cifs/cifs_spnego.h
+++ b/fs/cifs/cifs_spnego.h
@@ -23,7 +23,7 @@
23#ifndef _CIFS_SPNEGO_H 23#ifndef _CIFS_SPNEGO_H
24#define _CIFS_SPNEGO_H 24#define _CIFS_SPNEGO_H
25 25
26#define CIFS_SPNEGO_UPCALL_VERSION 1 26#define CIFS_SPNEGO_UPCALL_VERSION 2
27 27
28/* 28/*
29 * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION. 29 * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION.
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 83fd40dc1ef0..bd5f13d38450 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -294,6 +294,7 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key)
294 294
295 if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) 295 if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0)
296 if (extended_security & CIFSSEC_MAY_PLNTXT) { 296 if (extended_security & CIFSSEC_MAY_PLNTXT) {
297 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
297 memcpy(lnm_session_key, password_with_pad, 298 memcpy(lnm_session_key, password_with_pad,
298 CIFS_ENCPWD_SIZE); 299 CIFS_ENCPWD_SIZE);
299 return; 300 return;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 1ec7076f7b24..25ecbd5b0404 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -175,6 +175,8 @@ out_no_root:
175 if (inode) 175 if (inode)
176 iput(inode); 176 iput(inode);
177 177
178 cifs_umount(sb, cifs_sb);
179
178out_mount_failed: 180out_mount_failed:
179 if (cifs_sb) { 181 if (cifs_sb) {
180#ifdef CONFIG_CIFS_DFS_UPCALL 182#ifdef CONFIG_CIFS_DFS_UPCALL
@@ -930,36 +932,34 @@ static int cifs_oplock_thread(void *dummyarg)
930 schedule_timeout(39*HZ); 932 schedule_timeout(39*HZ);
931 } else { 933 } else {
932 oplock_item = list_entry(GlobalOplock_Q.next, 934 oplock_item = list_entry(GlobalOplock_Q.next,
933 struct oplock_q_entry, qhead); 935 struct oplock_q_entry, qhead);
934 if (oplock_item) { 936 cFYI(1, ("found oplock item to write out"));
935 cFYI(1, ("found oplock item to write out")); 937 pTcon = oplock_item->tcon;
936 pTcon = oplock_item->tcon; 938 inode = oplock_item->pinode;
937 inode = oplock_item->pinode; 939 netfid = oplock_item->netfid;
938 netfid = oplock_item->netfid; 940 spin_unlock(&GlobalMid_Lock);
939 spin_unlock(&GlobalMid_Lock); 941 DeleteOplockQEntry(oplock_item);
940 DeleteOplockQEntry(oplock_item); 942 /* can not grab inode sem here since it would
941 /* can not grab inode sem here since it would
942 deadlock when oplock received on delete 943 deadlock when oplock received on delete
943 since vfs_unlink holds the i_mutex across 944 since vfs_unlink holds the i_mutex across
944 the call */ 945 the call */
945 /* mutex_lock(&inode->i_mutex);*/ 946 /* mutex_lock(&inode->i_mutex);*/
946 if (S_ISREG(inode->i_mode)) { 947 if (S_ISREG(inode->i_mode)) {
947 rc = 948 rc = filemap_fdatawrite(inode->i_mapping);
948 filemap_fdatawrite(inode->i_mapping); 949 if (CIFS_I(inode)->clientCanCacheRead == 0) {
949 if (CIFS_I(inode)->clientCanCacheRead 950 waitrc = filemap_fdatawait(
950 == 0) { 951 inode->i_mapping);
951 waitrc = filemap_fdatawait(inode->i_mapping); 952 invalidate_remote_inode(inode);
952 invalidate_remote_inode(inode); 953 }
953 } 954 if (rc == 0)
954 if (rc == 0) 955 rc = waitrc;
955 rc = waitrc; 956 } else
956 } else 957 rc = 0;
957 rc = 0; 958 /* mutex_unlock(&inode->i_mutex);*/
958 /* mutex_unlock(&inode->i_mutex);*/ 959 if (rc)
959 if (rc) 960 CIFS_I(inode)->write_behind_rc = rc;
960 CIFS_I(inode)->write_behind_rc = rc; 961 cFYI(1, ("Oplock flush inode %p rc %d",
961 cFYI(1, ("Oplock flush inode %p rc %d", 962 inode, rc));
962 inode, rc));
963 963
964 /* releasing stale oplock after recent reconnect 964 /* releasing stale oplock after recent reconnect
965 of smb session using a now incorrect file 965 of smb session using a now incorrect file
@@ -967,15 +967,13 @@ static int cifs_oplock_thread(void *dummyarg)
967 not bother sending an oplock release if session 967 not bother sending an oplock release if session
968 to server still is disconnected since oplock 968 to server still is disconnected since oplock
969 already released by the server in that case */ 969 already released by the server in that case */
970 if (pTcon->tidStatus != CifsNeedReconnect) { 970 if (pTcon->tidStatus != CifsNeedReconnect) {
971 rc = CIFSSMBLock(0, pTcon, netfid, 971 rc = CIFSSMBLock(0, pTcon, netfid,
972 0 /* len */ , 0 /* offset */, 0, 972 0 /* len */ , 0 /* offset */, 0,
973 0, LOCKING_ANDX_OPLOCK_RELEASE, 973 0, LOCKING_ANDX_OPLOCK_RELEASE,
974 false /* wait flag */); 974 false /* wait flag */);
975 cFYI(1, ("Oplock release rc = %d", rc)); 975 cFYI(1, ("Oplock release rc = %d", rc));
976 } 976 }
977 } else
978 spin_unlock(&GlobalMid_Lock);
979 set_current_state(TASK_INTERRUPTIBLE); 977 set_current_state(TASK_INTERRUPTIBLE);
980 schedule_timeout(1); /* yield in case q were corrupt */ 978 schedule_timeout(1); /* yield in case q were corrupt */
981 } 979 }
@@ -1001,8 +999,7 @@ static int cifs_dnotify_thread(void *dummyarg)
1001 list_for_each(tmp, &GlobalSMBSessionList) { 999 list_for_each(tmp, &GlobalSMBSessionList) {
1002 ses = list_entry(tmp, struct cifsSesInfo, 1000 ses = list_entry(tmp, struct cifsSesInfo,
1003 cifsSessionList); 1001 cifsSessionList);
1004 if (ses && ses->server && 1002 if (ses->server && atomic_read(&ses->server->inFlight))
1005 atomic_read(&ses->server->inFlight))
1006 wake_up_all(&ses->server->response_q); 1003 wake_up_all(&ses->server->response_q);
1007 } 1004 }
1008 read_unlock(&GlobalSMBSeslock); 1005 read_unlock(&GlobalSMBSeslock);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 25a6cbd15529..135c965c4137 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -101,5 +101,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
101extern const struct export_operations cifs_export_ops; 101extern const struct export_operations cifs_export_ops;
102#endif /* EXPERIMENTAL */ 102#endif /* EXPERIMENTAL */
103 103
104#define CIFS_VERSION "1.53" 104#define CIFS_VERSION "1.54"
105#endif /* _CIFSFS_H */ 105#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 7e1cf262effe..8dfd6f24d488 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -80,7 +80,8 @@ enum securityEnum {
80 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ 80 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
81 RawNTLMSSP, /* NTLMSSP without SPNEGO */ 81 RawNTLMSSP, /* NTLMSSP without SPNEGO */
82 NTLMSSP, /* NTLMSSP via SPNEGO */ 82 NTLMSSP, /* NTLMSSP via SPNEGO */
83 Kerberos /* Kerberos via SPNEGO */ 83 Kerberos, /* Kerberos via SPNEGO */
84 MSKerberos, /* MS Kerberos via SPNEGO */
84}; 85};
85 86
86enum protocolEnum { 87enum protocolEnum {
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 409abce12732..d2a073edd1b8 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -262,7 +262,7 @@
262 */ 262 */
263#define CIFS_NO_HANDLE 0xFFFF 263#define CIFS_NO_HANDLE 0xFFFF
264 264
265#define NO_CHANGE_64 cpu_to_le64(0xFFFFFFFFFFFFFFFFULL) 265#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL
266#define NO_CHANGE_32 0xFFFFFFFFUL 266#define NO_CHANGE_32 0xFFFFFFFFUL
267 267
268/* IPC$ in ASCII */ 268/* IPC$ in ASCII */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index b9f5e935f821..a729d083e6f4 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -172,12 +172,13 @@ extern int CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon);
172extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon, 172extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon,
173 struct kstatfs *FSData); 173 struct kstatfs *FSData);
174 174
175extern int CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, 175extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
176 const char *fileName, const FILE_BASIC_INFO *data, 176 const char *fileName, const FILE_BASIC_INFO *data,
177 const struct nls_table *nls_codepage, 177 const struct nls_table *nls_codepage,
178 int remap_special_chars); 178 int remap_special_chars);
179extern int CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, 179extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
180 const FILE_BASIC_INFO *data, __u16 fid); 180 const FILE_BASIC_INFO *data, __u16 fid,
181 __u32 pid_of_opener);
181#if 0 182#if 0
182extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, 183extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon,
183 char *fileName, __u16 dos_attributes, 184 char *fileName, __u16 dos_attributes,
@@ -191,9 +192,20 @@ extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon,
191extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, 192extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon,
192 __u64 size, __u16 fileHandle, __u32 opener_pid, 193 __u64 size, __u16 fileHandle, __u32 opener_pid,
193 bool AllocSizeFlag); 194 bool AllocSizeFlag);
194extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon, 195
195 char *full_path, __u64 mode, __u64 uid, 196struct cifs_unix_set_info_args {
196 __u64 gid, dev_t dev, 197 __u64 ctime;
198 __u64 atime;
199 __u64 mtime;
200 __u64 mode;
201 __u64 uid;
202 __u64 gid;
203 dev_t device;
204};
205
206extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon,
207 char *fileName,
208 const struct cifs_unix_set_info_args *args,
197 const struct nls_table *nls_codepage, 209 const struct nls_table *nls_codepage,
198 int remap_special_chars); 210 int remap_special_chars);
199 211
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index c621ffa2ca90..994de7c90474 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -128,8 +128,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
128 write_lock(&GlobalSMBSeslock); 128 write_lock(&GlobalSMBSeslock);
129 list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { 129 list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
130 open_file = list_entry(tmp, struct cifsFileInfo, tlist); 130 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
131 if (open_file) 131 open_file->invalidHandle = true;
132 open_file->invalidHandle = true;
133 } 132 }
134 write_unlock(&GlobalSMBSeslock); 133 write_unlock(&GlobalSMBSeslock);
135 /* BB Add call to invalidate_inodes(sb) for all superblocks mounted 134 /* BB Add call to invalidate_inodes(sb) for all superblocks mounted
@@ -4816,8 +4815,8 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4816 time and resort to the original setpathinfo level which takes the ancient 4815 time and resort to the original setpathinfo level which takes the ancient
4817 DOS time format with 2 second granularity */ 4816 DOS time format with 2 second granularity */
4818int 4817int
4819CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, 4818CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
4820 const FILE_BASIC_INFO *data, __u16 fid) 4819 const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener)
4821{ 4820{
4822 struct smb_com_transaction2_sfi_req *pSMB = NULL; 4821 struct smb_com_transaction2_sfi_req *pSMB = NULL;
4823 char *data_offset; 4822 char *data_offset;
@@ -4830,11 +4829,8 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
4830 if (rc) 4829 if (rc)
4831 return rc; 4830 return rc;
4832 4831
4833 /* At this point there is no need to override the current pid 4832 pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
4834 with the pid of the opener, but that could change if we someday 4833 pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
4835 use an existing handle (rather than opening one on the fly) */
4836 /* pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
4837 pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));*/
4838 4834
4839 params = 6; 4835 params = 6;
4840 pSMB->MaxSetupCount = 0; 4836 pSMB->MaxSetupCount = 0;
@@ -4882,9 +4878,9 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
4882 4878
4883 4879
4884int 4880int
4885CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, const char *fileName, 4881CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
4886 const FILE_BASIC_INFO *data, 4882 const char *fileName, const FILE_BASIC_INFO *data,
4887 const struct nls_table *nls_codepage, int remap) 4883 const struct nls_table *nls_codepage, int remap)
4888{ 4884{
4889 TRANSACTION2_SPI_REQ *pSMB = NULL; 4885 TRANSACTION2_SPI_REQ *pSMB = NULL;
4890 TRANSACTION2_SPI_RSP *pSMBr = NULL; 4886 TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5013,10 +5009,9 @@ SetAttrLgcyRetry:
5013#endif /* temporarily unneeded SetAttr legacy function */ 5009#endif /* temporarily unneeded SetAttr legacy function */
5014 5010
5015int 5011int
5016CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon, 5012CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
5017 char *fileName, __u64 mode, __u64 uid, __u64 gid, 5013 const struct cifs_unix_set_info_args *args,
5018 dev_t device, const struct nls_table *nls_codepage, 5014 const struct nls_table *nls_codepage, int remap)
5019 int remap)
5020{ 5015{
5021 TRANSACTION2_SPI_REQ *pSMB = NULL; 5016 TRANSACTION2_SPI_REQ *pSMB = NULL;
5022 TRANSACTION2_SPI_RSP *pSMBr = NULL; 5017 TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5025,6 +5020,7 @@ CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon,
5025 int bytes_returned = 0; 5020 int bytes_returned = 0;
5026 FILE_UNIX_BASIC_INFO *data_offset; 5021 FILE_UNIX_BASIC_INFO *data_offset;
5027 __u16 params, param_offset, offset, count, byte_count; 5022 __u16 params, param_offset, offset, count, byte_count;
5023 __u64 mode = args->mode;
5028 5024
5029 cFYI(1, ("In SetUID/GID/Mode")); 5025 cFYI(1, ("In SetUID/GID/Mode"));
5030setPermsRetry: 5026setPermsRetry:
@@ -5080,16 +5076,16 @@ setPermsRetry:
5080 set file size and do not want to truncate file size to zero 5076 set file size and do not want to truncate file size to zero
5081 accidently as happened on one Samba server beta by putting 5077 accidently as happened on one Samba server beta by putting
5082 zero instead of -1 here */ 5078 zero instead of -1 here */
5083 data_offset->EndOfFile = NO_CHANGE_64; 5079 data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
5084 data_offset->NumOfBytes = NO_CHANGE_64; 5080 data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
5085 data_offset->LastStatusChange = NO_CHANGE_64; 5081 data_offset->LastStatusChange = cpu_to_le64(args->ctime);
5086 data_offset->LastAccessTime = NO_CHANGE_64; 5082 data_offset->LastAccessTime = cpu_to_le64(args->atime);
5087 data_offset->LastModificationTime = NO_CHANGE_64; 5083 data_offset->LastModificationTime = cpu_to_le64(args->mtime);
5088 data_offset->Uid = cpu_to_le64(uid); 5084 data_offset->Uid = cpu_to_le64(args->uid);
5089 data_offset->Gid = cpu_to_le64(gid); 5085 data_offset->Gid = cpu_to_le64(args->gid);
5090 /* better to leave device as zero when it is */ 5086 /* better to leave device as zero when it is */
5091 data_offset->DevMajor = cpu_to_le64(MAJOR(device)); 5087 data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
5092 data_offset->DevMinor = cpu_to_le64(MINOR(device)); 5088 data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
5093 data_offset->Permissions = cpu_to_le64(mode); 5089 data_offset->Permissions = cpu_to_le64(mode);
5094 5090
5095 if (S_ISREG(mode)) 5091 if (S_ISREG(mode))
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index b51d5777cde6..4c13bcdb92a5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -151,7 +151,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
151 } 151 }
152 list_for_each(tmp, &GlobalTreeConnectionList) { 152 list_for_each(tmp, &GlobalTreeConnectionList) {
153 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 153 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
154 if ((tcon) && (tcon->ses) && (tcon->ses->server == server)) 154 if ((tcon->ses) && (tcon->ses->server == server))
155 tcon->tidStatus = CifsNeedReconnect; 155 tcon->tidStatus = CifsNeedReconnect;
156 } 156 }
157 read_unlock(&GlobalSMBSeslock); 157 read_unlock(&GlobalSMBSeslock);
@@ -173,14 +173,12 @@ cifs_reconnect(struct TCP_Server_Info *server)
173 mid_entry = list_entry(tmp, struct 173 mid_entry = list_entry(tmp, struct
174 mid_q_entry, 174 mid_q_entry,
175 qhead); 175 qhead);
176 if (mid_entry) { 176 if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
177 if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
178 /* Mark other intransit requests as needing 177 /* Mark other intransit requests as needing
179 retry so we do not immediately mark the 178 retry so we do not immediately mark the
180 session bad again (ie after we reconnect 179 session bad again (ie after we reconnect
181 below) as they timeout too */ 180 below) as they timeout too */
182 mid_entry->midState = MID_RETRY_NEEDED; 181 mid_entry->midState = MID_RETRY_NEEDED;
183 }
184 } 182 }
185 } 183 }
186 spin_unlock(&GlobalMid_Lock); 184 spin_unlock(&GlobalMid_Lock);
@@ -351,11 +349,9 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
351 349
352 current->flags |= PF_MEMALLOC; 350 current->flags |= PF_MEMALLOC;
353 cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current))); 351 cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
354 write_lock(&GlobalSMBSeslock); 352
355 atomic_inc(&tcpSesAllocCount); 353 length = atomic_inc_return(&tcpSesAllocCount);
356 length = tcpSesAllocCount.counter; 354 if (length > 1)
357 write_unlock(&GlobalSMBSeslock);
358 if (length > 1)
359 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 355 mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
360 GFP_KERNEL); 356 GFP_KERNEL);
361 357
@@ -745,14 +741,11 @@ multi_t2_fnd:
745 coming home not much else we can do but free the memory */ 741 coming home not much else we can do but free the memory */
746 } 742 }
747 743
748 write_lock(&GlobalSMBSeslock);
749 atomic_dec(&tcpSesAllocCount);
750 length = tcpSesAllocCount.counter;
751
752 /* last chance to mark ses pointers invalid 744 /* last chance to mark ses pointers invalid
753 if there are any pointing to this (e.g 745 if there are any pointing to this (e.g
754 if a crazy root user tried to kill cifsd 746 if a crazy root user tried to kill cifsd
755 kernel thread explicitly this might happen) */ 747 kernel thread explicitly this might happen) */
748 write_lock(&GlobalSMBSeslock);
756 list_for_each(tmp, &GlobalSMBSessionList) { 749 list_for_each(tmp, &GlobalSMBSessionList) {
757 ses = list_entry(tmp, struct cifsSesInfo, 750 ses = list_entry(tmp, struct cifsSesInfo,
758 cifsSessionList); 751 cifsSessionList);
@@ -763,6 +756,8 @@ multi_t2_fnd:
763 756
764 kfree(server->hostname); 757 kfree(server->hostname);
765 kfree(server); 758 kfree(server);
759
760 length = atomic_dec_return(&tcpSesAllocCount);
766 if (length > 0) 761 if (length > 0)
767 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 762 mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
768 GFP_KERNEL); 763 GFP_KERNEL);
@@ -3603,19 +3598,21 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3603 char ntlm_session_key[CIFS_SESS_KEY_SIZE]; 3598 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
3604 bool ntlmv2_flag = false; 3599 bool ntlmv2_flag = false;
3605 int first_time = 0; 3600 int first_time = 0;
3601 struct TCP_Server_Info *server = pSesInfo->server;
3606 3602
3607 /* what if server changes its buffer size after dropping the session? */ 3603 /* what if server changes its buffer size after dropping the session? */
3608 if (pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ { 3604 if (server->maxBuf == 0) /* no need to send on reconnect */ {
3609 rc = CIFSSMBNegotiate(xid, pSesInfo); 3605 rc = CIFSSMBNegotiate(xid, pSesInfo);
3610 if (rc == -EAGAIN) /* retry only once on 1st time connection */ { 3606 if (rc == -EAGAIN) {
3607 /* retry only once on 1st time connection */
3611 rc = CIFSSMBNegotiate(xid, pSesInfo); 3608 rc = CIFSSMBNegotiate(xid, pSesInfo);
3612 if (rc == -EAGAIN) 3609 if (rc == -EAGAIN)
3613 rc = -EHOSTDOWN; 3610 rc = -EHOSTDOWN;
3614 } 3611 }
3615 if (rc == 0) { 3612 if (rc == 0) {
3616 spin_lock(&GlobalMid_Lock); 3613 spin_lock(&GlobalMid_Lock);
3617 if (pSesInfo->server->tcpStatus != CifsExiting) 3614 if (server->tcpStatus != CifsExiting)
3618 pSesInfo->server->tcpStatus = CifsGood; 3615 server->tcpStatus = CifsGood;
3619 else 3616 else
3620 rc = -EHOSTDOWN; 3617 rc = -EHOSTDOWN;
3621 spin_unlock(&GlobalMid_Lock); 3618 spin_unlock(&GlobalMid_Lock);
@@ -3623,97 +3620,90 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3623 } 3620 }
3624 first_time = 1; 3621 first_time = 1;
3625 } 3622 }
3626 if (!rc) { 3623
3627 pSesInfo->flags = 0; 3624 if (rc)
3628 pSesInfo->capabilities = pSesInfo->server->capabilities; 3625 goto ss_err_exit;
3629 if (linuxExtEnabled == 0) 3626
3630 pSesInfo->capabilities &= (~CAP_UNIX); 3627 pSesInfo->flags = 0;
3628 pSesInfo->capabilities = server->capabilities;
3629 if (linuxExtEnabled == 0)
3630 pSesInfo->capabilities &= (~CAP_UNIX);
3631 /* pSesInfo->sequence_number = 0;*/ 3631 /* pSesInfo->sequence_number = 0;*/
3632 cFYI(1, 3632 cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
3633 ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", 3633 server->secMode, server->capabilities, server->timeAdj));
3634 pSesInfo->server->secMode, 3634
3635 pSesInfo->server->capabilities, 3635 if (experimEnabled < 2)
3636 pSesInfo->server->timeAdj)); 3636 rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
3637 if (experimEnabled < 2) 3637 else if (extended_security
3638 rc = CIFS_SessSetup(xid, pSesInfo, 3638 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3639 first_time, nls_info); 3639 && (server->secType == NTLMSSP)) {
3640 else if (extended_security 3640 rc = -EOPNOTSUPP;
3641 && (pSesInfo->capabilities 3641 } else if (extended_security
3642 & CAP_EXTENDED_SECURITY) 3642 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3643 && (pSesInfo->server->secType == NTLMSSP)) { 3643 && (server->secType == RawNTLMSSP)) {
3644 rc = -EOPNOTSUPP; 3644 cFYI(1, ("NTLMSSP sesssetup"));
3645 } else if (extended_security 3645 rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag,
3646 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) 3646 nls_info);
3647 && (pSesInfo->server->secType == RawNTLMSSP)) { 3647 if (!rc) {
3648 cFYI(1, ("NTLMSSP sesssetup")); 3648 if (ntlmv2_flag) {
3649 rc = CIFSNTLMSSPNegotiateSessSetup(xid, 3649 char *v2_response;
3650 pSesInfo, 3650 cFYI(1, ("more secure NTLM ver2 hash"));
3651 &ntlmv2_flag, 3651 if (CalcNTLMv2_partial_mac_key(pSesInfo,
3652 nls_info); 3652 nls_info)) {
3653 if (!rc) { 3653 rc = -ENOMEM;
3654 if (ntlmv2_flag) { 3654 goto ss_err_exit;
3655 char *v2_response; 3655 } else
3656 cFYI(1, ("more secure NTLM ver2 hash")); 3656 v2_response = kmalloc(16 + 64 /* blob*/,
3657 if (CalcNTLMv2_partial_mac_key(pSesInfo, 3657 GFP_KERNEL);
3658 nls_info)) { 3658 if (v2_response) {
3659 rc = -ENOMEM; 3659 CalcNTLMv2_response(pSesInfo,
3660 goto ss_err_exit; 3660 v2_response);
3661 } else 3661 /* if (first_time)
3662 v2_response = kmalloc(16 + 64 /* blob */, GFP_KERNEL); 3662 cifs_calculate_ntlmv2_mac_key */
3663 if (v2_response) { 3663 kfree(v2_response);
3664 CalcNTLMv2_response(pSesInfo,
3665 v2_response);
3666 /* if (first_time)
3667 cifs_calculate_ntlmv2_mac_key(
3668 pSesInfo->server->mac_signing_key,
3669 response, ntlm_session_key,*/
3670 kfree(v2_response);
3671 /* BB Put dummy sig in SessSetup PDU? */ 3664 /* BB Put dummy sig in SessSetup PDU? */
3672 } else {
3673 rc = -ENOMEM;
3674 goto ss_err_exit;
3675 }
3676
3677 } else { 3665 } else {
3678 SMBNTencrypt(pSesInfo->password, 3666 rc = -ENOMEM;
3679 pSesInfo->server->cryptKey, 3667 goto ss_err_exit;
3680 ntlm_session_key);
3681
3682 if (first_time)
3683 cifs_calculate_mac_key(
3684 &pSesInfo->server->mac_signing_key,
3685 ntlm_session_key,
3686 pSesInfo->password);
3687 } 3668 }
3669
3670 } else {
3671 SMBNTencrypt(pSesInfo->password,
3672 server->cryptKey,
3673 ntlm_session_key);
3674
3675 if (first_time)
3676 cifs_calculate_mac_key(
3677 &server->mac_signing_key,
3678 ntlm_session_key,
3679 pSesInfo->password);
3680 }
3688 /* for better security the weaker lanman hash not sent 3681 /* for better security the weaker lanman hash not sent
3689 in AuthSessSetup so we no longer calculate it */ 3682 in AuthSessSetup so we no longer calculate it */
3690 3683
3691 rc = CIFSNTLMSSPAuthSessSetup(xid, 3684 rc = CIFSNTLMSSPAuthSessSetup(xid, pSesInfo,
3692 pSesInfo, 3685 ntlm_session_key,
3693 ntlm_session_key, 3686 ntlmv2_flag,
3694 ntlmv2_flag, 3687 nls_info);
3695 nls_info); 3688 }
3696 } 3689 } else { /* old style NTLM 0.12 session setup */
3697 } else { /* old style NTLM 0.12 session setup */ 3690 SMBNTencrypt(pSesInfo->password, server->cryptKey,
3698 SMBNTencrypt(pSesInfo->password, 3691 ntlm_session_key);
3699 pSesInfo->server->cryptKey,
3700 ntlm_session_key);
3701 3692
3702 if (first_time) 3693 if (first_time)
3703 cifs_calculate_mac_key( 3694 cifs_calculate_mac_key(&server->mac_signing_key,
3704 &pSesInfo->server->mac_signing_key, 3695 ntlm_session_key,
3705 ntlm_session_key, pSesInfo->password); 3696 pSesInfo->password);
3706 3697
3707 rc = CIFSSessSetup(xid, pSesInfo, 3698 rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info);
3708 ntlm_session_key, nls_info); 3699 }
3709 } 3700 if (rc) {
3710 if (rc) { 3701 cERROR(1, ("Send error in SessSetup = %d", rc));
3711 cERROR(1, ("Send error in SessSetup = %d", rc)); 3702 } else {
3712 } else { 3703 cFYI(1, ("CIFS Session Established successfully"));
3713 cFYI(1, ("CIFS Session Established successfully"));
3714 pSesInfo->status = CifsGood; 3704 pSesInfo->status = CifsGood;
3715 }
3716 } 3705 }
3706
3717ss_err_exit: 3707ss_err_exit:
3718 return rc; 3708 return rc;
3719} 3709}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index fb69c1fa85c9..e962e75e6f7b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -226,23 +226,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
226 /* If Open reported that we actually created a file 226 /* If Open reported that we actually created a file
227 then we now have to set the mode if possible */ 227 then we now have to set the mode if possible */
228 if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { 228 if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) {
229 struct cifs_unix_set_info_args args = {
230 .mode = mode,
231 .ctime = NO_CHANGE_64,
232 .atime = NO_CHANGE_64,
233 .mtime = NO_CHANGE_64,
234 .device = 0,
235 };
236
229 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 237 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
230 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, 238 args.uid = (__u64) current->fsuid;
231 (__u64)current->fsuid, 239 if (inode->i_mode & S_ISGID)
232 (__u64)current->fsgid, 240 args.gid = (__u64) inode->i_gid;
233 0 /* dev */, 241 else
234 cifs_sb->local_nls, 242 args.gid = (__u64) current->fsgid;
235 cifs_sb->mnt_cifs_flags &
236 CIFS_MOUNT_MAP_SPECIAL_CHR);
237 } else { 243 } else {
238 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, 244 args.uid = NO_CHANGE_64;
239 (__u64)-1, 245 args.gid = NO_CHANGE_64;
240 (__u64)-1,
241 0 /* dev */,
242 cifs_sb->local_nls,
243 cifs_sb->mnt_cifs_flags &
244 CIFS_MOUNT_MAP_SPECIAL_CHR);
245 } 246 }
247 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
248 cifs_sb->local_nls,
249 cifs_sb->mnt_cifs_flags &
250 CIFS_MOUNT_MAP_SPECIAL_CHR);
246 } else { 251 } else {
247 /* BB implement mode setting via Windows security 252 /* BB implement mode setting via Windows security
248 descriptors e.g. */ 253 descriptors e.g. */
@@ -267,7 +272,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
267 (cifs_sb->mnt_cifs_flags & 272 (cifs_sb->mnt_cifs_flags &
268 CIFS_MOUNT_SET_UID)) { 273 CIFS_MOUNT_SET_UID)) {
269 newinode->i_uid = current->fsuid; 274 newinode->i_uid = current->fsuid;
270 newinode->i_gid = current->fsgid; 275 if (inode->i_mode & S_ISGID)
276 newinode->i_gid =
277 inode->i_gid;
278 else
279 newinode->i_gid =
280 current->fsgid;
271 } 281 }
272 } 282 }
273 } 283 }
@@ -357,21 +367,24 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
357 if (full_path == NULL) 367 if (full_path == NULL)
358 rc = -ENOMEM; 368 rc = -ENOMEM;
359 else if (pTcon->unix_ext) { 369 else if (pTcon->unix_ext) {
360 mode &= ~current->fs->umask; 370 struct cifs_unix_set_info_args args = {
371 .mode = mode & ~current->fs->umask,
372 .ctime = NO_CHANGE_64,
373 .atime = NO_CHANGE_64,
374 .mtime = NO_CHANGE_64,
375 .device = device_number,
376 };
361 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 377 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
362 rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, 378 args.uid = (__u64) current->fsuid;
363 mode, (__u64)current->fsuid, 379 args.gid = (__u64) current->fsgid;
364 (__u64)current->fsgid,
365 device_number, cifs_sb->local_nls,
366 cifs_sb->mnt_cifs_flags &
367 CIFS_MOUNT_MAP_SPECIAL_CHR);
368 } else { 380 } else {
369 rc = CIFSSMBUnixSetPerms(xid, pTcon, 381 args.uid = NO_CHANGE_64;
370 full_path, mode, (__u64)-1, (__u64)-1, 382 args.gid = NO_CHANGE_64;
371 device_number, cifs_sb->local_nls,
372 cifs_sb->mnt_cifs_flags &
373 CIFS_MOUNT_MAP_SPECIAL_CHR);
374 } 383 }
384 rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path,
385 &args, cifs_sb->local_nls,
386 cifs_sb->mnt_cifs_flags &
387 CIFS_MOUNT_MAP_SPECIAL_CHR);
375 388
376 if (!rc) { 389 if (!rc) {
377 rc = cifs_get_inode_info_unix(&newinode, full_path, 390 rc = cifs_get_inode_info_unix(&newinode, full_path,
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index f730ef35499e..a2e0673e1b08 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -47,11 +47,18 @@ static int dns_resolver_instantiate(struct key *key, const void *data,
47 return rc; 47 return rc;
48} 48}
49 49
50static void
51dns_resolver_destroy(struct key *key)
52{
53 kfree(key->payload.data);
54}
55
50struct key_type key_type_dns_resolver = { 56struct key_type key_type_dns_resolver = {
51 .name = "dns_resolver", 57 .name = "dns_resolver",
52 .def_datalen = sizeof(struct in_addr), 58 .def_datalen = sizeof(struct in_addr),
53 .describe = user_describe, 59 .describe = user_describe,
54 .instantiate = dns_resolver_instantiate, 60 .instantiate = dns_resolver_instantiate,
61 .destroy = dns_resolver_destroy,
55 .match = user_match, 62 .match = user_match,
56}; 63};
57 64
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0aac824371a5..cbefe1f1f9fe 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -310,18 +310,19 @@ int cifs_open(struct inode *inode, struct file *file)
310 /* time to set mode which we can not set earlier due to 310 /* time to set mode which we can not set earlier due to
311 problems creating new read-only files */ 311 problems creating new read-only files */
312 if (pTcon->unix_ext) { 312 if (pTcon->unix_ext) {
313 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 313 struct cifs_unix_set_info_args args = {
314 inode->i_mode, 314 .mode = inode->i_mode,
315 (__u64)-1, (__u64)-1, 0 /* dev */, 315 .uid = NO_CHANGE_64,
316 .gid = NO_CHANGE_64,
317 .ctime = NO_CHANGE_64,
318 .atime = NO_CHANGE_64,
319 .mtime = NO_CHANGE_64,
320 .device = 0,
321 };
322 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
316 cifs_sb->local_nls, 323 cifs_sb->local_nls,
317 cifs_sb->mnt_cifs_flags & 324 cifs_sb->mnt_cifs_flags &
318 CIFS_MOUNT_MAP_SPECIAL_CHR); 325 CIFS_MOUNT_MAP_SPECIAL_CHR);
319 } else {
320 /* BB implement via Windows security descriptors eg
321 CIFSSMBWinSetPerms(xid, pTcon, full_path, mode,
322 -1, -1, local_nls);
323 in the meantime could set r/o dos attribute when
324 perms are eg: mode & 0222 == 0 */
325 } 326 }
326 } 327 }
327 328
@@ -832,6 +833,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
832 return -EBADF; 833 return -EBADF;
833 open_file = (struct cifsFileInfo *) file->private_data; 834 open_file = (struct cifsFileInfo *) file->private_data;
834 835
836 rc = generic_write_checks(file, poffset, &write_size, 0);
837 if (rc)
838 return rc;
839
835 xid = GetXid(); 840 xid = GetXid();
836 841
837 if (*poffset > file->f_path.dentry->d_inode->i_size) 842 if (*poffset > file->f_path.dentry->d_inode->i_size)
@@ -1280,7 +1285,7 @@ retry:
1280 1285
1281 if (first < 0) 1286 if (first < 0)
1282 lock_page(page); 1287 lock_page(page);
1283 else if (TestSetPageLocked(page)) 1288 else if (!trylock_page(page))
1284 break; 1289 break;
1285 1290
1286 if (unlikely(page->mapping != mapping)) { 1291 if (unlikely(page->mapping != mapping)) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 46e54d39461d..9c548f110102 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -546,7 +546,8 @@ int cifs_get_inode_info(struct inode **pinode,
546 if ((inode->i_mode & S_IWUGO) == 0 && 546 if ((inode->i_mode & S_IWUGO) == 0 &&
547 (attr & ATTR_READONLY) == 0) 547 (attr & ATTR_READONLY) == 0)
548 inode->i_mode |= (S_IWUGO & default_mode); 548 inode->i_mode |= (S_IWUGO & default_mode);
549 inode->i_mode &= ~S_IFMT; 549
550 inode->i_mode &= ~S_IFMT;
550 } 551 }
551 /* clear write bits if ATTR_READONLY is set */ 552 /* clear write bits if ATTR_READONLY is set */
552 if (attr & ATTR_READONLY) 553 if (attr & ATTR_READONLY)
@@ -649,6 +650,7 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
649 inode->i_fop = &simple_dir_operations; 650 inode->i_fop = &simple_dir_operations;
650 inode->i_uid = cifs_sb->mnt_uid; 651 inode->i_uid = cifs_sb->mnt_uid;
651 inode->i_gid = cifs_sb->mnt_gid; 652 inode->i_gid = cifs_sb->mnt_gid;
653 } else if (rc) {
652 _FreeXid(xid); 654 _FreeXid(xid);
653 iget_failed(inode); 655 iget_failed(inode);
654 return ERR_PTR(rc); 656 return ERR_PTR(rc);
@@ -737,7 +739,7 @@ psx_del_no_retry:
737 /* ATTRS set to normal clears r/o bit */ 739 /* ATTRS set to normal clears r/o bit */
738 pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL); 740 pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL);
739 if (!(pTcon->ses->flags & CIFS_SES_NT4)) 741 if (!(pTcon->ses->flags & CIFS_SES_NT4))
740 rc = CIFSSMBSetTimes(xid, pTcon, full_path, 742 rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
741 pinfo_buf, 743 pinfo_buf,
742 cifs_sb->local_nls, 744 cifs_sb->local_nls,
743 cifs_sb->mnt_cifs_flags & 745 cifs_sb->mnt_cifs_flags &
@@ -767,9 +769,10 @@ psx_del_no_retry:
767 cifs_sb->mnt_cifs_flags & 769 cifs_sb->mnt_cifs_flags &
768 CIFS_MOUNT_MAP_SPECIAL_CHR); 770 CIFS_MOUNT_MAP_SPECIAL_CHR);
769 if (rc == 0) { 771 if (rc == 0) {
770 rc = CIFSSMBSetFileTimes(xid, pTcon, 772 rc = CIFSSMBSetFileInfo(xid, pTcon,
771 pinfo_buf, 773 pinfo_buf,
772 netfid); 774 netfid,
775 current->tgid);
773 CIFSSMBClose(xid, pTcon, netfid); 776 CIFSSMBClose(xid, pTcon, netfid);
774 } 777 }
775 } 778 }
@@ -984,32 +987,41 @@ mkdir_get_info:
984 * failed to get it from the server or was set bogus */ 987 * failed to get it from the server or was set bogus */
985 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) 988 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
986 direntry->d_inode->i_nlink = 2; 989 direntry->d_inode->i_nlink = 2;
990
987 mode &= ~current->fs->umask; 991 mode &= ~current->fs->umask;
992 /* must turn on setgid bit if parent dir has it */
993 if (inode->i_mode & S_ISGID)
994 mode |= S_ISGID;
995
988 if (pTcon->unix_ext) { 996 if (pTcon->unix_ext) {
997 struct cifs_unix_set_info_args args = {
998 .mode = mode,
999 .ctime = NO_CHANGE_64,
1000 .atime = NO_CHANGE_64,
1001 .mtime = NO_CHANGE_64,
1002 .device = 0,
1003 };
989 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 1004 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
990 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 1005 args.uid = (__u64)current->fsuid;
991 mode, 1006 if (inode->i_mode & S_ISGID)
992 (__u64)current->fsuid, 1007 args.gid = (__u64)inode->i_gid;
993 (__u64)current->fsgid, 1008 else
994 0 /* dev_t */, 1009 args.gid = (__u64)current->fsgid;
995 cifs_sb->local_nls,
996 cifs_sb->mnt_cifs_flags &
997 CIFS_MOUNT_MAP_SPECIAL_CHR);
998 } else { 1010 } else {
999 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 1011 args.uid = NO_CHANGE_64;
1000 mode, (__u64)-1, 1012 args.gid = NO_CHANGE_64;
1001 (__u64)-1, 0 /* dev_t */,
1002 cifs_sb->local_nls,
1003 cifs_sb->mnt_cifs_flags &
1004 CIFS_MOUNT_MAP_SPECIAL_CHR);
1005 } 1013 }
1014 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
1015 cifs_sb->local_nls,
1016 cifs_sb->mnt_cifs_flags &
1017 CIFS_MOUNT_MAP_SPECIAL_CHR);
1006 } else { 1018 } else {
1007 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && 1019 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
1008 (mode & S_IWUGO) == 0) { 1020 (mode & S_IWUGO) == 0) {
1009 FILE_BASIC_INFO pInfo; 1021 FILE_BASIC_INFO pInfo;
1010 memset(&pInfo, 0, sizeof(pInfo)); 1022 memset(&pInfo, 0, sizeof(pInfo));
1011 pInfo.Attributes = cpu_to_le32(ATTR_READONLY); 1023 pInfo.Attributes = cpu_to_le32(ATTR_READONLY);
1012 CIFSSMBSetTimes(xid, pTcon, full_path, 1024 CIFSSMBSetPathInfo(xid, pTcon, full_path,
1013 &pInfo, cifs_sb->local_nls, 1025 &pInfo, cifs_sb->local_nls,
1014 cifs_sb->mnt_cifs_flags & 1026 cifs_sb->mnt_cifs_flags &
1015 CIFS_MOUNT_MAP_SPECIAL_CHR); 1027 CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -1024,8 +1036,12 @@ mkdir_get_info:
1024 CIFS_MOUNT_SET_UID) { 1036 CIFS_MOUNT_SET_UID) {
1025 direntry->d_inode->i_uid = 1037 direntry->d_inode->i_uid =
1026 current->fsuid; 1038 current->fsuid;
1027 direntry->d_inode->i_gid = 1039 if (inode->i_mode & S_ISGID)
1028 current->fsgid; 1040 direntry->d_inode->i_gid =
1041 inode->i_gid;
1042 else
1043 direntry->d_inode->i_gid =
1044 current->fsgid;
1029 } 1045 }
1030 } 1046 }
1031 } 1047 }
@@ -1310,10 +1326,11 @@ int cifs_revalidate(struct dentry *direntry)
1310/* if (S_ISDIR(direntry->d_inode->i_mode)) 1326/* if (S_ISDIR(direntry->d_inode->i_mode))
1311 shrink_dcache_parent(direntry); */ 1327 shrink_dcache_parent(direntry); */
1312 if (S_ISREG(direntry->d_inode->i_mode)) { 1328 if (S_ISREG(direntry->d_inode->i_mode)) {
1313 if (direntry->d_inode->i_mapping) 1329 if (direntry->d_inode->i_mapping) {
1314 wbrc = filemap_fdatawait(direntry->d_inode->i_mapping); 1330 wbrc = filemap_fdatawait(direntry->d_inode->i_mapping);
1315 if (wbrc) 1331 if (wbrc)
1316 CIFS_I(direntry->d_inode)->write_behind_rc = wbrc; 1332 CIFS_I(direntry->d_inode)->write_behind_rc = wbrc;
1333 }
1317 /* may eventually have to do this for open files too */ 1334 /* may eventually have to do this for open files too */
1318 if (list_empty(&(cifsInode->openFileList))) { 1335 if (list_empty(&(cifsInode->openFileList))) {
1319 /* changed on server - flush read ahead pages */ 1336 /* changed on server - flush read ahead pages */
@@ -1489,30 +1506,228 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1489 return rc; 1506 return rc;
1490} 1507}
1491 1508
1492int cifs_setattr(struct dentry *direntry, struct iattr *attrs) 1509static int
1510cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
1511 char *full_path, __u32 dosattr)
1493{ 1512{
1513 int rc;
1514 int oplock = 0;
1515 __u16 netfid;
1516 __u32 netpid;
1517 bool set_time = false;
1518 struct cifsFileInfo *open_file;
1519 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1520 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1521 struct cifsTconInfo *pTcon = cifs_sb->tcon;
1522 FILE_BASIC_INFO info_buf;
1523
1524 if (attrs->ia_valid & ATTR_ATIME) {
1525 set_time = true;
1526 info_buf.LastAccessTime =
1527 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
1528 } else
1529 info_buf.LastAccessTime = 0;
1530
1531 if (attrs->ia_valid & ATTR_MTIME) {
1532 set_time = true;
1533 info_buf.LastWriteTime =
1534 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
1535 } else
1536 info_buf.LastWriteTime = 0;
1537
1538 /*
1539 * Samba throws this field away, but windows may actually use it.
1540 * Do not set ctime unless other time stamps are changed explicitly
1541 * (i.e. by utimes()) since we would then have a mix of client and
1542 * server times.
1543 */
1544 if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
1545 cFYI(1, ("CIFS - CTIME changed"));
1546 info_buf.ChangeTime =
1547 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
1548 } else
1549 info_buf.ChangeTime = 0;
1550
1551 info_buf.CreationTime = 0; /* don't change */
1552 info_buf.Attributes = cpu_to_le32(dosattr);
1553
1554 /*
1555 * If the file is already open for write, just use that fileid
1556 */
1557 open_file = find_writable_file(cifsInode);
1558 if (open_file) {
1559 netfid = open_file->netfid;
1560 netpid = open_file->pid;
1561 goto set_via_filehandle;
1562 }
1563
1564 /*
1565 * NT4 apparently returns success on this call, but it doesn't
1566 * really work.
1567 */
1568 if (!(pTcon->ses->flags & CIFS_SES_NT4)) {
1569 rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
1570 &info_buf, cifs_sb->local_nls,
1571 cifs_sb->mnt_cifs_flags &
1572 CIFS_MOUNT_MAP_SPECIAL_CHR);
1573 if (rc != -EOPNOTSUPP && rc != -EINVAL)
1574 goto out;
1575 }
1576
1577 cFYI(1, ("calling SetFileInfo since SetPathInfo for "
1578 "times not supported by this server"));
1579 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
1580 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
1581 CREATE_NOT_DIR, &netfid, &oplock,
1582 NULL, cifs_sb->local_nls,
1583 cifs_sb->mnt_cifs_flags &
1584 CIFS_MOUNT_MAP_SPECIAL_CHR);
1585
1586 if (rc != 0) {
1587 if (rc == -EIO)
1588 rc = -EINVAL;
1589 goto out;
1590 }
1591
1592 netpid = current->tgid;
1593
1594set_via_filehandle:
1595 rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid);
1596 if (open_file == NULL)
1597 CIFSSMBClose(xid, pTcon, netfid);
1598 else
1599 atomic_dec(&open_file->wrtPending);
1600out:
1601 return rc;
1602}
1603
1604static int
1605cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1606{
1607 int rc;
1494 int xid; 1608 int xid;
1495 struct cifs_sb_info *cifs_sb;
1496 struct cifsTconInfo *pTcon;
1497 char *full_path = NULL; 1609 char *full_path = NULL;
1498 int rc = -EACCES;
1499 FILE_BASIC_INFO time_buf;
1500 bool set_time = false;
1501 bool set_dosattr = false;
1502 __u64 mode = 0xFFFFFFFFFFFFFFFFULL;
1503 __u64 uid = 0xFFFFFFFFFFFFFFFFULL;
1504 __u64 gid = 0xFFFFFFFFFFFFFFFFULL;
1505 struct cifsInodeInfo *cifsInode;
1506 struct inode *inode = direntry->d_inode; 1610 struct inode *inode = direntry->d_inode;
1611 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1612 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1613 struct cifsTconInfo *pTcon = cifs_sb->tcon;
1614 struct cifs_unix_set_info_args *args = NULL;
1615
1616 cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x",
1617 direntry->d_name.name, attrs->ia_valid));
1618
1619 xid = GetXid();
1620
1621 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
1622 /* check if we have permission to change attrs */
1623 rc = inode_change_ok(inode, attrs);
1624 if (rc < 0)
1625 goto out;
1626 else
1627 rc = 0;
1628 }
1629
1630 full_path = build_path_from_dentry(direntry);
1631 if (full_path == NULL) {
1632 rc = -ENOMEM;
1633 goto out;
1634 }
1635
1636 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
1637 /*
1638 Flush data before changing file size or changing the last
1639 write time of the file on the server. If the
1640 flush returns error, store it to report later and continue.
1641 BB: This should be smarter. Why bother flushing pages that
1642 will be truncated anyway? Also, should we error out here if
1643 the flush returns error?
1644 */
1645 rc = filemap_write_and_wait(inode->i_mapping);
1646 if (rc != 0) {
1647 cifsInode->write_behind_rc = rc;
1648 rc = 0;
1649 }
1650 }
1651
1652 if (attrs->ia_valid & ATTR_SIZE) {
1653 rc = cifs_set_file_size(inode, attrs, xid, full_path);
1654 if (rc != 0)
1655 goto out;
1656 }
1657
1658 /* skip mode change if it's just for clearing setuid/setgid */
1659 if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
1660 attrs->ia_valid &= ~ATTR_MODE;
1661
1662 args = kmalloc(sizeof(*args), GFP_KERNEL);
1663 if (args == NULL) {
1664 rc = -ENOMEM;
1665 goto out;
1666 }
1667
1668 /* set up the struct */
1669 if (attrs->ia_valid & ATTR_MODE)
1670 args->mode = attrs->ia_mode;
1671 else
1672 args->mode = NO_CHANGE_64;
1673
1674 if (attrs->ia_valid & ATTR_UID)
1675 args->uid = attrs->ia_uid;
1676 else
1677 args->uid = NO_CHANGE_64;
1678
1679 if (attrs->ia_valid & ATTR_GID)
1680 args->gid = attrs->ia_gid;
1681 else
1682 args->gid = NO_CHANGE_64;
1683
1684 if (attrs->ia_valid & ATTR_ATIME)
1685 args->atime = cifs_UnixTimeToNT(attrs->ia_atime);
1686 else
1687 args->atime = NO_CHANGE_64;
1688
1689 if (attrs->ia_valid & ATTR_MTIME)
1690 args->mtime = cifs_UnixTimeToNT(attrs->ia_mtime);
1691 else
1692 args->mtime = NO_CHANGE_64;
1693
1694 if (attrs->ia_valid & ATTR_CTIME)
1695 args->ctime = cifs_UnixTimeToNT(attrs->ia_ctime);
1696 else
1697 args->ctime = NO_CHANGE_64;
1698
1699 args->device = 0;
1700 rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args,
1701 cifs_sb->local_nls,
1702 cifs_sb->mnt_cifs_flags &
1703 CIFS_MOUNT_MAP_SPECIAL_CHR);
1704
1705 if (!rc)
1706 rc = inode_setattr(inode, attrs);
1707out:
1708 kfree(args);
1709 kfree(full_path);
1710 FreeXid(xid);
1711 return rc;
1712}
1713
1714static int
1715cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
1716{
1717 int xid;
1718 struct inode *inode = direntry->d_inode;
1719 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1720 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1721 char *full_path = NULL;
1722 int rc = -EACCES;
1723 __u32 dosattr = 0;
1724 __u64 mode = NO_CHANGE_64;
1507 1725
1508 xid = GetXid(); 1726 xid = GetXid();
1509 1727
1510 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x", 1728 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x",
1511 direntry->d_name.name, attrs->ia_valid)); 1729 direntry->d_name.name, attrs->ia_valid));
1512 1730
1513 cifs_sb = CIFS_SB(inode->i_sb);
1514 pTcon = cifs_sb->tcon;
1515
1516 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { 1731 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
1517 /* check if we have permission to change attrs */ 1732 /* check if we have permission to change attrs */
1518 rc = inode_change_ok(inode, attrs); 1733 rc = inode_change_ok(inode, attrs);
@@ -1528,7 +1743,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1528 FreeXid(xid); 1743 FreeXid(xid);
1529 return -ENOMEM; 1744 return -ENOMEM;
1530 } 1745 }
1531 cifsInode = CIFS_I(inode);
1532 1746
1533 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { 1747 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
1534 /* 1748 /*
@@ -1559,21 +1773,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1559 * CIFSACL support + proper Windows to Unix idmapping, we may be 1773 * CIFSACL support + proper Windows to Unix idmapping, we may be
1560 * able to support this in the future. 1774 * able to support this in the future.
1561 */ 1775 */
1562 if (!pTcon->unix_ext && 1776 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID))
1563 !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
1564 attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); 1777 attrs->ia_valid &= ~(ATTR_UID | ATTR_GID);
1565 } else {
1566 if (attrs->ia_valid & ATTR_UID) {
1567 cFYI(1, ("UID changed to %d", attrs->ia_uid));
1568 uid = attrs->ia_uid;
1569 }
1570 if (attrs->ia_valid & ATTR_GID) {
1571 cFYI(1, ("GID changed to %d", attrs->ia_gid));
1572 gid = attrs->ia_gid;
1573 }
1574 }
1575
1576 time_buf.Attributes = 0;
1577 1778
1578 /* skip mode change if it's just for clearing setuid/setgid */ 1779 /* skip mode change if it's just for clearing setuid/setgid */
1579 if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) 1780 if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
@@ -1584,13 +1785,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1584 mode = attrs->ia_mode; 1785 mode = attrs->ia_mode;
1585 } 1786 }
1586 1787
1587 if ((pTcon->unix_ext) 1788 if (attrs->ia_valid & ATTR_MODE) {
1588 && (attrs->ia_valid & (ATTR_MODE | ATTR_GID | ATTR_UID)))
1589 rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, uid, gid,
1590 0 /* dev_t */, cifs_sb->local_nls,
1591 cifs_sb->mnt_cifs_flags &
1592 CIFS_MOUNT_MAP_SPECIAL_CHR);
1593 else if (attrs->ia_valid & ATTR_MODE) {
1594 rc = 0; 1789 rc = 0;
1595#ifdef CONFIG_CIFS_EXPERIMENTAL 1790#ifdef CONFIG_CIFS_EXPERIMENTAL
1596 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) 1791 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
@@ -1599,24 +1794,19 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1599#endif 1794#endif
1600 if (((mode & S_IWUGO) == 0) && 1795 if (((mode & S_IWUGO) == 0) &&
1601 (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { 1796 (cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
1602 set_dosattr = true; 1797
1603 time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs | 1798 dosattr = cifsInode->cifsAttrs | ATTR_READONLY;
1604 ATTR_READONLY); 1799
1605 /* fix up mode if we're not using dynperm */ 1800 /* fix up mode if we're not using dynperm */
1606 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) 1801 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
1607 attrs->ia_mode = inode->i_mode & ~S_IWUGO; 1802 attrs->ia_mode = inode->i_mode & ~S_IWUGO;
1608 } else if ((mode & S_IWUGO) && 1803 } else if ((mode & S_IWUGO) &&
1609 (cifsInode->cifsAttrs & ATTR_READONLY)) { 1804 (cifsInode->cifsAttrs & ATTR_READONLY)) {
1610 /* If file is readonly on server, we would 1805
1611 not be able to write to it - so if any write 1806 dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY;
1612 bit is enabled for user or group or other we 1807 /* Attributes of 0 are ignored */
1613 need to at least try to remove r/o dos attr */ 1808 if (dosattr == 0)
1614 set_dosattr = true; 1809 dosattr |= ATTR_NORMAL;
1615 time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs &
1616 (~ATTR_READONLY));
1617 /* Windows ignores set to zero */
1618 if (time_buf.Attributes == 0)
1619 time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL);
1620 1810
1621 /* reset local inode permissions to normal */ 1811 /* reset local inode permissions to normal */
1622 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { 1812 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
@@ -1634,82 +1824,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1634 } 1824 }
1635 } 1825 }
1636 1826
1637 if (attrs->ia_valid & ATTR_ATIME) { 1827 if (attrs->ia_valid & (ATTR_MTIME|ATTR_ATIME|ATTR_CTIME) ||
1638 set_time = true; 1828 ((attrs->ia_valid & ATTR_MODE) && dosattr)) {
1639 time_buf.LastAccessTime = 1829 rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr);
1640 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); 1830 /* BB: check for rc = -EOPNOTSUPP and switch to legacy mode */
1641 } else
1642 time_buf.LastAccessTime = 0;
1643
1644 if (attrs->ia_valid & ATTR_MTIME) {
1645 set_time = true;
1646 time_buf.LastWriteTime =
1647 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
1648 } else
1649 time_buf.LastWriteTime = 0;
1650 /* Do not set ctime explicitly unless other time
1651 stamps are changed explicitly (i.e. by utime()
1652 since we would then have a mix of client and
1653 server times */
1654 1831
1655 if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
1656 set_time = true;
1657 /* Although Samba throws this field away
1658 it may be useful to Windows - but we do
1659 not want to set ctime unless some other
1660 timestamp is changing */
1661 cFYI(1, ("CIFS - CTIME changed"));
1662 time_buf.ChangeTime =
1663 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
1664 } else
1665 time_buf.ChangeTime = 0;
1666
1667 if (set_time || set_dosattr) {
1668 time_buf.CreationTime = 0; /* do not change */
1669 /* In the future we should experiment - try setting timestamps
1670 via Handle (SetFileInfo) instead of by path */
1671 if (!(pTcon->ses->flags & CIFS_SES_NT4))
1672 rc = CIFSSMBSetTimes(xid, pTcon, full_path, &time_buf,
1673 cifs_sb->local_nls,
1674 cifs_sb->mnt_cifs_flags &
1675 CIFS_MOUNT_MAP_SPECIAL_CHR);
1676 else
1677 rc = -EOPNOTSUPP;
1678
1679 if (rc == -EOPNOTSUPP) {
1680 int oplock = 0;
1681 __u16 netfid;
1682
1683 cFYI(1, ("calling SetFileInfo since SetPathInfo for "
1684 "times not supported by this server"));
1685 /* BB we could scan to see if we already have it open
1686 and pass in pid of opener to function */
1687 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
1688 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
1689 CREATE_NOT_DIR, &netfid, &oplock,
1690 NULL, cifs_sb->local_nls,
1691 cifs_sb->mnt_cifs_flags &
1692 CIFS_MOUNT_MAP_SPECIAL_CHR);
1693 if (rc == 0) {
1694 rc = CIFSSMBSetFileTimes(xid, pTcon, &time_buf,
1695 netfid);
1696 CIFSSMBClose(xid, pTcon, netfid);
1697 } else {
1698 /* BB For even older servers we could convert time_buf
1699 into old DOS style which uses two second
1700 granularity */
1701
1702 /* rc = CIFSSMBSetTimesLegacy(xid, pTcon, full_path,
1703 &time_buf, cifs_sb->local_nls); */
1704 }
1705 }
1706 /* Even if error on time set, no sense failing the call if 1832 /* Even if error on time set, no sense failing the call if
1707 the server would set the time to a reasonable value anyway, 1833 the server would set the time to a reasonable value anyway,
1708 and this check ensures that we are not being called from 1834 and this check ensures that we are not being called from
1709 sys_utimes in which case we ought to fail the call back to 1835 sys_utimes in which case we ought to fail the call back to
1710 the user when the server rejects the call */ 1836 the user when the server rejects the call */
1711 if ((rc) && (attrs->ia_valid & 1837 if ((rc) && (attrs->ia_valid &
1712 (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE))) 1838 (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE)))
1713 rc = 0; 1839 rc = 0;
1714 } 1840 }
1715 1841
@@ -1723,6 +1849,21 @@ cifs_setattr_exit:
1723 return rc; 1849 return rc;
1724} 1850}
1725 1851
1852int
1853cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1854{
1855 struct inode *inode = direntry->d_inode;
1856 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1857 struct cifsTconInfo *pTcon = cifs_sb->tcon;
1858
1859 if (pTcon->unix_ext)
1860 return cifs_setattr_unix(direntry, attrs);
1861
1862 return cifs_setattr_nounix(direntry, attrs);
1863
1864 /* BB: add cifs_setattr_legacy for really old servers */
1865}
1866
1726#if 0 1867#if 0
1727void cifs_delete_inode(struct inode *inode) 1868void cifs_delete_inode(struct inode *inode)
1728{ 1869{
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index ed150efbe27c..252fdc0567f1 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -409,6 +409,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
409#ifdef CONFIG_CIFS_WEAK_PW_HASH 409#ifdef CONFIG_CIFS_WEAK_PW_HASH
410 char lnm_session_key[CIFS_SESS_KEY_SIZE]; 410 char lnm_session_key[CIFS_SESS_KEY_SIZE];
411 411
412 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
413
412 /* no capabilities flags in old lanman negotiation */ 414 /* no capabilities flags in old lanman negotiation */
413 415
414 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); 416 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
@@ -505,7 +507,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
505 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); 507 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
506 } else 508 } else
507 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 509 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
508 } else if (type == Kerberos) { 510 } else if (type == Kerberos || type == MSKerberos) {
509#ifdef CONFIG_CIFS_UPCALL 511#ifdef CONFIG_CIFS_UPCALL
510 struct cifs_spnego_msg *msg; 512 struct cifs_spnego_msg *msg;
511 spnego_key = cifs_get_spnego_key(ses); 513 spnego_key = cifs_get_spnego_key(ses);
@@ -516,6 +518,15 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
516 } 518 }
517 519
518 msg = spnego_key->payload.data; 520 msg = spnego_key->payload.data;
521 /* check version field to make sure that cifs.upcall is
522 sending us a response in an expected form */
523 if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
524 cERROR(1, ("incorrect version of cifs.upcall (expected"
525 " %d but got %d)",
526 CIFS_SPNEGO_UPCALL_VERSION, msg->version));
527 rc = -EKEYREJECTED;
528 goto ssetup_exit;
529 }
519 /* bail out if key is too long */ 530 /* bail out if key is too long */
520 if (msg->sesskey_len > 531 if (msg->sesskey_len >
521 sizeof(ses->server->mac_signing_key.data.krb5)) { 532 sizeof(ses->server->mac_signing_key.data.krb5)) {
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 000ac509c98a..e286db9f5ee2 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -265,6 +265,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
265 cFYI(1, ("Sending smb: total_len %d", total_len)); 265 cFYI(1, ("Sending smb: total_len %d", total_len));
266 dump_smb(smb_buffer, len); 266 dump_smb(smb_buffer, len);
267 267
268 i = 0;
268 while (total_len) { 269 while (total_len) {
269 rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec], 270 rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec],
270 n_vec - first_vec, total_len); 271 n_vec - first_vec, total_len);
diff --git a/fs/compat.c b/fs/compat.c
index c9d1472e65c5..075d0509970d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -792,8 +792,10 @@ static int compat_fillonedir(void *__buf, const char *name, int namlen,
792 if (buf->result) 792 if (buf->result)
793 return -EINVAL; 793 return -EINVAL;
794 d_ino = ino; 794 d_ino = ino;
795 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) 795 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
796 buf->result = -EOVERFLOW;
796 return -EOVERFLOW; 797 return -EOVERFLOW;
798 }
797 buf->result++; 799 buf->result++;
798 dirent = buf->dirent; 800 dirent = buf->dirent;
799 if (!access_ok(VERIFY_WRITE, dirent, 801 if (!access_ok(VERIFY_WRITE, dirent,
@@ -862,8 +864,10 @@ static int compat_filldir(void *__buf, const char *name, int namlen,
862 if (reclen > buf->count) 864 if (reclen > buf->count)
863 return -EINVAL; 865 return -EINVAL;
864 d_ino = ino; 866 d_ino = ino;
865 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) 867 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
868 buf->error = -EOVERFLOW;
866 return -EOVERFLOW; 869 return -EOVERFLOW;
870 }
867 dirent = buf->previous; 871 dirent = buf->previous;
868 if (dirent) { 872 if (dirent) {
869 if (__put_user(offset, &dirent->d_off)) 873 if (__put_user(offset, &dirent->d_off))
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da015c12e3ea..762d287123ca 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -49,8 +49,10 @@ struct configfs_dirent {
49#define CONFIGFS_USET_DEFAULT 0x0080 49#define CONFIGFS_USET_DEFAULT 0x0080
50#define CONFIGFS_USET_DROPPING 0x0100 50#define CONFIGFS_USET_DROPPING 0x0100
51#define CONFIGFS_USET_IN_MKDIR 0x0200 51#define CONFIGFS_USET_IN_MKDIR 0x0200
52#define CONFIGFS_USET_CREATING 0x0400
52#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) 53#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR)
53 54
55extern struct mutex configfs_symlink_mutex;
54extern spinlock_t configfs_dirent_lock; 56extern spinlock_t configfs_dirent_lock;
55 57
56extern struct vfsmount * configfs_mount; 58extern struct vfsmount * configfs_mount;
@@ -66,6 +68,7 @@ extern void configfs_inode_exit(void);
66extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); 68extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
67extern int configfs_make_dirent(struct configfs_dirent *, 69extern int configfs_make_dirent(struct configfs_dirent *,
68 struct dentry *, void *, umode_t, int); 70 struct dentry *, void *, umode_t, int);
71extern int configfs_dirent_is_ready(struct configfs_dirent *);
69 72
70extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int); 73extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int);
71extern void configfs_hash_and_remove(struct dentry * dir, const char * name); 74extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 179589be063a..8e93341f3e82 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -185,7 +185,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
185 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); 185 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
186 if (!error) 186 if (!error)
187 error = configfs_make_dirent(p->d_fsdata, d, k, mode, 187 error = configfs_make_dirent(p->d_fsdata, d, k, mode,
188 CONFIGFS_DIR); 188 CONFIGFS_DIR | CONFIGFS_USET_CREATING);
189 if (!error) { 189 if (!error) {
190 error = configfs_create(d, mode, init_dir); 190 error = configfs_create(d, mode, init_dir);
191 if (!error) { 191 if (!error) {
@@ -209,6 +209,9 @@ static int create_dir(struct config_item * k, struct dentry * p,
209 * configfs_create_dir - create a directory for an config_item. 209 * configfs_create_dir - create a directory for an config_item.
210 * @item: config_itemwe're creating directory for. 210 * @item: config_itemwe're creating directory for.
211 * @dentry: config_item's dentry. 211 * @dentry: config_item's dentry.
212 *
213 * Note: user-created entries won't be allowed under this new directory
214 * until it is validated by configfs_dir_set_ready()
212 */ 215 */
213 216
214static int configfs_create_dir(struct config_item * item, struct dentry *dentry) 217static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
@@ -231,6 +234,44 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
231 return error; 234 return error;
232} 235}
233 236
237/*
238 * Allow userspace to create new entries under a new directory created with
239 * configfs_create_dir(), and under all of its chidlren directories recursively.
240 * @sd configfs_dirent of the new directory to validate
241 *
242 * Caller must hold configfs_dirent_lock.
243 */
244static void configfs_dir_set_ready(struct configfs_dirent *sd)
245{
246 struct configfs_dirent *child_sd;
247
248 sd->s_type &= ~CONFIGFS_USET_CREATING;
249 list_for_each_entry(child_sd, &sd->s_children, s_sibling)
250 if (child_sd->s_type & CONFIGFS_USET_CREATING)
251 configfs_dir_set_ready(child_sd);
252}
253
254/*
255 * Check that a directory does not belong to a directory hierarchy being
256 * attached and not validated yet.
257 * @sd configfs_dirent of the directory to check
258 *
259 * @return non-zero iff the directory was validated
260 *
261 * Note: takes configfs_dirent_lock, so the result may change from false to true
262 * in two consecutive calls, but never from true to false.
263 */
264int configfs_dirent_is_ready(struct configfs_dirent *sd)
265{
266 int ret;
267
268 spin_lock(&configfs_dirent_lock);
269 ret = !(sd->s_type & CONFIGFS_USET_CREATING);
270 spin_unlock(&configfs_dirent_lock);
271
272 return ret;
273}
274
234int configfs_create_link(struct configfs_symlink *sl, 275int configfs_create_link(struct configfs_symlink *sl,
235 struct dentry *parent, 276 struct dentry *parent,
236 struct dentry *dentry) 277 struct dentry *dentry)
@@ -283,6 +324,8 @@ static void remove_dir(struct dentry * d)
283 * The only thing special about this is that we remove any files in 324 * The only thing special about this is that we remove any files in
284 * the directory before we remove the directory, and we've inlined 325 * the directory before we remove the directory, and we've inlined
285 * what used to be configfs_rmdir() below, instead of calling separately. 326 * what used to be configfs_rmdir() below, instead of calling separately.
327 *
328 * Caller holds the mutex of the item's inode
286 */ 329 */
287 330
288static void configfs_remove_dir(struct config_item * item) 331static void configfs_remove_dir(struct config_item * item)
@@ -330,7 +373,19 @@ static struct dentry * configfs_lookup(struct inode *dir,
330 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; 373 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
331 struct configfs_dirent * sd; 374 struct configfs_dirent * sd;
332 int found = 0; 375 int found = 0;
333 int err = 0; 376 int err;
377
378 /*
379 * Fake invisibility if dir belongs to a group/default groups hierarchy
380 * being attached
381 *
382 * This forbids userspace to read/write attributes of items which may
383 * not complete their initialization, since the dentries of the
384 * attributes won't be instantiated.
385 */
386 err = -ENOENT;
387 if (!configfs_dirent_is_ready(parent_sd))
388 goto out;
334 389
335 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 390 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
336 if (sd->s_type & CONFIGFS_NOT_PINNED) { 391 if (sd->s_type & CONFIGFS_NOT_PINNED) {
@@ -353,6 +408,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
353 return simple_lookup(dir, dentry, nd); 408 return simple_lookup(dir, dentry, nd);
354 } 409 }
355 410
411out:
356 return ERR_PTR(err); 412 return ERR_PTR(err);
357} 413}
358 414
@@ -370,13 +426,17 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
370 struct configfs_dirent *sd; 426 struct configfs_dirent *sd;
371 int ret; 427 int ret;
372 428
429 /* Mark that we're trying to drop the group */
430 parent_sd->s_type |= CONFIGFS_USET_DROPPING;
431
373 ret = -EBUSY; 432 ret = -EBUSY;
374 if (!list_empty(&parent_sd->s_links)) 433 if (!list_empty(&parent_sd->s_links))
375 goto out; 434 goto out;
376 435
377 ret = 0; 436 ret = 0;
378 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 437 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
379 if (sd->s_type & CONFIGFS_NOT_PINNED) 438 if (!sd->s_element ||
439 (sd->s_type & CONFIGFS_NOT_PINNED))
380 continue; 440 continue;
381 if (sd->s_type & CONFIGFS_USET_DEFAULT) { 441 if (sd->s_type & CONFIGFS_USET_DEFAULT) {
382 /* Abort if racing with mkdir() */ 442 /* Abort if racing with mkdir() */
@@ -385,8 +445,6 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
385 *wait_mutex = &sd->s_dentry->d_inode->i_mutex; 445 *wait_mutex = &sd->s_dentry->d_inode->i_mutex;
386 return -EAGAIN; 446 return -EAGAIN;
387 } 447 }
388 /* Mark that we're trying to drop the group */
389 sd->s_type |= CONFIGFS_USET_DROPPING;
390 448
391 /* 449 /*
392 * Yup, recursive. If there's a problem, blame 450 * Yup, recursive. If there's a problem, blame
@@ -414,12 +472,11 @@ static void configfs_detach_rollback(struct dentry *dentry)
414 struct configfs_dirent *parent_sd = dentry->d_fsdata; 472 struct configfs_dirent *parent_sd = dentry->d_fsdata;
415 struct configfs_dirent *sd; 473 struct configfs_dirent *sd;
416 474
417 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 475 parent_sd->s_type &= ~CONFIGFS_USET_DROPPING;
418 if (sd->s_type & CONFIGFS_USET_DEFAULT) { 476
477 list_for_each_entry(sd, &parent_sd->s_children, s_sibling)
478 if (sd->s_type & CONFIGFS_USET_DEFAULT)
419 configfs_detach_rollback(sd->s_dentry); 479 configfs_detach_rollback(sd->s_dentry);
420 sd->s_type &= ~CONFIGFS_USET_DROPPING;
421 }
422 }
423} 480}
424 481
425static void detach_attrs(struct config_item * item) 482static void detach_attrs(struct config_item * item)
@@ -558,36 +615,21 @@ static int create_default_group(struct config_group *parent_group,
558static int populate_groups(struct config_group *group) 615static int populate_groups(struct config_group *group)
559{ 616{
560 struct config_group *new_group; 617 struct config_group *new_group;
561 struct dentry *dentry = group->cg_item.ci_dentry;
562 int ret = 0; 618 int ret = 0;
563 int i; 619 int i;
564 620
565 if (group->default_groups) { 621 if (group->default_groups) {
566 /*
567 * FYI, we're faking mkdir here
568 * I'm not sure we need this semaphore, as we're called
569 * from our parent's mkdir. That holds our parent's
570 * i_mutex, so afaik lookup cannot continue through our
571 * parent to find us, let alone mess with our tree.
572 * That said, taking our i_mutex is closer to mkdir
573 * emulation, and shouldn't hurt.
574 */
575 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
576
577 for (i = 0; group->default_groups[i]; i++) { 622 for (i = 0; group->default_groups[i]; i++) {
578 new_group = group->default_groups[i]; 623 new_group = group->default_groups[i];
579 624
580 ret = create_default_group(group, new_group); 625 ret = create_default_group(group, new_group);
581 if (ret) 626 if (ret) {
627 detach_groups(group);
582 break; 628 break;
629 }
583 } 630 }
584
585 mutex_unlock(&dentry->d_inode->i_mutex);
586 } 631 }
587 632
588 if (ret)
589 detach_groups(group);
590
591 return ret; 633 return ret;
592} 634}
593 635
@@ -702,7 +744,15 @@ static int configfs_attach_item(struct config_item *parent_item,
702 if (!ret) { 744 if (!ret) {
703 ret = populate_attrs(item); 745 ret = populate_attrs(item);
704 if (ret) { 746 if (ret) {
747 /*
748 * We are going to remove an inode and its dentry but
749 * the VFS may already have hit and used them. Thus,
750 * we must lock them as rmdir() would.
751 */
752 mutex_lock(&dentry->d_inode->i_mutex);
705 configfs_remove_dir(item); 753 configfs_remove_dir(item);
754 dentry->d_inode->i_flags |= S_DEAD;
755 mutex_unlock(&dentry->d_inode->i_mutex);
706 d_delete(dentry); 756 d_delete(dentry);
707 } 757 }
708 } 758 }
@@ -710,6 +760,7 @@ static int configfs_attach_item(struct config_item *parent_item,
710 return ret; 760 return ret;
711} 761}
712 762
763/* Caller holds the mutex of the item's inode */
713static void configfs_detach_item(struct config_item *item) 764static void configfs_detach_item(struct config_item *item)
714{ 765{
715 detach_attrs(item); 766 detach_attrs(item);
@@ -728,16 +779,30 @@ static int configfs_attach_group(struct config_item *parent_item,
728 sd = dentry->d_fsdata; 779 sd = dentry->d_fsdata;
729 sd->s_type |= CONFIGFS_USET_DIR; 780 sd->s_type |= CONFIGFS_USET_DIR;
730 781
782 /*
783 * FYI, we're faking mkdir in populate_groups()
784 * We must lock the group's inode to avoid races with the VFS
785 * which can already hit the inode and try to add/remove entries
786 * under it.
787 *
788 * We must also lock the inode to remove it safely in case of
789 * error, as rmdir() would.
790 */
791 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
731 ret = populate_groups(to_config_group(item)); 792 ret = populate_groups(to_config_group(item));
732 if (ret) { 793 if (ret) {
733 configfs_detach_item(item); 794 configfs_detach_item(item);
734 d_delete(dentry); 795 dentry->d_inode->i_flags |= S_DEAD;
735 } 796 }
797 mutex_unlock(&dentry->d_inode->i_mutex);
798 if (ret)
799 d_delete(dentry);
736 } 800 }
737 801
738 return ret; 802 return ret;
739} 803}
740 804
805/* Caller holds the mutex of the group's inode */
741static void configfs_detach_group(struct config_item *item) 806static void configfs_detach_group(struct config_item *item)
742{ 807{
743 detach_groups(to_config_group(item)); 808 detach_groups(to_config_group(item));
@@ -1035,7 +1100,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1035 struct configfs_subsystem *subsys; 1100 struct configfs_subsystem *subsys;
1036 struct configfs_dirent *sd; 1101 struct configfs_dirent *sd;
1037 struct config_item_type *type; 1102 struct config_item_type *type;
1038 struct module *owner = NULL; 1103 struct module *subsys_owner = NULL, *new_item_owner = NULL;
1039 char *name; 1104 char *name;
1040 1105
1041 if (dentry->d_parent == configfs_sb->s_root) { 1106 if (dentry->d_parent == configfs_sb->s_root) {
@@ -1044,6 +1109,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1044 } 1109 }
1045 1110
1046 sd = dentry->d_parent->d_fsdata; 1111 sd = dentry->d_parent->d_fsdata;
1112
1113 /*
1114 * Fake invisibility if dir belongs to a group/default groups hierarchy
1115 * being attached
1116 */
1117 if (!configfs_dirent_is_ready(sd)) {
1118 ret = -ENOENT;
1119 goto out;
1120 }
1121
1047 if (!(sd->s_type & CONFIGFS_USET_DIR)) { 1122 if (!(sd->s_type & CONFIGFS_USET_DIR)) {
1048 ret = -EPERM; 1123 ret = -EPERM;
1049 goto out; 1124 goto out;
@@ -1062,10 +1137,25 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1062 goto out_put; 1137 goto out_put;
1063 } 1138 }
1064 1139
1140 /*
1141 * The subsystem may belong to a different module than the item
1142 * being created. We don't want to safely pin the new item but
1143 * fail to pin the subsystem it sits under.
1144 */
1145 if (!subsys->su_group.cg_item.ci_type) {
1146 ret = -EINVAL;
1147 goto out_put;
1148 }
1149 subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
1150 if (!try_module_get(subsys_owner)) {
1151 ret = -EINVAL;
1152 goto out_put;
1153 }
1154
1065 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); 1155 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
1066 if (!name) { 1156 if (!name) {
1067 ret = -ENOMEM; 1157 ret = -ENOMEM;
1068 goto out_put; 1158 goto out_subsys_put;
1069 } 1159 }
1070 1160
1071 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); 1161 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
@@ -1094,10 +1184,10 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1094 kfree(name); 1184 kfree(name);
1095 if (ret) { 1185 if (ret) {
1096 /* 1186 /*
1097 * If item == NULL, then link_obj() was never called. 1187 * If ret != 0, then link_obj() was never called.
1098 * There are no extra references to clean up. 1188 * There are no extra references to clean up.
1099 */ 1189 */
1100 goto out_put; 1190 goto out_subsys_put;
1101 } 1191 }
1102 1192
1103 /* 1193 /*
@@ -1111,8 +1201,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1111 goto out_unlink; 1201 goto out_unlink;
1112 } 1202 }
1113 1203
1114 owner = type->ct_owner; 1204 new_item_owner = type->ct_owner;
1115 if (!try_module_get(owner)) { 1205 if (!try_module_get(new_item_owner)) {
1116 ret = -EINVAL; 1206 ret = -EINVAL;
1117 goto out_unlink; 1207 goto out_unlink;
1118 } 1208 }
@@ -1142,6 +1232,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1142 1232
1143 spin_lock(&configfs_dirent_lock); 1233 spin_lock(&configfs_dirent_lock);
1144 sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; 1234 sd->s_type &= ~CONFIGFS_USET_IN_MKDIR;
1235 if (!ret)
1236 configfs_dir_set_ready(dentry->d_fsdata);
1145 spin_unlock(&configfs_dirent_lock); 1237 spin_unlock(&configfs_dirent_lock);
1146 1238
1147out_unlink: 1239out_unlink:
@@ -1159,9 +1251,13 @@ out_unlink:
1159 mutex_unlock(&subsys->su_mutex); 1251 mutex_unlock(&subsys->su_mutex);
1160 1252
1161 if (module_got) 1253 if (module_got)
1162 module_put(owner); 1254 module_put(new_item_owner);
1163 } 1255 }
1164 1256
1257out_subsys_put:
1258 if (ret)
1259 module_put(subsys_owner);
1260
1165out_put: 1261out_put:
1166 /* 1262 /*
1167 * link_obj()/link_group() took a reference from child->parent, 1263 * link_obj()/link_group() took a reference from child->parent,
@@ -1180,7 +1276,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1180 struct config_item *item; 1276 struct config_item *item;
1181 struct configfs_subsystem *subsys; 1277 struct configfs_subsystem *subsys;
1182 struct configfs_dirent *sd; 1278 struct configfs_dirent *sd;
1183 struct module *owner = NULL; 1279 struct module *subsys_owner = NULL, *dead_item_owner = NULL;
1184 int ret; 1280 int ret;
1185 1281
1186 if (dentry->d_parent == configfs_sb->s_root) 1282 if (dentry->d_parent == configfs_sb->s_root)
@@ -1207,14 +1303,26 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1207 return -EINVAL; 1303 return -EINVAL;
1208 } 1304 }
1209 1305
1210 spin_lock(&configfs_dirent_lock); 1306 /* configfs_mkdir() shouldn't have allowed this */
1307 BUG_ON(!subsys->su_group.cg_item.ci_type);
1308 subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
1309
1310 /*
1311 * Ensure that no racing symlink() will make detach_prep() fail while
1312 * the new link is temporarily attached
1313 */
1211 do { 1314 do {
1212 struct mutex *wait_mutex; 1315 struct mutex *wait_mutex;
1213 1316
1317 mutex_lock(&configfs_symlink_mutex);
1318 spin_lock(&configfs_dirent_lock);
1214 ret = configfs_detach_prep(dentry, &wait_mutex); 1319 ret = configfs_detach_prep(dentry, &wait_mutex);
1215 if (ret) { 1320 if (ret)
1216 configfs_detach_rollback(dentry); 1321 configfs_detach_rollback(dentry);
1217 spin_unlock(&configfs_dirent_lock); 1322 spin_unlock(&configfs_dirent_lock);
1323 mutex_unlock(&configfs_symlink_mutex);
1324
1325 if (ret) {
1218 if (ret != -EAGAIN) { 1326 if (ret != -EAGAIN) {
1219 config_item_put(parent_item); 1327 config_item_put(parent_item);
1220 return ret; 1328 return ret;
@@ -1223,11 +1331,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1223 /* Wait until the racing operation terminates */ 1331 /* Wait until the racing operation terminates */
1224 mutex_lock(wait_mutex); 1332 mutex_lock(wait_mutex);
1225 mutex_unlock(wait_mutex); 1333 mutex_unlock(wait_mutex);
1226
1227 spin_lock(&configfs_dirent_lock);
1228 } 1334 }
1229 } while (ret == -EAGAIN); 1335 } while (ret == -EAGAIN);
1230 spin_unlock(&configfs_dirent_lock);
1231 1336
1232 /* Get a working ref for the duration of this function */ 1337 /* Get a working ref for the duration of this function */
1233 item = configfs_get_config_item(dentry); 1338 item = configfs_get_config_item(dentry);
@@ -1236,7 +1341,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1236 config_item_put(parent_item); 1341 config_item_put(parent_item);
1237 1342
1238 if (item->ci_type) 1343 if (item->ci_type)
1239 owner = item->ci_type->ct_owner; 1344 dead_item_owner = item->ci_type->ct_owner;
1240 1345
1241 if (sd->s_type & CONFIGFS_USET_DIR) { 1346 if (sd->s_type & CONFIGFS_USET_DIR) {
1242 configfs_detach_group(item); 1347 configfs_detach_group(item);
@@ -1258,7 +1363,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1258 /* Drop our reference from above */ 1363 /* Drop our reference from above */
1259 config_item_put(item); 1364 config_item_put(item);
1260 1365
1261 module_put(owner); 1366 module_put(dead_item_owner);
1367 module_put(subsys_owner);
1262 1368
1263 return 0; 1369 return 0;
1264} 1370}
@@ -1314,13 +1420,24 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
1314{ 1420{
1315 struct dentry * dentry = file->f_path.dentry; 1421 struct dentry * dentry = file->f_path.dentry;
1316 struct configfs_dirent * parent_sd = dentry->d_fsdata; 1422 struct configfs_dirent * parent_sd = dentry->d_fsdata;
1423 int err;
1317 1424
1318 mutex_lock(&dentry->d_inode->i_mutex); 1425 mutex_lock(&dentry->d_inode->i_mutex);
1319 file->private_data = configfs_new_dirent(parent_sd, NULL); 1426 /*
1427 * Fake invisibility if dir belongs to a group/default groups hierarchy
1428 * being attached
1429 */
1430 err = -ENOENT;
1431 if (configfs_dirent_is_ready(parent_sd)) {
1432 file->private_data = configfs_new_dirent(parent_sd, NULL);
1433 if (IS_ERR(file->private_data))
1434 err = PTR_ERR(file->private_data);
1435 else
1436 err = 0;
1437 }
1320 mutex_unlock(&dentry->d_inode->i_mutex); 1438 mutex_unlock(&dentry->d_inode->i_mutex);
1321 1439
1322 return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0; 1440 return err;
1323
1324} 1441}
1325 1442
1326static int configfs_dir_close(struct inode *inode, struct file *file) 1443static int configfs_dir_close(struct inode *inode, struct file *file)
@@ -1491,6 +1608,10 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1491 if (err) { 1608 if (err) {
1492 d_delete(dentry); 1609 d_delete(dentry);
1493 dput(dentry); 1610 dput(dentry);
1611 } else {
1612 spin_lock(&configfs_dirent_lock);
1613 configfs_dir_set_ready(dentry->d_fsdata);
1614 spin_unlock(&configfs_dirent_lock);
1494 } 1615 }
1495 } 1616 }
1496 1617
@@ -1517,11 +1638,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1517 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, 1638 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
1518 I_MUTEX_PARENT); 1639 I_MUTEX_PARENT);
1519 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 1640 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
1641 mutex_lock(&configfs_symlink_mutex);
1520 spin_lock(&configfs_dirent_lock); 1642 spin_lock(&configfs_dirent_lock);
1521 if (configfs_detach_prep(dentry, NULL)) { 1643 if (configfs_detach_prep(dentry, NULL)) {
1522 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); 1644 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
1523 } 1645 }
1524 spin_unlock(&configfs_dirent_lock); 1646 spin_unlock(&configfs_dirent_lock);
1647 mutex_unlock(&configfs_symlink_mutex);
1525 configfs_detach_group(&group->cg_item); 1648 configfs_detach_group(&group->cg_item);
1526 dentry->d_inode->i_flags |= S_DEAD; 1649 dentry->d_inode->i_flags |= S_DEAD;
1527 mutex_unlock(&dentry->d_inode->i_mutex); 1650 mutex_unlock(&dentry->d_inode->i_mutex);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0004d18c40ac..bf74973b0492 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -31,6 +31,9 @@
31#include <linux/configfs.h> 31#include <linux/configfs.h>
32#include "configfs_internal.h" 32#include "configfs_internal.h"
33 33
34/* Protects attachments of new symlinks */
35DEFINE_MUTEX(configfs_symlink_mutex);
36
34static int item_depth(struct config_item * item) 37static int item_depth(struct config_item * item)
35{ 38{
36 struct config_item * p = item; 39 struct config_item * p = item;
@@ -73,11 +76,20 @@ static int create_link(struct config_item *parent_item,
73 struct configfs_symlink *sl; 76 struct configfs_symlink *sl;
74 int ret; 77 int ret;
75 78
79 ret = -ENOENT;
80 if (!configfs_dirent_is_ready(target_sd))
81 goto out;
76 ret = -ENOMEM; 82 ret = -ENOMEM;
77 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); 83 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
78 if (sl) { 84 if (sl) {
79 sl->sl_target = config_item_get(item); 85 sl->sl_target = config_item_get(item);
80 spin_lock(&configfs_dirent_lock); 86 spin_lock(&configfs_dirent_lock);
87 if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
88 spin_unlock(&configfs_dirent_lock);
89 config_item_put(item);
90 kfree(sl);
91 return -ENOENT;
92 }
81 list_add(&sl->sl_list, &target_sd->s_links); 93 list_add(&sl->sl_list, &target_sd->s_links);
82 spin_unlock(&configfs_dirent_lock); 94 spin_unlock(&configfs_dirent_lock);
83 ret = configfs_create_link(sl, parent_item->ci_dentry, 95 ret = configfs_create_link(sl, parent_item->ci_dentry,
@@ -91,6 +103,7 @@ static int create_link(struct config_item *parent_item,
91 } 103 }
92 } 104 }
93 105
106out:
94 return ret; 107 return ret;
95} 108}
96 109
@@ -120,6 +133,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
120{ 133{
121 int ret; 134 int ret;
122 struct nameidata nd; 135 struct nameidata nd;
136 struct configfs_dirent *sd;
123 struct config_item *parent_item; 137 struct config_item *parent_item;
124 struct config_item *target_item; 138 struct config_item *target_item;
125 struct config_item_type *type; 139 struct config_item_type *type;
@@ -128,9 +142,19 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
128 if (dentry->d_parent == configfs_sb->s_root) 142 if (dentry->d_parent == configfs_sb->s_root)
129 goto out; 143 goto out;
130 144
145 sd = dentry->d_parent->d_fsdata;
146 /*
147 * Fake invisibility if dir belongs to a group/default groups hierarchy
148 * being attached
149 */
150 ret = -ENOENT;
151 if (!configfs_dirent_is_ready(sd))
152 goto out;
153
131 parent_item = configfs_get_config_item(dentry->d_parent); 154 parent_item = configfs_get_config_item(dentry->d_parent);
132 type = parent_item->ci_type; 155 type = parent_item->ci_type;
133 156
157 ret = -EPERM;
134 if (!type || !type->ct_item_ops || 158 if (!type || !type->ct_item_ops ||
135 !type->ct_item_ops->allow_link) 159 !type->ct_item_ops->allow_link)
136 goto out_put; 160 goto out_put;
@@ -141,7 +165,9 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
141 165
142 ret = type->ct_item_ops->allow_link(parent_item, target_item); 166 ret = type->ct_item_ops->allow_link(parent_item, target_item);
143 if (!ret) { 167 if (!ret) {
168 mutex_lock(&configfs_symlink_mutex);
144 ret = create_link(parent_item, target_item, dentry); 169 ret = create_link(parent_item, target_item, dentry);
170 mutex_unlock(&configfs_symlink_mutex);
145 if (ret && type->ct_item_ops->drop_link) 171 if (ret && type->ct_item_ops->drop_link)
146 type->ct_item_ops->drop_link(parent_item, 172 type->ct_item_ops->drop_link(parent_item,
147 target_item); 173 target_item);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 0c3b618c15b3..f40423eb1a14 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -43,58 +43,13 @@ static DEFINE_MUTEX(read_mutex);
43static int cramfs_iget5_test(struct inode *inode, void *opaque) 43static int cramfs_iget5_test(struct inode *inode, void *opaque)
44{ 44{
45 struct cramfs_inode *cramfs_inode = opaque; 45 struct cramfs_inode *cramfs_inode = opaque;
46 46 return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1;
47 if (inode->i_ino != CRAMINO(cramfs_inode))
48 return 0; /* does not match */
49
50 if (inode->i_ino != 1)
51 return 1;
52
53 /* all empty directories, char, block, pipe, and sock, share inode #1 */
54
55 if ((inode->i_mode != cramfs_inode->mode) ||
56 (inode->i_gid != cramfs_inode->gid) ||
57 (inode->i_uid != cramfs_inode->uid))
58 return 0; /* does not match */
59
60 if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) &&
61 (inode->i_rdev != old_decode_dev(cramfs_inode->size)))
62 return 0; /* does not match */
63
64 return 1; /* matches */
65} 47}
66 48
67static int cramfs_iget5_set(struct inode *inode, void *opaque) 49static int cramfs_iget5_set(struct inode *inode, void *opaque)
68{ 50{
69 static struct timespec zerotime;
70 struct cramfs_inode *cramfs_inode = opaque; 51 struct cramfs_inode *cramfs_inode = opaque;
71 inode->i_mode = cramfs_inode->mode;
72 inode->i_uid = cramfs_inode->uid;
73 inode->i_size = cramfs_inode->size;
74 inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
75 inode->i_gid = cramfs_inode->gid;
76 /* Struct copy intentional */
77 inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
78 inode->i_ino = CRAMINO(cramfs_inode); 52 inode->i_ino = CRAMINO(cramfs_inode);
79 /* inode->i_nlink is left 1 - arguably wrong for directories,
80 but it's the best we can do without reading the directory
81 contents. 1 yields the right result in GNU find, even
82 without -noleaf option. */
83 if (S_ISREG(inode->i_mode)) {
84 inode->i_fop = &generic_ro_fops;
85 inode->i_data.a_ops = &cramfs_aops;
86 } else if (S_ISDIR(inode->i_mode)) {
87 inode->i_op = &cramfs_dir_inode_operations;
88 inode->i_fop = &cramfs_directory_operations;
89 } else if (S_ISLNK(inode->i_mode)) {
90 inode->i_op = &page_symlink_inode_operations;
91 inode->i_data.a_ops = &cramfs_aops;
92 } else {
93 inode->i_size = 0;
94 inode->i_blocks = 0;
95 init_special_inode(inode, inode->i_mode,
96 old_decode_dev(cramfs_inode->size));
97 }
98 return 0; 53 return 0;
99} 54}
100 55
@@ -104,12 +59,48 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
104 struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), 59 struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode),
105 cramfs_iget5_test, cramfs_iget5_set, 60 cramfs_iget5_test, cramfs_iget5_set,
106 cramfs_inode); 61 cramfs_inode);
62 static struct timespec zerotime;
63
107 if (inode && (inode->i_state & I_NEW)) { 64 if (inode && (inode->i_state & I_NEW)) {
65 inode->i_mode = cramfs_inode->mode;
66 inode->i_uid = cramfs_inode->uid;
67 inode->i_size = cramfs_inode->size;
68 inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
69 inode->i_gid = cramfs_inode->gid;
70 /* Struct copy intentional */
71 inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
72 /* inode->i_nlink is left 1 - arguably wrong for directories,
73 but it's the best we can do without reading the directory
74 contents. 1 yields the right result in GNU find, even
75 without -noleaf option. */
76 if (S_ISREG(inode->i_mode)) {
77 inode->i_fop = &generic_ro_fops;
78 inode->i_data.a_ops = &cramfs_aops;
79 } else if (S_ISDIR(inode->i_mode)) {
80 inode->i_op = &cramfs_dir_inode_operations;
81 inode->i_fop = &cramfs_directory_operations;
82 } else if (S_ISLNK(inode->i_mode)) {
83 inode->i_op = &page_symlink_inode_operations;
84 inode->i_data.a_ops = &cramfs_aops;
85 } else {
86 inode->i_size = 0;
87 inode->i_blocks = 0;
88 init_special_inode(inode, inode->i_mode,
89 old_decode_dev(cramfs_inode->size));
90 }
108 unlock_new_inode(inode); 91 unlock_new_inode(inode);
109 } 92 }
110 return inode; 93 return inode;
111} 94}
112 95
96static void cramfs_drop_inode(struct inode *inode)
97{
98 if (inode->i_ino == 1)
99 generic_delete_inode(inode);
100 else
101 generic_drop_inode(inode);
102}
103
113/* 104/*
114 * We have our own block cache: don't fill up the buffer cache 105 * We have our own block cache: don't fill up the buffer cache
115 * with the rom-image, because the way the filesystem is set 106 * with the rom-image, because the way the filesystem is set
@@ -534,6 +525,7 @@ static const struct super_operations cramfs_ops = {
534 .put_super = cramfs_put_super, 525 .put_super = cramfs_put_super,
535 .remount_fs = cramfs_remount, 526 .remount_fs = cramfs_remount,
536 .statfs = cramfs_statfs, 527 .statfs = cramfs_statfs,
528 .drop_inode = cramfs_drop_inode,
537}; 529};
538 530
539static int cramfs_get_sb(struct file_system_type *fs_type, 531static int cramfs_get_sb(struct file_system_type *fs_type,
diff --git a/fs/dcache.c b/fs/dcache.c
index f2584d22cb45..80e93956aced 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1220,6 +1220,107 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1220 return new; 1220 return new;
1221} 1221}
1222 1222
1223/**
1224 * d_add_ci - lookup or allocate new dentry with case-exact name
1225 * @inode: the inode case-insensitive lookup has found
1226 * @dentry: the negative dentry that was passed to the parent's lookup func
1227 * @name: the case-exact name to be associated with the returned dentry
1228 *
1229 * This is to avoid filling the dcache with case-insensitive names to the
1230 * same inode, only the actual correct case is stored in the dcache for
1231 * case-insensitive filesystems.
1232 *
1233 * For a case-insensitive lookup match and if the case-exact dentry
1234 * already exists in the dcache, use it and return it.
1235 *
1236 * If no entry exists with the exact case name, allocate new dentry with
1237 * the exact case, and return the spliced entry.
1238 */
1239struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1240 struct qstr *name)
1241{
1242 int error;
1243 struct dentry *found;
1244 struct dentry *new;
1245
1246 /* Does a dentry matching the name exist already? */
1247 found = d_hash_and_lookup(dentry->d_parent, name);
1248 /* If not, create it now and return */
1249 if (!found) {
1250 new = d_alloc(dentry->d_parent, name);
1251 if (!new) {
1252 error = -ENOMEM;
1253 goto err_out;
1254 }
1255 found = d_splice_alias(inode, new);
1256 if (found) {
1257 dput(new);
1258 return found;
1259 }
1260 return new;
1261 }
1262 /* Matching dentry exists, check if it is negative. */
1263 if (found->d_inode) {
1264 if (unlikely(found->d_inode != inode)) {
1265 /* This can't happen because bad inodes are unhashed. */
1266 BUG_ON(!is_bad_inode(inode));
1267 BUG_ON(!is_bad_inode(found->d_inode));
1268 }
1269 /*
1270 * Already have the inode and the dentry attached, decrement
1271 * the reference count to balance the iget() done
1272 * earlier on. We found the dentry using d_lookup() so it
1273 * cannot be disconnected and thus we do not need to worry
1274 * about any NFS/disconnectedness issues here.
1275 */
1276 iput(inode);
1277 return found;
1278 }
1279 /*
1280 * Negative dentry: instantiate it unless the inode is a directory and
1281 * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
1282 * in which case d_move() that in place of the found dentry.
1283 */
1284 if (!S_ISDIR(inode->i_mode)) {
1285 /* Not a directory; everything is easy. */
1286 d_instantiate(found, inode);
1287 return found;
1288 }
1289 spin_lock(&dcache_lock);
1290 if (list_empty(&inode->i_dentry)) {
1291 /*
1292 * Directory without a 'disconnected' dentry; we need to do
1293 * d_instantiate() by hand because it takes dcache_lock which
1294 * we already hold.
1295 */
1296 list_add(&found->d_alias, &inode->i_dentry);
1297 found->d_inode = inode;
1298 spin_unlock(&dcache_lock);
1299 security_d_instantiate(found, inode);
1300 return found;
1301 }
1302 /*
1303 * Directory with a 'disconnected' dentry; get a reference to the
1304 * 'disconnected' dentry.
1305 */
1306 new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
1307 dget_locked(new);
1308 spin_unlock(&dcache_lock);
1309 /* Do security voodoo. */ 1310 /* Do security voodoo. */
1310 security_d_instantiate(found, inode);
1311 /* Move new in place of found. */
1312 d_move(new, found);
1313 /* Balance the iget() we did above. */
1314 iput(inode);
1315 /* Throw away found. */
1316 dput(found);
1317 /* Use new as the actual dentry. */
1318 return new;
1319
1320err_out:
1321 iput(inode);
1322 return ERR_PTR(error);
1323}
1223 1324
1224/** 1325/**
1225 * d_lookup - search for a dentry 1326 * d_lookup - search for a dentry
@@ -2254,6 +2355,7 @@ EXPORT_SYMBOL(d_path);
2254EXPORT_SYMBOL(d_prune_aliases); 2355EXPORT_SYMBOL(d_prune_aliases);
2255EXPORT_SYMBOL(d_rehash); 2356EXPORT_SYMBOL(d_rehash);
2256EXPORT_SYMBOL(d_splice_alias); 2357EXPORT_SYMBOL(d_splice_alias);
2358EXPORT_SYMBOL(d_add_ci);
2257EXPORT_SYMBOL(d_validate); 2359EXPORT_SYMBOL(d_validate);
2258EXPORT_SYMBOL(dget_locked); 2360EXPORT_SYMBOL(dget_locked);
2259EXPORT_SYMBOL(dput); 2361EXPORT_SYMBOL(dput);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 285b64a8b06e..488eb424f662 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -29,7 +29,7 @@
29#define DEVPTS_DEFAULT_MODE 0600 29#define DEVPTS_DEFAULT_MODE 0600
30 30
31extern int pty_limit; /* Config limit on Unix98 ptys */ 31extern int pty_limit; /* Config limit on Unix98 ptys */
32static DEFINE_IDR(allocated_ptys); 32static DEFINE_IDA(allocated_ptys);
33static DEFINE_MUTEX(allocated_ptys_lock); 33static DEFINE_MUTEX(allocated_ptys_lock);
34 34
35static struct vfsmount *devpts_mnt; 35static struct vfsmount *devpts_mnt;
@@ -180,24 +180,24 @@ static struct dentry *get_node(int num)
180int devpts_new_index(void) 180int devpts_new_index(void)
181{ 181{
182 int index; 182 int index;
183 int idr_ret; 183 int ida_ret;
184 184
185retry: 185retry:
186 if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) { 186 if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) {
187 return -ENOMEM; 187 return -ENOMEM;
188 } 188 }
189 189
190 mutex_lock(&allocated_ptys_lock); 190 mutex_lock(&allocated_ptys_lock);
191 idr_ret = idr_get_new(&allocated_ptys, NULL, &index); 191 ida_ret = ida_get_new(&allocated_ptys, &index);
192 if (idr_ret < 0) { 192 if (ida_ret < 0) {
193 mutex_unlock(&allocated_ptys_lock); 193 mutex_unlock(&allocated_ptys_lock);
194 if (idr_ret == -EAGAIN) 194 if (ida_ret == -EAGAIN)
195 goto retry; 195 goto retry;
196 return -EIO; 196 return -EIO;
197 } 197 }
198 198
199 if (index >= pty_limit) { 199 if (index >= pty_limit) {
200 idr_remove(&allocated_ptys, index); 200 ida_remove(&allocated_ptys, index);
201 mutex_unlock(&allocated_ptys_lock); 201 mutex_unlock(&allocated_ptys_lock);
202 return -EIO; 202 return -EIO;
203 } 203 }
@@ -208,7 +208,7 @@ retry:
208void devpts_kill_index(int idx) 208void devpts_kill_index(int idx)
209{ 209{
210 mutex_lock(&allocated_ptys_lock); 210 mutex_lock(&allocated_ptys_lock);
211 idr_remove(&allocated_ptys, idx); 211 ida_remove(&allocated_ptys, idx);
212 mutex_unlock(&allocated_ptys_lock); 212 mutex_unlock(&allocated_ptys_lock);
213} 213}
214 214
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index c4e7d721bd8d..89d2fb7b991a 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -30,16 +30,16 @@
30 30
31static struct config_group *space_list; 31static struct config_group *space_list;
32static struct config_group *comm_list; 32static struct config_group *comm_list;
33static struct comm *local_comm; 33static struct dlm_comm *local_comm;
34 34
35struct clusters; 35struct dlm_clusters;
36struct cluster; 36struct dlm_cluster;
37struct spaces; 37struct dlm_spaces;
38struct space; 38struct dlm_space;
39struct comms; 39struct dlm_comms;
40struct comm; 40struct dlm_comm;
41struct nodes; 41struct dlm_nodes;
42struct node; 42struct dlm_node;
43 43
44static struct config_group *make_cluster(struct config_group *, const char *); 44static struct config_group *make_cluster(struct config_group *, const char *);
45static void drop_cluster(struct config_group *, struct config_item *); 45static void drop_cluster(struct config_group *, struct config_item *);
@@ -68,17 +68,22 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
68static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, 68static ssize_t store_node(struct config_item *i, struct configfs_attribute *a,
69 const char *buf, size_t len); 69 const char *buf, size_t len);
70 70
71static ssize_t comm_nodeid_read(struct comm *cm, char *buf); 71static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf);
72static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len); 72static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf,
73static ssize_t comm_local_read(struct comm *cm, char *buf); 73 size_t len);
74static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len); 74static ssize_t comm_local_read(struct dlm_comm *cm, char *buf);
75static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len); 75static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
76static ssize_t node_nodeid_read(struct node *nd, char *buf); 76 size_t len);
77static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len); 77static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf,
78static ssize_t node_weight_read(struct node *nd, char *buf); 78 size_t len);
79static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len); 79static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf);
80 80static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
81struct cluster { 81 size_t len);
82static ssize_t node_weight_read(struct dlm_node *nd, char *buf);
83static ssize_t node_weight_write(struct dlm_node *nd, const char *buf,
84 size_t len);
85
86struct dlm_cluster {
82 struct config_group group; 87 struct config_group group;
83 unsigned int cl_tcp_port; 88 unsigned int cl_tcp_port;
84 unsigned int cl_buffer_size; 89 unsigned int cl_buffer_size;
@@ -109,11 +114,11 @@ enum {
109 114
110struct cluster_attribute { 115struct cluster_attribute {
111 struct configfs_attribute attr; 116 struct configfs_attribute attr;
112 ssize_t (*show)(struct cluster *, char *); 117 ssize_t (*show)(struct dlm_cluster *, char *);
113 ssize_t (*store)(struct cluster *, const char *, size_t); 118 ssize_t (*store)(struct dlm_cluster *, const char *, size_t);
114}; 119};
115 120
116static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, 121static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
117 int *info_field, int check_zero, 122 int *info_field, int check_zero,
118 const char *buf, size_t len) 123 const char *buf, size_t len)
119{ 124{
@@ -134,12 +139,12 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
134} 139}
135 140
136#define CLUSTER_ATTR(name, check_zero) \ 141#define CLUSTER_ATTR(name, check_zero) \
137static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ 142static ssize_t name##_write(struct dlm_cluster *cl, const char *buf, size_t len) \
138{ \ 143{ \
139 return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \ 144 return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
140 check_zero, buf, len); \ 145 check_zero, buf, len); \
141} \ 146} \
142static ssize_t name##_read(struct cluster *cl, char *buf) \ 147static ssize_t name##_read(struct dlm_cluster *cl, char *buf) \
143{ \ 148{ \
144 return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \ 149 return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \
145} \ 150} \
@@ -181,8 +186,8 @@ enum {
181 186
182struct comm_attribute { 187struct comm_attribute {
183 struct configfs_attribute attr; 188 struct configfs_attribute attr;
184 ssize_t (*show)(struct comm *, char *); 189 ssize_t (*show)(struct dlm_comm *, char *);
185 ssize_t (*store)(struct comm *, const char *, size_t); 190 ssize_t (*store)(struct dlm_comm *, const char *, size_t);
186}; 191};
187 192
188static struct comm_attribute comm_attr_nodeid = { 193static struct comm_attribute comm_attr_nodeid = {
@@ -222,8 +227,8 @@ enum {
222 227
223struct node_attribute { 228struct node_attribute {
224 struct configfs_attribute attr; 229 struct configfs_attribute attr;
225 ssize_t (*show)(struct node *, char *); 230 ssize_t (*show)(struct dlm_node *, char *);
226 ssize_t (*store)(struct node *, const char *, size_t); 231 ssize_t (*store)(struct dlm_node *, const char *, size_t);
227}; 232};
228 233
229static struct node_attribute node_attr_nodeid = { 234static struct node_attribute node_attr_nodeid = {
@@ -248,26 +253,26 @@ static struct configfs_attribute *node_attrs[] = {
248 NULL, 253 NULL,
249}; 254};
250 255
251struct clusters { 256struct dlm_clusters {
252 struct configfs_subsystem subsys; 257 struct configfs_subsystem subsys;
253}; 258};
254 259
255struct spaces { 260struct dlm_spaces {
256 struct config_group ss_group; 261 struct config_group ss_group;
257}; 262};
258 263
259struct space { 264struct dlm_space {
260 struct config_group group; 265 struct config_group group;
261 struct list_head members; 266 struct list_head members;
262 struct mutex members_lock; 267 struct mutex members_lock;
263 int members_count; 268 int members_count;
264}; 269};
265 270
266struct comms { 271struct dlm_comms {
267 struct config_group cs_group; 272 struct config_group cs_group;
268}; 273};
269 274
270struct comm { 275struct dlm_comm {
271 struct config_item item; 276 struct config_item item;
272 int nodeid; 277 int nodeid;
273 int local; 278 int local;
@@ -275,11 +280,11 @@ struct comm {
275 struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; 280 struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
276}; 281};
277 282
278struct nodes { 283struct dlm_nodes {
279 struct config_group ns_group; 284 struct config_group ns_group;
280}; 285};
281 286
282struct node { 287struct dlm_node {
283 struct config_item item; 288 struct config_item item;
284 struct list_head list; /* space->members */ 289 struct list_head list; /* space->members */
285 int nodeid; 290 int nodeid;
@@ -372,38 +377,40 @@ static struct config_item_type node_type = {
372 .ct_owner = THIS_MODULE, 377 .ct_owner = THIS_MODULE,
373}; 378};
374 379
375static struct cluster *to_cluster(struct config_item *i) 380static struct dlm_cluster *to_cluster(struct config_item *i)
376{ 381{
377 return i ? container_of(to_config_group(i), struct cluster, group):NULL; 382 return i ? container_of(to_config_group(i), struct dlm_cluster, group) :
383 NULL;
378} 384}
379 385
380static struct space *to_space(struct config_item *i) 386static struct dlm_space *to_space(struct config_item *i)
381{ 387{
382 return i ? container_of(to_config_group(i), struct space, group) : NULL; 388 return i ? container_of(to_config_group(i), struct dlm_space, group) :
389 NULL;
383} 390}
384 391
385static struct comm *to_comm(struct config_item *i) 392static struct dlm_comm *to_comm(struct config_item *i)
386{ 393{
387 return i ? container_of(i, struct comm, item) : NULL; 394 return i ? container_of(i, struct dlm_comm, item) : NULL;
388} 395}
389 396
390static struct node *to_node(struct config_item *i) 397static struct dlm_node *to_node(struct config_item *i)
391{ 398{
392 return i ? container_of(i, struct node, item) : NULL; 399 return i ? container_of(i, struct dlm_node, item) : NULL;
393} 400}
394 401
395static struct config_group *make_cluster(struct config_group *g, 402static struct config_group *make_cluster(struct config_group *g,
396 const char *name) 403 const char *name)
397{ 404{
398 struct cluster *cl = NULL; 405 struct dlm_cluster *cl = NULL;
399 struct spaces *sps = NULL; 406 struct dlm_spaces *sps = NULL;
400 struct comms *cms = NULL; 407 struct dlm_comms *cms = NULL;
401 void *gps = NULL; 408 void *gps = NULL;
402 409
403 cl = kzalloc(sizeof(struct cluster), GFP_KERNEL); 410 cl = kzalloc(sizeof(struct dlm_cluster), GFP_KERNEL);
404 gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); 411 gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
405 sps = kzalloc(sizeof(struct spaces), GFP_KERNEL); 412 sps = kzalloc(sizeof(struct dlm_spaces), GFP_KERNEL);
406 cms = kzalloc(sizeof(struct comms), GFP_KERNEL); 413 cms = kzalloc(sizeof(struct dlm_comms), GFP_KERNEL);
407 414
408 if (!cl || !gps || !sps || !cms) 415 if (!cl || !gps || !sps || !cms)
409 goto fail; 416 goto fail;
@@ -443,7 +450,7 @@ static struct config_group *make_cluster(struct config_group *g,
443 450
444static void drop_cluster(struct config_group *g, struct config_item *i) 451static void drop_cluster(struct config_group *g, struct config_item *i)
445{ 452{
446 struct cluster *cl = to_cluster(i); 453 struct dlm_cluster *cl = to_cluster(i);
447 struct config_item *tmp; 454 struct config_item *tmp;
448 int j; 455 int j;
449 456
@@ -461,20 +468,20 @@ static void drop_cluster(struct config_group *g, struct config_item *i)
461 468
462static void release_cluster(struct config_item *i) 469static void release_cluster(struct config_item *i)
463{ 470{
464 struct cluster *cl = to_cluster(i); 471 struct dlm_cluster *cl = to_cluster(i);
465 kfree(cl->group.default_groups); 472 kfree(cl->group.default_groups);
466 kfree(cl); 473 kfree(cl);
467} 474}
468 475
469static struct config_group *make_space(struct config_group *g, const char *name) 476static struct config_group *make_space(struct config_group *g, const char *name)
470{ 477{
471 struct space *sp = NULL; 478 struct dlm_space *sp = NULL;
472 struct nodes *nds = NULL; 479 struct dlm_nodes *nds = NULL;
473 void *gps = NULL; 480 void *gps = NULL;
474 481
475 sp = kzalloc(sizeof(struct space), GFP_KERNEL); 482 sp = kzalloc(sizeof(struct dlm_space), GFP_KERNEL);
476 gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL); 483 gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL);
477 nds = kzalloc(sizeof(struct nodes), GFP_KERNEL); 484 nds = kzalloc(sizeof(struct dlm_nodes), GFP_KERNEL);
478 485
479 if (!sp || !gps || !nds) 486 if (!sp || !gps || !nds)
480 goto fail; 487 goto fail;
@@ -500,7 +507,7 @@ static struct config_group *make_space(struct config_group *g, const char *name)
500 507
501static void drop_space(struct config_group *g, struct config_item *i) 508static void drop_space(struct config_group *g, struct config_item *i)
502{ 509{
503 struct space *sp = to_space(i); 510 struct dlm_space *sp = to_space(i);
504 struct config_item *tmp; 511 struct config_item *tmp;
505 int j; 512 int j;
506 513
@@ -517,16 +524,16 @@ static void drop_space(struct config_group *g, struct config_item *i)
517 524
518static void release_space(struct config_item *i) 525static void release_space(struct config_item *i)
519{ 526{
520 struct space *sp = to_space(i); 527 struct dlm_space *sp = to_space(i);
521 kfree(sp->group.default_groups); 528 kfree(sp->group.default_groups);
522 kfree(sp); 529 kfree(sp);
523} 530}
524 531
525static struct config_item *make_comm(struct config_group *g, const char *name) 532static struct config_item *make_comm(struct config_group *g, const char *name)
526{ 533{
527 struct comm *cm; 534 struct dlm_comm *cm;
528 535
529 cm = kzalloc(sizeof(struct comm), GFP_KERNEL); 536 cm = kzalloc(sizeof(struct dlm_comm), GFP_KERNEL);
530 if (!cm) 537 if (!cm)
531 return ERR_PTR(-ENOMEM); 538 return ERR_PTR(-ENOMEM);
532 539
@@ -539,7 +546,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name)
539 546
540static void drop_comm(struct config_group *g, struct config_item *i) 547static void drop_comm(struct config_group *g, struct config_item *i)
541{ 548{
542 struct comm *cm = to_comm(i); 549 struct dlm_comm *cm = to_comm(i);
543 if (local_comm == cm) 550 if (local_comm == cm)
544 local_comm = NULL; 551 local_comm = NULL;
545 dlm_lowcomms_close(cm->nodeid); 552 dlm_lowcomms_close(cm->nodeid);
@@ -550,16 +557,16 @@ static void drop_comm(struct config_group *g, struct config_item *i)
550 557
551static void release_comm(struct config_item *i) 558static void release_comm(struct config_item *i)
552{ 559{
553 struct comm *cm = to_comm(i); 560 struct dlm_comm *cm = to_comm(i);
554 kfree(cm); 561 kfree(cm);
555} 562}
556 563
557static struct config_item *make_node(struct config_group *g, const char *name) 564static struct config_item *make_node(struct config_group *g, const char *name)
558{ 565{
559 struct space *sp = to_space(g->cg_item.ci_parent); 566 struct dlm_space *sp = to_space(g->cg_item.ci_parent);
560 struct node *nd; 567 struct dlm_node *nd;
561 568
562 nd = kzalloc(sizeof(struct node), GFP_KERNEL); 569 nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL);
563 if (!nd) 570 if (!nd)
564 return ERR_PTR(-ENOMEM); 571 return ERR_PTR(-ENOMEM);
565 572
@@ -578,8 +585,8 @@ static struct config_item *make_node(struct config_group *g, const char *name)
578 585
579static void drop_node(struct config_group *g, struct config_item *i) 586static void drop_node(struct config_group *g, struct config_item *i)
580{ 587{
581 struct space *sp = to_space(g->cg_item.ci_parent); 588 struct dlm_space *sp = to_space(g->cg_item.ci_parent);
582 struct node *nd = to_node(i); 589 struct dlm_node *nd = to_node(i);
583 590
584 mutex_lock(&sp->members_lock); 591 mutex_lock(&sp->members_lock);
585 list_del(&nd->list); 592 list_del(&nd->list);
@@ -591,11 +598,11 @@ static void drop_node(struct config_group *g, struct config_item *i)
591 598
592static void release_node(struct config_item *i) 599static void release_node(struct config_item *i)
593{ 600{
594 struct node *nd = to_node(i); 601 struct dlm_node *nd = to_node(i);
595 kfree(nd); 602 kfree(nd);
596} 603}
597 604
598static struct clusters clusters_root = { 605static struct dlm_clusters clusters_root = {
599 .subsys = { 606 .subsys = {
600 .su_group = { 607 .su_group = {
601 .cg_item = { 608 .cg_item = {
@@ -625,7 +632,7 @@ void dlm_config_exit(void)
625static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, 632static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
626 char *buf) 633 char *buf)
627{ 634{
628 struct cluster *cl = to_cluster(i); 635 struct dlm_cluster *cl = to_cluster(i);
629 struct cluster_attribute *cla = 636 struct cluster_attribute *cla =
630 container_of(a, struct cluster_attribute, attr); 637 container_of(a, struct cluster_attribute, attr);
631 return cla->show ? cla->show(cl, buf) : 0; 638 return cla->show ? cla->show(cl, buf) : 0;
@@ -635,7 +642,7 @@ static ssize_t store_cluster(struct config_item *i,
635 struct configfs_attribute *a, 642 struct configfs_attribute *a,
636 const char *buf, size_t len) 643 const char *buf, size_t len)
637{ 644{
638 struct cluster *cl = to_cluster(i); 645 struct dlm_cluster *cl = to_cluster(i);
639 struct cluster_attribute *cla = 646 struct cluster_attribute *cla =
640 container_of(a, struct cluster_attribute, attr); 647 container_of(a, struct cluster_attribute, attr);
641 return cla->store ? cla->store(cl, buf, len) : -EINVAL; 648 return cla->store ? cla->store(cl, buf, len) : -EINVAL;
@@ -644,7 +651,7 @@ static ssize_t store_cluster(struct config_item *i,
644static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, 651static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
645 char *buf) 652 char *buf)
646{ 653{
647 struct comm *cm = to_comm(i); 654 struct dlm_comm *cm = to_comm(i);
648 struct comm_attribute *cma = 655 struct comm_attribute *cma =
649 container_of(a, struct comm_attribute, attr); 656 container_of(a, struct comm_attribute, attr);
650 return cma->show ? cma->show(cm, buf) : 0; 657 return cma->show ? cma->show(cm, buf) : 0;
@@ -653,29 +660,31 @@ static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
653static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, 660static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
654 const char *buf, size_t len) 661 const char *buf, size_t len)
655{ 662{
656 struct comm *cm = to_comm(i); 663 struct dlm_comm *cm = to_comm(i);
657 struct comm_attribute *cma = 664 struct comm_attribute *cma =
658 container_of(a, struct comm_attribute, attr); 665 container_of(a, struct comm_attribute, attr);
659 return cma->store ? cma->store(cm, buf, len) : -EINVAL; 666 return cma->store ? cma->store(cm, buf, len) : -EINVAL;
660} 667}
661 668
662static ssize_t comm_nodeid_read(struct comm *cm, char *buf) 669static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf)
663{ 670{
664 return sprintf(buf, "%d\n", cm->nodeid); 671 return sprintf(buf, "%d\n", cm->nodeid);
665} 672}
666 673
667static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len) 674static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf,
675 size_t len)
668{ 676{
669 cm->nodeid = simple_strtol(buf, NULL, 0); 677 cm->nodeid = simple_strtol(buf, NULL, 0);
670 return len; 678 return len;
671} 679}
672 680
673static ssize_t comm_local_read(struct comm *cm, char *buf) 681static ssize_t comm_local_read(struct dlm_comm *cm, char *buf)
674{ 682{
675 return sprintf(buf, "%d\n", cm->local); 683 return sprintf(buf, "%d\n", cm->local);
676} 684}
677 685
678static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len) 686static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
687 size_t len)
679{ 688{
680 cm->local= simple_strtol(buf, NULL, 0); 689 cm->local= simple_strtol(buf, NULL, 0);
681 if (cm->local && !local_comm) 690 if (cm->local && !local_comm)
@@ -683,7 +692,7 @@ static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len)
683 return len; 692 return len;
684} 693}
685 694
686static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len) 695static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
687{ 696{
688 struct sockaddr_storage *addr; 697 struct sockaddr_storage *addr;
689 698
@@ -705,7 +714,7 @@ static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len)
705static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, 714static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
706 char *buf) 715 char *buf)
707{ 716{
708 struct node *nd = to_node(i); 717 struct dlm_node *nd = to_node(i);
709 struct node_attribute *nda = 718 struct node_attribute *nda =
710 container_of(a, struct node_attribute, attr); 719 container_of(a, struct node_attribute, attr);
711 return nda->show ? nda->show(nd, buf) : 0; 720 return nda->show ? nda->show(nd, buf) : 0;
@@ -714,29 +723,31 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
714static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, 723static ssize_t store_node(struct config_item *i, struct configfs_attribute *a,
715 const char *buf, size_t len) 724 const char *buf, size_t len)
716{ 725{
717 struct node *nd = to_node(i); 726 struct dlm_node *nd = to_node(i);
718 struct node_attribute *nda = 727 struct node_attribute *nda =
719 container_of(a, struct node_attribute, attr); 728 container_of(a, struct node_attribute, attr);
720 return nda->store ? nda->store(nd, buf, len) : -EINVAL; 729 return nda->store ? nda->store(nd, buf, len) : -EINVAL;
721} 730}
722 731
723static ssize_t node_nodeid_read(struct node *nd, char *buf) 732static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf)
724{ 733{
725 return sprintf(buf, "%d\n", nd->nodeid); 734 return sprintf(buf, "%d\n", nd->nodeid);
726} 735}
727 736
728static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len) 737static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
738 size_t len)
729{ 739{
730 nd->nodeid = simple_strtol(buf, NULL, 0); 740 nd->nodeid = simple_strtol(buf, NULL, 0);
731 return len; 741 return len;
732} 742}
733 743
734static ssize_t node_weight_read(struct node *nd, char *buf) 744static ssize_t node_weight_read(struct dlm_node *nd, char *buf)
735{ 745{
736 return sprintf(buf, "%d\n", nd->weight); 746 return sprintf(buf, "%d\n", nd->weight);
737} 747}
738 748
739static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len) 749static ssize_t node_weight_write(struct dlm_node *nd, const char *buf,
750 size_t len)
740{ 751{
741 nd->weight = simple_strtol(buf, NULL, 0); 752 nd->weight = simple_strtol(buf, NULL, 0);
742 return len; 753 return len;
@@ -746,7 +757,7 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
746 * Functions for the dlm to get the info that's been configured 757 * Functions for the dlm to get the info that's been configured
747 */ 758 */
748 759
749static struct space *get_space(char *name) 760static struct dlm_space *get_space(char *name)
750{ 761{
751 struct config_item *i; 762 struct config_item *i;
752 763
@@ -760,15 +771,15 @@ static struct space *get_space(char *name)
760 return to_space(i); 771 return to_space(i);
761} 772}
762 773
763static void put_space(struct space *sp) 774static void put_space(struct dlm_space *sp)
764{ 775{
765 config_item_put(&sp->group.cg_item); 776 config_item_put(&sp->group.cg_item);
766} 777}
767 778
768static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) 779static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
769{ 780{
770 struct config_item *i; 781 struct config_item *i;
771 struct comm *cm = NULL; 782 struct dlm_comm *cm = NULL;
772 int found = 0; 783 int found = 0;
773 784
774 if (!comm_list) 785 if (!comm_list)
@@ -801,7 +812,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
801 return cm; 812 return cm;
802} 813}
803 814
804static void put_comm(struct comm *cm) 815static void put_comm(struct dlm_comm *cm)
805{ 816{
806 config_item_put(&cm->item); 817 config_item_put(&cm->item);
807} 818}
@@ -810,8 +821,8 @@ static void put_comm(struct comm *cm)
810int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, 821int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
811 int **new_out, int *new_count_out) 822 int **new_out, int *new_count_out)
812{ 823{
813 struct space *sp; 824 struct dlm_space *sp;
814 struct node *nd; 825 struct dlm_node *nd;
815 int i = 0, rv = 0, ids_count = 0, new_count = 0; 826 int i = 0, rv = 0, ids_count = 0, new_count = 0;
816 int *ids, *new; 827 int *ids, *new;
817 828
@@ -874,8 +885,8 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
874 885
875int dlm_node_weight(char *lsname, int nodeid) 886int dlm_node_weight(char *lsname, int nodeid)
876{ 887{
877 struct space *sp; 888 struct dlm_space *sp;
878 struct node *nd; 889 struct dlm_node *nd;
879 int w = -EEXIST; 890 int w = -EEXIST;
880 891
881 sp = get_space(lsname); 892 sp = get_space(lsname);
@@ -897,7 +908,7 @@ int dlm_node_weight(char *lsname, int nodeid)
897 908
898int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) 909int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr)
899{ 910{
900 struct comm *cm = get_comm(nodeid, NULL); 911 struct dlm_comm *cm = get_comm(nodeid, NULL);
901 if (!cm) 912 if (!cm)
902 return -EEXIST; 913 return -EEXIST;
903 if (!cm->addr_count) 914 if (!cm->addr_count)
@@ -909,7 +920,7 @@ int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr)
909 920
910int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) 921int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
911{ 922{
912 struct comm *cm = get_comm(0, addr); 923 struct dlm_comm *cm = get_comm(0, addr);
913 if (!cm) 924 if (!cm)
914 return -EEXIST; 925 return -EEXIST;
915 *nodeid = cm->nodeid; 926 *nodeid = cm->nodeid;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 929e48ae7591..34f14a14fb4e 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -527,8 +527,10 @@ static ssize_t device_write(struct file *file, const char __user *buf,
527 k32buf = (struct dlm_write_request32 *)kbuf; 527 k32buf = (struct dlm_write_request32 *)kbuf;
528 kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) - 528 kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) -
529 sizeof(struct dlm_write_request32)), GFP_KERNEL); 529 sizeof(struct dlm_write_request32)), GFP_KERNEL);
530 if (!kbuf) 530 if (!kbuf) {
531 kfree(k32buf);
531 return -ENOMEM; 532 return -ENOMEM;
533 }
532 534
533 if (proc) 535 if (proc)
534 set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); 536 set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags);
@@ -539,8 +541,10 @@ static ssize_t device_write(struct file *file, const char __user *buf,
539 541
540 /* do we really need this? can a write happen after a close? */ 542 /* do we really need this? can a write happen after a close? */
541 if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) && 543 if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) &&
542 (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))) 544 (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))) {
543 return -EINVAL; 545 error = -EINVAL;
546 goto out_free;
547 }
544 548
545 sigfillset(&allsigs); 549 sigfillset(&allsigs);
546 sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); 550 sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);
diff --git a/fs/dquot.c b/fs/dquot.c
index 1346eebe74ce..8ec4d6cc7633 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1793,6 +1793,21 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
1793 return ret; 1793 return ret;
1794} 1794}
1795 1795
1796int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
1797 struct path *path)
1798{
1799 int error = security_quota_on(path->dentry);
1800 if (error)
1801 return error;
1802 /* Quota file not on the same filesystem? */
1803 if (path->mnt->mnt_sb != sb)
1804 error = -EXDEV;
1805 else
1806 error = vfs_quota_on_inode(path->dentry->d_inode, type,
1807 format_id);
1808 return error;
1809}
1810
1796/* Actual function called from quotactl() */ 1811/* Actual function called from quotactl() */
1797int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, 1812int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
1798 int remount) 1813 int remount)
@@ -1804,19 +1819,10 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
1804 return vfs_quota_on_remount(sb, type); 1819 return vfs_quota_on_remount(sb, type);
1805 1820
1806 error = path_lookup(path, LOOKUP_FOLLOW, &nd); 1821 error = path_lookup(path, LOOKUP_FOLLOW, &nd);
1807 if (error < 0) 1822 if (!error) {
1808 return error; 1823 error = vfs_quota_on_path(sb, type, format_id, &nd.path);
1809 error = security_quota_on(nd.path.dentry); 1824 path_put(&nd.path);
1810 if (error) 1825 }
1811 goto out_path;
1812 /* Quota file not on the same filesystem? */
1813 if (nd.path.mnt->mnt_sb != sb)
1814 error = -EXDEV;
1815 else
1816 error = vfs_quota_on_inode(nd.path.dentry->d_inode, type,
1817 format_id);
1818out_path:
1819 path_put(&nd.path);
1820 return error; 1826 return error;
1821} 1827}
1822 1828
@@ -2185,6 +2191,7 @@ EXPORT_SYMBOL(unregister_quota_format);
2185EXPORT_SYMBOL(dqstats); 2191EXPORT_SYMBOL(dqstats);
2186EXPORT_SYMBOL(dq_data_lock); 2192EXPORT_SYMBOL(dq_data_lock);
2187EXPORT_SYMBOL(vfs_quota_on); 2193EXPORT_SYMBOL(vfs_quota_on);
2194EXPORT_SYMBOL(vfs_quota_on_path);
2188EXPORT_SYMBOL(vfs_quota_on_mount); 2195EXPORT_SYMBOL(vfs_quota_on_mount);
2189EXPORT_SYMBOL(vfs_quota_off); 2196EXPORT_SYMBOL(vfs_quota_off);
2190EXPORT_SYMBOL(vfs_quota_sync); 2197EXPORT_SYMBOL(vfs_quota_sync);
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 3a404e7fad53..291abb11e20e 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -74,8 +74,7 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei
74 } 74 }
75 unlock_kernel(); 75 unlock_kernel();
76 76
77 d_add(dentry, inode); 77 return d_splice_alias(inode, dentry);
78 return NULL;
79} 78}
80 79
81static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, 80static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino,
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 0c87474f7917..7cc0eb756b55 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1041,10 +1041,7 @@ retry:
1041} 1041}
1042 1042
1043/* 1043/*
1044 * It opens an eventpoll file descriptor. The "size" parameter is there 1044 * Open an eventpoll file descriptor.
1045 * for historical reasons, when epoll was using an hash instead of an
1046 * RB tree. With the current implementation, the "size" parameter is ignored
1047 * (besides sanity checks).
1048 */ 1045 */
1049asmlinkage long sys_epoll_create1(int flags) 1046asmlinkage long sys_epoll_create1(int flags)
1050{ 1047{
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8ddced384674..f38a5afc39a1 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2810,8 +2810,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2810 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2810 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2811 } 2811 }
2812 2812
2813 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
2813 path_put(&nd.path); 2814 path_put(&nd.path);
2814 return vfs_quota_on(sb, type, format_id, path, remount); 2815 return err;
2815} 2816}
2816 2817
2817/* Read data from quotafile - avoid pagecache and such because we cannot afford 2818/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index c7d04e165446..694ed6fadcc8 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -40,34 +40,35 @@ ext4_acl_from_disk(const void *value, size_t size)
40 acl = posix_acl_alloc(count, GFP_NOFS); 40 acl = posix_acl_alloc(count, GFP_NOFS);
41 if (!acl) 41 if (!acl)
42 return ERR_PTR(-ENOMEM); 42 return ERR_PTR(-ENOMEM);
43 for (n=0; n < count; n++) { 43 for (n = 0; n < count; n++) {
44 ext4_acl_entry *entry = 44 ext4_acl_entry *entry =
45 (ext4_acl_entry *)value; 45 (ext4_acl_entry *)value;
46 if ((char *)value + sizeof(ext4_acl_entry_short) > end) 46 if ((char *)value + sizeof(ext4_acl_entry_short) > end)
47 goto fail; 47 goto fail;
48 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); 48 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
49 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); 49 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
50 switch(acl->a_entries[n].e_tag) { 50
51 case ACL_USER_OBJ: 51 switch (acl->a_entries[n].e_tag) {
52 case ACL_GROUP_OBJ: 52 case ACL_USER_OBJ:
53 case ACL_MASK: 53 case ACL_GROUP_OBJ:
54 case ACL_OTHER: 54 case ACL_MASK:
55 value = (char *)value + 55 case ACL_OTHER:
56 sizeof(ext4_acl_entry_short); 56 value = (char *)value +
57 acl->a_entries[n].e_id = ACL_UNDEFINED_ID; 57 sizeof(ext4_acl_entry_short);
58 break; 58 acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
59 59 break;
60 case ACL_USER: 60
61 case ACL_GROUP: 61 case ACL_USER:
62 value = (char *)value + sizeof(ext4_acl_entry); 62 case ACL_GROUP:
63 if ((char *)value > end) 63 value = (char *)value + sizeof(ext4_acl_entry);
64 goto fail; 64 if ((char *)value > end)
65 acl->a_entries[n].e_id =
66 le32_to_cpu(entry->e_id);
67 break;
68
69 default:
70 goto fail; 65 goto fail;
66 acl->a_entries[n].e_id =
67 le32_to_cpu(entry->e_id);
68 break;
69
70 default:
71 goto fail;
71 } 72 }
72 } 73 }
73 if (value != end) 74 if (value != end)
@@ -96,27 +97,26 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
96 return ERR_PTR(-ENOMEM); 97 return ERR_PTR(-ENOMEM);
97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); 98 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
98 e = (char *)ext_acl + sizeof(ext4_acl_header); 99 e = (char *)ext_acl + sizeof(ext4_acl_header);
99 for (n=0; n < acl->a_count; n++) { 100 for (n = 0; n < acl->a_count; n++) {
100 ext4_acl_entry *entry = (ext4_acl_entry *)e; 101 ext4_acl_entry *entry = (ext4_acl_entry *)e;
101 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 102 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
102 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 103 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
103 switch(acl->a_entries[n].e_tag) { 104 switch (acl->a_entries[n].e_tag) {
104 case ACL_USER: 105 case ACL_USER:
105 case ACL_GROUP: 106 case ACL_GROUP:
106 entry->e_id = 107 entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
107 cpu_to_le32(acl->a_entries[n].e_id); 108 e += sizeof(ext4_acl_entry);
108 e += sizeof(ext4_acl_entry); 109 break;
109 break; 110
110 111 case ACL_USER_OBJ:
111 case ACL_USER_OBJ: 112 case ACL_GROUP_OBJ:
112 case ACL_GROUP_OBJ: 113 case ACL_MASK:
113 case ACL_MASK: 114 case ACL_OTHER:
114 case ACL_OTHER: 115 e += sizeof(ext4_acl_entry_short);
115 e += sizeof(ext4_acl_entry_short); 116 break;
116 break; 117
117 118 default:
118 default: 119 goto fail;
119 goto fail;
120 } 120 }
121 } 121 }
122 return (char *)ext_acl; 122 return (char *)ext_acl;
@@ -167,23 +167,23 @@ ext4_get_acl(struct inode *inode, int type)
167 if (!test_opt(inode->i_sb, POSIX_ACL)) 167 if (!test_opt(inode->i_sb, POSIX_ACL))
168 return NULL; 168 return NULL;
169 169
170 switch(type) { 170 switch (type) {
171 case ACL_TYPE_ACCESS: 171 case ACL_TYPE_ACCESS:
172 acl = ext4_iget_acl(inode, &ei->i_acl); 172 acl = ext4_iget_acl(inode, &ei->i_acl);
173 if (acl != EXT4_ACL_NOT_CACHED) 173 if (acl != EXT4_ACL_NOT_CACHED)
174 return acl; 174 return acl;
175 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; 175 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
176 break; 176 break;
177 177
178 case ACL_TYPE_DEFAULT: 178 case ACL_TYPE_DEFAULT:
179 acl = ext4_iget_acl(inode, &ei->i_default_acl); 179 acl = ext4_iget_acl(inode, &ei->i_default_acl);
180 if (acl != EXT4_ACL_NOT_CACHED) 180 if (acl != EXT4_ACL_NOT_CACHED)
181 return acl; 181 return acl;
182 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; 182 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
183 break; 183 break;
184 184
185 default: 185 default:
186 return ERR_PTR(-EINVAL); 186 return ERR_PTR(-EINVAL);
187 } 187 }
188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0); 188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
189 if (retval > 0) { 189 if (retval > 0) {
@@ -201,14 +201,14 @@ ext4_get_acl(struct inode *inode, int type)
201 kfree(value); 201 kfree(value);
202 202
203 if (!IS_ERR(acl)) { 203 if (!IS_ERR(acl)) {
204 switch(type) { 204 switch (type) {
205 case ACL_TYPE_ACCESS: 205 case ACL_TYPE_ACCESS:
206 ext4_iset_acl(inode, &ei->i_acl, acl); 206 ext4_iset_acl(inode, &ei->i_acl, acl);
207 break; 207 break;
208 208
209 case ACL_TYPE_DEFAULT: 209 case ACL_TYPE_DEFAULT:
210 ext4_iset_acl(inode, &ei->i_default_acl, acl); 210 ext4_iset_acl(inode, &ei->i_default_acl, acl);
211 break; 211 break;
212 } 212 }
213 } 213 }
214 return acl; 214 return acl;
@@ -232,31 +232,31 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
232 if (S_ISLNK(inode->i_mode)) 232 if (S_ISLNK(inode->i_mode))
233 return -EOPNOTSUPP; 233 return -EOPNOTSUPP;
234 234
235 switch(type) { 235 switch (type) {
236 case ACL_TYPE_ACCESS: 236 case ACL_TYPE_ACCESS:
237 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; 237 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
238 if (acl) { 238 if (acl) {
239 mode_t mode = inode->i_mode; 239 mode_t mode = inode->i_mode;
240 error = posix_acl_equiv_mode(acl, &mode); 240 error = posix_acl_equiv_mode(acl, &mode);
241 if (error < 0) 241 if (error < 0)
242 return error; 242 return error;
243 else { 243 else {
244 inode->i_mode = mode; 244 inode->i_mode = mode;
245 ext4_mark_inode_dirty(handle, inode); 245 ext4_mark_inode_dirty(handle, inode);
246 if (error == 0) 246 if (error == 0)
247 acl = NULL; 247 acl = NULL;
248 }
249 } 248 }
250 break; 249 }
250 break;
251 251
252 case ACL_TYPE_DEFAULT: 252 case ACL_TYPE_DEFAULT:
253 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; 253 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
254 if (!S_ISDIR(inode->i_mode)) 254 if (!S_ISDIR(inode->i_mode))
255 return acl ? -EACCES : 0; 255 return acl ? -EACCES : 0;
256 break; 256 break;
257 257
258 default: 258 default:
259 return -EINVAL; 259 return -EINVAL;
260 } 260 }
261 if (acl) { 261 if (acl) {
262 value = ext4_acl_to_disk(acl, &size); 262 value = ext4_acl_to_disk(acl, &size);
@@ -269,14 +269,14 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
269 269
270 kfree(value); 270 kfree(value);
271 if (!error) { 271 if (!error) {
272 switch(type) { 272 switch (type) {
273 case ACL_TYPE_ACCESS: 273 case ACL_TYPE_ACCESS:
274 ext4_iset_acl(inode, &ei->i_acl, acl); 274 ext4_iset_acl(inode, &ei->i_acl, acl);
275 break; 275 break;
276 276
277 case ACL_TYPE_DEFAULT: 277 case ACL_TYPE_DEFAULT:
278 ext4_iset_acl(inode, &ei->i_default_acl, acl); 278 ext4_iset_acl(inode, &ei->i_default_acl, acl);
279 break; 279 break;
280 } 280 }
281 } 281 }
282 return error; 282 return error;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 495ab21b9832..e9fa960ba6da 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -314,25 +314,28 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
314 if (unlikely(!bh)) { 314 if (unlikely(!bh)) {
315 ext4_error(sb, __func__, 315 ext4_error(sb, __func__,
316 "Cannot read block bitmap - " 316 "Cannot read block bitmap - "
317 "block_group = %d, block_bitmap = %llu", 317 "block_group = %lu, block_bitmap = %llu",
318 (int)block_group, (unsigned long long)bitmap_blk); 318 block_group, bitmap_blk);
319 return NULL; 319 return NULL;
320 } 320 }
321 if (bh_uptodate_or_lock(bh)) 321 if (bh_uptodate_or_lock(bh))
322 return bh; 322 return bh;
323 323
324 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
324 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 325 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
325 ext4_init_block_bitmap(sb, bh, block_group, desc); 326 ext4_init_block_bitmap(sb, bh, block_group, desc);
326 set_buffer_uptodate(bh); 327 set_buffer_uptodate(bh);
327 unlock_buffer(bh); 328 unlock_buffer(bh);
329 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
328 return bh; 330 return bh;
329 } 331 }
332 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
330 if (bh_submit_read(bh) < 0) { 333 if (bh_submit_read(bh) < 0) {
331 put_bh(bh); 334 put_bh(bh);
332 ext4_error(sb, __func__, 335 ext4_error(sb, __func__,
333 "Cannot read block bitmap - " 336 "Cannot read block bitmap - "
334 "block_group = %d, block_bitmap = %llu", 337 "block_group = %lu, block_bitmap = %llu",
335 (int)block_group, (unsigned long long)bitmap_blk); 338 block_group, bitmap_blk);
336 return NULL; 339 return NULL;
337 } 340 }
338 ext4_valid_block_bitmap(sb, desc, block_group, bh); 341 ext4_valid_block_bitmap(sb, desc, block_group, bh);
@@ -1623,6 +1626,9 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
1623 free_blocks = 1626 free_blocks =
1624 percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); 1627 percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
1625#endif 1628#endif
1629 if (free_blocks <= root_blocks)
1630 /* we don't have free space */
1631 return 0;
1626 if (free_blocks - root_blocks < nblocks) 1632 if (free_blocks - root_blocks < nblocks)
1627 return free_blocks - root_blocks; 1633 return free_blocks - root_blocks;
1628 return nblocks; 1634 return nblocks;
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index d3d23d73c08b..ec8e33b45219 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -411,7 +411,7 @@ static int call_filldir(struct file * filp, void * dirent,
411 get_dtype(sb, fname->file_type)); 411 get_dtype(sb, fname->file_type));
412 if (error) { 412 if (error) {
413 filp->f_pos = curr_pos; 413 filp->f_pos = curr_pos;
414 info->extra_fname = fname->next; 414 info->extra_fname = fname;
415 return error; 415 return error;
416 } 416 }
417 fname = fname->next; 417 fname = fname->next;
@@ -450,11 +450,21 @@ static int ext4_dx_readdir(struct file * filp,
450 * If there are any leftover names on the hash collision 450 * If there are any leftover names on the hash collision
451 * chain, return them first. 451 * chain, return them first.
452 */ 452 */
453 if (info->extra_fname && 453 if (info->extra_fname) {
454 call_filldir(filp, dirent, filldir, info->extra_fname)) 454 if (call_filldir(filp, dirent, filldir, info->extra_fname))
455 goto finished; 455 goto finished;
456 456
457 if (!info->curr_node) 457 info->extra_fname = NULL;
458 info->curr_node = rb_next(info->curr_node);
459 if (!info->curr_node) {
460 if (info->next_hash == ~0) {
461 filp->f_pos = EXT4_HTREE_EOF;
462 goto finished;
463 }
464 info->curr_hash = info->next_hash;
465 info->curr_minor_hash = 0;
466 }
467 } else if (!info->curr_node)
458 info->curr_node = rb_first(&info->root); 468 info->curr_node = rb_first(&info->root);
459 469
460 while (1) { 470 while (1) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 303e41cf7b14..295003241d3d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1044,7 +1044,6 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
1044 1044
1045 1045
1046/* inode.c */ 1046/* inode.c */
1047void ext4_da_release_space(struct inode *inode, int used, int to_free);
1048int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, 1047int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1049 struct buffer_head *bh, ext4_fsblk_t blocknr); 1048 struct buffer_head *bh, ext4_fsblk_t blocknr);
1050struct buffer_head *ext4_getblk(handle_t *, struct inode *, 1049struct buffer_head *ext4_getblk(handle_t *, struct inode *,
@@ -1073,6 +1072,8 @@ extern void ext4_set_inode_flags(struct inode *);
1073extern void ext4_get_inode_flags(struct ext4_inode_info *); 1072extern void ext4_get_inode_flags(struct ext4_inode_info *);
1074extern void ext4_set_aops(struct inode *inode); 1073extern void ext4_set_aops(struct inode *inode);
1075extern int ext4_writepage_trans_blocks(struct inode *); 1074extern int ext4_writepage_trans_blocks(struct inode *);
1075extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
1076extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1076extern int ext4_block_truncate_page(handle_t *handle, 1077extern int ext4_block_truncate_page(handle_t *handle,
1077 struct address_space *mapping, loff_t from); 1078 struct address_space *mapping, loff_t from);
1078extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); 1079extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
@@ -1228,6 +1229,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
1228/* extents.c */ 1229/* extents.c */
1229extern int ext4_ext_tree_init(handle_t *handle, struct inode *); 1230extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
1230extern int ext4_ext_writepage_trans_blocks(struct inode *, int); 1231extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
1232extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
1233 int chunk);
1231extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 1234extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1232 ext4_lblk_t iblock, 1235 ext4_lblk_t iblock,
1233 unsigned long max_blocks, struct buffer_head *bh_result, 1236 unsigned long max_blocks, struct buffer_head *bh_result,
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 6c166c0a54b7..d33dc56d6986 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -216,7 +216,9 @@ extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
216extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 216extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
217extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 217extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
218extern int ext4_extent_tree_init(handle_t *, struct inode *); 218extern int ext4_extent_tree_init(handle_t *, struct inode *);
219extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); 219extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
220 int num,
221 struct ext4_ext_path *path);
220extern int ext4_ext_try_to_merge(struct inode *inode, 222extern int ext4_ext_try_to_merge(struct inode *inode,
221 struct ext4_ext_path *path, 223 struct ext4_ext_path *path,
222 struct ext4_extent *); 224 struct ext4_extent *);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index eb8bc3afe6e9..b455c685a98b 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -51,6 +51,14 @@
51 EXT4_XATTR_TRANS_BLOCKS - 2 + \ 51 EXT4_XATTR_TRANS_BLOCKS - 2 + \
52 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) 52 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
53 53
54/*
55 * Define the number of metadata blocks we need to account to modify data.
56 *
57 * This include super block, inode block, quota blocks and xattr blocks
58 */
59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
60 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
61
54/* Delete operations potentially hit one directory's namespace plus an 62/* Delete operations potentially hit one directory's namespace plus an
55 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be 63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
56 * generous. We can grow the delete transaction later if necessary. */ 64 * generous. We can grow the delete transaction later if necessary. */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 42c4c0c892ed..b24d3c53f20c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -99,7 +99,7 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
99 if (handle->h_buffer_credits > needed) 99 if (handle->h_buffer_credits > needed)
100 return 0; 100 return 0;
101 err = ext4_journal_extend(handle, needed); 101 err = ext4_journal_extend(handle, needed);
102 if (err) 102 if (err <= 0)
103 return err; 103 return err;
104 return ext4_journal_restart(handle, needed); 104 return ext4_journal_restart(handle, needed);
105} 105}
@@ -1441,7 +1441,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
1441 1441
1442 /* 1442 /*
1443 * get the next allocated block if the extent in the path 1443 * get the next allocated block if the extent in the path
1444 * is before the requested block(s) 1444 * is before the requested block(s)
1445 */ 1445 */
1446 if (b2 < b1) { 1446 if (b2 < b1) {
1447 b2 = ext4_ext_next_allocated_block(path); 1447 b2 = ext4_ext_next_allocated_block(path);
@@ -1747,54 +1747,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1747} 1747}
1748 1748
1749/* 1749/*
1750 * ext4_ext_calc_credits_for_insert: 1750 * ext4_ext_calc_credits_for_single_extent:
1751 * This routine returns max. credits that the extent tree can consume. 1751 * This routine returns max. credits that needed to insert an extent
1752 * It should be OK for low-performance paths like ->writepage() 1752 * to the extent tree.
1753 * To allow many writing processes to fit into a single transaction, 1753 * When pass the actual path, the caller should calculate credits
1754 * the caller should calculate credits under i_data_sem and 1754 * under i_data_sem.
1755 * pass the actual path.
1756 */ 1755 */
1757int ext4_ext_calc_credits_for_insert(struct inode *inode, 1756int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
1758 struct ext4_ext_path *path) 1757 struct ext4_ext_path *path)
1759{ 1758{
1760 int depth, needed;
1761
1762 if (path) { 1759 if (path) {
1760 int depth = ext_depth(inode);
1761 int ret = 0;
1762
1763 /* probably there is space in leaf? */ 1763 /* probably there is space in leaf? */
1764 depth = ext_depth(inode);
1765 if (le16_to_cpu(path[depth].p_hdr->eh_entries) 1764 if (le16_to_cpu(path[depth].p_hdr->eh_entries)
1766 < le16_to_cpu(path[depth].p_hdr->eh_max)) 1765 < le16_to_cpu(path[depth].p_hdr->eh_max)) {
1767 return 1;
1768 }
1769
1770 /*
1771 * given 32-bit logical block (4294967296 blocks), max. tree
1772 * can be 4 levels in depth -- 4 * 340^4 == 53453440000.
1773 * Let's also add one more level for imbalance.
1774 */
1775 depth = 5;
1776 1766
1777 /* allocation of new data block(s) */ 1767 /*
1778 needed = 2; 1768 * There are some space in the leaf tree, no
1769 * need to account for leaf block credit
1770 *
1771 * bitmaps and block group descriptor blocks
1772 * and other metadat blocks still need to be
1773 * accounted.
1774 */
1775 /* 1 bitmap, 1 block group descriptor */
1776 ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
1777 }
1778 }
1779 1779
1780 /* 1780 return ext4_chunk_trans_blocks(inode, nrblocks);
1781 * tree can be full, so it would need to grow in depth: 1781}
1782 * we need one credit to modify old root, credits for
1783 * new root will be added in split accounting
1784 */
1785 needed += 1;
1786 1782
1787 /* 1783/*
1788 * Index split can happen, we would need: 1784 * How many index/leaf blocks need to change/allocate to modify nrblocks?
1789 * allocate intermediate indexes (bitmap + group) 1785 *
1790 * + change two blocks at each level, but root (already included) 1786 * if nrblocks are fit in a single extent (chunk flag is 1), then
1791 */ 1787 * in the worse case, each tree level index/leaf need to be changed
1792 needed += (depth * 2) + (depth * 2); 1788 * if the tree split due to insert a new extent, then the old tree
1789 * index/leaf need to be updated too
1790 *
1791 * If the nrblocks are discontiguous, they could cause
1792 * the whole tree split more than once, but this is really rare.
1793 */
1794int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
1795{
1796 int index;
1797 int depth = ext_depth(inode);
1793 1798
1794 /* any allocation modifies superblock */ 1799 if (chunk)
1795 needed += 1; 1800 index = depth * 2;
1801 else
1802 index = depth * 3;
1796 1803
1797 return needed; 1804 return index;
1798} 1805}
1799 1806
1800static int ext4_remove_blocks(handle_t *handle, struct inode *inode, 1807static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
@@ -1910,16 +1917,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1910 BUG_ON(b != ex_ee_block + ex_ee_len - 1); 1917 BUG_ON(b != ex_ee_block + ex_ee_len - 1);
1911 } 1918 }
1912 1919
1913 /* at present, extent can't cross block group: */ 1920 /*
1914 /* leaf + bitmap + group desc + sb + inode */ 1921 * 3 for leaf, sb, and inode plus 2 (bmap and group
1915 credits = 5; 1922 * descriptor) for each block group; assume two block
1923 * groups plus ex_ee_len/blocks_per_block_group for
1924 * the worst case
1925 */
1926 credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
1916 if (ex == EXT_FIRST_EXTENT(eh)) { 1927 if (ex == EXT_FIRST_EXTENT(eh)) {
1917 correct_index = 1; 1928 correct_index = 1;
1918 credits += (ext_depth(inode)) + 1; 1929 credits += (ext_depth(inode)) + 1;
1919 } 1930 }
1920#ifdef CONFIG_QUOTA
1921 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 1931 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
1922#endif
1923 1932
1924 err = ext4_ext_journal_restart(handle, credits); 1933 err = ext4_ext_journal_restart(handle, credits);
1925 if (err) 1934 if (err)
@@ -2323,7 +2332,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2323 unsigned int newdepth; 2332 unsigned int newdepth;
2324 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ 2333 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
2325 if (allocated <= EXT4_EXT_ZERO_LEN) { 2334 if (allocated <= EXT4_EXT_ZERO_LEN) {
2326 /* Mark first half uninitialized. 2335 /*
2336 * iblock == ee_block is handled by the zerouout
2337 * at the beginning.
2338 * Mark first half uninitialized.
2327 * Mark second half initialized and zero out the 2339 * Mark second half initialized and zero out the
2328 * initialized extent 2340 * initialized extent
2329 */ 2341 */
@@ -2346,7 +2358,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2346 ex->ee_len = orig_ex.ee_len; 2358 ex->ee_len = orig_ex.ee_len;
2347 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2359 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2348 ext4_ext_dirty(handle, inode, path + depth); 2360 ext4_ext_dirty(handle, inode, path + depth);
2349 /* zeroed the full extent */ 2361 /* blocks available from iblock */
2350 return allocated; 2362 return allocated;
2351 2363
2352 } else if (err) 2364 } else if (err)
@@ -2374,6 +2386,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2374 err = PTR_ERR(path); 2386 err = PTR_ERR(path);
2375 return err; 2387 return err;
2376 } 2388 }
2389 /* get the second half extent details */
2377 ex = path[depth].p_ext; 2390 ex = path[depth].p_ext;
2378 err = ext4_ext_get_access(handle, inode, 2391 err = ext4_ext_get_access(handle, inode,
2379 path + depth); 2392 path + depth);
@@ -2403,6 +2416,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2403 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2416 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2404 ext4_ext_dirty(handle, inode, path + depth); 2417 ext4_ext_dirty(handle, inode, path + depth);
2405 /* zeroed the full extent */ 2418 /* zeroed the full extent */
2419 /* blocks available from iblock */
2406 return allocated; 2420 return allocated;
2407 2421
2408 } else if (err) 2422 } else if (err)
@@ -2418,23 +2432,22 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2418 */ 2432 */
2419 orig_ex.ee_len = cpu_to_le16(ee_len - 2433 orig_ex.ee_len = cpu_to_le16(ee_len -
2420 ext4_ext_get_actual_len(ex3)); 2434 ext4_ext_get_actual_len(ex3));
2421 if (newdepth != depth) { 2435 depth = newdepth;
2422 depth = newdepth; 2436 ext4_ext_drop_refs(path);
2423 ext4_ext_drop_refs(path); 2437 path = ext4_ext_find_extent(inode, iblock, path);
2424 path = ext4_ext_find_extent(inode, iblock, path); 2438 if (IS_ERR(path)) {
2425 if (IS_ERR(path)) { 2439 err = PTR_ERR(path);
2426 err = PTR_ERR(path); 2440 goto out;
2427 goto out;
2428 }
2429 eh = path[depth].p_hdr;
2430 ex = path[depth].p_ext;
2431 if (ex2 != &newex)
2432 ex2 = ex;
2433
2434 err = ext4_ext_get_access(handle, inode, path + depth);
2435 if (err)
2436 goto out;
2437 } 2441 }
2442 eh = path[depth].p_hdr;
2443 ex = path[depth].p_ext;
2444 if (ex2 != &newex)
2445 ex2 = ex;
2446
2447 err = ext4_ext_get_access(handle, inode, path + depth);
2448 if (err)
2449 goto out;
2450
2438 allocated = max_blocks; 2451 allocated = max_blocks;
2439 2452
2440 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying 2453 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
@@ -2452,6 +2465,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2452 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2465 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2453 ext4_ext_dirty(handle, inode, path + depth); 2466 ext4_ext_dirty(handle, inode, path + depth);
2454 /* zero out the first half */ 2467 /* zero out the first half */
2468 /* blocks available from iblock */
2455 return allocated; 2469 return allocated;
2456 } 2470 }
2457 } 2471 }
@@ -2796,7 +2810,7 @@ void ext4_ext_truncate(struct inode *inode)
2796 /* 2810 /*
2797 * probably first extent we're gonna free will be last in block 2811 * probably first extent we're gonna free will be last in block
2798 */ 2812 */
2799 err = ext4_writepage_trans_blocks(inode) + 3; 2813 err = ext4_writepage_trans_blocks(inode);
2800 handle = ext4_journal_start(inode, err); 2814 handle = ext4_journal_start(inode, err);
2801 if (IS_ERR(handle)) 2815 if (IS_ERR(handle))
2802 return; 2816 return;
@@ -2810,7 +2824,7 @@ void ext4_ext_truncate(struct inode *inode)
2810 down_write(&EXT4_I(inode)->i_data_sem); 2824 down_write(&EXT4_I(inode)->i_data_sem);
2811 ext4_ext_invalidate_cache(inode); 2825 ext4_ext_invalidate_cache(inode);
2812 2826
2813 ext4_mb_discard_inode_preallocations(inode); 2827 ext4_discard_reservation(inode);
2814 2828
2815 /* 2829 /*
2816 * TODO: optimization is possible here. 2830 * TODO: optimization is possible here.
@@ -2849,27 +2863,6 @@ out_stop:
2849 ext4_journal_stop(handle); 2863 ext4_journal_stop(handle);
2850} 2864}
2851 2865
2852/*
2853 * ext4_ext_writepage_trans_blocks:
2854 * calculate max number of blocks we could modify
2855 * in order to allocate new block for an inode
2856 */
2857int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
2858{
2859 int needed;
2860
2861 needed = ext4_ext_calc_credits_for_insert(inode, NULL);
2862
2863 /* caller wants to allocate num blocks, but note it includes sb */
2864 needed = needed * num - (num - 1);
2865
2866#ifdef CONFIG_QUOTA
2867 needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
2868#endif
2869
2870 return needed;
2871}
2872
2873static void ext4_falloc_update_inode(struct inode *inode, 2866static void ext4_falloc_update_inode(struct inode *inode,
2874 int mode, loff_t new_size, int update_ctime) 2867 int mode, loff_t new_size, int update_ctime)
2875{ 2868{
@@ -2930,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2930 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 2923 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
2931 - block; 2924 - block;
2932 /* 2925 /*
2933 * credits to insert 1 extent into extent tree + buffers to be able to 2926 * credits to insert 1 extent into extent tree
2934 * modify 1 super block, 1 block bitmap and 1 group descriptor.
2935 */ 2927 */
2936 credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; 2928 credits = ext4_chunk_trans_blocks(inode, max_blocks);
2937 mutex_lock(&inode->i_mutex); 2929 mutex_lock(&inode->i_mutex);
2938retry: 2930retry:
2939 while (ret >= 0 && ret < max_blocks) { 2931 while (ret >= 0 && ret < max_blocks) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index a92eb305344f..f344834bbf58 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -97,34 +97,44 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
97 * Return buffer_head of bitmap on success or NULL. 97 * Return buffer_head of bitmap on success or NULL.
98 */ 98 */
99static struct buffer_head * 99static struct buffer_head *
100read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) 100ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
101{ 101{
102 struct ext4_group_desc *desc; 102 struct ext4_group_desc *desc;
103 struct buffer_head *bh = NULL; 103 struct buffer_head *bh = NULL;
104 ext4_fsblk_t bitmap_blk;
104 105
105 desc = ext4_get_group_desc(sb, block_group, NULL); 106 desc = ext4_get_group_desc(sb, block_group, NULL);
106 if (!desc) 107 if (!desc)
107 goto error_out; 108 return NULL;
108 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 109 bitmap_blk = ext4_inode_bitmap(sb, desc);
109 bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc)); 110 bh = sb_getblk(sb, bitmap_blk);
110 if (!buffer_uptodate(bh)) { 111 if (unlikely(!bh)) {
111 lock_buffer(bh); 112 ext4_error(sb, __func__,
112 if (!buffer_uptodate(bh)) { 113 "Cannot read inode bitmap - "
113 ext4_init_inode_bitmap(sb, bh, block_group, 114 "block_group = %lu, inode_bitmap = %llu",
114 desc); 115 block_group, bitmap_blk);
115 set_buffer_uptodate(bh); 116 return NULL;
116 }
117 unlock_buffer(bh);
118 }
119 } else {
120 bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
121 } 117 }
122 if (!bh) 118 if (bh_uptodate_or_lock(bh))
123 ext4_error(sb, "read_inode_bitmap", 119 return bh;
120
121 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
122 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
123 ext4_init_inode_bitmap(sb, bh, block_group, desc);
124 set_buffer_uptodate(bh);
125 unlock_buffer(bh);
126 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
127 return bh;
128 }
129 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
130 if (bh_submit_read(bh) < 0) {
131 put_bh(bh);
132 ext4_error(sb, __func__,
124 "Cannot read inode bitmap - " 133 "Cannot read inode bitmap - "
125 "block_group = %lu, inode_bitmap = %llu", 134 "block_group = %lu, inode_bitmap = %llu",
126 block_group, ext4_inode_bitmap(sb, desc)); 135 block_group, bitmap_blk);
127error_out: 136 return NULL;
137 }
128 return bh; 138 return bh;
129} 139}
130 140
@@ -200,7 +210,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
200 } 210 }
201 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 211 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
202 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 212 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
203 bitmap_bh = read_inode_bitmap(sb, block_group); 213 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
204 if (!bitmap_bh) 214 if (!bitmap_bh)
205 goto error_return; 215 goto error_return;
206 216
@@ -341,7 +351,7 @@ find_close_to_parent:
341 goto found_flexbg; 351 goto found_flexbg;
342 } 352 }
343 353
344 if (best_flex < 0 || 354 if (flex_group[best_flex].free_inodes == 0 ||
345 (flex_group[i].free_blocks > 355 (flex_group[i].free_blocks >
346 flex_group[best_flex].free_blocks && 356 flex_group[best_flex].free_blocks &&
347 flex_group[i].free_inodes)) 357 flex_group[i].free_inodes))
@@ -623,7 +633,7 @@ got_group:
623 goto fail; 633 goto fail;
624 634
625 brelse(bitmap_bh); 635 brelse(bitmap_bh);
626 bitmap_bh = read_inode_bitmap(sb, group); 636 bitmap_bh = ext4_read_inode_bitmap(sb, group);
627 if (!bitmap_bh) 637 if (!bitmap_bh)
628 goto fail; 638 goto fail;
629 639
@@ -728,7 +738,7 @@ got:
728 738
729 /* When marking the block group with 739 /* When marking the block group with
730 * ~EXT4_BG_INODE_UNINIT we don't want to depend 740 * ~EXT4_BG_INODE_UNINIT we don't want to depend
731 * on the value of bg_itable_unsed even though 741 * on the value of bg_itable_unused even though
732 * mke2fs could have initialized the same for us. 742 * mke2fs could have initialized the same for us.
733 * Instead we calculated the value below 743 * Instead we calculated the value below
734 */ 744 */
@@ -891,7 +901,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
891 901
892 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 902 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
893 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 903 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
894 bitmap_bh = read_inode_bitmap(sb, block_group); 904 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
895 if (!bitmap_bh) { 905 if (!bitmap_bh) {
896 ext4_warning(sb, __func__, 906 ext4_warning(sb, __func__,
897 "inode bitmap error for orphan %lu", ino); 907 "inode bitmap error for orphan %lu", ino);
@@ -969,7 +979,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
969 continue; 979 continue;
970 desc_count += le16_to_cpu(gdp->bg_free_inodes_count); 980 desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
971 brelse(bitmap_bh); 981 brelse(bitmap_bh);
972 bitmap_bh = read_inode_bitmap(sb, i); 982 bitmap_bh = ext4_read_inode_bitmap(sb, i);
973 if (!bitmap_bh) 983 if (!bitmap_bh)
974 continue; 984 continue;
975 985
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9843b046c235..7e91913e325b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -41,6 +41,8 @@
41#include "acl.h" 41#include "acl.h"
42#include "ext4_extents.h" 42#include "ext4_extents.h"
43 43
44#define MPAGE_DA_EXTENT_TAIL 0x01
45
44static inline int ext4_begin_ordered_truncate(struct inode *inode, 46static inline int ext4_begin_ordered_truncate(struct inode *inode,
45 loff_t new_size) 47 loff_t new_size)
46{ 48{
@@ -191,6 +193,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
191void ext4_delete_inode (struct inode * inode) 193void ext4_delete_inode (struct inode * inode)
192{ 194{
193 handle_t *handle; 195 handle_t *handle;
196 int err;
194 197
195 if (ext4_should_order_data(inode)) 198 if (ext4_should_order_data(inode))
196 ext4_begin_ordered_truncate(inode, 0); 199 ext4_begin_ordered_truncate(inode, 0);
@@ -199,8 +202,9 @@ void ext4_delete_inode (struct inode * inode)
199 if (is_bad_inode(inode)) 202 if (is_bad_inode(inode))
200 goto no_delete; 203 goto no_delete;
201 204
202 handle = start_transaction(inode); 205 handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
203 if (IS_ERR(handle)) { 206 if (IS_ERR(handle)) {
207 ext4_std_error(inode->i_sb, PTR_ERR(handle));
204 /* 208 /*
205 * If we're going to skip the normal cleanup, we still need to 209 * If we're going to skip the normal cleanup, we still need to
206 * make sure that the in-core orphan linked list is properly 210 * make sure that the in-core orphan linked list is properly
@@ -213,8 +217,34 @@ void ext4_delete_inode (struct inode * inode)
213 if (IS_SYNC(inode)) 217 if (IS_SYNC(inode))
214 handle->h_sync = 1; 218 handle->h_sync = 1;
215 inode->i_size = 0; 219 inode->i_size = 0;
220 err = ext4_mark_inode_dirty(handle, inode);
221 if (err) {
222 ext4_warning(inode->i_sb, __func__,
223 "couldn't mark inode dirty (err %d)", err);
224 goto stop_handle;
225 }
216 if (inode->i_blocks) 226 if (inode->i_blocks)
217 ext4_truncate(inode); 227 ext4_truncate(inode);
228
229 /*
230 * ext4_ext_truncate() doesn't reserve any slop when it
231 * restarts journal transactions; therefore there may not be
232 * enough credits left in the handle to remove the inode from
233 * the orphan list and set the dtime field.
234 */
235 if (handle->h_buffer_credits < 3) {
236 err = ext4_journal_extend(handle, 3);
237 if (err > 0)
238 err = ext4_journal_restart(handle, 3);
239 if (err != 0) {
240 ext4_warning(inode->i_sb, __func__,
241 "couldn't extend journal (err %d)", err);
242 stop_handle:
243 ext4_journal_stop(handle);
244 goto no_delete;
245 }
246 }
247
218 /* 248 /*
219 * Kill off the orphan record which ext4_truncate created. 249 * Kill off the orphan record which ext4_truncate created.
220 * AKPM: I think this can be inside the above `if'. 250 * AKPM: I think this can be inside the above `if'.
@@ -952,23 +982,74 @@ out:
952 return err; 982 return err;
953} 983}
954 984
955/* Maximum number of blocks we map for direct IO at once. */
956#define DIO_MAX_BLOCKS 4096
957/* 985/*
958 * Number of credits we need for writing DIO_MAX_BLOCKS: 986 * Calculate the number of metadata blocks need to reserve
959 * We need sb + group descriptor + bitmap + inode -> 4 987 * to allocate @blocks for non extent file based file
960 * For B blocks with A block pointers per block we need:
961 * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
962 * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
963 */ 988 */
964#define DIO_CREDITS 25 989static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
990{
991 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
992 int ind_blks, dind_blks, tind_blks;
993
994 /* number of new indirect blocks needed */
995 ind_blks = (blocks + icap - 1) / icap;
996
997 dind_blks = (ind_blks + icap - 1) / icap;
965 998
999 tind_blks = 1;
1000
1001 return ind_blks + dind_blks + tind_blks;
1002}
966 1003
967/* 1004/*
1005 * Calculate the number of metadata blocks need to reserve
1006 * to allocate given number of blocks
1007 */
1008static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
1009{
1010 if (!blocks)
1011 return 0;
1012
1013 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1014 return ext4_ext_calc_metadata_amount(inode, blocks);
1015
1016 return ext4_indirect_calc_metadata_amount(inode, blocks);
1017}
1018
1019static void ext4_da_update_reserve_space(struct inode *inode, int used)
1020{
1021 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1022 int total, mdb, mdb_free;
1023
1024 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1025 /* recalculate the number of metablocks still need to be reserved */
1026 total = EXT4_I(inode)->i_reserved_data_blocks - used;
1027 mdb = ext4_calc_metadata_amount(inode, total);
1028
1029 /* figure out how many metablocks to release */
1030 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1031 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
1032
1033 /* Account for allocated meta_blocks */
1034 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
1035
1036 /* update fs free blocks counter for truncate case */
1037 percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
1038
1039 /* update per-inode reservations */
1040 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1041 EXT4_I(inode)->i_reserved_data_blocks -= used;
1042
1043 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1044 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1045 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1046 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1047}
1048
1049/*
1050 * The ext4_get_blocks_wrap() function try to look up the requested blocks,
1051 * and returns if the blocks are already mapped.
968 * 1052 *
969 *
970 * ext4_ext4 get_block() wrapper function
971 * It will do a look up first, and returns if the blocks already mapped.
972 * Otherwise it takes the write lock of the i_data_sem and allocate blocks 1053 * Otherwise it takes the write lock of the i_data_sem and allocate blocks
973 * and store the allocated blocks in the result buffer head and mark it 1054 * and store the allocated blocks in the result buffer head and mark it
974 * mapped. 1055 * mapped.
@@ -1069,26 +1150,30 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1069 * which were deferred till now 1150 * which were deferred till now
1070 */ 1151 */
1071 if ((retval > 0) && buffer_delay(bh)) 1152 if ((retval > 0) && buffer_delay(bh))
1072 ext4_da_release_space(inode, retval, 0); 1153 ext4_da_update_reserve_space(inode, retval);
1073 } 1154 }
1074 1155
1075 up_write((&EXT4_I(inode)->i_data_sem)); 1156 up_write((&EXT4_I(inode)->i_data_sem));
1076 return retval; 1157 return retval;
1077} 1158}
1078 1159
1160/* Maximum number of blocks we map for direct IO at once. */
1161#define DIO_MAX_BLOCKS 4096
1162
1079static int ext4_get_block(struct inode *inode, sector_t iblock, 1163static int ext4_get_block(struct inode *inode, sector_t iblock,
1080 struct buffer_head *bh_result, int create) 1164 struct buffer_head *bh_result, int create)
1081{ 1165{
1082 handle_t *handle = ext4_journal_current_handle(); 1166 handle_t *handle = ext4_journal_current_handle();
1083 int ret = 0, started = 0; 1167 int ret = 0, started = 0;
1084 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 1168 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
1169 int dio_credits;
1085 1170
1086 if (create && !handle) { 1171 if (create && !handle) {
1087 /* Direct IO write... */ 1172 /* Direct IO write... */
1088 if (max_blocks > DIO_MAX_BLOCKS) 1173 if (max_blocks > DIO_MAX_BLOCKS)
1089 max_blocks = DIO_MAX_BLOCKS; 1174 max_blocks = DIO_MAX_BLOCKS;
1090 handle = ext4_journal_start(inode, DIO_CREDITS + 1175 dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
1091 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); 1176 handle = ext4_journal_start(inode, dio_credits);
1092 if (IS_ERR(handle)) { 1177 if (IS_ERR(handle)) {
1093 ret = PTR_ERR(handle); 1178 ret = PTR_ERR(handle);
1094 goto out; 1179 goto out;
@@ -1336,12 +1421,8 @@ static int ext4_ordered_write_end(struct file *file,
1336{ 1421{
1337 handle_t *handle = ext4_journal_current_handle(); 1422 handle_t *handle = ext4_journal_current_handle();
1338 struct inode *inode = mapping->host; 1423 struct inode *inode = mapping->host;
1339 unsigned from, to;
1340 int ret = 0, ret2; 1424 int ret = 0, ret2;
1341 1425
1342 from = pos & (PAGE_CACHE_SIZE - 1);
1343 to = from + len;
1344
1345 ret = ext4_jbd2_file_inode(handle, inode); 1426 ret = ext4_jbd2_file_inode(handle, inode);
1346 1427
1347 if (ret == 0) { 1428 if (ret == 0) {
@@ -1437,36 +1518,6 @@ static int ext4_journalled_write_end(struct file *file,
1437 1518
1438 return ret ? ret : copied; 1519 return ret ? ret : copied;
1439} 1520}
1440/*
1441 * Calculate the number of metadata blocks need to reserve
1442 * to allocate @blocks for non extent file based file
1443 */
1444static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
1445{
1446 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1447 int ind_blks, dind_blks, tind_blks;
1448
1449 /* number of new indirect blocks needed */
1450 ind_blks = (blocks + icap - 1) / icap;
1451
1452 dind_blks = (ind_blks + icap - 1) / icap;
1453
1454 tind_blks = 1;
1455
1456 return ind_blks + dind_blks + tind_blks;
1457}
1458
1459/*
1460 * Calculate the number of metadata blocks need to reserve
1461 * to allocate given number of blocks
1462 */
1463static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
1464{
1465 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1466 return ext4_ext_calc_metadata_amount(inode, blocks);
1467
1468 return ext4_indirect_calc_metadata_amount(inode, blocks);
1469}
1470 1521
1471static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1522static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1472{ 1523{
@@ -1490,7 +1541,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1490 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1541 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1491 return -ENOSPC; 1542 return -ENOSPC;
1492 } 1543 }
1493
1494 /* reduce fs free blocks counter */ 1544 /* reduce fs free blocks counter */
1495 percpu_counter_sub(&sbi->s_freeblocks_counter, total); 1545 percpu_counter_sub(&sbi->s_freeblocks_counter, total);
1496 1546
@@ -1501,35 +1551,49 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1501 return 0; /* success */ 1551 return 0; /* success */
1502} 1552}
1503 1553
1504void ext4_da_release_space(struct inode *inode, int used, int to_free) 1554static void ext4_da_release_space(struct inode *inode, int to_free)
1505{ 1555{
1506 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1556 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1507 int total, mdb, mdb_free, release; 1557 int total, mdb, mdb_free, release;
1508 1558
1559 if (!to_free)
1560 return; /* Nothing to release, exit */
1561
1509 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1562 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1563
1564 if (!EXT4_I(inode)->i_reserved_data_blocks) {
1565 /*
1566 * if there is no reserved blocks, but we try to free some
1567 * then the counter is messed up somewhere.
1568 * but since this function is called from invalidate
1569 * page, it's harmless to return without any action
1570 */
1571 printk(KERN_INFO "ext4 delalloc try to release %d reserved "
1572 "blocks for inode %lu, but there is no reserved "
1573 "data blocks\n", to_free, inode->i_ino);
1574 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1575 return;
1576 }
1577
1510 /* recalculate the number of metablocks still need to be reserved */ 1578 /* recalculate the number of metablocks still need to be reserved */
1511 total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free; 1579 total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
1512 mdb = ext4_calc_metadata_amount(inode, total); 1580 mdb = ext4_calc_metadata_amount(inode, total);
1513 1581
1514 /* figure out how many metablocks to release */ 1582 /* figure out how many metablocks to release */
1515 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1583 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1516 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1584 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
1517 1585
1518 /* Account for allocated meta_blocks */
1519 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
1520
1521 release = to_free + mdb_free; 1586 release = to_free + mdb_free;
1522 1587
1523 /* update fs free blocks counter for truncate case */ 1588 /* update fs free blocks counter for truncate case */
1524 percpu_counter_add(&sbi->s_freeblocks_counter, release); 1589 percpu_counter_add(&sbi->s_freeblocks_counter, release);
1525 1590
1526 /* update per-inode reservations */ 1591 /* update per-inode reservations */
1527 BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks); 1592 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
1528 EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free); 1593 EXT4_I(inode)->i_reserved_data_blocks -= to_free;
1529 1594
1530 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1595 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1531 EXT4_I(inode)->i_reserved_meta_blocks = mdb; 1596 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1532 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1533 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1597 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1534} 1598}
1535 1599
@@ -1551,7 +1615,7 @@ static void ext4_da_page_release_reservation(struct page *page,
1551 } 1615 }
1552 curr_off = next_off; 1616 curr_off = next_off;
1553 } while ((bh = bh->b_this_page) != head); 1617 } while ((bh = bh->b_this_page) != head);
1554 ext4_da_release_space(page->mapping->host, 0, to_release); 1618 ext4_da_release_space(page->mapping->host, to_release);
1555} 1619}
1556 1620
1557/* 1621/*
@@ -1564,11 +1628,13 @@ struct mpage_da_data {
1564 unsigned long first_page, next_page; /* extent of pages */ 1628 unsigned long first_page, next_page; /* extent of pages */
1565 get_block_t *get_block; 1629 get_block_t *get_block;
1566 struct writeback_control *wbc; 1630 struct writeback_control *wbc;
1631 int io_done;
1632 long pages_written;
1567}; 1633};
1568 1634
1569/* 1635/*
1570 * mpage_da_submit_io - walks through extent of pages and try to write 1636 * mpage_da_submit_io - walks through extent of pages and try to write
1571 * them with __mpage_writepage() 1637 * them with writepage() call back
1572 * 1638 *
1573 * @mpd->inode: inode 1639 * @mpd->inode: inode
1574 * @mpd->first_page: first page of the extent 1640 * @mpd->first_page: first page of the extent
@@ -1583,18 +1649,11 @@ struct mpage_da_data {
1583static int mpage_da_submit_io(struct mpage_da_data *mpd) 1649static int mpage_da_submit_io(struct mpage_da_data *mpd)
1584{ 1650{
1585 struct address_space *mapping = mpd->inode->i_mapping; 1651 struct address_space *mapping = mpd->inode->i_mapping;
1586 struct mpage_data mpd_pp = {
1587 .bio = NULL,
1588 .last_block_in_bio = 0,
1589 .get_block = mpd->get_block,
1590 .use_writepage = 1,
1591 };
1592 int ret = 0, err, nr_pages, i; 1652 int ret = 0, err, nr_pages, i;
1593 unsigned long index, end; 1653 unsigned long index, end;
1594 struct pagevec pvec; 1654 struct pagevec pvec;
1595 1655
1596 BUG_ON(mpd->next_page <= mpd->first_page); 1656 BUG_ON(mpd->next_page <= mpd->first_page);
1597
1598 pagevec_init(&pvec, 0); 1657 pagevec_init(&pvec, 0);
1599 index = mpd->first_page; 1658 index = mpd->first_page;
1600 end = mpd->next_page - 1; 1659 end = mpd->next_page - 1;
@@ -1612,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1612 break; 1671 break;
1613 index++; 1672 index++;
1614 1673
1615 err = __mpage_writepage(page, mpd->wbc, &mpd_pp); 1674 err = mapping->a_ops->writepage(page, mpd->wbc);
1616 1675 if (!err)
1676 mpd->pages_written++;
1617 /* 1677 /*
1618 * In error case, we have to continue because 1678 * In error case, we have to continue because
1619 * remaining pages are still locked 1679 * remaining pages are still locked
@@ -1624,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1624 } 1684 }
1625 pagevec_release(&pvec); 1685 pagevec_release(&pvec);
1626 } 1686 }
1627 if (mpd_pp.bio)
1628 mpage_bio_submit(WRITE, mpd_pp.bio);
1629
1630 return ret; 1687 return ret;
1631} 1688}
1632 1689
@@ -1649,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1649 int blocks = exbh->b_size >> inode->i_blkbits; 1706 int blocks = exbh->b_size >> inode->i_blkbits;
1650 sector_t pblock = exbh->b_blocknr, cur_logical; 1707 sector_t pblock = exbh->b_blocknr, cur_logical;
1651 struct buffer_head *head, *bh; 1708 struct buffer_head *head, *bh;
1652 unsigned long index, end; 1709 pgoff_t index, end;
1653 struct pagevec pvec; 1710 struct pagevec pvec;
1654 int nr_pages, i; 1711 int nr_pages, i;
1655 1712
@@ -1692,6 +1749,13 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1692 if (buffer_delay(bh)) { 1749 if (buffer_delay(bh)) {
1693 bh->b_blocknr = pblock; 1750 bh->b_blocknr = pblock;
1694 clear_buffer_delay(bh); 1751 clear_buffer_delay(bh);
1752 bh->b_bdev = inode->i_sb->s_bdev;
1753 } else if (buffer_unwritten(bh)) {
1754 bh->b_blocknr = pblock;
1755 clear_buffer_unwritten(bh);
1756 set_buffer_mapped(bh);
1757 set_buffer_new(bh);
1758 bh->b_bdev = inode->i_sb->s_bdev;
1695 } else if (buffer_mapped(bh)) 1759 } else if (buffer_mapped(bh))
1696 BUG_ON(bh->b_blocknr != pblock); 1760 BUG_ON(bh->b_blocknr != pblock);
1697 1761
@@ -1727,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
1727 * 1791 *
1728 * The function skips space we know is already mapped to disk blocks. 1792 * The function skips space we know is already mapped to disk blocks.
1729 * 1793 *
1730 * The function ignores errors ->get_block() returns, thus real
1731 * error handling is postponed to __mpage_writepage()
1732 */ 1794 */
1733static void mpage_da_map_blocks(struct mpage_da_data *mpd) 1795static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1734{ 1796{
1797 int err = 0;
1735 struct buffer_head *lbh = &mpd->lbh; 1798 struct buffer_head *lbh = &mpd->lbh;
1736 int err = 0, remain = lbh->b_size;
1737 sector_t next = lbh->b_blocknr; 1799 sector_t next = lbh->b_blocknr;
1738 struct buffer_head new; 1800 struct buffer_head new;
1739 1801
@@ -1743,38 +1805,36 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1743 if (buffer_mapped(lbh) && !buffer_delay(lbh)) 1805 if (buffer_mapped(lbh) && !buffer_delay(lbh))
1744 return; 1806 return;
1745 1807
1746 while (remain) { 1808 new.b_state = lbh->b_state;
1747 new.b_state = lbh->b_state; 1809 new.b_blocknr = 0;
1748 new.b_blocknr = 0; 1810 new.b_size = lbh->b_size;
1749 new.b_size = remain;
1750 err = mpd->get_block(mpd->inode, next, &new, 1);
1751 if (err) {
1752 /*
1753 * Rather than implement own error handling
1754 * here, we just leave remaining blocks
1755 * unallocated and try again with ->writepage()
1756 */
1757 break;
1758 }
1759 BUG_ON(new.b_size == 0);
1760 1811
1761 if (buffer_new(&new)) 1812 /*
1762 __unmap_underlying_blocks(mpd->inode, &new); 1813 * If we didn't accumulate anything
1814 * to write simply return
1815 */
1816 if (!new.b_size)
1817 return;
1818 err = mpd->get_block(mpd->inode, next, &new, 1);
1819 if (err)
1820 return;
1821 BUG_ON(new.b_size == 0);
1763 1822
1764 /* 1823 if (buffer_new(&new))
1765 * If blocks are delayed marked, we need to 1824 __unmap_underlying_blocks(mpd->inode, &new);
1766 * put actual blocknr and drop delayed bit
1767 */
1768 if (buffer_delay(lbh))
1769 mpage_put_bnr_to_bhs(mpd, next, &new);
1770 1825
1771 /* go for the remaining blocks */ 1826 /*
1772 next += new.b_size >> mpd->inode->i_blkbits; 1827 * If blocks are delayed marked, we need to
1773 remain -= new.b_size; 1828 * put actual blocknr and drop delayed bit
1774 } 1829 */
1830 if (buffer_delay(lbh) || buffer_unwritten(lbh))
1831 mpage_put_bnr_to_bhs(mpd, next, &new);
1832
1833 return;
1775} 1834}
1776 1835
1777#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay)) 1836#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
1837 (1 << BH_Delay) | (1 << BH_Unwritten))
1778 1838
1779/* 1839/*
1780 * mpage_add_bh_to_extent - try to add one more block to extent of blocks 1840 * mpage_add_bh_to_extent - try to add one more block to extent of blocks
@@ -1788,41 +1848,61 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1788static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, 1848static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1789 sector_t logical, struct buffer_head *bh) 1849 sector_t logical, struct buffer_head *bh)
1790{ 1850{
1791 struct buffer_head *lbh = &mpd->lbh;
1792 sector_t next; 1851 sector_t next;
1852 size_t b_size = bh->b_size;
1853 struct buffer_head *lbh = &mpd->lbh;
1854 int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
1793 1855
1794 next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); 1856 /* check if the reserved journal credits might overflow */
1795 1857 if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
1858 if (nrblocks >= EXT4_MAX_TRANS_DATA) {
1859 /*
1860 * With non-extent format we are limited by the journal
1861 * credit available. Total credit needed to insert
1862 * nrblocks contiguous blocks is dependent on the
1863 * nrblocks. So limit nrblocks.
1864 */
1865 goto flush_it;
1866 } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
1867 EXT4_MAX_TRANS_DATA) {
1868 /*
1869 * Adding the new buffer_head would make it cross the
1870 * allowed limit for which we have journal credit
1871 * reserved. So limit the new bh->b_size
1872 */
1873 b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
1874 mpd->inode->i_blkbits;
1875 /* we will do mpage_da_submit_io in the next loop */
1876 }
1877 }
1796 /* 1878 /*
1797 * First block in the extent 1879 * First block in the extent
1798 */ 1880 */
1799 if (lbh->b_size == 0) { 1881 if (lbh->b_size == 0) {
1800 lbh->b_blocknr = logical; 1882 lbh->b_blocknr = logical;
1801 lbh->b_size = bh->b_size; 1883 lbh->b_size = b_size;
1802 lbh->b_state = bh->b_state & BH_FLAGS; 1884 lbh->b_state = bh->b_state & BH_FLAGS;
1803 return; 1885 return;
1804 } 1886 }
1805 1887
1888 next = lbh->b_blocknr + nrblocks;
1806 /* 1889 /*
1807 * Can we merge the block to our big extent? 1890 * Can we merge the block to our big extent?
1808 */ 1891 */
1809 if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { 1892 if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
1810 lbh->b_size += bh->b_size; 1893 lbh->b_size += b_size;
1811 return; 1894 return;
1812 } 1895 }
1813 1896
1897flush_it:
1814 /* 1898 /*
1815 * We couldn't merge the block to our extent, so we 1899 * We couldn't merge the block to our extent, so we
1816 * need to flush current extent and start new one 1900 * need to flush current extent and start new one
1817 */ 1901 */
1818 mpage_da_map_blocks(mpd); 1902 mpage_da_map_blocks(mpd);
1819 1903 mpage_da_submit_io(mpd);
1820 /* 1904 mpd->io_done = 1;
1821 * Now start a new extent 1905 return;
1822 */
1823 lbh->b_size = bh->b_size;
1824 lbh->b_state = bh->b_state & BH_FLAGS;
1825 lbh->b_blocknr = logical;
1826} 1906}
1827 1907
1828/* 1908/*
@@ -1842,17 +1922,35 @@ static int __mpage_da_writepage(struct page *page,
1842 struct buffer_head *bh, *head, fake; 1922 struct buffer_head *bh, *head, fake;
1843 sector_t logical; 1923 sector_t logical;
1844 1924
1925 if (mpd->io_done) {
1926 /*
1927 * Rest of the page in the page_vec
1928 * redirty them and skip them. We will
1929 * try to write them again after
1930 * starting a new transaction
1931 */
1932 redirty_page_for_writepage(wbc, page);
1933 unlock_page(page);
1934 return MPAGE_DA_EXTENT_TAIL;
1935 }
1845 /* 1936 /*
1846 * Can we merge this page to current extent? 1937 * Can we merge this page to current extent?
1847 */ 1938 */
1848 if (mpd->next_page != page->index) { 1939 if (mpd->next_page != page->index) {
1849 /* 1940 /*
1850 * Nope, we can't. So, we map non-allocated blocks 1941 * Nope, we can't. So, we map non-allocated blocks
1851 * and start IO on them using __mpage_writepage() 1942 * and start IO on them using writepage()
1852 */ 1943 */
1853 if (mpd->next_page != mpd->first_page) { 1944 if (mpd->next_page != mpd->first_page) {
1854 mpage_da_map_blocks(mpd); 1945 mpage_da_map_blocks(mpd);
1855 mpage_da_submit_io(mpd); 1946 mpage_da_submit_io(mpd);
1947 /*
1948 * skip rest of the page in the page_vec
1949 */
1950 mpd->io_done = 1;
1951 redirty_page_for_writepage(wbc, page);
1952 unlock_page(page);
1953 return MPAGE_DA_EXTENT_TAIL;
1856 } 1954 }
1857 1955
1858 /* 1956 /*
@@ -1883,6 +1981,8 @@ static int __mpage_da_writepage(struct page *page,
1883 set_buffer_dirty(bh); 1981 set_buffer_dirty(bh);
1884 set_buffer_uptodate(bh); 1982 set_buffer_uptodate(bh);
1885 mpage_add_bh_to_extent(mpd, logical, bh); 1983 mpage_add_bh_to_extent(mpd, logical, bh);
1984 if (mpd->io_done)
1985 return MPAGE_DA_EXTENT_TAIL;
1886 } else { 1986 } else {
1887 /* 1987 /*
1888 * Page with regular buffer heads, just add all dirty ones 1988 * Page with regular buffer heads, just add all dirty ones
@@ -1891,8 +1991,12 @@ static int __mpage_da_writepage(struct page *page,
1891 bh = head; 1991 bh = head;
1892 do { 1992 do {
1893 BUG_ON(buffer_locked(bh)); 1993 BUG_ON(buffer_locked(bh));
1894 if (buffer_dirty(bh)) 1994 if (buffer_dirty(bh) &&
1995 (!buffer_mapped(bh) || buffer_delay(bh))) {
1895 mpage_add_bh_to_extent(mpd, logical, bh); 1996 mpage_add_bh_to_extent(mpd, logical, bh);
1997 if (mpd->io_done)
1998 return MPAGE_DA_EXTENT_TAIL;
1999 }
1896 logical++; 2000 logical++;
1897 } while ((bh = bh->b_this_page) != head); 2001 } while ((bh = bh->b_this_page) != head);
1898 } 2002 }
@@ -1911,22 +2015,13 @@ static int __mpage_da_writepage(struct page *page,
1911 * 2015 *
1912 * This is a library function, which implements the writepages() 2016 * This is a library function, which implements the writepages()
1913 * address_space_operation. 2017 * address_space_operation.
1914 *
1915 * In order to avoid duplication of logic that deals with partial pages,
1916 * multiple bio per page, etc, we find non-allocated blocks, allocate
1917 * them with minimal calls to ->get_block() and re-use __mpage_writepage()
1918 *
1919 * It's important that we call __mpage_writepage() only once for each
1920 * involved page, otherwise we'd have to implement more complicated logic
1921 * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
1922 *
1923 * See comments to mpage_writepages()
1924 */ 2018 */
1925static int mpage_da_writepages(struct address_space *mapping, 2019static int mpage_da_writepages(struct address_space *mapping,
1926 struct writeback_control *wbc, 2020 struct writeback_control *wbc,
1927 get_block_t get_block) 2021 get_block_t get_block)
1928{ 2022{
1929 struct mpage_da_data mpd; 2023 struct mpage_da_data mpd;
2024 long to_write;
1930 int ret; 2025 int ret;
1931 2026
1932 if (!get_block) 2027 if (!get_block)
@@ -1940,17 +2035,22 @@ static int mpage_da_writepages(struct address_space *mapping,
1940 mpd.first_page = 0; 2035 mpd.first_page = 0;
1941 mpd.next_page = 0; 2036 mpd.next_page = 0;
1942 mpd.get_block = get_block; 2037 mpd.get_block = get_block;
2038 mpd.io_done = 0;
2039 mpd.pages_written = 0;
2040
2041 to_write = wbc->nr_to_write;
1943 2042
1944 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); 2043 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
1945 2044
1946 /* 2045 /*
1947 * Handle last extent of pages 2046 * Handle last extent of pages
1948 */ 2047 */
1949 if (mpd.next_page != mpd.first_page) { 2048 if (!mpd.io_done && mpd.next_page != mpd.first_page) {
1950 mpage_da_map_blocks(&mpd); 2049 mpage_da_map_blocks(&mpd);
1951 mpage_da_submit_io(&mpd); 2050 mpage_da_submit_io(&mpd);
1952 } 2051 }
1953 2052
2053 wbc->nr_to_write = to_write - mpd.pages_written;
1954 return ret; 2054 return ret;
1955} 2055}
1956 2056
@@ -2155,63 +2255,95 @@ static int ext4_da_writepage(struct page *page,
2155} 2255}
2156 2256
2157/* 2257/*
2158 * For now just follow the DIO way to estimate the max credits 2258 * This is called via ext4_da_writepages() to
2159 * needed to write out EXT4_MAX_WRITEBACK_PAGES. 2259 * calculate the total number of credits to reserve to fit
2160 * todo: need to calculate the max credits need for 2260 * a single extent allocation into a single transaction,
2161 * extent based files, currently the DIO credits is based on 2261 * ext4_da_writepages() will loop calling this before
2162 * indirect-blocks mapping way. 2262 * the block allocation.
2163 *
2164 * Probably should have a generic way to calculate credits
2165 * for DIO, writepages, and truncate
2166 */ 2263 */
2167#define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS 2264
2168#define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS 2265static int ext4_da_writepages_trans_blocks(struct inode *inode)
2266{
2267 int max_blocks = EXT4_I(inode)->i_reserved_data_blocks;
2268
2269 /*
2270 * With non-extent format the journal credit needed to
2271 * insert nrblocks contiguous block is dependent on
2272 * number of contiguous block. So we will limit
2273 * number of contiguous block to a sane value
2274 */
2275 if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
2276 (max_blocks > EXT4_MAX_TRANS_DATA))
2277 max_blocks = EXT4_MAX_TRANS_DATA;
2278
2279 return ext4_chunk_trans_blocks(inode, max_blocks);
2280}
2169 2281
2170static int ext4_da_writepages(struct address_space *mapping, 2282static int ext4_da_writepages(struct address_space *mapping,
2171 struct writeback_control *wbc) 2283 struct writeback_control *wbc)
2172{ 2284{
2173 struct inode *inode = mapping->host;
2174 handle_t *handle = NULL; 2285 handle_t *handle = NULL;
2175 int needed_blocks;
2176 int ret = 0;
2177 long to_write;
2178 loff_t range_start = 0; 2286 loff_t range_start = 0;
2287 struct inode *inode = mapping->host;
2288 int needed_blocks, ret = 0, nr_to_writebump = 0;
2289 long to_write, pages_skipped = 0;
2290 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2179 2291
2180 /* 2292 /*
2181 * No pages to write? This is mainly a kludge to avoid starting 2293 * No pages to write? This is mainly a kludge to avoid starting
2182 * a transaction for special inodes like journal inode on last iput() 2294 * a transaction for special inodes like journal inode on last iput()
2183 * because that could violate lock ordering on umount 2295 * because that could violate lock ordering on umount
2184 */ 2296 */
2185 if (!mapping->nrpages) 2297 if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
2186 return 0; 2298 return 0;
2187
2188 /* 2299 /*
2189 * Estimate the worse case needed credits to write out 2300 * Make sure nr_to_write is >= sbi->s_mb_stream_request
2190 * EXT4_MAX_BUF_BLOCKS pages 2301 * This makes sure small file blocks are allocated in
2302 * a single attempt. This ensures that small files
2303 * get less fragmented.
2191 */ 2304 */
2192 needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; 2305 if (wbc->nr_to_write < sbi->s_mb_stream_request) {
2306 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
2307 wbc->nr_to_write = sbi->s_mb_stream_request;
2308 }
2193 2309
2194 to_write = wbc->nr_to_write; 2310 if (!wbc->range_cyclic)
2195 if (!wbc->range_cyclic) {
2196 /* 2311 /*
2197 * If range_cyclic is not set force range_cont 2312 * If range_cyclic is not set force range_cont
2198 * and save the old writeback_index 2313 * and save the old writeback_index
2199 */ 2314 */
2200 wbc->range_cont = 1; 2315 wbc->range_cont = 1;
2201 range_start = wbc->range_start;
2202 }
2203 2316
2204 while (!ret && to_write) { 2317 range_start = wbc->range_start;
2318 pages_skipped = wbc->pages_skipped;
2319
2320restart_loop:
2321 to_write = wbc->nr_to_write;
2322 while (!ret && to_write > 0) {
2323
2324 /*
2325 * we insert one extent at a time. So we need
2326 * credit needed for single extent allocation.
2327 * journalled mode is currently not supported
2328 * by delalloc
2329 */
2330 BUG_ON(ext4_should_journal_data(inode));
2331 needed_blocks = ext4_da_writepages_trans_blocks(inode);
2332
2205 /* start a new transaction*/ 2333 /* start a new transaction*/
2206 handle = ext4_journal_start(inode, needed_blocks); 2334 handle = ext4_journal_start(inode, needed_blocks);
2207 if (IS_ERR(handle)) { 2335 if (IS_ERR(handle)) {
2208 ret = PTR_ERR(handle); 2336 ret = PTR_ERR(handle);
2337 printk(KERN_EMERG "%s: jbd2_start: "
2338 "%ld pages, ino %lu; err %d\n", __func__,
2339 wbc->nr_to_write, inode->i_ino, ret);
2340 dump_stack();
2209 goto out_writepages; 2341 goto out_writepages;
2210 } 2342 }
2211 if (ext4_should_order_data(inode)) { 2343 if (ext4_should_order_data(inode)) {
2212 /* 2344 /*
2213 * With ordered mode we need to add 2345 * With ordered mode we need to add
2214 * the inode to the journal handle 2346 * the inode to the journal handle
2215 * when we do block allocation. 2347 * when we do block allocation.
2216 */ 2348 */
2217 ret = ext4_jbd2_file_inode(handle, inode); 2349 ret = ext4_jbd2_file_inode(handle, inode);
@@ -2219,20 +2351,20 @@ static int ext4_da_writepages(struct address_space *mapping,
2219 ext4_journal_stop(handle); 2351 ext4_journal_stop(handle);
2220 goto out_writepages; 2352 goto out_writepages;
2221 } 2353 }
2222
2223 } 2354 }
2224 /*
2225 * set the max dirty pages could be write at a time
2226 * to fit into the reserved transaction credits
2227 */
2228 if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
2229 wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;
2230 2355
2231 to_write -= wbc->nr_to_write; 2356 to_write -= wbc->nr_to_write;
2232 ret = mpage_da_writepages(mapping, wbc, 2357 ret = mpage_da_writepages(mapping, wbc,
2233 ext4_da_get_block_write); 2358 ext4_da_get_block_write);
2234 ext4_journal_stop(handle); 2359 ext4_journal_stop(handle);
2235 if (wbc->nr_to_write) { 2360 if (ret == MPAGE_DA_EXTENT_TAIL) {
2361 /*
2362 * got one extent now try with
2363 * rest of the pages
2364 */
2365 to_write += wbc->nr_to_write;
2366 ret = 0;
2367 } else if (wbc->nr_to_write) {
2236 /* 2368 /*
2237 * There is no more writeout needed 2369 * There is no more writeout needed
2238 * or we requested for a noblocking writeout 2370 * or we requested for a noblocking writeout
@@ -2244,10 +2376,18 @@ static int ext4_da_writepages(struct address_space *mapping,
2244 wbc->nr_to_write = to_write; 2376 wbc->nr_to_write = to_write;
2245 } 2377 }
2246 2378
2247out_writepages: 2379 if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
2248 wbc->nr_to_write = to_write; 2380 /* We skipped pages in this loop */
2249 if (range_start)
2250 wbc->range_start = range_start; 2381 wbc->range_start = range_start;
2382 wbc->nr_to_write = to_write +
2383 wbc->pages_skipped - pages_skipped;
2384 wbc->pages_skipped = pages_skipped;
2385 goto restart_loop;
2386 }
2387
2388out_writepages:
2389 wbc->nr_to_write = to_write - nr_to_writebump;
2390 wbc->range_start = range_start;
2251 return ret; 2391 return ret;
2252} 2392}
2253 2393
@@ -2280,8 +2420,11 @@ retry:
2280 } 2420 }
2281 2421
2282 page = __grab_cache_page(mapping, index); 2422 page = __grab_cache_page(mapping, index);
2283 if (!page) 2423 if (!page) {
2284 return -ENOMEM; 2424 ext4_journal_stop(handle);
2425 ret = -ENOMEM;
2426 goto out;
2427 }
2285 *pagep = page; 2428 *pagep = page;
2286 2429
2287 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 2430 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
@@ -3434,6 +3577,9 @@ void ext4_truncate(struct inode *inode)
3434 * modify the block allocation tree. 3577 * modify the block allocation tree.
3435 */ 3578 */
3436 down_write(&ei->i_data_sem); 3579 down_write(&ei->i_data_sem);
3580
3581 ext4_discard_reservation(inode);
3582
3437 /* 3583 /*
3438 * The orphan list entry will now protect us from any crash which 3584 * The orphan list entry will now protect us from any crash which
3439 * occurs before the truncate completes, so it is now safe to propagate 3585 * occurs before the truncate completes, so it is now safe to propagate
@@ -3503,8 +3649,6 @@ do_indirects:
3503 ; 3649 ;
3504 } 3650 }
3505 3651
3506 ext4_discard_reservation(inode);
3507
3508 up_write(&ei->i_data_sem); 3652 up_write(&ei->i_data_sem);
3509 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 3653 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
3510 ext4_mark_inode_dirty(handle, inode); 3654 ext4_mark_inode_dirty(handle, inode);
@@ -3590,6 +3734,16 @@ static int __ext4_get_inode_loc(struct inode *inode,
3590 } 3734 }
3591 if (!buffer_uptodate(bh)) { 3735 if (!buffer_uptodate(bh)) {
3592 lock_buffer(bh); 3736 lock_buffer(bh);
3737
3738 /*
3739 * If the buffer has the write error flag, we have failed
3740 * to write out another inode in the same block. In this
3741 * case, we don't have to read the block because we may
3742 * read the old inode data successfully.
3743 */
3744 if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
3745 set_buffer_uptodate(bh);
3746
3593 if (buffer_uptodate(bh)) { 3747 if (buffer_uptodate(bh)) {
3594 /* someone brought it uptodate while we waited */ 3748 /* someone brought it uptodate while we waited */
3595 unlock_buffer(bh); 3749 unlock_buffer(bh);
@@ -4262,57 +4416,129 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
4262 return 0; 4416 return 0;
4263} 4417}
4264 4418
4419static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
4420 int chunk)
4421{
4422 int indirects;
4423
4424 /* if nrblocks are contiguous */
4425 if (chunk) {
4426 /*
4427 * With N contiguous data blocks, it needs at most
4428 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
4429 * 2 dindirect blocks
4430 * 1 tindirect block
4431 */
4432 indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
4433 return indirects + 3;
4434 }
4435 /*
4436 * if nrblocks are not contiguous, worst case, each block touches
4437 * an indirect block, and each indirect block touches a double indirect
4438 * block, plus a triple indirect block
4439 */
4440 indirects = nrblocks * 2 + 1;
4441 return indirects;
4442}
4443
4444static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4445{
4446 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
4447 return ext4_indirect_trans_blocks(inode, nrblocks, 0);
4448 return ext4_ext_index_trans_blocks(inode, nrblocks, 0);
4449}
4265/* 4450/*
4266 * How many blocks doth make a writepage()? 4451 * Account for index blocks, block groups bitmaps and block group
4452 * descriptor blocks if modify datablocks and index blocks
4453 * worst case, the index blocks spread over different block groups
4267 * 4454 *
4268 * With N blocks per page, it may be: 4455 * If datablocks are discontiguous, they could spread over
4269 * N data blocks 4456 * different block groups too. If they are contiguous, with flexbg,
4270 * 2 indirect block 4457 * they could still across block group boundary.
4271 * 2 dindirect
4272 * 1 tindirect
4273 * N+5 bitmap blocks (from the above)
4274 * N+5 group descriptor summary blocks
4275 * 1 inode block
4276 * 1 superblock.
4277 * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files
4278 * 4458 *
4279 * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS 4459 * Also account for superblock, inode, quota and xattr blocks
4280 * 4460 */
4281 * With ordered or writeback data it's the same, less the N data blocks. 4461int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4462{
4463 int groups, gdpblocks;
4464 int idxblocks;
4465 int ret = 0;
4466
4467 /*
4468 * How many index blocks need to touch to modify nrblocks?
4469 * The "Chunk" flag indicating whether the nrblocks is
4470 * physically contiguous on disk
4471 *
4472 * For Direct IO and fallocate, they calls get_block to allocate
4473 * one single extent at a time, so they could set the "Chunk" flag
4474 */
4475 idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);
4476
4477 ret = idxblocks;
4478
4479 /*
4480 * Now let's see how many group bitmaps and group descriptors need
4481 * to account
4482 */
4483 groups = idxblocks;
4484 if (chunk)
4485 groups += 1;
4486 else
4487 groups += nrblocks;
4488
4489 gdpblocks = groups;
4490 if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
4491 groups = EXT4_SB(inode->i_sb)->s_groups_count;
4492 if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
4493 gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
4494
4495 /* bitmaps and block group descriptor blocks */
4496 ret += groups + gdpblocks;
4497
4498 /* Blocks for super block, inode, quota and xattr blocks */
4499 ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
4500
4501 return ret;
4502}
4503
4504/*
4505 * Calculate the total number of credits to reserve to fit
4506 * the modification of a single page into a single transaction,
4507 * which may include multiple chunks of block allocations.
4282 * 4508 *
4283 * If the inode's direct blocks can hold an integral number of pages then a 4509 * This could be called via ext4_write_begin()
4284 * page cannot straddle two indirect blocks, and we can only touch one indirect
4285 * and dindirect block, and the "5" above becomes "3".
4286 * 4510 *
4287 * This still overestimates under most circumstances. If we were to pass the 4511 * We need to consider the worst case, when
4288 * start and end offsets in here as well we could do block_to_path() on each 4512 * one new block per extent.
4289 * block and work out the exact number of indirects which are touched. Pah.
4290 */ 4513 */
4291
4292int ext4_writepage_trans_blocks(struct inode *inode) 4514int ext4_writepage_trans_blocks(struct inode *inode)
4293{ 4515{
4294 int bpp = ext4_journal_blocks_per_page(inode); 4516 int bpp = ext4_journal_blocks_per_page(inode);
4295 int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
4296 int ret; 4517 int ret;
4297 4518
4298 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 4519 ret = ext4_meta_trans_blocks(inode, bpp, 0);
4299 return ext4_ext_writepage_trans_blocks(inode, bpp);
4300 4520
4521 /* Account for data blocks for journalled mode */
4301 if (ext4_should_journal_data(inode)) 4522 if (ext4_should_journal_data(inode))
4302 ret = 3 * (bpp + indirects) + 2; 4523 ret += bpp;
4303 else
4304 ret = 2 * (bpp + indirects) + 2;
4305
4306#ifdef CONFIG_QUOTA
4307 /* We know that structure was already allocated during DQUOT_INIT so
4308 * we will be updating only the data blocks + inodes */
4309 ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
4310#endif
4311
4312 return ret; 4524 return ret;
4313} 4525}
4314 4526
4315/* 4527/*
4528 * Calculate the journal credits for a chunk of data modification.
4529 *
4530 * This is called from DIO, fallocate or whoever is calling
4531 * ext4_get_blocks_wrap() to map/allocate a chunk of contiguous disk blocks.
4532 *
4533 * journal buffers for data blocks are not included here, as DIO
4534 * and fallocate do not need to journal data buffers.
4535 */
4536int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
4537{
4538 return ext4_meta_trans_blocks(inode, nrblocks, 1);
4539}
4540
4541/*
4316 * The caller must have previously called ext4_reserve_inode_write(). 4542 * The caller must have previously called ext4_reserve_inode_write().
4317 * Give this, we know that the caller already has write access to iloc->bh. 4543 * Give this, we know that the caller already has write access to iloc->bh.
4318 */ 4544 */
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8d141a25bbee..e0e3a5eb1ddb 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -787,13 +787,16 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
787 if (bh_uptodate_or_lock(bh[i])) 787 if (bh_uptodate_or_lock(bh[i]))
788 continue; 788 continue;
789 789
790 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
790 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 791 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
791 ext4_init_block_bitmap(sb, bh[i], 792 ext4_init_block_bitmap(sb, bh[i],
792 first_group + i, desc); 793 first_group + i, desc);
793 set_buffer_uptodate(bh[i]); 794 set_buffer_uptodate(bh[i]);
794 unlock_buffer(bh[i]); 795 unlock_buffer(bh[i]);
796 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
795 continue; 797 continue;
796 } 798 }
799 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
797 get_bh(bh[i]); 800 get_bh(bh[i]);
798 bh[i]->b_end_io = end_buffer_read_sync; 801 bh[i]->b_end_io = end_buffer_read_sync;
799 submit_bh(READ, bh[i]); 802 submit_bh(READ, bh[i]);
@@ -2477,7 +2480,7 @@ err_freesgi:
2477int ext4_mb_init(struct super_block *sb, int needs_recovery) 2480int ext4_mb_init(struct super_block *sb, int needs_recovery)
2478{ 2481{
2479 struct ext4_sb_info *sbi = EXT4_SB(sb); 2482 struct ext4_sb_info *sbi = EXT4_SB(sb);
2480 unsigned i; 2483 unsigned i, j;
2481 unsigned offset; 2484 unsigned offset;
2482 unsigned max; 2485 unsigned max;
2483 int ret; 2486 int ret;
@@ -2537,7 +2540,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2537 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; 2540 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
2538 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; 2541 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
2539 2542
2540 i = sizeof(struct ext4_locality_group) * NR_CPUS; 2543 i = sizeof(struct ext4_locality_group) * nr_cpu_ids;
2541 sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); 2544 sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
2542 if (sbi->s_locality_groups == NULL) { 2545 if (sbi->s_locality_groups == NULL) {
2543 clear_opt(sbi->s_mount_opt, MBALLOC); 2546 clear_opt(sbi->s_mount_opt, MBALLOC);
@@ -2545,11 +2548,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2545 kfree(sbi->s_mb_maxs); 2548 kfree(sbi->s_mb_maxs);
2546 return -ENOMEM; 2549 return -ENOMEM;
2547 } 2550 }
2548 for (i = 0; i < NR_CPUS; i++) { 2551 for (i = 0; i < nr_cpu_ids; i++) {
2549 struct ext4_locality_group *lg; 2552 struct ext4_locality_group *lg;
2550 lg = &sbi->s_locality_groups[i]; 2553 lg = &sbi->s_locality_groups[i];
2551 mutex_init(&lg->lg_mutex); 2554 mutex_init(&lg->lg_mutex);
2552 INIT_LIST_HEAD(&lg->lg_prealloc_list); 2555 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2556 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
2553 spin_lock_init(&lg->lg_prealloc_lock); 2557 spin_lock_init(&lg->lg_prealloc_lock);
2554 } 2558 }
2555 2559
@@ -3260,6 +3264,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3260 struct ext4_prealloc_space *pa) 3264 struct ext4_prealloc_space *pa)
3261{ 3265{
3262 unsigned int len = ac->ac_o_ex.fe_len; 3266 unsigned int len = ac->ac_o_ex.fe_len;
3267
3263 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, 3268 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3264 &ac->ac_b_ex.fe_group, 3269 &ac->ac_b_ex.fe_group,
3265 &ac->ac_b_ex.fe_start); 3270 &ac->ac_b_ex.fe_start);
@@ -3277,14 +3282,45 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3277} 3282}
3278 3283
3279/* 3284/*
3285 * Return the prealloc space that have minimal distance
3286 * from the goal block. @cpa is the prealloc
3287 * space that is having currently known minimal distance
3288 * from the goal block.
3289 */
3290static struct ext4_prealloc_space *
3291ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3292 struct ext4_prealloc_space *pa,
3293 struct ext4_prealloc_space *cpa)
3294{
3295 ext4_fsblk_t cur_distance, new_distance;
3296
3297 if (cpa == NULL) {
3298 atomic_inc(&pa->pa_count);
3299 return pa;
3300 }
3301 cur_distance = abs(goal_block - cpa->pa_pstart);
3302 new_distance = abs(goal_block - pa->pa_pstart);
3303
3304 if (cur_distance < new_distance)
3305 return cpa;
3306
3307 /* drop the previous reference */
3308 atomic_dec(&cpa->pa_count);
3309 atomic_inc(&pa->pa_count);
3310 return pa;
3311}
3312
3313/*
3280 * search goal blocks in preallocated space 3314 * search goal blocks in preallocated space
3281 */ 3315 */
3282static noinline_for_stack int 3316static noinline_for_stack int
3283ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3317ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3284{ 3318{
3319 int order, i;
3285 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3320 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3286 struct ext4_locality_group *lg; 3321 struct ext4_locality_group *lg;
3287 struct ext4_prealloc_space *pa; 3322 struct ext4_prealloc_space *pa, *cpa = NULL;
3323 ext4_fsblk_t goal_block;
3288 3324
3289 /* only data can be preallocated */ 3325 /* only data can be preallocated */
3290 if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) 3326 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3322,22 +3358,38 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3322 lg = ac->ac_lg; 3358 lg = ac->ac_lg;
3323 if (lg == NULL) 3359 if (lg == NULL)
3324 return 0; 3360 return 0;
3361 order = fls(ac->ac_o_ex.fe_len) - 1;
3362 if (order > PREALLOC_TB_SIZE - 1)
3363 /* The max size of hash table is PREALLOC_TB_SIZE */
3364 order = PREALLOC_TB_SIZE - 1;
3365
3366 goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
3367 ac->ac_g_ex.fe_start +
3368 le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
3369 /*
3370 * search for the prealloc space that is having
3371 * minimal distance from the goal block.
3372 */
3373 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3374 rcu_read_lock();
3375 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3376 pa_inode_list) {
3377 spin_lock(&pa->pa_lock);
3378 if (pa->pa_deleted == 0 &&
3379 pa->pa_free >= ac->ac_o_ex.fe_len) {
3325 3380
3326 rcu_read_lock(); 3381 cpa = ext4_mb_check_group_pa(goal_block,
3327 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) { 3382 pa, cpa);
3328 spin_lock(&pa->pa_lock); 3383 }
3329 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
3330 atomic_inc(&pa->pa_count);
3331 ext4_mb_use_group_pa(ac, pa);
3332 spin_unlock(&pa->pa_lock); 3384 spin_unlock(&pa->pa_lock);
3333 ac->ac_criteria = 20;
3334 rcu_read_unlock();
3335 return 1;
3336 } 3385 }
3337 spin_unlock(&pa->pa_lock); 3386 rcu_read_unlock();
3387 }
3388 if (cpa) {
3389 ext4_mb_use_group_pa(ac, cpa);
3390 ac->ac_criteria = 20;
3391 return 1;
3338 } 3392 }
3339 rcu_read_unlock();
3340
3341 return 0; 3393 return 0;
3342} 3394}
3343 3395
@@ -3560,6 +3612,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3560 pa->pa_free = pa->pa_len; 3612 pa->pa_free = pa->pa_len;
3561 atomic_set(&pa->pa_count, 1); 3613 atomic_set(&pa->pa_count, 1);
3562 spin_lock_init(&pa->pa_lock); 3614 spin_lock_init(&pa->pa_lock);
3615 INIT_LIST_HEAD(&pa->pa_inode_list);
3563 pa->pa_deleted = 0; 3616 pa->pa_deleted = 0;
3564 pa->pa_linear = 1; 3617 pa->pa_linear = 1;
3565 3618
@@ -3580,10 +3633,10 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3580 list_add(&pa->pa_group_list, &grp->bb_prealloc_list); 3633 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3581 ext4_unlock_group(sb, ac->ac_b_ex.fe_group); 3634 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3582 3635
3583 spin_lock(pa->pa_obj_lock); 3636 /*
3584 list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list); 3637 * We will later add the new pa to the right bucket
3585 spin_unlock(pa->pa_obj_lock); 3638 * after updating the pa_free in ext4_mb_release_context
3586 3639 */
3587 return 0; 3640 return 0;
3588} 3641}
3589 3642
@@ -3733,20 +3786,23 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3733 3786
3734 bitmap_bh = ext4_read_block_bitmap(sb, group); 3787 bitmap_bh = ext4_read_block_bitmap(sb, group);
3735 if (bitmap_bh == NULL) { 3788 if (bitmap_bh == NULL) {
3736 /* error handling here */ 3789 ext4_error(sb, __func__, "Error in reading block "
3737 ext4_mb_release_desc(&e4b); 3790 "bitmap for %lu\n", group);
3738 BUG_ON(bitmap_bh == NULL); 3791 return 0;
3739 } 3792 }
3740 3793
3741 err = ext4_mb_load_buddy(sb, group, &e4b); 3794 err = ext4_mb_load_buddy(sb, group, &e4b);
3742 BUG_ON(err != 0); /* error handling here */ 3795 if (err) {
3796 ext4_error(sb, __func__, "Error in loading buddy "
3797 "information for %lu\n", group);
3798 put_bh(bitmap_bh);
3799 return 0;
3800 }
3743 3801
3744 if (needed == 0) 3802 if (needed == 0)
3745 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; 3803 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
3746 3804
3747 grp = ext4_get_group_info(sb, group);
3748 INIT_LIST_HEAD(&list); 3805 INIT_LIST_HEAD(&list);
3749
3750 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3806 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3751repeat: 3807repeat:
3752 ext4_lock_group(sb, group); 3808 ext4_lock_group(sb, group);
@@ -3903,13 +3959,18 @@ repeat:
3903 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); 3959 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
3904 3960
3905 err = ext4_mb_load_buddy(sb, group, &e4b); 3961 err = ext4_mb_load_buddy(sb, group, &e4b);
3906 BUG_ON(err != 0); /* error handling here */ 3962 if (err) {
3963 ext4_error(sb, __func__, "Error in loading buddy "
3964 "information for %lu\n", group);
3965 continue;
3966 }
3907 3967
3908 bitmap_bh = ext4_read_block_bitmap(sb, group); 3968 bitmap_bh = ext4_read_block_bitmap(sb, group);
3909 if (bitmap_bh == NULL) { 3969 if (bitmap_bh == NULL) {
3910 /* error handling here */ 3970 ext4_error(sb, __func__, "Error in reading block "
3971 "bitmap for %lu\n", group);
3911 ext4_mb_release_desc(&e4b); 3972 ext4_mb_release_desc(&e4b);
3912 BUG_ON(bitmap_bh == NULL); 3973 continue;
3913 } 3974 }
3914 3975
3915 ext4_lock_group(sb, group); 3976 ext4_lock_group(sb, group);
@@ -4112,22 +4173,168 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4112 4173
4113} 4174}
4114 4175
/*
 * ext4_mb_discard_lg_preallocations: trim one order bucket of a locality
 * group's preallocation list.  Unused entries (pa_count == 0, not already
 * deleted) are marked deleted under lg_prealloc_lock, collected on a
 * private discard list, and then released back to the buddy allocator
 * outside the lock.  Trimming stops once roughly 5 entries remain, so the
 * list can grow back to 8 before the next trim (see ext4_mb_add_n_trim).
 */
 4176static noinline_for_stack void
 4177ext4_mb_discard_lg_preallocations(struct super_block *sb,
 4178 struct ext4_locality_group *lg,
 4179 int order, int total_entries)
 4180{
 4181 ext4_group_t group = 0;
 4182 struct ext4_buddy e4b;
 4183 struct list_head discard_list;
 4184 struct ext4_prealloc_space *pa, *tmp;
 4185 struct ext4_allocation_context *ac;
 4186
 4187 mb_debug("discard locality group preallocation\n");
 4188
 4189 INIT_LIST_HEAD(&discard_list);
 4190 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
/*
 * NOTE(review): ac may be NULL on allocation failure; it is passed on to
 * ext4_mb_release_group_pa, which is presumably NULL-tolerant (the final
 * kmem_cache_free below is guarded by "if (ac)") — confirm.
 */
 4191
 4192 spin_lock(&lg->lg_prealloc_lock);
 4193 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
 4194 pa_inode_list) {
 4195 spin_lock(&pa->pa_lock);
 4196 if (atomic_read(&pa->pa_count)) {
 4197 /*
 4198 * This is the pa that we just used
 4199 * for block allocation. So don't
 4200 * free it.
 4201 */
 4202 spin_unlock(&pa->pa_lock);
 4203 continue;
 4204 }
 4205 if (pa->pa_deleted) {
 4206 spin_unlock(&pa->pa_lock);
 4207 continue;
 4208 }
 4209 /* only lg prealloc space */
 4210 BUG_ON(!pa->pa_linear);
 4211
 4212 /* seems this one can be freed ... */
 4213 pa->pa_deleted = 1;
 4214 spin_unlock(&pa->pa_lock);
 4215
 4216 list_del_rcu(&pa->pa_inode_list);
 4217 list_add(&pa->u.pa_tmp_list, &discard_list);
 4218
 4219 total_entries--;
 4220 if (total_entries <= 5) {
 4221 /*
 4222 * we want to keep only 5 entries
 4223 * allowing it to grow to 8. This
 4224 * makes sure we don't call discard
 4225 * soon for this list.
 4226 */
 4227 break;
 4228 }
 4229 }
 4230 spin_unlock(&lg->lg_prealloc_lock);
 4231
/* Second pass: actually release the collected entries, outside the lock. */
 4232 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
 4233
 4234 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
 4235 if (ext4_mb_load_buddy(sb, group, &e4b)) {
 4236 ext4_error(sb, __func__, "Error in loading buddy "
 4237 "information for %lu\n", group);
 4238 continue;
 4239 }
 4240 ext4_lock_group(sb, group);
 4241 list_del(&pa->pa_group_list);
 4242 ext4_mb_release_group_pa(&e4b, pa, ac);
 4243 ext4_unlock_group(sb, group);
 4244
 4245 ext4_mb_release_desc(&e4b);
 4246 list_del(&pa->u.pa_tmp_list);
/* Defer the final free until readers traversing via RCU are done. */
 4247 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
 4248 }
 4249 if (ac)
 4250 kmem_cache_free(ext4_ac_cachep, ac);
 4251}
4252
4253/*
4254 * We have incremented pa_count. So it cannot be freed at this
4255 * point. Also we hold lg_mutex. So no parallel allocation is
4256 * possible from this lg. That means pa_free cannot be updated.
4257 *
4258 * A parallel ext4_mb_discard_group_preallocations is possible.
4259 * which can cause the lg_prealloc_list to be updated.
4260 */
4261
4262static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4263{
4264 int order, added = 0, lg_prealloc_count = 1;
4265 struct super_block *sb = ac->ac_sb;
4266 struct ext4_locality_group *lg = ac->ac_lg;
4267 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4268
4269 order = fls(pa->pa_free) - 1;
4270 if (order > PREALLOC_TB_SIZE - 1)
4271 /* The max size of hash table is PREALLOC_TB_SIZE */
4272 order = PREALLOC_TB_SIZE - 1;
4273 /* Add the prealloc space to lg */
4274 rcu_read_lock();
4275 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4276 pa_inode_list) {
4277 spin_lock(&tmp_pa->pa_lock);
4278 if (tmp_pa->pa_deleted) {
4279 spin_unlock(&pa->pa_lock);
4280 continue;
4281 }
4282 if (!added && pa->pa_free < tmp_pa->pa_free) {
4283 /* Add to the tail of the previous entry */
4284 list_add_tail_rcu(&pa->pa_inode_list,
4285 &tmp_pa->pa_inode_list);
4286 added = 1;
4287 /*
4288 * we want to count the total
4289 * number of entries in the list
4290 */
4291 }
4292 spin_unlock(&tmp_pa->pa_lock);
4293 lg_prealloc_count++;
4294 }
4295 if (!added)
4296 list_add_tail_rcu(&pa->pa_inode_list,
4297 &lg->lg_prealloc_list[order]);
4298 rcu_read_unlock();
4299
4300 /* Now trim the list to be not more than 8 elements */
4301 if (lg_prealloc_count > 8) {
4302 ext4_mb_discard_lg_preallocations(sb, lg,
4303 order, lg_prealloc_count);
4304 return;
4305 }
4306 return ;
4307}
4308
4115/* 4309/*
4116 * release all resource we used in allocation 4310 * release all resource we used in allocation
4117 */ 4311 */
4118static int ext4_mb_release_context(struct ext4_allocation_context *ac) 4312static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4119{ 4313{
4120 if (ac->ac_pa) { 4314 struct ext4_prealloc_space *pa = ac->ac_pa;
4121 if (ac->ac_pa->pa_linear) { 4315 if (pa) {
4316 if (pa->pa_linear) {
4122 /* see comment in ext4_mb_use_group_pa() */ 4317 /* see comment in ext4_mb_use_group_pa() */
4123 spin_lock(&ac->ac_pa->pa_lock); 4318 spin_lock(&pa->pa_lock);
4124 ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len; 4319 pa->pa_pstart += ac->ac_b_ex.fe_len;
4125 ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len; 4320 pa->pa_lstart += ac->ac_b_ex.fe_len;
4126 ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len; 4321 pa->pa_free -= ac->ac_b_ex.fe_len;
4127 ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len; 4322 pa->pa_len -= ac->ac_b_ex.fe_len;
4128 spin_unlock(&ac->ac_pa->pa_lock); 4323 spin_unlock(&pa->pa_lock);
4324 /*
4325 * We want to add the pa to the right bucket.
4326 * Remove it from the list and while adding
4327 * make sure the list to which we are adding
4328 * doesn't grow big.
4329 */
4330 if (likely(pa->pa_free)) {
4331 spin_lock(pa->pa_obj_lock);
4332 list_del_rcu(&pa->pa_inode_list);
4333 spin_unlock(pa->pa_obj_lock);
4334 ext4_mb_add_n_trim(ac);
4335 }
4129 } 4336 }
4130 ext4_mb_put_pa(ac, ac->ac_sb, ac->ac_pa); 4337 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4131 } 4338 }
4132 if (ac->ac_bitmap_page) 4339 if (ac->ac_bitmap_page)
4133 page_cache_release(ac->ac_bitmap_page); 4340 page_cache_release(ac->ac_bitmap_page);
@@ -4420,11 +4627,15 @@ do_more:
4420 count -= overflow; 4627 count -= overflow;
4421 } 4628 }
4422 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 4629 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4423 if (!bitmap_bh) 4630 if (!bitmap_bh) {
4631 err = -EIO;
4424 goto error_return; 4632 goto error_return;
4633 }
4425 gdp = ext4_get_group_desc(sb, block_group, &gd_bh); 4634 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4426 if (!gdp) 4635 if (!gdp) {
4636 err = -EIO;
4427 goto error_return; 4637 goto error_return;
4638 }
4428 4639
4429 if (in_range(ext4_block_bitmap(sb, gdp), block, count) || 4640 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4430 in_range(ext4_inode_bitmap(sb, gdp), block, count) || 4641 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index bfe6add46bcf..c7c9906c2a75 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -164,11 +164,17 @@ struct ext4_free_extent {
164 * Locality group: 164 * Locality group:
165 * we try to group all related changes together 165 * we try to group all related changes together
166 * so that writeback can flush/allocate them together as well 166 * so that writeback can flush/allocate them together as well
167 * Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
168 * (512). We store prealloc space into the hash based on the pa_free blocks
 169 * order value, i.e. fls(pa_free) - 1;
167 */ 170 */
171#define PREALLOC_TB_SIZE 10
168struct ext4_locality_group { 172struct ext4_locality_group {
169 /* for allocator */ 173 /* for allocator */
170 struct mutex lg_mutex; /* to serialize allocates */ 174 /* to serialize allocates */
171 struct list_head lg_prealloc_list;/* list of preallocations */ 175 struct mutex lg_mutex;
176 /* list of preallocations */
177 struct list_head lg_prealloc_list[PREALLOC_TB_SIZE];
172 spinlock_t lg_prealloc_lock; 178 spinlock_t lg_prealloc_lock;
173}; 179};
174 180
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b9e077ba07e9..46fc0b5b12ba 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -53,7 +53,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
53 * credit. But below we try to not accumalate too much 53 * credit. But below we try to not accumalate too much
54 * of them by restarting the journal. 54 * of them by restarting the journal.
55 */ 55 */
56 needed = ext4_ext_calc_credits_for_insert(inode, path); 56 needed = ext4_ext_calc_credits_for_single_extent(inode,
57 lb->last_block - lb->first_block + 1, path);
57 58
58 /* 59 /*
59 * Make sure the credit we accumalated is not really high 60 * Make sure the credit we accumalated is not really high
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f000fbe2cd93..b3d35604ea18 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -73,7 +73,7 @@ static int verify_group_input(struct super_block *sb,
73 "Inode bitmap not in group (block %llu)", 73 "Inode bitmap not in group (block %llu)",
74 (unsigned long long)input->inode_bitmap); 74 (unsigned long long)input->inode_bitmap);
75 else if (outside(input->inode_table, start, end) || 75 else if (outside(input->inode_table, start, end) ||
76 outside(itend - 1, start, end)) 76 outside(itend - 1, start, end))
77 ext4_warning(sb, __func__, 77 ext4_warning(sb, __func__,
78 "Inode table not in group (blocks %llu-%llu)", 78 "Inode table not in group (blocks %llu-%llu)",
79 (unsigned long long)input->inode_table, itend - 1); 79 (unsigned long long)input->inode_table, itend - 1);
@@ -104,7 +104,7 @@ static int verify_group_input(struct super_block *sb,
104 (unsigned long long)input->inode_bitmap, 104 (unsigned long long)input->inode_bitmap,
105 start, metaend - 1); 105 start, metaend - 1);
106 else if (inside(input->inode_table, start, metaend) || 106 else if (inside(input->inode_table, start, metaend) ||
107 inside(itend - 1, start, metaend)) 107 inside(itend - 1, start, metaend))
108 ext4_warning(sb, __func__, 108 ext4_warning(sb, __func__,
109 "Inode table (%llu-%llu) overlaps" 109 "Inode table (%llu-%llu) overlaps"
110 "GDT table (%llu-%llu)", 110 "GDT table (%llu-%llu)",
@@ -158,9 +158,9 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
158 if (err) { 158 if (err) {
159 if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) 159 if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
160 return err; 160 return err;
161 if ((err = ext4_journal_get_write_access(handle, bh))) 161 if ((err = ext4_journal_get_write_access(handle, bh)))
162 return err; 162 return err;
163 } 163 }
164 164
165 return 0; 165 return 0;
166} 166}
@@ -416,11 +416,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
416 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", 416 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
417 gdb_num); 417 gdb_num);
418 418
419 /* 419 /*
420 * If we are not using the primary superblock/GDT copy don't resize, 420 * If we are not using the primary superblock/GDT copy don't resize,
421 * because the user tools have no way of handling this. Probably a 421 * because the user tools have no way of handling this. Probably a
422 * bad time to do it anyways. 422 * bad time to do it anyways.
423 */ 423 */
424 if (EXT4_SB(sb)->s_sbh->b_blocknr != 424 if (EXT4_SB(sb)->s_sbh->b_blocknr !=
425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
426 ext4_warning(sb, __func__, 426 ext4_warning(sb, __func__,
@@ -507,14 +507,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
507 return 0; 507 return 0;
508 508
509exit_inode: 509exit_inode:
510 //ext4_journal_release_buffer(handle, iloc.bh); 510 /* ext4_journal_release_buffer(handle, iloc.bh); */
511 brelse(iloc.bh); 511 brelse(iloc.bh);
512exit_dindj: 512exit_dindj:
513 //ext4_journal_release_buffer(handle, dind); 513 /* ext4_journal_release_buffer(handle, dind); */
514exit_primary: 514exit_primary:
515 //ext4_journal_release_buffer(handle, *primary); 515 /* ext4_journal_release_buffer(handle, *primary); */
516exit_sbh: 516exit_sbh:
517 //ext4_journal_release_buffer(handle, *primary); 517 /* ext4_journal_release_buffer(handle, *primary); */
518exit_dind: 518exit_dind:
519 brelse(dind); 519 brelse(dind);
520exit_bh: 520exit_bh:
@@ -773,7 +773,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
773 773
774 if (reserved_gdb || gdb_off == 0) { 774 if (reserved_gdb || gdb_off == 0) {
775 if (!EXT4_HAS_COMPAT_FEATURE(sb, 775 if (!EXT4_HAS_COMPAT_FEATURE(sb,
776 EXT4_FEATURE_COMPAT_RESIZE_INODE)){ 776 EXT4_FEATURE_COMPAT_RESIZE_INODE)
777 || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
777 ext4_warning(sb, __func__, 778 ext4_warning(sb, __func__,
778 "No reserved GDT blocks, can't resize"); 779 "No reserved GDT blocks, can't resize");
779 return -EPERM; 780 return -EPERM;
@@ -818,12 +819,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
818 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) 819 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
819 goto exit_journal; 820 goto exit_journal;
820 821
821 /* 822 /*
822 * We will only either add reserved group blocks to a backup group 823 * We will only either add reserved group blocks to a backup group
823 * or remove reserved blocks for the first group in a new group block. 824 * or remove reserved blocks for the first group in a new group block.
824 * Doing both would be mean more complex code, and sane people don't 825 * Doing both would be mean more complex code, and sane people don't
825 * use non-sparse filesystems anymore. This is already checked above. 826 * use non-sparse filesystems anymore. This is already checked above.
826 */ 827 */
827 if (gdb_off) { 828 if (gdb_off) {
828 primary = sbi->s_group_desc[gdb_num]; 829 primary = sbi->s_group_desc[gdb_num];
829 if ((err = ext4_journal_get_write_access(handle, primary))) 830 if ((err = ext4_journal_get_write_access(handle, primary)))
@@ -835,24 +836,24 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
835 } else if ((err = add_new_gdb(handle, inode, input, &primary))) 836 } else if ((err = add_new_gdb(handle, inode, input, &primary)))
836 goto exit_journal; 837 goto exit_journal;
837 838
838 /* 839 /*
839 * OK, now we've set up the new group. Time to make it active. 840 * OK, now we've set up the new group. Time to make it active.
840 * 841 *
841 * Current kernels don't lock all allocations via lock_super(), 842 * Current kernels don't lock all allocations via lock_super(),
842 * so we have to be safe wrt. concurrent accesses the group 843 * so we have to be safe wrt. concurrent accesses the group
843 * data. So we need to be careful to set all of the relevant 844 * data. So we need to be careful to set all of the relevant
844 * group descriptor data etc. *before* we enable the group. 845 * group descriptor data etc. *before* we enable the group.
845 * 846 *
846 * The key field here is sbi->s_groups_count: as long as 847 * The key field here is sbi->s_groups_count: as long as
847 * that retains its old value, nobody is going to access the new 848 * that retains its old value, nobody is going to access the new
848 * group. 849 * group.
849 * 850 *
850 * So first we update all the descriptor metadata for the new 851 * So first we update all the descriptor metadata for the new
851 * group; then we update the total disk blocks count; then we 852 * group; then we update the total disk blocks count; then we
852 * update the groups count to enable the group; then finally we 853 * update the groups count to enable the group; then finally we
853 * update the free space counts so that the system can start 854 * update the free space counts so that the system can start
854 * using the new disk blocks. 855 * using the new disk blocks.
855 */ 856 */
856 857
857 /* Update group descriptor block for new group */ 858 /* Update group descriptor block for new group */
858 gdp = (struct ext4_group_desc *)((char *)primary->b_data + 859 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
@@ -946,7 +947,8 @@ exit_put:
946 return err; 947 return err;
947} /* ext4_group_add */ 948} /* ext4_group_add */
948 949
949/* Extend the filesystem to the new number of blocks specified. This entry 950/*
951 * Extend the filesystem to the new number of blocks specified. This entry
950 * point is only used to extend the current filesystem to the end of the last 952 * point is only used to extend the current filesystem to the end of the last
951 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" 953 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
952 * for emergencies (because it has no dependencies on reserved blocks). 954 * for emergencies (because it has no dependencies on reserved blocks).
@@ -1024,7 +1026,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1024 o_blocks_count + add, add); 1026 o_blocks_count + add, add);
1025 1027
1026 /* See if the device is actually as big as what was requested */ 1028 /* See if the device is actually as big as what was requested */
1027 bh = sb_bread(sb, o_blocks_count + add -1); 1029 bh = sb_bread(sb, o_blocks_count + add - 1);
1028 if (!bh) { 1030 if (!bh) {
1029 ext4_warning(sb, __func__, 1031 ext4_warning(sb, __func__,
1030 "can't read last block, resize aborted"); 1032 "can't read last block, resize aborted");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b5479b1dff14..566344b926b7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -49,20 +49,19 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
49 unsigned long journal_devnum); 49 unsigned long journal_devnum);
50static int ext4_create_journal(struct super_block *, struct ext4_super_block *, 50static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
51 unsigned int); 51 unsigned int);
52static void ext4_commit_super (struct super_block * sb, 52static void ext4_commit_super(struct super_block *sb,
53 struct ext4_super_block * es, 53 struct ext4_super_block *es, int sync);
54 int sync); 54static void ext4_mark_recovery_complete(struct super_block *sb,
55static void ext4_mark_recovery_complete(struct super_block * sb, 55 struct ext4_super_block *es);
56 struct ext4_super_block * es); 56static void ext4_clear_journal_err(struct super_block *sb,
57static void ext4_clear_journal_err(struct super_block * sb, 57 struct ext4_super_block *es);
58 struct ext4_super_block * es);
59static int ext4_sync_fs(struct super_block *sb, int wait); 58static int ext4_sync_fs(struct super_block *sb, int wait);
60static const char *ext4_decode_error(struct super_block * sb, int errno, 59static const char *ext4_decode_error(struct super_block *sb, int errno,
61 char nbuf[16]); 60 char nbuf[16]);
62static int ext4_remount (struct super_block * sb, int * flags, char * data); 61static int ext4_remount(struct super_block *sb, int *flags, char *data);
63static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf); 62static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
64static void ext4_unlockfs(struct super_block *sb); 63static void ext4_unlockfs(struct super_block *sb);
65static void ext4_write_super (struct super_block * sb); 64static void ext4_write_super(struct super_block *sb);
66static void ext4_write_super_lockfs(struct super_block *sb); 65static void ext4_write_super_lockfs(struct super_block *sb);
67 66
68 67
@@ -211,15 +210,15 @@ static void ext4_handle_error(struct super_block *sb)
211 if (sb->s_flags & MS_RDONLY) 210 if (sb->s_flags & MS_RDONLY)
212 return; 211 return;
213 212
214 if (!test_opt (sb, ERRORS_CONT)) { 213 if (!test_opt(sb, ERRORS_CONT)) {
215 journal_t *journal = EXT4_SB(sb)->s_journal; 214 journal_t *journal = EXT4_SB(sb)->s_journal;
216 215
217 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 216 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
218 if (journal) 217 if (journal)
219 jbd2_journal_abort(journal, -EIO); 218 jbd2_journal_abort(journal, -EIO);
220 } 219 }
221 if (test_opt (sb, ERRORS_RO)) { 220 if (test_opt(sb, ERRORS_RO)) {
222 printk (KERN_CRIT "Remounting filesystem read-only\n"); 221 printk(KERN_CRIT "Remounting filesystem read-only\n");
223 sb->s_flags |= MS_RDONLY; 222 sb->s_flags |= MS_RDONLY;
224 } 223 }
225 ext4_commit_super(sb, es, 1); 224 ext4_commit_super(sb, es, 1);
@@ -228,13 +227,13 @@ static void ext4_handle_error(struct super_block *sb)
228 sb->s_id); 227 sb->s_id);
229} 228}
230 229
231void ext4_error (struct super_block * sb, const char * function, 230void ext4_error(struct super_block *sb, const char *function,
232 const char * fmt, ...) 231 const char *fmt, ...)
233{ 232{
234 va_list args; 233 va_list args;
235 234
236 va_start(args, fmt); 235 va_start(args, fmt);
237 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); 236 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
238 vprintk(fmt, args); 237 vprintk(fmt, args);
239 printk("\n"); 238 printk("\n");
240 va_end(args); 239 va_end(args);
@@ -242,7 +241,7 @@ void ext4_error (struct super_block * sb, const char * function,
242 ext4_handle_error(sb); 241 ext4_handle_error(sb);
243} 242}
244 243
245static const char *ext4_decode_error(struct super_block * sb, int errno, 244static const char *ext4_decode_error(struct super_block *sb, int errno,
246 char nbuf[16]) 245 char nbuf[16])
247{ 246{
248 char *errstr = NULL; 247 char *errstr = NULL;
@@ -278,8 +277,7 @@ static const char *ext4_decode_error(struct super_block * sb, int errno,
278/* __ext4_std_error decodes expected errors from journaling functions 277/* __ext4_std_error decodes expected errors from journaling functions
279 * automatically and invokes the appropriate error response. */ 278 * automatically and invokes the appropriate error response. */
280 279
281void __ext4_std_error (struct super_block * sb, const char * function, 280void __ext4_std_error(struct super_block *sb, const char *function, int errno)
282 int errno)
283{ 281{
284 char nbuf[16]; 282 char nbuf[16];
285 const char *errstr; 283 const char *errstr;
@@ -292,8 +290,8 @@ void __ext4_std_error (struct super_block * sb, const char * function,
292 return; 290 return;
293 291
294 errstr = ext4_decode_error(sb, errno, nbuf); 292 errstr = ext4_decode_error(sb, errno, nbuf);
295 printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 293 printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
296 sb->s_id, function, errstr); 294 sb->s_id, function, errstr);
297 295
298 ext4_handle_error(sb); 296 ext4_handle_error(sb);
299} 297}
@@ -308,15 +306,15 @@ void __ext4_std_error (struct super_block * sb, const char * function,
308 * case we take the easy way out and panic immediately. 306 * case we take the easy way out and panic immediately.
309 */ 307 */
310 308
311void ext4_abort (struct super_block * sb, const char * function, 309void ext4_abort(struct super_block *sb, const char *function,
312 const char * fmt, ...) 310 const char *fmt, ...)
313{ 311{
314 va_list args; 312 va_list args;
315 313
316 printk (KERN_CRIT "ext4_abort called.\n"); 314 printk(KERN_CRIT "ext4_abort called.\n");
317 315
318 va_start(args, fmt); 316 va_start(args, fmt);
319 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); 317 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
320 vprintk(fmt, args); 318 vprintk(fmt, args);
321 printk("\n"); 319 printk("\n");
322 va_end(args); 320 va_end(args);
@@ -334,8 +332,8 @@ void ext4_abort (struct super_block * sb, const char * function,
334 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 332 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
335} 333}
336 334
337void ext4_warning (struct super_block * sb, const char * function, 335void ext4_warning(struct super_block *sb, const char *function,
338 const char * fmt, ...) 336 const char *fmt, ...)
339{ 337{
340 va_list args; 338 va_list args;
341 339
@@ -496,7 +494,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
496 } 494 }
497} 495}
498 496
499static void ext4_put_super (struct super_block * sb) 497static void ext4_put_super(struct super_block *sb)
500{ 498{
501 struct ext4_sb_info *sbi = EXT4_SB(sb); 499 struct ext4_sb_info *sbi = EXT4_SB(sb);
502 struct ext4_super_block *es = sbi->s_es; 500 struct ext4_super_block *es = sbi->s_es;
@@ -570,6 +568,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
570#endif 568#endif
571 ei->i_block_alloc_info = NULL; 569 ei->i_block_alloc_info = NULL;
572 ei->vfs_inode.i_version = 1; 570 ei->vfs_inode.i_version = 1;
571 ei->vfs_inode.i_data.writeback_index = 0;
573 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 572 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
574 INIT_LIST_HEAD(&ei->i_prealloc_list); 573 INIT_LIST_HEAD(&ei->i_prealloc_list);
575 spin_lock_init(&ei->i_prealloc_lock); 574 spin_lock_init(&ei->i_prealloc_lock);
@@ -647,7 +646,8 @@ static void ext4_clear_inode(struct inode *inode)
647 &EXT4_I(inode)->jinode); 646 &EXT4_I(inode)->jinode);
648} 647}
649 648
650static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) 649static inline void ext4_show_quota_options(struct seq_file *seq,
650 struct super_block *sb)
651{ 651{
652#if defined(CONFIG_QUOTA) 652#if defined(CONFIG_QUOTA)
653 struct ext4_sb_info *sbi = EXT4_SB(sb); 653 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -822,8 +822,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
822} 822}
823 823
824#ifdef CONFIG_QUOTA 824#ifdef CONFIG_QUOTA
825#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") 825#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group")
826#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 826#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
827 827
828static int ext4_dquot_initialize(struct inode *inode, int type); 828static int ext4_dquot_initialize(struct inode *inode, int type);
829static int ext4_dquot_drop(struct inode *inode); 829static int ext4_dquot_drop(struct inode *inode);
@@ -991,12 +991,12 @@ static ext4_fsblk_t get_sb_block(void **data)
991 return sb_block; 991 return sb_block;
992} 992}
993 993
994static int parse_options (char *options, struct super_block *sb, 994static int parse_options(char *options, struct super_block *sb,
995 unsigned int *inum, unsigned long *journal_devnum, 995 unsigned int *inum, unsigned long *journal_devnum,
996 ext4_fsblk_t *n_blocks_count, int is_remount) 996 ext4_fsblk_t *n_blocks_count, int is_remount)
997{ 997{
998 struct ext4_sb_info *sbi = EXT4_SB(sb); 998 struct ext4_sb_info *sbi = EXT4_SB(sb);
999 char * p; 999 char *p;
1000 substring_t args[MAX_OPT_ARGS]; 1000 substring_t args[MAX_OPT_ARGS];
1001 int data_opt = 0; 1001 int data_opt = 0;
1002 int option; 1002 int option;
@@ -1009,7 +1009,7 @@ static int parse_options (char *options, struct super_block *sb,
1009 if (!options) 1009 if (!options)
1010 return 1; 1010 return 1;
1011 1011
1012 while ((p = strsep (&options, ",")) != NULL) { 1012 while ((p = strsep(&options, ",")) != NULL) {
1013 int token; 1013 int token;
1014 if (!*p) 1014 if (!*p)
1015 continue; 1015 continue;
@@ -1017,16 +1017,16 @@ static int parse_options (char *options, struct super_block *sb,
1017 token = match_token(p, tokens, args); 1017 token = match_token(p, tokens, args);
1018 switch (token) { 1018 switch (token) {
1019 case Opt_bsd_df: 1019 case Opt_bsd_df:
1020 clear_opt (sbi->s_mount_opt, MINIX_DF); 1020 clear_opt(sbi->s_mount_opt, MINIX_DF);
1021 break; 1021 break;
1022 case Opt_minix_df: 1022 case Opt_minix_df:
1023 set_opt (sbi->s_mount_opt, MINIX_DF); 1023 set_opt(sbi->s_mount_opt, MINIX_DF);
1024 break; 1024 break;
1025 case Opt_grpid: 1025 case Opt_grpid:
1026 set_opt (sbi->s_mount_opt, GRPID); 1026 set_opt(sbi->s_mount_opt, GRPID);
1027 break; 1027 break;
1028 case Opt_nogrpid: 1028 case Opt_nogrpid:
1029 clear_opt (sbi->s_mount_opt, GRPID); 1029 clear_opt(sbi->s_mount_opt, GRPID);
1030 break; 1030 break;
1031 case Opt_resuid: 1031 case Opt_resuid:
1032 if (match_int(&args[0], &option)) 1032 if (match_int(&args[0], &option))
@@ -1043,41 +1043,41 @@ static int parse_options (char *options, struct super_block *sb,
1043 /* *sb_block = match_int(&args[0]); */ 1043 /* *sb_block = match_int(&args[0]); */
1044 break; 1044 break;
1045 case Opt_err_panic: 1045 case Opt_err_panic:
1046 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1046 clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1047 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1047 clear_opt(sbi->s_mount_opt, ERRORS_RO);
1048 set_opt (sbi->s_mount_opt, ERRORS_PANIC); 1048 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1049 break; 1049 break;
1050 case Opt_err_ro: 1050 case Opt_err_ro:
1051 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1051 clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1052 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1052 clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1053 set_opt (sbi->s_mount_opt, ERRORS_RO); 1053 set_opt(sbi->s_mount_opt, ERRORS_RO);
1054 break; 1054 break;
1055 case Opt_err_cont: 1055 case Opt_err_cont:
1056 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1056 clear_opt(sbi->s_mount_opt, ERRORS_RO);
1057 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1057 clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1058 set_opt (sbi->s_mount_opt, ERRORS_CONT); 1058 set_opt(sbi->s_mount_opt, ERRORS_CONT);
1059 break; 1059 break;
1060 case Opt_nouid32: 1060 case Opt_nouid32:
1061 set_opt (sbi->s_mount_opt, NO_UID32); 1061 set_opt(sbi->s_mount_opt, NO_UID32);
1062 break; 1062 break;
1063 case Opt_nocheck: 1063 case Opt_nocheck:
1064 clear_opt (sbi->s_mount_opt, CHECK); 1064 clear_opt(sbi->s_mount_opt, CHECK);
1065 break; 1065 break;
1066 case Opt_debug: 1066 case Opt_debug:
1067 set_opt (sbi->s_mount_opt, DEBUG); 1067 set_opt(sbi->s_mount_opt, DEBUG);
1068 break; 1068 break;
1069 case Opt_oldalloc: 1069 case Opt_oldalloc:
1070 set_opt (sbi->s_mount_opt, OLDALLOC); 1070 set_opt(sbi->s_mount_opt, OLDALLOC);
1071 break; 1071 break;
1072 case Opt_orlov: 1072 case Opt_orlov:
1073 clear_opt (sbi->s_mount_opt, OLDALLOC); 1073 clear_opt(sbi->s_mount_opt, OLDALLOC);
1074 break; 1074 break;
1075#ifdef CONFIG_EXT4DEV_FS_XATTR 1075#ifdef CONFIG_EXT4DEV_FS_XATTR
1076 case Opt_user_xattr: 1076 case Opt_user_xattr:
1077 set_opt (sbi->s_mount_opt, XATTR_USER); 1077 set_opt(sbi->s_mount_opt, XATTR_USER);
1078 break; 1078 break;
1079 case Opt_nouser_xattr: 1079 case Opt_nouser_xattr:
1080 clear_opt (sbi->s_mount_opt, XATTR_USER); 1080 clear_opt(sbi->s_mount_opt, XATTR_USER);
1081 break; 1081 break;
1082#else 1082#else
1083 case Opt_user_xattr: 1083 case Opt_user_xattr:
@@ -1115,7 +1115,7 @@ static int parse_options (char *options, struct super_block *sb,
1115 "journal on remount\n"); 1115 "journal on remount\n");
1116 return 0; 1116 return 0;
1117 } 1117 }
1118 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); 1118 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1119 break; 1119 break;
1120 case Opt_journal_inum: 1120 case Opt_journal_inum:
1121 if (is_remount) { 1121 if (is_remount) {
@@ -1145,7 +1145,7 @@ static int parse_options (char *options, struct super_block *sb,
1145 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1145 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1146 break; 1146 break;
1147 case Opt_noload: 1147 case Opt_noload:
1148 set_opt (sbi->s_mount_opt, NOLOAD); 1148 set_opt(sbi->s_mount_opt, NOLOAD);
1149 break; 1149 break;
1150 case Opt_commit: 1150 case Opt_commit:
1151 if (match_int(&args[0], &option)) 1151 if (match_int(&args[0], &option))
@@ -1331,7 +1331,7 @@ set_qf_format:
1331 "on this filesystem, use tune2fs\n"); 1331 "on this filesystem, use tune2fs\n");
1332 return 0; 1332 return 0;
1333 } 1333 }
1334 set_opt (sbi->s_mount_opt, EXTENTS); 1334 set_opt(sbi->s_mount_opt, EXTENTS);
1335 break; 1335 break;
1336 case Opt_noextents: 1336 case Opt_noextents:
1337 /* 1337 /*
@@ -1348,7 +1348,7 @@ set_qf_format:
1348 "-o noextents options\n"); 1348 "-o noextents options\n");
1349 return 0; 1349 return 0;
1350 } 1350 }
1351 clear_opt (sbi->s_mount_opt, EXTENTS); 1351 clear_opt(sbi->s_mount_opt, EXTENTS);
1352 break; 1352 break;
1353 case Opt_i_version: 1353 case Opt_i_version:
1354 set_opt(sbi->s_mount_opt, I_VERSION); 1354 set_opt(sbi->s_mount_opt, I_VERSION);
@@ -1374,9 +1374,9 @@ set_qf_format:
1374 set_opt(sbi->s_mount_opt, DELALLOC); 1374 set_opt(sbi->s_mount_opt, DELALLOC);
1375 break; 1375 break;
1376 default: 1376 default:
1377 printk (KERN_ERR 1377 printk(KERN_ERR
1378 "EXT4-fs: Unrecognized mount option \"%s\" " 1378 "EXT4-fs: Unrecognized mount option \"%s\" "
1379 "or missing value\n", p); 1379 "or missing value\n", p);
1380 return 0; 1380 return 0;
1381 } 1381 }
1382 } 1382 }
@@ -1423,31 +1423,31 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1423 int res = 0; 1423 int res = 0;
1424 1424
1425 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1425 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1426 printk (KERN_ERR "EXT4-fs warning: revision level too high, " 1426 printk(KERN_ERR "EXT4-fs warning: revision level too high, "
1427 "forcing read-only mode\n"); 1427 "forcing read-only mode\n");
1428 res = MS_RDONLY; 1428 res = MS_RDONLY;
1429 } 1429 }
1430 if (read_only) 1430 if (read_only)
1431 return res; 1431 return res;
1432 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1432 if (!(sbi->s_mount_state & EXT4_VALID_FS))
1433 printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " 1433 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
1434 "running e2fsck is recommended\n"); 1434 "running e2fsck is recommended\n");
1435 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1435 else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1436 printk (KERN_WARNING 1436 printk(KERN_WARNING
1437 "EXT4-fs warning: mounting fs with errors, " 1437 "EXT4-fs warning: mounting fs with errors, "
1438 "running e2fsck is recommended\n"); 1438 "running e2fsck is recommended\n");
1439 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1439 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1440 le16_to_cpu(es->s_mnt_count) >= 1440 le16_to_cpu(es->s_mnt_count) >=
1441 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1441 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1442 printk (KERN_WARNING 1442 printk(KERN_WARNING
1443 "EXT4-fs warning: maximal mount count reached, " 1443 "EXT4-fs warning: maximal mount count reached, "
1444 "running e2fsck is recommended\n"); 1444 "running e2fsck is recommended\n");
1445 else if (le32_to_cpu(es->s_checkinterval) && 1445 else if (le32_to_cpu(es->s_checkinterval) &&
1446 (le32_to_cpu(es->s_lastcheck) + 1446 (le32_to_cpu(es->s_lastcheck) +
1447 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1447 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1448 printk (KERN_WARNING 1448 printk(KERN_WARNING
1449 "EXT4-fs warning: checktime reached, " 1449 "EXT4-fs warning: checktime reached, "
1450 "running e2fsck is recommended\n"); 1450 "running e2fsck is recommended\n");
1451#if 0 1451#if 0
1452 /* @@@ We _will_ want to clear the valid bit if we find 1452 /* @@@ We _will_ want to clear the valid bit if we find
1453 * inconsistencies, to force a fsck at reboot. But for 1453 * inconsistencies, to force a fsck at reboot. But for
@@ -1506,14 +1506,13 @@ static int ext4_fill_flex_info(struct super_block *sb)
1506 1506
1507 flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / 1507 flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
1508 groups_per_flex; 1508 groups_per_flex;
1509 sbi->s_flex_groups = kmalloc(flex_group_count * 1509 sbi->s_flex_groups = kzalloc(flex_group_count *
1510 sizeof(struct flex_groups), GFP_KERNEL); 1510 sizeof(struct flex_groups), GFP_KERNEL);
1511 if (sbi->s_flex_groups == NULL) { 1511 if (sbi->s_flex_groups == NULL) {
1512 printk(KERN_ERR "EXT4-fs: not enough memory\n"); 1512 printk(KERN_ERR "EXT4-fs: not enough memory for "
1513 "%lu flex groups\n", flex_group_count);
1513 goto failed; 1514 goto failed;
1514 } 1515 }
1515 memset(sbi->s_flex_groups, 0, flex_group_count *
1516 sizeof(struct flex_groups));
1517 1516
1518 gdp = ext4_get_group_desc(sb, 1, &bh); 1517 gdp = ext4_get_group_desc(sb, 1, &bh);
1519 block_bitmap = ext4_block_bitmap(sb, gdp) - 1; 1518 block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
@@ -1597,16 +1596,14 @@ static int ext4_check_descriptors(struct super_block *sb)
1597 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1596 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1598 1597
1599 block_bitmap = ext4_block_bitmap(sb, gdp); 1598 block_bitmap = ext4_block_bitmap(sb, gdp);
1600 if (block_bitmap < first_block || block_bitmap > last_block) 1599 if (block_bitmap < first_block || block_bitmap > last_block) {
1601 {
1602 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1600 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1603 "Block bitmap for group %lu not in group " 1601 "Block bitmap for group %lu not in group "
1604 "(block %llu)!", i, block_bitmap); 1602 "(block %llu)!", i, block_bitmap);
1605 return 0; 1603 return 0;
1606 } 1604 }
1607 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1605 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1608 if (inode_bitmap < first_block || inode_bitmap > last_block) 1606 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1609 {
1610 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1607 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1611 "Inode bitmap for group %lu not in group " 1608 "Inode bitmap for group %lu not in group "
1612 "(block %llu)!", i, inode_bitmap); 1609 "(block %llu)!", i, inode_bitmap);
@@ -1614,26 +1611,28 @@ static int ext4_check_descriptors(struct super_block *sb)
1614 } 1611 }
1615 inode_table = ext4_inode_table(sb, gdp); 1612 inode_table = ext4_inode_table(sb, gdp);
1616 if (inode_table < first_block || 1613 if (inode_table < first_block ||
1617 inode_table + sbi->s_itb_per_group - 1 > last_block) 1614 inode_table + sbi->s_itb_per_group - 1 > last_block) {
1618 {
1619 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1615 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1620 "Inode table for group %lu not in group " 1616 "Inode table for group %lu not in group "
1621 "(block %llu)!", i, inode_table); 1617 "(block %llu)!", i, inode_table);
1622 return 0; 1618 return 0;
1623 } 1619 }
1620 spin_lock(sb_bgl_lock(sbi, i));
1624 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1621 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1625 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1622 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1626 "Checksum for group %lu failed (%u!=%u)\n", 1623 "Checksum for group %lu failed (%u!=%u)\n",
1627 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1624 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1628 gdp)), le16_to_cpu(gdp->bg_checksum)); 1625 gdp)), le16_to_cpu(gdp->bg_checksum));
1629 return 0; 1626 if (!(sb->s_flags & MS_RDONLY))
1627 return 0;
1630 } 1628 }
1629 spin_unlock(sb_bgl_lock(sbi, i));
1631 if (!flexbg_flag) 1630 if (!flexbg_flag)
1632 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1631 first_block += EXT4_BLOCKS_PER_GROUP(sb);
1633 } 1632 }
1634 1633
1635 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1634 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1636 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb)); 1635 sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
1637 return 1; 1636 return 1;
1638} 1637}
1639 1638
@@ -1654,8 +1653,8 @@ static int ext4_check_descriptors(struct super_block *sb)
1654 * e2fsck was run on this filesystem, and it must have already done the orphan 1653 * e2fsck was run on this filesystem, and it must have already done the orphan
1655 * inode cleanup for us, so we can safely abort without any further action. 1654 * inode cleanup for us, so we can safely abort without any further action.
1656 */ 1655 */
1657static void ext4_orphan_cleanup (struct super_block * sb, 1656static void ext4_orphan_cleanup(struct super_block *sb,
1658 struct ext4_super_block * es) 1657 struct ext4_super_block *es)
1659{ 1658{
1660 unsigned int s_flags = sb->s_flags; 1659 unsigned int s_flags = sb->s_flags;
1661 int nr_orphans = 0, nr_truncates = 0; 1660 int nr_orphans = 0, nr_truncates = 0;
@@ -1732,7 +1731,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1732 iput(inode); /* The delete magic happens here! */ 1731 iput(inode); /* The delete magic happens here! */
1733 } 1732 }
1734 1733
1735#define PLURAL(x) (x), ((x)==1) ? "" : "s" 1734#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1736 1735
1737 if (nr_orphans) 1736 if (nr_orphans)
1738 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", 1737 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
@@ -1899,12 +1898,12 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
1899 return 0; 1898 return 0;
1900} 1899}
1901 1900
1902static int ext4_fill_super (struct super_block *sb, void *data, int silent) 1901static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1903 __releases(kernel_lock) 1902 __releases(kernel_lock)
1904 __acquires(kernel_lock) 1903 __acquires(kernel_lock)
1905 1904
1906{ 1905{
1907 struct buffer_head * bh; 1906 struct buffer_head *bh;
1908 struct ext4_super_block *es = NULL; 1907 struct ext4_super_block *es = NULL;
1909 struct ext4_sb_info *sbi; 1908 struct ext4_sb_info *sbi;
1910 ext4_fsblk_t block; 1909 ext4_fsblk_t block;
@@ -1953,7 +1952,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1953 } 1952 }
1954 1953
1955 if (!(bh = sb_bread(sb, logical_sb_block))) { 1954 if (!(bh = sb_bread(sb, logical_sb_block))) {
1956 printk (KERN_ERR "EXT4-fs: unable to read superblock\n"); 1955 printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
1957 goto out_fail; 1956 goto out_fail;
1958 } 1957 }
1959 /* 1958 /*
@@ -2026,8 +2025,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2026 set_opt(sbi->s_mount_opt, DELALLOC); 2025 set_opt(sbi->s_mount_opt, DELALLOC);
2027 2026
2028 2027
2029 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 2028 if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum,
2030 NULL, 0)) 2029 NULL, 0))
2031 goto failed_mount; 2030 goto failed_mount;
2032 2031
2033 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2032 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -2102,7 +2101,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2102 goto failed_mount; 2101 goto failed_mount;
2103 } 2102 }
2104 2103
2105 brelse (bh); 2104 brelse(bh);
2106 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2105 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2107 offset = do_div(logical_sb_block, blocksize); 2106 offset = do_div(logical_sb_block, blocksize);
2108 bh = sb_bread(sb, logical_sb_block); 2107 bh = sb_bread(sb, logical_sb_block);
@@ -2114,8 +2113,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2114 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2113 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2115 sbi->s_es = es; 2114 sbi->s_es = es;
2116 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2115 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2117 printk (KERN_ERR 2116 printk(KERN_ERR
2118 "EXT4-fs: Magic mismatch, very weird !\n"); 2117 "EXT4-fs: Magic mismatch, very weird !\n");
2119 goto failed_mount; 2118 goto failed_mount;
2120 } 2119 }
2121 } 2120 }
@@ -2132,9 +2131,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2132 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2131 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2133 (!is_power_of_2(sbi->s_inode_size)) || 2132 (!is_power_of_2(sbi->s_inode_size)) ||
2134 (sbi->s_inode_size > blocksize)) { 2133 (sbi->s_inode_size > blocksize)) {
2135 printk (KERN_ERR 2134 printk(KERN_ERR
2136 "EXT4-fs: unsupported inode size: %d\n", 2135 "EXT4-fs: unsupported inode size: %d\n",
2137 sbi->s_inode_size); 2136 sbi->s_inode_size);
2138 goto failed_mount; 2137 goto failed_mount;
2139 } 2138 }
2140 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 2139 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
@@ -2166,20 +2165,20 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2166 sbi->s_mount_state = le16_to_cpu(es->s_state); 2165 sbi->s_mount_state = le16_to_cpu(es->s_state);
2167 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2166 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2168 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2167 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2169 for (i=0; i < 4; i++) 2168 for (i = 0; i < 4; i++)
2170 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2169 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2171 sbi->s_def_hash_version = es->s_def_hash_version; 2170 sbi->s_def_hash_version = es->s_def_hash_version;
2172 2171
2173 if (sbi->s_blocks_per_group > blocksize * 8) { 2172 if (sbi->s_blocks_per_group > blocksize * 8) {
2174 printk (KERN_ERR 2173 printk(KERN_ERR
2175 "EXT4-fs: #blocks per group too big: %lu\n", 2174 "EXT4-fs: #blocks per group too big: %lu\n",
2176 sbi->s_blocks_per_group); 2175 sbi->s_blocks_per_group);
2177 goto failed_mount; 2176 goto failed_mount;
2178 } 2177 }
2179 if (sbi->s_inodes_per_group > blocksize * 8) { 2178 if (sbi->s_inodes_per_group > blocksize * 8) {
2180 printk (KERN_ERR 2179 printk(KERN_ERR
2181 "EXT4-fs: #inodes per group too big: %lu\n", 2180 "EXT4-fs: #inodes per group too big: %lu\n",
2182 sbi->s_inodes_per_group); 2181 sbi->s_inodes_per_group);
2183 goto failed_mount; 2182 goto failed_mount;
2184 } 2183 }
2185 2184
@@ -2213,10 +2212,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2213 sbi->s_groups_count = blocks_count; 2212 sbi->s_groups_count = blocks_count;
2214 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 2213 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2215 EXT4_DESC_PER_BLOCK(sb); 2214 EXT4_DESC_PER_BLOCK(sb);
2216 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 2215 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2217 GFP_KERNEL); 2216 GFP_KERNEL);
2218 if (sbi->s_group_desc == NULL) { 2217 if (sbi->s_group_desc == NULL) {
2219 printk (KERN_ERR "EXT4-fs: not enough memory\n"); 2218 printk(KERN_ERR "EXT4-fs: not enough memory\n");
2220 goto failed_mount; 2219 goto failed_mount;
2221 } 2220 }
2222 2221
@@ -2226,13 +2225,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2226 block = descriptor_loc(sb, logical_sb_block, i); 2225 block = descriptor_loc(sb, logical_sb_block, i);
2227 sbi->s_group_desc[i] = sb_bread(sb, block); 2226 sbi->s_group_desc[i] = sb_bread(sb, block);
2228 if (!sbi->s_group_desc[i]) { 2227 if (!sbi->s_group_desc[i]) {
2229 printk (KERN_ERR "EXT4-fs: " 2228 printk(KERN_ERR "EXT4-fs: "
2230 "can't read group descriptor %d\n", i); 2229 "can't read group descriptor %d\n", i);
2231 db_count = i; 2230 db_count = i;
2232 goto failed_mount2; 2231 goto failed_mount2;
2233 } 2232 }
2234 } 2233 }
2235 if (!ext4_check_descriptors (sb)) { 2234 if (!ext4_check_descriptors(sb)) {
2236 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); 2235 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
2237 goto failed_mount2; 2236 goto failed_mount2;
2238 } 2237 }
@@ -2308,11 +2307,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2308 EXT4_SB(sb)->s_journal->j_failed_commit) { 2307 EXT4_SB(sb)->s_journal->j_failed_commit) {
2309 printk(KERN_CRIT "EXT4-fs error (device %s): " 2308 printk(KERN_CRIT "EXT4-fs error (device %s): "
2310 "ext4_fill_super: Journal transaction " 2309 "ext4_fill_super: Journal transaction "
2311 "%u is corrupt\n", sb->s_id, 2310 "%u is corrupt\n", sb->s_id,
2312 EXT4_SB(sb)->s_journal->j_failed_commit); 2311 EXT4_SB(sb)->s_journal->j_failed_commit);
2313 if (test_opt (sb, ERRORS_RO)) { 2312 if (test_opt(sb, ERRORS_RO)) {
2314 printk (KERN_CRIT 2313 printk(KERN_CRIT
2315 "Mounting filesystem read-only\n"); 2314 "Mounting filesystem read-only\n");
2316 sb->s_flags |= MS_RDONLY; 2315 sb->s_flags |= MS_RDONLY;
2317 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2316 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2318 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2317 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2332,9 +2331,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2332 goto failed_mount3; 2331 goto failed_mount3;
2333 } else { 2332 } else {
2334 if (!silent) 2333 if (!silent)
2335 printk (KERN_ERR 2334 printk(KERN_ERR
2336 "ext4: No journal on filesystem on %s\n", 2335 "ext4: No journal on filesystem on %s\n",
2337 sb->s_id); 2336 sb->s_id);
2338 goto failed_mount3; 2337 goto failed_mount3;
2339 } 2338 }
2340 2339
@@ -2418,7 +2417,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2418 goto failed_mount4; 2417 goto failed_mount4;
2419 } 2418 }
2420 2419
2421 ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); 2420 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
2422 2421
2423 /* determine the minimum size of new large inodes, if present */ 2422 /* determine the minimum size of new large inodes, if present */
2424 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 2423 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
@@ -2457,12 +2456,12 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2457 ext4_orphan_cleanup(sb, es); 2456 ext4_orphan_cleanup(sb, es);
2458 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 2457 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2459 if (needs_recovery) 2458 if (needs_recovery)
2460 printk (KERN_INFO "EXT4-fs: recovery complete.\n"); 2459 printk(KERN_INFO "EXT4-fs: recovery complete.\n");
2461 ext4_mark_recovery_complete(sb, es); 2460 ext4_mark_recovery_complete(sb, es);
2462 printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", 2461 printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
2463 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": 2462 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
2464 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": 2463 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
2465 "writeback"); 2464 "writeback");
2466 2465
2467 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2466 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2468 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " 2467 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
@@ -2575,14 +2574,14 @@ static journal_t *ext4_get_journal(struct super_block *sb,
2575static journal_t *ext4_get_dev_journal(struct super_block *sb, 2574static journal_t *ext4_get_dev_journal(struct super_block *sb,
2576 dev_t j_dev) 2575 dev_t j_dev)
2577{ 2576{
2578 struct buffer_head * bh; 2577 struct buffer_head *bh;
2579 journal_t *journal; 2578 journal_t *journal;
2580 ext4_fsblk_t start; 2579 ext4_fsblk_t start;
2581 ext4_fsblk_t len; 2580 ext4_fsblk_t len;
2582 int hblock, blocksize; 2581 int hblock, blocksize;
2583 ext4_fsblk_t sb_block; 2582 ext4_fsblk_t sb_block;
2584 unsigned long offset; 2583 unsigned long offset;
2585 struct ext4_super_block * es; 2584 struct ext4_super_block *es;
2586 struct block_device *bdev; 2585 struct block_device *bdev;
2587 2586
2588 bdev = ext4_blkdev_get(j_dev); 2587 bdev = ext4_blkdev_get(j_dev);
@@ -2697,8 +2696,8 @@ static int ext4_load_journal(struct super_block *sb,
2697 "unavailable, cannot proceed.\n"); 2696 "unavailable, cannot proceed.\n");
2698 return -EROFS; 2697 return -EROFS;
2699 } 2698 }
2700 printk (KERN_INFO "EXT4-fs: write access will " 2699 printk(KERN_INFO "EXT4-fs: write access will "
2701 "be enabled during recovery.\n"); 2700 "be enabled during recovery.\n");
2702 } 2701 }
2703 } 2702 }
2704 2703
@@ -2751,8 +2750,8 @@ static int ext4_load_journal(struct super_block *sb,
2751 return 0; 2750 return 0;
2752} 2751}
2753 2752
2754static int ext4_create_journal(struct super_block * sb, 2753static int ext4_create_journal(struct super_block *sb,
2755 struct ext4_super_block * es, 2754 struct ext4_super_block *es,
2756 unsigned int journal_inum) 2755 unsigned int journal_inum)
2757{ 2756{
2758 journal_t *journal; 2757 journal_t *journal;
@@ -2793,9 +2792,8 @@ static int ext4_create_journal(struct super_block * sb,
2793 return 0; 2792 return 0;
2794} 2793}
2795 2794
2796static void ext4_commit_super (struct super_block * sb, 2795static void ext4_commit_super(struct super_block *sb,
2797 struct ext4_super_block * es, 2796 struct ext4_super_block *es, int sync)
2798 int sync)
2799{ 2797{
2800 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 2798 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
2801 2799
@@ -2816,8 +2814,8 @@ static void ext4_commit_super (struct super_block * sb,
2816 * remounting) the filesystem readonly, then we will end up with a 2814 * remounting) the filesystem readonly, then we will end up with a
2817 * consistent fs on disk. Record that fact. 2815 * consistent fs on disk. Record that fact.
2818 */ 2816 */
2819static void ext4_mark_recovery_complete(struct super_block * sb, 2817static void ext4_mark_recovery_complete(struct super_block *sb,
2820 struct ext4_super_block * es) 2818 struct ext4_super_block *es)
2821{ 2819{
2822 journal_t *journal = EXT4_SB(sb)->s_journal; 2820 journal_t *journal = EXT4_SB(sb)->s_journal;
2823 2821
@@ -2839,8 +2837,8 @@ static void ext4_mark_recovery_complete(struct super_block * sb,
2839 * has recorded an error from a previous lifetime, move that error to the 2837 * has recorded an error from a previous lifetime, move that error to the
2840 * main filesystem now. 2838 * main filesystem now.
2841 */ 2839 */
2842static void ext4_clear_journal_err(struct super_block * sb, 2840static void ext4_clear_journal_err(struct super_block *sb,
2843 struct ext4_super_block * es) 2841 struct ext4_super_block *es)
2844{ 2842{
2845 journal_t *journal; 2843 journal_t *journal;
2846 int j_errno; 2844 int j_errno;
@@ -2865,7 +2863,7 @@ static void ext4_clear_journal_err(struct super_block * sb,
2865 2863
2866 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2864 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2867 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2865 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2868 ext4_commit_super (sb, es, 1); 2866 ext4_commit_super(sb, es, 1);
2869 2867
2870 jbd2_journal_clear_err(journal); 2868 jbd2_journal_clear_err(journal);
2871 } 2869 }
@@ -2898,7 +2896,7 @@ int ext4_force_commit(struct super_block *sb)
2898 * This implicitly triggers the writebehind on sync(). 2896 * This implicitly triggers the writebehind on sync().
2899 */ 2897 */
2900 2898
2901static void ext4_write_super (struct super_block * sb) 2899static void ext4_write_super(struct super_block *sb)
2902{ 2900{
2903 if (mutex_trylock(&sb->s_lock) != 0) 2901 if (mutex_trylock(&sb->s_lock) != 0)
2904 BUG(); 2902 BUG();
@@ -2954,13 +2952,14 @@ static void ext4_unlockfs(struct super_block *sb)
2954 } 2952 }
2955} 2953}
2956 2954
2957static int ext4_remount (struct super_block * sb, int * flags, char * data) 2955static int ext4_remount(struct super_block *sb, int *flags, char *data)
2958{ 2956{
2959 struct ext4_super_block * es; 2957 struct ext4_super_block *es;
2960 struct ext4_sb_info *sbi = EXT4_SB(sb); 2958 struct ext4_sb_info *sbi = EXT4_SB(sb);
2961 ext4_fsblk_t n_blocks_count = 0; 2959 ext4_fsblk_t n_blocks_count = 0;
2962 unsigned long old_sb_flags; 2960 unsigned long old_sb_flags;
2963 struct ext4_mount_options old_opts; 2961 struct ext4_mount_options old_opts;
2962 ext4_group_t g;
2964 int err; 2963 int err;
2965#ifdef CONFIG_QUOTA 2964#ifdef CONFIG_QUOTA
2966 int i; 2965 int i;
@@ -3039,6 +3038,26 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
3039 } 3038 }
3040 3039
3041 /* 3040 /*
3041 * Make sure the group descriptor checksums
3042 * are sane. If they aren't, refuse to
3043 * remount r/w.
3044 */
3045 for (g = 0; g < sbi->s_groups_count; g++) {
3046 struct ext4_group_desc *gdp =
3047 ext4_get_group_desc(sb, g, NULL);
3048
3049 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3050 printk(KERN_ERR
3051 "EXT4-fs: ext4_remount: "
3052 "Checksum for group %lu failed (%u!=%u)\n",
3053 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3054 le16_to_cpu(gdp->bg_checksum));
3055 err = -EINVAL;
3056 goto restore_opts;
3057 }
3058 }
3059
3060 /*
3042 * If we have an unprocessed orphan list hanging 3061 * If we have an unprocessed orphan list hanging
3043 * around from a previously readonly bdev mount, 3062 * around from a previously readonly bdev mount,
3044 * require a full umount/remount for now. 3063 * require a full umount/remount for now.
@@ -3063,7 +3082,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
3063 sbi->s_mount_state = le16_to_cpu(es->s_state); 3082 sbi->s_mount_state = le16_to_cpu(es->s_state);
3064 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 3083 if ((err = ext4_group_extend(sb, es, n_blocks_count)))
3065 goto restore_opts; 3084 goto restore_opts;
3066 if (!ext4_setup_super (sb, es, 0)) 3085 if (!ext4_setup_super(sb, es, 0))
3067 sb->s_flags &= ~MS_RDONLY; 3086 sb->s_flags &= ~MS_RDONLY;
3068 } 3087 }
3069 } 3088 }
@@ -3093,7 +3112,7 @@ restore_opts:
3093 return err; 3112 return err;
3094} 3113}
3095 3114
3096static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) 3115static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3097{ 3116{
3098 struct super_block *sb = dentry->d_sb; 3117 struct super_block *sb = dentry->d_sb;
3099 struct ext4_sb_info *sbi = EXT4_SB(sb); 3118 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3331,12 +3350,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3331 } 3350 }
3332 /* Journaling quota? */ 3351 /* Journaling quota? */
3333 if (EXT4_SB(sb)->s_qf_names[type]) { 3352 if (EXT4_SB(sb)->s_qf_names[type]) {
3334 /* Quotafile not of fs root? */ 3353 /* Quotafile not in fs root? */
3335 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 3354 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
3336 printk(KERN_WARNING 3355 printk(KERN_WARNING
3337 "EXT4-fs: Quota file not on filesystem root. " 3356 "EXT4-fs: Quota file not on filesystem root. "
3338 "Journaled quota will not work.\n"); 3357 "Journaled quota will not work.\n");
3339 } 3358 }
3340 3359
3341 /* 3360 /*
3342 * When we journal data on quota file, we have to flush journal to see 3361 * When we journal data on quota file, we have to flush journal to see
@@ -3352,8 +3371,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3352 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3371 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3353 } 3372 }
3354 3373
3374 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
3355 path_put(&nd.path); 3375 path_put(&nd.path);
3356 return vfs_quota_on(sb, type, format_id, path, remount); 3376 return err;
3357} 3377}
3358 3378
3359/* Read data from quotafile - avoid pagecache and such because we cannot afford 3379/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 93c5fdcdad2e..8954208b4893 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1512,7 +1512,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
1512 char *name = entry->e_name; 1512 char *name = entry->e_name;
1513 int n; 1513 int n;
1514 1514
1515 for (n=0; n < entry->e_name_len; n++) { 1515 for (n = 0; n < entry->e_name_len; n++) {
1516 hash = (hash << NAME_HASH_SHIFT) ^ 1516 hash = (hash << NAME_HASH_SHIFT) ^
1517 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ 1517 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1518 *name++; 1518 *name++;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 8707a8cfa02c..ddde37025ca6 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -313,6 +313,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
313 return 0; 313 return 0;
314} 314}
315 315
316#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
317
316int fat_setattr(struct dentry *dentry, struct iattr *attr) 318int fat_setattr(struct dentry *dentry, struct iattr *attr)
317{ 319{
318 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); 320 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
@@ -336,9 +338,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
336 338
337 /* Check for setting the inode time. */ 339 /* Check for setting the inode time. */
338 ia_valid = attr->ia_valid; 340 ia_valid = attr->ia_valid;
339 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) { 341 if (ia_valid & TIMES_SET_FLAGS) {
340 if (fat_allow_set_time(sbi, inode)) 342 if (fat_allow_set_time(sbi, inode))
341 attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET); 343 attr->ia_valid &= ~TIMES_SET_FLAGS;
342 } 344 }
343 345
344 error = inode_change_ok(inode, attr); 346 error = inode_change_ok(inode, attr);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 6d266d793e2c..80ff3381fa21 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -562,26 +562,23 @@ static int fat_write_inode(struct inode *inode, int wait)
562 struct buffer_head *bh; 562 struct buffer_head *bh;
563 struct msdos_dir_entry *raw_entry; 563 struct msdos_dir_entry *raw_entry;
564 loff_t i_pos; 564 loff_t i_pos;
565 int err = 0; 565 int err;
566 566
567retry: 567retry:
568 i_pos = MSDOS_I(inode)->i_pos; 568 i_pos = MSDOS_I(inode)->i_pos;
569 if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) 569 if (inode->i_ino == MSDOS_ROOT_INO || !i_pos)
570 return 0; 570 return 0;
571 571
572 lock_super(sb);
573 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); 572 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
574 if (!bh) { 573 if (!bh) {
575 printk(KERN_ERR "FAT: unable to read inode block " 574 printk(KERN_ERR "FAT: unable to read inode block "
576 "for updating (i_pos %lld)\n", i_pos); 575 "for updating (i_pos %lld)\n", i_pos);
577 err = -EIO; 576 return -EIO;
578 goto out;
579 } 577 }
580 spin_lock(&sbi->inode_hash_lock); 578 spin_lock(&sbi->inode_hash_lock);
581 if (i_pos != MSDOS_I(inode)->i_pos) { 579 if (i_pos != MSDOS_I(inode)->i_pos) {
582 spin_unlock(&sbi->inode_hash_lock); 580 spin_unlock(&sbi->inode_hash_lock);
583 brelse(bh); 581 brelse(bh);
584 unlock_super(sb);
585 goto retry; 582 goto retry;
586 } 583 }
587 584
@@ -607,11 +604,10 @@ retry:
607 } 604 }
608 spin_unlock(&sbi->inode_hash_lock); 605 spin_unlock(&sbi->inode_hash_lock);
609 mark_buffer_dirty(bh); 606 mark_buffer_dirty(bh);
607 err = 0;
610 if (wait) 608 if (wait)
611 err = sync_dirty_buffer(bh); 609 err = sync_dirty_buffer(bh);
612 brelse(bh); 610 brelse(bh);
613out:
614 unlock_super(sb);
615 return err; 611 return err;
616} 612}
617 613
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 61d625136813..ac4f7db9f134 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -49,73 +49,6 @@ static int get_close_on_exec(unsigned int fd)
49 return res; 49 return res;
50} 50}
51 51
52/*
53 * locate_fd finds a free file descriptor in the open_fds fdset,
54 * expanding the fd arrays if necessary. Must be called with the
55 * file_lock held for write.
56 */
57
58static int locate_fd(unsigned int orig_start, int cloexec)
59{
60 struct files_struct *files = current->files;
61 unsigned int newfd;
62 unsigned int start;
63 int error;
64 struct fdtable *fdt;
65
66 spin_lock(&files->file_lock);
67repeat:
68 fdt = files_fdtable(files);
69 /*
70 * Someone might have closed fd's in the range
71 * orig_start..fdt->next_fd
72 */
73 start = orig_start;
74 if (start < files->next_fd)
75 start = files->next_fd;
76
77 newfd = start;
78 if (start < fdt->max_fds)
79 newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
80 fdt->max_fds, start);
81
82 error = expand_files(files, newfd);
83 if (error < 0)
84 goto out;
85
86 /*
87 * If we needed to expand the fs array we
88 * might have blocked - try again.
89 */
90 if (error)
91 goto repeat;
92
93 if (start <= files->next_fd)
94 files->next_fd = newfd + 1;
95
96 FD_SET(newfd, fdt->open_fds);
97 if (cloexec)
98 FD_SET(newfd, fdt->close_on_exec);
99 else
100 FD_CLR(newfd, fdt->close_on_exec);
101 error = newfd;
102
103out:
104 spin_unlock(&files->file_lock);
105 return error;
106}
107
108static int dupfd(struct file *file, unsigned int start, int cloexec)
109{
110 int fd = locate_fd(start, cloexec);
111 if (fd >= 0)
112 fd_install(fd, file);
113 else
114 fput(file);
115
116 return fd;
117}
118
119asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) 52asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
120{ 53{
121 int err = -EBADF; 54 int err = -EBADF;
@@ -130,31 +63,35 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
130 return -EINVAL; 63 return -EINVAL;
131 64
132 spin_lock(&files->file_lock); 65 spin_lock(&files->file_lock);
133 if (!(file = fcheck(oldfd)))
134 goto out_unlock;
135 get_file(file); /* We are now finished with oldfd */
136
137 err = expand_files(files, newfd); 66 err = expand_files(files, newfd);
67 file = fcheck(oldfd);
68 if (unlikely(!file))
69 goto Ebadf;
138 if (unlikely(err < 0)) { 70 if (unlikely(err < 0)) {
139 if (err == -EMFILE) 71 if (err == -EMFILE)
140 err = -EBADF; 72 goto Ebadf;
141 goto out_fput; 73 goto out_unlock;
142 } 74 }
143 75 /*
144 /* To avoid races with open() and dup(), we will mark the fd as 76 * We need to detect attempts to do dup2() over allocated but still
145 * in-use in the open-file bitmap throughout the entire dup2() 77 * not finished descriptor. NB: OpenBSD avoids that at the price of
146 * process. This is quite safe: do_close() uses the fd array 78 * extra work in their equivalent of fget() - they insert struct
147 * entry, not the bitmap, to decide what work needs to be 79 * file immediately after grabbing descriptor, mark it larval if
148 * done. --sct */ 80 * more work (e.g. actual opening) is needed and make sure that
149 /* Doesn't work. open() might be there first. --AV */ 81 * fget() treats larval files as absent. Potentially interesting,
150 82 * but while extra work in fget() is trivial, locking implications
151 /* Yes. It's a race. In user space. Nothing sane to do */ 83 * and amount of surgery on open()-related paths in VFS are not.
84 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
85 * deadlocks in rather amusing ways, AFAICS. All of that is out of
86 * scope of POSIX or SUS, since neither considers shared descriptor
87 * tables and this condition does not arise without those.
88 */
152 err = -EBUSY; 89 err = -EBUSY;
153 fdt = files_fdtable(files); 90 fdt = files_fdtable(files);
154 tofree = fdt->fd[newfd]; 91 tofree = fdt->fd[newfd];
155 if (!tofree && FD_ISSET(newfd, fdt->open_fds)) 92 if (!tofree && FD_ISSET(newfd, fdt->open_fds))
156 goto out_fput; 93 goto out_unlock;
157 94 get_file(file);
158 rcu_assign_pointer(fdt->fd[newfd], file); 95 rcu_assign_pointer(fdt->fd[newfd], file);
159 FD_SET(newfd, fdt->open_fds); 96 FD_SET(newfd, fdt->open_fds);
160 if (flags & O_CLOEXEC) 97 if (flags & O_CLOEXEC)
@@ -165,17 +102,14 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
165 102
166 if (tofree) 103 if (tofree)
167 filp_close(tofree, files); 104 filp_close(tofree, files);
168 err = newfd;
169out:
170 return err;
171out_unlock:
172 spin_unlock(&files->file_lock);
173 goto out;
174 105
175out_fput: 106 return newfd;
107
108Ebadf:
109 err = -EBADF;
110out_unlock:
176 spin_unlock(&files->file_lock); 111 spin_unlock(&files->file_lock);
177 fput(file); 112 return err;
178 goto out;
179} 113}
180 114
181asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) 115asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
@@ -194,10 +128,15 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
194asmlinkage long sys_dup(unsigned int fildes) 128asmlinkage long sys_dup(unsigned int fildes)
195{ 129{
196 int ret = -EBADF; 130 int ret = -EBADF;
197 struct file * file = fget(fildes); 131 struct file *file = fget(fildes);
198 132
199 if (file) 133 if (file) {
200 ret = dupfd(file, 0, 0); 134 ret = get_unused_fd();
135 if (ret >= 0)
136 fd_install(ret, file);
137 else
138 fput(file);
139 }
201 return ret; 140 return ret;
202} 141}
203 142
@@ -322,8 +261,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
322 case F_DUPFD_CLOEXEC: 261 case F_DUPFD_CLOEXEC:
323 if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 262 if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
324 break; 263 break;
325 get_file(filp); 264 err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
326 err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC); 265 if (err >= 0) {
266 get_file(filp);
267 fd_install(err, filp);
268 }
327 break; 269 break;
328 case F_GETFD: 270 case F_GETFD:
329 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; 271 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
diff --git a/fs/file.c b/fs/file.c
index d8773b19fe47..f313314f996f 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
6 * Manage the dynamic fd arrays in the process files_struct. 6 * Manage the dynamic fd arrays in the process files_struct.
7 */ 7 */
8 8
9#include <linux/module.h>
9#include <linux/fs.h> 10#include <linux/fs.h>
10#include <linux/mm.h> 11#include <linux/mm.h>
11#include <linux/time.h> 12#include <linux/time.h>
@@ -432,3 +433,63 @@ struct files_struct init_files = {
432 }, 433 },
433 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 434 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
434}; 435};
436
437/*
438 * allocate a file descriptor, mark it busy.
439 */
440int alloc_fd(unsigned start, unsigned flags)
441{
442 struct files_struct *files = current->files;
443 unsigned int fd;
444 int error;
445 struct fdtable *fdt;
446
447 spin_lock(&files->file_lock);
448repeat:
449 fdt = files_fdtable(files);
450 fd = start;
451 if (fd < files->next_fd)
452 fd = files->next_fd;
453
454 if (fd < fdt->max_fds)
455 fd = find_next_zero_bit(fdt->open_fds->fds_bits,
456 fdt->max_fds, fd);
457
458 error = expand_files(files, fd);
459 if (error < 0)
460 goto out;
461
462 /*
463 * If we needed to expand the fs array we
464 * might have blocked - try again.
465 */
466 if (error)
467 goto repeat;
468
469 if (start <= files->next_fd)
470 files->next_fd = fd + 1;
471
472 FD_SET(fd, fdt->open_fds);
473 if (flags & O_CLOEXEC)
474 FD_SET(fd, fdt->close_on_exec);
475 else
476 FD_CLR(fd, fdt->close_on_exec);
477 error = fd;
478#if 1
479 /* Sanity check */
480 if (rcu_dereference(fdt->fd[fd]) != NULL) {
481 printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
482 rcu_assign_pointer(fdt->fd[fd], NULL);
483 }
484#endif
485
486out:
487 spin_unlock(&files->file_lock);
488 return error;
489}
490
491int get_unused_fd(void)
492{
493 return alloc_fd(0, 0);
494}
495EXPORT_SYMBOL(get_unused_fd);
diff --git a/fs/inode.c b/fs/inode.c
index b6726f644530..0487ddba1397 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -166,6 +166,7 @@ static struct inode *alloc_inode(struct super_block *sb)
166 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); 166 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
167 mapping->assoc_mapping = NULL; 167 mapping->assoc_mapping = NULL;
168 mapping->backing_dev_info = &default_backing_dev_info; 168 mapping->backing_dev_info = &default_backing_dev_info;
169 mapping->writeback_index = 0;
169 170
170 /* 171 /*
171 * If the block_device provides a backing_dev_info for client 172 * If the block_device provides a backing_dev_info for client
diff --git a/fs/ioprio.c b/fs/ioprio.c
index c4a1c3c65aac..da3cc460d4df 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -115,11 +115,11 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
115 pgrp = task_pgrp(current); 115 pgrp = task_pgrp(current);
116 else 116 else
117 pgrp = find_vpid(who); 117 pgrp = find_vpid(who);
118 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 118 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
119 ret = set_task_ioprio(p, ioprio); 119 ret = set_task_ioprio(p, ioprio);
120 if (ret) 120 if (ret)
121 break; 121 break;
122 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 122 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
123 break; 123 break;
124 case IOPRIO_WHO_USER: 124 case IOPRIO_WHO_USER:
125 if (!who) 125 if (!who)
@@ -204,7 +204,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
204 pgrp = task_pgrp(current); 204 pgrp = task_pgrp(current);
205 else 205 else
206 pgrp = find_vpid(who); 206 pgrp = find_vpid(who);
207 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 207 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
208 tmpio = get_task_ioprio(p); 208 tmpio = get_task_ioprio(p);
209 if (tmpio < 0) 209 if (tmpio < 0)
210 continue; 210 continue;
@@ -212,7 +212,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
212 ret = tmpio; 212 ret = tmpio;
213 else 213 else
214 ret = ioprio_best(ret, tmpio); 214 ret = ioprio_best(ret, tmpio);
215 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 215 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
216 break; 216 break;
217 case IOPRIO_WHO_USER: 217 case IOPRIO_WHO_USER:
218 if (!who) 218 if (!who)
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 2eccbfaa1d48..ae08c057e751 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh)
63 goto nope; 63 goto nope;
64 64
65 /* OK, it's a truncated page */ 65 /* OK, it's a truncated page */
66 if (TestSetPageLocked(page)) 66 if (!trylock_page(page))
67 goto nope; 67 goto nope;
68 68
69 page_cache_get(page); 69 page_cache_get(page);
@@ -221,7 +221,7 @@ write_out_data:
221 * blocking lock_buffer(). 221 * blocking lock_buffer().
222 */ 222 */
223 if (buffer_dirty(bh)) { 223 if (buffer_dirty(bh)) {
224 if (test_set_buffer_locked(bh)) { 224 if (!trylock_buffer(bh)) {
225 BUFFER_TRACE(bh, "needs blocking lock"); 225 BUFFER_TRACE(bh, "needs blocking lock");
226 spin_unlock(&journal->j_list_lock); 226 spin_unlock(&journal->j_list_lock);
227 /* Write out all data to prevent deadlocks */ 227 /* Write out all data to prevent deadlocks */
@@ -446,7 +446,7 @@ void journal_commit_transaction(journal_t *journal)
446 spin_lock(&journal->j_list_lock); 446 spin_lock(&journal->j_list_lock);
447 } 447 }
448 if (unlikely(!buffer_uptodate(bh))) { 448 if (unlikely(!buffer_uptodate(bh))) {
449 if (TestSetPageLocked(bh->b_page)) { 449 if (!trylock_page(bh->b_page)) {
450 spin_unlock(&journal->j_list_lock); 450 spin_unlock(&journal->j_list_lock);
451 lock_page(bh->b_page); 451 lock_page(bh->b_page);
452 spin_lock(&journal->j_list_lock); 452 spin_lock(&journal->j_list_lock);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 8dee32007500..0540ca27a446 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -291,7 +291,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
291 goto out; 291 goto out;
292 } 292 }
293 293
294 lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); 294 lock_map_acquire(&handle->h_lockdep_map);
295 295
296out: 296out:
297 return handle; 297 return handle;
@@ -1448,7 +1448,7 @@ int journal_stop(handle_t *handle)
1448 spin_unlock(&journal->j_state_lock); 1448 spin_unlock(&journal->j_state_lock);
1449 } 1449 }
1450 1450
1451 lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); 1451 lock_map_release(&handle->h_lockdep_map);
1452 1452
1453 jbd_free_handle(handle); 1453 jbd_free_handle(handle);
1454 return err; 1454 return err;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f8b3be873226..f2ad061e95ec 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -67,7 +67,7 @@ static void release_buffer_page(struct buffer_head *bh)
67 goto nope; 67 goto nope;
68 68
69 /* OK, it's a truncated page */ 69 /* OK, it's a truncated page */
70 if (TestSetPageLocked(page)) 70 if (!trylock_page(page))
71 goto nope; 71 goto nope;
72 72
73 page_cache_get(page); 73 page_cache_get(page);
@@ -262,8 +262,18 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
262 jinode->i_flags |= JI_COMMIT_RUNNING; 262 jinode->i_flags |= JI_COMMIT_RUNNING;
263 spin_unlock(&journal->j_list_lock); 263 spin_unlock(&journal->j_list_lock);
264 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); 264 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
265 if (!ret) 265 if (err) {
266 ret = err; 266 /*
267 * Because AS_EIO is cleared by
268 * wait_on_page_writeback_range(), set it again so
269 * that user process can get -EIO from fsync().
270 */
271 set_bit(AS_EIO,
272 &jinode->i_vfs_inode->i_mapping->flags);
273
274 if (!ret)
275 ret = err;
276 }
267 spin_lock(&journal->j_list_lock); 277 spin_lock(&journal->j_list_lock);
268 jinode->i_flags &= ~JI_COMMIT_RUNNING; 278 jinode->i_flags &= ~JI_COMMIT_RUNNING;
269 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 279 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
@@ -670,8 +680,14 @@ start_journal_io:
670 * commit block, which happens below in such setting. 680 * commit block, which happens below in such setting.
671 */ 681 */
672 err = journal_finish_inode_data_buffers(journal, commit_transaction); 682 err = journal_finish_inode_data_buffers(journal, commit_transaction);
673 if (err) 683 if (err) {
674 jbd2_journal_abort(journal, err); 684 char b[BDEVNAME_SIZE];
685
686 printk(KERN_WARNING
687 "JBD2: Detected IO errors while flushing file data "
688 "on %s\n", bdevname(journal->j_fs_dev, b));
689 err = 0;
690 }
675 691
676 /* Lo and behold: we have just managed to send a transaction to 692 /* Lo and behold: we have just managed to send a transaction to
677 the log. Before we can commit it, wait for the IO so far to 693 the log. Before we can commit it, wait for the IO so far to
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b26c6d9fe6ae..8207a01c4edb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(jbd2_journal_set_features);
68EXPORT_SYMBOL(jbd2_journal_create); 68EXPORT_SYMBOL(jbd2_journal_create);
69EXPORT_SYMBOL(jbd2_journal_load); 69EXPORT_SYMBOL(jbd2_journal_load);
70EXPORT_SYMBOL(jbd2_journal_destroy); 70EXPORT_SYMBOL(jbd2_journal_destroy);
71EXPORT_SYMBOL(jbd2_journal_update_superblock);
72EXPORT_SYMBOL(jbd2_journal_abort); 71EXPORT_SYMBOL(jbd2_journal_abort);
73EXPORT_SYMBOL(jbd2_journal_errno); 72EXPORT_SYMBOL(jbd2_journal_errno);
74EXPORT_SYMBOL(jbd2_journal_ack_err); 73EXPORT_SYMBOL(jbd2_journal_ack_err);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 4f7cadbb19fa..e5d540588fa9 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -301,7 +301,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
301 goto out; 301 goto out;
302 } 302 }
303 303
304 lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); 304 lock_map_acquire(&handle->h_lockdep_map);
305out: 305out:
306 return handle; 306 return handle;
307} 307}
@@ -1279,7 +1279,7 @@ int jbd2_journal_stop(handle_t *handle)
1279 spin_unlock(&journal->j_state_lock); 1279 spin_unlock(&journal->j_state_lock);
1280 } 1280 }
1281 1281
1282 lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); 1282 lock_map_release(&handle->h_lockdep_map);
1283 1283
1284 jbd2_free_handle(handle); 1284 jbd2_free_handle(handle);
1285 return err; 1285 return err;
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 31559f45fdde..4c41db91eaa4 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -12,7 +12,6 @@
12#ifndef _JFFS2_FS_I 12#ifndef _JFFS2_FS_I
13#define _JFFS2_FS_I 13#define _JFFS2_FS_I
14 14
15#include <linux/version.h>
16#include <linux/rbtree.h> 15#include <linux/rbtree.h>
17#include <linux/posix_acl.h> 16#include <linux/posix_acl.h>
18#include <linux/mutex.h> 17#include <linux/mutex.h>
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 629af01e5ade..6caf1e1ee26d 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -23,6 +23,8 @@
23 23
24int jffs2_sum_init(struct jffs2_sb_info *c) 24int jffs2_sum_init(struct jffs2_sb_info *c)
25{ 25{
26 uint32_t sum_size = max_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE);
27
26 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); 28 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
27 29
28 if (!c->summary) { 30 if (!c->summary) {
@@ -30,7 +32,7 @@ int jffs2_sum_init(struct jffs2_sb_info *c)
30 return -ENOMEM; 32 return -ENOMEM;
31 } 33 }
32 34
33 c->summary->sum_buf = vmalloc(c->sector_size); 35 c->summary->sum_buf = kmalloc(sum_size, GFP_KERNEL);
34 36
35 if (!c->summary->sum_buf) { 37 if (!c->summary->sum_buf) {
36 JFFS2_WARNING("Can't allocate buffer for writing out summary information!\n"); 38 JFFS2_WARNING("Can't allocate buffer for writing out summary information!\n");
@@ -49,7 +51,7 @@ void jffs2_sum_exit(struct jffs2_sb_info *c)
49 51
50 jffs2_sum_disable_collecting(c->summary); 52 jffs2_sum_disable_collecting(c->summary);
51 53
52 vfree(c->summary->sum_buf); 54 kfree(c->summary->sum_buf);
53 c->summary->sum_buf = NULL; 55 c->summary->sum_buf = NULL;
54 56
55 kfree(c->summary); 57 kfree(c->summary);
@@ -665,7 +667,7 @@ crc_err:
665/* Write summary data to flash - helper function for jffs2_sum_write_sumnode() */ 667/* Write summary data to flash - helper function for jffs2_sum_write_sumnode() */
666 668
667static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 669static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
668 uint32_t infosize, uint32_t datasize, int padsize) 670 uint32_t infosize, uint32_t datasize, int padsize)
669{ 671{
670 struct jffs2_raw_summary isum; 672 struct jffs2_raw_summary isum;
671 union jffs2_sum_mem *temp; 673 union jffs2_sum_mem *temp;
@@ -676,6 +678,26 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
676 int ret; 678 int ret;
677 size_t retlen; 679 size_t retlen;
678 680
681 if (padsize + datasize > MAX_SUMMARY_SIZE) {
682 /* It won't fit in the buffer. Abort summary for this jeb */
683 jffs2_sum_disable_collecting(c->summary);
684
685 JFFS2_WARNING("Summary too big (%d data, %d pad) in eraseblock at %08x\n",
686 datasize, padsize, jeb->offset);
687 /* Non-fatal */
688 return 0;
689 }
690 /* Is there enough space for summary? */
691 if (padsize < 0) {
692 /* don't try to write out summary for this jeb */
693 jffs2_sum_disable_collecting(c->summary);
694
695 JFFS2_WARNING("Not enough space for summary, padsize = %d\n",
696 padsize);
697 /* Non-fatal */
698 return 0;
699 }
700
679 memset(c->summary->sum_buf, 0xff, datasize); 701 memset(c->summary->sum_buf, 0xff, datasize);
680 memset(&isum, 0, sizeof(isum)); 702 memset(&isum, 0, sizeof(isum));
681 703
@@ -821,7 +843,7 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
821{ 843{
822 int datasize, infosize, padsize; 844 int datasize, infosize, padsize;
823 struct jffs2_eraseblock *jeb; 845 struct jffs2_eraseblock *jeb;
824 int ret; 846 int ret = 0;
825 847
826 dbg_summary("called\n"); 848 dbg_summary("called\n");
827 849
@@ -841,16 +863,6 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
841 infosize += padsize; 863 infosize += padsize;
842 datasize += padsize; 864 datasize += padsize;
843 865
844 /* Is there enough space for summary? */
845 if (padsize < 0) {
846 /* don't try to write out summary for this jeb */
847 jffs2_sum_disable_collecting(c->summary);
848
849 JFFS2_WARNING("Not enough space for summary, padsize = %d\n", padsize);
850 spin_lock(&c->erase_completion_lock);
851 return 0;
852 }
853
854 ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize); 866 ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize);
855 spin_lock(&c->erase_completion_lock); 867 spin_lock(&c->erase_completion_lock);
856 return ret; 868 return ret;
diff --git a/fs/jffs2/summary.h b/fs/jffs2/summary.h
index 8bf34f2fa5ce..60207a2ae952 100644
--- a/fs/jffs2/summary.h
+++ b/fs/jffs2/summary.h
@@ -13,6 +13,12 @@
13#ifndef JFFS2_SUMMARY_H 13#ifndef JFFS2_SUMMARY_H
14#define JFFS2_SUMMARY_H 14#define JFFS2_SUMMARY_H
15 15
16/* Limit summary size to 64KiB so that we can kmalloc it. If the summary
17 is larger than that, we have to just ditch it and avoid using summary
18 for the eraseblock in question... and it probably doesn't hurt us much
19 anyway. */
20#define MAX_SUMMARY_SIZE 65536
21
16#include <linux/uio.h> 22#include <linux/uio.h>
17#include <linux/jffs2.h> 23#include <linux/jffs2.h>
18 24
diff --git a/fs/libfs.c b/fs/libfs.c
index baeb71ee1cde..1add676a19df 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -216,8 +216,8 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
216 216
217 s->s_flags = MS_NOUSER; 217 s->s_flags = MS_NOUSER;
218 s->s_maxbytes = ~0ULL; 218 s->s_maxbytes = ~0ULL;
219 s->s_blocksize = 1024; 219 s->s_blocksize = PAGE_SIZE;
220 s->s_blocksize_bits = 10; 220 s->s_blocksize_bits = PAGE_SHIFT;
221 s->s_magic = magic; 221 s->s_magic = magic;
222 s->s_op = ops ? ops : &simple_super_operations; 222 s->s_op = ops ? ops : &simple_super_operations;
223 s->s_time_gran = 1; 223 s->s_time_gran = 1;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 399444639337..4a714f64515b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -83,7 +83,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
83{ 83{
84 struct nlm_host *host; 84 struct nlm_host *host;
85 struct nlm_file *file; 85 struct nlm_file *file;
86 int rc = rpc_success; 86 __be32 rc = rpc_success;
87 87
88 dprintk("lockd: TEST4 called\n"); 88 dprintk("lockd: TEST4 called\n");
89 resp->cookie = argp->cookie; 89 resp->cookie = argp->cookie;
@@ -116,7 +116,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
116{ 116{
117 struct nlm_host *host; 117 struct nlm_host *host;
118 struct nlm_file *file; 118 struct nlm_file *file;
119 int rc = rpc_success; 119 __be32 rc = rpc_success;
120 120
121 dprintk("lockd: LOCK called\n"); 121 dprintk("lockd: LOCK called\n");
122 122
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 76019d2ff72d..76262c1986f2 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -112,7 +112,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
112{ 112{
113 struct nlm_host *host; 113 struct nlm_host *host;
114 struct nlm_file *file; 114 struct nlm_file *file;
115 int rc = rpc_success; 115 __be32 rc = rpc_success;
116 116
117 dprintk("lockd: TEST called\n"); 117 dprintk("lockd: TEST called\n");
118 resp->cookie = argp->cookie; 118 resp->cookie = argp->cookie;
@@ -146,7 +146,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
146{ 146{
147 struct nlm_host *host; 147 struct nlm_host *host;
148 struct nlm_file *file; 148 struct nlm_file *file;
149 int rc = rpc_success; 149 __be32 rc = rpc_success;
150 150
151 dprintk("lockd: LOCK called\n"); 151 dprintk("lockd: LOCK called\n");
152 152
diff --git a/fs/namei.c b/fs/namei.c
index a7b0a0b80128..4ea63ed5e791 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -274,7 +274,7 @@ int inode_permission(struct inode *inode, int mask)
274 return retval; 274 return retval;
275 275
276 return security_inode_permission(inode, 276 return security_inode_permission(inode,
277 mask & (MAY_READ|MAY_WRITE|MAY_EXEC)); 277 mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
278} 278}
279 279
280/** 280/**
@@ -1431,8 +1431,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1431 * 3. We should have write and exec permissions on dir 1431 * 3. We should have write and exec permissions on dir
1432 * 4. We can't do it if dir is immutable (done in permission()) 1432 * 4. We can't do it if dir is immutable (done in permission())
1433 */ 1433 */
1434static inline int may_create(struct inode *dir, struct dentry *child, 1434static inline int may_create(struct inode *dir, struct dentry *child)
1435 struct nameidata *nd)
1436{ 1435{
1437 if (child->d_inode) 1436 if (child->d_inode)
1438 return -EEXIST; 1437 return -EEXIST;
@@ -1504,7 +1503,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
1504int vfs_create(struct inode *dir, struct dentry *dentry, int mode, 1503int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1505 struct nameidata *nd) 1504 struct nameidata *nd)
1506{ 1505{
1507 int error = may_create(dir, dentry, nd); 1506 int error = may_create(dir, dentry);
1508 1507
1509 if (error) 1508 if (error)
1510 return error; 1509 return error;
@@ -1948,7 +1947,7 @@ EXPORT_SYMBOL_GPL(lookup_create);
1948 1947
1949int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 1948int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1950{ 1949{
1951 int error = may_create(dir, dentry, NULL); 1950 int error = may_create(dir, dentry);
1952 1951
1953 if (error) 1952 if (error)
1954 return error; 1953 return error;
@@ -2049,7 +2048,7 @@ asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev)
2049 2048
2050int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 2049int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2051{ 2050{
2052 int error = may_create(dir, dentry, NULL); 2051 int error = may_create(dir, dentry);
2053 2052
2054 if (error) 2053 if (error)
2055 return error; 2054 return error;
@@ -2316,7 +2315,7 @@ asmlinkage long sys_unlink(const char __user *pathname)
2316 2315
2317int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) 2316int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
2318{ 2317{
2319 int error = may_create(dir, dentry, NULL); 2318 int error = may_create(dir, dentry);
2320 2319
2321 if (error) 2320 if (error)
2322 return error; 2321 return error;
@@ -2386,7 +2385,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2386 if (!inode) 2385 if (!inode)
2387 return -ENOENT; 2386 return -ENOENT;
2388 2387
2389 error = may_create(dir, new_dentry, NULL); 2388 error = may_create(dir, new_dentry);
2390 if (error) 2389 if (error)
2391 return error; 2390 return error;
2392 2391
@@ -2595,7 +2594,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2595 return error; 2594 return error;
2596 2595
2597 if (!new_dentry->d_inode) 2596 if (!new_dentry->d_inode)
2598 error = may_create(new_dir, new_dentry, NULL); 2597 error = may_create(new_dir, new_dentry);
2599 else 2598 else
2600 error = may_delete(new_dir, new_dentry, is_dir); 2599 error = may_delete(new_dir, new_dentry, is_dir);
2601 if (error) 2600 if (error)
diff --git a/fs/namespace.c b/fs/namespace.c
index 411728c0c8bb..6e283c93b50d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1667,31 +1667,31 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags,
1667 if (IS_ERR(mnt)) 1667 if (IS_ERR(mnt))
1668 return PTR_ERR(mnt); 1668 return PTR_ERR(mnt);
1669 1669
1670 return do_add_mount(mnt, nd, mnt_flags, NULL); 1670 return do_add_mount(mnt, &nd->path, mnt_flags, NULL);
1671} 1671}
1672 1672
1673/* 1673/*
1674 * add a mount into a namespace's mount tree 1674 * add a mount into a namespace's mount tree
1675 * - provide the option of adding the new mount to an expiration list 1675 * - provide the option of adding the new mount to an expiration list
1676 */ 1676 */
1677int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, 1677int do_add_mount(struct vfsmount *newmnt, struct path *path,
1678 int mnt_flags, struct list_head *fslist) 1678 int mnt_flags, struct list_head *fslist)
1679{ 1679{
1680 int err; 1680 int err;
1681 1681
1682 down_write(&namespace_sem); 1682 down_write(&namespace_sem);
1683 /* Something was mounted here while we slept */ 1683 /* Something was mounted here while we slept */
1684 while (d_mountpoint(nd->path.dentry) && 1684 while (d_mountpoint(path->dentry) &&
1685 follow_down(&nd->path.mnt, &nd->path.dentry)) 1685 follow_down(&path->mnt, &path->dentry))
1686 ; 1686 ;
1687 err = -EINVAL; 1687 err = -EINVAL;
1688 if (!check_mnt(nd->path.mnt)) 1688 if (!check_mnt(path->mnt))
1689 goto unlock; 1689 goto unlock;
1690 1690
1691 /* Refuse the same filesystem on the same mount point */ 1691 /* Refuse the same filesystem on the same mount point */
1692 err = -EBUSY; 1692 err = -EBUSY;
1693 if (nd->path.mnt->mnt_sb == newmnt->mnt_sb && 1693 if (path->mnt->mnt_sb == newmnt->mnt_sb &&
1694 nd->path.mnt->mnt_root == nd->path.dentry) 1694 path->mnt->mnt_root == path->dentry)
1695 goto unlock; 1695 goto unlock;
1696 1696
1697 err = -EINVAL; 1697 err = -EINVAL;
@@ -1699,7 +1699,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
1699 goto unlock; 1699 goto unlock;
1700 1700
1701 newmnt->mnt_flags = mnt_flags; 1701 newmnt->mnt_flags = mnt_flags;
1702 if ((err = graft_tree(newmnt, &nd->path))) 1702 if ((err = graft_tree(newmnt, path)))
1703 goto unlock; 1703 goto unlock;
1704 1704
1705 if (fslist) /* add to the specified expiration list */ 1705 if (fslist) /* add to the specified expiration list */
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 2f285ef76399..66df08dd1caf 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -129,7 +129,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
129 goto out_err; 129 goto out_err;
130 130
131 mntget(mnt); 131 mntget(mnt);
132 err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, 132 err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
133 &nfs_automount_list); 133 &nfs_automount_list);
134 if (err < 0) { 134 if (err < 0) {
135 mntput(mnt); 135 mntput(mnt);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 8478fc25daee..46763d1cd397 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -127,7 +127,7 @@ enum {
127 Opt_err 127 Opt_err
128}; 128};
129 129
130static match_table_t __initconst tokens = { 130static match_table_t __initdata tokens = {
131 {Opt_port, "port=%u"}, 131 {Opt_port, "port=%u"},
132 {Opt_rsize, "rsize=%u"}, 132 {Opt_rsize, "rsize=%u"},
133 {Opt_wsize, "wsize=%u"}, 133 {Opt_wsize, "wsize=%u"},
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 33bfcf09db46..9dc036f18356 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1023,7 +1023,7 @@ exp_export(struct nfsctl_export *nxp)
1023 /* Look up the dentry */ 1023 /* Look up the dentry */
1024 err = path_lookup(nxp->ex_path, 0, &nd); 1024 err = path_lookup(nxp->ex_path, 0, &nd);
1025 if (err) 1025 if (err)
1026 goto out_unlock; 1026 goto out_put_clp;
1027 err = -EINVAL; 1027 err = -EINVAL;
1028 1028
1029 exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL); 1029 exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL);
@@ -1090,9 +1090,9 @@ finish:
1090 exp_put(exp); 1090 exp_put(exp);
1091 if (fsid_key && !IS_ERR(fsid_key)) 1091 if (fsid_key && !IS_ERR(fsid_key))
1092 cache_put(&fsid_key->h, &svc_expkey_cache); 1092 cache_put(&fsid_key->h, &svc_expkey_cache);
1093 if (clp)
1094 auth_domain_put(clp);
1095 path_put(&nd.path); 1093 path_put(&nd.path);
1094out_put_clp:
1095 auth_domain_put(clp);
1096out_unlock: 1096out_unlock:
1097 exp_writeunlock(); 1097 exp_writeunlock();
1098out: 1098out:
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index b6ed38380ab8..54b8b4140c8f 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -443,7 +443,7 @@ init_state(struct posix_acl_state *state, int cnt)
443 * enough space for either: 443 * enough space for either:
444 */ 444 */
445 alloc = sizeof(struct posix_ace_state_array) 445 alloc = sizeof(struct posix_ace_state_array)
446 + cnt*sizeof(struct posix_ace_state); 446 + cnt*sizeof(struct posix_user_ace_state);
447 state->users = kzalloc(alloc, GFP_KERNEL); 447 state->users = kzalloc(alloc, GFP_KERNEL);
448 if (!state->users) 448 if (!state->users)
449 return -ENOMEM; 449 return -ENOMEM;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index eef1629806f5..e5b51ffafc6c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -851,7 +851,7 @@ struct nfsd4_operation {
851 851
852static struct nfsd4_operation nfsd4_ops[]; 852static struct nfsd4_operation nfsd4_ops[];
853 853
854static inline char *nfsd4_op_name(unsigned opnum); 854static const char *nfsd4_op_name(unsigned opnum);
855 855
856/* 856/*
857 * COMPOUND call. 857 * COMPOUND call.
@@ -867,11 +867,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
867 int slack_bytes; 867 int slack_bytes;
868 __be32 status; 868 __be32 status;
869 869
870 status = nfserr_resource;
871 cstate = cstate_alloc();
872 if (cstate == NULL)
873 goto out;
874
875 resp->xbuf = &rqstp->rq_res; 870 resp->xbuf = &rqstp->rq_res;
876 resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; 871 resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len;
877 resp->tagp = resp->p; 872 resp->tagp = resp->p;
@@ -890,6 +885,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
890 if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) 885 if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION)
891 goto out; 886 goto out;
892 887
888 status = nfserr_resource;
889 cstate = cstate_alloc();
890 if (cstate == NULL)
891 goto out;
892
893 status = nfs_ok; 893 status = nfs_ok;
894 while (!status && resp->opcnt < args->opcnt) { 894 while (!status && resp->opcnt < args->opcnt) {
895 op = &args->ops[resp->opcnt++]; 895 op = &args->ops[resp->opcnt++];
@@ -957,9 +957,9 @@ encode_op:
957 nfsd4_increment_op_stats(op->opnum); 957 nfsd4_increment_op_stats(op->opnum);
958 } 958 }
959 959
960 cstate_free(cstate);
960out: 961out:
961 nfsd4_release_compoundargs(args); 962 nfsd4_release_compoundargs(args);
962 cstate_free(cstate);
963 dprintk("nfsv4 compound returned %d\n", ntohl(status)); 963 dprintk("nfsv4 compound returned %d\n", ntohl(status));
964 return status; 964 return status;
965} 965}
@@ -1116,8 +1116,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
1116 }, 1116 },
1117}; 1117};
1118 1118
1119static inline char * 1119static const char *nfsd4_op_name(unsigned opnum)
1120nfsd4_op_name(unsigned opnum)
1121{ 1120{
1122 if (opnum < ARRAY_SIZE(nfsd4_ops)) 1121 if (opnum < ARRAY_SIZE(nfsd4_ops))
1123 return nfsd4_ops[opnum].op_name; 1122 return nfsd4_ops[opnum].op_name;
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 00e9ccde8e42..b38f944f0667 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1194,7 +1194,7 @@ lock_retry_remap:
1194 tbh = bhs[i]; 1194 tbh = bhs[i];
1195 if (!tbh) 1195 if (!tbh)
1196 continue; 1196 continue;
1197 if (unlikely(test_set_buffer_locked(tbh))) 1197 if (!trylock_buffer(tbh))
1198 BUG(); 1198 BUG();
1199 /* The buffer dirty state is now irrelevant, just clean it. */ 1199 /* The buffer dirty state is now irrelevant, just clean it. */
1200 clear_buffer_dirty(tbh); 1200 clear_buffer_dirty(tbh);
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 33ff314cc507..9669541d0119 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -665,7 +665,7 @@ lock_retry_remap:
665 for (i = 0; i < nr_bhs; i++) { 665 for (i = 0; i < nr_bhs; i++) {
666 struct buffer_head *tbh = bhs[i]; 666 struct buffer_head *tbh = bhs[i];
667 667
668 if (unlikely(test_set_buffer_locked(tbh))) 668 if (!trylock_buffer(tbh))
669 continue; 669 continue;
670 if (unlikely(buffer_uptodate(tbh))) { 670 if (unlikely(buffer_uptodate(tbh))) {
671 unlock_buffer(tbh); 671 unlock_buffer(tbh);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 790defb847e7..17d32ca6bc35 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -586,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
586 for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 586 for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
587 struct buffer_head *tbh = bhs[i_bhs]; 587 struct buffer_head *tbh = bhs[i_bhs];
588 588
589 if (unlikely(test_set_buffer_locked(tbh))) 589 if (!trylock_buffer(tbh))
590 BUG(); 590 BUG();
591 BUG_ON(!buffer_uptodate(tbh)); 591 BUG_ON(!buffer_uptodate(tbh));
592 clear_buffer_dirty(tbh); 592 clear_buffer_dirty(tbh);
@@ -779,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
779 for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 779 for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
780 struct buffer_head *tbh = bhs[i_bhs]; 780 struct buffer_head *tbh = bhs[i_bhs];
781 781
782 if (unlikely(test_set_buffer_locked(tbh))) 782 if (!trylock_buffer(tbh))
783 BUG(); 783 BUG();
784 BUG_ON(!buffer_uptodate(tbh)); 784 BUG_ON(!buffer_uptodate(tbh));
785 clear_buffer_dirty(tbh); 785 clear_buffer_dirty(tbh);
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index e1781c8b1650..9e8a95be7a1e 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -174,7 +174,6 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
174 // TODO: Consider moving this lot to a separate function! (AIA) 174 // TODO: Consider moving this lot to a separate function! (AIA)
175handle_name: 175handle_name:
176 { 176 {
177 struct dentry *real_dent, *new_dent;
178 MFT_RECORD *m; 177 MFT_RECORD *m;
179 ntfs_attr_search_ctx *ctx; 178 ntfs_attr_search_ctx *ctx;
180 ntfs_inode *ni = NTFS_I(dent_inode); 179 ntfs_inode *ni = NTFS_I(dent_inode);
@@ -255,93 +254,9 @@ handle_name:
255 } 254 }
256 nls_name.hash = full_name_hash(nls_name.name, nls_name.len); 255 nls_name.hash = full_name_hash(nls_name.name, nls_name.len);
257 256
258 /* 257 dent = d_add_ci(dent, dent_inode, &nls_name);
259 * Note: No need for dent->d_lock lock as i_mutex is held on the
260 * parent inode.
261 */
262
263 /* Does a dentry matching the nls_name exist already? */
264 real_dent = d_lookup(dent->d_parent, &nls_name);
265 /* If not, create it now. */
266 if (!real_dent) {
267 real_dent = d_alloc(dent->d_parent, &nls_name);
268 kfree(nls_name.name);
269 if (!real_dent) {
270 err = -ENOMEM;
271 goto err_out;
272 }
273 new_dent = d_splice_alias(dent_inode, real_dent);
274 if (new_dent)
275 dput(real_dent);
276 else
277 new_dent = real_dent;
278 ntfs_debug("Done. (Created new dentry.)");
279 return new_dent;
280 }
281 kfree(nls_name.name); 258 kfree(nls_name.name);
282 /* Matching dentry exists, check if it is negative. */ 259 return dent;
283 if (real_dent->d_inode) {
284 if (unlikely(real_dent->d_inode != dent_inode)) {
285 /* This can happen because bad inodes are unhashed. */
286 BUG_ON(!is_bad_inode(dent_inode));
287 BUG_ON(!is_bad_inode(real_dent->d_inode));
288 }
289 /*
290 * Already have the inode and the dentry attached, decrement
291 * the reference count to balance the ntfs_iget() we did
292 * earlier on. We found the dentry using d_lookup() so it
293 * cannot be disconnected and thus we do not need to worry
294 * about any NFS/disconnectedness issues here.
295 */
296 iput(dent_inode);
297 ntfs_debug("Done. (Already had inode and dentry.)");
298 return real_dent;
299 }
300 /*
301 * Negative dentry: instantiate it unless the inode is a directory and
302 * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
303 * in which case d_move() that in place of the found dentry.
304 */
305 if (!S_ISDIR(dent_inode->i_mode)) {
306 /* Not a directory; everything is easy. */
307 d_instantiate(real_dent, dent_inode);
308 ntfs_debug("Done. (Already had negative file dentry.)");
309 return real_dent;
310 }
311 spin_lock(&dcache_lock);
312 if (list_empty(&dent_inode->i_dentry)) {
313 /*
314 * Directory without a 'disconnected' dentry; we need to do
315 * d_instantiate() by hand because it takes dcache_lock which
316 * we already hold.
317 */
318 list_add(&real_dent->d_alias, &dent_inode->i_dentry);
319 real_dent->d_inode = dent_inode;
320 spin_unlock(&dcache_lock);
321 security_d_instantiate(real_dent, dent_inode);
322 ntfs_debug("Done. (Already had negative directory dentry.)");
323 return real_dent;
324 }
325 /*
326 * Directory with a 'disconnected' dentry; get a reference to the
327 * 'disconnected' dentry.
328 */
329 new_dent = list_entry(dent_inode->i_dentry.next, struct dentry,
330 d_alias);
331 dget_locked(new_dent);
332 spin_unlock(&dcache_lock);
333 /* Do security vodoo. */
334 security_d_instantiate(real_dent, dent_inode);
335 /* Move new_dent in place of real_dent. */
336 d_move(new_dent, real_dent);
337 /* Balance the ntfs_iget() we did above. */
338 iput(dent_inode);
339 /* Throw away real_dent. */
340 dput(real_dent);
341 /* Use new_dent as the actual dentry. */
342 ntfs_debug("Done. (Already had negative, disconnected directory "
343 "dentry.)");
344 return new_dent;
345 260
346eio_err_out: 261eio_err_out:
347 ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); 262 ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk.");
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h
index 3a8af75351e8..4087fbdac327 100644
--- a/fs/ntfs/usnjrnl.h
+++ b/fs/ntfs/usnjrnl.h
@@ -113,7 +113,7 @@ typedef struct {
113 * Reason flags (32-bit). Cumulative flags describing the change(s) to the 113 * Reason flags (32-bit). Cumulative flags describing the change(s) to the
114 * file since it was last opened. I think the names speak for themselves but 114 * file since it was last opened. I think the names speak for themselves but
115 * if you disagree check out the descriptions in the Linux NTFS project NTFS 115 * if you disagree check out the descriptions in the Linux NTFS project NTFS
116 * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html 116 * documentation: http://www.linux-ntfs.org/
117 */ 117 */
118enum { 118enum {
119 USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), 119 USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001),
@@ -145,7 +145,7 @@ typedef le32 USN_REASON_FLAGS;
145 * Source info flags (32-bit). Information about the source of the change(s) 145 * Source info flags (32-bit). Information about the source of the change(s)
146 * to the file. For detailed descriptions of what these mean, see the Linux 146 * to the file. For detailed descriptions of what these mean, see the Linux
147 * NTFS project NTFS documentation: 147 * NTFS project NTFS documentation:
148 * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html 148 * http://www.linux-ntfs.org/
149 */ 149 */
150enum { 150enum {
151 USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), 151 USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001),
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1db080135c6d..506c24fb5078 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1073,12 +1073,15 @@ static void ocfs2_write_failure(struct inode *inode,
1073 for(i = 0; i < wc->w_num_pages; i++) { 1073 for(i = 0; i < wc->w_num_pages; i++) {
1074 tmppage = wc->w_pages[i]; 1074 tmppage = wc->w_pages[i];
1075 1075
1076 if (ocfs2_should_order_data(inode)) 1076 if (page_has_buffers(tmppage)) {
1077 walk_page_buffers(wc->w_handle, page_buffers(tmppage), 1077 if (ocfs2_should_order_data(inode))
1078 from, to, NULL, 1078 walk_page_buffers(wc->w_handle,
1079 ocfs2_journal_dirty_data); 1079 page_buffers(tmppage),
1080 1080 from, to, NULL,
1081 block_commit_write(tmppage, from, to); 1081 ocfs2_journal_dirty_data);
1082
1083 block_commit_write(tmppage, from, to);
1084 }
1082 } 1085 }
1083} 1086}
1084 1087
@@ -1901,12 +1904,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1901 to = PAGE_CACHE_SIZE; 1904 to = PAGE_CACHE_SIZE;
1902 } 1905 }
1903 1906
1904 if (ocfs2_should_order_data(inode)) 1907 if (page_has_buffers(tmppage)) {
1905 walk_page_buffers(wc->w_handle, page_buffers(tmppage), 1908 if (ocfs2_should_order_data(inode))
1906 from, to, NULL, 1909 walk_page_buffers(wc->w_handle,
1907 ocfs2_journal_dirty_data); 1910 page_buffers(tmppage),
1908 1911 from, to, NULL,
1909 block_commit_write(tmppage, from, to); 1912 ocfs2_journal_dirty_data);
1913 block_commit_write(tmppage, from, to);
1914 }
1910 } 1915 }
1911 1916
1912out_write_size: 1917out_write_size:
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index d8bfa0eb41b2..52276c02f710 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -138,20 +138,20 @@ static int nst_seq_show(struct seq_file *seq, void *v)
138 " message id: %d\n" 138 " message id: %d\n"
139 " message type: %u\n" 139 " message type: %u\n"
140 " message key: 0x%08x\n" 140 " message key: 0x%08x\n"
141 " sock acquiry: %lu.%lu\n" 141 " sock acquiry: %lu.%ld\n"
142 " send start: %lu.%lu\n" 142 " send start: %lu.%ld\n"
143 " wait start: %lu.%lu\n", 143 " wait start: %lu.%ld\n",
144 nst, (unsigned long)nst->st_task->pid, 144 nst, (unsigned long)nst->st_task->pid,
145 (unsigned long)nst->st_task->tgid, 145 (unsigned long)nst->st_task->tgid,
146 nst->st_task->comm, nst->st_node, 146 nst->st_task->comm, nst->st_node,
147 nst->st_sc, nst->st_id, nst->st_msg_type, 147 nst->st_sc, nst->st_id, nst->st_msg_type,
148 nst->st_msg_key, 148 nst->st_msg_key,
149 nst->st_sock_time.tv_sec, 149 nst->st_sock_time.tv_sec,
150 (unsigned long)nst->st_sock_time.tv_usec, 150 (long)nst->st_sock_time.tv_usec,
151 nst->st_send_time.tv_sec, 151 nst->st_send_time.tv_sec,
152 (unsigned long)nst->st_send_time.tv_usec, 152 (long)nst->st_send_time.tv_usec,
153 nst->st_status_time.tv_sec, 153 nst->st_status_time.tv_sec,
154 nst->st_status_time.tv_usec); 154 (long)nst->st_status_time.tv_usec);
155 } 155 }
156 156
157 spin_unlock(&o2net_debug_lock); 157 spin_unlock(&o2net_debug_lock);
@@ -276,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
276 return sc; /* unused, just needs to be null when done */ 276 return sc; /* unused, just needs to be null when done */
277} 277}
278 278
279#define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec 279#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec
280 280
281static int sc_seq_show(struct seq_file *seq, void *v) 281static int sc_seq_show(struct seq_file *seq, void *v)
282{ 282{
@@ -309,12 +309,12 @@ static int sc_seq_show(struct seq_file *seq, void *v)
309 " remote node: %s\n" 309 " remote node: %s\n"
310 " page off: %zu\n" 310 " page off: %zu\n"
311 " handshake ok: %u\n" 311 " handshake ok: %u\n"
312 " timer: %lu.%lu\n" 312 " timer: %lu.%ld\n"
313 " data ready: %lu.%lu\n" 313 " data ready: %lu.%ld\n"
314 " advance start: %lu.%lu\n" 314 " advance start: %lu.%ld\n"
315 " advance stop: %lu.%lu\n" 315 " advance stop: %lu.%ld\n"
316 " func start: %lu.%lu\n" 316 " func start: %lu.%ld\n"
317 " func stop: %lu.%lu\n" 317 " func stop: %lu.%ld\n"
318 " func key: %u\n" 318 " func key: %u\n"
319 " func type: %u\n", 319 " func type: %u\n",
320 sc, 320 sc,
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index a27d61581bd6..2bcf706d9dd3 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -143,8 +143,8 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
143static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); 143static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
144 144
145#ifdef CONFIG_DEBUG_FS 145#ifdef CONFIG_DEBUG_FS
146void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, 146static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
147 u32 msgkey, struct task_struct *task, u8 node) 147 u32 msgkey, struct task_struct *task, u8 node)
148{ 148{
149 INIT_LIST_HEAD(&nst->st_net_debug_item); 149 INIT_LIST_HEAD(&nst->st_net_debug_item);
150 nst->st_task = task; 150 nst->st_task = task;
@@ -153,31 +153,61 @@ void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
153 nst->st_node = node; 153 nst->st_node = node;
154} 154}
155 155
156void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) 156static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
157{ 157{
158 do_gettimeofday(&nst->st_sock_time); 158 do_gettimeofday(&nst->st_sock_time);
159} 159}
160 160
161void o2net_set_nst_send_time(struct o2net_send_tracking *nst) 161static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
162{ 162{
163 do_gettimeofday(&nst->st_send_time); 163 do_gettimeofday(&nst->st_send_time);
164} 164}
165 165
166void o2net_set_nst_status_time(struct o2net_send_tracking *nst) 166static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
167{ 167{
168 do_gettimeofday(&nst->st_status_time); 168 do_gettimeofday(&nst->st_status_time);
169} 169}
170 170
171void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, 171static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
172 struct o2net_sock_container *sc) 172 struct o2net_sock_container *sc)
173{ 173{
174 nst->st_sc = sc; 174 nst->st_sc = sc;
175} 175}
176 176
177void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) 177static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
178{ 178{
179 nst->st_id = msg_id; 179 nst->st_id = msg_id;
180} 180}
181
182#else /* CONFIG_DEBUG_FS */
183
184static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
185 u32 msgkey, struct task_struct *task, u8 node)
186{
187}
188
189static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
190{
191}
192
193static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
194{
195}
196
197static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
198{
199}
200
201static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
202 struct o2net_sock_container *sc)
203{
204}
205
206static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
207 u32 msg_id)
208{
209}
210
181#endif /* CONFIG_DEBUG_FS */ 211#endif /* CONFIG_DEBUG_FS */
182 212
183static inline int o2net_reconnect_delay(void) 213static inline int o2net_reconnect_delay(void)
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 18307ff81b77..8d58cfe410b1 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -224,42 +224,10 @@ struct o2net_send_tracking {
224 struct timeval st_send_time; 224 struct timeval st_send_time;
225 struct timeval st_status_time; 225 struct timeval st_status_time;
226}; 226};
227
228void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
229 u32 msgkey, struct task_struct *task, u8 node);
230void o2net_set_nst_sock_time(struct o2net_send_tracking *nst);
231void o2net_set_nst_send_time(struct o2net_send_tracking *nst);
232void o2net_set_nst_status_time(struct o2net_send_tracking *nst);
233void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
234 struct o2net_sock_container *sc);
235void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id);
236
237#else 227#else
238struct o2net_send_tracking { 228struct o2net_send_tracking {
239 u32 dummy; 229 u32 dummy;
240}; 230};
241
242static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
243 u32 msgkey, struct task_struct *task, u8 node)
244{
245}
246static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
247{
248}
249static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
250{
251}
252static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
253{
254}
255static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
256 struct o2net_sock_container *sc)
257{
258}
259static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
260 u32 msg_id)
261{
262}
263#endif /* CONFIG_DEBUG_FS */ 231#endif /* CONFIG_DEBUG_FS */
264 232
265#endif /* O2CLUSTER_TCP_INTERNAL_H */ 233#endif /* O2CLUSTER_TCP_INTERNAL_H */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8a1875848080..9cce563fd627 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1300,7 +1300,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1300 di->i_size = cpu_to_le64(sb->s_blocksize); 1300 di->i_size = cpu_to_le64(sb->s_blocksize);
1301 di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); 1301 di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
1302 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); 1302 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
1303 dir->i_blocks = ocfs2_inode_sector_count(dir);
1304 1303
1305 /* 1304 /*
1306 * This should never fail as our extent list is empty and all 1305 * This should never fail as our extent list is empty and all
@@ -1310,9 +1309,15 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1310 NULL); 1309 NULL);
1311 if (ret) { 1310 if (ret) {
1312 mlog_errno(ret); 1311 mlog_errno(ret);
1313 goto out; 1312 goto out_commit;
1314 } 1313 }
1315 1314
1315 /*
1316 * Set i_blocks after the extent insert for the most up to
1317 * date ip_clusters value.
1318 */
1319 dir->i_blocks = ocfs2_inode_sector_count(dir);
1320
1316 ret = ocfs2_journal_dirty(handle, di_bh); 1321 ret = ocfs2_journal_dirty(handle, di_bh);
1317 if (ret) { 1322 if (ret) {
1318 mlog_errno(ret); 1323 mlog_errno(ret);
@@ -1336,7 +1341,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1336 len, 0, NULL); 1341 len, 0, NULL);
1337 if (ret) { 1342 if (ret) {
1338 mlog_errno(ret); 1343 mlog_errno(ret);
1339 goto out; 1344 goto out_commit;
1340 } 1345 }
1341 } 1346 }
1342 1347
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index be2dd95d3a1d..ec2ed15c3daa 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1766,8 +1766,8 @@ out_inode_unlock:
1766out_rw_unlock: 1766out_rw_unlock:
1767 ocfs2_rw_unlock(inode, 1); 1767 ocfs2_rw_unlock(inode, 1);
1768 1768
1769 mutex_unlock(&inode->i_mutex);
1770out: 1769out:
1770 mutex_unlock(&inode->i_mutex);
1771 return ret; 1771 return ret;
1772} 1772}
1773 1773
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a8c19cb3cfdd..c47bc2a809c2 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -57,7 +57,7 @@ static int __ocfs2_recovery_thread(void *arg);
57static int ocfs2_commit_cache(struct ocfs2_super *osb); 57static int ocfs2_commit_cache(struct ocfs2_super *osb);
58static int ocfs2_wait_on_mount(struct ocfs2_super *osb); 58static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
60 int dirty); 60 int dirty, int replayed);
61static int ocfs2_trylock_journal(struct ocfs2_super *osb, 61static int ocfs2_trylock_journal(struct ocfs2_super *osb,
62 int slot_num); 62 int slot_num);
63static int ocfs2_recover_orphans(struct ocfs2_super *osb, 63static int ocfs2_recover_orphans(struct ocfs2_super *osb,
@@ -562,8 +562,18 @@ done:
562 return status; 562 return status;
563} 563}
564 564
565static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
566{
567 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
568}
569
570static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
571{
572 return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
573}
574
565static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 575static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
566 int dirty) 576 int dirty, int replayed)
567{ 577{
568 int status; 578 int status;
569 unsigned int flags; 579 unsigned int flags;
@@ -593,6 +603,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
593 flags &= ~OCFS2_JOURNAL_DIRTY_FL; 603 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
594 fe->id1.journal1.ij_flags = cpu_to_le32(flags); 604 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
595 605
606 if (replayed)
607 ocfs2_bump_recovery_generation(fe);
608
596 status = ocfs2_write_block(osb, bh, journal->j_inode); 609 status = ocfs2_write_block(osb, bh, journal->j_inode);
597 if (status < 0) 610 if (status < 0)
598 mlog_errno(status); 611 mlog_errno(status);
@@ -667,7 +680,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
667 * Do not toggle if flush was unsuccessful otherwise 680 * Do not toggle if flush was unsuccessful otherwise
668 * will leave dirty metadata in a "clean" journal 681 * will leave dirty metadata in a "clean" journal
669 */ 682 */
670 status = ocfs2_journal_toggle_dirty(osb, 0); 683 status = ocfs2_journal_toggle_dirty(osb, 0, 0);
671 if (status < 0) 684 if (status < 0)
672 mlog_errno(status); 685 mlog_errno(status);
673 } 686 }
@@ -710,7 +723,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
710 } 723 }
711} 724}
712 725
713int ocfs2_journal_load(struct ocfs2_journal *journal, int local) 726int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
714{ 727{
715 int status = 0; 728 int status = 0;
716 struct ocfs2_super *osb; 729 struct ocfs2_super *osb;
@@ -729,7 +742,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
729 742
730 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); 743 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
731 744
732 status = ocfs2_journal_toggle_dirty(osb, 1); 745 status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
733 if (status < 0) { 746 if (status < 0) {
734 mlog_errno(status); 747 mlog_errno(status);
735 goto done; 748 goto done;
@@ -771,7 +784,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
771 goto bail; 784 goto bail;
772 } 785 }
773 786
774 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0); 787 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
775 if (status < 0) 788 if (status < 0)
776 mlog_errno(status); 789 mlog_errno(status);
777 790
@@ -1034,6 +1047,12 @@ restart:
1034 spin_unlock(&osb->osb_lock); 1047 spin_unlock(&osb->osb_lock);
1035 mlog(0, "All nodes recovered\n"); 1048 mlog(0, "All nodes recovered\n");
1036 1049
1050 /* Refresh all journal recovery generations from disk */
1051 status = ocfs2_check_journals_nolocks(osb);
1052 status = (status == -EROFS) ? 0 : status;
1053 if (status < 0)
1054 mlog_errno(status);
1055
1037 ocfs2_super_unlock(osb, 1); 1056 ocfs2_super_unlock(osb, 1);
1038 1057
1039 /* We always run recovery on our own orphan dir - the dead 1058 /* We always run recovery on our own orphan dir - the dead
@@ -1096,6 +1115,42 @@ out:
1096 mlog_exit_void(); 1115 mlog_exit_void();
1097} 1116}
1098 1117
1118static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1119 int slot_num,
1120 struct buffer_head **bh,
1121 struct inode **ret_inode)
1122{
1123 int status = -EACCES;
1124 struct inode *inode = NULL;
1125
1126 BUG_ON(slot_num >= osb->max_slots);
1127
1128 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1129 slot_num);
1130 if (!inode || is_bad_inode(inode)) {
1131 mlog_errno(status);
1132 goto bail;
1133 }
1134 SET_INODE_JOURNAL(inode);
1135
1136 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode);
1137 if (status < 0) {
1138 mlog_errno(status);
1139 goto bail;
1140 }
1141
1142 status = 0;
1143
1144bail:
1145 if (inode) {
1146 if (status || !ret_inode)
1147 iput(inode);
1148 else
1149 *ret_inode = inode;
1150 }
1151 return status;
1152}
1153
1099/* Does the actual journal replay and marks the journal inode as 1154/* Does the actual journal replay and marks the journal inode as
1100 * clean. Will only replay if the journal inode is marked dirty. */ 1155 * clean. Will only replay if the journal inode is marked dirty. */
1101static int ocfs2_replay_journal(struct ocfs2_super *osb, 1156static int ocfs2_replay_journal(struct ocfs2_super *osb,
@@ -1109,22 +1164,36 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1109 struct ocfs2_dinode *fe; 1164 struct ocfs2_dinode *fe;
1110 journal_t *journal = NULL; 1165 journal_t *journal = NULL;
1111 struct buffer_head *bh = NULL; 1166 struct buffer_head *bh = NULL;
1167 u32 slot_reco_gen;
1112 1168
1113 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 1169 status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
1114 slot_num); 1170 if (status) {
1115 if (inode == NULL) {
1116 status = -EACCES;
1117 mlog_errno(status); 1171 mlog_errno(status);
1118 goto done; 1172 goto done;
1119 } 1173 }
1120 if (is_bad_inode(inode)) { 1174
1121 status = -EACCES; 1175 fe = (struct ocfs2_dinode *)bh->b_data;
1122 iput(inode); 1176 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1123 inode = NULL; 1177 brelse(bh);
1124 mlog_errno(status); 1178 bh = NULL;
1179
1180 /*
1181 * As the fs recovery is asynchronous, there is a small chance that
1182 * another node mounted (and recovered) the slot before the recovery
1183 * thread could get the lock. To handle that, we dirty read the journal
1184 * inode for that slot to get the recovery generation. If it is
1185 * different than what we expected, the slot has been recovered.
1186 * If not, it needs recovery.
1187 */
1188 if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
1189 mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num,
1190 osb->slot_recovery_generations[slot_num], slot_reco_gen);
1191 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1192 status = -EBUSY;
1125 goto done; 1193 goto done;
1126 } 1194 }
1127 SET_INODE_JOURNAL(inode); 1195
1196 /* Continue with recovery as the journal has not yet been recovered */
1128 1197
1129 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); 1198 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
1130 if (status < 0) { 1199 if (status < 0) {
@@ -1138,9 +1207,12 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1138 fe = (struct ocfs2_dinode *) bh->b_data; 1207 fe = (struct ocfs2_dinode *) bh->b_data;
1139 1208
1140 flags = le32_to_cpu(fe->id1.journal1.ij_flags); 1209 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
1210 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1141 1211
1142 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { 1212 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
1143 mlog(0, "No recovery required for node %d\n", node_num); 1213 mlog(0, "No recovery required for node %d\n", node_num);
1214 /* Refresh recovery generation for the slot */
1215 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1144 goto done; 1216 goto done;
1145 } 1217 }
1146 1218
@@ -1188,6 +1260,11 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1188 flags &= ~OCFS2_JOURNAL_DIRTY_FL; 1260 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
1189 fe->id1.journal1.ij_flags = cpu_to_le32(flags); 1261 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
1190 1262
1263 /* Increment recovery generation to indicate successful recovery */
1264 ocfs2_bump_recovery_generation(fe);
1265 osb->slot_recovery_generations[slot_num] =
1266 ocfs2_get_recovery_generation(fe);
1267
1191 status = ocfs2_write_block(osb, bh, inode); 1268 status = ocfs2_write_block(osb, bh, inode);
1192 if (status < 0) 1269 if (status < 0)
1193 mlog_errno(status); 1270 mlog_errno(status);
@@ -1252,6 +1329,13 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1252 1329
1253 status = ocfs2_replay_journal(osb, node_num, slot_num); 1330 status = ocfs2_replay_journal(osb, node_num, slot_num);
1254 if (status < 0) { 1331 if (status < 0) {
1332 if (status == -EBUSY) {
1333 mlog(0, "Skipping recovery for slot %u (node %u) "
1334 "as another node has recovered it\n", slot_num,
1335 node_num);
1336 status = 0;
1337 goto done;
1338 }
1255 mlog_errno(status); 1339 mlog_errno(status);
1256 goto done; 1340 goto done;
1257 } 1341 }
@@ -1334,21 +1418,46 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1334{ 1418{
1335 unsigned int node_num; 1419 unsigned int node_num;
1336 int status, i; 1420 int status, i;
1421 u32 gen;
1422 struct buffer_head *bh = NULL;
1423 struct ocfs2_dinode *di;
1337 1424
1338 /* This is called with the super block cluster lock, so we 1425 /* This is called with the super block cluster lock, so we
1339 * know that the slot map can't change underneath us. */ 1426 * know that the slot map can't change underneath us. */
1340 1427
1341 spin_lock(&osb->osb_lock);
1342 for (i = 0; i < osb->max_slots; i++) { 1428 for (i = 0; i < osb->max_slots; i++) {
1343 if (i == osb->slot_num) 1429 /* Read journal inode to get the recovery generation */
1430 status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
1431 if (status) {
1432 mlog_errno(status);
1433 goto bail;
1434 }
1435 di = (struct ocfs2_dinode *)bh->b_data;
1436 gen = ocfs2_get_recovery_generation(di);
1437 brelse(bh);
1438 bh = NULL;
1439
1440 spin_lock(&osb->osb_lock);
1441 osb->slot_recovery_generations[i] = gen;
1442
1443 mlog(0, "Slot %u recovery generation is %u\n", i,
1444 osb->slot_recovery_generations[i]);
1445
1446 if (i == osb->slot_num) {
1447 spin_unlock(&osb->osb_lock);
1344 continue; 1448 continue;
1449 }
1345 1450
1346 status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); 1451 status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
1347 if (status == -ENOENT) 1452 if (status == -ENOENT) {
1453 spin_unlock(&osb->osb_lock);
1348 continue; 1454 continue;
1455 }
1349 1456
1350 if (__ocfs2_recovery_map_test(osb, node_num)) 1457 if (__ocfs2_recovery_map_test(osb, node_num)) {
1458 spin_unlock(&osb->osb_lock);
1351 continue; 1459 continue;
1460 }
1352 spin_unlock(&osb->osb_lock); 1461 spin_unlock(&osb->osb_lock);
1353 1462
1354 /* Ok, we have a slot occupied by another node which 1463 /* Ok, we have a slot occupied by another node which
@@ -1364,10 +1473,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1364 mlog_errno(status); 1473 mlog_errno(status);
1365 goto bail; 1474 goto bail;
1366 } 1475 }
1367
1368 spin_lock(&osb->osb_lock);
1369 } 1476 }
1370 spin_unlock(&osb->osb_lock);
1371 1477
1372 status = 0; 1478 status = 0;
1373bail: 1479bail:
@@ -1603,49 +1709,41 @@ static int ocfs2_commit_thread(void *arg)
1603 return 0; 1709 return 0;
1604} 1710}
1605 1711
1606/* Look for a dirty journal without taking any cluster locks. Used for 1712/* Reads all the journal inodes without taking any cluster locks. Used
1607 * hard readonly access to determine whether the file system journals 1713 * for hard readonly access to determine whether any journal requires
1608 * require recovery. */ 1714 * recovery. Also used to refresh the recovery generation numbers after
1715 * a journal has been recovered by another node.
1716 */
1609int ocfs2_check_journals_nolocks(struct ocfs2_super *osb) 1717int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
1610{ 1718{
1611 int ret = 0; 1719 int ret = 0;
1612 unsigned int slot; 1720 unsigned int slot;
1613 struct buffer_head *di_bh; 1721 struct buffer_head *di_bh = NULL;
1614 struct ocfs2_dinode *di; 1722 struct ocfs2_dinode *di;
1615 struct inode *journal = NULL; 1723 int journal_dirty = 0;
1616 1724
1617 for(slot = 0; slot < osb->max_slots; slot++) { 1725 for(slot = 0; slot < osb->max_slots; slot++) {
1618 journal = ocfs2_get_system_file_inode(osb, 1726 ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
1619 JOURNAL_SYSTEM_INODE, 1727 if (ret) {
1620 slot);
1621 if (!journal || is_bad_inode(journal)) {
1622 ret = -EACCES;
1623 mlog_errno(ret);
1624 goto out;
1625 }
1626
1627 di_bh = NULL;
1628 ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh,
1629 0, journal);
1630 if (ret < 0) {
1631 mlog_errno(ret); 1728 mlog_errno(ret);
1632 goto out; 1729 goto out;
1633 } 1730 }
1634 1731
1635 di = (struct ocfs2_dinode *) di_bh->b_data; 1732 di = (struct ocfs2_dinode *) di_bh->b_data;
1636 1733
1734 osb->slot_recovery_generations[slot] =
1735 ocfs2_get_recovery_generation(di);
1736
1637 if (le32_to_cpu(di->id1.journal1.ij_flags) & 1737 if (le32_to_cpu(di->id1.journal1.ij_flags) &
1638 OCFS2_JOURNAL_DIRTY_FL) 1738 OCFS2_JOURNAL_DIRTY_FL)
1639 ret = -EROFS; 1739 journal_dirty = 1;
1640 1740
1641 brelse(di_bh); 1741 brelse(di_bh);
1642 if (ret) 1742 di_bh = NULL;
1643 break;
1644 } 1743 }
1645 1744
1646out: 1745out:
1647 if (journal) 1746 if (journal_dirty)
1648 iput(journal); 1747 ret = -EROFS;
1649
1650 return ret; 1748 return ret;
1651} 1749}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index db82be2532ed..2178ebffa05f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -161,7 +161,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal,
161void ocfs2_journal_shutdown(struct ocfs2_super *osb); 161void ocfs2_journal_shutdown(struct ocfs2_super *osb);
162int ocfs2_journal_wipe(struct ocfs2_journal *journal, 162int ocfs2_journal_wipe(struct ocfs2_journal *journal,
163 int full); 163 int full);
164int ocfs2_journal_load(struct ocfs2_journal *journal, int local); 164int ocfs2_journal_load(struct ocfs2_journal *journal, int local,
165 int replayed);
165int ocfs2_check_journals_nolocks(struct ocfs2_super *osb); 166int ocfs2_check_journals_nolocks(struct ocfs2_super *osb);
166void ocfs2_recovery_thread(struct ocfs2_super *osb, 167void ocfs2_recovery_thread(struct ocfs2_super *osb,
167 int node_num); 168 int node_num);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1cb814be8ef1..7f625f2b1117 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -204,6 +204,8 @@ struct ocfs2_super
204 204
205 struct ocfs2_slot_info *slot_info; 205 struct ocfs2_slot_info *slot_info;
206 206
207 u32 *slot_recovery_generations;
208
207 spinlock_t node_map_lock; 209 spinlock_t node_map_lock;
208 210
209 u64 root_blkno; 211 u64 root_blkno;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3f1945177629..4f619850ccf7 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -660,7 +660,10 @@ struct ocfs2_dinode {
660 struct { /* Info for journal system 660 struct { /* Info for journal system
661 inodes */ 661 inodes */
662 __le32 ij_flags; /* Mounted, version, etc. */ 662 __le32 ij_flags; /* Mounted, version, etc. */
663 __le32 ij_pad; 663 __le32 ij_recovery_generation; /* Incremented when the
664 journal is recovered
665 after an unclean
666 shutdown */
664 } journal1; 667 } journal1;
665 } id1; /* Inode type dependant 1 */ 668 } id1; /* Inode type dependant 1 */
666/*C0*/ union { 669/*C0*/ union {
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 10e149ae5e3a..07f348b8d721 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -97,13 +97,14 @@ static int ocfs2_stack_driver_request(const char *stack_name,
97 goto out; 97 goto out;
98 } 98 }
99 99
100 /* Ok, the stack is pinned */
101 p->sp_count++;
102 active_stack = p; 100 active_stack = p;
103
104 rc = 0; 101 rc = 0;
105 102
106out: 103out:
104 /* If we found it, pin it */
105 if (!rc)
106 active_stack->sp_count++;
107
107 spin_unlock(&ocfs2_stack_lock); 108 spin_unlock(&ocfs2_stack_lock);
108 return rc; 109 return rc;
109} 110}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2560b33889aa..88255d3f52b4 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1442,6 +1442,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
1442 } 1442 }
1443 mlog(0, "max_slots for this device: %u\n", osb->max_slots); 1443 mlog(0, "max_slots for this device: %u\n", osb->max_slots);
1444 1444
1445 osb->slot_recovery_generations =
1446 kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations),
1447 GFP_KERNEL);
1448 if (!osb->slot_recovery_generations) {
1449 status = -ENOMEM;
1450 mlog_errno(status);
1451 goto bail;
1452 }
1453
1445 init_waitqueue_head(&osb->osb_wipe_event); 1454 init_waitqueue_head(&osb->osb_wipe_event);
1446 osb->osb_orphan_wipes = kcalloc(osb->max_slots, 1455 osb->osb_orphan_wipes = kcalloc(osb->max_slots,
1447 sizeof(*osb->osb_orphan_wipes), 1456 sizeof(*osb->osb_orphan_wipes),
@@ -1703,7 +1712,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
1703 local = ocfs2_mount_local(osb); 1712 local = ocfs2_mount_local(osb);
1704 1713
1705 /* will play back anything left in the journal. */ 1714 /* will play back anything left in the journal. */
1706 status = ocfs2_journal_load(osb->journal, local); 1715 status = ocfs2_journal_load(osb->journal, local, dirty);
1707 if (status < 0) { 1716 if (status < 0) {
1708 mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); 1717 mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status);
1709 goto finally; 1718 goto finally;
@@ -1768,6 +1777,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
1768 ocfs2_free_slot_info(osb); 1777 ocfs2_free_slot_info(osb);
1769 1778
1770 kfree(osb->osb_orphan_wipes); 1779 kfree(osb->osb_orphan_wipes);
1780 kfree(osb->slot_recovery_generations);
1771 /* FIXME 1781 /* FIXME
1772 * This belongs in journal shutdown, but because we have to 1782 * This belongs in journal shutdown, but because we have to
1773 * allocate osb->journal at the start of ocfs2_initalize_osb(), 1783 * allocate osb->journal at the start of ocfs2_initalize_osb(),
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
index dc75f22be3f2..e1c0ec0ae989 100644
--- a/fs/omfs/bitmap.c
+++ b/fs/omfs/bitmap.c
@@ -71,10 +71,10 @@ static int set_run(struct super_block *sb, int map,
71 } 71 }
72 if (set) { 72 if (set) {
73 set_bit(bit, sbi->s_imap[map]); 73 set_bit(bit, sbi->s_imap[map]);
74 set_bit(bit, (long *) bh->b_data); 74 set_bit(bit, (unsigned long *)bh->b_data);
75 } else { 75 } else {
76 clear_bit(bit, sbi->s_imap[map]); 76 clear_bit(bit, sbi->s_imap[map]);
77 clear_bit(bit, (long *) bh->b_data); 77 clear_bit(bit, (unsigned long *)bh->b_data);
78 } 78 }
79 } 79 }
80 mark_buffer_dirty(bh); 80 mark_buffer_dirty(bh);
@@ -92,7 +92,7 @@ int omfs_allocate_block(struct super_block *sb, u64 block)
92 struct buffer_head *bh; 92 struct buffer_head *bh;
93 struct omfs_sb_info *sbi = OMFS_SB(sb); 93 struct omfs_sb_info *sbi = OMFS_SB(sb);
94 int bits_per_entry = 8 * sb->s_blocksize; 94 int bits_per_entry = 8 * sb->s_blocksize;
95 int map, bit; 95 unsigned int map, bit;
96 int ret = 0; 96 int ret = 0;
97 u64 tmp; 97 u64 tmp;
98 98
@@ -109,7 +109,7 @@ int omfs_allocate_block(struct super_block *sb, u64 block)
109 if (!bh) 109 if (!bh)
110 goto out; 110 goto out;
111 111
112 set_bit(bit, (long *) bh->b_data); 112 set_bit(bit, (unsigned long *)bh->b_data);
113 mark_buffer_dirty(bh); 113 mark_buffer_dirty(bh);
114 brelse(bh); 114 brelse(bh);
115 } 115 }
@@ -176,7 +176,8 @@ int omfs_clear_range(struct super_block *sb, u64 block, int count)
176 struct omfs_sb_info *sbi = OMFS_SB(sb); 176 struct omfs_sb_info *sbi = OMFS_SB(sb);
177 int bits_per_entry = 8 * sb->s_blocksize; 177 int bits_per_entry = 8 * sb->s_blocksize;
178 u64 tmp; 178 u64 tmp;
179 int map, bit, ret; 179 unsigned int map, bit;
180 int ret;
180 181
181 tmp = block; 182 tmp = block;
182 bit = do_div(tmp, bits_per_entry); 183 bit = do_div(tmp, bits_per_entry);
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 05a5bc31e4bd..c0757e998876 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -104,7 +104,7 @@ int omfs_make_empty(struct inode *inode, struct super_block *sb)
104 104
105 oi = (struct omfs_inode *) bh->b_data; 105 oi = (struct omfs_inode *) bh->b_data;
106 oi->i_head.h_self = cpu_to_be64(inode->i_ino); 106 oi->i_head.h_self = cpu_to_be64(inode->i_ino);
107 oi->i_sibling = ~0ULL; 107 oi->i_sibling = ~cpu_to_be64(0ULL);
108 108
109 mark_buffer_dirty(bh); 109 mark_buffer_dirty(bh);
110 brelse(bh); 110 brelse(bh);
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 66e01fae4384..834b2331f6b3 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -26,15 +26,22 @@ static int omfs_sync_file(struct file *file, struct dentry *dentry,
26 return err ? -EIO : 0; 26 return err ? -EIO : 0;
27} 27}
28 28
29static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset)
30{
31 return (sbi->s_sys_blocksize - offset -
32 sizeof(struct omfs_extent)) /
33 sizeof(struct omfs_extent_entry) + 1;
34}
35
29void omfs_make_empty_table(struct buffer_head *bh, int offset) 36void omfs_make_empty_table(struct buffer_head *bh, int offset)
30{ 37{
31 struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset]; 38 struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset];
32 39
33 oe->e_next = ~0ULL; 40 oe->e_next = ~cpu_to_be64(0ULL);
34 oe->e_extent_count = cpu_to_be32(1), 41 oe->e_extent_count = cpu_to_be32(1),
35 oe->e_fill = cpu_to_be32(0x22), 42 oe->e_fill = cpu_to_be32(0x22),
36 oe->e_entry.e_cluster = ~0ULL; 43 oe->e_entry.e_cluster = ~cpu_to_be64(0ULL);
37 oe->e_entry.e_blocks = ~0ULL; 44 oe->e_entry.e_blocks = ~cpu_to_be64(0ULL);
38} 45}
39 46
40int omfs_shrink_inode(struct inode *inode) 47int omfs_shrink_inode(struct inode *inode)
@@ -45,6 +52,7 @@ int omfs_shrink_inode(struct inode *inode)
45 struct buffer_head *bh; 52 struct buffer_head *bh;
46 u64 next, last; 53 u64 next, last;
47 u32 extent_count; 54 u32 extent_count;
55 u32 max_extents;
48 int ret; 56 int ret;
49 57
50 /* traverse extent table, freeing each entry that is greater 58 /* traverse extent table, freeing each entry that is greater
@@ -62,15 +70,18 @@ int omfs_shrink_inode(struct inode *inode)
62 goto out; 70 goto out;
63 71
64 oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]); 72 oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
73 max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START);
65 74
66 for (;;) { 75 for (;;) {
67 76
68 if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) { 77 if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next))
69 brelse(bh); 78 goto out_brelse;
70 goto out;
71 }
72 79
73 extent_count = be32_to_cpu(oe->e_extent_count); 80 extent_count = be32_to_cpu(oe->e_extent_count);
81
82 if (extent_count > max_extents)
83 goto out_brelse;
84
74 last = next; 85 last = next;
75 next = be64_to_cpu(oe->e_next); 86 next = be64_to_cpu(oe->e_next);
76 entry = &oe->e_entry; 87 entry = &oe->e_entry;
@@ -98,10 +109,14 @@ int omfs_shrink_inode(struct inode *inode)
98 if (!bh) 109 if (!bh)
99 goto out; 110 goto out;
100 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); 111 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
112 max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT);
101 } 113 }
102 ret = 0; 114 ret = 0;
103out: 115out:
104 return ret; 116 return ret;
117out_brelse:
118 brelse(bh);
119 return ret;
105} 120}
106 121
107static void omfs_truncate(struct inode *inode) 122static void omfs_truncate(struct inode *inode)
@@ -154,9 +169,7 @@ static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe,
154 goto out; 169 goto out;
155 } 170 }
156 } 171 }
157 max_count = (sbi->s_sys_blocksize - OMFS_EXTENT_START - 172 max_count = omfs_max_extents(sbi, OMFS_EXTENT_START);
158 sizeof(struct omfs_extent)) /
159 sizeof(struct omfs_extent_entry) + 1;
160 173
161 /* TODO: add a continuation block here */ 174 /* TODO: add a continuation block here */
162 if (be32_to_cpu(oe->e_extent_count) > max_count-1) 175 if (be32_to_cpu(oe->e_extent_count) > max_count-1)
@@ -225,6 +238,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
225 sector_t next, offset; 238 sector_t next, offset;
226 int ret; 239 int ret;
227 u64 new_block; 240 u64 new_block;
241 u32 max_extents;
228 int extent_count; 242 int extent_count;
229 struct omfs_extent *oe; 243 struct omfs_extent *oe;
230 struct omfs_extent_entry *entry; 244 struct omfs_extent_entry *entry;
@@ -238,6 +252,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
238 goto out; 252 goto out;
239 253
240 oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]); 254 oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
255 max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START);
241 next = inode->i_ino; 256 next = inode->i_ino;
242 257
243 for (;;) { 258 for (;;) {
@@ -249,6 +264,9 @@ static int omfs_get_block(struct inode *inode, sector_t block,
249 next = be64_to_cpu(oe->e_next); 264 next = be64_to_cpu(oe->e_next);
250 entry = &oe->e_entry; 265 entry = &oe->e_entry;
251 266
267 if (extent_count > max_extents)
268 goto out_brelse;
269
252 offset = find_block(inode, entry, block, extent_count, &remain); 270 offset = find_block(inode, entry, block, extent_count, &remain);
253 if (offset > 0) { 271 if (offset > 0) {
254 ret = 0; 272 ret = 0;
@@ -266,6 +284,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
266 if (!bh) 284 if (!bh)
267 goto out; 285 goto out;
268 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); 286 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
287 max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT);
269 } 288 }
270 if (create) { 289 if (create) {
271 ret = omfs_grow_extent(inode, oe, &new_block); 290 ret = omfs_grow_extent(inode, oe, &new_block);
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index d865f5535436..d29047b1b9b0 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -232,8 +232,7 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino)
232 inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask); 232 inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask);
233 inode->i_op = &omfs_dir_inops; 233 inode->i_op = &omfs_dir_inops;
234 inode->i_fop = &omfs_dir_operations; 234 inode->i_fop = &omfs_dir_operations;
235 inode->i_size = be32_to_cpu(oi->i_head.h_body_size) + 235 inode->i_size = sbi->s_sys_blocksize;
236 sizeof(struct omfs_header);
237 inc_nlink(inode); 236 inc_nlink(inode);
238 break; 237 break;
239 case OMFS_FILE: 238 case OMFS_FILE:
@@ -492,7 +491,8 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
492 if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) { 491 if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) {
493 printk(KERN_ERR "omfs: block count discrepancy between " 492 printk(KERN_ERR "omfs: block count discrepancy between "
494 "super and root blocks (%llx, %llx)\n", 493 "super and root blocks (%llx, %llx)\n",
495 sbi->s_num_blocks, be64_to_cpu(omfs_rb->r_num_blocks)); 494 (unsigned long long)sbi->s_num_blocks,
495 (unsigned long long)be64_to_cpu(omfs_rb->r_num_blocks));
496 goto out_brelse_bh2; 496 goto out_brelse_bh2;
497 } 497 }
498 498
diff --git a/fs/open.c b/fs/open.c
index 52647be277a2..07da9359481c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -963,62 +963,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
963} 963}
964EXPORT_SYMBOL(dentry_open); 964EXPORT_SYMBOL(dentry_open);
965 965
966/*
967 * Find an empty file descriptor entry, and mark it busy.
968 */
969int get_unused_fd_flags(int flags)
970{
971 struct files_struct * files = current->files;
972 int fd, error;
973 struct fdtable *fdt;
974
975 spin_lock(&files->file_lock);
976
977repeat:
978 fdt = files_fdtable(files);
979 fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
980 files->next_fd);
981
982 /* Do we need to expand the fd array or fd set? */
983 error = expand_files(files, fd);
984 if (error < 0)
985 goto out;
986
987 if (error) {
988 /*
989 * If we needed to expand the fs array we
990 * might have blocked - try again.
991 */
992 goto repeat;
993 }
994
995 FD_SET(fd, fdt->open_fds);
996 if (flags & O_CLOEXEC)
997 FD_SET(fd, fdt->close_on_exec);
998 else
999 FD_CLR(fd, fdt->close_on_exec);
1000 files->next_fd = fd + 1;
1001#if 1
1002 /* Sanity check */
1003 if (fdt->fd[fd] != NULL) {
1004 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
1005 fdt->fd[fd] = NULL;
1006 }
1007#endif
1008 error = fd;
1009
1010out:
1011 spin_unlock(&files->file_lock);
1012 return error;
1013}
1014
1015int get_unused_fd(void)
1016{
1017 return get_unused_fd_flags(0);
1018}
1019
1020EXPORT_SYMBOL(get_unused_fd);
1021
1022static void __put_unused_fd(struct files_struct *files, unsigned int fd) 966static void __put_unused_fd(struct files_struct *files, unsigned int fd)
1023{ 967{
1024 struct fdtable *fdt = files_fdtable(files); 968 struct fdtable *fdt = files_fdtable(files);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0d6eb33597c6..71c9be59c9c2 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
337 return 0; 337 return 0;
338} 338}
339 339
340/*
341 * Use precise platform statistics if available:
342 */
343#ifdef CONFIG_VIRT_CPU_ACCOUNTING
344static cputime_t task_utime(struct task_struct *p)
345{
346 return p->utime;
347}
348
349static cputime_t task_stime(struct task_struct *p)
350{
351 return p->stime;
352}
353#else
354static cputime_t task_utime(struct task_struct *p)
355{
356 clock_t utime = cputime_to_clock_t(p->utime),
357 total = utime + cputime_to_clock_t(p->stime);
358 u64 temp;
359
360 /*
361 * Use CFS's precise accounting:
362 */
363 temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
364
365 if (total) {
366 temp *= utime;
367 do_div(temp, total);
368 }
369 utime = (clock_t)temp;
370
371 p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
372 return p->prev_utime;
373}
374
375static cputime_t task_stime(struct task_struct *p)
376{
377 clock_t stime;
378
379 /*
380 * Use CFS's precise accounting. (we subtract utime from
381 * the total, to make sure the total observed by userspace
382 * grows monotonically - apps rely on that):
383 */
384 stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
385 cputime_to_clock_t(task_utime(p));
386
387 if (stime >= 0)
388 p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
389
390 return p->prev_stime;
391}
392#endif
393
394static cputime_t task_gtime(struct task_struct *p)
395{
396 return p->gtime;
397}
398
399static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, 340static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
400 struct pid *pid, struct task_struct *task, int whole) 341 struct pid *pid, struct task_struct *task, int whole)
401{ 342{
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 01ed610f9b87..a28840b11b89 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2423,10 +2423,13 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2423 "read_bytes: %llu\n" 2423 "read_bytes: %llu\n"
2424 "write_bytes: %llu\n" 2424 "write_bytes: %llu\n"
2425 "cancelled_write_bytes: %llu\n", 2425 "cancelled_write_bytes: %llu\n",
2426 acct.rchar, acct.wchar, 2426 (unsigned long long)acct.rchar,
2427 acct.syscr, acct.syscw, 2427 (unsigned long long)acct.wchar,
2428 acct.read_bytes, acct.write_bytes, 2428 (unsigned long long)acct.syscr,
2429 acct.cancelled_write_bytes); 2429 (unsigned long long)acct.syscw,
2430 (unsigned long long)acct.read_bytes,
2431 (unsigned long long)acct.write_bytes,
2432 (unsigned long long)acct.cancelled_write_bytes);
2430} 2433}
2431 2434
2432static int proc_tid_io_accounting(struct task_struct *task, char *buffer) 2435static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index cb4096cc3fb7..bca0f81eb687 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -300,10 +300,10 @@ out:
300 return rtn; 300 return rtn;
301} 301}
302 302
303static DEFINE_IDR(proc_inum_idr); 303static DEFINE_IDA(proc_inum_ida);
304static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 304static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
305 305
306#define PROC_DYNAMIC_FIRST 0xF0000000UL 306#define PROC_DYNAMIC_FIRST 0xF0000000U
307 307
308/* 308/*
309 * Return an inode number between PROC_DYNAMIC_FIRST and 309 * Return an inode number between PROC_DYNAMIC_FIRST and
@@ -311,36 +311,34 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
311 */ 311 */
312static unsigned int get_inode_number(void) 312static unsigned int get_inode_number(void)
313{ 313{
314 int i, inum = 0; 314 unsigned int i;
315 int error; 315 int error;
316 316
317retry: 317retry:
318 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 318 if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
319 return 0; 319 return 0;
320 320
321 spin_lock(&proc_inum_lock); 321 spin_lock(&proc_inum_lock);
322 error = idr_get_new(&proc_inum_idr, NULL, &i); 322 error = ida_get_new(&proc_inum_ida, &i);
323 spin_unlock(&proc_inum_lock); 323 spin_unlock(&proc_inum_lock);
324 if (error == -EAGAIN) 324 if (error == -EAGAIN)
325 goto retry; 325 goto retry;
326 else if (error) 326 else if (error)
327 return 0; 327 return 0;
328 328
329 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 329 if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
330 330 spin_lock(&proc_inum_lock);
331 /* inum will never be more than 0xf0ffffff, so no check 331 ida_remove(&proc_inum_ida, i);
332 * for overflow. 332 spin_unlock(&proc_inum_lock);
333 */ 333 return 0;
334 334 }
335 return inum; 335 return PROC_DYNAMIC_FIRST + i;
336} 336}
337 337
338static void release_inode_number(unsigned int inum) 338static void release_inode_number(unsigned int inum)
339{ 339{
340 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;
341
342 spin_lock(&proc_inum_lock); 340 spin_lock(&proc_inum_lock);
343 idr_remove(&proc_inum_idr, id); 341 ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
344 spin_unlock(&proc_inum_lock); 342 spin_unlock(&proc_inum_lock);
345} 343}
346 344
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 79ecd281d2cb..3f87d2632947 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -52,14 +52,14 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
52 } 52 }
53 53
54 seq_printf(m, 54 seq_printf(m,
55 "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", 55 "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
56 vma->vm_start, 56 vma->vm_start,
57 vma->vm_end, 57 vma->vm_end,
58 flags & VM_READ ? 'r' : '-', 58 flags & VM_READ ? 'r' : '-',
59 flags & VM_WRITE ? 'w' : '-', 59 flags & VM_WRITE ? 'w' : '-',
60 flags & VM_EXEC ? 'x' : '-', 60 flags & VM_EXEC ? 'x' : '-',
61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', 61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
62 vma->vm_pgoff << PAGE_SHIFT, 62 ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
63 MAJOR(dev), MINOR(dev), ino, &len); 63 MAJOR(dev), MINOR(dev), ino, &len);
64 64
65 if (file) { 65 if (file) {
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index ded969862960..00f10a2dcf12 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -24,6 +24,7 @@
24#include <linux/tty.h> 24#include <linux/tty.h>
25#include <linux/string.h> 25#include <linux/string.h>
26#include <linux/mman.h> 26#include <linux/mman.h>
27#include <linux/quicklist.h>
27#include <linux/proc_fs.h> 28#include <linux/proc_fs.h>
28#include <linux/ioport.h> 29#include <linux/ioport.h>
29#include <linux/mm.h> 30#include <linux/mm.h>
@@ -189,7 +190,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
189 "Committed_AS: %8lu kB\n" 190 "Committed_AS: %8lu kB\n"
190 "VmallocTotal: %8lu kB\n" 191 "VmallocTotal: %8lu kB\n"
191 "VmallocUsed: %8lu kB\n" 192 "VmallocUsed: %8lu kB\n"
192 "VmallocChunk: %8lu kB\n", 193 "VmallocChunk: %8lu kB\n"
194 "Quicklists: %8lu kB\n",
193 K(i.totalram), 195 K(i.totalram),
194 K(i.freeram), 196 K(i.freeram),
195 K(i.bufferram), 197 K(i.bufferram),
@@ -221,7 +223,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
221 K(committed), 223 K(committed),
222 (unsigned long)VMALLOC_TOTAL >> 10, 224 (unsigned long)VMALLOC_TOTAL >> 10,
223 vmi.used >> 10, 225 vmi.used >> 10,
224 vmi.largest_chunk >> 10 226 vmi.largest_chunk >> 10,
227 K(quicklist_total_size())
225 ); 228 );
226 229
227 len += hugetlb_report_meminfo(page + len); 230 len += hugetlb_report_meminfo(page + len);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7546a918f790..73d1891ee625 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -219,14 +219,14 @@ static int show_map(struct seq_file *m, void *v)
219 ino = inode->i_ino; 219 ino = inode->i_ino;
220 } 220 }
221 221
222 seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", 222 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
223 vma->vm_start, 223 vma->vm_start,
224 vma->vm_end, 224 vma->vm_end,
225 flags & VM_READ ? 'r' : '-', 225 flags & VM_READ ? 'r' : '-',
226 flags & VM_WRITE ? 'w' : '-', 226 flags & VM_WRITE ? 'w' : '-',
227 flags & VM_EXEC ? 'x' : '-', 227 flags & VM_EXEC ? 'x' : '-',
228 flags & VM_MAYSHARE ? 's' : 'p', 228 flags & VM_MAYSHARE ? 's' : 'p',
229 vma->vm_pgoff << PAGE_SHIFT, 229 ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
230 MAJOR(dev), MINOR(dev), ino, &len); 230 MAJOR(dev), MINOR(dev), ino, &len);
231 231
232 /* 232 /*
diff --git a/fs/readdir.c b/fs/readdir.c
index 4e026e5407fb..93a7559bbfd8 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -80,8 +80,10 @@ static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset
80 if (buf->result) 80 if (buf->result)
81 return -EINVAL; 81 return -EINVAL;
82 d_ino = ino; 82 d_ino = ino;
83 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) 83 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
84 buf->result = -EOVERFLOW;
84 return -EOVERFLOW; 85 return -EOVERFLOW;
86 }
85 buf->result++; 87 buf->result++;
86 dirent = buf->dirent; 88 dirent = buf->dirent;
87 if (!access_ok(VERIFY_WRITE, dirent, 89 if (!access_ok(VERIFY_WRITE, dirent,
@@ -155,8 +157,10 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset,
155 if (reclen > buf->count) 157 if (reclen > buf->count)
156 return -EINVAL; 158 return -EINVAL;
157 d_ino = ino; 159 d_ino = ino;
158 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) 160 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
161 buf->error = -EOVERFLOW;
159 return -EOVERFLOW; 162 return -EOVERFLOW;
163 }
160 dirent = buf->previous; 164 dirent = buf->previous;
161 if (dirent) { 165 if (dirent) {
162 if (__put_user(offset, &dirent->d_off)) 166 if (__put_user(offset, &dirent->d_off))
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 192269698a8a..5699171212ae 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2435,7 +2435,7 @@ static int reiserfs_write_full_page(struct page *page,
2435 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 2435 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
2436 lock_buffer(bh); 2436 lock_buffer(bh);
2437 } else { 2437 } else {
2438 if (test_set_buffer_locked(bh)) { 2438 if (!trylock_buffer(bh)) {
2439 redirty_page_for_writepage(wbc, page); 2439 redirty_page_for_writepage(wbc, page);
2440 continue; 2440 continue;
2441 } 2441 }
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c8f60ee183b5..c21df71943a6 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -627,7 +627,7 @@ static int journal_list_still_alive(struct super_block *s,
627static void release_buffer_page(struct buffer_head *bh) 627static void release_buffer_page(struct buffer_head *bh)
628{ 628{
629 struct page *page = bh->b_page; 629 struct page *page = bh->b_page;
630 if (!page->mapping && !TestSetPageLocked(page)) { 630 if (!page->mapping && trylock_page(page)) {
631 page_cache_get(page); 631 page_cache_get(page);
632 put_bh(bh); 632 put_bh(bh);
633 if (!page->mapping) 633 if (!page->mapping)
@@ -855,7 +855,7 @@ static int write_ordered_buffers(spinlock_t * lock,
855 jh = JH_ENTRY(list->next); 855 jh = JH_ENTRY(list->next);
856 bh = jh->bh; 856 bh = jh->bh;
857 get_bh(bh); 857 get_bh(bh);
858 if (test_set_buffer_locked(bh)) { 858 if (!trylock_buffer(bh)) {
859 if (!buffer_dirty(bh)) { 859 if (!buffer_dirty(bh)) {
860 list_move(&jh->list, &tmp); 860 list_move(&jh->list, &tmp);
861 goto loop_next; 861 goto loop_next;
@@ -3871,7 +3871,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
3871{ 3871{
3872 PROC_INFO_INC(p_s_sb, journal.prepare); 3872 PROC_INFO_INC(p_s_sb, journal.prepare);
3873 3873
3874 if (test_set_buffer_locked(bh)) { 3874 if (!trylock_buffer(bh)) {
3875 if (!wait) 3875 if (!wait)
3876 return 0; 3876 return 0;
3877 lock_buffer(bh); 3877 lock_buffer(bh);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 879e54d35c2d..d318c7e663fa 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -27,7 +27,6 @@
27#include <linux/mnt_namespace.h> 27#include <linux/mnt_namespace.h>
28#include <linux/mount.h> 28#include <linux/mount.h>
29#include <linux/namei.h> 29#include <linux/namei.h>
30#include <linux/quotaops.h>
31 30
32struct file_system_type reiserfs_fs_type; 31struct file_system_type reiserfs_fs_type;
33 32
@@ -2076,8 +2075,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2076 return err; 2075 return err;
2077 /* Quotafile not on the same filesystem? */ 2076 /* Quotafile not on the same filesystem? */
2078 if (nd.path.mnt->mnt_sb != sb) { 2077 if (nd.path.mnt->mnt_sb != sb) {
2079 path_put(&nd.path); 2078 err = -EXDEV;
2080 return -EXDEV; 2079 goto out;
2081 } 2080 }
2082 inode = nd.path.dentry->d_inode; 2081 inode = nd.path.dentry->d_inode;
2083 /* We must not pack tails for quota files on reiserfs for quota IO to work */ 2082 /* We must not pack tails for quota files on reiserfs for quota IO to work */
@@ -2087,8 +2086,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2087 reiserfs_warning(sb, 2086 reiserfs_warning(sb,
2088 "reiserfs: Unpacking tail of quota file failed" 2087 "reiserfs: Unpacking tail of quota file failed"
2089 " (%d). Cannot turn on quotas.", err); 2088 " (%d). Cannot turn on quotas.", err);
2090 path_put(&nd.path); 2089 err = -EINVAL;
2091 return -EINVAL; 2090 goto out;
2092 } 2091 }
2093 mark_inode_dirty(inode); 2092 mark_inode_dirty(inode);
2094 } 2093 }
@@ -2109,13 +2108,15 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2109 /* Just start temporary transaction and finish it */ 2108 /* Just start temporary transaction and finish it */
2110 err = journal_begin(&th, sb, 1); 2109 err = journal_begin(&th, sb, 1);
2111 if (err) 2110 if (err)
2112 return err; 2111 goto out;
2113 err = journal_end_sync(&th, sb, 1); 2112 err = journal_end_sync(&th, sb, 1);
2114 if (err) 2113 if (err)
2115 return err; 2114 goto out;
2116 } 2115 }
2116 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
2117out:
2117 path_put(&nd.path); 2118 path_put(&nd.path);
2118 return vfs_quota_on(sb, type, format_id, path, 0); 2119 return err;
2119} 2120}
2120 2121
2121/* Read data from quotafile - avoid pagecache and such because we cannot afford 2122/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 8e51a2aaa977..60d2f822e87b 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -418,7 +418,8 @@ static int
418romfs_readpage(struct file *file, struct page * page) 418romfs_readpage(struct file *file, struct page * page)
419{ 419{
420 struct inode *inode = page->mapping->host; 420 struct inode *inode = page->mapping->host;
421 loff_t offset, avail, readlen; 421 loff_t offset, size;
422 unsigned long filled;
422 void *buf; 423 void *buf;
423 int result = -EIO; 424 int result = -EIO;
424 425
@@ -430,21 +431,29 @@ romfs_readpage(struct file *file, struct page * page)
430 431
431 /* 32 bit warning -- but not for us :) */ 432 /* 32 bit warning -- but not for us :) */
432 offset = page_offset(page); 433 offset = page_offset(page);
433 if (offset < i_size_read(inode)) { 434 size = i_size_read(inode);
434 avail = inode->i_size-offset; 435 filled = 0;
435 readlen = min_t(unsigned long, avail, PAGE_SIZE); 436 result = 0;
436 if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) { 437 if (offset < size) {
437 if (readlen < PAGE_SIZE) { 438 unsigned long readlen;
438 memset(buf + readlen,0,PAGE_SIZE-readlen); 439
439 } 440 size -= offset;
440 SetPageUptodate(page); 441 readlen = size > PAGE_SIZE ? PAGE_SIZE : size;
441 result = 0; 442
443 filled = romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen);
444
445 if (filled != readlen) {
446 SetPageError(page);
447 filled = 0;
448 result = -EIO;
442 } 449 }
443 } 450 }
444 if (result) { 451
445 memset(buf, 0, PAGE_SIZE); 452 if (filled < PAGE_SIZE)
446 SetPageError(page); 453 memset(buf + filled, 0, PAGE_SIZE-filled);
447 } 454
455 if (!result)
456 SetPageUptodate(page);
448 flush_dcache_page(page); 457 flush_dcache_page(page);
449 458
450 unlock_page(page); 459 unlock_page(page);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3f54dbd6c49b..bd20f7f5a933 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -108,9 +108,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
108 goto Done; 108 goto Done;
109 } 109 }
110 /* we need at least one record in buffer */ 110 /* we need at least one record in buffer */
111 pos = m->index;
112 p = m->op->start(m, &pos);
111 while (1) { 113 while (1) {
112 pos = m->index;
113 p = m->op->start(m, &pos);
114 err = PTR_ERR(p); 114 err = PTR_ERR(p);
115 if (!p || IS_ERR(p)) 115 if (!p || IS_ERR(p))
116 break; 116 break;
@@ -119,6 +119,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
119 break; 119 break;
120 if (unlikely(err)) 120 if (unlikely(err))
121 m->count = 0; 121 m->count = 0;
122 if (unlikely(!m->count)) {
123 p = m->op->next(m, p, &pos);
124 m->index = pos;
125 continue;
126 }
122 if (m->count < m->size) 127 if (m->count < m->size)
123 goto Fill; 128 goto Fill;
124 m->op->stop(m, p); 129 m->op->stop(m, p);
@@ -128,6 +133,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
128 goto Enomem; 133 goto Enomem;
129 m->count = 0; 134 m->count = 0;
130 m->version = 0; 135 m->version = 0;
136 pos = m->index;
137 p = m->op->start(m, &pos);
131 } 138 }
132 m->op->stop(m, p); 139 m->op->stop(m, p);
133 m->count = 0; 140 m->count = 0;
@@ -443,6 +450,20 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
443 return -1; 450 return -1;
444} 451}
445 452
453int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
454{
455 size_t len = bitmap_scnprintf_len(nr_bits);
456
457 if (m->count + len < m->size) {
458 bitmap_scnprintf(m->buf + m->count, m->size - m->count,
459 bits, nr_bits);
460 m->count += len;
461 return 0;
462 }
463 m->count = m->size;
464 return -1;
465}
466
446static void *single_start(struct seq_file *p, loff_t *pos) 467static void *single_start(struct seq_file *p, loff_t *pos)
447{ 468{
448 return NULL + (*pos == 0); 469 return NULL + (*pos == 0);
diff --git a/fs/splice.c b/fs/splice.c
index b30311ba8af6..1bbc6f4bb09c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -371,7 +371,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
371 * for an in-flight io page 371 * for an in-flight io page
372 */ 372 */
373 if (flags & SPLICE_F_NONBLOCK) { 373 if (flags & SPLICE_F_NONBLOCK) {
374 if (TestSetPageLocked(page)) { 374 if (!trylock_page(page)) {
375 error = -EAGAIN; 375 error = -EAGAIN;
376 break; 376 break;
377 } 377 }
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index d81fb9ed2b8e..154098157473 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -263,8 +263,8 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
263 263
264 idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; 264 idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
265 265
266 /* And make sure we have twice the index size of space reserved */ 266 /* And make sure we have thrice the index size of space reserved */
267 idx_size <<= 1; 267 idx_size = idx_size + (idx_size << 1);
268 268
269 /* 269 /*
270 * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' 270 * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
@@ -388,11 +388,11 @@ static int can_use_rp(struct ubifs_info *c)
388 * This function makes sure UBIFS has enough free eraseblocks for index growth 388 * This function makes sure UBIFS has enough free eraseblocks for index growth
389 * and data. 389 * and data.
390 * 390 *
391 * When budgeting index space, UBIFS reserves twice as more LEBs as the index 391 * When budgeting index space, UBIFS reserves thrice as many LEBs as the index
392 * would take if it was consolidated and written to the flash. This guarantees 392 * would take if it was consolidated and written to the flash. This guarantees
393 * that the "in-the-gaps" commit method always succeeds and UBIFS will always 393 * that the "in-the-gaps" commit method always succeeds and UBIFS will always
394 * be able to commit dirty index. So this function basically adds amount of 394 * be able to commit dirty index. So this function basically adds amount of
395 * budgeted index space to the size of the current index, multiplies this by 2, 395 * budgeted index space to the size of the current index, multiplies this by 3,
396 * and makes sure this does not exceed the amount of free eraseblocks. 396 * and makes sure this does not exceed the amount of free eraseblocks.
397 * 397 *
398 * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: 398 * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
@@ -543,8 +543,16 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
543 int err, idx_growth, data_growth, dd_growth; 543 int err, idx_growth, data_growth, dd_growth;
544 struct retries_info ri; 544 struct retries_info ri;
545 545
546 ubifs_assert(req->new_page <= 1);
547 ubifs_assert(req->dirtied_page <= 1);
548 ubifs_assert(req->new_dent <= 1);
549 ubifs_assert(req->mod_dent <= 1);
550 ubifs_assert(req->new_ino <= 1);
551 ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
546 ubifs_assert(req->dirtied_ino <= 4); 552 ubifs_assert(req->dirtied_ino <= 4);
547 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); 553 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
554 ubifs_assert(!(req->new_ino_d & 7));
555 ubifs_assert(!(req->dirtied_ino_d & 7));
548 556
549 data_growth = calc_data_growth(c, req); 557 data_growth = calc_data_growth(c, req);
550 dd_growth = calc_dd_growth(c, req); 558 dd_growth = calc_dd_growth(c, req);
@@ -618,8 +626,16 @@ again:
618 */ 626 */
619void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) 627void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
620{ 628{
629 ubifs_assert(req->new_page <= 1);
630 ubifs_assert(req->dirtied_page <= 1);
631 ubifs_assert(req->new_dent <= 1);
632 ubifs_assert(req->mod_dent <= 1);
633 ubifs_assert(req->new_ino <= 1);
634 ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
621 ubifs_assert(req->dirtied_ino <= 4); 635 ubifs_assert(req->dirtied_ino <= 4);
622 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); 636 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
637 ubifs_assert(!(req->new_ino_d & 7));
638 ubifs_assert(!(req->dirtied_ino_d & 7));
623 if (!req->recalculate) { 639 if (!req->recalculate) {
624 ubifs_assert(req->idx_growth >= 0); 640 ubifs_assert(req->idx_growth >= 0);
625 ubifs_assert(req->data_growth >= 0); 641 ubifs_assert(req->data_growth >= 0);
@@ -647,7 +663,11 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
647 663
648 ubifs_assert(c->budg_idx_growth >= 0); 664 ubifs_assert(c->budg_idx_growth >= 0);
649 ubifs_assert(c->budg_data_growth >= 0); 665 ubifs_assert(c->budg_data_growth >= 0);
666 ubifs_assert(c->budg_dd_growth >= 0);
650 ubifs_assert(c->min_idx_lebs < c->main_lebs); 667 ubifs_assert(c->min_idx_lebs < c->main_lebs);
668 ubifs_assert(!(c->budg_idx_growth & 7));
669 ubifs_assert(!(c->budg_data_growth & 7));
670 ubifs_assert(!(c->budg_dd_growth & 7));
651 spin_unlock(&c->space_lock); 671 spin_unlock(&c->space_lock);
652} 672}
653 673
@@ -686,9 +706,10 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
686void ubifs_release_dirty_inode_budget(struct ubifs_info *c, 706void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
687 struct ubifs_inode *ui) 707 struct ubifs_inode *ui)
688{ 708{
689 struct ubifs_budget_req req = {.dd_growth = c->inode_budget, 709 struct ubifs_budget_req req;
690 .dirtied_ino_d = ui->data_len};
691 710
711 memset(&req, 0, sizeof(struct ubifs_budget_req));
712 req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8);
692 ubifs_release_budget(c, &req); 713 ubifs_release_budget(c, &req);
693} 714}
694 715
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 3b516316c9b3..0a6aa2cc78f0 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -74,6 +74,7 @@ static int do_commit(struct ubifs_info *c)
74 goto out_up; 74 goto out_up;
75 } 75 }
76 76
77 c->cmt_no += 1;
77 err = ubifs_gc_start_commit(c); 78 err = ubifs_gc_start_commit(c);
78 if (err) 79 if (err)
79 goto out_up; 80 goto out_up;
@@ -115,7 +116,7 @@ static int do_commit(struct ubifs_info *c)
115 goto out; 116 goto out;
116 117
117 mutex_lock(&c->mst_mutex); 118 mutex_lock(&c->mst_mutex);
118 c->mst_node->cmt_no = cpu_to_le64(++c->cmt_no); 119 c->mst_node->cmt_no = cpu_to_le64(c->cmt_no);
119 c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); 120 c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum);
120 c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); 121 c->mst_node->root_lnum = cpu_to_le32(zroot.lnum);
121 c->mst_node->root_offs = cpu_to_le32(zroot.offs); 122 c->mst_node->root_offs = cpu_to_le32(zroot.offs);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 4e3aaeba4eca..b9cb77473758 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -568,8 +568,8 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req)
568void dbg_dump_lstats(const struct ubifs_lp_stats *lst) 568void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
569{ 569{
570 spin_lock(&dbg_lock); 570 spin_lock(&dbg_lock);
571 printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs %d\n", 571 printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, "
572 lst->empty_lebs, lst->idx_lebs); 572 "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs);
573 printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " 573 printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, "
574 "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, 574 "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free,
575 lst->total_dirty); 575 lst->total_dirty);
@@ -587,8 +587,8 @@ void dbg_dump_budg(struct ubifs_info *c)
587 struct ubifs_gced_idx_leb *idx_gc; 587 struct ubifs_gced_idx_leb *idx_gc;
588 588
589 spin_lock(&dbg_lock); 589 spin_lock(&dbg_lock);
590 printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, " 590 printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
591 "budg_dd_growth %lld, budg_idx_growth %lld\n", 591 "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
592 c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); 592 c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
593 printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " 593 printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
594 "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, 594 "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
@@ -634,7 +634,7 @@ void dbg_dump_lprops(struct ubifs_info *c)
634 struct ubifs_lprops lp; 634 struct ubifs_lprops lp;
635 struct ubifs_lp_stats lst; 635 struct ubifs_lp_stats lst;
636 636
637 printk(KERN_DEBUG "Dumping LEB properties\n"); 637 printk(KERN_DEBUG "(pid %d) Dumping LEB properties\n", current->pid);
638 ubifs_get_lp_stats(c, &lst); 638 ubifs_get_lp_stats(c, &lst);
639 dbg_dump_lstats(&lst); 639 dbg_dump_lstats(&lst);
640 640
@@ -655,7 +655,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
655 if (dbg_failure_mode) 655 if (dbg_failure_mode)
656 return; 656 return;
657 657
658 printk(KERN_DEBUG "Dumping LEB %d\n", lnum); 658 printk(KERN_DEBUG "(pid %d) Dumping LEB %d\n", current->pid, lnum);
659 659
660 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); 660 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
661 if (IS_ERR(sleb)) { 661 if (IS_ERR(sleb)) {
@@ -720,8 +720,8 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
720{ 720{
721 int i; 721 int i;
722 722
723 printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n", 723 printk(KERN_DEBUG "(pid %d) Dumping heap cat %d (%d elements)\n",
724 cat, heap->cnt); 724 current->pid, cat, heap->cnt);
725 for (i = 0; i < heap->cnt; i++) { 725 for (i = 0; i < heap->cnt; i++) {
726 struct ubifs_lprops *lprops = heap->arr[i]; 726 struct ubifs_lprops *lprops = heap->arr[i];
727 727
@@ -736,7 +736,7 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
736{ 736{
737 int i; 737 int i;
738 738
739 printk(KERN_DEBUG "Dumping pnode:\n"); 739 printk(KERN_DEBUG "(pid %d) Dumping pnode:\n", current->pid);
740 printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", 740 printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
741 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); 741 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
742 printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", 742 printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
@@ -755,7 +755,7 @@ void dbg_dump_tnc(struct ubifs_info *c)
755 int level; 755 int level;
756 756
757 printk(KERN_DEBUG "\n"); 757 printk(KERN_DEBUG "\n");
758 printk(KERN_DEBUG "Dumping the TNC tree\n"); 758 printk(KERN_DEBUG "(pid %d) Dumping the TNC tree\n", current->pid);
759 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); 759 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
760 level = znode->level; 760 level = znode->level;
761 printk(KERN_DEBUG "== Level %d ==\n", level); 761 printk(KERN_DEBUG "== Level %d ==\n", level);
@@ -2208,16 +2208,17 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
2208int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, 2208int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
2209 int offset, int len, int dtype) 2209 int offset, int len, int dtype)
2210{ 2210{
2211 int err; 2211 int err, failing;
2212 2212
2213 if (in_failure_mode(desc)) 2213 if (in_failure_mode(desc))
2214 return -EIO; 2214 return -EIO;
2215 if (do_fail(desc, lnum, 1)) 2215 failing = do_fail(desc, lnum, 1);
2216 if (failing)
2216 cut_data(buf, len); 2217 cut_data(buf, len);
2217 err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); 2218 err = ubi_leb_write(desc, lnum, buf, offset, len, dtype);
2218 if (err) 2219 if (err)
2219 return err; 2220 return err;
2220 if (in_failure_mode(desc)) 2221 if (failing)
2221 return -EIO; 2222 return -EIO;
2222 return 0; 2223 return 0;
2223} 2224}
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 3c4f1e93c9e0..50315fc57185 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -27,7 +27,7 @@
27 27
28#define UBIFS_DBG(op) op 28#define UBIFS_DBG(op) op
29 29
30#define ubifs_assert(expr) do { \ 30#define ubifs_assert(expr) do { \
31 if (unlikely(!(expr))) { \ 31 if (unlikely(!(expr))) { \
32 printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ 32 printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
33 __func__, __LINE__, current->pid); \ 33 __func__, __LINE__, current->pid); \
@@ -73,50 +73,50 @@ const char *dbg_key_str1(const struct ubifs_info *c,
73 const union ubifs_key *key); 73 const union ubifs_key *key);
74 74
75/* 75/*
76 * DBGKEY macros require dbg_lock to be held, which it is in the dbg message 76 * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message
77 * macros. 77 * macros.
78 */ 78 */
79#define DBGKEY(key) dbg_key_str0(c, (key)) 79#define DBGKEY(key) dbg_key_str0(c, (key))
80#define DBGKEY1(key) dbg_key_str1(c, (key)) 80#define DBGKEY1(key) dbg_key_str1(c, (key))
81 81
82/* General messages */ 82/* General messages */
83#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) 83#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
84 84
85/* Additional journal messages */ 85/* Additional journal messages */
86#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) 86#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
87 87
88/* Additional TNC messages */ 88/* Additional TNC messages */
89#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) 89#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
90 90
91/* Additional lprops messages */ 91/* Additional lprops messages */
92#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) 92#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
93 93
94/* Additional LEB find messages */ 94/* Additional LEB find messages */
95#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) 95#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
96 96
97/* Additional mount messages */ 97/* Additional mount messages */
98#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) 98#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
99 99
100/* Additional I/O messages */ 100/* Additional I/O messages */
101#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) 101#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
102 102
103/* Additional commit messages */ 103/* Additional commit messages */
104#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) 104#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
105 105
106/* Additional budgeting messages */ 106/* Additional budgeting messages */
107#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) 107#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
108 108
109/* Additional log messages */ 109/* Additional log messages */
110#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) 110#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
111 111
112/* Additional gc messages */ 112/* Additional gc messages */
113#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) 113#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
114 114
115/* Additional scan messages */ 115/* Additional scan messages */
116#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) 116#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
117 117
118/* Additional recovery messages */ 118/* Additional recovery messages */
119#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) 119#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
120 120
121/* 121/*
122 * Debugging message type flags (must match msg_type_names in debug.c). 122 * Debugging message type flags (must match msg_type_names in debug.c).
@@ -239,34 +239,23 @@ typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
239 struct ubifs_zbranch *zbr, void *priv); 239 struct ubifs_zbranch *zbr, void *priv);
240typedef int (*dbg_znode_callback)(struct ubifs_info *c, 240typedef int (*dbg_znode_callback)(struct ubifs_info *c,
241 struct ubifs_znode *znode, void *priv); 241 struct ubifs_znode *znode, void *priv);
242
243int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, 242int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
244 dbg_znode_callback znode_cb, void *priv); 243 dbg_znode_callback znode_cb, void *priv);
245 244
246/* Checking functions */ 245/* Checking functions */
247 246
248int dbg_check_lprops(struct ubifs_info *c); 247int dbg_check_lprops(struct ubifs_info *c);
249
250int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); 248int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
251int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); 249int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
252
253int dbg_check_cats(struct ubifs_info *c); 250int dbg_check_cats(struct ubifs_info *c);
254
255int dbg_check_ltab(struct ubifs_info *c); 251int dbg_check_ltab(struct ubifs_info *c);
256
257int dbg_check_synced_i_size(struct inode *inode); 252int dbg_check_synced_i_size(struct inode *inode);
258
259int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); 253int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
260
261int dbg_check_tnc(struct ubifs_info *c, int extra); 254int dbg_check_tnc(struct ubifs_info *c, int extra);
262
263int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); 255int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
264
265int dbg_check_filesystem(struct ubifs_info *c); 256int dbg_check_filesystem(struct ubifs_info *c);
266
267void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, 257void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
268 int add_pos); 258 int add_pos);
269
270int dbg_check_lprops(struct ubifs_info *c); 259int dbg_check_lprops(struct ubifs_info *c);
271int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, 260int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
272 int row, int col); 261 int row, int col);
@@ -329,71 +318,77 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
329#else /* !CONFIG_UBIFS_FS_DEBUG */ 318#else /* !CONFIG_UBIFS_FS_DEBUG */
330 319
331#define UBIFS_DBG(op) 320#define UBIFS_DBG(op)
332#define ubifs_assert(expr) ({}) 321
333#define ubifs_assert_cmt_locked(c) 322/* Use "if (0)" to make compiler check arguments even if debugging is off */
323#define ubifs_assert(expr) do { \
324 if (0 && (expr)) \
325 printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
326 __func__, __LINE__, current->pid); \
327} while (0)
328
329#define dbg_err(fmt, ...) do { \
330 if (0) \
331 ubifs_err(fmt, ##__VA_ARGS__); \
332} while (0)
333
334#define dbg_msg(fmt, ...) do { \
335 if (0) \
336 printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \
337 current->pid, __func__, ##__VA_ARGS__); \
338} while (0)
339
334#define dbg_dump_stack() 340#define dbg_dump_stack()
335#define dbg_err(fmt, ...) ({}) 341#define ubifs_assert_cmt_locked(c)
336#define dbg_msg(fmt, ...) ({})
337#define dbg_key(c, key, fmt, ...) ({})
338
339#define dbg_gen(fmt, ...) ({})
340#define dbg_jnl(fmt, ...) ({})
341#define dbg_tnc(fmt, ...) ({})
342#define dbg_lp(fmt, ...) ({})
343#define dbg_find(fmt, ...) ({})
344#define dbg_mnt(fmt, ...) ({})
345#define dbg_io(fmt, ...) ({})
346#define dbg_cmt(fmt, ...) ({})
347#define dbg_budg(fmt, ...) ({})
348#define dbg_log(fmt, ...) ({})
349#define dbg_gc(fmt, ...) ({})
350#define dbg_scan(fmt, ...) ({})
351#define dbg_rcvry(fmt, ...) ({})
352
353#define dbg_ntype(type) ""
354#define dbg_cstate(cmt_state) ""
355#define dbg_get_key_dump(c, key) ({})
356#define dbg_dump_inode(c, inode) ({})
357#define dbg_dump_node(c, node) ({})
358#define dbg_dump_budget_req(req) ({})
359#define dbg_dump_lstats(lst) ({})
360#define dbg_dump_budg(c) ({})
361#define dbg_dump_lprop(c, lp) ({})
362#define dbg_dump_lprops(c) ({})
363#define dbg_dump_leb(c, lnum) ({})
364#define dbg_dump_znode(c, znode) ({})
365#define dbg_dump_heap(c, heap, cat) ({})
366#define dbg_dump_pnode(c, pnode, parent, iip) ({})
367#define dbg_dump_tnc(c) ({})
368#define dbg_dump_index(c) ({})
369 342
370#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 343#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
344#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
345#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
346#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
347#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
348#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
349#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
350#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
351#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
352#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
353#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
354#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
355#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
356
357#define DBGKEY(key) ((char *)(key))
358#define DBGKEY1(key) ((char *)(key))
359
360#define dbg_ntype(type) ""
361#define dbg_cstate(cmt_state) ""
362#define dbg_get_key_dump(c, key) ({})
363#define dbg_dump_inode(c, inode) ({})
364#define dbg_dump_node(c, node) ({})
365#define dbg_dump_budget_req(req) ({})
366#define dbg_dump_lstats(lst) ({})
367#define dbg_dump_budg(c) ({})
368#define dbg_dump_lprop(c, lp) ({})
369#define dbg_dump_lprops(c) ({})
370#define dbg_dump_leb(c, lnum) ({})
371#define dbg_dump_znode(c, znode) ({})
372#define dbg_dump_heap(c, heap, cat) ({})
373#define dbg_dump_pnode(c, pnode, parent, iip) ({})
374#define dbg_dump_tnc(c) ({})
375#define dbg_dump_index(c) ({})
371 376
377#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
372#define dbg_old_index_check_init(c, zroot) 0 378#define dbg_old_index_check_init(c, zroot) 0
373#define dbg_check_old_index(c, zroot) 0 379#define dbg_check_old_index(c, zroot) 0
374
375#define dbg_check_cats(c) 0 380#define dbg_check_cats(c) 0
376
377#define dbg_check_ltab(c) 0 381#define dbg_check_ltab(c) 0
378
379#define dbg_check_synced_i_size(inode) 0 382#define dbg_check_synced_i_size(inode) 0
380
381#define dbg_check_dir_size(c, dir) 0 383#define dbg_check_dir_size(c, dir) 0
382
383#define dbg_check_tnc(c, x) 0 384#define dbg_check_tnc(c, x) 0
384
385#define dbg_check_idx_size(c, idx_size) 0 385#define dbg_check_idx_size(c, idx_size) 0
386
387#define dbg_check_filesystem(c) 0 386#define dbg_check_filesystem(c) 0
388
389#define dbg_check_heap(c, heap, cat, add_pos) ({}) 387#define dbg_check_heap(c, heap, cat, add_pos) ({})
390
391#define dbg_check_lprops(c) 0 388#define dbg_check_lprops(c) 0
392#define dbg_check_lpt_nodes(c, cnode, row, col) 0 389#define dbg_check_lpt_nodes(c, cnode, row, col) 0
393
394#define dbg_force_in_the_gaps_enabled 0 390#define dbg_force_in_the_gaps_enabled 0
395#define dbg_force_in_the_gaps() 0 391#define dbg_force_in_the_gaps() 0
396
397#define dbg_failure_mode 0 392#define dbg_failure_mode 0
398#define dbg_failure_mode_registration(c) ({}) 393#define dbg_failure_mode_registration(c) ({})
399#define dbg_failure_mode_deregistration(c) ({}) 394#define dbg_failure_mode_deregistration(c) ({})
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index e90374be7d3b..5c96f1fb7016 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -165,7 +165,6 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
165 } 165 }
166 166
167 inode->i_ino = ++c->highest_inum; 167 inode->i_ino = ++c->highest_inum;
168 inode->i_generation = ++c->vfs_gen;
169 /* 168 /*
170 * The creation sequence number remains with this inode for its 169 * The creation sequence number remains with this inode for its
171 * lifetime. All nodes for this inode have a greater sequence number, 170 * lifetime. All nodes for this inode have a greater sequence number,
@@ -220,15 +219,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
220 219
221 err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name); 220 err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name);
222 if (err) { 221 if (err) {
223 /* 222 if (err == -ENOENT) {
224 * Do not hash the direntry if parent 'i_nlink' is zero, because
225 * this has side-effects - '->delete_inode()' call will not be
226 * called for the parent orphan inode, because 'd_count' of its
227 * direntry will stay 1 (it'll be negative direntry I guess)
228 * and prevent 'iput_final()' until the dentry is destroyed due
229 * to unmount or memory pressure.
230 */
231 if (err == -ENOENT && dir->i_nlink != 0) {
232 dbg_gen("not found"); 223 dbg_gen("not found");
233 goto done; 224 goto done;
234 } 225 }
@@ -525,7 +516,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
525 struct ubifs_inode *dir_ui = ubifs_inode(dir); 516 struct ubifs_inode *dir_ui = ubifs_inode(dir);
526 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); 517 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
527 struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2, 518 struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2,
528 .dirtied_ino_d = ui->data_len }; 519 .dirtied_ino_d = ALIGN(ui->data_len, 8) };
529 520
530 /* 521 /*
531 * Budget request settings: new direntry, changing the target inode, 522 * Budget request settings: new direntry, changing the target inode,
@@ -727,8 +718,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
727 struct ubifs_inode *dir_ui = ubifs_inode(dir); 718 struct ubifs_inode *dir_ui = ubifs_inode(dir);
728 struct ubifs_info *c = dir->i_sb->s_fs_info; 719 struct ubifs_info *c = dir->i_sb->s_fs_info;
729 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); 720 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
730 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, 721 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 };
731 .dirtied_ino_d = 1 };
732 722
733 /* 723 /*
734 * Budget request settings: new inode, new direntry and changing parent 724 * Budget request settings: new inode, new direntry and changing parent
@@ -789,7 +779,8 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
789 int sz_change = CALC_DENT_SIZE(dentry->d_name.len); 779 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
790 int err, devlen = 0; 780 int err, devlen = 0;
791 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, 781 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
792 .new_ino_d = devlen, .dirtied_ino = 1 }; 782 .new_ino_d = ALIGN(devlen, 8),
783 .dirtied_ino = 1 };
793 784
794 /* 785 /*
795 * Budget request settings: new inode, new direntry and changing parent 786 * Budget request settings: new inode, new direntry and changing parent
@@ -863,7 +854,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
863 int err, len = strlen(symname); 854 int err, len = strlen(symname);
864 int sz_change = CALC_DENT_SIZE(dentry->d_name.len); 855 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
865 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, 856 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
866 .new_ino_d = len, .dirtied_ino = 1 }; 857 .new_ino_d = ALIGN(len, 8),
858 .dirtied_ino = 1 };
867 859
868 /* 860 /*
869 * Budget request settings: new inode, new direntry and changing parent 861 * Budget request settings: new inode, new direntry and changing parent
@@ -1012,7 +1004,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
1012 struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1, 1004 struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1,
1013 .dirtied_ino = 3 }; 1005 .dirtied_ino = 3 };
1014 struct ubifs_budget_req ino_req = { .dirtied_ino = 1, 1006 struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
1015 .dirtied_ino_d = old_inode_ui->data_len }; 1007 .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
1016 struct timespec time; 1008 struct timespec time;
1017 1009
1018 /* 1010 /*
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 8565e586e533..4071d1cae29f 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -890,7 +890,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
890 loff_t new_size = attr->ia_size; 890 loff_t new_size = attr->ia_size;
891 struct ubifs_inode *ui = ubifs_inode(inode); 891 struct ubifs_inode *ui = ubifs_inode(inode);
892 struct ubifs_budget_req req = { .dirtied_ino = 1, 892 struct ubifs_budget_req req = { .dirtied_ino = 1,
893 .dirtied_ino_d = ui->data_len }; 893 .dirtied_ino_d = ALIGN(ui->data_len, 8) };
894 894
895 err = ubifs_budget_space(c, &req); 895 err = ubifs_budget_space(c, &req);
896 if (err) 896 if (err)
@@ -941,7 +941,8 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
941 struct inode *inode = dentry->d_inode; 941 struct inode *inode = dentry->d_inode;
942 struct ubifs_info *c = inode->i_sb->s_fs_info; 942 struct ubifs_info *c = inode->i_sb->s_fs_info;
943 943
944 dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid); 944 dbg_gen("ino %lu, mode %#x, ia_valid %#x",
945 inode->i_ino, inode->i_mode, attr->ia_valid);
945 err = inode_change_ok(inode, attr); 946 err = inode_change_ok(inode, attr);
946 if (err) 947 if (err)
947 return err; 948 return err;
@@ -1051,7 +1052,7 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode)
1051 if (mctime_update_needed(inode, &now)) { 1052 if (mctime_update_needed(inode, &now)) {
1052 int err, release; 1053 int err, release;
1053 struct ubifs_budget_req req = { .dirtied_ino = 1, 1054 struct ubifs_budget_req req = { .dirtied_ino = 1,
1054 .dirtied_ino_d = ui->data_len }; 1055 .dirtied_ino_d = ALIGN(ui->data_len, 8) };
1055 1056
1056 err = ubifs_budget_space(c, &req); 1057 err = ubifs_budget_space(c, &req);
1057 if (err) 1058 if (err)
@@ -1270,6 +1271,7 @@ struct file_operations ubifs_file_operations = {
1270 .fsync = ubifs_fsync, 1271 .fsync = ubifs_fsync,
1271 .unlocked_ioctl = ubifs_ioctl, 1272 .unlocked_ioctl = ubifs_ioctl,
1272 .splice_read = generic_file_splice_read, 1273 .splice_read = generic_file_splice_read,
1274 .splice_write = generic_file_splice_write,
1273#ifdef CONFIG_COMPAT 1275#ifdef CONFIG_COMPAT
1274 .compat_ioctl = ubifs_compat_ioctl, 1276 .compat_ioctl = ubifs_compat_ioctl,
1275#endif 1277#endif
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 10394c548367..adee7b5ddeab 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -290,9 +290,14 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
290 idx_lp = idx_heap->arr[0]; 290 idx_lp = idx_heap->arr[0];
291 sum = idx_lp->free + idx_lp->dirty; 291 sum = idx_lp->free + idx_lp->dirty;
292 /* 292 /*
293 * Since we reserve twice as more space for the index than it 293 * Since we reserve thrice as much space for the index than it
294 * actually takes, it does not make sense to pick indexing LEBs 294 * actually takes, it does not make sense to pick indexing LEBs
295 * with less than half LEB of dirty space. 295 * with less than, say, half LEB of dirty space. May be half is
296 * not the optimal boundary - this should be tested and
297 * checked. This boundary should determine how much we use
298 * in-the-gaps to consolidate the index comparing to how much
299 * we use garbage collector to consolidate it. The "half"
300 * criteria just feels to be fine.
296 */ 301 */
297 if (sum < min_space || sum < c->half_leb_size) 302 if (sum < min_space || sum < c->half_leb_size)
298 idx_lp = NULL; 303 idx_lp = NULL;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 3374f91b6709..054363f2b207 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -54,6 +54,20 @@
54#include "ubifs.h" 54#include "ubifs.h"
55 55
56/** 56/**
57 * ubifs_ro_mode - switch UBIFS to read read-only mode.
58 * @c: UBIFS file-system description object
59 * @err: error code which is the reason of switching to R/O mode
60 */
61void ubifs_ro_mode(struct ubifs_info *c, int err)
62{
63 if (!c->ro_media) {
64 c->ro_media = 1;
65 ubifs_warn("switched to read-only mode, error %d", err);
66 dbg_dump_stack();
67 }
68}
69
70/**
57 * ubifs_check_node - check node. 71 * ubifs_check_node - check node.
58 * @c: UBIFS file-system description object 72 * @c: UBIFS file-system description object
59 * @buf: node to check 73 * @buf: node to check
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 283155abe5f5..22993f867d19 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -447,13 +447,11 @@ static int get_dent_type(int mode)
447 * @ino: buffer in which to pack inode node 447 * @ino: buffer in which to pack inode node
448 * @inode: inode to pack 448 * @inode: inode to pack
449 * @last: indicates the last node of the group 449 * @last: indicates the last node of the group
450 * @last_reference: non-zero if this is a deletion inode
451 */ 450 */
452static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, 451static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
453 const struct inode *inode, int last, 452 const struct inode *inode, int last)
454 int last_reference)
455{ 453{
456 int data_len = 0; 454 int data_len = 0, last_reference = !inode->i_nlink;
457 struct ubifs_inode *ui = ubifs_inode(inode); 455 struct ubifs_inode *ui = ubifs_inode(inode);
458 456
459 ino->ch.node_type = UBIFS_INO_NODE; 457 ino->ch.node_type = UBIFS_INO_NODE;
@@ -596,9 +594,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
596 ubifs_prep_grp_node(c, dent, dlen, 0); 594 ubifs_prep_grp_node(c, dent, dlen, 0);
597 595
598 ino = (void *)dent + aligned_dlen; 596 ino = (void *)dent + aligned_dlen;
599 pack_inode(c, ino, inode, 0, last_reference); 597 pack_inode(c, ino, inode, 0);
600 ino = (void *)ino + aligned_ilen; 598 ino = (void *)ino + aligned_ilen;
601 pack_inode(c, ino, dir, 1, 0); 599 pack_inode(c, ino, dir, 1);
602 600
603 if (last_reference) { 601 if (last_reference) {
604 err = ubifs_add_orphan(c, inode->i_ino); 602 err = ubifs_add_orphan(c, inode->i_ino);
@@ -606,6 +604,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
606 release_head(c, BASEHD); 604 release_head(c, BASEHD);
607 goto out_finish; 605 goto out_finish;
608 } 606 }
607 ui->del_cmtno = c->cmt_no;
609 } 608 }
610 609
611 err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync); 610 err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync);
@@ -750,30 +749,25 @@ out_free:
750 * ubifs_jnl_write_inode - flush inode to the journal. 749 * ubifs_jnl_write_inode - flush inode to the journal.
751 * @c: UBIFS file-system description object 750 * @c: UBIFS file-system description object
752 * @inode: inode to flush 751 * @inode: inode to flush
753 * @deletion: inode has been deleted
754 * 752 *
755 * This function writes inode @inode to the journal. If the inode is 753 * This function writes inode @inode to the journal. If the inode is
756 * synchronous, it also synchronizes the write-buffer. Returns zero in case of 754 * synchronous, it also synchronizes the write-buffer. Returns zero in case of
757 * success and a negative error code in case of failure. 755 * success and a negative error code in case of failure.
758 */ 756 */
759int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, 757int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
760 int deletion)
761{ 758{
762 int err, len, lnum, offs, sync = 0; 759 int err, lnum, offs;
763 struct ubifs_ino_node *ino; 760 struct ubifs_ino_node *ino;
764 struct ubifs_inode *ui = ubifs_inode(inode); 761 struct ubifs_inode *ui = ubifs_inode(inode);
762 int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink;
765 763
766 dbg_jnl("ino %lu%s", inode->i_ino, 764 dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
767 deletion ? " (last reference)" : "");
768 if (deletion)
769 ubifs_assert(inode->i_nlink == 0);
770 765
771 len = UBIFS_INO_NODE_SZ;
772 /* 766 /*
773 * If the inode is being deleted, do not write the attached data. No 767 * If the inode is being deleted, do not write the attached data. No
774 * need to synchronize the write-buffer either. 768 * need to synchronize the write-buffer either.
775 */ 769 */
776 if (!deletion) { 770 if (!last_reference) {
777 len += ui->data_len; 771 len += ui->data_len;
778 sync = IS_SYNC(inode); 772 sync = IS_SYNC(inode);
779 } 773 }
@@ -786,7 +780,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
786 if (err) 780 if (err)
787 goto out_free; 781 goto out_free;
788 782
789 pack_inode(c, ino, inode, 1, deletion); 783 pack_inode(c, ino, inode, 1);
790 err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync); 784 err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
791 if (err) 785 if (err)
792 goto out_release; 786 goto out_release;
@@ -795,7 +789,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
795 inode->i_ino); 789 inode->i_ino);
796 release_head(c, BASEHD); 790 release_head(c, BASEHD);
797 791
798 if (deletion) { 792 if (last_reference) {
799 err = ubifs_tnc_remove_ino(c, inode->i_ino); 793 err = ubifs_tnc_remove_ino(c, inode->i_ino);
800 if (err) 794 if (err)
801 goto out_ro; 795 goto out_ro;
@@ -828,6 +822,65 @@ out_free:
828} 822}
829 823
830/** 824/**
825 * ubifs_jnl_delete_inode - delete an inode.
826 * @c: UBIFS file-system description object
827 * @inode: inode to delete
828 *
829 * This function deletes inode @inode which includes removing it from orphans,
830 * deleting it from TNC and, in some cases, writing a deletion inode to the
831 * journal.
832 *
833 * When regular file inodes are unlinked or a directory inode is removed, the
834 * 'ubifs_jnl_update()' function writes a corresponding deletion inode and
835 * direntry to the media, and adds the inode to orphans. After this, when the
836 * last reference to this inode has been dropped, this function is called. In
837 * general, it has to write one more deletion inode to the media, because if
838 * a commit happened between 'ubifs_jnl_update()' and
839 * 'ubifs_jnl_delete_inode()', the deletion inode is not in the journal
840 * anymore, and in fact it might not be on the flash anymore, because it might
841 * have been garbage-collected already. And for optimization reasons UBIFS does
842 * not read the orphan area if it has been unmounted cleanly, so it would have
843 * no indication in the journal that there is a deleted inode which has to be
844 * removed from TNC.
845 *
846 * However, if there was no commit between 'ubifs_jnl_update()' and
847 * 'ubifs_jnl_delete_inode()', then there is no need to write the deletion
848 * inode to the media for the second time. And this is quite a typical case.
849 *
850 * This function returns zero in case of success and a negative error code in
851 * case of failure.
852 */
853int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode)
854{
855 int err;
856 struct ubifs_inode *ui = ubifs_inode(inode);
857
858 ubifs_assert(inode->i_nlink == 0);
859
860 if (ui->del_cmtno != c->cmt_no)
861 /* A commit happened for sure */
862 return ubifs_jnl_write_inode(c, inode);
863
864 down_read(&c->commit_sem);
865 /*
866 * Check commit number again, because the first test has been done
867 * without @c->commit_sem, so a commit might have happened.
868 */
869 if (ui->del_cmtno != c->cmt_no) {
870 up_read(&c->commit_sem);
871 return ubifs_jnl_write_inode(c, inode);
872 }
873
874 err = ubifs_tnc_remove_ino(c, inode->i_ino);
875 if (err)
876 ubifs_ro_mode(c, err);
877 else
878 ubifs_delete_orphan(c, inode->i_ino);
879 up_read(&c->commit_sem);
880 return err;
881}
882
883/**
831 * ubifs_jnl_rename - rename a directory entry. 884 * ubifs_jnl_rename - rename a directory entry.
832 * @c: UBIFS file-system description object 885 * @c: UBIFS file-system description object
833 * @old_dir: parent inode of directory entry to rename 886 * @old_dir: parent inode of directory entry to rename
@@ -917,16 +970,16 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
917 970
918 p = (void *)dent2 + aligned_dlen2; 971 p = (void *)dent2 + aligned_dlen2;
919 if (new_inode) { 972 if (new_inode) {
920 pack_inode(c, p, new_inode, 0, last_reference); 973 pack_inode(c, p, new_inode, 0);
921 p += ALIGN(ilen, 8); 974 p += ALIGN(ilen, 8);
922 } 975 }
923 976
924 if (!move) 977 if (!move)
925 pack_inode(c, p, old_dir, 1, 0); 978 pack_inode(c, p, old_dir, 1);
926 else { 979 else {
927 pack_inode(c, p, old_dir, 0, 0); 980 pack_inode(c, p, old_dir, 0);
928 p += ALIGN(plen, 8); 981 p += ALIGN(plen, 8);
929 pack_inode(c, p, new_dir, 1, 0); 982 pack_inode(c, p, new_dir, 1);
930 } 983 }
931 984
932 if (last_reference) { 985 if (last_reference) {
@@ -935,6 +988,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
935 release_head(c, BASEHD); 988 release_head(c, BASEHD);
936 goto out_finish; 989 goto out_finish;
937 } 990 }
991 new_ui->del_cmtno = c->cmt_no;
938 } 992 }
939 993
940 err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync); 994 err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync);
@@ -1131,7 +1185,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
1131 if (err) 1185 if (err)
1132 goto out_free; 1186 goto out_free;
1133 1187
1134 pack_inode(c, ino, inode, 0, 0); 1188 pack_inode(c, ino, inode, 0);
1135 ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1); 1189 ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1);
1136 if (dlen) 1190 if (dlen)
1137 ubifs_prep_grp_node(c, dn, dlen, 1); 1191 ubifs_prep_grp_node(c, dn, dlen, 1);
@@ -1251,9 +1305,9 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
1251 ubifs_prep_grp_node(c, xent, xlen, 0); 1305 ubifs_prep_grp_node(c, xent, xlen, 0);
1252 1306
1253 ino = (void *)xent + aligned_xlen; 1307 ino = (void *)xent + aligned_xlen;
1254 pack_inode(c, ino, inode, 0, 1); 1308 pack_inode(c, ino, inode, 0);
1255 ino = (void *)ino + UBIFS_INO_NODE_SZ; 1309 ino = (void *)ino + UBIFS_INO_NODE_SZ;
1256 pack_inode(c, ino, host, 1, 0); 1310 pack_inode(c, ino, host, 1);
1257 1311
1258 err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync); 1312 err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync);
1259 if (!sync && !err) 1313 if (!sync && !err)
@@ -1320,7 +1374,7 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
1320 const struct inode *host) 1374 const struct inode *host)
1321{ 1375{
1322 int err, len1, len2, aligned_len, aligned_len1, lnum, offs; 1376 int err, len1, len2, aligned_len, aligned_len1, lnum, offs;
1323 struct ubifs_inode *host_ui = ubifs_inode(inode); 1377 struct ubifs_inode *host_ui = ubifs_inode(host);
1324 struct ubifs_ino_node *ino; 1378 struct ubifs_ino_node *ino;
1325 union ubifs_key key; 1379 union ubifs_key key;
1326 int sync = IS_DIRSYNC(host); 1380 int sync = IS_DIRSYNC(host);
@@ -1344,8 +1398,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
1344 if (err) 1398 if (err)
1345 goto out_free; 1399 goto out_free;
1346 1400
1347 pack_inode(c, ino, host, 0, 0); 1401 pack_inode(c, ino, host, 0);
1348 pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0); 1402 pack_inode(c, (void *)ino + aligned_len1, inode, 1);
1349 1403
1350 err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0); 1404 err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0);
1351 if (!sync && !err) { 1405 if (!sync && !err) {
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 36857b9ed59e..3e0aa7367556 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -317,6 +317,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
317 return 0; 317 return 0;
318 318
319out_unlock: 319out_unlock:
320 if (err != -EAGAIN)
321 ubifs_ro_mode(c, err);
320 mutex_unlock(&c->log_mutex); 322 mutex_unlock(&c->log_mutex);
321 kfree(ref); 323 kfree(ref);
322 kfree(bud); 324 kfree(bud);
@@ -410,7 +412,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
410 return -ENOMEM; 412 return -ENOMEM;
411 413
412 cs->ch.node_type = UBIFS_CS_NODE; 414 cs->ch.node_type = UBIFS_CS_NODE;
413 cs->cmt_no = cpu_to_le64(c->cmt_no + 1); 415 cs->cmt_no = cpu_to_le64(c->cmt_no);
414 ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0); 416 ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
415 417
416 /* 418 /*
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 4beccfc256d2..87dabf9fe742 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -80,20 +80,6 @@ static inline struct ubifs_inode *ubifs_inode(const struct inode *inode)
80} 80}
81 81
82/** 82/**
83 * ubifs_ro_mode - switch UBIFS to read read-only mode.
84 * @c: UBIFS file-system description object
85 * @err: error code which is the reason of switching to R/O mode
86 */
87static inline void ubifs_ro_mode(struct ubifs_info *c, int err)
88{
89 if (!c->ro_media) {
90 c->ro_media = 1;
91 ubifs_warn("switched to read-only mode, error %d", err);
92 dbg_dump_stack();
93 }
94}
95
96/**
97 * ubifs_compr_present - check if compressor was compiled in. 83 * ubifs_compr_present - check if compressor was compiled in.
98 * @compr_type: compressor type to check 84 * @compr_type: compressor type to check
99 * 85 *
@@ -322,7 +308,7 @@ static inline long long ubifs_reported_space(const struct ubifs_info *c,
322{ 308{
323 int divisor, factor; 309 int divisor, factor;
324 310
325 divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1); 311 divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3);
326 factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; 312 factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
327 do_div(free, divisor); 313 do_div(free, divisor);
328 314
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 3afeb9242c6a..02d3462f4d3e 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -310,10 +310,10 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
310 c->cmt_orphans -= cnt; 310 c->cmt_orphans -= cnt;
311 spin_unlock(&c->orphan_lock); 311 spin_unlock(&c->orphan_lock);
312 if (c->cmt_orphans) 312 if (c->cmt_orphans)
313 orph->cmt_no = cpu_to_le64(c->cmt_no + 1); 313 orph->cmt_no = cpu_to_le64(c->cmt_no);
314 else 314 else
315 /* Mark the last node of the commit */ 315 /* Mark the last node of the commit */
316 orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63)); 316 orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63));
317 ubifs_assert(c->ohead_offs + len <= c->leb_size); 317 ubifs_assert(c->ohead_offs + len <= c->leb_size);
318 ubifs_assert(c->ohead_lnum >= c->orph_first); 318 ubifs_assert(c->ohead_lnum >= c->orph_first);
319 ubifs_assert(c->ohead_lnum <= c->orph_last); 319 ubifs_assert(c->ohead_lnum <= c->orph_last);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index ca1e2d4e03cc..f71e6b8822c4 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -30,7 +30,6 @@
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/module.h> 31#include <linux/module.h>
32#include <linux/ctype.h> 32#include <linux/ctype.h>
33#include <linux/random.h>
34#include <linux/kthread.h> 33#include <linux/kthread.h>
35#include <linux/parser.h> 34#include <linux/parser.h>
36#include <linux/seq_file.h> 35#include <linux/seq_file.h>
@@ -149,7 +148,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
149 if (err) 148 if (err)
150 goto out_invalid; 149 goto out_invalid;
151 150
152 /* Disable readahead */ 151 /* Disable read-ahead */
153 inode->i_mapping->backing_dev_info = &c->bdi; 152 inode->i_mapping->backing_dev_info = &c->bdi;
154 153
155 switch (inode->i_mode & S_IFMT) { 154 switch (inode->i_mode & S_IFMT) {
@@ -278,7 +277,7 @@ static void ubifs_destroy_inode(struct inode *inode)
278 */ 277 */
279static int ubifs_write_inode(struct inode *inode, int wait) 278static int ubifs_write_inode(struct inode *inode, int wait)
280{ 279{
281 int err; 280 int err = 0;
282 struct ubifs_info *c = inode->i_sb->s_fs_info; 281 struct ubifs_info *c = inode->i_sb->s_fs_info;
283 struct ubifs_inode *ui = ubifs_inode(inode); 282 struct ubifs_inode *ui = ubifs_inode(inode);
284 283
@@ -299,10 +298,18 @@ static int ubifs_write_inode(struct inode *inode, int wait)
299 return 0; 298 return 0;
300 } 299 }
301 300
302 dbg_gen("inode %lu", inode->i_ino); 301 /*
303 err = ubifs_jnl_write_inode(c, inode, 0); 302 * As an optimization, do not write orphan inodes to the media just
304 if (err) 303 * because this is not needed.
305 ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); 304 */
305 dbg_gen("inode %lu, mode %#x, nlink %u",
306 inode->i_ino, (int)inode->i_mode, inode->i_nlink);
307 if (inode->i_nlink) {
308 err = ubifs_jnl_write_inode(c, inode);
309 if (err)
310 ubifs_err("can't write inode %lu, error %d",
311 inode->i_ino, err);
312 }
306 313
307 ui->dirty = 0; 314 ui->dirty = 0;
308 mutex_unlock(&ui->ui_mutex); 315 mutex_unlock(&ui->ui_mutex);
@@ -314,8 +321,9 @@ static void ubifs_delete_inode(struct inode *inode)
314{ 321{
315 int err; 322 int err;
316 struct ubifs_info *c = inode->i_sb->s_fs_info; 323 struct ubifs_info *c = inode->i_sb->s_fs_info;
324 struct ubifs_inode *ui = ubifs_inode(inode);
317 325
318 if (ubifs_inode(inode)->xattr) 326 if (ui->xattr)
319 /* 327 /*
320 * Extended attribute inode deletions are fully handled in 328 * Extended attribute inode deletions are fully handled in
321 * 'ubifs_removexattr()'. These inodes are special and have 329 * 'ubifs_removexattr()'. These inodes are special and have
@@ -323,7 +331,7 @@ static void ubifs_delete_inode(struct inode *inode)
323 */ 331 */
324 goto out; 332 goto out;
325 333
326 dbg_gen("inode %lu", inode->i_ino); 334 dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
327 ubifs_assert(!atomic_read(&inode->i_count)); 335 ubifs_assert(!atomic_read(&inode->i_count));
328 ubifs_assert(inode->i_nlink == 0); 336 ubifs_assert(inode->i_nlink == 0);
329 337
@@ -331,15 +339,19 @@ static void ubifs_delete_inode(struct inode *inode)
331 if (is_bad_inode(inode)) 339 if (is_bad_inode(inode))
332 goto out; 340 goto out;
333 341
334 ubifs_inode(inode)->ui_size = inode->i_size = 0; 342 ui->ui_size = inode->i_size = 0;
335 err = ubifs_jnl_write_inode(c, inode, 1); 343 err = ubifs_jnl_delete_inode(c, inode);
336 if (err) 344 if (err)
337 /* 345 /*
338 * Worst case we have a lost orphan inode wasting space, so a 346 * Worst case we have a lost orphan inode wasting space, so a
339 * simple error message is ok here. 347 * simple error message is OK here.
340 */ 348 */
341 ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); 349 ubifs_err("can't delete inode %lu, error %d",
350 inode->i_ino, err);
351
342out: 352out:
353 if (ui->dirty)
354 ubifs_release_dirty_inode_budget(c, ui);
343 clear_inode(inode); 355 clear_inode(inode);
344} 356}
345 357
@@ -1122,8 +1134,8 @@ static int mount_ubifs(struct ubifs_info *c)
1122 if (err) 1134 if (err)
1123 goto out_infos; 1135 goto out_infos;
1124 1136
1125 ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num, 1137 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
1126 c->vi.vol_id); 1138 c->vi.ubi_num, c->vi.vol_id, c->vi.name);
1127 if (mounted_read_only) 1139 if (mounted_read_only)
1128 ubifs_msg("mounted read-only"); 1140 ubifs_msg("mounted read-only");
1129 x = (long long)c->main_lebs * c->leb_size; 1141 x = (long long)c->main_lebs * c->leb_size;
@@ -1469,6 +1481,7 @@ static void ubifs_put_super(struct super_block *sb)
1469 */ 1481 */
1470 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); 1482 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
1471 ubifs_assert(c->budg_idx_growth == 0); 1483 ubifs_assert(c->budg_idx_growth == 0);
1484 ubifs_assert(c->budg_dd_growth == 0);
1472 ubifs_assert(c->budg_data_growth == 0); 1485 ubifs_assert(c->budg_data_growth == 0);
1473 1486
1474 /* 1487 /*
@@ -1657,7 +1670,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1657 INIT_LIST_HEAD(&c->orph_new); 1670 INIT_LIST_HEAD(&c->orph_new);
1658 1671
1659 c->highest_inum = UBIFS_FIRST_INO; 1672 c->highest_inum = UBIFS_FIRST_INO;
1660 get_random_bytes(&c->vfs_gen, sizeof(int));
1661 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; 1673 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
1662 1674
1663 ubi_get_volume_info(ubi, &c->vi); 1675 ubi_get_volume_info(ubi, &c->vi);
@@ -1671,10 +1683,10 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1671 } 1683 }
1672 1684
1673 /* 1685 /*
1674 * UBIFS provids 'backing_dev_info' in order to disable readahead. For 1686 * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For
1675 * UBIFS, I/O is not deferred, it is done immediately in readpage, 1687 * UBIFS, I/O is not deferred, it is done immediately in readpage,
1676 * which means the user would have to wait not just for their own I/O 1688 * which means the user would have to wait not just for their own I/O
1677 * but the readahead I/O as well i.e. completely pointless. 1689 * but the read-ahead I/O as well i.e. completely pointless.
1678 * 1690 *
1679 * Read-ahead will be disabled because @c->bdi.ra_pages is 0. 1691 * Read-ahead will be disabled because @c->bdi.ra_pages is 0.
1680 */ 1692 */
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 8117e65ba2e9..8ac76b1c2d55 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -372,26 +372,25 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
372 written = layout_leb_in_gaps(c, p); 372 written = layout_leb_in_gaps(c, p);
373 if (written < 0) { 373 if (written < 0) {
374 err = written; 374 err = written;
375 if (err == -ENOSPC) { 375 if (err != -ENOSPC) {
376 if (!dbg_force_in_the_gaps_enabled) { 376 kfree(c->gap_lebs);
377 /* 377 c->gap_lebs = NULL;
378 * Do not print scary warnings if the 378 return err;
379 * debugging option which forces
380 * in-the-gaps is enabled.
381 */
382 ubifs_err("out of space");
383 spin_lock(&c->space_lock);
384 dbg_dump_budg(c);
385 spin_unlock(&c->space_lock);
386 dbg_dump_lprops(c);
387 }
388 /* Try to commit anyway */
389 err = 0;
390 break;
391 } 379 }
392 kfree(c->gap_lebs); 380 if (!dbg_force_in_the_gaps_enabled) {
393 c->gap_lebs = NULL; 381 /*
394 return err; 382 * Do not print scary warnings if the debugging
383 * option which forces in-the-gaps is enabled.
384 */
385 ubifs_err("out of space");
386 spin_lock(&c->space_lock);
387 dbg_dump_budg(c);
388 spin_unlock(&c->space_lock);
389 dbg_dump_lprops(c);
390 }
391 /* Try to commit anyway */
392 err = 0;
393 break;
395 } 394 }
396 p++; 395 p++;
397 cnt -= written; 396 cnt -= written;
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 0cc7da9bed47..bd2121f3426e 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -228,10 +228,10 @@ enum {
228/* Minimum number of orphan area logical eraseblocks */ 228/* Minimum number of orphan area logical eraseblocks */
229#define UBIFS_MIN_ORPH_LEBS 1 229#define UBIFS_MIN_ORPH_LEBS 1
230/* 230/*
231 * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1 231 * Minimum number of main area logical eraseblocks (buds, 3 for the index, 1
232 * for GC, 1 for deletions, and at least 1 for committed data). 232 * for GC, 1 for deletions, and at least 1 for committed data).
233 */ 233 */
234#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5) 234#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 6)
235 235
236/* Minimum number of logical eraseblocks */ 236/* Minimum number of logical eraseblocks */
237#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \ 237#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index e4f89f271827..d7f706f7a302 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -20,8 +20,6 @@
20 * Adrian Hunter 20 * Adrian Hunter
21 */ 21 */
22 22
23/* Implementation version 0.7 */
24
25#ifndef __UBIFS_H__ 23#ifndef __UBIFS_H__
26#define __UBIFS_H__ 24#define __UBIFS_H__
27 25
@@ -322,6 +320,8 @@ struct ubifs_gced_idx_leb {
322 * struct ubifs_inode - UBIFS in-memory inode description. 320 * struct ubifs_inode - UBIFS in-memory inode description.
323 * @vfs_inode: VFS inode description object 321 * @vfs_inode: VFS inode description object
324 * @creat_sqnum: sequence number at time of creation 322 * @creat_sqnum: sequence number at time of creation
323 * @del_cmtno: commit number corresponding to the time the inode was deleted,
324 * protected by @c->commit_sem;
325 * @xattr_size: summarized size of all extended attributes in bytes 325 * @xattr_size: summarized size of all extended attributes in bytes
326 * @xattr_cnt: count of extended attributes this inode has 326 * @xattr_cnt: count of extended attributes this inode has
327 * @xattr_names: sum of lengths of all extended attribute names belonging to 327 * @xattr_names: sum of lengths of all extended attribute names belonging to
@@ -373,6 +373,7 @@ struct ubifs_gced_idx_leb {
373struct ubifs_inode { 373struct ubifs_inode {
374 struct inode vfs_inode; 374 struct inode vfs_inode;
375 unsigned long long creat_sqnum; 375 unsigned long long creat_sqnum;
376 unsigned long long del_cmtno;
376 unsigned int xattr_size; 377 unsigned int xattr_size;
377 unsigned int xattr_cnt; 378 unsigned int xattr_cnt;
378 unsigned int xattr_names; 379 unsigned int xattr_names;
@@ -779,7 +780,7 @@ struct ubifs_compressor {
779/** 780/**
780 * struct ubifs_budget_req - budget requirements of an operation. 781 * struct ubifs_budget_req - budget requirements of an operation.
781 * 782 *
782 * @fast: non-zero if the budgeting should try to aquire budget quickly and 783 * @fast: non-zero if the budgeting should try to acquire budget quickly and
783 * should not try to call write-back 784 * should not try to call write-back
784 * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields 785 * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields
785 * have to be re-calculated 786 * have to be re-calculated
@@ -805,21 +806,31 @@ struct ubifs_compressor {
805 * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d 806 * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d
806 * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made 807 * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made
807 * dirty by the re-name operation. 808 * dirty by the re-name operation.
809 *
810 * Note, UBIFS aligns node lengths to 8-bytes boundary, so the requester has to
811 * make sure the amount of inode data which contribute to @new_ino_d and
812 * @dirtied_ino_d fields are aligned.
808 */ 813 */
809struct ubifs_budget_req { 814struct ubifs_budget_req {
810 unsigned int fast:1; 815 unsigned int fast:1;
811 unsigned int recalculate:1; 816 unsigned int recalculate:1;
817#ifndef UBIFS_DEBUG
812 unsigned int new_page:1; 818 unsigned int new_page:1;
813 unsigned int dirtied_page:1; 819 unsigned int dirtied_page:1;
814 unsigned int new_dent:1; 820 unsigned int new_dent:1;
815 unsigned int mod_dent:1; 821 unsigned int mod_dent:1;
816 unsigned int new_ino:1; 822 unsigned int new_ino:1;
817 unsigned int new_ino_d:13; 823 unsigned int new_ino_d:13;
818#ifndef UBIFS_DEBUG
819 unsigned int dirtied_ino:4; 824 unsigned int dirtied_ino:4;
820 unsigned int dirtied_ino_d:15; 825 unsigned int dirtied_ino_d:15;
821#else 826#else
822 /* Not bit-fields to check for overflows */ 827 /* Not bit-fields to check for overflows */
828 unsigned int new_page;
829 unsigned int dirtied_page;
830 unsigned int new_dent;
831 unsigned int mod_dent;
832 unsigned int new_ino;
833 unsigned int new_ino_d;
823 unsigned int dirtied_ino; 834 unsigned int dirtied_ino;
824 unsigned int dirtied_ino_d; 835 unsigned int dirtied_ino_d;
825#endif 836#endif
@@ -860,13 +871,13 @@ struct ubifs_mount_opts {
860 * struct ubifs_info - UBIFS file-system description data structure 871 * struct ubifs_info - UBIFS file-system description data structure
861 * (per-superblock). 872 * (per-superblock).
862 * @vfs_sb: VFS @struct super_block object 873 * @vfs_sb: VFS @struct super_block object
863 * @bdi: backing device info object to make VFS happy and disable readahead 874 * @bdi: backing device info object to make VFS happy and disable read-ahead
864 * 875 *
865 * @highest_inum: highest used inode number 876 * @highest_inum: highest used inode number
866 * @vfs_gen: VFS inode generation counter
867 * @max_sqnum: current global sequence number 877 * @max_sqnum: current global sequence number
868 * @cmt_no: commit number (last successfully completed commit) 878 * @cmt_no: commit number of the last successfully completed commit, protected
869 * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters 879 * by @commit_sem
880 * @cnt_lock: protects @highest_inum and @max_sqnum counters
870 * @fmt_version: UBIFS on-flash format version 881 * @fmt_version: UBIFS on-flash format version
871 * @uuid: UUID from super block 882 * @uuid: UUID from super block
872 * 883 *
@@ -1103,7 +1114,6 @@ struct ubifs_info {
1103 struct backing_dev_info bdi; 1114 struct backing_dev_info bdi;
1104 1115
1105 ino_t highest_inum; 1116 ino_t highest_inum;
1106 unsigned int vfs_gen;
1107 unsigned long long max_sqnum; 1117 unsigned long long max_sqnum;
1108 unsigned long long cmt_no; 1118 unsigned long long cmt_no;
1109 spinlock_t cnt_lock; 1119 spinlock_t cnt_lock;
@@ -1346,6 +1356,7 @@ extern struct backing_dev_info ubifs_backing_dev_info;
1346extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; 1356extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
1347 1357
1348/* io.c */ 1358/* io.c */
1359void ubifs_ro_mode(struct ubifs_info *c, int err);
1349int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); 1360int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len);
1350int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, 1361int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
1351 int dtype); 1362 int dtype);
@@ -1399,8 +1410,8 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
1399 int deletion, int xent); 1410 int deletion, int xent);
1400int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, 1411int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
1401 const union ubifs_key *key, const void *buf, int len); 1412 const union ubifs_key *key, const void *buf, int len);
1402int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, 1413int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode);
1403 int last_reference); 1414int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode);
1404int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, 1415int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
1405 const struct dentry *old_dentry, 1416 const struct dentry *old_dentry,
1406 const struct inode *new_dir, 1417 const struct inode *new_dir,
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 1388a078e1a9..649bec78b645 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -61,7 +61,7 @@
61 61
62/* 62/*
63 * Limit the number of extended attributes per inode so that the total size 63 * Limit the number of extended attributes per inode so that the total size
64 * (xattr_size) is guaranteeded to fit in an 'unsigned int'. 64 * (@xattr_size) is guaranteeded to fit in an 'unsigned int'.
65 */ 65 */
66#define MAX_XATTRS_PER_INODE 65535 66#define MAX_XATTRS_PER_INODE 65535
67 67
@@ -103,14 +103,14 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
103 struct inode *inode; 103 struct inode *inode;
104 struct ubifs_inode *ui, *host_ui = ubifs_inode(host); 104 struct ubifs_inode *ui, *host_ui = ubifs_inode(host);
105 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, 105 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
106 .new_ino_d = size, .dirtied_ino = 1, 106 .new_ino_d = ALIGN(size, 8), .dirtied_ino = 1,
107 .dirtied_ino_d = host_ui->data_len}; 107 .dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
108 108
109 if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) 109 if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE)
110 return -ENOSPC; 110 return -ENOSPC;
111 /* 111 /*
112 * Linux limits the maximum size of the extended attribute names list 112 * Linux limits the maximum size of the extended attribute names list
113 * to %XATTR_LIST_MAX. This means we should not allow creating more* 113 * to %XATTR_LIST_MAX. This means we should not allow creating more
114 * extended attributes if the name list becomes larger. This limitation 114 * extended attributes if the name list becomes larger. This limitation
115 * is artificial for UBIFS, though. 115 * is artificial for UBIFS, though.
116 */ 116 */
@@ -128,7 +128,6 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
128 goto out_budg; 128 goto out_budg;
129 } 129 }
130 130
131 mutex_lock(&host_ui->ui_mutex);
132 /* Re-define all operations to be "nothing" */ 131 /* Re-define all operations to be "nothing" */
133 inode->i_mapping->a_ops = &none_address_operations; 132 inode->i_mapping->a_ops = &none_address_operations;
134 inode->i_op = &none_inode_operations; 133 inode->i_op = &none_inode_operations;
@@ -141,23 +140,19 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
141 ui->data = kmalloc(size, GFP_NOFS); 140 ui->data = kmalloc(size, GFP_NOFS);
142 if (!ui->data) { 141 if (!ui->data) {
143 err = -ENOMEM; 142 err = -ENOMEM;
144 goto out_unlock; 143 goto out_free;
145 } 144 }
146
147 memcpy(ui->data, value, size); 145 memcpy(ui->data, value, size);
146 inode->i_size = ui->ui_size = size;
147 ui->data_len = size;
148
149 mutex_lock(&host_ui->ui_mutex);
148 host->i_ctime = ubifs_current_time(host); 150 host->i_ctime = ubifs_current_time(host);
149 host_ui->xattr_cnt += 1; 151 host_ui->xattr_cnt += 1;
150 host_ui->xattr_size += CALC_DENT_SIZE(nm->len); 152 host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
151 host_ui->xattr_size += CALC_XATTR_BYTES(size); 153 host_ui->xattr_size += CALC_XATTR_BYTES(size);
152 host_ui->xattr_names += nm->len; 154 host_ui->xattr_names += nm->len;
153 155
154 /*
155 * We do not use i_size_write() because nobody can race with us as we
156 * are holding host @host->i_mutex - every xattr operation for this
157 * inode is serialized by it.
158 */
159 inode->i_size = ui->ui_size = size;
160 ui->data_len = size;
161 err = ubifs_jnl_update(c, host, nm, inode, 0, 1); 156 err = ubifs_jnl_update(c, host, nm, inode, 0, 1);
162 if (err) 157 if (err)
163 goto out_cancel; 158 goto out_cancel;
@@ -172,8 +167,8 @@ out_cancel:
172 host_ui->xattr_cnt -= 1; 167 host_ui->xattr_cnt -= 1;
173 host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); 168 host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
174 host_ui->xattr_size -= CALC_XATTR_BYTES(size); 169 host_ui->xattr_size -= CALC_XATTR_BYTES(size);
175out_unlock:
176 mutex_unlock(&host_ui->ui_mutex); 170 mutex_unlock(&host_ui->ui_mutex);
171out_free:
177 make_bad_inode(inode); 172 make_bad_inode(inode);
178 iput(inode); 173 iput(inode);
179out_budg: 174out_budg:
@@ -200,29 +195,28 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
200 struct ubifs_inode *host_ui = ubifs_inode(host); 195 struct ubifs_inode *host_ui = ubifs_inode(host);
201 struct ubifs_inode *ui = ubifs_inode(inode); 196 struct ubifs_inode *ui = ubifs_inode(inode);
202 struct ubifs_budget_req req = { .dirtied_ino = 2, 197 struct ubifs_budget_req req = { .dirtied_ino = 2,
203 .dirtied_ino_d = size + host_ui->data_len }; 198 .dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) };
204 199
205 ubifs_assert(ui->data_len == inode->i_size); 200 ubifs_assert(ui->data_len == inode->i_size);
206 err = ubifs_budget_space(c, &req); 201 err = ubifs_budget_space(c, &req);
207 if (err) 202 if (err)
208 return err; 203 return err;
209 204
210 mutex_lock(&host_ui->ui_mutex);
211 host->i_ctime = ubifs_current_time(host);
212 host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
213 host_ui->xattr_size += CALC_XATTR_BYTES(size);
214
215 kfree(ui->data); 205 kfree(ui->data);
216 ui->data = kmalloc(size, GFP_NOFS); 206 ui->data = kmalloc(size, GFP_NOFS);
217 if (!ui->data) { 207 if (!ui->data) {
218 err = -ENOMEM; 208 err = -ENOMEM;
219 goto out_unlock; 209 goto out_free;
220 } 210 }
221
222 memcpy(ui->data, value, size); 211 memcpy(ui->data, value, size);
223 inode->i_size = ui->ui_size = size; 212 inode->i_size = ui->ui_size = size;
224 ui->data_len = size; 213 ui->data_len = size;
225 214
215 mutex_lock(&host_ui->ui_mutex);
216 host->i_ctime = ubifs_current_time(host);
217 host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
218 host_ui->xattr_size += CALC_XATTR_BYTES(size);
219
226 /* 220 /*
227 * It is important to write the host inode after the xattr inode 221 * It is important to write the host inode after the xattr inode
228 * because if the host inode gets synchronized (via 'fsync()'), then 222 * because if the host inode gets synchronized (via 'fsync()'), then
@@ -240,9 +234,9 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
240out_cancel: 234out_cancel:
241 host_ui->xattr_size -= CALC_XATTR_BYTES(size); 235 host_ui->xattr_size -= CALC_XATTR_BYTES(size);
242 host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); 236 host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
243 make_bad_inode(inode);
244out_unlock:
245 mutex_unlock(&host_ui->ui_mutex); 237 mutex_unlock(&host_ui->ui_mutex);
238 make_bad_inode(inode);
239out_free:
246 ubifs_release_budget(c, &req); 240 ubifs_release_budget(c, &req);
247 return err; 241 return err;
248} 242}
@@ -312,6 +306,7 @@ int ubifs_setxattr(struct dentry *dentry, const char *name,
312 306
313 dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, 307 dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name,
314 host->i_ino, dentry->d_name.len, dentry->d_name.name, size); 308 host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
309 ubifs_assert(mutex_is_locked(&host->i_mutex));
315 310
316 if (size > UBIFS_MAX_INO_DATA) 311 if (size > UBIFS_MAX_INO_DATA)
317 return -ERANGE; 312 return -ERANGE;
@@ -384,7 +379,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
384 if (!xent) 379 if (!xent)
385 return -ENOMEM; 380 return -ENOMEM;
386 381
387 mutex_lock(&host->i_mutex);
388 xent_key_init(c, &key, host->i_ino, &nm); 382 xent_key_init(c, &key, host->i_ino, &nm);
389 err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); 383 err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
390 if (err) { 384 if (err) {
@@ -419,7 +413,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
419out_iput: 413out_iput:
420 iput(inode); 414 iput(inode);
421out_unlock: 415out_unlock:
422 mutex_unlock(&host->i_mutex);
423 kfree(xent); 416 kfree(xent);
424 return err; 417 return err;
425} 418}
@@ -449,8 +442,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
449 return -ERANGE; 442 return -ERANGE;
450 443
451 lowest_xent_key(c, &key, host->i_ino); 444 lowest_xent_key(c, &key, host->i_ino);
452
453 mutex_lock(&host->i_mutex);
454 while (1) { 445 while (1) {
455 int type; 446 int type;
456 447
@@ -479,7 +470,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
479 pxent = xent; 470 pxent = xent;
480 key_read(c, &xent->key, &key); 471 key_read(c, &xent->key, &key);
481 } 472 }
482 mutex_unlock(&host->i_mutex);
483 473
484 kfree(pxent); 474 kfree(pxent);
485 if (err != -ENOENT) { 475 if (err != -ENOENT) {
@@ -497,8 +487,8 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host,
497 int err; 487 int err;
498 struct ubifs_inode *host_ui = ubifs_inode(host); 488 struct ubifs_inode *host_ui = ubifs_inode(host);
499 struct ubifs_inode *ui = ubifs_inode(inode); 489 struct ubifs_inode *ui = ubifs_inode(inode);
500 struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1, 490 struct ubifs_budget_req req = { .dirtied_ino = 2, .mod_dent = 1,
501 .dirtied_ino_d = host_ui->data_len }; 491 .dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
502 492
503 ubifs_assert(ui->data_len == inode->i_size); 493 ubifs_assert(ui->data_len == inode->i_size);
504 494
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3e30e40aa24d..3141969b456d 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
1233{ 1233{
1234 struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); 1234 struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
1235 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; 1235 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
1236 const struct match_token *tp = tokens; 1236 struct match_token *tp = tokens;
1237 1237
1238 while (tp->token != Opt_onerror_panic && tp->token != mval) 1238 while (tp->token != Opt_onerror_panic && tp->token != mval)
1239 ++tp; 1239 ++tp;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 36ec614e699a..737c9a425361 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -106,7 +106,8 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
106 xfs_iops.o \ 106 xfs_iops.o \
107 xfs_lrw.o \ 107 xfs_lrw.o \
108 xfs_super.o \ 108 xfs_super.o \
109 xfs_vnode.o) 109 xfs_vnode.o \
110 xfs_xattr.o)
110 111
111# Objects in support/ 112# Objects in support/
112xfs-y += $(addprefix support/, \ 113xfs-y += $(addprefix support/, \
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 9b1bb17a0501..1cd3b55ee3d2 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -90,7 +90,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
90} 90}
91 91
92void 92void
93kmem_free(void *ptr, size_t size) 93kmem_free(const void *ptr)
94{ 94{
95 if (!is_vmalloc_addr(ptr)) { 95 if (!is_vmalloc_addr(ptr)) {
96 kfree(ptr); 96 kfree(ptr);
@@ -100,7 +100,7 @@ kmem_free(void *ptr, size_t size)
100} 100}
101 101
102void * 102void *
103kmem_realloc(void *ptr, size_t newsize, size_t oldsize, 103kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
104 unsigned int __nocast flags) 104 unsigned int __nocast flags)
105{ 105{
106 void *new; 106 void *new;
@@ -110,7 +110,7 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
110 if (new) 110 if (new)
111 memcpy(new, ptr, 111 memcpy(new, ptr,
112 ((oldsize < newsize) ? oldsize : newsize)); 112 ((oldsize < newsize) ? oldsize : newsize));
113 kmem_free(ptr, oldsize); 113 kmem_free(ptr);
114 } 114 }
115 return new; 115 return new;
116} 116}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index a20683cf74dd..af6843c7ee4b 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -57,8 +57,8 @@ kmem_flags_convert(unsigned int __nocast flags)
57extern void *kmem_alloc(size_t, unsigned int __nocast); 57extern void *kmem_alloc(size_t, unsigned int __nocast);
58extern void *kmem_zalloc(size_t, unsigned int __nocast); 58extern void *kmem_zalloc(size_t, unsigned int __nocast);
59extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast); 59extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
60extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast); 60extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
61extern void kmem_free(void *, size_t); 61extern void kmem_free(const void *);
62 62
63/* 63/*
64 * Zone interfaces 64 * Zone interfaces
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
deleted file mode 100644
index 3abe7e9ceb33..000000000000
--- a/fs/xfs/linux-2.6/sema.h
+++ /dev/null
@@ -1,52 +0,0 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_SUPPORT_SEMA_H__
19#define __XFS_SUPPORT_SEMA_H__
20
21#include <linux/time.h>
22#include <linux/wait.h>
23#include <linux/semaphore.h>
24#include <asm/atomic.h>
25
26/*
27 * sema_t structure just maps to struct semaphore in Linux kernel.
28 */
29
30typedef struct semaphore sema_t;
31
32#define initnsema(sp, val, name) sema_init(sp, val)
33#define psema(sp, b) down(sp)
34#define vsema(sp) up(sp)
35#define freesema(sema) do { } while (0)
36
37static inline int issemalocked(sema_t *sp)
38{
39 return down_trylock(sp) || (up(sp), 0);
40}
41
42/*
43 * Map cpsema (try to get the sema) to down_trylock. We need to switch
44 * the return values since cpsema returns 1 (acquired) 0 (failed) and
45 * down_trylock returns the reverse 0 (acquired) 1 (failed).
46 */
47static inline int cpsema(sema_t *sp)
48{
49 return down_trylock(sp) ? 0 : 1;
50}
51
52#endif /* __XFS_SUPPORT_SEMA_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a55c3b26d840..f42f80a3b1fa 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -73,7 +73,6 @@ xfs_page_trace(
73 unsigned long pgoff) 73 unsigned long pgoff)
74{ 74{
75 xfs_inode_t *ip; 75 xfs_inode_t *ip;
76 bhv_vnode_t *vp = vn_from_inode(inode);
77 loff_t isize = i_size_read(inode); 76 loff_t isize = i_size_read(inode);
78 loff_t offset = page_offset(page); 77 loff_t offset = page_offset(page);
79 int delalloc = -1, unmapped = -1, unwritten = -1; 78 int delalloc = -1, unmapped = -1, unwritten = -1;
@@ -81,7 +80,7 @@ xfs_page_trace(
81 if (page_has_buffers(page)) 80 if (page_has_buffers(page))
82 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); 81 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
83 82
84 ip = xfs_vtoi(vp); 83 ip = XFS_I(inode);
85 if (!ip->i_rwtrace) 84 if (!ip->i_rwtrace)
86 return; 85 return;
87 86
@@ -409,7 +408,6 @@ xfs_start_buffer_writeback(
409STATIC void 408STATIC void
410xfs_start_page_writeback( 409xfs_start_page_writeback(
411 struct page *page, 410 struct page *page,
412 struct writeback_control *wbc,
413 int clear_dirty, 411 int clear_dirty,
414 int buffers) 412 int buffers)
415{ 413{
@@ -676,7 +674,7 @@ xfs_probe_cluster(
676 } else 674 } else
677 pg_offset = PAGE_CACHE_SIZE; 675 pg_offset = PAGE_CACHE_SIZE;
678 676
679 if (page->index == tindex && !TestSetPageLocked(page)) { 677 if (page->index == tindex && trylock_page(page)) {
680 pg_len = xfs_probe_page(page, pg_offset, mapped); 678 pg_len = xfs_probe_page(page, pg_offset, mapped);
681 unlock_page(page); 679 unlock_page(page);
682 } 680 }
@@ -760,7 +758,7 @@ xfs_convert_page(
760 758
761 if (page->index != tindex) 759 if (page->index != tindex)
762 goto fail; 760 goto fail;
763 if (TestSetPageLocked(page)) 761 if (!trylock_page(page))
764 goto fail; 762 goto fail;
765 if (PageWriteback(page)) 763 if (PageWriteback(page))
766 goto fail_unlock_page; 764 goto fail_unlock_page;
@@ -858,7 +856,7 @@ xfs_convert_page(
858 done = 1; 856 done = 1;
859 } 857 }
860 } 858 }
861 xfs_start_page_writeback(page, wbc, !page_dirty, count); 859 xfs_start_page_writeback(page, !page_dirty, count);
862 } 860 }
863 861
864 return done; 862 return done;
@@ -1105,7 +1103,7 @@ xfs_page_state_convert(
1105 * that we are writing into for the first time. 1103 * that we are writing into for the first time.
1106 */ 1104 */
1107 type = IOMAP_NEW; 1105 type = IOMAP_NEW;
1108 if (!test_and_set_bit(BH_Lock, &bh->b_state)) { 1106 if (trylock_buffer(bh)) {
1109 ASSERT(buffer_mapped(bh)); 1107 ASSERT(buffer_mapped(bh));
1110 if (iomap_valid) 1108 if (iomap_valid)
1111 all_bh = 1; 1109 all_bh = 1;
@@ -1130,7 +1128,7 @@ xfs_page_state_convert(
1130 SetPageUptodate(page); 1128 SetPageUptodate(page);
1131 1129
1132 if (startio) 1130 if (startio)
1133 xfs_start_page_writeback(page, wbc, 1, count); 1131 xfs_start_page_writeback(page, 1, count);
1134 1132
1135 if (ioend && iomap_valid) { 1133 if (ioend && iomap_valid) {
1136 offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >> 1134 offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 98e0e86093b4..986061ae1b9b 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -58,7 +58,7 @@ xfs_buf_trace(
58 bp, id, 58 bp, id,
59 (void *)(unsigned long)bp->b_flags, 59 (void *)(unsigned long)bp->b_flags,
60 (void *)(unsigned long)bp->b_hold.counter, 60 (void *)(unsigned long)bp->b_hold.counter,
61 (void *)(unsigned long)bp->b_sema.count.counter, 61 (void *)(unsigned long)bp->b_sema.count,
62 (void *)current, 62 (void *)current,
63 data, ra, 63 data, ra,
64 (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff), 64 (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
@@ -253,7 +253,7 @@ _xfs_buf_initialize(
253 253
254 memset(bp, 0, sizeof(xfs_buf_t)); 254 memset(bp, 0, sizeof(xfs_buf_t));
255 atomic_set(&bp->b_hold, 1); 255 atomic_set(&bp->b_hold, 1);
256 init_MUTEX_LOCKED(&bp->b_iodonesema); 256 init_completion(&bp->b_iowait);
257 INIT_LIST_HEAD(&bp->b_list); 257 INIT_LIST_HEAD(&bp->b_list);
258 INIT_LIST_HEAD(&bp->b_hash_list); 258 INIT_LIST_HEAD(&bp->b_hash_list);
259 init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ 259 init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
@@ -310,8 +310,7 @@ _xfs_buf_free_pages(
310 xfs_buf_t *bp) 310 xfs_buf_t *bp)
311{ 311{
312 if (bp->b_pages != bp->b_page_array) { 312 if (bp->b_pages != bp->b_page_array) {
313 kmem_free(bp->b_pages, 313 kmem_free(bp->b_pages);
314 bp->b_page_count * sizeof(struct page *));
315 } 314 }
316} 315}
317 316
@@ -839,6 +838,7 @@ xfs_buf_rele(
839 return; 838 return;
840 } 839 }
841 840
841 ASSERT(atomic_read(&bp->b_hold) > 0);
842 if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { 842 if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
843 if (bp->b_relse) { 843 if (bp->b_relse) {
844 atomic_inc(&bp->b_hold); 844 atomic_inc(&bp->b_hold);
@@ -852,11 +852,6 @@ xfs_buf_rele(
852 spin_unlock(&hash->bh_lock); 852 spin_unlock(&hash->bh_lock);
853 xfs_buf_free(bp); 853 xfs_buf_free(bp);
854 } 854 }
855 } else {
856 /*
857 * Catch reference count leaks
858 */
859 ASSERT(atomic_read(&bp->b_hold) >= 0);
860 } 855 }
861} 856}
862 857
@@ -1038,7 +1033,7 @@ xfs_buf_ioend(
1038 xfs_buf_iodone_work(&bp->b_iodone_work); 1033 xfs_buf_iodone_work(&bp->b_iodone_work);
1039 } 1034 }
1040 } else { 1035 } else {
1041 up(&bp->b_iodonesema); 1036 complete(&bp->b_iowait);
1042 } 1037 }
1043} 1038}
1044 1039
@@ -1276,7 +1271,7 @@ xfs_buf_iowait(
1276 XB_TRACE(bp, "iowait", 0); 1271 XB_TRACE(bp, "iowait", 0);
1277 if (atomic_read(&bp->b_io_remaining)) 1272 if (atomic_read(&bp->b_io_remaining))
1278 blk_run_address_space(bp->b_target->bt_mapping); 1273 blk_run_address_space(bp->b_target->bt_mapping);
1279 down(&bp->b_iodonesema); 1274 wait_for_completion(&bp->b_iowait);
1280 XB_TRACE(bp, "iowaited", (long)bp->b_error); 1275 XB_TRACE(bp, "iowaited", (long)bp->b_error);
1281 return bp->b_error; 1276 return bp->b_error;
1282} 1277}
@@ -1398,7 +1393,7 @@ STATIC void
1398xfs_free_bufhash( 1393xfs_free_bufhash(
1399 xfs_buftarg_t *btp) 1394 xfs_buftarg_t *btp)
1400{ 1395{
1401 kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t)); 1396 kmem_free(btp->bt_hash);
1402 btp->bt_hash = NULL; 1397 btp->bt_hash = NULL;
1403} 1398}
1404 1399
@@ -1428,13 +1423,10 @@ xfs_unregister_buftarg(
1428 1423
1429void 1424void
1430xfs_free_buftarg( 1425xfs_free_buftarg(
1431 xfs_buftarg_t *btp, 1426 xfs_buftarg_t *btp)
1432 int external)
1433{ 1427{
1434 xfs_flush_buftarg(btp, 1); 1428 xfs_flush_buftarg(btp, 1);
1435 xfs_blkdev_issue_flush(btp); 1429 xfs_blkdev_issue_flush(btp);
1436 if (external)
1437 xfs_blkdev_put(btp->bt_bdev);
1438 xfs_free_bufhash(btp); 1430 xfs_free_bufhash(btp);
1439 iput(btp->bt_mapping->host); 1431 iput(btp->bt_mapping->host);
1440 1432
@@ -1444,7 +1436,7 @@ xfs_free_buftarg(
1444 xfs_unregister_buftarg(btp); 1436 xfs_unregister_buftarg(btp);
1445 kthread_stop(btp->bt_task); 1437 kthread_stop(btp->bt_task);
1446 1438
1447 kmem_free(btp, sizeof(*btp)); 1439 kmem_free(btp);
1448} 1440}
1449 1441
1450STATIC int 1442STATIC int
@@ -1575,7 +1567,7 @@ xfs_alloc_buftarg(
1575 return btp; 1567 return btp;
1576 1568
1577error: 1569error:
1578 kmem_free(btp, sizeof(*btp)); 1570 kmem_free(btp);
1579 return NULL; 1571 return NULL;
1580} 1572}
1581 1573
@@ -1803,7 +1795,7 @@ int __init
1803xfs_buf_init(void) 1795xfs_buf_init(void)
1804{ 1796{
1805#ifdef XFS_BUF_TRACE 1797#ifdef XFS_BUF_TRACE
1806 xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP); 1798 xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS);
1807#endif 1799#endif
1808 1800
1809 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", 1801 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index f948ec7ba9a4..fe0109956656 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -157,7 +157,7 @@ typedef struct xfs_buf {
157 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 157 xfs_buf_iodone_t b_iodone; /* I/O completion function */
158 xfs_buf_relse_t b_relse; /* releasing function */ 158 xfs_buf_relse_t b_relse; /* releasing function */
159 xfs_buf_bdstrat_t b_strat; /* pre-write function */ 159 xfs_buf_bdstrat_t b_strat; /* pre-write function */
160 struct semaphore b_iodonesema; /* Semaphore for I/O waiters */ 160 struct completion b_iowait; /* queue for I/O waiters */
161 void *b_fspriv; 161 void *b_fspriv;
162 void *b_fspriv2; 162 void *b_fspriv2;
163 void *b_fspriv3; 163 void *b_fspriv3;
@@ -352,7 +352,7 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
352#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) 352#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
353#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp) 353#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)
354#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp) 354#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)
355#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema); 355#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait);
356 356
357#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) 357#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
358#define XFS_BUF_TARGET(bp) ((bp)->b_target) 358#define XFS_BUF_TARGET(bp) ((bp)->b_target)
@@ -429,7 +429,7 @@ static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp)
429 * Handling of buftargs. 429 * Handling of buftargs.
430 */ 430 */
431extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); 431extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
432extern void xfs_free_buftarg(xfs_buftarg_t *, int); 432extern void xfs_free_buftarg(xfs_buftarg_t *);
433extern void xfs_wait_buftarg(xfs_buftarg_t *); 433extern void xfs_wait_buftarg(xfs_buftarg_t *);
434extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 434extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
435extern int xfs_flush_buftarg(xfs_buftarg_t *, int); 435extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index c672b3238b14..24fd598af846 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -139,7 +139,7 @@ xfs_nfs_get_inode(
139 } 139 }
140 140
141 xfs_iunlock(ip, XFS_ILOCK_SHARED); 141 xfs_iunlock(ip, XFS_ILOCK_SHARED);
142 return ip->i_vnode; 142 return VFS_I(ip);
143} 143}
144 144
145STATIC struct dentry * 145STATIC struct dentry *
@@ -167,7 +167,7 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
167 if (!inode) 167 if (!inode)
168 return NULL; 168 return NULL;
169 if (IS_ERR(inode)) 169 if (IS_ERR(inode))
170 return ERR_PTR(PTR_ERR(inode)); 170 return ERR_CAST(inode);
171 result = d_alloc_anon(inode); 171 result = d_alloc_anon(inode);
172 if (!result) { 172 if (!result) {
173 iput(inode); 173 iput(inode);
@@ -198,7 +198,7 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
198 if (!inode) 198 if (!inode)
199 return NULL; 199 return NULL;
200 if (IS_ERR(inode)) 200 if (IS_ERR(inode))
201 return ERR_PTR(PTR_ERR(inode)); 201 return ERR_CAST(inode);
202 result = d_alloc_anon(inode); 202 result = d_alloc_anon(inode);
203 if (!result) { 203 if (!result) {
204 iput(inode); 204 iput(inode);
@@ -215,13 +215,13 @@ xfs_fs_get_parent(
215 struct xfs_inode *cip; 215 struct xfs_inode *cip;
216 struct dentry *parent; 216 struct dentry *parent;
217 217
218 error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip); 218 error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
219 if (unlikely(error)) 219 if (unlikely(error))
220 return ERR_PTR(-error); 220 return ERR_PTR(-error);
221 221
222 parent = d_alloc_anon(cip->i_vnode); 222 parent = d_alloc_anon(VFS_I(cip));
223 if (unlikely(!parent)) { 223 if (unlikely(!parent)) {
224 iput(cip->i_vnode); 224 iput(VFS_I(cip));
225 return ERR_PTR(-ENOMEM); 225 return ERR_PTR(-ENOMEM);
226 } 226 }
227 return parent; 227 return parent;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 5f60363b9343..5311c1acdd40 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -475,6 +475,7 @@ const struct file_operations xfs_invis_file_operations = {
475const struct file_operations xfs_dir_file_operations = { 475const struct file_operations xfs_dir_file_operations = {
476 .read = generic_read_dir, 476 .read = generic_read_dir,
477 .readdir = xfs_file_readdir, 477 .readdir = xfs_file_readdir,
478 .llseek = generic_file_llseek,
478 .unlocked_ioctl = xfs_file_ioctl, 479 .unlocked_ioctl = xfs_file_ioctl,
479#ifdef CONFIG_COMPAT 480#ifdef CONFIG_COMPAT
480 .compat_ioctl = xfs_file_compat_ioctl, 481 .compat_ioctl = xfs_file_compat_ioctl,
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 1eefe61f0e10..36caa6d957df 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -31,7 +31,7 @@ xfs_tosspages(
31 xfs_off_t last, 31 xfs_off_t last,
32 int fiopt) 32 int fiopt)
33{ 33{
34 struct address_space *mapping = ip->i_vnode->i_mapping; 34 struct address_space *mapping = VFS_I(ip)->i_mapping;
35 35
36 if (mapping->nrpages) 36 if (mapping->nrpages)
37 truncate_inode_pages(mapping, first); 37 truncate_inode_pages(mapping, first);
@@ -44,7 +44,7 @@ xfs_flushinval_pages(
44 xfs_off_t last, 44 xfs_off_t last,
45 int fiopt) 45 int fiopt)
46{ 46{
47 struct address_space *mapping = ip->i_vnode->i_mapping; 47 struct address_space *mapping = VFS_I(ip)->i_mapping;
48 int ret = 0; 48 int ret = 0;
49 49
50 if (mapping->nrpages) { 50 if (mapping->nrpages) {
@@ -64,7 +64,7 @@ xfs_flush_pages(
64 uint64_t flags, 64 uint64_t flags,
65 int fiopt) 65 int fiopt)
66{ 66{
67 struct address_space *mapping = ip->i_vnode->i_mapping; 67 struct address_space *mapping = VFS_I(ip)->i_mapping;
68 int ret = 0; 68 int ret = 0;
69 int ret2; 69 int ret2;
70 70
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 01939ba2d8de..48799ba7e3e6 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -48,6 +48,8 @@
48#include "xfs_dfrag.h" 48#include "xfs_dfrag.h"
49#include "xfs_fsops.h" 49#include "xfs_fsops.h"
50#include "xfs_vnodeops.h" 50#include "xfs_vnodeops.h"
51#include "xfs_quota.h"
52#include "xfs_inode_item.h"
51 53
52#include <linux/capability.h> 54#include <linux/capability.h>
53#include <linux/dcache.h> 55#include <linux/dcache.h>
@@ -243,7 +245,7 @@ xfs_vget_fsop_handlereq(
243 245
244 xfs_iunlock(ip, XFS_ILOCK_SHARED); 246 xfs_iunlock(ip, XFS_ILOCK_SHARED);
245 247
246 *inode = XFS_ITOV(ip); 248 *inode = VFS_I(ip);
247 return 0; 249 return 0;
248} 250}
249 251
@@ -468,6 +470,12 @@ xfs_attrlist_by_handle(
468 if (al_hreq.buflen > XATTR_LIST_MAX) 470 if (al_hreq.buflen > XATTR_LIST_MAX)
469 return -XFS_ERROR(EINVAL); 471 return -XFS_ERROR(EINVAL);
470 472
473 /*
474 * Reject flags, only allow namespaces.
475 */
476 if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
477 return -XFS_ERROR(EINVAL);
478
471 error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode); 479 error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode);
472 if (error) 480 if (error)
473 goto out; 481 goto out;
@@ -587,7 +595,7 @@ xfs_attrmulti_by_handle(
587 goto out; 595 goto out;
588 596
589 error = E2BIG; 597 error = E2BIG;
590 size = am_hreq.opcount * sizeof(attr_multiop_t); 598 size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
591 if (!size || size > 16 * PAGE_SIZE) 599 if (!size || size > 16 * PAGE_SIZE)
592 goto out_vn_rele; 600 goto out_vn_rele;
593 601
@@ -680,9 +688,9 @@ xfs_ioc_space(
680 return -XFS_ERROR(EFAULT); 688 return -XFS_ERROR(EFAULT);
681 689
682 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 690 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
683 attr_flags |= ATTR_NONBLOCK; 691 attr_flags |= XFS_ATTR_NONBLOCK;
684 if (ioflags & IO_INVIS) 692 if (ioflags & IO_INVIS)
685 attr_flags |= ATTR_DMI; 693 attr_flags |= XFS_ATTR_DMI;
686 694
687 error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, 695 error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos,
688 NULL, attr_flags); 696 NULL, attr_flags);
@@ -873,6 +881,322 @@ xfs_ioc_fsgetxattr(
873 return 0; 881 return 0;
874} 882}
875 883
884STATIC void
885xfs_set_diflags(
886 struct xfs_inode *ip,
887 unsigned int xflags)
888{
889 unsigned int di_flags;
890
891 /* can't set PREALLOC this way, just preserve it */
892 di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
893 if (xflags & XFS_XFLAG_IMMUTABLE)
894 di_flags |= XFS_DIFLAG_IMMUTABLE;
895 if (xflags & XFS_XFLAG_APPEND)
896 di_flags |= XFS_DIFLAG_APPEND;
897 if (xflags & XFS_XFLAG_SYNC)
898 di_flags |= XFS_DIFLAG_SYNC;
899 if (xflags & XFS_XFLAG_NOATIME)
900 di_flags |= XFS_DIFLAG_NOATIME;
901 if (xflags & XFS_XFLAG_NODUMP)
902 di_flags |= XFS_DIFLAG_NODUMP;
903 if (xflags & XFS_XFLAG_PROJINHERIT)
904 di_flags |= XFS_DIFLAG_PROJINHERIT;
905 if (xflags & XFS_XFLAG_NODEFRAG)
906 di_flags |= XFS_DIFLAG_NODEFRAG;
907 if (xflags & XFS_XFLAG_FILESTREAM)
908 di_flags |= XFS_DIFLAG_FILESTREAM;
909 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
910 if (xflags & XFS_XFLAG_RTINHERIT)
911 di_flags |= XFS_DIFLAG_RTINHERIT;
912 if (xflags & XFS_XFLAG_NOSYMLINKS)
913 di_flags |= XFS_DIFLAG_NOSYMLINKS;
914 if (xflags & XFS_XFLAG_EXTSZINHERIT)
915 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
916 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
917 if (xflags & XFS_XFLAG_REALTIME)
918 di_flags |= XFS_DIFLAG_REALTIME;
919 if (xflags & XFS_XFLAG_EXTSIZE)
920 di_flags |= XFS_DIFLAG_EXTSIZE;
921 }
922
923 ip->i_d.di_flags = di_flags;
924}
925
926STATIC void
927xfs_diflags_to_linux(
928 struct xfs_inode *ip)
929{
930 struct inode *inode = VFS_I(ip);
931 unsigned int xflags = xfs_ip2xflags(ip);
932
933 if (xflags & XFS_XFLAG_IMMUTABLE)
934 inode->i_flags |= S_IMMUTABLE;
935 else
936 inode->i_flags &= ~S_IMMUTABLE;
937 if (xflags & XFS_XFLAG_APPEND)
938 inode->i_flags |= S_APPEND;
939 else
940 inode->i_flags &= ~S_APPEND;
941 if (xflags & XFS_XFLAG_SYNC)
942 inode->i_flags |= S_SYNC;
943 else
944 inode->i_flags &= ~S_SYNC;
945 if (xflags & XFS_XFLAG_NOATIME)
946 inode->i_flags |= S_NOATIME;
947 else
948 inode->i_flags &= ~S_NOATIME;
949}
950
951#define FSX_PROJID 1
952#define FSX_EXTSIZE 2
953#define FSX_XFLAGS 4
954#define FSX_NONBLOCK 8
955
956STATIC int
957xfs_ioctl_setattr(
958 xfs_inode_t *ip,
959 struct fsxattr *fa,
960 int mask)
961{
962 struct xfs_mount *mp = ip->i_mount;
963 struct xfs_trans *tp;
964 unsigned int lock_flags = 0;
965 struct xfs_dquot *udqp = NULL, *gdqp = NULL;
966 struct xfs_dquot *olddquot = NULL;
967 int code;
968
969 xfs_itrace_entry(ip);
970
971 if (mp->m_flags & XFS_MOUNT_RDONLY)
972 return XFS_ERROR(EROFS);
973 if (XFS_FORCED_SHUTDOWN(mp))
974 return XFS_ERROR(EIO);
975
976 /*
977 * If disk quotas is on, we make sure that the dquots do exist on disk,
978 * before we start any other transactions. Trying to do this later
979 * is messy. We don't care to take a readlock to look at the ids
980 * in inode here, because we can't hold it across the trans_reserve.
981 * If the IDs do change before we take the ilock, we're covered
982 * because the i_*dquot fields will get updated anyway.
983 */
984 if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
985 code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid,
986 ip->i_d.di_gid, fa->fsx_projid,
987 XFS_QMOPT_PQUOTA, &udqp, &gdqp);
988 if (code)
989 return code;
990 }
991
992 /*
993 * For the other attributes, we acquire the inode lock and
994 * first do an error checking pass.
995 */
996 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
997 code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
998 if (code)
999 goto error_return;
1000
1001 lock_flags = XFS_ILOCK_EXCL;
1002 xfs_ilock(ip, lock_flags);
1003
1004 /*
1005 * CAP_FOWNER overrides the following restrictions:
1006 *
1007 * The user ID of the calling process must be equal
1008 * to the file owner ID, except in cases where the
1009 * CAP_FSETID capability is applicable.
1010 */
1011 if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
1012 code = XFS_ERROR(EPERM);
1013 goto error_return;
1014 }
1015
1016 /*
1017 * Do a quota reservation only if projid is actually going to change.
1018 */
1019 if (mask & FSX_PROJID) {
1020 if (XFS_IS_PQUOTA_ON(mp) &&
1021 ip->i_d.di_projid != fa->fsx_projid) {
1022 ASSERT(tp);
1023 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
1024 capable(CAP_FOWNER) ?
1025 XFS_QMOPT_FORCE_RES : 0);
1026 if (code) /* out of quota */
1027 goto error_return;
1028 }
1029 }
1030
1031 if (mask & FSX_EXTSIZE) {
1032 /*
1033 * Can't change extent size if any extents are allocated.
1034 */
1035 if (ip->i_d.di_nextents &&
1036 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
1037 fa->fsx_extsize)) {
1038 code = XFS_ERROR(EINVAL); /* EFBIG? */
1039 goto error_return;
1040 }
1041
1042 /*
1043 * Extent size must be a multiple of the appropriate block
1044 * size, if set at all.
1045 */
1046 if (fa->fsx_extsize != 0) {
1047 xfs_extlen_t size;
1048
1049 if (XFS_IS_REALTIME_INODE(ip) ||
1050 ((mask & FSX_XFLAGS) &&
1051 (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
1052 size = mp->m_sb.sb_rextsize <<
1053 mp->m_sb.sb_blocklog;
1054 } else {
1055 size = mp->m_sb.sb_blocksize;
1056 }
1057
1058 if (fa->fsx_extsize % size) {
1059 code = XFS_ERROR(EINVAL);
1060 goto error_return;
1061 }
1062 }
1063 }
1064
1065
1066 if (mask & FSX_XFLAGS) {
1067 /*
1068 * Can't change realtime flag if any extents are allocated.
1069 */
1070 if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
1071 (XFS_IS_REALTIME_INODE(ip)) !=
1072 (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
1073 code = XFS_ERROR(EINVAL); /* EFBIG? */
1074 goto error_return;
1075 }
1076
1077 /*
1078 * If realtime flag is set then must have realtime data.
1079 */
1080 if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
1081 if ((mp->m_sb.sb_rblocks == 0) ||
1082 (mp->m_sb.sb_rextsize == 0) ||
1083 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
1084 code = XFS_ERROR(EINVAL);
1085 goto error_return;
1086 }
1087 }
1088
1089 /*
1090 * Can't modify an immutable/append-only file unless
1091 * we have appropriate permission.
1092 */
1093 if ((ip->i_d.di_flags &
1094 (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
1095 (fa->fsx_xflags &
1096 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
1097 !capable(CAP_LINUX_IMMUTABLE)) {
1098 code = XFS_ERROR(EPERM);
1099 goto error_return;
1100 }
1101 }
1102
1103 xfs_trans_ijoin(tp, ip, lock_flags);
1104 xfs_trans_ihold(tp, ip);
1105
1106 /*
1107 * Change file ownership. Must be the owner or privileged.
1108 * If the system was configured with the "restricted_chown"
1109 * option, the owner is not permitted to give away the file,
1110 * and can change the group id only to a group of which he
1111 * or she is a member.
1112 */
1113 if (mask & FSX_PROJID) {
1114 /*
1115 * CAP_FSETID overrides the following restrictions:
1116 *
1117 * The set-user-ID and set-group-ID bits of a file will be
1118 * cleared upon successful return from chown()
1119 */
1120 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
1121 !capable(CAP_FSETID))
1122 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
1123
1124 /*
1125 * Change the ownerships and register quota modifications
1126 * in the transaction.
1127 */
1128 if (ip->i_d.di_projid != fa->fsx_projid) {
1129 if (XFS_IS_PQUOTA_ON(mp)) {
1130 olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip,
1131 &ip->i_gdquot, gdqp);
1132 }
1133 ip->i_d.di_projid = fa->fsx_projid;
1134
1135 /*
1136 * We may have to rev the inode as well as
1137 * the superblock version number since projids didn't
1138 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
1139 */
1140 if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
1141 xfs_bump_ino_vers2(tp, ip);
1142 }
1143
1144 }
1145
1146 if (mask & FSX_EXTSIZE)
1147 ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
1148 if (mask & FSX_XFLAGS) {
1149 xfs_set_diflags(ip, fa->fsx_xflags);
1150 xfs_diflags_to_linux(ip);
1151 }
1152
1153 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1154 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
1155
1156 XFS_STATS_INC(xs_ig_attrchg);
1157
1158 /*
1159 * If this is a synchronous mount, make sure that the
1160 * transaction goes to disk before returning to the user.
1161 * This is slightly sub-optimal in that truncates require
1162 * two sync transactions instead of one for wsync filesystems.
1163 * One for the truncate and one for the timestamps since we
1164 * don't want to change the timestamps unless we're sure the
1165 * truncate worked. Truncates are less than 1% of the laddis
1166 * mix so this probably isn't worth the trouble to optimize.
1167 */
1168 if (mp->m_flags & XFS_MOUNT_WSYNC)
1169 xfs_trans_set_sync(tp);
1170 code = xfs_trans_commit(tp, 0);
1171 xfs_iunlock(ip, lock_flags);
1172
1173 /*
1174 * Release any dquot(s) the inode had kept before chown.
1175 */
1176 XFS_QM_DQRELE(mp, olddquot);
1177 XFS_QM_DQRELE(mp, udqp);
1178 XFS_QM_DQRELE(mp, gdqp);
1179
1180 if (code)
1181 return code;
1182
1183 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) {
1184 XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
1185 NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0,
1186 (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0);
1187 }
1188
1189 return 0;
1190
1191 error_return:
1192 XFS_QM_DQRELE(mp, udqp);
1193 XFS_QM_DQRELE(mp, gdqp);
1194 xfs_trans_cancel(tp, 0);
1195 if (lock_flags)
1196 xfs_iunlock(ip, lock_flags);
1197 return code;
1198}
1199
876STATIC int 1200STATIC int
877xfs_ioc_fssetxattr( 1201xfs_ioc_fssetxattr(
878 xfs_inode_t *ip, 1202 xfs_inode_t *ip,
@@ -880,31 +1204,16 @@ xfs_ioc_fssetxattr(
880 void __user *arg) 1204 void __user *arg)
881{ 1205{
882 struct fsxattr fa; 1206 struct fsxattr fa;
883 struct bhv_vattr *vattr; 1207 unsigned int mask;
884 int error;
885 int attr_flags;
886 1208
887 if (copy_from_user(&fa, arg, sizeof(fa))) 1209 if (copy_from_user(&fa, arg, sizeof(fa)))
888 return -EFAULT; 1210 return -EFAULT;
889 1211
890 vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); 1212 mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
891 if (unlikely(!vattr))
892 return -ENOMEM;
893
894 attr_flags = 0;
895 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 1213 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
896 attr_flags |= ATTR_NONBLOCK; 1214 mask |= FSX_NONBLOCK;
897
898 vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
899 vattr->va_xflags = fa.fsx_xflags;
900 vattr->va_extsize = fa.fsx_extsize;
901 vattr->va_projid = fa.fsx_projid;
902 1215
903 error = -xfs_setattr(ip, vattr, attr_flags, NULL); 1216 return -xfs_ioctl_setattr(ip, &fa, mask);
904 if (!error)
905 vn_revalidate(XFS_ITOV(ip)); /* update flags */
906 kfree(vattr);
907 return 0;
908} 1217}
909 1218
910STATIC int 1219STATIC int
@@ -926,10 +1235,9 @@ xfs_ioc_setxflags(
926 struct file *filp, 1235 struct file *filp,
927 void __user *arg) 1236 void __user *arg)
928{ 1237{
929 struct bhv_vattr *vattr; 1238 struct fsxattr fa;
930 unsigned int flags; 1239 unsigned int flags;
931 int attr_flags; 1240 unsigned int mask;
932 int error;
933 1241
934 if (copy_from_user(&flags, arg, sizeof(flags))) 1242 if (copy_from_user(&flags, arg, sizeof(flags)))
935 return -EFAULT; 1243 return -EFAULT;
@@ -939,22 +1247,12 @@ xfs_ioc_setxflags(
939 FS_SYNC_FL)) 1247 FS_SYNC_FL))
940 return -EOPNOTSUPP; 1248 return -EOPNOTSUPP;
941 1249
942 vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); 1250 mask = FSX_XFLAGS;
943 if (unlikely(!vattr))
944 return -ENOMEM;
945
946 attr_flags = 0;
947 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 1251 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
948 attr_flags |= ATTR_NONBLOCK; 1252 mask |= FSX_NONBLOCK;
949 1253 fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
950 vattr->va_mask = XFS_AT_XFLAGS;
951 vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
952 1254
953 error = -xfs_setattr(ip, vattr, attr_flags, NULL); 1255 return -xfs_ioctl_setattr(ip, &fa, mask);
954 if (likely(!error))
955 vn_revalidate(XFS_ITOV(ip)); /* update flags */
956 kfree(vattr);
957 return error;
958} 1256}
959 1257
960STATIC int 1258STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 5fc61c824bb9..095d271f3434 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -62,7 +62,7 @@ void
62xfs_synchronize_atime( 62xfs_synchronize_atime(
63 xfs_inode_t *ip) 63 xfs_inode_t *ip)
64{ 64{
65 struct inode *inode = ip->i_vnode; 65 struct inode *inode = VFS_I(ip);
66 66
67 if (inode) { 67 if (inode) {
68 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; 68 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
@@ -79,7 +79,7 @@ void
79xfs_mark_inode_dirty_sync( 79xfs_mark_inode_dirty_sync(
80 xfs_inode_t *ip) 80 xfs_inode_t *ip)
81{ 81{
82 struct inode *inode = ip->i_vnode; 82 struct inode *inode = VFS_I(ip);
83 83
84 if (inode) 84 if (inode)
85 mark_inode_dirty_sync(inode); 85 mark_inode_dirty_sync(inode);
@@ -89,36 +89,31 @@ xfs_mark_inode_dirty_sync(
89 * Change the requested timestamp in the given inode. 89 * Change the requested timestamp in the given inode.
90 * We don't lock across timestamp updates, and we don't log them but 90 * We don't lock across timestamp updates, and we don't log them but
91 * we do record the fact that there is dirty information in core. 91 * we do record the fact that there is dirty information in core.
92 *
93 * NOTE -- callers MUST combine XFS_ICHGTIME_MOD or XFS_ICHGTIME_CHG
94 * with XFS_ICHGTIME_ACC to be sure that access time
95 * update will take. Calling first with XFS_ICHGTIME_ACC
96 * and then XFS_ICHGTIME_MOD may fail to modify the access
97 * timestamp if the filesystem is mounted noacctm.
98 */ 92 */
99void 93void
100xfs_ichgtime( 94xfs_ichgtime(
101 xfs_inode_t *ip, 95 xfs_inode_t *ip,
102 int flags) 96 int flags)
103{ 97{
104 struct inode *inode = vn_to_inode(XFS_ITOV(ip)); 98 struct inode *inode = VFS_I(ip);
105 timespec_t tv; 99 timespec_t tv;
100 int sync_it = 0;
101
102 tv = current_fs_time(inode->i_sb);
106 103
107 nanotime(&tv); 104 if ((flags & XFS_ICHGTIME_MOD) &&
108 if (flags & XFS_ICHGTIME_MOD) { 105 !timespec_equal(&inode->i_mtime, &tv)) {
109 inode->i_mtime = tv; 106 inode->i_mtime = tv;
110 ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; 107 ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
111 ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; 108 ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
109 sync_it = 1;
112 } 110 }
113 if (flags & XFS_ICHGTIME_ACC) { 111 if ((flags & XFS_ICHGTIME_CHG) &&
114 inode->i_atime = tv; 112 !timespec_equal(&inode->i_ctime, &tv)) {
115 ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec;
116 ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec;
117 }
118 if (flags & XFS_ICHGTIME_CHG) {
119 inode->i_ctime = tv; 113 inode->i_ctime = tv;
120 ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec; 114 ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec;
121 ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec; 115 ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec;
116 sync_it = 1;
122 } 117 }
123 118
124 /* 119 /*
@@ -130,72 +125,11 @@ xfs_ichgtime(
130 * ensure that the compiler does not reorder the update 125 * ensure that the compiler does not reorder the update
131 * of i_update_core above the timestamp updates above. 126 * of i_update_core above the timestamp updates above.
132 */ 127 */
133 SYNCHRONIZE(); 128 if (sync_it) {
134 ip->i_update_core = 1; 129 SYNCHRONIZE();
135 if (!(inode->i_state & I_NEW)) 130 ip->i_update_core = 1;
136 mark_inode_dirty_sync(inode); 131 mark_inode_dirty_sync(inode);
137}
138
139/*
140 * Variant on the above which avoids querying the system clock
141 * in situations where we know the Linux inode timestamps have
142 * just been updated (and so we can update our inode cheaply).
143 */
144void
145xfs_ichgtime_fast(
146 xfs_inode_t *ip,
147 struct inode *inode,
148 int flags)
149{
150 timespec_t *tvp;
151
152 /*
153 * Atime updates for read() & friends are handled lazily now, and
154 * explicit updates must go through xfs_ichgtime()
155 */
156 ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
157
158 if (flags & XFS_ICHGTIME_MOD) {
159 tvp = &inode->i_mtime;
160 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
161 ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;
162 }
163 if (flags & XFS_ICHGTIME_CHG) {
164 tvp = &inode->i_ctime;
165 ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec;
166 ip->i_d.di_ctime.t_nsec = (__int32_t)tvp->tv_nsec;
167 } 132 }
168
169 /*
170 * We update the i_update_core field _after_ changing
171 * the timestamps in order to coordinate properly with
172 * xfs_iflush() so that we don't lose timestamp updates.
173 * This keeps us from having to hold the inode lock
174 * while doing this. We use the SYNCHRONIZE macro to
175 * ensure that the compiler does not reorder the update
176 * of i_update_core above the timestamp updates above.
177 */
178 SYNCHRONIZE();
179 ip->i_update_core = 1;
180 if (!(inode->i_state & I_NEW))
181 mark_inode_dirty_sync(inode);
182}
183
184
185/*
186 * Pull the link count and size up from the xfs inode to the linux inode
187 */
188STATIC void
189xfs_validate_fields(
190 struct inode *inode)
191{
192 struct xfs_inode *ip = XFS_I(inode);
193 loff_t size;
194
195 /* we're under i_sem so i_size can't change under us */
196 size = XFS_ISIZE(ip);
197 if (i_size_read(inode) != size)
198 i_size_write(inode, size);
199} 133}
200 134
201/* 135/*
@@ -245,8 +179,7 @@ STATIC void
245xfs_cleanup_inode( 179xfs_cleanup_inode(
246 struct inode *dir, 180 struct inode *dir,
247 struct inode *inode, 181 struct inode *inode,
248 struct dentry *dentry, 182 struct dentry *dentry)
249 int mode)
250{ 183{
251 struct xfs_name teardown; 184 struct xfs_name teardown;
252 185
@@ -257,10 +190,7 @@ xfs_cleanup_inode(
257 */ 190 */
258 xfs_dentry_to_name(&teardown, dentry); 191 xfs_dentry_to_name(&teardown, dentry);
259 192
260 if (S_ISDIR(mode)) 193 xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
261 xfs_rmdir(XFS_I(dir), &teardown, XFS_I(inode));
262 else
263 xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
264 iput(inode); 194 iput(inode);
265} 195}
266 196
@@ -275,7 +205,7 @@ xfs_vn_mknod(
275 struct xfs_inode *ip = NULL; 205 struct xfs_inode *ip = NULL;
276 xfs_acl_t *default_acl = NULL; 206 xfs_acl_t *default_acl = NULL;
277 struct xfs_name name; 207 struct xfs_name name;
278 attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; 208 int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS;
279 int error; 209 int error;
280 210
281 /* 211 /*
@@ -320,7 +250,7 @@ xfs_vn_mknod(
320 if (unlikely(error)) 250 if (unlikely(error))
321 goto out_free_acl; 251 goto out_free_acl;
322 252
323 inode = ip->i_vnode; 253 inode = VFS_I(ip);
324 254
325 error = xfs_init_security(inode, dir); 255 error = xfs_init_security(inode, dir);
326 if (unlikely(error)) 256 if (unlikely(error))
@@ -335,14 +265,11 @@ xfs_vn_mknod(
335 } 265 }
336 266
337 267
338 if (S_ISDIR(mode))
339 xfs_validate_fields(inode);
340 d_instantiate(dentry, inode); 268 d_instantiate(dentry, inode);
341 xfs_validate_fields(dir);
342 return -error; 269 return -error;
343 270
344 out_cleanup_inode: 271 out_cleanup_inode:
345 xfs_cleanup_inode(dir, inode, dentry, mode); 272 xfs_cleanup_inode(dir, inode, dentry);
346 out_free_acl: 273 out_free_acl:
347 if (default_acl) 274 if (default_acl)
348 _ACL_FREE(default_acl); 275 _ACL_FREE(default_acl);
@@ -382,7 +309,7 @@ xfs_vn_lookup(
382 return ERR_PTR(-ENAMETOOLONG); 309 return ERR_PTR(-ENAMETOOLONG);
383 310
384 xfs_dentry_to_name(&name, dentry); 311 xfs_dentry_to_name(&name, dentry);
385 error = xfs_lookup(XFS_I(dir), &name, &cip); 312 error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
386 if (unlikely(error)) { 313 if (unlikely(error)) {
387 if (unlikely(error != ENOENT)) 314 if (unlikely(error != ENOENT))
388 return ERR_PTR(-error); 315 return ERR_PTR(-error);
@@ -390,7 +317,47 @@ xfs_vn_lookup(
390 return NULL; 317 return NULL;
391 } 318 }
392 319
393 return d_splice_alias(cip->i_vnode, dentry); 320 return d_splice_alias(VFS_I(cip), dentry);
321}
322
323STATIC struct dentry *
324xfs_vn_ci_lookup(
325 struct inode *dir,
326 struct dentry *dentry,
327 struct nameidata *nd)
328{
329 struct xfs_inode *ip;
330 struct xfs_name xname;
331 struct xfs_name ci_name;
332 struct qstr dname;
333 int error;
334
335 if (dentry->d_name.len >= MAXNAMELEN)
336 return ERR_PTR(-ENAMETOOLONG);
337
338 xfs_dentry_to_name(&xname, dentry);
339 error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
340 if (unlikely(error)) {
341 if (unlikely(error != ENOENT))
342 return ERR_PTR(-error);
343 /*
344 * call d_add(dentry, NULL) here when d_drop_negative_children
345 * is called in xfs_vn_mknod (ie. allow negative dentries
346 * with CI filesystems).
347 */
348 return NULL;
349 }
350
351 /* if exact match, just splice and exit */
352 if (!ci_name.name)
353 return d_splice_alias(VFS_I(ip), dentry);
354
355 /* else case-insensitive match... */
356 dname.name = ci_name.name;
357 dname.len = ci_name.len;
358 dentry = d_add_ci(dentry, VFS_I(ip), &dname);
359 kmem_free(ci_name.name);
360 return dentry;
394} 361}
395 362
396STATIC int 363STATIC int
@@ -414,7 +381,6 @@ xfs_vn_link(
414 } 381 }
415 382
416 xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); 383 xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
417 xfs_validate_fields(inode);
418 d_instantiate(dentry, inode); 384 d_instantiate(dentry, inode);
419 return 0; 385 return 0;
420} 386}
@@ -424,19 +390,23 @@ xfs_vn_unlink(
424 struct inode *dir, 390 struct inode *dir,
425 struct dentry *dentry) 391 struct dentry *dentry)
426{ 392{
427 struct inode *inode;
428 struct xfs_name name; 393 struct xfs_name name;
429 int error; 394 int error;
430 395
431 inode = dentry->d_inode;
432 xfs_dentry_to_name(&name, dentry); 396 xfs_dentry_to_name(&name, dentry);
433 397
434 error = xfs_remove(XFS_I(dir), &name, XFS_I(inode)); 398 error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
435 if (likely(!error)) { 399 if (error)
436 xfs_validate_fields(dir); /* size needs update */ 400 return error;
437 xfs_validate_fields(inode); 401
438 } 402 /*
439 return -error; 403 * With unlink, the VFS makes the dentry "negative": no inode,
404 * but still hashed. This is incompatible with case-insensitive
405 * mode, so invalidate (unhash) the dentry in CI-mode.
406 */
407 if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
408 d_invalidate(dentry);
409 return 0;
440} 410}
441 411
442STATIC int 412STATIC int
@@ -459,43 +429,22 @@ xfs_vn_symlink(
459 if (unlikely(error)) 429 if (unlikely(error))
460 goto out; 430 goto out;
461 431
462 inode = cip->i_vnode; 432 inode = VFS_I(cip);
463 433
464 error = xfs_init_security(inode, dir); 434 error = xfs_init_security(inode, dir);
465 if (unlikely(error)) 435 if (unlikely(error))
466 goto out_cleanup_inode; 436 goto out_cleanup_inode;
467 437
468 d_instantiate(dentry, inode); 438 d_instantiate(dentry, inode);
469 xfs_validate_fields(dir);
470 xfs_validate_fields(inode);
471 return 0; 439 return 0;
472 440
473 out_cleanup_inode: 441 out_cleanup_inode:
474 xfs_cleanup_inode(dir, inode, dentry, 0); 442 xfs_cleanup_inode(dir, inode, dentry);
475 out: 443 out:
476 return -error; 444 return -error;
477} 445}
478 446
479STATIC int 447STATIC int
480xfs_vn_rmdir(
481 struct inode *dir,
482 struct dentry *dentry)
483{
484 struct inode *inode = dentry->d_inode;
485 struct xfs_name name;
486 int error;
487
488 xfs_dentry_to_name(&name, dentry);
489
490 error = xfs_rmdir(XFS_I(dir), &name, XFS_I(inode));
491 if (likely(!error)) {
492 xfs_validate_fields(inode);
493 xfs_validate_fields(dir);
494 }
495 return -error;
496}
497
498STATIC int
499xfs_vn_rename( 448xfs_vn_rename(
500 struct inode *odir, 449 struct inode *odir,
501 struct dentry *odentry, 450 struct dentry *odentry,
@@ -505,22 +454,13 @@ xfs_vn_rename(
505 struct inode *new_inode = ndentry->d_inode; 454 struct inode *new_inode = ndentry->d_inode;
506 struct xfs_name oname; 455 struct xfs_name oname;
507 struct xfs_name nname; 456 struct xfs_name nname;
508 int error;
509 457
510 xfs_dentry_to_name(&oname, odentry); 458 xfs_dentry_to_name(&oname, odentry);
511 xfs_dentry_to_name(&nname, ndentry); 459 xfs_dentry_to_name(&nname, ndentry);
512 460
513 error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), 461 return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
514 XFS_I(ndir), &nname, new_inode ? 462 XFS_I(ndir), &nname, new_inode ?
515 XFS_I(new_inode) : NULL); 463 XFS_I(new_inode) : NULL);
516 if (likely(!error)) {
517 if (new_inode)
518 xfs_validate_fields(new_inode);
519 xfs_validate_fields(odir);
520 if (ndir != odir)
521 xfs_validate_fields(ndir);
522 }
523 return -error;
524} 464}
525 465
526/* 466/*
@@ -659,57 +599,9 @@ xfs_vn_getattr(
659STATIC int 599STATIC int
660xfs_vn_setattr( 600xfs_vn_setattr(
661 struct dentry *dentry, 601 struct dentry *dentry,
662 struct iattr *attr) 602 struct iattr *iattr)
663{ 603{
664 struct inode *inode = dentry->d_inode; 604 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL);
665 unsigned int ia_valid = attr->ia_valid;
666 bhv_vattr_t vattr = { 0 };
667 int flags = 0;
668 int error;
669
670 if (ia_valid & ATTR_UID) {
671 vattr.va_mask |= XFS_AT_UID;
672 vattr.va_uid = attr->ia_uid;
673 }
674 if (ia_valid & ATTR_GID) {
675 vattr.va_mask |= XFS_AT_GID;
676 vattr.va_gid = attr->ia_gid;
677 }
678 if (ia_valid & ATTR_SIZE) {
679 vattr.va_mask |= XFS_AT_SIZE;
680 vattr.va_size = attr->ia_size;
681 }
682 if (ia_valid & ATTR_ATIME) {
683 vattr.va_mask |= XFS_AT_ATIME;
684 vattr.va_atime = attr->ia_atime;
685 inode->i_atime = attr->ia_atime;
686 }
687 if (ia_valid & ATTR_MTIME) {
688 vattr.va_mask |= XFS_AT_MTIME;
689 vattr.va_mtime = attr->ia_mtime;
690 }
691 if (ia_valid & ATTR_CTIME) {
692 vattr.va_mask |= XFS_AT_CTIME;
693 vattr.va_ctime = attr->ia_ctime;
694 }
695 if (ia_valid & ATTR_MODE) {
696 vattr.va_mask |= XFS_AT_MODE;
697 vattr.va_mode = attr->ia_mode;
698 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
699 inode->i_mode &= ~S_ISGID;
700 }
701
702 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))
703 flags |= ATTR_UTIME;
704#ifdef ATTR_NO_BLOCK
705 if ((ia_valid & ATTR_NO_BLOCK))
706 flags |= ATTR_NONBLOCK;
707#endif
708
709 error = xfs_setattr(XFS_I(inode), &vattr, flags, NULL);
710 if (likely(!error))
711 vn_revalidate(vn_from_inode(inode));
712 return -error;
713} 605}
714 606
715/* 607/*
@@ -727,109 +619,6 @@ xfs_vn_truncate(
727 WARN_ON(error); 619 WARN_ON(error);
728} 620}
729 621
730STATIC int
731xfs_vn_setxattr(
732 struct dentry *dentry,
733 const char *name,
734 const void *data,
735 size_t size,
736 int flags)
737{
738 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
739 char *attr = (char *)name;
740 attrnames_t *namesp;
741 int xflags = 0;
742 int error;
743
744 namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
745 if (!namesp)
746 return -EOPNOTSUPP;
747 attr += namesp->attr_namelen;
748 error = namesp->attr_capable(vp, NULL);
749 if (error)
750 return error;
751
752 /* Convert Linux syscall to XFS internal ATTR flags */
753 if (flags & XATTR_CREATE)
754 xflags |= ATTR_CREATE;
755 if (flags & XATTR_REPLACE)
756 xflags |= ATTR_REPLACE;
757 xflags |= namesp->attr_flag;
758 return namesp->attr_set(vp, attr, (void *)data, size, xflags);
759}
760
761STATIC ssize_t
762xfs_vn_getxattr(
763 struct dentry *dentry,
764 const char *name,
765 void *data,
766 size_t size)
767{
768 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
769 char *attr = (char *)name;
770 attrnames_t *namesp;
771 int xflags = 0;
772 ssize_t error;
773
774 namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
775 if (!namesp)
776 return -EOPNOTSUPP;
777 attr += namesp->attr_namelen;
778 error = namesp->attr_capable(vp, NULL);
779 if (error)
780 return error;
781
782 /* Convert Linux syscall to XFS internal ATTR flags */
783 if (!size) {
784 xflags |= ATTR_KERNOVAL;
785 data = NULL;
786 }
787 xflags |= namesp->attr_flag;
788 return namesp->attr_get(vp, attr, (void *)data, size, xflags);
789}
790
791STATIC ssize_t
792xfs_vn_listxattr(
793 struct dentry *dentry,
794 char *data,
795 size_t size)
796{
797 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
798 int error, xflags = ATTR_KERNAMELS;
799 ssize_t result;
800
801 if (!size)
802 xflags |= ATTR_KERNOVAL;
803 xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS;
804
805 error = attr_generic_list(vp, data, size, xflags, &result);
806 if (error < 0)
807 return error;
808 return result;
809}
810
811STATIC int
812xfs_vn_removexattr(
813 struct dentry *dentry,
814 const char *name)
815{
816 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
817 char *attr = (char *)name;
818 attrnames_t *namesp;
819 int xflags = 0;
820 int error;
821
822 namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
823 if (!namesp)
824 return -EOPNOTSUPP;
825 attr += namesp->attr_namelen;
826 error = namesp->attr_capable(vp, NULL);
827 if (error)
828 return error;
829 xflags |= namesp->attr_flag;
830 return namesp->attr_remove(vp, attr, xflags);
831}
832
833STATIC long 622STATIC long
834xfs_vn_fallocate( 623xfs_vn_fallocate(
835 struct inode *inode, 624 struct inode *inode,
@@ -853,18 +642,18 @@ xfs_vn_fallocate(
853 642
854 xfs_ilock(ip, XFS_IOLOCK_EXCL); 643 xfs_ilock(ip, XFS_IOLOCK_EXCL);
855 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 644 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
856 0, NULL, ATTR_NOLOCK); 645 0, NULL, XFS_ATTR_NOLOCK);
857 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && 646 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
858 offset + len > i_size_read(inode)) 647 offset + len > i_size_read(inode))
859 new_size = offset + len; 648 new_size = offset + len;
860 649
861 /* Change file size if needed */ 650 /* Change file size if needed */
862 if (new_size) { 651 if (new_size) {
863 bhv_vattr_t va; 652 struct iattr iattr;
864 653
865 va.va_mask = XFS_AT_SIZE; 654 iattr.ia_valid = ATTR_SIZE;
866 va.va_size = new_size; 655 iattr.ia_size = new_size;
867 error = xfs_setattr(ip, &va, ATTR_NOLOCK, NULL); 656 error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL);
868 } 657 }
869 658
870 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 659 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -872,46 +661,172 @@ out_error:
872 return error; 661 return error;
873} 662}
874 663
875const struct inode_operations xfs_inode_operations = { 664static const struct inode_operations xfs_inode_operations = {
876 .permission = xfs_vn_permission, 665 .permission = xfs_vn_permission,
877 .truncate = xfs_vn_truncate, 666 .truncate = xfs_vn_truncate,
878 .getattr = xfs_vn_getattr, 667 .getattr = xfs_vn_getattr,
879 .setattr = xfs_vn_setattr, 668 .setattr = xfs_vn_setattr,
880 .setxattr = xfs_vn_setxattr, 669 .setxattr = generic_setxattr,
881 .getxattr = xfs_vn_getxattr, 670 .getxattr = generic_getxattr,
671 .removexattr = generic_removexattr,
882 .listxattr = xfs_vn_listxattr, 672 .listxattr = xfs_vn_listxattr,
883 .removexattr = xfs_vn_removexattr,
884 .fallocate = xfs_vn_fallocate, 673 .fallocate = xfs_vn_fallocate,
885}; 674};
886 675
887const struct inode_operations xfs_dir_inode_operations = { 676static const struct inode_operations xfs_dir_inode_operations = {
888 .create = xfs_vn_create, 677 .create = xfs_vn_create,
889 .lookup = xfs_vn_lookup, 678 .lookup = xfs_vn_lookup,
890 .link = xfs_vn_link, 679 .link = xfs_vn_link,
891 .unlink = xfs_vn_unlink, 680 .unlink = xfs_vn_unlink,
892 .symlink = xfs_vn_symlink, 681 .symlink = xfs_vn_symlink,
893 .mkdir = xfs_vn_mkdir, 682 .mkdir = xfs_vn_mkdir,
894 .rmdir = xfs_vn_rmdir, 683 /*
684 * Yes, XFS uses the same method for rmdir and unlink.
685 *
 686 * There are some subtle differences deeper in the code,
687 * but we use S_ISDIR to check for those.
688 */
689 .rmdir = xfs_vn_unlink,
690 .mknod = xfs_vn_mknod,
691 .rename = xfs_vn_rename,
692 .permission = xfs_vn_permission,
693 .getattr = xfs_vn_getattr,
694 .setattr = xfs_vn_setattr,
695 .setxattr = generic_setxattr,
696 .getxattr = generic_getxattr,
697 .removexattr = generic_removexattr,
698 .listxattr = xfs_vn_listxattr,
699};
700
701static const struct inode_operations xfs_dir_ci_inode_operations = {
702 .create = xfs_vn_create,
703 .lookup = xfs_vn_ci_lookup,
704 .link = xfs_vn_link,
705 .unlink = xfs_vn_unlink,
706 .symlink = xfs_vn_symlink,
707 .mkdir = xfs_vn_mkdir,
708 /*
709 * Yes, XFS uses the same method for rmdir and unlink.
710 *
 711 * There are some subtle differences deeper in the code,
712 * but we use S_ISDIR to check for those.
713 */
714 .rmdir = xfs_vn_unlink,
895 .mknod = xfs_vn_mknod, 715 .mknod = xfs_vn_mknod,
896 .rename = xfs_vn_rename, 716 .rename = xfs_vn_rename,
897 .permission = xfs_vn_permission, 717 .permission = xfs_vn_permission,
898 .getattr = xfs_vn_getattr, 718 .getattr = xfs_vn_getattr,
899 .setattr = xfs_vn_setattr, 719 .setattr = xfs_vn_setattr,
900 .setxattr = xfs_vn_setxattr, 720 .setxattr = generic_setxattr,
901 .getxattr = xfs_vn_getxattr, 721 .getxattr = generic_getxattr,
722 .removexattr = generic_removexattr,
902 .listxattr = xfs_vn_listxattr, 723 .listxattr = xfs_vn_listxattr,
903 .removexattr = xfs_vn_removexattr,
904}; 724};
905 725
906const struct inode_operations xfs_symlink_inode_operations = { 726static const struct inode_operations xfs_symlink_inode_operations = {
907 .readlink = generic_readlink, 727 .readlink = generic_readlink,
908 .follow_link = xfs_vn_follow_link, 728 .follow_link = xfs_vn_follow_link,
909 .put_link = xfs_vn_put_link, 729 .put_link = xfs_vn_put_link,
910 .permission = xfs_vn_permission, 730 .permission = xfs_vn_permission,
911 .getattr = xfs_vn_getattr, 731 .getattr = xfs_vn_getattr,
912 .setattr = xfs_vn_setattr, 732 .setattr = xfs_vn_setattr,
913 .setxattr = xfs_vn_setxattr, 733 .setxattr = generic_setxattr,
914 .getxattr = xfs_vn_getxattr, 734 .getxattr = generic_getxattr,
735 .removexattr = generic_removexattr,
915 .listxattr = xfs_vn_listxattr, 736 .listxattr = xfs_vn_listxattr,
916 .removexattr = xfs_vn_removexattr,
917}; 737};
738
739STATIC void
740xfs_diflags_to_iflags(
741 struct inode *inode,
742 struct xfs_inode *ip)
743{
744 if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
745 inode->i_flags |= S_IMMUTABLE;
746 else
747 inode->i_flags &= ~S_IMMUTABLE;
748 if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
749 inode->i_flags |= S_APPEND;
750 else
751 inode->i_flags &= ~S_APPEND;
752 if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
753 inode->i_flags |= S_SYNC;
754 else
755 inode->i_flags &= ~S_SYNC;
756 if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
757 inode->i_flags |= S_NOATIME;
758 else
759 inode->i_flags &= ~S_NOATIME;
760}
761
762/*
763 * Initialize the Linux inode, set up the operation vectors and
764 * unlock the inode.
765 *
766 * When reading existing inodes from disk this is called directly
767 * from xfs_iget, when creating a new inode it is called from
768 * xfs_ialloc after setting up the inode.
769 */
770void
771xfs_setup_inode(
772 struct xfs_inode *ip)
773{
774 struct inode *inode = ip->i_vnode;
775
776 inode->i_mode = ip->i_d.di_mode;
777 inode->i_nlink = ip->i_d.di_nlink;
778 inode->i_uid = ip->i_d.di_uid;
779 inode->i_gid = ip->i_d.di_gid;
780
781 switch (inode->i_mode & S_IFMT) {
782 case S_IFBLK:
783 case S_IFCHR:
784 inode->i_rdev =
785 MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
786 sysv_minor(ip->i_df.if_u2.if_rdev));
787 break;
788 default:
789 inode->i_rdev = 0;
790 break;
791 }
792
793 inode->i_generation = ip->i_d.di_gen;
794 i_size_write(inode, ip->i_d.di_size);
795 inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec;
796 inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
797 inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
798 inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
799 inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
800 inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
801 xfs_diflags_to_iflags(inode, ip);
802 xfs_iflags_clear(ip, XFS_IMODIFIED);
803
804 switch (inode->i_mode & S_IFMT) {
805 case S_IFREG:
806 inode->i_op = &xfs_inode_operations;
807 inode->i_fop = &xfs_file_operations;
808 inode->i_mapping->a_ops = &xfs_address_space_operations;
809 break;
810 case S_IFDIR:
811 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
812 inode->i_op = &xfs_dir_ci_inode_operations;
813 else
814 inode->i_op = &xfs_dir_inode_operations;
815 inode->i_fop = &xfs_dir_file_operations;
816 break;
817 case S_IFLNK:
818 inode->i_op = &xfs_symlink_inode_operations;
819 if (!(ip->i_df.if_flags & XFS_IFINLINE))
820 inode->i_mapping->a_ops = &xfs_address_space_operations;
821 break;
822 default:
823 inode->i_op = &xfs_inode_operations;
824 init_special_inode(inode, inode->i_mode, inode->i_rdev);
825 break;
826 }
827
828 xfs_iflags_clear(ip, XFS_INEW);
829 barrier();
830
831 unlock_new_inode(inode);
832}
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 14d0deb7afff..8b1a1e31dc21 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -18,23 +18,14 @@
18#ifndef __XFS_IOPS_H__ 18#ifndef __XFS_IOPS_H__
19#define __XFS_IOPS_H__ 19#define __XFS_IOPS_H__
20 20
21extern const struct inode_operations xfs_inode_operations; 21struct xfs_inode;
22extern const struct inode_operations xfs_dir_inode_operations;
23extern const struct inode_operations xfs_symlink_inode_operations;
24 22
25extern const struct file_operations xfs_file_operations; 23extern const struct file_operations xfs_file_operations;
26extern const struct file_operations xfs_dir_file_operations; 24extern const struct file_operations xfs_dir_file_operations;
27extern const struct file_operations xfs_invis_file_operations; 25extern const struct file_operations xfs_invis_file_operations;
28 26
27extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
29 28
30struct xfs_inode; 29extern void xfs_setup_inode(struct xfs_inode *);
31extern void xfs_ichgtime(struct xfs_inode *, int);
32extern void xfs_ichgtime_fast(struct xfs_inode *, struct inode *, int);
33
34#define xfs_vtoi(vp) \
35 ((struct xfs_inode *)vn_to_inode(vp)->i_private)
36
37#define XFS_I(inode) \
38 ((struct xfs_inode *)(inode)->i_private)
39 30
40#endif /* __XFS_IOPS_H__ */ 31#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 4edc46915b57..cc0f7b3a9795 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -45,13 +45,13 @@
45#include <mrlock.h> 45#include <mrlock.h>
46#include <sv.h> 46#include <sv.h>
47#include <mutex.h> 47#include <mutex.h>
48#include <sema.h>
49#include <time.h> 48#include <time.h>
50 49
51#include <support/ktrace.h> 50#include <support/ktrace.h>
52#include <support/debug.h> 51#include <support/debug.h>
53#include <support/uuid.h> 52#include <support/uuid.h>
54 53
54#include <linux/semaphore.h>
55#include <linux/mm.h> 55#include <linux/mm.h>
56#include <linux/kernel.h> 56#include <linux/kernel.h>
57#include <linux/blkdev.h> 57#include <linux/blkdev.h>
@@ -76,6 +76,7 @@
76#include <linux/log2.h> 76#include <linux/log2.h>
77#include <linux/spinlock.h> 77#include <linux/spinlock.h>
78#include <linux/random.h> 78#include <linux/random.h>
79#include <linux/ctype.h>
79 80
80#include <asm/page.h> 81#include <asm/page.h>
81#include <asm/div64.h> 82#include <asm/div64.h>
@@ -125,8 +126,6 @@
125 126
126#define current_cpu() (raw_smp_processor_id()) 127#define current_cpu() (raw_smp_processor_id())
127#define current_pid() (current->pid) 128#define current_pid() (current->pid)
128#define current_fsuid(cred) (current->fsuid)
129#define current_fsgid(cred) (current->fsgid)
130#define current_test_flags(f) (current->flags & (f)) 129#define current_test_flags(f) (current->flags & (f))
131#define current_set_flags_nested(sp, f) \ 130#define current_set_flags_nested(sp, f) \
132 (*(sp) = current->flags, current->flags |= (f)) 131 (*(sp) = current->flags, current->flags |= (f))
@@ -179,7 +178,7 @@
179#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) 178#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL)
180#define xfs_stack_trace() dump_stack() 179#define xfs_stack_trace() dump_stack()
181#define xfs_itruncate_data(ip, off) \ 180#define xfs_itruncate_data(ip, off) \
182 (-vmtruncate(vn_to_inode(XFS_ITOV(ip)), (off))) 181 (-vmtruncate(VFS_I(ip), (off)))
183 182
184 183
185/* Move the kernel do_div definition off to one side */ 184/* Move the kernel do_div definition off to one side */
@@ -299,4 +298,11 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
299 return x; 298 return x;
300} 299}
301 300
301/* ARM old ABI has some weird alignment/padding */
302#if defined(__arm__) && !defined(__ARM_EABI__)
303#define __arch_pack __attribute__((packed))
304#else
305#define __arch_pack
306#endif
307
302#endif /* __XFS_LINUX__ */ 308#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 82333b3e118e..1957e5357d04 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -137,7 +137,7 @@ xfs_iozero(
137 struct address_space *mapping; 137 struct address_space *mapping;
138 int status; 138 int status;
139 139
140 mapping = ip->i_vnode->i_mapping; 140 mapping = VFS_I(ip)->i_mapping;
141 do { 141 do {
142 unsigned offset, bytes; 142 unsigned offset, bytes;
143 void *fsdata; 143 void *fsdata;
@@ -674,9 +674,7 @@ start:
674 */ 674 */
675 if (likely(!(ioflags & IO_INVIS) && 675 if (likely(!(ioflags & IO_INVIS) &&
676 !mnt_want_write(file->f_path.mnt))) { 676 !mnt_want_write(file->f_path.mnt))) {
677 file_update_time(file); 677 xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
678 xfs_ichgtime_fast(xip, inode,
679 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
680 mnt_drop_write(file->f_path.mnt); 678 mnt_drop_write(file->f_path.mnt);
681 } 679 }
682 680
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index e480b6102051..3d5b67c075c7 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -98,12 +98,21 @@ xfs_read_xfsstats(
98 return len; 98 return len;
99} 99}
100 100
101void 101int
102xfs_init_procfs(void) 102xfs_init_procfs(void)
103{ 103{
104 if (!proc_mkdir("fs/xfs", NULL)) 104 if (!proc_mkdir("fs/xfs", NULL))
105 return; 105 goto out;
106 create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL); 106
107 if (!create_proc_read_entry("fs/xfs/stat", 0, NULL,
108 xfs_read_xfsstats, NULL))
109 goto out_remove_entry;
110 return 0;
111
112 out_remove_entry:
113 remove_proc_entry("fs/xfs", NULL);
114 out:
115 return -ENOMEM;
107} 116}
108 117
109void 118void
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index afd0b0d5fdb2..e83820febc9f 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -134,7 +134,7 @@ DECLARE_PER_CPU(struct xfsstats, xfsstats);
134#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) 134#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--)
135#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) 135#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc))
136 136
137extern void xfs_init_procfs(void); 137extern int xfs_init_procfs(void);
138extern void xfs_cleanup_procfs(void); 138extern void xfs_cleanup_procfs(void);
139 139
140 140
@@ -144,8 +144,14 @@ extern void xfs_cleanup_procfs(void);
144# define XFS_STATS_DEC(count) 144# define XFS_STATS_DEC(count)
145# define XFS_STATS_ADD(count, inc) 145# define XFS_STATS_ADD(count, inc)
146 146
147static inline void xfs_init_procfs(void) { }; 147static inline int xfs_init_procfs(void)
148static inline void xfs_cleanup_procfs(void) { }; 148{
149 return 0;
150}
151
152static inline void xfs_cleanup_procfs(void)
153{
154}
149 155
150#endif /* !CONFIG_PROC_FS */ 156#endif /* !CONFIG_PROC_FS */
151 157
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 943381284e2e..73c65f19e549 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -52,6 +52,12 @@
52#include "xfs_version.h" 52#include "xfs_version.h"
53#include "xfs_log_priv.h" 53#include "xfs_log_priv.h"
54#include "xfs_trans_priv.h" 54#include "xfs_trans_priv.h"
55#include "xfs_filestream.h"
56#include "xfs_da_btree.h"
57#include "xfs_dir2_trace.h"
58#include "xfs_extfree_item.h"
59#include "xfs_mru_cache.h"
60#include "xfs_inode_item.h"
55 61
56#include <linux/namei.h> 62#include <linux/namei.h>
57#include <linux/init.h> 63#include <linux/init.h>
@@ -60,6 +66,7 @@
60#include <linux/writeback.h> 66#include <linux/writeback.h>
61#include <linux/kthread.h> 67#include <linux/kthread.h>
62#include <linux/freezer.h> 68#include <linux/freezer.h>
69#include <linux/parser.h>
63 70
64static struct quotactl_ops xfs_quotactl_operations; 71static struct quotactl_ops xfs_quotactl_operations;
65static struct super_operations xfs_super_operations; 72static struct super_operations xfs_super_operations;
@@ -74,7 +81,10 @@ xfs_args_allocate(
74{ 81{
75 struct xfs_mount_args *args; 82 struct xfs_mount_args *args;
76 83
77 args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP); 84 args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
85 if (!args)
86 return NULL;
87
78 args->logbufs = args->logbufsize = -1; 88 args->logbufs = args->logbufsize = -1;
79 strncpy(args->fsname, sb->s_id, MAXNAMELEN); 89 strncpy(args->fsname, sb->s_id, MAXNAMELEN);
80 90
@@ -138,6 +148,23 @@ xfs_args_allocate(
138#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ 148#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */
139#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ 149#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */
140 150
151/*
152 * Table driven mount option parser.
153 *
154 * Currently only used for remount, but it will be used for mount
155 * in the future, too.
156 */
157enum {
158 Opt_barrier, Opt_nobarrier, Opt_err
159};
160
161static match_table_t tokens = {
162 {Opt_barrier, "barrier"},
163 {Opt_nobarrier, "nobarrier"},
164 {Opt_err, NULL}
165};
166
167
141STATIC unsigned long 168STATIC unsigned long
142suffix_strtoul(char *s, char **endp, unsigned int base) 169suffix_strtoul(char *s, char **endp, unsigned int base)
143{ 170{
@@ -314,6 +341,7 @@ xfs_parseargs(
314 args->flags |= XFSMNT_ATTR2; 341 args->flags |= XFSMNT_ATTR2;
315 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { 342 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
316 args->flags &= ~XFSMNT_ATTR2; 343 args->flags &= ~XFSMNT_ATTR2;
344 args->flags |= XFSMNT_NOATTR2;
317 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { 345 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
318 args->flags2 |= XFSMNT2_FILESTREAMS; 346 args->flags2 |= XFSMNT2_FILESTREAMS;
319 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { 347 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
@@ -553,115 +581,6 @@ xfs_max_file_offset(
553 return (((__uint64_t)pagefactor) << bitshift) - 1; 581 return (((__uint64_t)pagefactor) << bitshift) - 1;
554} 582}
555 583
556STATIC_INLINE void
557xfs_set_inodeops(
558 struct inode *inode)
559{
560 switch (inode->i_mode & S_IFMT) {
561 case S_IFREG:
562 inode->i_op = &xfs_inode_operations;
563 inode->i_fop = &xfs_file_operations;
564 inode->i_mapping->a_ops = &xfs_address_space_operations;
565 break;
566 case S_IFDIR:
567 inode->i_op = &xfs_dir_inode_operations;
568 inode->i_fop = &xfs_dir_file_operations;
569 break;
570 case S_IFLNK:
571 inode->i_op = &xfs_symlink_inode_operations;
572 if (!(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE))
573 inode->i_mapping->a_ops = &xfs_address_space_operations;
574 break;
575 default:
576 inode->i_op = &xfs_inode_operations;
577 init_special_inode(inode, inode->i_mode, inode->i_rdev);
578 break;
579 }
580}
581
582STATIC_INLINE void
583xfs_revalidate_inode(
584 xfs_mount_t *mp,
585 bhv_vnode_t *vp,
586 xfs_inode_t *ip)
587{
588 struct inode *inode = vn_to_inode(vp);
589
590 inode->i_mode = ip->i_d.di_mode;
591 inode->i_nlink = ip->i_d.di_nlink;
592 inode->i_uid = ip->i_d.di_uid;
593 inode->i_gid = ip->i_d.di_gid;
594
595 switch (inode->i_mode & S_IFMT) {
596 case S_IFBLK:
597 case S_IFCHR:
598 inode->i_rdev =
599 MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
600 sysv_minor(ip->i_df.if_u2.if_rdev));
601 break;
602 default:
603 inode->i_rdev = 0;
604 break;
605 }
606
607 inode->i_generation = ip->i_d.di_gen;
608 i_size_write(inode, ip->i_d.di_size);
609 inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec;
610 inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
611 inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
612 inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
613 inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
614 inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
615 if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
616 inode->i_flags |= S_IMMUTABLE;
617 else
618 inode->i_flags &= ~S_IMMUTABLE;
619 if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
620 inode->i_flags |= S_APPEND;
621 else
622 inode->i_flags &= ~S_APPEND;
623 if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
624 inode->i_flags |= S_SYNC;
625 else
626 inode->i_flags &= ~S_SYNC;
627 if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
628 inode->i_flags |= S_NOATIME;
629 else
630 inode->i_flags &= ~S_NOATIME;
631 xfs_iflags_clear(ip, XFS_IMODIFIED);
632}
633
634void
635xfs_initialize_vnode(
636 struct xfs_mount *mp,
637 bhv_vnode_t *vp,
638 struct xfs_inode *ip)
639{
640 struct inode *inode = vn_to_inode(vp);
641
642 if (!ip->i_vnode) {
643 ip->i_vnode = vp;
644 inode->i_private = ip;
645 }
646
647 /*
648 * We need to set the ops vectors, and unlock the inode, but if
649 * we have been called during the new inode create process, it is
650 * too early to fill in the Linux inode. We will get called a
651 * second time once the inode is properly set up, and then we can
652 * finish our work.
653 */
654 if (ip->i_d.di_mode != 0 && (inode->i_state & I_NEW)) {
655 xfs_revalidate_inode(mp, vp, ip);
656 xfs_set_inodeops(inode);
657
658 xfs_iflags_clear(ip, XFS_INEW);
659 barrier();
660
661 unlock_new_inode(inode);
662 }
663}
664
665int 584int
666xfs_blkdev_get( 585xfs_blkdev_get(
667 xfs_mount_t *mp, 586 xfs_mount_t *mp,
@@ -733,14 +652,6 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
733 return; 652 return;
734 } 653 }
735 654
736 if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
737 QUEUE_ORDERED_NONE) {
738 xfs_fs_cmn_err(CE_NOTE, mp,
739 "Disabling barriers, not supported by the underlying device");
740 mp->m_flags &= ~XFS_MOUNT_BARRIER;
741 return;
742 }
743
744 if (xfs_readonly_buftarg(mp->m_ddev_targp)) { 655 if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
745 xfs_fs_cmn_err(CE_NOTE, mp, 656 xfs_fs_cmn_err(CE_NOTE, mp,
746 "Disabling barriers, underlying device is readonly"); 657 "Disabling barriers, underlying device is readonly");
@@ -764,6 +675,139 @@ xfs_blkdev_issue_flush(
764 blkdev_issue_flush(buftarg->bt_bdev, NULL); 675 blkdev_issue_flush(buftarg->bt_bdev, NULL);
765} 676}
766 677
678STATIC void
679xfs_close_devices(
680 struct xfs_mount *mp)
681{
682 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
683 struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
684 xfs_free_buftarg(mp->m_logdev_targp);
685 xfs_blkdev_put(logdev);
686 }
687 if (mp->m_rtdev_targp) {
688 struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
689 xfs_free_buftarg(mp->m_rtdev_targp);
690 xfs_blkdev_put(rtdev);
691 }
692 xfs_free_buftarg(mp->m_ddev_targp);
693}
694
695/*
696 * The file system configurations are:
697 * (1) device (partition) with data and internal log
698 * (2) logical volume with data and log subvolumes.
699 * (3) logical volume with data, log, and realtime subvolumes.
700 *
701 * We only have to handle opening the log and realtime volumes here if
702 * they are present. The data subvolume has already been opened by
703 * get_sb_bdev() and is stored in sb->s_bdev.
704 */
705STATIC int
706xfs_open_devices(
707 struct xfs_mount *mp,
708 struct xfs_mount_args *args)
709{
710 struct block_device *ddev = mp->m_super->s_bdev;
711 struct block_device *logdev = NULL, *rtdev = NULL;
712 int error;
713
714 /*
715 * Open real time and log devices - order is important.
716 */
717 if (args->logname[0]) {
718 error = xfs_blkdev_get(mp, args->logname, &logdev);
719 if (error)
720 goto out;
721 }
722
723 if (args->rtname[0]) {
724 error = xfs_blkdev_get(mp, args->rtname, &rtdev);
725 if (error)
726 goto out_close_logdev;
727
728 if (rtdev == ddev || rtdev == logdev) {
729 cmn_err(CE_WARN,
730 "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
731 error = EINVAL;
732 goto out_close_rtdev;
733 }
734 }
735
736 /*
737 * Setup xfs_mount buffer target pointers
738 */
739 error = ENOMEM;
740 mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
741 if (!mp->m_ddev_targp)
742 goto out_close_rtdev;
743
744 if (rtdev) {
745 mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
746 if (!mp->m_rtdev_targp)
747 goto out_free_ddev_targ;
748 }
749
750 if (logdev && logdev != ddev) {
751 mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1);
752 if (!mp->m_logdev_targp)
753 goto out_free_rtdev_targ;
754 } else {
755 mp->m_logdev_targp = mp->m_ddev_targp;
756 }
757
758 return 0;
759
760 out_free_rtdev_targ:
761 if (mp->m_rtdev_targp)
762 xfs_free_buftarg(mp->m_rtdev_targp);
763 out_free_ddev_targ:
764 xfs_free_buftarg(mp->m_ddev_targp);
765 out_close_rtdev:
766 if (rtdev)
767 xfs_blkdev_put(rtdev);
768 out_close_logdev:
769 if (logdev && logdev != ddev)
770 xfs_blkdev_put(logdev);
771 out:
772 return error;
773}
774
775/*
776 * Setup xfs_mount buffer target pointers based on superblock
777 */
778STATIC int
779xfs_setup_devices(
780 struct xfs_mount *mp)
781{
782 int error;
783
784 error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
785 mp->m_sb.sb_sectsize);
786 if (error)
787 return error;
788
789 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
790 unsigned int log_sector_size = BBSIZE;
791
792 if (xfs_sb_version_hassector(&mp->m_sb))
793 log_sector_size = mp->m_sb.sb_logsectsize;
794 error = xfs_setsize_buftarg(mp->m_logdev_targp,
795 mp->m_sb.sb_blocksize,
796 log_sector_size);
797 if (error)
798 return error;
799 }
800 if (mp->m_rtdev_targp) {
801 error = xfs_setsize_buftarg(mp->m_rtdev_targp,
802 mp->m_sb.sb_blocksize,
803 mp->m_sb.sb_sectsize);
804 if (error)
805 return error;
806 }
807
808 return 0;
809}
810
767/* 811/*
768 * XFS AIL push thread support 812 * XFS AIL push thread support
769 */ 813 */
@@ -826,62 +870,21 @@ STATIC struct inode *
826xfs_fs_alloc_inode( 870xfs_fs_alloc_inode(
827 struct super_block *sb) 871 struct super_block *sb)
828{ 872{
829 bhv_vnode_t *vp; 873 return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
830
831 vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
832 if (unlikely(!vp))
833 return NULL;
834 return vn_to_inode(vp);
835} 874}
836 875
837STATIC void 876STATIC void
838xfs_fs_destroy_inode( 877xfs_fs_destroy_inode(
839 struct inode *inode) 878 struct inode *inode)
840{ 879{
841 kmem_zone_free(xfs_vnode_zone, vn_from_inode(inode)); 880 kmem_zone_free(xfs_vnode_zone, inode);
842} 881}
843 882
844STATIC void 883STATIC void
845xfs_fs_inode_init_once( 884xfs_fs_inode_init_once(
846 void *vnode) 885 void *vnode)
847{ 886{
848 inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); 887 inode_init_once((struct inode *)vnode);
849}
850
851STATIC int __init
852xfs_init_zones(void)
853{
854 xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
855 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
856 KM_ZONE_SPREAD,
857 xfs_fs_inode_init_once);
858 if (!xfs_vnode_zone)
859 goto out;
860
861 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
862 if (!xfs_ioend_zone)
863 goto out_destroy_vnode_zone;
864
865 xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
866 xfs_ioend_zone);
867 if (!xfs_ioend_pool)
868 goto out_free_ioend_zone;
869 return 0;
870
871 out_free_ioend_zone:
872 kmem_zone_destroy(xfs_ioend_zone);
873 out_destroy_vnode_zone:
874 kmem_zone_destroy(xfs_vnode_zone);
875 out:
876 return -ENOMEM;
877}
878
879STATIC void
880xfs_destroy_zones(void)
881{
882 mempool_destroy(xfs_ioend_pool);
883 kmem_zone_destroy(xfs_vnode_zone);
884 kmem_zone_destroy(xfs_ioend_zone);
885} 888}
886 889
887/* 890/*
@@ -986,7 +989,7 @@ void
986xfs_flush_inode( 989xfs_flush_inode(
987 xfs_inode_t *ip) 990 xfs_inode_t *ip)
988{ 991{
989 struct inode *inode = ip->i_vnode; 992 struct inode *inode = VFS_I(ip);
990 993
991 igrab(inode); 994 igrab(inode);
992 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); 995 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
@@ -1011,7 +1014,7 @@ void
1011xfs_flush_device( 1014xfs_flush_device(
1012 xfs_inode_t *ip) 1015 xfs_inode_t *ip)
1013{ 1016{
1014 struct inode *inode = vn_to_inode(XFS_ITOV(ip)); 1017 struct inode *inode = VFS_I(ip);
1015 1018
1016 igrab(inode); 1019 igrab(inode);
1017 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); 1020 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
@@ -1073,7 +1076,7 @@ xfssyncd(
1073 list_del(&work->w_list); 1076 list_del(&work->w_list);
1074 if (work == &mp->m_sync_work) 1077 if (work == &mp->m_sync_work)
1075 continue; 1078 continue;
1076 kmem_free(work, sizeof(struct bhv_vfs_sync_work)); 1079 kmem_free(work);
1077 } 1080 }
1078 } 1081 }
1079 1082
@@ -1081,18 +1084,76 @@ xfssyncd(
1081} 1084}
1082 1085
1083STATIC void 1086STATIC void
1087xfs_free_fsname(
1088 struct xfs_mount *mp)
1089{
1090 kfree(mp->m_fsname);
1091 kfree(mp->m_rtname);
1092 kfree(mp->m_logname);
1093}
1094
1095STATIC void
1084xfs_fs_put_super( 1096xfs_fs_put_super(
1085 struct super_block *sb) 1097 struct super_block *sb)
1086{ 1098{
1087 struct xfs_mount *mp = XFS_M(sb); 1099 struct xfs_mount *mp = XFS_M(sb);
1100 struct xfs_inode *rip = mp->m_rootip;
1101 int unmount_event_flags = 0;
1088 int error; 1102 int error;
1089 1103
1090 kthread_stop(mp->m_sync_task); 1104 kthread_stop(mp->m_sync_task);
1091 1105
1092 xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI); 1106 xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
1093 error = xfs_unmount(mp, 0, NULL); 1107
1094 if (error) 1108#ifdef HAVE_DMAPI
1095 printk("XFS: unmount got error=%d\n", error); 1109 if (mp->m_flags & XFS_MOUNT_DMAPI) {
1110 unmount_event_flags =
1111 (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
1112 0 : DM_FLAGS_UNWANTED;
1113 /*
1114 * Ignore error from dmapi here, first unmount is not allowed
1115 * to fail anyway, and second we wouldn't want to fail a
1116 * unmount because of dmapi.
1117 */
1118 XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
1119 NULL, NULL, 0, 0, unmount_event_flags);
1120 }
1121#endif
1122
1123 /*
1124 * Blow away any referenced inode in the filestreams cache.
1125 * This can and will cause log traffic as inodes go inactive
1126 * here.
1127 */
1128 xfs_filestream_unmount(mp);
1129
1130 XFS_bflush(mp->m_ddev_targp);
1131 error = xfs_unmount_flush(mp, 0);
1132 WARN_ON(error);
1133
1134 /*
1135 * If we're forcing a shutdown, typically because of a media error,
1136 * we want to make sure we invalidate dirty pages that belong to
1137 * referenced vnodes as well.
1138 */
1139 if (XFS_FORCED_SHUTDOWN(mp)) {
1140 error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
1141 ASSERT(error != EFSCORRUPTED);
1142 }
1143
1144 if (mp->m_flags & XFS_MOUNT_DMAPI) {
1145 XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
1146 unmount_event_flags);
1147 }
1148
1149 xfs_unmountfs(mp);
1150 xfs_freesb(mp);
1151 xfs_icsb_destroy_counters(mp);
1152 xfs_close_devices(mp);
1153 xfs_qmops_put(mp);
1154 xfs_dmops_put(mp);
1155 xfs_free_fsname(mp);
1156 kfree(mp);
1096} 1157}
1097 1158
1098STATIC void 1159STATIC void
@@ -1215,14 +1276,54 @@ xfs_fs_remount(
1215 char *options) 1276 char *options)
1216{ 1277{
1217 struct xfs_mount *mp = XFS_M(sb); 1278 struct xfs_mount *mp = XFS_M(sb);
1218 struct xfs_mount_args *args = xfs_args_allocate(sb, 0); 1279 substring_t args[MAX_OPT_ARGS];
1219 int error; 1280 char *p;
1220 1281
1221 error = xfs_parseargs(mp, options, args, 1); 1282 while ((p = strsep(&options, ",")) != NULL) {
1222 if (!error) 1283 int token;
1223 error = xfs_mntupdate(mp, flags, args); 1284
1224 kmem_free(args, sizeof(*args)); 1285 if (!*p)
1225 return -error; 1286 continue;
1287
1288 token = match_token(p, tokens, args);
1289 switch (token) {
1290 case Opt_barrier:
1291 mp->m_flags |= XFS_MOUNT_BARRIER;
1292
1293 /*
1294 * Test if barriers are actually working if we can,
1295 * else delay this check until the filesystem is
1296 * marked writeable.
1297 */
1298 if (!(mp->m_flags & XFS_MOUNT_RDONLY))
1299 xfs_mountfs_check_barriers(mp);
1300 break;
1301 case Opt_nobarrier:
1302 mp->m_flags &= ~XFS_MOUNT_BARRIER;
1303 break;
1304 default:
1305 printk(KERN_INFO
1306 "XFS: mount option \"%s\" not supported for remount\n", p);
1307 return -EINVAL;
1308 }
1309 }
1310
1311 /* rw/ro -> rw */
1312 if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
1313 mp->m_flags &= ~XFS_MOUNT_RDONLY;
1314 if (mp->m_flags & XFS_MOUNT_BARRIER)
1315 xfs_mountfs_check_barriers(mp);
1316 }
1317
1318 /* rw -> ro */
1319 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
1320 xfs_filestream_flush(mp);
1321 xfs_sync(mp, SYNC_DATA_QUIESCE);
1322 xfs_attr_quiesce(mp);
1323 mp->m_flags |= XFS_MOUNT_RDONLY;
1324 }
1325
1326 return 0;
1226} 1327}
1227 1328
1228/* 1329/*
@@ -1299,6 +1400,245 @@ xfs_fs_setxquota(
1299 Q_XSETPQLIM), id, (caddr_t)fdq); 1400 Q_XSETPQLIM), id, (caddr_t)fdq);
1300} 1401}
1301 1402
1403/*
1404 * This function fills in xfs_mount_t fields based on mount args.
1405 * Note: the superblock has _not_ yet been read in.
1406 */
1407STATIC int
1408xfs_start_flags(
1409 struct xfs_mount_args *ap,
1410 struct xfs_mount *mp)
1411{
1412 int error;
1413
1414 /* Values are in BBs */
1415 if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
1416 /*
1417 * At this point the superblock has not been read
1418 * in, therefore we do not know the block size.
1419 * Before the mount call ends we will convert
1420 * these to FSBs.
1421 */
1422 mp->m_dalign = ap->sunit;
1423 mp->m_swidth = ap->swidth;
1424 }
1425
1426 if (ap->logbufs != -1 &&
1427 ap->logbufs != 0 &&
1428 (ap->logbufs < XLOG_MIN_ICLOGS ||
1429 ap->logbufs > XLOG_MAX_ICLOGS)) {
1430 cmn_err(CE_WARN,
1431 "XFS: invalid logbufs value: %d [not %d-%d]",
1432 ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
1433 return XFS_ERROR(EINVAL);
1434 }
1435 mp->m_logbufs = ap->logbufs;
1436 if (ap->logbufsize != -1 &&
1437 ap->logbufsize != 0 &&
1438 (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
1439 ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
1440 !is_power_of_2(ap->logbufsize))) {
1441 cmn_err(CE_WARN,
1442 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
1443 ap->logbufsize);
1444 return XFS_ERROR(EINVAL);
1445 }
1446
1447 error = ENOMEM;
1448
1449 mp->m_logbsize = ap->logbufsize;
1450 mp->m_fsname_len = strlen(ap->fsname) + 1;
1451
1452 mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL);
1453 if (!mp->m_fsname)
1454 goto out;
1455
1456 if (ap->rtname[0]) {
1457 mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL);
1458 if (!mp->m_rtname)
1459 goto out_free_fsname;
1460
1461 }
1462
1463 if (ap->logname[0]) {
1464 mp->m_logname = kstrdup(ap->logname, GFP_KERNEL);
1465 if (!mp->m_logname)
1466 goto out_free_rtname;
1467 }
1468
1469 if (ap->flags & XFSMNT_WSYNC)
1470 mp->m_flags |= XFS_MOUNT_WSYNC;
1471#if XFS_BIG_INUMS
1472 if (ap->flags & XFSMNT_INO64) {
1473 mp->m_flags |= XFS_MOUNT_INO64;
1474 mp->m_inoadd = XFS_INO64_OFFSET;
1475 }
1476#endif
1477 if (ap->flags & XFSMNT_RETERR)
1478 mp->m_flags |= XFS_MOUNT_RETERR;
1479 if (ap->flags & XFSMNT_NOALIGN)
1480 mp->m_flags |= XFS_MOUNT_NOALIGN;
1481 if (ap->flags & XFSMNT_SWALLOC)
1482 mp->m_flags |= XFS_MOUNT_SWALLOC;
1483 if (ap->flags & XFSMNT_OSYNCISOSYNC)
1484 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
1485 if (ap->flags & XFSMNT_32BITINODES)
1486 mp->m_flags |= XFS_MOUNT_32BITINODES;
1487
1488 if (ap->flags & XFSMNT_IOSIZE) {
1489 if (ap->iosizelog > XFS_MAX_IO_LOG ||
1490 ap->iosizelog < XFS_MIN_IO_LOG) {
1491 cmn_err(CE_WARN,
1492 "XFS: invalid log iosize: %d [not %d-%d]",
1493 ap->iosizelog, XFS_MIN_IO_LOG,
1494 XFS_MAX_IO_LOG);
1495 return XFS_ERROR(EINVAL);
1496 }
1497
1498 mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
1499 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
1500 }
1501
1502 if (ap->flags & XFSMNT_IKEEP)
1503 mp->m_flags |= XFS_MOUNT_IKEEP;
1504 if (ap->flags & XFSMNT_DIRSYNC)
1505 mp->m_flags |= XFS_MOUNT_DIRSYNC;
1506 if (ap->flags & XFSMNT_ATTR2)
1507 mp->m_flags |= XFS_MOUNT_ATTR2;
1508 if (ap->flags & XFSMNT_NOATTR2)
1509 mp->m_flags |= XFS_MOUNT_NOATTR2;
1510
1511 if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
1512 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
1513
1514 /*
1515 * no recovery flag requires a read-only mount
1516 */
1517 if (ap->flags & XFSMNT_NORECOVERY) {
1518 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
1519 cmn_err(CE_WARN,
1520 "XFS: tried to mount a FS read-write without recovery!");
1521 return XFS_ERROR(EINVAL);
1522 }
1523 mp->m_flags |= XFS_MOUNT_NORECOVERY;
1524 }
1525
1526 if (ap->flags & XFSMNT_NOUUID)
1527 mp->m_flags |= XFS_MOUNT_NOUUID;
1528 if (ap->flags & XFSMNT_BARRIER)
1529 mp->m_flags |= XFS_MOUNT_BARRIER;
1530 else
1531 mp->m_flags &= ~XFS_MOUNT_BARRIER;
1532
1533 if (ap->flags2 & XFSMNT2_FILESTREAMS)
1534 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
1535
1536 if (ap->flags & XFSMNT_DMAPI)
1537 mp->m_flags |= XFS_MOUNT_DMAPI;
1538 return 0;
1539
1540
1541 out_free_rtname:
1542 kfree(mp->m_rtname);
1543 out_free_fsname:
1544 kfree(mp->m_fsname);
1545 out:
1546 return error;
1547}
1548
1549/*
1550 * This function fills in xfs_mount_t fields based on mount args.
1551 * Note: the superblock _has_ now been read in.
1552 */
1553STATIC int
1554xfs_finish_flags(
1555 struct xfs_mount_args *ap,
1556 struct xfs_mount *mp)
1557{
1558 int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
1559
1560 /* Fail a mount where the logbuf is smaller then the log stripe */
1561 if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1562 if ((ap->logbufsize <= 0) &&
1563 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
1564 mp->m_logbsize = mp->m_sb.sb_logsunit;
1565 } else if (ap->logbufsize > 0 &&
1566 ap->logbufsize < mp->m_sb.sb_logsunit) {
1567 cmn_err(CE_WARN,
1568 "XFS: logbuf size must be greater than or equal to log stripe size");
1569 return XFS_ERROR(EINVAL);
1570 }
1571 } else {
1572 /* Fail a mount if the logbuf is larger than 32K */
1573 if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
1574 cmn_err(CE_WARN,
1575 "XFS: logbuf size for version 1 logs must be 16K or 32K");
1576 return XFS_ERROR(EINVAL);
1577 }
1578 }
1579
1580 /*
1581 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
1582 * told by noattr2 to turn it off
1583 */
1584 if (xfs_sb_version_hasattr2(&mp->m_sb) &&
1585 !(ap->flags & XFSMNT_NOATTR2))
1586 mp->m_flags |= XFS_MOUNT_ATTR2;
1587
1588 /*
1589 * prohibit r/w mounts of read-only filesystems
1590 */
1591 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1592 cmn_err(CE_WARN,
1593 "XFS: cannot mount a read-only filesystem as read-write");
1594 return XFS_ERROR(EROFS);
1595 }
1596
1597 /*
1598 * check for shared mount.
1599 */
1600 if (ap->flags & XFSMNT_SHARED) {
1601 if (!xfs_sb_version_hasshared(&mp->m_sb))
1602 return XFS_ERROR(EINVAL);
1603
1604 /*
1605 * For IRIX 6.5, shared mounts must have the shared
1606 * version bit set, have the persistent readonly
1607 * field set, must be version 0 and can only be mounted
1608 * read-only.
1609 */
1610 if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
1611 (mp->m_sb.sb_shared_vn != 0))
1612 return XFS_ERROR(EINVAL);
1613
1614 mp->m_flags |= XFS_MOUNT_SHARED;
1615
1616 /*
1617 * Shared XFS V0 can't deal with DMI. Return EINVAL.
1618 */
1619 if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
1620 return XFS_ERROR(EINVAL);
1621 }
1622
1623 if (ap->flags & XFSMNT_UQUOTA) {
1624 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
1625 if (ap->flags & XFSMNT_UQUOTAENF)
1626 mp->m_qflags |= XFS_UQUOTA_ENFD;
1627 }
1628
1629 if (ap->flags & XFSMNT_GQUOTA) {
1630 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
1631 if (ap->flags & XFSMNT_GQUOTAENF)
1632 mp->m_qflags |= XFS_OQUOTA_ENFD;
1633 } else if (ap->flags & XFSMNT_PQUOTA) {
1634 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
1635 if (ap->flags & XFSMNT_PQUOTAENF)
1636 mp->m_qflags |= XFS_OQUOTA_ENFD;
1637 }
1638
1639 return 0;
1640}
1641
1302STATIC int 1642STATIC int
1303xfs_fs_fill_super( 1643xfs_fs_fill_super(
1304 struct super_block *sb, 1644 struct super_block *sb,
@@ -1307,11 +1647,21 @@ xfs_fs_fill_super(
1307{ 1647{
1308 struct inode *root; 1648 struct inode *root;
1309 struct xfs_mount *mp = NULL; 1649 struct xfs_mount *mp = NULL;
1310 struct xfs_mount_args *args = xfs_args_allocate(sb, silent); 1650 struct xfs_mount_args *args;
1311 int error; 1651 int flags = 0, error = ENOMEM;
1312 1652
1313 mp = xfs_mount_init(); 1653 args = xfs_args_allocate(sb, silent);
1654 if (!args)
1655 return -ENOMEM;
1314 1656
1657 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1658 if (!mp)
1659 goto out_free_args;
1660
1661 spin_lock_init(&mp->m_sb_lock);
1662 mutex_init(&mp->m_ilock);
1663 mutex_init(&mp->m_growlock);
1664 atomic_set(&mp->m_active_trans, 0);
1315 INIT_LIST_HEAD(&mp->m_sync_list); 1665 INIT_LIST_HEAD(&mp->m_sync_list);
1316 spin_lock_init(&mp->m_sync_lock); 1666 spin_lock_init(&mp->m_sync_lock);
1317 init_waitqueue_head(&mp->m_wait_single_sync_task); 1667 init_waitqueue_head(&mp->m_wait_single_sync_task);
@@ -1324,16 +1674,60 @@ xfs_fs_fill_super(
1324 1674
1325 error = xfs_parseargs(mp, (char *)data, args, 0); 1675 error = xfs_parseargs(mp, (char *)data, args, 0);
1326 if (error) 1676 if (error)
1327 goto fail_vfsop; 1677 goto out_free_mp;
1328 1678
1329 sb_min_blocksize(sb, BBSIZE); 1679 sb_min_blocksize(sb, BBSIZE);
1680 sb->s_xattr = xfs_xattr_handlers;
1330 sb->s_export_op = &xfs_export_operations; 1681 sb->s_export_op = &xfs_export_operations;
1331 sb->s_qcop = &xfs_quotactl_operations; 1682 sb->s_qcop = &xfs_quotactl_operations;
1332 sb->s_op = &xfs_super_operations; 1683 sb->s_op = &xfs_super_operations;
1333 1684
1334 error = xfs_mount(mp, args, NULL); 1685 error = xfs_dmops_get(mp, args);
1686 if (error)
1687 goto out_free_mp;
1688 error = xfs_qmops_get(mp, args);
1689 if (error)
1690 goto out_put_dmops;
1691
1692 if (args->flags & XFSMNT_QUIET)
1693 flags |= XFS_MFSI_QUIET;
1694
1695 error = xfs_open_devices(mp, args);
1696 if (error)
1697 goto out_put_qmops;
1698
1699 if (xfs_icsb_init_counters(mp))
1700 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
1701
1702 /*
1703 * Setup flags based on mount(2) options and then the superblock
1704 */
1705 error = xfs_start_flags(args, mp);
1706 if (error)
1707 goto out_free_fsname;
1708 error = xfs_readsb(mp, flags);
1709 if (error)
1710 goto out_free_fsname;
1711 error = xfs_finish_flags(args, mp);
1335 if (error) 1712 if (error)
1336 goto fail_vfsop; 1713 goto out_free_sb;
1714
1715 error = xfs_setup_devices(mp);
1716 if (error)
1717 goto out_free_sb;
1718
1719 if (mp->m_flags & XFS_MOUNT_BARRIER)
1720 xfs_mountfs_check_barriers(mp);
1721
1722 error = xfs_filestream_mount(mp);
1723 if (error)
1724 goto out_free_sb;
1725
1726 error = xfs_mountfs(mp);
1727 if (error)
1728 goto out_filestream_unmount;
1729
1730 XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
1337 1731
1338 sb->s_dirt = 1; 1732 sb->s_dirt = 1;
1339 sb->s_magic = XFS_SB_MAGIC; 1733 sb->s_magic = XFS_SB_MAGIC;
@@ -1343,7 +1737,7 @@ xfs_fs_fill_super(
1343 sb->s_time_gran = 1; 1737 sb->s_time_gran = 1;
1344 set_posix_acl_flag(sb); 1738 set_posix_acl_flag(sb);
1345 1739
1346 root = igrab(mp->m_rootip->i_vnode); 1740 root = igrab(VFS_I(mp->m_rootip));
1347 if (!root) { 1741 if (!root) {
1348 error = ENOENT; 1742 error = ENOENT;
1349 goto fail_unmount; 1743 goto fail_unmount;
@@ -1368,10 +1762,28 @@ xfs_fs_fill_super(
1368 1762
1369 xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); 1763 xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
1370 1764
1371 kmem_free(args, sizeof(*args)); 1765 kfree(args);
1372 return 0; 1766 return 0;
1373 1767
1374fail_vnrele: 1768 out_filestream_unmount:
1769 xfs_filestream_unmount(mp);
1770 out_free_sb:
1771 xfs_freesb(mp);
1772 out_free_fsname:
1773 xfs_free_fsname(mp);
1774 xfs_icsb_destroy_counters(mp);
1775 xfs_close_devices(mp);
1776 out_put_qmops:
1777 xfs_qmops_put(mp);
1778 out_put_dmops:
1779 xfs_dmops_put(mp);
1780 out_free_mp:
1781 kfree(mp);
1782 out_free_args:
1783 kfree(args);
1784 return -error;
1785
1786 fail_vnrele:
1375 if (sb->s_root) { 1787 if (sb->s_root) {
1376 dput(sb->s_root); 1788 dput(sb->s_root);
1377 sb->s_root = NULL; 1789 sb->s_root = NULL;
@@ -1379,12 +1791,20 @@ fail_vnrele:
1379 iput(root); 1791 iput(root);
1380 } 1792 }
1381 1793
1382fail_unmount: 1794 fail_unmount:
1383 xfs_unmount(mp, 0, NULL); 1795 /*
1796 * Blow away any referenced inode in the filestreams cache.
1797 * This can and will cause log traffic as inodes go inactive
1798 * here.
1799 */
1800 xfs_filestream_unmount(mp);
1801
1802 XFS_bflush(mp->m_ddev_targp);
1803 error = xfs_unmount_flush(mp, 0);
1804 WARN_ON(error);
1384 1805
1385fail_vfsop: 1806 xfs_unmountfs(mp);
1386 kmem_free(args, sizeof(*args)); 1807 goto out_free_sb;
1387 return -error;
1388} 1808}
1389 1809
1390STATIC int 1810STATIC int
@@ -1429,9 +1849,235 @@ static struct file_system_type xfs_fs_type = {
1429 .fs_flags = FS_REQUIRES_DEV, 1849 .fs_flags = FS_REQUIRES_DEV,
1430}; 1850};
1431 1851
1852STATIC int __init
1853xfs_alloc_trace_bufs(void)
1854{
1855#ifdef XFS_ALLOC_TRACE
1856 xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL);
1857 if (!xfs_alloc_trace_buf)
1858 goto out;
1859#endif
1860#ifdef XFS_BMAP_TRACE
1861 xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL);
1862 if (!xfs_bmap_trace_buf)
1863 goto out_free_alloc_trace;
1864#endif
1865#ifdef XFS_BMBT_TRACE
1866 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
1867 if (!xfs_bmbt_trace_buf)
1868 goto out_free_bmap_trace;
1869#endif
1870#ifdef XFS_ATTR_TRACE
1871 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
1872 if (!xfs_attr_trace_buf)
1873 goto out_free_bmbt_trace;
1874#endif
1875#ifdef XFS_DIR2_TRACE
1876 xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL);
1877 if (!xfs_dir2_trace_buf)
1878 goto out_free_attr_trace;
1879#endif
1880
1881 return 0;
1882
1883#ifdef XFS_DIR2_TRACE
1884 out_free_attr_trace:
1885#endif
1886#ifdef XFS_ATTR_TRACE
1887 ktrace_free(xfs_attr_trace_buf);
1888 out_free_bmbt_trace:
1889#endif
1890#ifdef XFS_BMBT_TRACE
1891 ktrace_free(xfs_bmbt_trace_buf);
1892 out_free_bmap_trace:
1893#endif
1894#ifdef XFS_BMAP_TRACE
1895 ktrace_free(xfs_bmap_trace_buf);
1896 out_free_alloc_trace:
1897#endif
1898#ifdef XFS_ALLOC_TRACE
1899 ktrace_free(xfs_alloc_trace_buf);
1900 out:
1901#endif
1902 return -ENOMEM;
1903}
1904
1905STATIC void
1906xfs_free_trace_bufs(void)
1907{
1908#ifdef XFS_DIR2_TRACE
1909 ktrace_free(xfs_dir2_trace_buf);
1910#endif
1911#ifdef XFS_ATTR_TRACE
1912 ktrace_free(xfs_attr_trace_buf);
1913#endif
1914#ifdef XFS_BMBT_TRACE
1915 ktrace_free(xfs_bmbt_trace_buf);
1916#endif
1917#ifdef XFS_BMAP_TRACE
1918 ktrace_free(xfs_bmap_trace_buf);
1919#endif
1920#ifdef XFS_ALLOC_TRACE
1921 ktrace_free(xfs_alloc_trace_buf);
1922#endif
1923}
1924
1925STATIC int __init
1926xfs_init_zones(void)
1927{
1928 xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode",
1929 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
1930 KM_ZONE_SPREAD,
1931 xfs_fs_inode_init_once);
1932 if (!xfs_vnode_zone)
1933 goto out;
1934
1935 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
1936 if (!xfs_ioend_zone)
1937 goto out_destroy_vnode_zone;
1938
1939 xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
1940 xfs_ioend_zone);
1941 if (!xfs_ioend_pool)
1942 goto out_destroy_ioend_zone;
1943
1944 xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
1945 "xfs_log_ticket");
1946 if (!xfs_log_ticket_zone)
1947 goto out_destroy_ioend_pool;
1948
1949 xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
1950 "xfs_bmap_free_item");
1951 if (!xfs_bmap_free_item_zone)
1952 goto out_destroy_log_ticket_zone;
1953 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
1954 "xfs_btree_cur");
1955 if (!xfs_btree_cur_zone)
1956 goto out_destroy_bmap_free_item_zone;
1957
1958 xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
1959 "xfs_da_state");
1960 if (!xfs_da_state_zone)
1961 goto out_destroy_btree_cur_zone;
1962
1963 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
1964 if (!xfs_dabuf_zone)
1965 goto out_destroy_da_state_zone;
1966
1967 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
1968 if (!xfs_ifork_zone)
1969 goto out_destroy_dabuf_zone;
1970
1971 xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
1972 if (!xfs_trans_zone)
1973 goto out_destroy_ifork_zone;
1974
1975 /*
1976 * The size of the zone allocated buf log item is the maximum
1977 * size possible under XFS. This wastes a little bit of memory,
1978 * but it is much faster.
1979 */
1980 xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
1981 (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
1982 NBWORD) * sizeof(int))), "xfs_buf_item");
1983 if (!xfs_buf_item_zone)
1984 goto out_destroy_trans_zone;
1985
1986 xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
1987 ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
1988 sizeof(xfs_extent_t))), "xfs_efd_item");
1989 if (!xfs_efd_zone)
1990 goto out_destroy_buf_item_zone;
1991
1992 xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
1993 ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
1994 sizeof(xfs_extent_t))), "xfs_efi_item");
1995 if (!xfs_efi_zone)
1996 goto out_destroy_efd_zone;
1997
1998 xfs_inode_zone =
1999 kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
2000 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
2001 KM_ZONE_SPREAD, NULL);
2002 if (!xfs_inode_zone)
2003 goto out_destroy_efi_zone;
2004
2005 xfs_ili_zone =
2006 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
2007 KM_ZONE_SPREAD, NULL);
2008 if (!xfs_ili_zone)
2009 goto out_destroy_inode_zone;
2010
2011#ifdef CONFIG_XFS_POSIX_ACL
2012 xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
2013 if (!xfs_acl_zone)
2014 goto out_destroy_ili_zone;
2015#endif
2016
2017 return 0;
2018
2019#ifdef CONFIG_XFS_POSIX_ACL
2020 out_destroy_ili_zone:
2021#endif
2022 kmem_zone_destroy(xfs_ili_zone);
2023 out_destroy_inode_zone:
2024 kmem_zone_destroy(xfs_inode_zone);
2025 out_destroy_efi_zone:
2026 kmem_zone_destroy(xfs_efi_zone);
2027 out_destroy_efd_zone:
2028 kmem_zone_destroy(xfs_efd_zone);
2029 out_destroy_buf_item_zone:
2030 kmem_zone_destroy(xfs_buf_item_zone);
2031 out_destroy_trans_zone:
2032 kmem_zone_destroy(xfs_trans_zone);
2033 out_destroy_ifork_zone:
2034 kmem_zone_destroy(xfs_ifork_zone);
2035 out_destroy_dabuf_zone:
2036 kmem_zone_destroy(xfs_dabuf_zone);
2037 out_destroy_da_state_zone:
2038 kmem_zone_destroy(xfs_da_state_zone);
2039 out_destroy_btree_cur_zone:
2040 kmem_zone_destroy(xfs_btree_cur_zone);
2041 out_destroy_bmap_free_item_zone:
2042 kmem_zone_destroy(xfs_bmap_free_item_zone);
2043 out_destroy_log_ticket_zone:
2044 kmem_zone_destroy(xfs_log_ticket_zone);
2045 out_destroy_ioend_pool:
2046 mempool_destroy(xfs_ioend_pool);
2047 out_destroy_ioend_zone:
2048 kmem_zone_destroy(xfs_ioend_zone);
2049 out_destroy_vnode_zone:
2050 kmem_zone_destroy(xfs_vnode_zone);
2051 out:
2052 return -ENOMEM;
2053}
2054
2055STATIC void
2056xfs_destroy_zones(void)
2057{
2058#ifdef CONFIG_XFS_POSIX_ACL
2059 kmem_zone_destroy(xfs_acl_zone);
2060#endif
2061 kmem_zone_destroy(xfs_ili_zone);
2062 kmem_zone_destroy(xfs_inode_zone);
2063 kmem_zone_destroy(xfs_efi_zone);
2064 kmem_zone_destroy(xfs_efd_zone);
2065 kmem_zone_destroy(xfs_buf_item_zone);
2066 kmem_zone_destroy(xfs_trans_zone);
2067 kmem_zone_destroy(xfs_ifork_zone);
2068 kmem_zone_destroy(xfs_dabuf_zone);
2069 kmem_zone_destroy(xfs_da_state_zone);
2070 kmem_zone_destroy(xfs_btree_cur_zone);
2071 kmem_zone_destroy(xfs_bmap_free_item_zone);
2072 kmem_zone_destroy(xfs_log_ticket_zone);
2073 mempool_destroy(xfs_ioend_pool);
2074 kmem_zone_destroy(xfs_ioend_zone);
2075 kmem_zone_destroy(xfs_vnode_zone);
2076
2077}
1432 2078
1433STATIC int __init 2079STATIC int __init
1434init_xfs_fs( void ) 2080init_xfs_fs(void)
1435{ 2081{
1436 int error; 2082 int error;
1437 static char message[] __initdata = KERN_INFO \ 2083 static char message[] __initdata = KERN_INFO \
@@ -1440,42 +2086,73 @@ init_xfs_fs( void )
1440 printk(message); 2086 printk(message);
1441 2087
1442 ktrace_init(64); 2088 ktrace_init(64);
2089 vn_init();
2090 xfs_dir_startup();
1443 2091
1444 error = xfs_init_zones(); 2092 error = xfs_init_zones();
1445 if (error < 0) 2093 if (error)
1446 goto undo_zones; 2094 goto out;
2095
2096 error = xfs_alloc_trace_bufs();
2097 if (error)
2098 goto out_destroy_zones;
2099
2100 error = xfs_mru_cache_init();
2101 if (error)
2102 goto out_free_trace_buffers;
2103
2104 error = xfs_filestream_init();
2105 if (error)
2106 goto out_mru_cache_uninit;
1447 2107
1448 error = xfs_buf_init(); 2108 error = xfs_buf_init();
1449 if (error < 0) 2109 if (error)
1450 goto undo_buffers; 2110 goto out_filestream_uninit;
2111
2112 error = xfs_init_procfs();
2113 if (error)
2114 goto out_buf_terminate;
2115
2116 error = xfs_sysctl_register();
2117 if (error)
2118 goto out_cleanup_procfs;
1451 2119
1452 vn_init();
1453 xfs_init();
1454 uuid_init();
1455 vfs_initquota(); 2120 vfs_initquota();
1456 2121
1457 error = register_filesystem(&xfs_fs_type); 2122 error = register_filesystem(&xfs_fs_type);
1458 if (error) 2123 if (error)
1459 goto undo_register; 2124 goto out_sysctl_unregister;
1460 return 0; 2125 return 0;
1461 2126
1462undo_register: 2127 out_sysctl_unregister:
2128 xfs_sysctl_unregister();
2129 out_cleanup_procfs:
2130 xfs_cleanup_procfs();
2131 out_buf_terminate:
1463 xfs_buf_terminate(); 2132 xfs_buf_terminate();
1464 2133 out_filestream_uninit:
1465undo_buffers: 2134 xfs_filestream_uninit();
2135 out_mru_cache_uninit:
2136 xfs_mru_cache_uninit();
2137 out_free_trace_buffers:
2138 xfs_free_trace_bufs();
2139 out_destroy_zones:
1466 xfs_destroy_zones(); 2140 xfs_destroy_zones();
1467 2141 out:
1468undo_zones:
1469 return error; 2142 return error;
1470} 2143}
1471 2144
1472STATIC void __exit 2145STATIC void __exit
1473exit_xfs_fs( void ) 2146exit_xfs_fs(void)
1474{ 2147{
1475 vfs_exitquota(); 2148 vfs_exitquota();
1476 unregister_filesystem(&xfs_fs_type); 2149 unregister_filesystem(&xfs_fs_type);
1477 xfs_cleanup(); 2150 xfs_sysctl_unregister();
2151 xfs_cleanup_procfs();
1478 xfs_buf_terminate(); 2152 xfs_buf_terminate();
2153 xfs_filestream_uninit();
2154 xfs_mru_cache_uninit();
2155 xfs_free_trace_bufs();
1479 xfs_destroy_zones(); 2156 xfs_destroy_zones();
1480 ktrace_uninit(); 2157 ktrace_uninit();
1481} 2158}
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 3efb7c6d3303..fe2ef4e6a0f9 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -101,18 +101,13 @@ struct block_device;
101 101
102extern __uint64_t xfs_max_file_offset(unsigned int); 102extern __uint64_t xfs_max_file_offset(unsigned int);
103 103
104extern void xfs_initialize_vnode(struct xfs_mount *mp, bhv_vnode_t *vp,
105 struct xfs_inode *ip);
106
107extern void xfs_flush_inode(struct xfs_inode *); 104extern void xfs_flush_inode(struct xfs_inode *);
108extern void xfs_flush_device(struct xfs_inode *); 105extern void xfs_flush_device(struct xfs_inode *);
109 106
110extern int xfs_blkdev_get(struct xfs_mount *, const char *,
111 struct block_device **);
112extern void xfs_blkdev_put(struct block_device *);
113extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); 107extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
114 108
115extern const struct export_operations xfs_export_operations; 109extern const struct export_operations xfs_export_operations;
110extern struct xattr_handler *xfs_xattr_handlers[];
116 111
117#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) 112#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
118 113
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index bb997d75c05c..7dacb5bbde3f 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -259,15 +259,17 @@ static ctl_table xfs_root_table[] = {
259 {} 259 {}
260}; 260};
261 261
262void 262int
263xfs_sysctl_register(void) 263xfs_sysctl_register(void)
264{ 264{
265 xfs_table_header = register_sysctl_table(xfs_root_table); 265 xfs_table_header = register_sysctl_table(xfs_root_table);
266 if (!xfs_table_header)
267 return -ENOMEM;
268 return 0;
266} 269}
267 270
268void 271void
269xfs_sysctl_unregister(void) 272xfs_sysctl_unregister(void)
270{ 273{
271 if (xfs_table_header) 274 unregister_sysctl_table(xfs_table_header);
272 unregister_sysctl_table(xfs_table_header);
273} 275}
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index 98b97e399d6f..4aadb8056c37 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -93,10 +93,10 @@ enum {
93extern xfs_param_t xfs_params; 93extern xfs_param_t xfs_params;
94 94
95#ifdef CONFIG_SYSCTL 95#ifdef CONFIG_SYSCTL
96extern void xfs_sysctl_register(void); 96extern int xfs_sysctl_register(void);
97extern void xfs_sysctl_unregister(void); 97extern void xfs_sysctl_unregister(void);
98#else 98#else
99# define xfs_sysctl_register() do { } while (0) 99# define xfs_sysctl_register() (0)
100# define xfs_sysctl_unregister() do { } while (0) 100# define xfs_sysctl_unregister() do { } while (0)
101#endif /* CONFIG_SYSCTL */ 101#endif /* CONFIG_SYSCTL */
102 102
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index bc7afe007338..b52528bbbfff 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -33,7 +33,7 @@
33 33
34 34
35/* 35/*
36 * Dedicated vnode inactive/reclaim sync semaphores. 36 * Dedicated vnode inactive/reclaim sync wait queues.
37 * Prime number of hash buckets since address is used as the key. 37 * Prime number of hash buckets since address is used as the key.
38 */ 38 */
39#define NVSYNC 37 39#define NVSYNC 37
@@ -82,74 +82,6 @@ vn_ioerror(
82 xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l); 82 xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
83} 83}
84 84
85/*
86 * Revalidate the Linux inode from the XFS inode.
87 * Note: i_size _not_ updated; we must hold the inode
88 * semaphore when doing that - callers responsibility.
89 */
90int
91vn_revalidate(
92 bhv_vnode_t *vp)
93{
94 struct inode *inode = vn_to_inode(vp);
95 struct xfs_inode *ip = XFS_I(inode);
96 struct xfs_mount *mp = ip->i_mount;
97 unsigned long xflags;
98
99 xfs_itrace_entry(ip);
100
101 if (XFS_FORCED_SHUTDOWN(mp))
102 return -EIO;
103
104 xfs_ilock(ip, XFS_ILOCK_SHARED);
105 inode->i_mode = ip->i_d.di_mode;
106 inode->i_uid = ip->i_d.di_uid;
107 inode->i_gid = ip->i_d.di_gid;
108 inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
109 inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
110 inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
111 inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
112
113 xflags = xfs_ip2xflags(ip);
114 if (xflags & XFS_XFLAG_IMMUTABLE)
115 inode->i_flags |= S_IMMUTABLE;
116 else
117 inode->i_flags &= ~S_IMMUTABLE;
118 if (xflags & XFS_XFLAG_APPEND)
119 inode->i_flags |= S_APPEND;
120 else
121 inode->i_flags &= ~S_APPEND;
122 if (xflags & XFS_XFLAG_SYNC)
123 inode->i_flags |= S_SYNC;
124 else
125 inode->i_flags &= ~S_SYNC;
126 if (xflags & XFS_XFLAG_NOATIME)
127 inode->i_flags |= S_NOATIME;
128 else
129 inode->i_flags &= ~S_NOATIME;
130 xfs_iunlock(ip, XFS_ILOCK_SHARED);
131
132 xfs_iflags_clear(ip, XFS_IMODIFIED);
133 return 0;
134}
135
136/*
137 * Add a reference to a referenced vnode.
138 */
139bhv_vnode_t *
140vn_hold(
141 bhv_vnode_t *vp)
142{
143 struct inode *inode;
144
145 XFS_STATS_INC(vn_hold);
146
147 inode = igrab(vn_to_inode(vp));
148 ASSERT(inode);
149
150 return vp;
151}
152
153#ifdef XFS_INODE_TRACE 85#ifdef XFS_INODE_TRACE
154 86
155/* 87/*
@@ -158,7 +90,7 @@ vn_hold(
158 */ 90 */
159static inline int xfs_icount(struct xfs_inode *ip) 91static inline int xfs_icount(struct xfs_inode *ip)
160{ 92{
161 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 93 struct inode *vp = VFS_I(ip);
162 94
163 if (vp) 95 if (vp)
164 return vn_count(vp); 96 return vn_count(vp);
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 25eb2a9e8d9b..683ce16210ff 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -19,24 +19,9 @@
19#define __XFS_VNODE_H__ 19#define __XFS_VNODE_H__
20 20
21struct file; 21struct file;
22struct bhv_vattr;
23struct xfs_iomap; 22struct xfs_iomap;
24struct attrlist_cursor_kern; 23struct attrlist_cursor_kern;
25 24
26typedef struct inode bhv_vnode_t;
27
28/*
29 * Vnode to Linux inode mapping.
30 */
31static inline bhv_vnode_t *vn_from_inode(struct inode *inode)
32{
33 return inode;
34}
35static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
36{
37 return vnode;
38}
39
40/* 25/*
41 * Return values for xfs_inactive. A return value of 26 * Return values for xfs_inactive. A return value of
42 * VN_INACTIVE_NOCACHE implies that the file system behavior 27 * VN_INACTIVE_NOCACHE implies that the file system behavior
@@ -66,87 +51,8 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
66 Prevent VM access to the pages until 51 Prevent VM access to the pages until
67 the operation completes. */ 52 the operation completes. */
68 53
69/*
70 * Vnode attributes. va_mask indicates those attributes the caller
71 * wants to set or extract.
72 */
73typedef struct bhv_vattr {
74 int va_mask; /* bit-mask of attributes present */
75 mode_t va_mode; /* file access mode and type */
76 xfs_nlink_t va_nlink; /* number of references to file */
77 uid_t va_uid; /* owner user id */
78 gid_t va_gid; /* owner group id */
79 xfs_ino_t va_nodeid; /* file id */
80 xfs_off_t va_size; /* file size in bytes */
81 u_long va_blocksize; /* blocksize preferred for i/o */
82 struct timespec va_atime; /* time of last access */
83 struct timespec va_mtime; /* time of last modification */
84 struct timespec va_ctime; /* time file changed */
85 u_int va_gen; /* generation number of file */
86 xfs_dev_t va_rdev; /* device the special file represents */
87 __int64_t va_nblocks; /* number of blocks allocated */
88 u_long va_xflags; /* random extended file flags */
89 u_long va_extsize; /* file extent size */
90 u_long va_nextents; /* number of extents in file */
91 u_long va_anextents; /* number of attr extents in file */
92 prid_t va_projid; /* project id */
93} bhv_vattr_t;
94
95/*
96 * setattr or getattr attributes
97 */
98#define XFS_AT_TYPE 0x00000001
99#define XFS_AT_MODE 0x00000002
100#define XFS_AT_UID 0x00000004
101#define XFS_AT_GID 0x00000008
102#define XFS_AT_FSID 0x00000010
103#define XFS_AT_NODEID 0x00000020
104#define XFS_AT_NLINK 0x00000040
105#define XFS_AT_SIZE 0x00000080
106#define XFS_AT_ATIME 0x00000100
107#define XFS_AT_MTIME 0x00000200
108#define XFS_AT_CTIME 0x00000400
109#define XFS_AT_RDEV 0x00000800
110#define XFS_AT_BLKSIZE 0x00001000
111#define XFS_AT_NBLOCKS 0x00002000
112#define XFS_AT_VCODE 0x00004000
113#define XFS_AT_MAC 0x00008000
114#define XFS_AT_UPDATIME 0x00010000
115#define XFS_AT_UPDMTIME 0x00020000
116#define XFS_AT_UPDCTIME 0x00040000
117#define XFS_AT_ACL 0x00080000
118#define XFS_AT_CAP 0x00100000
119#define XFS_AT_INF 0x00200000
120#define XFS_AT_XFLAGS 0x00400000
121#define XFS_AT_EXTSIZE 0x00800000
122#define XFS_AT_NEXTENTS 0x01000000
123#define XFS_AT_ANEXTENTS 0x02000000
124#define XFS_AT_PROJID 0x04000000
125#define XFS_AT_SIZE_NOPERM 0x08000000
126#define XFS_AT_GENCOUNT 0x10000000
127
128#define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
129 XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
130 XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
131 XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
132 XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
133 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)
134
135#define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
136 XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
137 XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
138 XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID)
139
140#define XFS_AT_TIMES (XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME)
141
142#define XFS_AT_UPDTIMES (XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME)
143
144#define XFS_AT_NOSET (XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\
145 XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
146 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
147 54
148extern void vn_init(void); 55extern void vn_init(void);
149extern int vn_revalidate(bhv_vnode_t *);
150 56
151/* 57/*
152 * Yeah, these don't take vnode anymore at all, all this should be 58 * Yeah, these don't take vnode anymore at all, all this should be
@@ -156,57 +62,52 @@ extern void vn_iowait(struct xfs_inode *ip);
156extern void vn_iowake(struct xfs_inode *ip); 62extern void vn_iowake(struct xfs_inode *ip);
157extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l); 63extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l);
158 64
159static inline int vn_count(bhv_vnode_t *vp) 65static inline int vn_count(struct inode *vp)
160{ 66{
161 return atomic_read(&vn_to_inode(vp)->i_count); 67 return atomic_read(&vp->i_count);
162} 68}
163 69
164/* 70#define IHOLD(ip) \
165 * Vnode reference counting functions (and macros for compatibility). 71do { \
166 */ 72 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
167extern bhv_vnode_t *vn_hold(bhv_vnode_t *); 73 atomic_inc(&(VFS_I(ip)->i_count)); \
74 xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
75} while (0)
168 76
169#if defined(XFS_INODE_TRACE) 77#define IRELE(ip) \
170#define VN_HOLD(vp) \ 78do { \
171 ((void)vn_hold(vp), \ 79 xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
172 xfs_itrace_hold(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address)) 80 iput(VFS_I(ip)); \
173#define VN_RELE(vp) \ 81} while (0)
174 (xfs_itrace_rele(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address), \
175 iput(vn_to_inode(vp)))
176#else
177#define VN_HOLD(vp) ((void)vn_hold(vp))
178#define VN_RELE(vp) (iput(vn_to_inode(vp)))
179#endif
180 82
181static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp) 83static inline struct inode *vn_grab(struct inode *vp)
182{ 84{
183 struct inode *inode = igrab(vn_to_inode(vp)); 85 return igrab(vp);
184 return inode ? vn_from_inode(inode) : NULL;
185} 86}
186 87
187/* 88/*
188 * Dealing with bad inodes 89 * Dealing with bad inodes
189 */ 90 */
190static inline int VN_BAD(bhv_vnode_t *vp) 91static inline int VN_BAD(struct inode *vp)
191{ 92{
192 return is_bad_inode(vn_to_inode(vp)); 93 return is_bad_inode(vp);
193} 94}
194 95
195/* 96/*
196 * Extracting atime values in various formats 97 * Extracting atime values in various formats
197 */ 98 */
198static inline void vn_atime_to_bstime(bhv_vnode_t *vp, xfs_bstime_t *bs_atime) 99static inline void vn_atime_to_bstime(struct inode *vp, xfs_bstime_t *bs_atime)
199{ 100{
200 bs_atime->tv_sec = vp->i_atime.tv_sec; 101 bs_atime->tv_sec = vp->i_atime.tv_sec;
201 bs_atime->tv_nsec = vp->i_atime.tv_nsec; 102 bs_atime->tv_nsec = vp->i_atime.tv_nsec;
202} 103}
203 104
204static inline void vn_atime_to_timespec(bhv_vnode_t *vp, struct timespec *ts) 105static inline void vn_atime_to_timespec(struct inode *vp, struct timespec *ts)
205{ 106{
206 *ts = vp->i_atime; 107 *ts = vp->i_atime;
207} 108}
208 109
209static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) 110static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt)
210{ 111{
211 *tt = vp->i_atime.tv_sec; 112 *tt = vp->i_atime.tv_sec;
212} 113}
@@ -214,20 +115,11 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
214/* 115/*
215 * Some useful predicates. 116 * Some useful predicates.
216 */ 117 */
217#define VN_MAPPED(vp) mapping_mapped(vn_to_inode(vp)->i_mapping) 118#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping)
218#define VN_CACHED(vp) (vn_to_inode(vp)->i_mapping->nrpages) 119#define VN_CACHED(vp) (vp->i_mapping->nrpages)
219#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \ 120#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \
220 PAGECACHE_TAG_DIRTY) 121 PAGECACHE_TAG_DIRTY)
221 122
222/*
223 * Flags to vop_setattr/getattr.
224 */
225#define ATTR_UTIME 0x01 /* non-default utime(2) request */
226#define ATTR_DMI 0x08 /* invocation from a DMI function */
227#define ATTR_LAZY 0x80 /* set/get attributes lazily */
228#define ATTR_NONBLOCK 0x100 /* return EAGAIN if operation would block */
229#define ATTR_NOLOCK 0x200 /* Don't grab any conflicting locks */
230#define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */
231 123
232/* 124/*
233 * Tracking vnode activity. 125 * Tracking vnode activity.
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
new file mode 100644
index 000000000000..964621fde6ed
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -0,0 +1,330 @@
1/*
2 * Copyright (C) 2008 Christoph Hellwig.
3 * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#include "xfs.h"
20#include "xfs_da_btree.h"
21#include "xfs_bmap_btree.h"
22#include "xfs_inode.h"
23#include "xfs_attr.h"
24#include "xfs_attr_leaf.h"
25#include "xfs_acl.h"
26#include "xfs_vnodeops.h"
27
28#include <linux/posix_acl_xattr.h>
29#include <linux/xattr.h>
30
31
32/*
33 * ACL handling. Should eventually be moved into xfs_acl.c
34 */
35
36static int
37xfs_decode_acl(const char *name)
38{
39 if (strcmp(name, "posix_acl_access") == 0)
40 return _ACL_TYPE_ACCESS;
41 else if (strcmp(name, "posix_acl_default") == 0)
42 return _ACL_TYPE_DEFAULT;
43 return -EINVAL;
44}
45
46/*
47 * Get system extended attributes which at the moment only
48 * includes Posix ACLs.
49 */
50static int
51xfs_xattr_system_get(struct inode *inode, const char *name,
52 void *buffer, size_t size)
53{
54 int acl;
55
56 acl = xfs_decode_acl(name);
57 if (acl < 0)
58 return acl;
59
60 return xfs_acl_vget(inode, buffer, size, acl);
61}
62
63static int
64xfs_xattr_system_set(struct inode *inode, const char *name,
65 const void *value, size_t size, int flags)
66{
67 int acl;
68
69 acl = xfs_decode_acl(name);
70 if (acl < 0)
71 return acl;
72 if (flags & XATTR_CREATE)
73 return -EINVAL;
74
75 if (!value)
76 return xfs_acl_vremove(inode, acl);
77
78 return xfs_acl_vset(inode, (void *)value, size, acl);
79}
80
81static struct xattr_handler xfs_xattr_system_handler = {
82 .prefix = XATTR_SYSTEM_PREFIX,
83 .get = xfs_xattr_system_get,
84 .set = xfs_xattr_system_set,
85};
86
87
88/*
89 * Real xattr handling. The only difference between the namespaces is
90 * a flag passed to the low-level attr code.
91 */
92
93static int
94__xfs_xattr_get(struct inode *inode, const char *name,
95 void *value, size_t size, int xflags)
96{
97 struct xfs_inode *ip = XFS_I(inode);
98 int error, asize = size;
99
100 if (strcmp(name, "") == 0)
101 return -EINVAL;
102
103 /* Convert Linux syscall to XFS internal ATTR flags */
104 if (!size) {
105 xflags |= ATTR_KERNOVAL;
106 value = NULL;
107 }
108
109 error = -xfs_attr_get(ip, name, value, &asize, xflags);
110 if (error)
111 return error;
112 return asize;
113}
114
115static int
116__xfs_xattr_set(struct inode *inode, const char *name, const void *value,
117 size_t size, int flags, int xflags)
118{
119 struct xfs_inode *ip = XFS_I(inode);
120
121 if (strcmp(name, "") == 0)
122 return -EINVAL;
123
124 /* Convert Linux syscall to XFS internal ATTR flags */
125 if (flags & XATTR_CREATE)
126 xflags |= ATTR_CREATE;
127 if (flags & XATTR_REPLACE)
128 xflags |= ATTR_REPLACE;
129
130 if (!value)
131 return -xfs_attr_remove(ip, name, xflags);
132 return -xfs_attr_set(ip, name, (void *)value, size, xflags);
133}
134
135static int
136xfs_xattr_user_get(struct inode *inode, const char *name,
137 void *value, size_t size)
138{
139 return __xfs_xattr_get(inode, name, value, size, 0);
140}
141
142static int
143xfs_xattr_user_set(struct inode *inode, const char *name,
144 const void *value, size_t size, int flags)
145{
146 return __xfs_xattr_set(inode, name, value, size, flags, 0);
147}
148
149static struct xattr_handler xfs_xattr_user_handler = {
150 .prefix = XATTR_USER_PREFIX,
151 .get = xfs_xattr_user_get,
152 .set = xfs_xattr_user_set,
153};
154
155
156static int
157xfs_xattr_trusted_get(struct inode *inode, const char *name,
158 void *value, size_t size)
159{
160 return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
161}
162
163static int
164xfs_xattr_trusted_set(struct inode *inode, const char *name,
165 const void *value, size_t size, int flags)
166{
167 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
168}
169
170static struct xattr_handler xfs_xattr_trusted_handler = {
171 .prefix = XATTR_TRUSTED_PREFIX,
172 .get = xfs_xattr_trusted_get,
173 .set = xfs_xattr_trusted_set,
174};
175
176
177static int
178xfs_xattr_secure_get(struct inode *inode, const char *name,
179 void *value, size_t size)
180{
181 return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
182}
183
184static int
185xfs_xattr_secure_set(struct inode *inode, const char *name,
186 const void *value, size_t size, int flags)
187{
188 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
189}
190
191static struct xattr_handler xfs_xattr_security_handler = {
192 .prefix = XATTR_SECURITY_PREFIX,
193 .get = xfs_xattr_secure_get,
194 .set = xfs_xattr_secure_set,
195};
196
197
198struct xattr_handler *xfs_xattr_handlers[] = {
199 &xfs_xattr_user_handler,
200 &xfs_xattr_trusted_handler,
201 &xfs_xattr_security_handler,
202 &xfs_xattr_system_handler,
203 NULL
204};
205
206static unsigned int xfs_xattr_prefix_len(int flags)
207{
208 if (flags & XFS_ATTR_SECURE)
209 return sizeof("security");
210 else if (flags & XFS_ATTR_ROOT)
211 return sizeof("trusted");
212 else
213 return sizeof("user");
214}
215
216static const char *xfs_xattr_prefix(int flags)
217{
218 if (flags & XFS_ATTR_SECURE)
219 return xfs_xattr_security_handler.prefix;
220 else if (flags & XFS_ATTR_ROOT)
221 return xfs_xattr_trusted_handler.prefix;
222 else
223 return xfs_xattr_user_handler.prefix;
224}
225
226static int
227xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
228 char *name, int namelen, int valuelen, char *value)
229{
230 unsigned int prefix_len = xfs_xattr_prefix_len(flags);
231 char *offset;
232 int arraytop;
233
234 ASSERT(context->count >= 0);
235
236 /*
237 * Only show root namespace entries if we are actually allowed to
238 * see them.
239 */
240 if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
241 return 0;
242
243 arraytop = context->count + prefix_len + namelen + 1;
244 if (arraytop > context->firstu) {
245 context->count = -1; /* insufficient space */
246 return 1;
247 }
248 offset = (char *)context->alist + context->count;
249 strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
250 offset += prefix_len;
251 strncpy(offset, name, namelen); /* real name */
252 offset += namelen;
253 *offset = '\0';
254 context->count += prefix_len + namelen + 1;
255 return 0;
256}
257
258static int
259xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags,
260 char *name, int namelen, int valuelen, char *value)
261{
262 context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
263 return 0;
264}
265
266static int
267list_one_attr(const char *name, const size_t len, void *data,
268 size_t size, ssize_t *result)
269{
270 char *p = data + *result;
271
272 *result += len;
273 if (!size)
274 return 0;
275 if (*result > size)
276 return -ERANGE;
277
278 strcpy(p, name);
279 return 0;
280}
281
282ssize_t
283xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
284{
285 struct xfs_attr_list_context context;
286 struct attrlist_cursor_kern cursor = { 0 };
287 struct inode *inode = dentry->d_inode;
288 int error;
289
290 /*
291 * First read the regular on-disk attributes.
292 */
293 memset(&context, 0, sizeof(context));
294 context.dp = XFS_I(inode);
295 context.cursor = &cursor;
296 context.resynch = 1;
297 context.alist = data;
298 context.bufsize = size;
299 context.firstu = context.bufsize;
300
301 if (size)
302 context.put_listent = xfs_xattr_put_listent;
303 else
304 context.put_listent = xfs_xattr_put_listent_sizes;
305
306 xfs_attr_list_int(&context);
307 if (context.count < 0)
308 return -ERANGE;
309
310 /*
311 * Then add the two synthetic ACL attributes.
312 */
313 if (xfs_acl_vhasacl_access(inode)) {
314 error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
315 strlen(POSIX_ACL_XATTR_ACCESS) + 1,
316 data, size, &context.count);
317 if (error)
318 return error;
319 }
320
321 if (xfs_acl_vhasacl_default(inode)) {
322 error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
323 strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
324 data, size, &context.count);
325 if (error)
326 return error;
327 }
328
329 return context.count;
330}
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 85df3288efd5..f2705f2fd43c 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,11 +101,18 @@ xfs_qm_dqinit(
101 if (brandnewdquot) { 101 if (brandnewdquot) {
102 dqp->dq_flnext = dqp->dq_flprev = dqp; 102 dqp->dq_flnext = dqp->dq_flprev = dqp;
103 mutex_init(&dqp->q_qlock); 103 mutex_init(&dqp->q_qlock);
104 initnsema(&dqp->q_flock, 1, "fdq");
105 sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); 104 sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
106 105
106 /*
107 * Because we want to use a counting completion, complete
108 * the flush completion once to allow a single access to
109 * the flush completion without blocking.
110 */
111 init_completion(&dqp->q_flush);
112 complete(&dqp->q_flush);
113
107#ifdef XFS_DQUOT_TRACE 114#ifdef XFS_DQUOT_TRACE
108 dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_SLEEP); 115 dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_NOFS);
109 xfs_dqtrace_entry(dqp, "DQINIT"); 116 xfs_dqtrace_entry(dqp, "DQINIT");
110#endif 117#endif
111 } else { 118 } else {
@@ -150,7 +157,6 @@ xfs_qm_dqdestroy(
150 ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp)); 157 ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp));
151 158
152 mutex_destroy(&dqp->q_qlock); 159 mutex_destroy(&dqp->q_qlock);
153 freesema(&dqp->q_flock);
154 sv_destroy(&dqp->q_pinwait); 160 sv_destroy(&dqp->q_pinwait);
155 161
156#ifdef XFS_DQUOT_TRACE 162#ifdef XFS_DQUOT_TRACE
@@ -431,7 +437,7 @@ xfs_qm_dqalloc(
431 * when it unlocks the inode. Since we want to keep the quota 437 * when it unlocks the inode. Since we want to keep the quota
432 * inode around, we bump the vnode ref count now. 438 * inode around, we bump the vnode ref count now.
433 */ 439 */
434 VN_HOLD(XFS_ITOV(quotip)); 440 IHOLD(quotip);
435 441
436 xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); 442 xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
437 nmaps = 1; 443 nmaps = 1;
@@ -1211,7 +1217,7 @@ xfs_qm_dqflush(
1211 int error; 1217 int error;
1212 1218
1213 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1219 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1214 ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp)); 1220 ASSERT(!completion_done(&dqp->q_flush));
1215 xfs_dqtrace_entry(dqp, "DQFLUSH"); 1221 xfs_dqtrace_entry(dqp, "DQFLUSH");
1216 1222
1217 /* 1223 /*
@@ -1348,34 +1354,18 @@ xfs_qm_dqflush_done(
1348 xfs_dqfunlock(dqp); 1354 xfs_dqfunlock(dqp);
1349} 1355}
1350 1356
1351
1352int
1353xfs_qm_dqflock_nowait(
1354 xfs_dquot_t *dqp)
1355{
1356 int locked;
1357
1358 locked = cpsema(&((dqp)->q_flock));
1359
1360 /* XXX ifdef these out */
1361 if (locked)
1362 (dqp)->dq_flags |= XFS_DQ_FLOCKED;
1363 return (locked);
1364}
1365
1366
1367int 1357int
1368xfs_qm_dqlock_nowait( 1358xfs_qm_dqlock_nowait(
1369 xfs_dquot_t *dqp) 1359 xfs_dquot_t *dqp)
1370{ 1360{
1371 return (mutex_trylock(&((dqp)->q_qlock))); 1361 return mutex_trylock(&dqp->q_qlock);
1372} 1362}
1373 1363
1374void 1364void
1375xfs_dqlock( 1365xfs_dqlock(
1376 xfs_dquot_t *dqp) 1366 xfs_dquot_t *dqp)
1377{ 1367{
1378 mutex_lock(&(dqp->q_qlock)); 1368 mutex_lock(&dqp->q_qlock);
1379} 1369}
1380 1370
1381void 1371void
@@ -1435,8 +1425,7 @@ xfs_dqlock2(
1435/* ARGSUSED */ 1425/* ARGSUSED */
1436int 1426int
1437xfs_qm_dqpurge( 1427xfs_qm_dqpurge(
1438 xfs_dquot_t *dqp, 1428 xfs_dquot_t *dqp)
1439 uint flags)
1440{ 1429{
1441 xfs_dqhash_t *thishash; 1430 xfs_dqhash_t *thishash;
1442 xfs_mount_t *mp = dqp->q_mount; 1431 xfs_mount_t *mp = dqp->q_mount;
@@ -1469,7 +1458,7 @@ xfs_qm_dqpurge(
1469 * if we're turning off quotas. Basically, we need this flush 1458 * if we're turning off quotas. Basically, we need this flush
1470 * lock, and are willing to block on it. 1459 * lock, and are willing to block on it.
1471 */ 1460 */
1472 if (! xfs_qm_dqflock_nowait(dqp)) { 1461 if (!xfs_dqflock_nowait(dqp)) {
1473 /* 1462 /*
1474 * Block on the flush lock after nudging dquot buffer, 1463 * Block on the flush lock after nudging dquot buffer,
1475 * if it is incore. 1464 * if it is incore.
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 5c371a92e3e2..8958d0faf8d3 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -82,7 +82,7 @@ typedef struct xfs_dquot {
82 xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ 82 xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */
83 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ 83 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
84 mutex_t q_qlock; /* quota lock */ 84 mutex_t q_qlock; /* quota lock */
85 sema_t q_flock; /* flush lock */ 85 struct completion q_flush; /* flush completion queue */
86 uint q_pincount; /* pin count for this dquot */ 86 uint q_pincount; /* pin count for this dquot */
87 sv_t q_pinwait; /* sync var for pinning */ 87 sv_t q_pinwait; /* sync var for pinning */
88#ifdef XFS_DQUOT_TRACE 88#ifdef XFS_DQUOT_TRACE
@@ -113,17 +113,25 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
113 113
114 114
115/* 115/*
116 * The following three routines simply manage the q_flock 116 * Manage the q_flush completion queue embedded in the dquot. This completion
117 * semaphore embedded in the dquot. This semaphore synchronizes 117 * queue synchronizes processes attempting to flush the in-core dquot back to
118 * processes attempting to flush the in-core dquot back to disk. 118 * disk.
119 */ 119 */
120#define xfs_dqflock(dqp) { psema(&((dqp)->q_flock), PINOD | PRECALC);\ 120static inline void xfs_dqflock(xfs_dquot_t *dqp)
121 (dqp)->dq_flags |= XFS_DQ_FLOCKED; } 121{
122#define xfs_dqfunlock(dqp) { ASSERT(issemalocked(&((dqp)->q_flock))); \ 122 wait_for_completion(&dqp->q_flush);
123 vsema(&((dqp)->q_flock)); \ 123}
124 (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); } 124
125static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
126{
127 return try_wait_for_completion(&dqp->q_flush);
128}
129
130static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
131{
132 complete(&dqp->q_flush);
133}
125 134
126#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock)))
127#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) 135#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp))
128#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) 136#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
129#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) 137#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
@@ -164,10 +172,9 @@ extern void xfs_qm_dqprint(xfs_dquot_t *);
164 172
165extern void xfs_qm_dqdestroy(xfs_dquot_t *); 173extern void xfs_qm_dqdestroy(xfs_dquot_t *);
166extern int xfs_qm_dqflush(xfs_dquot_t *, uint); 174extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
167extern int xfs_qm_dqpurge(xfs_dquot_t *, uint); 175extern int xfs_qm_dqpurge(xfs_dquot_t *);
168extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); 176extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
169extern int xfs_qm_dqlock_nowait(xfs_dquot_t *); 177extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
170extern int xfs_qm_dqflock_nowait(xfs_dquot_t *);
171extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp); 178extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
172extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, 179extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
173 xfs_disk_dquot_t *); 180 xfs_disk_dquot_t *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 36e05ca78412..f028644caa5e 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -151,7 +151,7 @@ xfs_qm_dquot_logitem_push(
151 dqp = logitem->qli_dquot; 151 dqp = logitem->qli_dquot;
152 152
153 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 153 ASSERT(XFS_DQ_IS_LOCKED(dqp));
154 ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp)); 154 ASSERT(!completion_done(&dqp->q_flush));
155 155
156 /* 156 /*
157 * Since we were able to lock the dquot's flush lock and 157 * Since we were able to lock the dquot's flush lock and
@@ -245,7 +245,7 @@ xfs_qm_dquot_logitem_pushbuf(
245 * inode flush completed and the inode was taken off the AIL. 245 * inode flush completed and the inode was taken off the AIL.
246 * So, just get out. 246 * So, just get out.
247 */ 247 */
248 if (!issemalocked(&(dqp->q_flock)) || 248 if (completion_done(&dqp->q_flush) ||
249 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { 249 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
250 qip->qli_pushbuf_flag = 0; 250 qip->qli_pushbuf_flag = 0;
251 xfs_dqunlock(dqp); 251 xfs_dqunlock(dqp);
@@ -258,7 +258,7 @@ xfs_qm_dquot_logitem_pushbuf(
258 if (bp != NULL) { 258 if (bp != NULL) {
259 if (XFS_BUF_ISDELAYWRITE(bp)) { 259 if (XFS_BUF_ISDELAYWRITE(bp)) {
260 dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && 260 dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
261 issemalocked(&(dqp->q_flock))); 261 !completion_done(&dqp->q_flush));
262 qip->qli_pushbuf_flag = 0; 262 qip->qli_pushbuf_flag = 0;
263 xfs_dqunlock(dqp); 263 xfs_dqunlock(dqp);
264 264
@@ -317,7 +317,7 @@ xfs_qm_dquot_logitem_trylock(
317 return (XFS_ITEM_LOCKED); 317 return (XFS_ITEM_LOCKED);
318 318
319 retval = XFS_ITEM_SUCCESS; 319 retval = XFS_ITEM_SUCCESS;
320 if (! xfs_qm_dqflock_nowait(dqp)) { 320 if (!xfs_dqflock_nowait(dqp)) {
321 /* 321 /*
322 * The dquot is already being flushed. It may have been 322 * The dquot is already being flushed. It may have been
323 * flushed delayed write, however, and we don't want to 323 * flushed delayed write, however, and we don't want to
@@ -576,8 +576,8 @@ xfs_qm_qoffend_logitem_committed(
576 * xfs_trans_delete_ail() drops the AIL lock. 576 * xfs_trans_delete_ail() drops the AIL lock.
577 */ 577 */
578 xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs); 578 xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs);
579 kmem_free(qfs, sizeof(xfs_qoff_logitem_t)); 579 kmem_free(qfs);
580 kmem_free(qfe, sizeof(xfs_qoff_logitem_t)); 580 kmem_free(qfe);
581 return (xfs_lsn_t)-1; 581 return (xfs_lsn_t)-1;
582} 582}
583 583
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index d31cce1165c5..df0ffef9775a 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -192,8 +192,8 @@ xfs_qm_destroy(
192 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); 192 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
193 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); 193 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
194 } 194 }
195 kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t)); 195 kmem_free(xqm->qm_usr_dqhtable);
196 kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t)); 196 kmem_free(xqm->qm_grp_dqhtable);
197 xqm->qm_usr_dqhtable = NULL; 197 xqm->qm_usr_dqhtable = NULL;
198 xqm->qm_grp_dqhtable = NULL; 198 xqm->qm_grp_dqhtable = NULL;
199 xqm->qm_dqhashmask = 0; 199 xqm->qm_dqhashmask = 0;
@@ -201,7 +201,7 @@ xfs_qm_destroy(
201#ifdef DEBUG 201#ifdef DEBUG
202 mutex_destroy(&qcheck_lock); 202 mutex_destroy(&qcheck_lock);
203#endif 203#endif
204 kmem_free(xqm, sizeof(xfs_qm_t)); 204 kmem_free(xqm);
205} 205}
206 206
207/* 207/*
@@ -310,8 +310,7 @@ xfs_qm_unmount_quotadestroy(
310 */ 310 */
311void 311void
312xfs_qm_mount_quotas( 312xfs_qm_mount_quotas(
313 xfs_mount_t *mp, 313 xfs_mount_t *mp)
314 int mfsi_flags)
315{ 314{
316 int error = 0; 315 int error = 0;
317 uint sbf; 316 uint sbf;
@@ -346,8 +345,7 @@ xfs_qm_mount_quotas(
346 /* 345 /*
347 * If any of the quotas are not consistent, do a quotacheck. 346 * If any of the quotas are not consistent, do a quotacheck.
348 */ 347 */
349 if (XFS_QM_NEED_QUOTACHECK(mp) && 348 if (XFS_QM_NEED_QUOTACHECK(mp)) {
350 !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
351 error = xfs_qm_quotacheck(mp); 349 error = xfs_qm_quotacheck(mp);
352 if (error) { 350 if (error) {
353 /* Quotacheck failed and disabled quotas. */ 351 /* Quotacheck failed and disabled quotas. */
@@ -445,11 +443,11 @@ xfs_qm_unmount_quotas(
445 } 443 }
446 } 444 }
447 if (uqp) { 445 if (uqp) {
448 XFS_PURGE_INODE(uqp); 446 IRELE(uqp);
449 mp->m_quotainfo->qi_uquotaip = NULL; 447 mp->m_quotainfo->qi_uquotaip = NULL;
450 } 448 }
451 if (gqp) { 449 if (gqp) {
452 XFS_PURGE_INODE(gqp); 450 IRELE(gqp);
453 mp->m_quotainfo->qi_gquotaip = NULL; 451 mp->m_quotainfo->qi_gquotaip = NULL;
454 } 452 }
455out: 453out:
@@ -484,7 +482,7 @@ again:
484 xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY"); 482 xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
485 /* XXX a sentinel would be better */ 483 /* XXX a sentinel would be better */
486 recl = XFS_QI_MPLRECLAIMS(mp); 484 recl = XFS_QI_MPLRECLAIMS(mp);
487 if (! xfs_qm_dqflock_nowait(dqp)) { 485 if (!xfs_dqflock_nowait(dqp)) {
488 /* 486 /*
489 * If we can't grab the flush lock then check 487 * If we can't grab the flush lock then check
490 * to see if the dquot has been flushed delayed 488 * to see if the dquot has been flushed delayed
@@ -631,7 +629,7 @@ xfs_qm_dqpurge_int(
631 * freelist in INACTIVE state. 629 * freelist in INACTIVE state.
632 */ 630 */
633 nextdqp = dqp->MPL_NEXT; 631 nextdqp = dqp->MPL_NEXT;
634 nmisses += xfs_qm_dqpurge(dqp, flags); 632 nmisses += xfs_qm_dqpurge(dqp);
635 dqp = nextdqp; 633 dqp = nextdqp;
636 } 634 }
637 xfs_qm_mplist_unlock(mp); 635 xfs_qm_mplist_unlock(mp);
@@ -1062,7 +1060,7 @@ xfs_qm_sync(
1062 1060
1063 /* XXX a sentinel would be better */ 1061 /* XXX a sentinel would be better */
1064 recl = XFS_QI_MPLRECLAIMS(mp); 1062 recl = XFS_QI_MPLRECLAIMS(mp);
1065 if (! xfs_qm_dqflock_nowait(dqp)) { 1063 if (!xfs_dqflock_nowait(dqp)) {
1066 if (nowait) { 1064 if (nowait) {
1067 xfs_dqunlock(dqp); 1065 xfs_dqunlock(dqp);
1068 continue; 1066 continue;
@@ -1134,7 +1132,7 @@ xfs_qm_init_quotainfo(
1134 * and change the superblock accordingly. 1132 * and change the superblock accordingly.
1135 */ 1133 */
1136 if ((error = xfs_qm_init_quotainos(mp))) { 1134 if ((error = xfs_qm_init_quotainos(mp))) {
1137 kmem_free(qinf, sizeof(xfs_quotainfo_t)); 1135 kmem_free(qinf);
1138 mp->m_quotainfo = NULL; 1136 mp->m_quotainfo = NULL;
1139 return error; 1137 return error;
1140 } 1138 }
@@ -1240,15 +1238,15 @@ xfs_qm_destroy_quotainfo(
1240 xfs_qm_list_destroy(&qi->qi_dqlist); 1238 xfs_qm_list_destroy(&qi->qi_dqlist);
1241 1239
1242 if (qi->qi_uquotaip) { 1240 if (qi->qi_uquotaip) {
1243 XFS_PURGE_INODE(qi->qi_uquotaip); 1241 IRELE(qi->qi_uquotaip);
1244 qi->qi_uquotaip = NULL; /* paranoia */ 1242 qi->qi_uquotaip = NULL; /* paranoia */
1245 } 1243 }
1246 if (qi->qi_gquotaip) { 1244 if (qi->qi_gquotaip) {
1247 XFS_PURGE_INODE(qi->qi_gquotaip); 1245 IRELE(qi->qi_gquotaip);
1248 qi->qi_gquotaip = NULL; 1246 qi->qi_gquotaip = NULL;
1249 } 1247 }
1250 mutex_destroy(&qi->qi_quotaofflock); 1248 mutex_destroy(&qi->qi_quotaofflock);
1251 kmem_free(qi, sizeof(xfs_quotainfo_t)); 1249 kmem_free(qi);
1252 mp->m_quotainfo = NULL; 1250 mp->m_quotainfo = NULL;
1253} 1251}
1254 1252
@@ -1394,7 +1392,7 @@ xfs_qm_qino_alloc(
1394 * locked exclusively and joined to the transaction already. 1392 * locked exclusively and joined to the transaction already.
1395 */ 1393 */
1396 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL)); 1394 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1397 VN_HOLD(XFS_ITOV((*ip))); 1395 IHOLD(*ip);
1398 1396
1399 /* 1397 /*
1400 * Make the changes in the superblock, and log those too. 1398 * Make the changes in the superblock, and log those too.
@@ -1623,7 +1621,7 @@ xfs_qm_dqiterate(
1623 break; 1621 break;
1624 } while (nmaps > 0); 1622 } while (nmaps > 0);
1625 1623
1626 kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map)); 1624 kmem_free(map);
1627 1625
1628 return error; 1626 return error;
1629} 1627}
@@ -2079,7 +2077,7 @@ xfs_qm_shake_freelist(
2079 * Try to grab the flush lock. If this dquot is in the process of 2077 * Try to grab the flush lock. If this dquot is in the process of
2080 * getting flushed to disk, we don't want to reclaim it. 2078 * getting flushed to disk, we don't want to reclaim it.
2081 */ 2079 */
2082 if (! xfs_qm_dqflock_nowait(dqp)) { 2080 if (!xfs_dqflock_nowait(dqp)) {
2083 xfs_dqunlock(dqp); 2081 xfs_dqunlock(dqp);
2084 dqp = dqp->dq_flnext; 2082 dqp = dqp->dq_flnext;
2085 continue; 2083 continue;
@@ -2257,7 +2255,7 @@ xfs_qm_dqreclaim_one(void)
2257 * Try to grab the flush lock. If this dquot is in the process of 2255 * Try to grab the flush lock. If this dquot is in the process of
2258 * getting flushed to disk, we don't want to reclaim it. 2256 * getting flushed to disk, we don't want to reclaim it.
2259 */ 2257 */
2260 if (! xfs_qm_dqflock_nowait(dqp)) { 2258 if (!xfs_dqflock_nowait(dqp)) {
2261 xfs_dqunlock(dqp); 2259 xfs_dqunlock(dqp);
2262 continue; 2260 continue;
2263 } 2261 }
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index cd2300e374af..44f25349e478 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct {
165#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) 165#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--)
166 166
167extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); 167extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
168extern void xfs_qm_mount_quotas(xfs_mount_t *, int); 168extern void xfs_qm_mount_quotas(xfs_mount_t *);
169extern int xfs_qm_quotacheck(xfs_mount_t *); 169extern int xfs_qm_quotacheck(xfs_mount_t *);
170extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); 170extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *);
171extern int xfs_qm_unmount_quotas(xfs_mount_t *); 171extern int xfs_qm_unmount_quotas(xfs_mount_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index f4f6c4c861d7..eea2e60b456b 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -162,7 +162,7 @@ xfs_qm_newmount(
162 * mounting, and get on with the boring life 162 * mounting, and get on with the boring life
163 * without disk quotas. 163 * without disk quotas.
164 */ 164 */
165 xfs_qm_mount_quotas(mp, 0); 165 xfs_qm_mount_quotas(mp);
166 } else { 166 } else {
167 /* 167 /*
168 * Clear the quota flags, but remember them. This 168 * Clear the quota flags, but remember them. This
@@ -184,13 +184,12 @@ STATIC int
184xfs_qm_endmount( 184xfs_qm_endmount(
185 xfs_mount_t *mp, 185 xfs_mount_t *mp,
186 uint needquotamount, 186 uint needquotamount,
187 uint quotaflags, 187 uint quotaflags)
188 int mfsi_flags)
189{ 188{
190 if (needquotamount) { 189 if (needquotamount) {
191 ASSERT(mp->m_qflags == 0); 190 ASSERT(mp->m_qflags == 0);
192 mp->m_qflags = quotaflags; 191 mp->m_qflags = quotaflags;
193 xfs_qm_mount_quotas(mp, mfsi_flags); 192 xfs_qm_mount_quotas(mp);
194 } 193 }
195 194
196#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) 195#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 768a3b27d2b6..1a3b803dfa55 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -362,11 +362,11 @@ xfs_qm_scall_quotaoff(
362 * if we don't need them anymore. 362 * if we don't need them anymore.
363 */ 363 */
364 if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) { 364 if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) {
365 XFS_PURGE_INODE(XFS_QI_UQIP(mp)); 365 IRELE(XFS_QI_UQIP(mp));
366 XFS_QI_UQIP(mp) = NULL; 366 XFS_QI_UQIP(mp) = NULL;
367 } 367 }
368 if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) { 368 if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) {
369 XFS_PURGE_INODE(XFS_QI_GQIP(mp)); 369 IRELE(XFS_QI_GQIP(mp));
370 XFS_QI_GQIP(mp) = NULL; 370 XFS_QI_GQIP(mp) = NULL;
371 } 371 }
372out_error: 372out_error:
@@ -1034,7 +1034,7 @@ xfs_qm_dqrele_all_inodes(
1034{ 1034{
1035 xfs_inode_t *ip, *topino; 1035 xfs_inode_t *ip, *topino;
1036 uint ireclaims; 1036 uint ireclaims;
1037 bhv_vnode_t *vp; 1037 struct inode *vp;
1038 boolean_t vnode_refd; 1038 boolean_t vnode_refd;
1039 1039
1040 ASSERT(mp->m_quotainfo); 1040 ASSERT(mp->m_quotainfo);
@@ -1059,7 +1059,7 @@ again:
1059 ip = ip->i_mnext; 1059 ip = ip->i_mnext;
1060 continue; 1060 continue;
1061 } 1061 }
1062 vp = XFS_ITOV_NULL(ip); 1062 vp = VFS_I(ip);
1063 if (!vp) { 1063 if (!vp) {
1064 ASSERT(ip->i_udquot == NULL); 1064 ASSERT(ip->i_udquot == NULL);
1065 ASSERT(ip->i_gdquot == NULL); 1065 ASSERT(ip->i_gdquot == NULL);
@@ -1449,14 +1449,14 @@ xfs_qm_internalqcheck(
1449 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { 1449 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
1450 xfs_dqtest_cmp(d); 1450 xfs_dqtest_cmp(d);
1451 e = (xfs_dqtest_t *) d->HL_NEXT; 1451 e = (xfs_dqtest_t *) d->HL_NEXT;
1452 kmem_free(d, sizeof(xfs_dqtest_t)); 1452 kmem_free(d);
1453 d = e; 1453 d = e;
1454 } 1454 }
1455 h1 = &qmtest_gdqtab[i]; 1455 h1 = &qmtest_gdqtab[i];
1456 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { 1456 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
1457 xfs_dqtest_cmp(d); 1457 xfs_dqtest_cmp(d);
1458 e = (xfs_dqtest_t *) d->HL_NEXT; 1458 e = (xfs_dqtest_t *) d->HL_NEXT;
1459 kmem_free(d, sizeof(xfs_dqtest_t)); 1459 kmem_free(d);
1460 d = e; 1460 d = e;
1461 } 1461 }
1462 } 1462 }
@@ -1467,8 +1467,8 @@ xfs_qm_internalqcheck(
1467 } else { 1467 } else {
1468 cmn_err(CE_DEBUG, "******** quotacheck successful! ********"); 1468 cmn_err(CE_DEBUG, "******** quotacheck successful! ********");
1469 } 1469 }
1470 kmem_free(qmtest_udqtab, qmtest_hashmask * sizeof(xfs_dqhash_t)); 1470 kmem_free(qmtest_udqtab);
1471 kmem_free(qmtest_gdqtab, qmtest_hashmask * sizeof(xfs_dqhash_t)); 1471 kmem_free(qmtest_gdqtab);
1472 mutex_unlock(&qcheck_lock); 1472 mutex_unlock(&qcheck_lock);
1473 return (qmtest_nfails); 1473 return (qmtest_nfails);
1474} 1474}
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 5e4a40b1c565..c4fcea600bc2 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -158,9 +158,6 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
158#define XFS_IS_SUSER_DQUOT(dqp) \ 158#define XFS_IS_SUSER_DQUOT(dqp) \
159 (!((dqp)->q_core.d_id)) 159 (!((dqp)->q_core.d_id))
160 160
161#define XFS_PURGE_INODE(ip) \
162 IRELE(ip);
163
164#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ 161#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
165 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ 162 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
166 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) 163 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 0b75d302508f..a34ef05489b1 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -89,7 +89,7 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
89 if (sleep & KM_SLEEP) 89 if (sleep & KM_SLEEP)
90 panic("ktrace_alloc: NULL memory on KM_SLEEP request!"); 90 panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
91 91
92 kmem_free(ktp, sizeof(*ktp)); 92 kmem_free(ktp);
93 93
94 return NULL; 94 return NULL;
95 } 95 }
@@ -126,7 +126,7 @@ ktrace_free(ktrace_t *ktp)
126 } else { 126 } else {
127 entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t)); 127 entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t));
128 128
129 kmem_free(ktp->kt_entries, entries_size); 129 kmem_free(ktp->kt_entries);
130 } 130 }
131 131
132 kmem_zone_free(ktrace_hdr_zone, ktp); 132 kmem_zone_free(ktrace_hdr_zone, ktp);
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 493a6ecf8590..5830c040ea7e 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -17,7 +17,7 @@
17 */ 17 */
18#include <xfs.h> 18#include <xfs.h>
19 19
20static mutex_t uuid_monitor; 20static DEFINE_MUTEX(uuid_monitor);
21static int uuid_table_size; 21static int uuid_table_size;
22static uuid_t *uuid_table; 22static uuid_t *uuid_table;
23 23
@@ -132,9 +132,3 @@ uuid_table_remove(uuid_t *uuid)
132 ASSERT(i < uuid_table_size); 132 ASSERT(i < uuid_table_size);
133 mutex_unlock(&uuid_monitor); 133 mutex_unlock(&uuid_monitor);
134} 134}
135
136void __init
137uuid_init(void)
138{
139 mutex_init(&uuid_monitor);
140}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
index b6f5922199ba..cff5b607d445 100644
--- a/fs/xfs/support/uuid.h
+++ b/fs/xfs/support/uuid.h
@@ -22,7 +22,6 @@ typedef struct {
22 unsigned char __u_bits[16]; 22 unsigned char __u_bits[16];
23} uuid_t; 23} uuid_t;
24 24
25extern void uuid_init(void);
26extern void uuid_create_nil(uuid_t *uuid); 25extern void uuid_create_nil(uuid_t *uuid);
27extern int uuid_is_nil(uuid_t *uuid); 26extern int uuid_is_nil(uuid_t *uuid);
28extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); 27extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index ebee3a4f703a..b2f639a1416f 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -37,15 +37,15 @@
37#include <linux/capability.h> 37#include <linux/capability.h>
38#include <linux/posix_acl_xattr.h> 38#include <linux/posix_acl_xattr.h>
39 39
40STATIC int xfs_acl_setmode(bhv_vnode_t *, xfs_acl_t *, int *); 40STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *);
41STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *); 41STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *);
42STATIC void xfs_acl_get_endian(xfs_acl_t *); 42STATIC void xfs_acl_get_endian(xfs_acl_t *);
43STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *); 43STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *);
44STATIC int xfs_acl_invalid(xfs_acl_t *); 44STATIC int xfs_acl_invalid(xfs_acl_t *);
45STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *); 45STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *);
46STATIC void xfs_acl_get_attr(bhv_vnode_t *, xfs_acl_t *, int, int, int *); 46STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *);
47STATIC void xfs_acl_set_attr(bhv_vnode_t *, xfs_acl_t *, int, int *); 47STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *);
48STATIC int xfs_acl_allow_set(bhv_vnode_t *, int); 48STATIC int xfs_acl_allow_set(struct inode *, int);
49 49
50kmem_zone_t *xfs_acl_zone; 50kmem_zone_t *xfs_acl_zone;
51 51
@@ -55,7 +55,7 @@ kmem_zone_t *xfs_acl_zone;
55 */ 55 */
56int 56int
57xfs_acl_vhasacl_access( 57xfs_acl_vhasacl_access(
58 bhv_vnode_t *vp) 58 struct inode *vp)
59{ 59{
60 int error; 60 int error;
61 61
@@ -68,7 +68,7 @@ xfs_acl_vhasacl_access(
68 */ 68 */
69int 69int
70xfs_acl_vhasacl_default( 70xfs_acl_vhasacl_default(
71 bhv_vnode_t *vp) 71 struct inode *vp)
72{ 72{
73 int error; 73 int error;
74 74
@@ -207,7 +207,7 @@ posix_acl_xfs_to_xattr(
207 207
208int 208int
209xfs_acl_vget( 209xfs_acl_vget(
210 bhv_vnode_t *vp, 210 struct inode *vp,
211 void *acl, 211 void *acl,
212 size_t size, 212 size_t size,
213 int kind) 213 int kind)
@@ -217,7 +217,6 @@ xfs_acl_vget(
217 posix_acl_xattr_header *ext_acl = acl; 217 posix_acl_xattr_header *ext_acl = acl;
218 int flags = 0; 218 int flags = 0;
219 219
220 VN_HOLD(vp);
221 if(size) { 220 if(size) {
222 if (!(_ACL_ALLOC(xfs_acl))) { 221 if (!(_ACL_ALLOC(xfs_acl))) {
223 error = ENOMEM; 222 error = ENOMEM;
@@ -239,11 +238,10 @@ xfs_acl_vget(
239 goto out; 238 goto out;
240 } 239 }
241 if (kind == _ACL_TYPE_ACCESS) 240 if (kind == _ACL_TYPE_ACCESS)
242 xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl); 241 xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl);
243 error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); 242 error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
244 } 243 }
245out: 244out:
246 VN_RELE(vp);
247 if(xfs_acl) 245 if(xfs_acl)
248 _ACL_FREE(xfs_acl); 246 _ACL_FREE(xfs_acl);
249 return -error; 247 return -error;
@@ -251,28 +249,26 @@ out:
251 249
252int 250int
253xfs_acl_vremove( 251xfs_acl_vremove(
254 bhv_vnode_t *vp, 252 struct inode *vp,
255 int kind) 253 int kind)
256{ 254{
257 int error; 255 int error;
258 256
259 VN_HOLD(vp);
260 error = xfs_acl_allow_set(vp, kind); 257 error = xfs_acl_allow_set(vp, kind);
261 if (!error) { 258 if (!error) {
262 error = xfs_attr_remove(xfs_vtoi(vp), 259 error = xfs_attr_remove(XFS_I(vp),
263 kind == _ACL_TYPE_DEFAULT? 260 kind == _ACL_TYPE_DEFAULT?
264 SGI_ACL_DEFAULT: SGI_ACL_FILE, 261 SGI_ACL_DEFAULT: SGI_ACL_FILE,
265 ATTR_ROOT); 262 ATTR_ROOT);
266 if (error == ENOATTR) 263 if (error == ENOATTR)
267 error = 0; /* 'scool */ 264 error = 0; /* 'scool */
268 } 265 }
269 VN_RELE(vp);
270 return -error; 266 return -error;
271} 267}
272 268
273int 269int
274xfs_acl_vset( 270xfs_acl_vset(
275 bhv_vnode_t *vp, 271 struct inode *vp,
276 void *acl, 272 void *acl,
277 size_t size, 273 size_t size,
278 int kind) 274 int kind)
@@ -298,7 +294,6 @@ xfs_acl_vset(
298 return 0; 294 return 0;
299 } 295 }
300 296
301 VN_HOLD(vp);
302 error = xfs_acl_allow_set(vp, kind); 297 error = xfs_acl_allow_set(vp, kind);
303 298
304 /* Incoming ACL exists, set file mode based on its value */ 299 /* Incoming ACL exists, set file mode based on its value */
@@ -321,7 +316,6 @@ xfs_acl_vset(
321 } 316 }
322 317
323out: 318out:
324 VN_RELE(vp);
325 _ACL_FREE(xfs_acl); 319 _ACL_FREE(xfs_acl);
326 return -error; 320 return -error;
327} 321}
@@ -341,8 +335,7 @@ xfs_acl_iaccess(
341 335
342 /* If the file has no ACL return -1. */ 336 /* If the file has no ACL return -1. */
343 rval = sizeof(xfs_acl_t); 337 rval = sizeof(xfs_acl_t);
344 if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, 338 if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) {
345 ATTR_ROOT | ATTR_KERNACCESS)) {
346 _ACL_FREE(acl); 339 _ACL_FREE(acl);
347 return -1; 340 return -1;
348 } 341 }
@@ -364,7 +357,7 @@ xfs_acl_iaccess(
364 357
365STATIC int 358STATIC int
366xfs_acl_allow_set( 359xfs_acl_allow_set(
367 bhv_vnode_t *vp, 360 struct inode *vp,
368 int kind) 361 int kind)
369{ 362{
370 if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) 363 if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
@@ -373,7 +366,7 @@ xfs_acl_allow_set(
373 return ENOTDIR; 366 return ENOTDIR;
374 if (vp->i_sb->s_flags & MS_RDONLY) 367 if (vp->i_sb->s_flags & MS_RDONLY)
375 return EROFS; 368 return EROFS;
376 if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER)) 369 if (XFS_I(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER))
377 return EPERM; 370 return EPERM;
378 return 0; 371 return 0;
379} 372}
@@ -567,7 +560,7 @@ xfs_acl_get_endian(
567 */ 560 */
568STATIC void 561STATIC void
569xfs_acl_get_attr( 562xfs_acl_get_attr(
570 bhv_vnode_t *vp, 563 struct inode *vp,
571 xfs_acl_t *aclp, 564 xfs_acl_t *aclp,
572 int kind, 565 int kind,
573 int flags, 566 int flags,
@@ -577,7 +570,7 @@ xfs_acl_get_attr(
577 570
578 ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1); 571 ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1);
579 flags |= ATTR_ROOT; 572 flags |= ATTR_ROOT;
580 *error = xfs_attr_get(xfs_vtoi(vp), 573 *error = xfs_attr_get(XFS_I(vp),
581 kind == _ACL_TYPE_ACCESS ? 574 kind == _ACL_TYPE_ACCESS ?
582 SGI_ACL_FILE : SGI_ACL_DEFAULT, 575 SGI_ACL_FILE : SGI_ACL_DEFAULT,
583 (char *)aclp, &len, flags); 576 (char *)aclp, &len, flags);
@@ -591,7 +584,7 @@ xfs_acl_get_attr(
591 */ 584 */
592STATIC void 585STATIC void
593xfs_acl_set_attr( 586xfs_acl_set_attr(
594 bhv_vnode_t *vp, 587 struct inode *vp,
595 xfs_acl_t *aclp, 588 xfs_acl_t *aclp,
596 int kind, 589 int kind,
597 int *error) 590 int *error)
@@ -616,7 +609,7 @@ xfs_acl_set_attr(
616 INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm); 609 INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm);
617 } 610 }
618 INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); 611 INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
619 *error = xfs_attr_set(xfs_vtoi(vp), 612 *error = xfs_attr_set(XFS_I(vp),
620 kind == _ACL_TYPE_ACCESS ? 613 kind == _ACL_TYPE_ACCESS ?
621 SGI_ACL_FILE: SGI_ACL_DEFAULT, 614 SGI_ACL_FILE: SGI_ACL_DEFAULT,
622 (char *)newacl, len, ATTR_ROOT); 615 (char *)newacl, len, ATTR_ROOT);
@@ -625,7 +618,7 @@ xfs_acl_set_attr(
625 618
626int 619int
627xfs_acl_vtoacl( 620xfs_acl_vtoacl(
628 bhv_vnode_t *vp, 621 struct inode *vp,
629 xfs_acl_t *access_acl, 622 xfs_acl_t *access_acl,
630 xfs_acl_t *default_acl) 623 xfs_acl_t *default_acl)
631{ 624{
@@ -640,7 +633,7 @@ xfs_acl_vtoacl(
640 if (error) 633 if (error)
641 access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; 634 access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
642 else /* We have a good ACL and the file mode, synchronize. */ 635 else /* We have a good ACL and the file mode, synchronize. */
643 xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl); 636 xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl);
644 } 637 }
645 638
646 if (default_acl) { 639 if (default_acl) {
@@ -657,7 +650,7 @@ xfs_acl_vtoacl(
657 */ 650 */
658int 651int
659xfs_acl_inherit( 652xfs_acl_inherit(
660 bhv_vnode_t *vp, 653 struct inode *vp,
661 mode_t mode, 654 mode_t mode,
662 xfs_acl_t *pdaclp) 655 xfs_acl_t *pdaclp)
663{ 656{
@@ -716,11 +709,11 @@ out_error:
716 */ 709 */
717STATIC int 710STATIC int
718xfs_acl_setmode( 711xfs_acl_setmode(
719 bhv_vnode_t *vp, 712 struct inode *vp,
720 xfs_acl_t *acl, 713 xfs_acl_t *acl,
721 int *basicperms) 714 int *basicperms)
722{ 715{
723 bhv_vattr_t va; 716 struct iattr iattr;
724 xfs_acl_entry_t *ap; 717 xfs_acl_entry_t *ap;
725 xfs_acl_entry_t *gap = NULL; 718 xfs_acl_entry_t *gap = NULL;
726 int i, nomask = 1; 719 int i, nomask = 1;
@@ -734,25 +727,25 @@ xfs_acl_setmode(
734 * Copy the u::, g::, o::, and m:: bits from the ACL into the 727 * Copy the u::, g::, o::, and m:: bits from the ACL into the
735 * mode. The m:: bits take precedence over the g:: bits. 728 * mode. The m:: bits take precedence over the g:: bits.
736 */ 729 */
737 va.va_mask = XFS_AT_MODE; 730 iattr.ia_valid = ATTR_MODE;
738 va.va_mode = xfs_vtoi(vp)->i_d.di_mode; 731 iattr.ia_mode = XFS_I(vp)->i_d.di_mode;
739 va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); 732 iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
740 ap = acl->acl_entry; 733 ap = acl->acl_entry;
741 for (i = 0; i < acl->acl_cnt; ++i) { 734 for (i = 0; i < acl->acl_cnt; ++i) {
742 switch (ap->ae_tag) { 735 switch (ap->ae_tag) {
743 case ACL_USER_OBJ: 736 case ACL_USER_OBJ:
744 va.va_mode |= ap->ae_perm << 6; 737 iattr.ia_mode |= ap->ae_perm << 6;
745 break; 738 break;
746 case ACL_GROUP_OBJ: 739 case ACL_GROUP_OBJ:
747 gap = ap; 740 gap = ap;
748 break; 741 break;
749 case ACL_MASK: /* more than just standard modes */ 742 case ACL_MASK: /* more than just standard modes */
750 nomask = 0; 743 nomask = 0;
751 va.va_mode |= ap->ae_perm << 3; 744 iattr.ia_mode |= ap->ae_perm << 3;
752 *basicperms = 0; 745 *basicperms = 0;
753 break; 746 break;
754 case ACL_OTHER: 747 case ACL_OTHER:
755 va.va_mode |= ap->ae_perm; 748 iattr.ia_mode |= ap->ae_perm;
756 break; 749 break;
757 default: /* more than just standard modes */ 750 default: /* more than just standard modes */
758 *basicperms = 0; 751 *basicperms = 0;
@@ -763,9 +756,9 @@ xfs_acl_setmode(
763 756
764 /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */ 757 /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
765 if (gap && nomask) 758 if (gap && nomask)
766 va.va_mode |= gap->ae_perm << 3; 759 iattr.ia_mode |= gap->ae_perm << 3;
767 760
768 return xfs_setattr(xfs_vtoi(vp), &va, 0, sys_cred); 761 return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred);
769} 762}
770 763
771/* 764/*
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 332a772461c4..a4e293b93efa 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -46,6 +46,8 @@ typedef struct xfs_acl {
46#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) 46#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
47#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) 47#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
48 48
49#define _ACL_TYPE_ACCESS 1
50#define _ACL_TYPE_DEFAULT 2
49 51
50#ifdef CONFIG_XFS_POSIX_ACL 52#ifdef CONFIG_XFS_POSIX_ACL
51 53
@@ -57,17 +59,15 @@ extern struct kmem_zone *xfs_acl_zone;
57 (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) 59 (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name))
58#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) 60#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone)
59 61
60extern int xfs_acl_inherit(bhv_vnode_t *, mode_t mode, xfs_acl_t *); 62extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *);
61extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); 63extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
62extern int xfs_acl_vtoacl(bhv_vnode_t *, xfs_acl_t *, xfs_acl_t *); 64extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *);
63extern int xfs_acl_vhasacl_access(bhv_vnode_t *); 65extern int xfs_acl_vhasacl_access(struct inode *);
64extern int xfs_acl_vhasacl_default(bhv_vnode_t *); 66extern int xfs_acl_vhasacl_default(struct inode *);
65extern int xfs_acl_vset(bhv_vnode_t *, void *, size_t, int); 67extern int xfs_acl_vset(struct inode *, void *, size_t, int);
66extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int); 68extern int xfs_acl_vget(struct inode *, void *, size_t, int);
67extern int xfs_acl_vremove(bhv_vnode_t *, int); 69extern int xfs_acl_vremove(struct inode *, int);
68 70
69#define _ACL_TYPE_ACCESS 1
70#define _ACL_TYPE_DEFAULT 2
71#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) 71#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
72 72
73#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d)) 73#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d))
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index f9472a2076d4..0b3b5efe848c 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -92,16 +92,6 @@
92 ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ 92 ((__u8*)(pointer))[1] = (((value) ) & 0xff); \
93 } 93 }
94 94
95/* define generic INT_ macros */
96
97#define INT_GET(reference,arch) \
98 (((arch) == ARCH_NOCONVERT) \
99 ? \
100 (reference) \
101 : \
102 INT_SWAP((reference),(reference)) \
103 )
104
105/* does not return a value */ 95/* does not return a value */
106#define INT_SET(reference,arch,valueref) \ 96#define INT_SET(reference,arch,valueref) \
107 (__builtin_constant_p(valueref) ? \ 97 (__builtin_constant_p(valueref) ? \
@@ -112,64 +102,6 @@
112 ) \ 102 ) \
113 ) 103 )
114 104
115/* does not return a value */
116#define INT_MOD_EXPR(reference,arch,code) \
117 (((arch) == ARCH_NOCONVERT) \
118 ? \
119 (void)((reference) code) \
120 : \
121 (void)( \
122 (reference) = INT_GET((reference),arch) , \
123 ((reference) code), \
124 INT_SET(reference, arch, reference) \
125 ) \
126 )
127
128/* does not return a value */
129#define INT_MOD(reference,arch,delta) \
130 (void)( \
131 INT_MOD_EXPR(reference,arch,+=(delta)) \
132 )
133
134/*
135 * INT_COPY - copy a value between two locations with the
136 * _same architecture_ but _potentially different sizes_
137 *
138 * if the types of the two parameters are equal or they are
139 * in native architecture, a simple copy is done
140 *
141 * otherwise, architecture conversions are done
142 *
143 */
144
145/* does not return a value */
146#define INT_COPY(dst,src,arch) \
147 ( \
148 ((sizeof(dst) == sizeof(src)) || ((arch) == ARCH_NOCONVERT)) \
149 ? \
150 (void)((dst) = (src)) \
151 : \
152 INT_SET(dst, arch, INT_GET(src, arch)) \
153 )
154
155/*
156 * INT_XLATE - copy a value in either direction between two locations
157 * with different architectures
158 *
159 * dir < 0 - copy from memory to buffer (native to arch)
160 * dir > 0 - copy from buffer to memory (arch to native)
161 */
162
163/* does not return a value */
164#define INT_XLATE(buf,mem,dir,arch) {\
165 ASSERT(dir); \
166 if (dir>0) { \
167 (mem)=INT_GET(buf, arch); \
168 } else { \
169 INT_SET(buf, arch, mem); \
170 } \
171}
172
173/* 105/*
174 * In directories inode numbers are stored as unaligned arrays of unsigned 106 * In directories inode numbers are stored as unaligned arrays of unsigned
175 * 8bit integers on disk. 107 * 8bit integers on disk.
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index df151a859186..f7cdc28aff41 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -16,8 +16,6 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 18
19#include <linux/capability.h>
20
21#include "xfs.h" 19#include "xfs.h"
22#include "xfs_fs.h" 20#include "xfs_fs.h"
23#include "xfs_types.h" 21#include "xfs_types.h"
@@ -57,11 +55,6 @@
57 * Provide the external interfaces to manage attribute lists. 55 * Provide the external interfaces to manage attribute lists.
58 */ 56 */
59 57
60#define ATTR_SYSCOUNT 2
61static struct attrnames posix_acl_access;
62static struct attrnames posix_acl_default;
63static struct attrnames *attr_system_names[ATTR_SYSCOUNT];
64
65/*======================================================================== 58/*========================================================================
66 * Function prototypes for the kernel. 59 * Function prototypes for the kernel.
67 *========================================================================*/ 60 *========================================================================*/
@@ -116,6 +109,17 @@ xfs_attr_name_to_xname(
116 return 0; 109 return 0;
117} 110}
118 111
112STATIC int
113xfs_inode_hasattr(
114 struct xfs_inode *ip)
115{
116 if (!XFS_IFORK_Q(ip) ||
117 (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
118 ip->i_d.di_anextents == 0))
119 return 0;
120 return 1;
121}
122
119/*======================================================================== 123/*========================================================================
120 * Overall external interface routines. 124 * Overall external interface routines.
121 *========================================================================*/ 125 *========================================================================*/
@@ -127,10 +131,8 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
127 xfs_da_args_t args; 131 xfs_da_args_t args;
128 int error; 132 int error;
129 133
130 if ((XFS_IFORK_Q(ip) == 0) || 134 if (!xfs_inode_hasattr(ip))
131 (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && 135 return ENOATTR;
132 ip->i_d.di_anextents == 0))
133 return(ENOATTR);
134 136
135 /* 137 /*
136 * Fill in the arg structure for this request. 138 * Fill in the arg structure for this request.
@@ -148,11 +150,7 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
148 /* 150 /*
149 * Decide on what work routines to call based on the inode size. 151 * Decide on what work routines to call based on the inode size.
150 */ 152 */
151 if (XFS_IFORK_Q(ip) == 0 || 153 if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
152 (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
153 ip->i_d.di_anextents == 0)) {
154 error = XFS_ERROR(ENOATTR);
155 } else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
156 error = xfs_attr_shortform_getvalue(&args); 154 error = xfs_attr_shortform_getvalue(&args);
157 } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) { 155 } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
158 error = xfs_attr_leaf_get(&args); 156 error = xfs_attr_leaf_get(&args);
@@ -196,6 +194,46 @@ xfs_attr_get(
196 return(error); 194 return(error);
197} 195}
198 196
197/*
198 * Calculate how many blocks we need for the new attribute,
199 */
200int
201xfs_attr_calc_size(
202 struct xfs_inode *ip,
203 int namelen,
204 int valuelen,
205 int *local)
206{
207 struct xfs_mount *mp = ip->i_mount;
208 int size;
209 int nblks;
210
211 /*
212 * Determine space new attribute will use, and if it would be
213 * "local" or "remote" (note: local != inline).
214 */
215 size = xfs_attr_leaf_newentsize(namelen, valuelen,
216 mp->m_sb.sb_blocksize, local);
217
218 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
219 if (*local) {
220 if (size > (mp->m_sb.sb_blocksize >> 1)) {
221 /* Double split possible */
222 nblks *= 2;
223 }
224 } else {
225 /*
226 * Out of line attribute, cannot double split, but
227 * make room for the attribute value itself.
228 */
229 uint dblocks = XFS_B_TO_FSB(mp, valuelen);
230 nblks += dblocks;
231 nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
232 }
233
234 return nblks;
235}
236
199STATIC int 237STATIC int
200xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, 238xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
201 char *value, int valuelen, int flags) 239 char *value, int valuelen, int flags)
@@ -204,10 +242,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
204 xfs_fsblock_t firstblock; 242 xfs_fsblock_t firstblock;
205 xfs_bmap_free_t flist; 243 xfs_bmap_free_t flist;
206 int error, err2, committed; 244 int error, err2, committed;
207 int local, size;
208 uint nblks;
209 xfs_mount_t *mp = dp->i_mount; 245 xfs_mount_t *mp = dp->i_mount;
210 int rsvd = (flags & ATTR_ROOT) != 0; 246 int rsvd = (flags & ATTR_ROOT) != 0;
247 int local;
211 248
212 /* 249 /*
213 * Attach the dquots to the inode. 250 * Attach the dquots to the inode.
@@ -241,33 +278,10 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
241 args.firstblock = &firstblock; 278 args.firstblock = &firstblock;
242 args.flist = &flist; 279 args.flist = &flist;
243 args.whichfork = XFS_ATTR_FORK; 280 args.whichfork = XFS_ATTR_FORK;
244 args.addname = 1; 281 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
245 args.oknoent = 1;
246
247 /*
248 * Determine space new attribute will use, and if it would be
249 * "local" or "remote" (note: local != inline).
250 */
251 size = xfs_attr_leaf_newentsize(name->len, valuelen,
252 mp->m_sb.sb_blocksize, &local);
253
254 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
255 if (local) {
256 if (size > (mp->m_sb.sb_blocksize >> 1)) {
257 /* Double split possible */
258 nblks <<= 1;
259 }
260 } else {
261 uint dblocks = XFS_B_TO_FSB(mp, valuelen);
262 /* Out of line attribute, cannot double split, but make
263 * room for the attribute value itself.
264 */
265 nblks += dblocks;
266 nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
267 }
268 282
269 /* Size is now blocks for attribute data */ 283 /* Size is now blocks for attribute data */
270 args.total = nblks; 284 args.total = xfs_attr_calc_size(dp, name->len, valuelen, &local);
271 285
272 /* 286 /*
273 * Start our first transaction of the day. 287 * Start our first transaction of the day.
@@ -289,18 +303,17 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
289 if (rsvd) 303 if (rsvd)
290 args.trans->t_flags |= XFS_TRANS_RESERVE; 304 args.trans->t_flags |= XFS_TRANS_RESERVE;
291 305
292 if ((error = xfs_trans_reserve(args.trans, (uint) nblks, 306 if ((error = xfs_trans_reserve(args.trans, args.total,
293 XFS_ATTRSET_LOG_RES(mp, nblks), 307 XFS_ATTRSET_LOG_RES(mp, args.total), 0,
294 0, XFS_TRANS_PERM_LOG_RES, 308 XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) {
295 XFS_ATTRSET_LOG_COUNT))) {
296 xfs_trans_cancel(args.trans, 0); 309 xfs_trans_cancel(args.trans, 0);
297 return(error); 310 return(error);
298 } 311 }
299 xfs_ilock(dp, XFS_ILOCK_EXCL); 312 xfs_ilock(dp, XFS_ILOCK_EXCL);
300 313
301 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, nblks, 0, 314 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0,
302 rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : 315 rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
303 XFS_QMOPT_RES_REGBLKS); 316 XFS_QMOPT_RES_REGBLKS);
304 if (error) { 317 if (error) {
305 xfs_iunlock(dp, XFS_ILOCK_EXCL); 318 xfs_iunlock(dp, XFS_ILOCK_EXCL);
306 xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); 319 xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
@@ -387,7 +400,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
387 * Commit the leaf transformation. We'll need another (linked) 400 * Commit the leaf transformation. We'll need another (linked)
388 * transaction to add the new attribute to the leaf. 401 * transaction to add the new attribute to the leaf.
389 */ 402 */
390 if ((error = xfs_attr_rolltrans(&args.trans, dp))) 403
404 error = xfs_trans_roll(&args.trans, dp);
405 if (error)
391 goto out; 406 goto out;
392 407
393 } 408 }
@@ -529,9 +544,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
529 /* 544 /*
530 * Decide on what work routines to call based on the inode size. 545 * Decide on what work routines to call based on the inode size.
531 */ 546 */
532 if (XFS_IFORK_Q(dp) == 0 || 547 if (!xfs_inode_hasattr(dp)) {
533 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
534 dp->i_d.di_anextents == 0)) {
535 error = XFS_ERROR(ENOATTR); 548 error = XFS_ERROR(ENOATTR);
536 goto out; 549 goto out;
537 } 550 }
@@ -601,29 +614,33 @@ xfs_attr_remove(
601 return error; 614 return error;
602 615
603 xfs_ilock(dp, XFS_ILOCK_SHARED); 616 xfs_ilock(dp, XFS_ILOCK_SHARED);
604 if (XFS_IFORK_Q(dp) == 0 || 617 if (!xfs_inode_hasattr(dp)) {
605 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
606 dp->i_d.di_anextents == 0)) {
607 xfs_iunlock(dp, XFS_ILOCK_SHARED); 618 xfs_iunlock(dp, XFS_ILOCK_SHARED);
608 return(XFS_ERROR(ENOATTR)); 619 return XFS_ERROR(ENOATTR);
609 } 620 }
610 xfs_iunlock(dp, XFS_ILOCK_SHARED); 621 xfs_iunlock(dp, XFS_ILOCK_SHARED);
611 622
612 return xfs_attr_remove_int(dp, &xname, flags); 623 return xfs_attr_remove_int(dp, &xname, flags);
613} 624}
614 625
615STATIC int 626int
616xfs_attr_list_int(xfs_attr_list_context_t *context) 627xfs_attr_list_int(xfs_attr_list_context_t *context)
617{ 628{
618 int error; 629 int error;
619 xfs_inode_t *dp = context->dp; 630 xfs_inode_t *dp = context->dp;
620 631
632 XFS_STATS_INC(xs_attr_list);
633
634 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
635 return EIO;
636
637 xfs_ilock(dp, XFS_ILOCK_SHARED);
638 xfs_attr_trace_l_c("syscall start", context);
639
621 /* 640 /*
622 * Decide on what work routines to call based on the inode size. 641 * Decide on what work routines to call based on the inode size.
623 */ 642 */
624 if (XFS_IFORK_Q(dp) == 0 || 643 if (!xfs_inode_hasattr(dp)) {
625 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
626 dp->i_d.di_anextents == 0)) {
627 error = 0; 644 error = 0;
628 } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { 645 } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
629 error = xfs_attr_shortform_list(context); 646 error = xfs_attr_shortform_list(context);
@@ -632,6 +649,10 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
632 } else { 649 } else {
633 error = xfs_attr_node_list(context); 650 error = xfs_attr_node_list(context);
634 } 651 }
652
653 xfs_iunlock(dp, XFS_ILOCK_SHARED);
654 xfs_attr_trace_l_c("syscall end", context);
655
635 return error; 656 return error;
636} 657}
637 658
@@ -648,74 +669,50 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
648 */ 669 */
649/*ARGSUSED*/ 670/*ARGSUSED*/
650STATIC int 671STATIC int
651xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp, 672xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
652 char *name, int namelen, 673 char *name, int namelen,
653 int valuelen, char *value) 674 int valuelen, char *value)
654{ 675{
676 struct attrlist *alist = (struct attrlist *)context->alist;
655 attrlist_ent_t *aep; 677 attrlist_ent_t *aep;
656 int arraytop; 678 int arraytop;
657 679
658 ASSERT(!(context->flags & ATTR_KERNOVAL)); 680 ASSERT(!(context->flags & ATTR_KERNOVAL));
659 ASSERT(context->count >= 0); 681 ASSERT(context->count >= 0);
660 ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); 682 ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
661 ASSERT(context->firstu >= sizeof(*context->alist)); 683 ASSERT(context->firstu >= sizeof(*alist));
662 ASSERT(context->firstu <= context->bufsize); 684 ASSERT(context->firstu <= context->bufsize);
663 685
664 arraytop = sizeof(*context->alist) + 686 /*
665 context->count * sizeof(context->alist->al_offset[0]); 687 * Only list entries in the right namespace.
688 */
689 if (((context->flags & ATTR_SECURE) == 0) !=
690 ((flags & XFS_ATTR_SECURE) == 0))
691 return 0;
692 if (((context->flags & ATTR_ROOT) == 0) !=
693 ((flags & XFS_ATTR_ROOT) == 0))
694 return 0;
695
696 arraytop = sizeof(*alist) +
697 context->count * sizeof(alist->al_offset[0]);
666 context->firstu -= ATTR_ENTSIZE(namelen); 698 context->firstu -= ATTR_ENTSIZE(namelen);
667 if (context->firstu < arraytop) { 699 if (context->firstu < arraytop) {
668 xfs_attr_trace_l_c("buffer full", context); 700 xfs_attr_trace_l_c("buffer full", context);
669 context->alist->al_more = 1; 701 alist->al_more = 1;
670 context->seen_enough = 1; 702 context->seen_enough = 1;
671 return 1; 703 return 1;
672 } 704 }
673 705
674 aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]); 706 aep = (attrlist_ent_t *)&context->alist[context->firstu];
675 aep->a_valuelen = valuelen; 707 aep->a_valuelen = valuelen;
676 memcpy(aep->a_name, name, namelen); 708 memcpy(aep->a_name, name, namelen);
677 aep->a_name[ namelen ] = 0; 709 aep->a_name[namelen] = 0;
678 context->alist->al_offset[ context->count++ ] = context->firstu; 710 alist->al_offset[context->count++] = context->firstu;
679 context->alist->al_count = context->count; 711 alist->al_count = context->count;
680 xfs_attr_trace_l_c("add", context); 712 xfs_attr_trace_l_c("add", context);
681 return 0; 713 return 0;
682} 714}
683 715
684STATIC int
685xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
686 char *name, int namelen,
687 int valuelen, char *value)
688{
689 char *offset;
690 int arraytop;
691
692 ASSERT(context->count >= 0);
693
694 arraytop = context->count + namesp->attr_namelen + namelen + 1;
695 if (arraytop > context->firstu) {
696 context->count = -1; /* insufficient space */
697 return 1;
698 }
699 offset = (char *)context->alist + context->count;
700 strncpy(offset, namesp->attr_name, namesp->attr_namelen);
701 offset += namesp->attr_namelen;
702 strncpy(offset, name, namelen); /* real name */
703 offset += namelen;
704 *offset = '\0';
705 context->count += namesp->attr_namelen + namelen + 1;
706 return 0;
707}
708
709/*ARGSUSED*/
710STATIC int
711xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
712 char *name, int namelen,
713 int valuelen, char *value)
714{
715 context->count += namesp->attr_namelen + namelen + 1;
716 return 0;
717}
718
719/* 716/*
720 * Generate a list of extended attribute names and optionally 717 * Generate a list of extended attribute names and optionally
721 * also value lengths. Positive return value follows the XFS 718 * also value lengths. Positive return value follows the XFS
@@ -732,10 +729,9 @@ xfs_attr_list(
732 attrlist_cursor_kern_t *cursor) 729 attrlist_cursor_kern_t *cursor)
733{ 730{
734 xfs_attr_list_context_t context; 731 xfs_attr_list_context_t context;
732 struct attrlist *alist;
735 int error; 733 int error;
736 734
737 XFS_STATS_INC(xs_attr_list);
738
739 /* 735 /*
740 * Validate the cursor. 736 * Validate the cursor.
741 */ 737 */
@@ -756,52 +752,23 @@ xfs_attr_list(
756 /* 752 /*
757 * Initialize the output buffer. 753 * Initialize the output buffer.
758 */ 754 */
755 memset(&context, 0, sizeof(context));
759 context.dp = dp; 756 context.dp = dp;
760 context.cursor = cursor; 757 context.cursor = cursor;
761 context.count = 0;
762 context.dupcnt = 0;
763 context.resynch = 1; 758 context.resynch = 1;
764 context.flags = flags; 759 context.flags = flags;
765 context.seen_enough = 0; 760 context.alist = buffer;
766 context.alist = (attrlist_t *)buffer; 761 context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */
767 context.put_value = 0; 762 context.firstu = context.bufsize;
768 763 context.put_listent = xfs_attr_put_listent;
769 if (flags & ATTR_KERNAMELS) {
770 context.bufsize = bufsize;
771 context.firstu = context.bufsize;
772 if (flags & ATTR_KERNOVAL)
773 context.put_listent = xfs_attr_kern_list_sizes;
774 else
775 context.put_listent = xfs_attr_kern_list;
776 } else {
777 context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */
778 context.firstu = context.bufsize;
779 context.alist->al_count = 0;
780 context.alist->al_more = 0;
781 context.alist->al_offset[0] = context.bufsize;
782 context.put_listent = xfs_attr_put_listent;
783 }
784
785 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
786 return EIO;
787 764
788 xfs_ilock(dp, XFS_ILOCK_SHARED); 765 alist = (struct attrlist *)context.alist;
789 xfs_attr_trace_l_c("syscall start", &context); 766 alist->al_count = 0;
767 alist->al_more = 0;
768 alist->al_offset[0] = context.bufsize;
790 769
791 error = xfs_attr_list_int(&context); 770 error = xfs_attr_list_int(&context);
792 771 ASSERT(error >= 0);
793 xfs_iunlock(dp, XFS_ILOCK_SHARED);
794 xfs_attr_trace_l_c("syscall end", &context);
795
796 if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
797 /* must return negated buffer size or the error */
798 if (context.count < 0)
799 error = XFS_ERROR(ERANGE);
800 else
801 error = -context.count;
802 } else
803 ASSERT(error >= 0);
804
805 return error; 772 return error;
806} 773}
807 774
@@ -816,12 +783,10 @@ xfs_attr_inactive(xfs_inode_t *dp)
816 ASSERT(! XFS_NOT_DQATTACHED(mp, dp)); 783 ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
817 784
818 xfs_ilock(dp, XFS_ILOCK_SHARED); 785 xfs_ilock(dp, XFS_ILOCK_SHARED);
819 if ((XFS_IFORK_Q(dp) == 0) || 786 if (!xfs_inode_hasattr(dp) ||
820 (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) || 787 dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
821 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
822 dp->i_d.di_anextents == 0)) {
823 xfs_iunlock(dp, XFS_ILOCK_SHARED); 788 xfs_iunlock(dp, XFS_ILOCK_SHARED);
824 return(0); 789 return 0;
825 } 790 }
826 xfs_iunlock(dp, XFS_ILOCK_SHARED); 791 xfs_iunlock(dp, XFS_ILOCK_SHARED);
827 792
@@ -854,10 +819,8 @@ xfs_attr_inactive(xfs_inode_t *dp)
854 /* 819 /*
855 * Decide on what work routines to call based on the inode size. 820 * Decide on what work routines to call based on the inode size.
856 */ 821 */
857 if ((XFS_IFORK_Q(dp) == 0) || 822 if (!xfs_inode_hasattr(dp) ||
858 (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) || 823 dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
859 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
860 dp->i_d.di_anextents == 0)) {
861 error = 0; 824 error = 0;
862 goto out; 825 goto out;
863 } 826 }
@@ -974,7 +937,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
974 xfs_da_brelse(args->trans, bp); 937 xfs_da_brelse(args->trans, bp);
975 return(retval); 938 return(retval);
976 } 939 }
977 args->rename = 1; /* an atomic rename */ 940 args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */
978 args->blkno2 = args->blkno; /* set 2nd entry info*/ 941 args->blkno2 = args->blkno; /* set 2nd entry info*/
979 args->index2 = args->index; 942 args->index2 = args->index;
980 args->rmtblkno2 = args->rmtblkno; 943 args->rmtblkno2 = args->rmtblkno;
@@ -1019,7 +982,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1019 * Commit the current trans (including the inode) and start 982 * Commit the current trans (including the inode) and start
1020 * a new one. 983 * a new one.
1021 */ 984 */
1022 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 985 error = xfs_trans_roll(&args->trans, dp);
986 if (error)
1023 return (error); 987 return (error);
1024 988
1025 /* 989 /*
@@ -1033,7 +997,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1033 * Commit the transaction that added the attr name so that 997 * Commit the transaction that added the attr name so that
1034 * later routines can manage their own transactions. 998 * later routines can manage their own transactions.
1035 */ 999 */
1036 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 1000 error = xfs_trans_roll(&args->trans, dp);
1001 if (error)
1037 return (error); 1002 return (error);
1038 1003
1039 /* 1004 /*
@@ -1054,7 +1019,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1054 * so that one disappears and one appears atomically. Then we 1019 * so that one disappears and one appears atomically. Then we
1055 * must remove the "old" attribute/value pair. 1020 * must remove the "old" attribute/value pair.
1056 */ 1021 */
1057 if (args->rename) { 1022 if (args->op_flags & XFS_DA_OP_RENAME) {
1058 /* 1023 /*
1059 * In a separate transaction, set the incomplete flag on the 1024 * In a separate transaction, set the incomplete flag on the
1060 * "old" attr and clear the incomplete flag on the "new" attr. 1025 * "old" attr and clear the incomplete flag on the "new" attr.
@@ -1122,7 +1087,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1122 /* 1087 /*
1123 * Commit the remove and start the next trans in series. 1088 * Commit the remove and start the next trans in series.
1124 */ 1089 */
1125 error = xfs_attr_rolltrans(&args->trans, dp); 1090 error = xfs_trans_roll(&args->trans, dp);
1126 1091
1127 } else if (args->rmtblkno > 0) { 1092 } else if (args->rmtblkno > 0) {
1128 /* 1093 /*
@@ -1307,7 +1272,7 @@ restart:
1307 } else if (retval == EEXIST) { 1272 } else if (retval == EEXIST) {
1308 if (args->flags & ATTR_CREATE) 1273 if (args->flags & ATTR_CREATE)
1309 goto out; 1274 goto out;
1310 args->rename = 1; /* atomic rename op */ 1275 args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */
1311 args->blkno2 = args->blkno; /* set 2nd entry info*/ 1276 args->blkno2 = args->blkno; /* set 2nd entry info*/
1312 args->index2 = args->index; 1277 args->index2 = args->index;
1313 args->rmtblkno2 = args->rmtblkno; 1278 args->rmtblkno2 = args->rmtblkno;
@@ -1353,7 +1318,8 @@ restart:
1353 * Commit the node conversion and start the next 1318 * Commit the node conversion and start the next
1354 * trans in the chain. 1319 * trans in the chain.
1355 */ 1320 */
1356 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 1321 error = xfs_trans_roll(&args->trans, dp);
1322 if (error)
1357 goto out; 1323 goto out;
1358 1324
1359 goto restart; 1325 goto restart;
@@ -1404,7 +1370,8 @@ restart:
1404 * Commit the leaf addition or btree split and start the next 1370 * Commit the leaf addition or btree split and start the next
1405 * trans in the chain. 1371 * trans in the chain.
1406 */ 1372 */
1407 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 1373 error = xfs_trans_roll(&args->trans, dp);
1374 if (error)
1408 goto out; 1375 goto out;
1409 1376
1410 /* 1377 /*
@@ -1425,7 +1392,7 @@ restart:
1425 * so that one disappears and one appears atomically. Then we 1392 * so that one disappears and one appears atomically. Then we
1426 * must remove the "old" attribute/value pair. 1393 * must remove the "old" attribute/value pair.
1427 */ 1394 */
1428 if (args->rename) { 1395 if (args->op_flags & XFS_DA_OP_RENAME) {
1429 /* 1396 /*
1430 * In a separate transaction, set the incomplete flag on the 1397 * In a separate transaction, set the incomplete flag on the
1431 * "old" attr and clear the incomplete flag on the "new" attr. 1398 * "old" attr and clear the incomplete flag on the "new" attr.
@@ -1504,7 +1471,8 @@ restart:
1504 /* 1471 /*
1505 * Commit and start the next trans in the chain. 1472 * Commit and start the next trans in the chain.
1506 */ 1473 */
1507 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 1474 error = xfs_trans_roll(&args->trans, dp);
1475 if (error)
1508 goto out; 1476 goto out;
1509 1477
1510 } else if (args->rmtblkno > 0) { 1478 } else if (args->rmtblkno > 0) {
@@ -1636,7 +1604,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1636 /* 1604 /*
1637 * Commit the Btree join operation and start a new trans. 1605 * Commit the Btree join operation and start a new trans.
1638 */ 1606 */
1639 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 1607 error = xfs_trans_roll(&args->trans, dp);
1608 if (error)
1640 goto out; 1609 goto out;
1641 } 1610 }
1642 1611
@@ -2137,7 +2106,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2137 /* 2106 /*
2138 * Start the next trans in the chain. 2107 * Start the next trans in the chain.
2139 */ 2108 */
2140 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 2109 error = xfs_trans_roll(&args->trans, dp);
2110 if (error)
2141 return (error); 2111 return (error);
2142 } 2112 }
2143 2113
@@ -2287,7 +2257,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2287 /* 2257 /*
2288 * Close out trans and start the next one in the chain. 2258 * Close out trans and start the next one in the chain.
2289 */ 2259 */
2290 if ((error = xfs_attr_rolltrans(&args->trans, args->dp))) 2260 error = xfs_trans_roll(&args->trans, args->dp);
2261 if (error)
2291 return (error); 2262 return (error);
2292 } 2263 }
2293 return(0); 2264 return(0);
@@ -2300,23 +2271,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2300void 2271void
2301xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context) 2272xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
2302{ 2273{
2303 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, 2274 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, context,
2304 (__psunsigned_t)context->dp,
2305 (__psunsigned_t)context->cursor->hashval,
2306 (__psunsigned_t)context->cursor->blkno,
2307 (__psunsigned_t)context->cursor->offset,
2308 (__psunsigned_t)context->alist,
2309 (__psunsigned_t)context->bufsize,
2310 (__psunsigned_t)context->count,
2311 (__psunsigned_t)context->firstu,
2312 (__psunsigned_t)
2313 ((context->count > 0) &&
2314 !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2315 ? (ATTR_ENTRY(context->alist,
2316 context->count-1)->a_valuelen)
2317 : 0,
2318 (__psunsigned_t)context->dupcnt,
2319 (__psunsigned_t)context->flags,
2320 (__psunsigned_t)NULL, 2275 (__psunsigned_t)NULL,
2321 (__psunsigned_t)NULL, 2276 (__psunsigned_t)NULL,
2322 (__psunsigned_t)NULL); 2277 (__psunsigned_t)NULL);
@@ -2329,23 +2284,7 @@ void
2329xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context, 2284xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
2330 struct xfs_da_intnode *node) 2285 struct xfs_da_intnode *node)
2331{ 2286{
2332 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, 2287 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, context,
2333 (__psunsigned_t)context->dp,
2334 (__psunsigned_t)context->cursor->hashval,
2335 (__psunsigned_t)context->cursor->blkno,
2336 (__psunsigned_t)context->cursor->offset,
2337 (__psunsigned_t)context->alist,
2338 (__psunsigned_t)context->bufsize,
2339 (__psunsigned_t)context->count,
2340 (__psunsigned_t)context->firstu,
2341 (__psunsigned_t)
2342 ((context->count > 0) &&
2343 !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2344 ? (ATTR_ENTRY(context->alist,
2345 context->count-1)->a_valuelen)
2346 : 0,
2347 (__psunsigned_t)context->dupcnt,
2348 (__psunsigned_t)context->flags,
2349 (__psunsigned_t)be16_to_cpu(node->hdr.count), 2288 (__psunsigned_t)be16_to_cpu(node->hdr.count),
2350 (__psunsigned_t)be32_to_cpu(node->btree[0].hashval), 2289 (__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
2351 (__psunsigned_t)be32_to_cpu(node->btree[ 2290 (__psunsigned_t)be32_to_cpu(node->btree[
@@ -2359,23 +2298,7 @@ void
2359xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context, 2298xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
2360 struct xfs_da_node_entry *btree) 2299 struct xfs_da_node_entry *btree)
2361{ 2300{
2362 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, 2301 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, context,
2363 (__psunsigned_t)context->dp,
2364 (__psunsigned_t)context->cursor->hashval,
2365 (__psunsigned_t)context->cursor->blkno,
2366 (__psunsigned_t)context->cursor->offset,
2367 (__psunsigned_t)context->alist,
2368 (__psunsigned_t)context->bufsize,
2369 (__psunsigned_t)context->count,
2370 (__psunsigned_t)context->firstu,
2371 (__psunsigned_t)
2372 ((context->count > 0) &&
2373 !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2374 ? (ATTR_ENTRY(context->alist,
2375 context->count-1)->a_valuelen)
2376 : 0,
2377 (__psunsigned_t)context->dupcnt,
2378 (__psunsigned_t)context->flags,
2379 (__psunsigned_t)be32_to_cpu(btree->hashval), 2302 (__psunsigned_t)be32_to_cpu(btree->hashval),
2380 (__psunsigned_t)be32_to_cpu(btree->before), 2303 (__psunsigned_t)be32_to_cpu(btree->before),
2381 (__psunsigned_t)NULL); 2304 (__psunsigned_t)NULL);
@@ -2388,23 +2311,7 @@ void
2388xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, 2311xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
2389 struct xfs_attr_leafblock *leaf) 2312 struct xfs_attr_leafblock *leaf)
2390{ 2313{
2391 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, 2314 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, context,
2392 (__psunsigned_t)context->dp,
2393 (__psunsigned_t)context->cursor->hashval,
2394 (__psunsigned_t)context->cursor->blkno,
2395 (__psunsigned_t)context->cursor->offset,
2396 (__psunsigned_t)context->alist,
2397 (__psunsigned_t)context->bufsize,
2398 (__psunsigned_t)context->count,
2399 (__psunsigned_t)context->firstu,
2400 (__psunsigned_t)
2401 ((context->count > 0) &&
2402 !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2403 ? (ATTR_ENTRY(context->alist,
2404 context->count-1)->a_valuelen)
2405 : 0,
2406 (__psunsigned_t)context->dupcnt,
2407 (__psunsigned_t)context->flags,
2408 (__psunsigned_t)be16_to_cpu(leaf->hdr.count), 2315 (__psunsigned_t)be16_to_cpu(leaf->hdr.count),
2409 (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval), 2316 (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
2410 (__psunsigned_t)be32_to_cpu(leaf->entries[ 2317 (__psunsigned_t)be32_to_cpu(leaf->entries[
@@ -2417,329 +2324,24 @@ xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
2417 */ 2324 */
2418void 2325void
2419xfs_attr_trace_enter(int type, char *where, 2326xfs_attr_trace_enter(int type, char *where,
2420 __psunsigned_t a2, __psunsigned_t a3, 2327 struct xfs_attr_list_context *context,
2421 __psunsigned_t a4, __psunsigned_t a5, 2328 __psunsigned_t a13, __psunsigned_t a14,
2422 __psunsigned_t a6, __psunsigned_t a7, 2329 __psunsigned_t a15)
2423 __psunsigned_t a8, __psunsigned_t a9,
2424 __psunsigned_t a10, __psunsigned_t a11,
2425 __psunsigned_t a12, __psunsigned_t a13,
2426 __psunsigned_t a14, __psunsigned_t a15)
2427{ 2330{
2428 ASSERT(xfs_attr_trace_buf); 2331 ASSERT(xfs_attr_trace_buf);
2429 ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type), 2332 ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
2430 (void *)where, 2333 (void *)((__psunsigned_t)where),
2431 (void *)a2, (void *)a3, (void *)a4, 2334 (void *)((__psunsigned_t)context->dp),
2432 (void *)a5, (void *)a6, (void *)a7, 2335 (void *)((__psunsigned_t)context->cursor->hashval),
2433 (void *)a8, (void *)a9, (void *)a10, 2336 (void *)((__psunsigned_t)context->cursor->blkno),
2434 (void *)a11, (void *)a12, (void *)a13, 2337 (void *)((__psunsigned_t)context->cursor->offset),
2435 (void *)a14, (void *)a15); 2338 (void *)((__psunsigned_t)context->alist),
2339 (void *)((__psunsigned_t)context->bufsize),
2340 (void *)((__psunsigned_t)context->count),
2341 (void *)((__psunsigned_t)context->firstu),
2342 NULL,
2343 (void *)((__psunsigned_t)context->dupcnt),
2344 (void *)((__psunsigned_t)context->flags),
2345 (void *)a13, (void *)a14, (void *)a15);
2436} 2346}
2437#endif /* XFS_ATTR_TRACE */ 2347#endif /* XFS_ATTR_TRACE */
2438
2439
2440/*========================================================================
2441 * System (pseudo) namespace attribute interface routines.
2442 *========================================================================*/
2443
2444STATIC int
2445posix_acl_access_set(
2446 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2447{
2448 return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
2449}
2450
2451STATIC int
2452posix_acl_access_remove(
2453 bhv_vnode_t *vp, char *name, int xflags)
2454{
2455 return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
2456}
2457
2458STATIC int
2459posix_acl_access_get(
2460 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2461{
2462 return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
2463}
2464
2465STATIC int
2466posix_acl_access_exists(
2467 bhv_vnode_t *vp)
2468{
2469 return xfs_acl_vhasacl_access(vp);
2470}
2471
2472STATIC int
2473posix_acl_default_set(
2474 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2475{
2476 return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
2477}
2478
2479STATIC int
2480posix_acl_default_get(
2481 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2482{
2483 return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
2484}
2485
2486STATIC int
2487posix_acl_default_remove(
2488 bhv_vnode_t *vp, char *name, int xflags)
2489{
2490 return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
2491}
2492
2493STATIC int
2494posix_acl_default_exists(
2495 bhv_vnode_t *vp)
2496{
2497 return xfs_acl_vhasacl_default(vp);
2498}
2499
2500static struct attrnames posix_acl_access = {
2501 .attr_name = "posix_acl_access",
2502 .attr_namelen = sizeof("posix_acl_access") - 1,
2503 .attr_get = posix_acl_access_get,
2504 .attr_set = posix_acl_access_set,
2505 .attr_remove = posix_acl_access_remove,
2506 .attr_exists = posix_acl_access_exists,
2507};
2508
2509static struct attrnames posix_acl_default = {
2510 .attr_name = "posix_acl_default",
2511 .attr_namelen = sizeof("posix_acl_default") - 1,
2512 .attr_get = posix_acl_default_get,
2513 .attr_set = posix_acl_default_set,
2514 .attr_remove = posix_acl_default_remove,
2515 .attr_exists = posix_acl_default_exists,
2516};
2517
2518static struct attrnames *attr_system_names[] =
2519 { &posix_acl_access, &posix_acl_default };
2520
2521
2522/*========================================================================
2523 * Namespace-prefix-style attribute name interface routines.
2524 *========================================================================*/
2525
2526STATIC int
2527attr_generic_set(
2528 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2529{
2530 return -xfs_attr_set(xfs_vtoi(vp), name, data, size, xflags);
2531}
2532
2533STATIC int
2534attr_generic_get(
2535 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2536{
2537 int error, asize = size;
2538
2539 error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
2540 if (!error)
2541 return asize;
2542 return -error;
2543}
2544
2545STATIC int
2546attr_generic_remove(
2547 bhv_vnode_t *vp, char *name, int xflags)
2548{
2549 return -xfs_attr_remove(xfs_vtoi(vp), name, xflags);
2550}
2551
2552STATIC int
2553attr_generic_listadd(
2554 attrnames_t *prefix,
2555 attrnames_t *namesp,
2556 void *data,
2557 size_t size,
2558 ssize_t *result)
2559{
2560 char *p = data + *result;
2561
2562 *result += prefix->attr_namelen;
2563 *result += namesp->attr_namelen + 1;
2564 if (!size)
2565 return 0;
2566 if (*result > size)
2567 return -ERANGE;
2568 strcpy(p, prefix->attr_name);
2569 p += prefix->attr_namelen;
2570 strcpy(p, namesp->attr_name);
2571 p += namesp->attr_namelen + 1;
2572 return 0;
2573}
2574
2575STATIC int
2576attr_system_list(
2577 bhv_vnode_t *vp,
2578 void *data,
2579 size_t size,
2580 ssize_t *result)
2581{
2582 attrnames_t *namesp;
2583 int i, error = 0;
2584
2585 for (i = 0; i < ATTR_SYSCOUNT; i++) {
2586 namesp = attr_system_names[i];
2587 if (!namesp->attr_exists || !namesp->attr_exists(vp))
2588 continue;
2589 error = attr_generic_listadd(&attr_system, namesp,
2590 data, size, result);
2591 if (error)
2592 break;
2593 }
2594 return error;
2595}
2596
2597int
2598attr_generic_list(
2599 bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
2600{
2601 attrlist_cursor_kern_t cursor = { 0 };
2602 int error;
2603
2604 error = xfs_attr_list(xfs_vtoi(vp), data, size, xflags, &cursor);
2605 if (error > 0)
2606 return -error;
2607 *result = -error;
2608 return attr_system_list(vp, data, size, result);
2609}
2610
2611attrnames_t *
2612attr_lookup_namespace(
2613 char *name,
2614 struct attrnames **names,
2615 int nnames)
2616{
2617 int i;
2618
2619 for (i = 0; i < nnames; i++)
2620 if (!strncmp(name, names[i]->attr_name, names[i]->attr_namelen))
2621 return names[i];
2622 return NULL;
2623}
2624
2625/*
2626 * Some checks to prevent people abusing EAs to get over quota:
2627 * - Don't allow modifying user EAs on devices/symlinks;
2628 * - Don't allow modifying user EAs if sticky bit set;
2629 */
2630STATIC int
2631attr_user_capable(
2632 bhv_vnode_t *vp,
2633 cred_t *cred)
2634{
2635 struct inode *inode = vn_to_inode(vp);
2636
2637 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2638 return -EPERM;
2639 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
2640 !capable(CAP_SYS_ADMIN))
2641 return -EPERM;
2642 if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
2643 (current_fsuid(cred) != inode->i_uid) && !capable(CAP_FOWNER))
2644 return -EPERM;
2645 return 0;
2646}
2647
2648STATIC int
2649attr_trusted_capable(
2650 bhv_vnode_t *vp,
2651 cred_t *cred)
2652{
2653 struct inode *inode = vn_to_inode(vp);
2654
2655 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2656 return -EPERM;
2657 if (!capable(CAP_SYS_ADMIN))
2658 return -EPERM;
2659 return 0;
2660}
2661
2662STATIC int
2663attr_system_set(
2664 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2665{
2666 attrnames_t *namesp;
2667 int error;
2668
2669 if (xflags & ATTR_CREATE)
2670 return -EINVAL;
2671
2672 namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2673 if (!namesp)
2674 return -EOPNOTSUPP;
2675 error = namesp->attr_set(vp, name, data, size, xflags);
2676 if (!error)
2677 error = vn_revalidate(vp);
2678 return error;
2679}
2680
2681STATIC int
2682attr_system_get(
2683 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2684{
2685 attrnames_t *namesp;
2686
2687 namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2688 if (!namesp)
2689 return -EOPNOTSUPP;
2690 return namesp->attr_get(vp, name, data, size, xflags);
2691}
2692
2693STATIC int
2694attr_system_remove(
2695 bhv_vnode_t *vp, char *name, int xflags)
2696{
2697 attrnames_t *namesp;
2698
2699 namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2700 if (!namesp)
2701 return -EOPNOTSUPP;
2702 return namesp->attr_remove(vp, name, xflags);
2703}
2704
2705struct attrnames attr_system = {
2706 .attr_name = "system.",
2707 .attr_namelen = sizeof("system.") - 1,
2708 .attr_flag = ATTR_SYSTEM,
2709 .attr_get = attr_system_get,
2710 .attr_set = attr_system_set,
2711 .attr_remove = attr_system_remove,
2712 .attr_capable = (attrcapable_t)fs_noerr,
2713};
2714
2715struct attrnames attr_trusted = {
2716 .attr_name = "trusted.",
2717 .attr_namelen = sizeof("trusted.") - 1,
2718 .attr_flag = ATTR_ROOT,
2719 .attr_get = attr_generic_get,
2720 .attr_set = attr_generic_set,
2721 .attr_remove = attr_generic_remove,
2722 .attr_capable = attr_trusted_capable,
2723};
2724
2725struct attrnames attr_secure = {
2726 .attr_name = "security.",
2727 .attr_namelen = sizeof("security.") - 1,
2728 .attr_flag = ATTR_SECURE,
2729 .attr_get = attr_generic_get,
2730 .attr_set = attr_generic_set,
2731 .attr_remove = attr_generic_remove,
2732 .attr_capable = (attrcapable_t)fs_noerr,
2733};
2734
2735struct attrnames attr_user = {
2736 .attr_name = "user.",
2737 .attr_namelen = sizeof("user.") - 1,
2738 .attr_get = attr_generic_get,
2739 .attr_set = attr_generic_set,
2740 .attr_remove = attr_generic_remove,
2741 .attr_capable = attr_user_capable,
2742};
2743
2744struct attrnames *attr_namespaces[] =
2745 { &attr_system, &attr_trusted, &attr_secure, &attr_user };
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 6cfc9384fe35..fb3b2a68b9b9 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -18,9 +18,11 @@
18#ifndef __XFS_ATTR_H__ 18#ifndef __XFS_ATTR_H__
19#define __XFS_ATTR_H__ 19#define __XFS_ATTR_H__
20 20
21struct xfs_inode;
22struct xfs_da_args;
23struct xfs_attr_list_context;
24
21/* 25/*
22 * xfs_attr.h
23 *
24 * Large attribute lists are structured around Btrees where all the data 26 * Large attribute lists are structured around Btrees where all the data
25 * elements are in the leaf nodes. Attribute names are hashed into an int, 27 * elements are in the leaf nodes. Attribute names are hashed into an int,
26 * then that int is used as the index into the Btree. Since the hashval 28 * then that int is used as the index into the Btree. Since the hashval
@@ -35,35 +37,6 @@
35 * External interfaces 37 * External interfaces
36 *========================================================================*/ 38 *========================================================================*/
37 39
38struct cred;
39struct xfs_attr_list_context;
40
41typedef int (*attrset_t)(bhv_vnode_t *, char *, void *, size_t, int);
42typedef int (*attrget_t)(bhv_vnode_t *, char *, void *, size_t, int);
43typedef int (*attrremove_t)(bhv_vnode_t *, char *, int);
44typedef int (*attrexists_t)(bhv_vnode_t *);
45typedef int (*attrcapable_t)(bhv_vnode_t *, struct cred *);
46
47typedef struct attrnames {
48 char * attr_name;
49 unsigned int attr_namelen;
50 unsigned int attr_flag;
51 attrget_t attr_get;
52 attrset_t attr_set;
53 attrremove_t attr_remove;
54 attrexists_t attr_exists;
55 attrcapable_t attr_capable;
56} attrnames_t;
57
58#define ATTR_NAMECOUNT 4
59extern struct attrnames attr_user;
60extern struct attrnames attr_secure;
61extern struct attrnames attr_system;
62extern struct attrnames attr_trusted;
63extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT];
64
65extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int);
66extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
67 40
68#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */ 41#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */
69#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */ 42#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */
@@ -71,16 +44,9 @@ extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
71#define ATTR_SECURE 0x0008 /* use attrs in security namespace */ 44#define ATTR_SECURE 0x0008 /* use attrs in security namespace */
72#define ATTR_CREATE 0x0010 /* pure create: fail if attr already exists */ 45#define ATTR_CREATE 0x0010 /* pure create: fail if attr already exists */
73#define ATTR_REPLACE 0x0020 /* pure set: fail if attr does not exist */ 46#define ATTR_REPLACE 0x0020 /* pure set: fail if attr does not exist */
74#define ATTR_SYSTEM 0x0100 /* use attrs in system (pseudo) namespace */
75 47
76#define ATTR_KERNACCESS 0x0400 /* [kernel] iaccess, inode held io-locked */
77#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */ 48#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */
78#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ 49#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */
79#define ATTR_KERNAMELS 0x4000 /* [kernel] list attr names (simple list) */
80
81#define ATTR_KERNORMALS 0x0800 /* [kernel] normal attr list: user+secure */
82#define ATTR_KERNROOTLS 0x8000 /* [kernel] include root in the attr list */
83#define ATTR_KERNFULLS (ATTR_KERNORMALS|ATTR_KERNROOTLS)
84 50
85/* 51/*
86 * The maximum size (into the kernel or returned from the kernel) of an 52 * The maximum size (into the kernel or returned from the kernel) of an
@@ -119,22 +85,6 @@ typedef struct attrlist_ent { /* data from attr_list() */
119 &((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ]) 85 &((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ])
120 86
121/* 87/*
122 * Multi-attribute operation vector.
123 */
124typedef struct attr_multiop {
125 int am_opcode; /* operation to perform (ATTR_OP_GET, etc.) */
126 int am_error; /* [out arg] result of this sub-op (an errno) */
127 char *am_attrname; /* attribute name to work with */
128 char *am_attrvalue; /* [in/out arg] attribute value (raw bytes) */
129 int am_length; /* [in/out arg] length of value */
130 int am_flags; /* bitwise OR of attr API flags defined above */
131} attr_multiop_t;
132
133#define ATTR_OP_GET 1 /* return the indicated attr's value */
134#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */
135#define ATTR_OP_REMOVE 3 /* remove the indicated attr */
136
137/*
138 * Kernel-internal version of the attrlist cursor. 88 * Kernel-internal version of the attrlist cursor.
139 */ 89 */
140typedef struct attrlist_cursor_kern { 90typedef struct attrlist_cursor_kern {
@@ -148,20 +98,41 @@ typedef struct attrlist_cursor_kern {
148 98
149 99
150/*======================================================================== 100/*========================================================================
151 * Function prototypes for the kernel. 101 * Structure used to pass context around among the routines.
152 *========================================================================*/ 102 *========================================================================*/
153 103
154struct xfs_inode; 104
155struct attrlist_cursor_kern; 105typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
156struct xfs_da_args; 106 char *, int, int, char *);
107
108typedef struct xfs_attr_list_context {
109 struct xfs_inode *dp; /* inode */
110 struct attrlist_cursor_kern *cursor; /* position in list */
111 char *alist; /* output buffer */
112 int seen_enough; /* T/F: seen enough of list? */
113 ssize_t count; /* num used entries */
114 int dupcnt; /* count dup hashvals seen */
115 int bufsize; /* total buffer size */
116 int firstu; /* first used byte in buffer */
117 int flags; /* from VOP call */
118 int resynch; /* T/F: resynch with cursor */
119 int put_value; /* T/F: need value for listent */
120 put_listent_func_t put_listent; /* list output fmt function */
121 int index; /* index into output buffer */
122} xfs_attr_list_context_t;
123
124
125/*========================================================================
126 * Function prototypes for the kernel.
127 *========================================================================*/
157 128
158/* 129/*
159 * Overall external interface routines. 130 * Overall external interface routines.
160 */ 131 */
132int xfs_attr_calc_size(struct xfs_inode *, int, int, int *);
161int xfs_attr_inactive(struct xfs_inode *dp); 133int xfs_attr_inactive(struct xfs_inode *dp);
162
163int xfs_attr_shortform_getvalue(struct xfs_da_args *);
164int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int); 134int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
165int xfs_attr_rmtval_get(struct xfs_da_args *args); 135int xfs_attr_rmtval_get(struct xfs_da_args *args);
136int xfs_attr_list_int(struct xfs_attr_list_context *);
166 137
167#endif /* __XFS_ATTR_H__ */ 138#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 303d41e4217b..79da6b2ea99e 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -94,13 +94,6 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
94 * Namespace helper routines 94 * Namespace helper routines
95 *========================================================================*/ 95 *========================================================================*/
96 96
97STATIC_INLINE attrnames_t *
98xfs_attr_flags_namesp(int flags)
99{
100 return ((flags & XFS_ATTR_SECURE) ? &attr_secure:
101 ((flags & XFS_ATTR_ROOT) ? &attr_trusted : &attr_user));
102}
103
104/* 97/*
105 * If namespace bits don't match return 0. 98 * If namespace bits don't match return 0.
106 * If all match then return 1. 99 * If all match then return 1.
@@ -111,25 +104,6 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
111 return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); 104 return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
112} 105}
113 106
114/*
115 * If namespace bits don't match and we don't have an override for it
116 * then return 0.
117 * If all match or are overridable then return 1.
118 */
119STATIC_INLINE int
120xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags)
121{
122 if (((arg_flags & ATTR_SECURE) == 0) !=
123 ((ondisk_flags & XFS_ATTR_SECURE) == 0) &&
124 !(arg_flags & ATTR_KERNORMALS))
125 return 0;
126 if (((arg_flags & ATTR_ROOT) == 0) !=
127 ((ondisk_flags & XFS_ATTR_ROOT) == 0) &&
128 !(arg_flags & ATTR_KERNROOTLS))
129 return 0;
130 return 1;
131}
132
133 107
134/*======================================================================== 108/*========================================================================
135 * External routines when attribute fork size < XFS_LITINO(mp). 109 * External routines when attribute fork size < XFS_LITINO(mp).
@@ -369,9 +343,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
369 * Fix up the start offset of the attribute fork 343 * Fix up the start offset of the attribute fork
370 */ 344 */
371 totsize -= size; 345 totsize -= size;
372 if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname && 346 if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
373 (mp->m_flags & XFS_MOUNT_ATTR2) && 347 !(args->op_flags & XFS_DA_OP_ADDNAME) &&
374 (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) { 348 (mp->m_flags & XFS_MOUNT_ATTR2) &&
349 (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
375 /* 350 /*
376 * Last attribute now removed, revert to original 351 * Last attribute now removed, revert to original
377 * inode format making all literal area available 352 * inode format making all literal area available
@@ -389,9 +364,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
389 xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); 364 xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
390 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); 365 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
391 ASSERT(dp->i_d.di_forkoff); 366 ASSERT(dp->i_d.di_forkoff);
392 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname || 367 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
393 !(mp->m_flags & XFS_MOUNT_ATTR2) || 368 (args->op_flags & XFS_DA_OP_ADDNAME) ||
394 dp->i_d.di_format == XFS_DINODE_FMT_BTREE); 369 !(mp->m_flags & XFS_MOUNT_ATTR2) ||
370 dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
395 dp->i_afp->if_ext_max = 371 dp->i_afp->if_ext_max =
396 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); 372 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
397 dp->i_df.if_ext_max = 373 dp->i_df.if_ext_max =
@@ -531,7 +507,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
531 nargs.total = args->total; 507 nargs.total = args->total;
532 nargs.whichfork = XFS_ATTR_FORK; 508 nargs.whichfork = XFS_ATTR_FORK;
533 nargs.trans = args->trans; 509 nargs.trans = args->trans;
534 nargs.oknoent = 1; 510 nargs.op_flags = XFS_DA_OP_OKNOENT;
535 511
536 sfe = &sf->list[0]; 512 sfe = &sf->list[0];
537 for (i = 0; i < sf->hdr.count; i++) { 513 for (i = 0; i < sf->hdr.count; i++) {
@@ -555,7 +531,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
555out: 531out:
556 if(bp) 532 if(bp)
557 xfs_da_buf_done(bp); 533 xfs_da_buf_done(bp);
558 kmem_free(tmpbuffer, size); 534 kmem_free(tmpbuffer);
559 return(error); 535 return(error);
560} 536}
561 537
@@ -624,15 +600,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
624 (XFS_ISRESET_CURSOR(cursor) && 600 (XFS_ISRESET_CURSOR(cursor) &&
625 (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) { 601 (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
626 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { 602 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
627 attrnames_t *namesp;
628
629 if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
630 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
631 continue;
632 }
633 namesp = xfs_attr_flags_namesp(sfe->flags);
634 error = context->put_listent(context, 603 error = context->put_listent(context,
635 namesp, 604 sfe->flags,
636 (char *)sfe->nameval, 605 (char *)sfe->nameval,
637 (int)sfe->namelen, 606 (int)sfe->namelen,
638 (int)sfe->valuelen, 607 (int)sfe->valuelen,
@@ -676,13 +645,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
676 XFS_ERRLEVEL_LOW, 645 XFS_ERRLEVEL_LOW,
677 context->dp->i_mount, sfe); 646 context->dp->i_mount, sfe);
678 xfs_attr_trace_l_c("sf corrupted", context); 647 xfs_attr_trace_l_c("sf corrupted", context);
679 kmem_free(sbuf, sbsize); 648 kmem_free(sbuf);
680 return XFS_ERROR(EFSCORRUPTED); 649 return XFS_ERROR(EFSCORRUPTED);
681 } 650 }
682 if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) { 651
683 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
684 continue;
685 }
686 sbp->entno = i; 652 sbp->entno = i;
687 sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen); 653 sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen);
688 sbp->name = (char *)sfe->nameval; 654 sbp->name = (char *)sfe->nameval;
@@ -717,7 +683,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
717 } 683 }
718 } 684 }
719 if (i == nsbuf) { 685 if (i == nsbuf) {
720 kmem_free(sbuf, sbsize); 686 kmem_free(sbuf);
721 xfs_attr_trace_l_c("blk end", context); 687 xfs_attr_trace_l_c("blk end", context);
722 return(0); 688 return(0);
723 } 689 }
@@ -726,16 +692,12 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
726 * Loop putting entries into the user buffer. 692 * Loop putting entries into the user buffer.
727 */ 693 */
728 for ( ; i < nsbuf; i++, sbp++) { 694 for ( ; i < nsbuf; i++, sbp++) {
729 attrnames_t *namesp;
730
731 namesp = xfs_attr_flags_namesp(sbp->flags);
732
733 if (cursor->hashval != sbp->hash) { 695 if (cursor->hashval != sbp->hash) {
734 cursor->hashval = sbp->hash; 696 cursor->hashval = sbp->hash;
735 cursor->offset = 0; 697 cursor->offset = 0;
736 } 698 }
737 error = context->put_listent(context, 699 error = context->put_listent(context,
738 namesp, 700 sbp->flags,
739 sbp->name, 701 sbp->name,
740 sbp->namelen, 702 sbp->namelen,
741 sbp->valuelen, 703 sbp->valuelen,
@@ -747,7 +709,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
747 cursor->offset++; 709 cursor->offset++;
748 } 710 }
749 711
750 kmem_free(sbuf, sbsize); 712 kmem_free(sbuf);
751 xfs_attr_trace_l_c("sf E-O-F", context); 713 xfs_attr_trace_l_c("sf E-O-F", context);
752 return(0); 714 return(0);
753} 715}
@@ -853,7 +815,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
853 nargs.total = args->total; 815 nargs.total = args->total;
854 nargs.whichfork = XFS_ATTR_FORK; 816 nargs.whichfork = XFS_ATTR_FORK;
855 nargs.trans = args->trans; 817 nargs.trans = args->trans;
856 nargs.oknoent = 1; 818 nargs.op_flags = XFS_DA_OP_OKNOENT;
857 entry = &leaf->entries[0]; 819 entry = &leaf->entries[0];
858 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { 820 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
859 if (entry->flags & XFS_ATTR_INCOMPLETE) 821 if (entry->flags & XFS_ATTR_INCOMPLETE)
@@ -873,7 +835,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
873 error = 0; 835 error = 0;
874 836
875out: 837out:
876 kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount)); 838 kmem_free(tmpbuffer);
877 return(error); 839 return(error);
878} 840}
879 841
@@ -1155,7 +1117,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
1155 entry->hashval = cpu_to_be32(args->hashval); 1117 entry->hashval = cpu_to_be32(args->hashval);
1156 entry->flags = tmp ? XFS_ATTR_LOCAL : 0; 1118 entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
1157 entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); 1119 entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
1158 if (args->rename) { 1120 if (args->op_flags & XFS_DA_OP_RENAME) {
1159 entry->flags |= XFS_ATTR_INCOMPLETE; 1121 entry->flags |= XFS_ATTR_INCOMPLETE;
1160 if ((args->blkno2 == args->blkno) && 1122 if ((args->blkno2 == args->blkno) &&
1161 (args->index2 <= args->index)) { 1123 (args->index2 <= args->index)) {
@@ -1271,7 +1233,7 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
1271 be16_to_cpu(hdr_s->count), mp); 1233 be16_to_cpu(hdr_s->count), mp);
1272 xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); 1234 xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
1273 1235
1274 kmem_free(tmpbuffer, XFS_LBSIZE(mp)); 1236 kmem_free(tmpbuffer);
1275} 1237}
1276 1238
1277/* 1239/*
@@ -1921,7 +1883,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1921 be16_to_cpu(drop_hdr->count), mp); 1883 be16_to_cpu(drop_hdr->count), mp);
1922 } 1884 }
1923 memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize); 1885 memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize);
1924 kmem_free(tmpbuffer, state->blocksize); 1886 kmem_free(tmpbuffer);
1925 } 1887 }
1926 1888
1927 xfs_da_log_buf(state->args->trans, save_blk->bp, 0, 1889 xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
@@ -2400,8 +2362,6 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2400 */ 2362 */
2401 retval = 0; 2363 retval = 0;
2402 for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) { 2364 for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) {
2403 attrnames_t *namesp;
2404
2405 if (be32_to_cpu(entry->hashval) != cursor->hashval) { 2365 if (be32_to_cpu(entry->hashval) != cursor->hashval) {
2406 cursor->hashval = be32_to_cpu(entry->hashval); 2366 cursor->hashval = be32_to_cpu(entry->hashval);
2407 cursor->offset = 0; 2367 cursor->offset = 0;
@@ -2409,17 +2369,13 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2409 2369
2410 if (entry->flags & XFS_ATTR_INCOMPLETE) 2370 if (entry->flags & XFS_ATTR_INCOMPLETE)
2411 continue; /* skip incomplete entries */ 2371 continue; /* skip incomplete entries */
2412 if (!xfs_attr_namesp_match_overrides(context->flags, entry->flags))
2413 continue;
2414
2415 namesp = xfs_attr_flags_namesp(entry->flags);
2416 2372
2417 if (entry->flags & XFS_ATTR_LOCAL) { 2373 if (entry->flags & XFS_ATTR_LOCAL) {
2418 xfs_attr_leaf_name_local_t *name_loc = 2374 xfs_attr_leaf_name_local_t *name_loc =
2419 XFS_ATTR_LEAF_NAME_LOCAL(leaf, i); 2375 XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);
2420 2376
2421 retval = context->put_listent(context, 2377 retval = context->put_listent(context,
2422 namesp, 2378 entry->flags,
2423 (char *)name_loc->nameval, 2379 (char *)name_loc->nameval,
2424 (int)name_loc->namelen, 2380 (int)name_loc->namelen,
2425 be16_to_cpu(name_loc->valuelen), 2381 be16_to_cpu(name_loc->valuelen),
@@ -2446,16 +2402,15 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2446 if (retval) 2402 if (retval)
2447 return retval; 2403 return retval;
2448 retval = context->put_listent(context, 2404 retval = context->put_listent(context,
2449 namesp, 2405 entry->flags,
2450 (char *)name_rmt->name, 2406 (char *)name_rmt->name,
2451 (int)name_rmt->namelen, 2407 (int)name_rmt->namelen,
2452 valuelen, 2408 valuelen,
2453 (char*)args.value); 2409 (char*)args.value);
2454 kmem_free(args.value, valuelen); 2410 kmem_free(args.value);
2455 } 2411 } else {
2456 else {
2457 retval = context->put_listent(context, 2412 retval = context->put_listent(context,
2458 namesp, 2413 entry->flags,
2459 (char *)name_rmt->name, 2414 (char *)name_rmt->name,
2460 (int)name_rmt->namelen, 2415 (int)name_rmt->namelen,
2461 valuelen, 2416 valuelen,
@@ -2543,9 +2498,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
2543 /* 2498 /*
2544 * Commit the flag value change and start the next trans in series. 2499 * Commit the flag value change and start the next trans in series.
2545 */ 2500 */
2546 error = xfs_attr_rolltrans(&args->trans, args->dp); 2501 return xfs_trans_roll(&args->trans, args->dp);
2547
2548 return(error);
2549} 2502}
2550 2503
2551/* 2504/*
@@ -2592,9 +2545,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
2592 /* 2545 /*
2593 * Commit the flag value change and start the next trans in series. 2546 * Commit the flag value change and start the next trans in series.
2594 */ 2547 */
2595 error = xfs_attr_rolltrans(&args->trans, args->dp); 2548 return xfs_trans_roll(&args->trans, args->dp);
2596
2597 return(error);
2598} 2549}
2599 2550
2600/* 2551/*
@@ -2710,7 +2661,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2710 /* 2661 /*
2711 * Commit the flag value change and start the next trans in series. 2662 * Commit the flag value change and start the next trans in series.
2712 */ 2663 */
2713 error = xfs_attr_rolltrans(&args->trans, args->dp); 2664 error = xfs_trans_roll(&args->trans, args->dp);
2714 2665
2715 return(error); 2666 return(error);
2716} 2667}
@@ -2768,7 +2719,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
2768 /* 2719 /*
2769 * Commit the invalidate and start the next transaction. 2720 * Commit the invalidate and start the next transaction.
2770 */ 2721 */
2771 error = xfs_attr_rolltrans(trans, dp); 2722 error = xfs_trans_roll(trans, dp);
2772 2723
2773 return (error); 2724 return (error);
2774} 2725}
@@ -2870,7 +2821,8 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
2870 /* 2821 /*
2871 * Atomically commit the whole invalidate stuff. 2822 * Atomically commit the whole invalidate stuff.
2872 */ 2823 */
2873 if ((error = xfs_attr_rolltrans(trans, dp))) 2824 error = xfs_trans_roll(trans, dp);
2825 if (error)
2874 return (error); 2826 return (error);
2875 } 2827 }
2876 2828
@@ -2954,7 +2906,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
2954 error = tmp; /* save only the 1st errno */ 2906 error = tmp; /* save only the 1st errno */
2955 } 2907 }
2956 2908
2957 kmem_free((xfs_caddr_t)list, size); 2909 kmem_free((xfs_caddr_t)list);
2958 return(error); 2910 return(error);
2959} 2911}
2960 2912
@@ -3009,7 +2961,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
3009 /* 2961 /*
3010 * Roll to next transaction. 2962 * Roll to next transaction.
3011 */ 2963 */
3012 if ((error = xfs_attr_rolltrans(trans, dp))) 2964 error = xfs_trans_roll(trans, dp);
2965 if (error)
3013 return (error); 2966 return (error);
3014 } 2967 }
3015 2968
@@ -3019,60 +2972,3 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
3019 2972
3020 return(0); 2973 return(0);
3021} 2974}
3022
3023
3024/*
3025 * Roll from one trans in the sequence of PERMANENT transactions to the next.
3026 */
3027int
3028xfs_attr_rolltrans(xfs_trans_t **transp, xfs_inode_t *dp)
3029{
3030 xfs_trans_t *trans;
3031 unsigned int logres, count;
3032 int error;
3033
3034 /*
3035 * Ensure that the inode is always logged.
3036 */
3037 trans = *transp;
3038 xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
3039
3040 /*
3041 * Copy the critical parameters from one trans to the next.
3042 */
3043 logres = trans->t_log_res;
3044 count = trans->t_log_count;
3045 *transp = xfs_trans_dup(trans);
3046
3047 /*
3048 * Commit the current transaction.
3049 * If this commit failed, then it'd just unlock those items that
3050 * are not marked ihold. That also means that a filesystem shutdown
3051 * is in progress. The caller takes the responsibility to cancel
3052 * the duplicate transaction that gets returned.
3053 */
3054 if ((error = xfs_trans_commit(trans, 0)))
3055 return (error);
3056
3057 trans = *transp;
3058
3059 /*
3060 * Reserve space in the log for th next transaction.
3061 * This also pushes items in the "AIL", the list of logged items,
3062 * out to disk if they are taking up space at the tail of the log
3063 * that we want to use. This requires that either nothing be locked
3064 * across this call, or that anything that is locked be logged in
3065 * the prior and the next transactions.
3066 */
3067 error = xfs_trans_reserve(trans, 0, logres, 0,
3068 XFS_TRANS_PERM_LOG_RES, count);
3069 /*
3070 * Ensure that the inode is in the new transaction and locked.
3071 */
3072 if (!error) {
3073 xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
3074 xfs_trans_ihold(trans, dp);
3075 }
3076 return (error);
3077
3078}
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 040f732ce1e2..83e9af417ca2 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -30,7 +30,7 @@
30 30
31struct attrlist; 31struct attrlist;
32struct attrlist_cursor_kern; 32struct attrlist_cursor_kern;
33struct attrnames; 33struct xfs_attr_list_context;
34struct xfs_dabuf; 34struct xfs_dabuf;
35struct xfs_da_args; 35struct xfs_da_args;
36struct xfs_da_state; 36struct xfs_da_state;
@@ -204,33 +204,6 @@ static inline int xfs_attr_leaf_entsize_local_max(int bsize)
204 return (((bsize) >> 1) + ((bsize) >> 2)); 204 return (((bsize) >> 1) + ((bsize) >> 2));
205} 205}
206 206
207
208/*========================================================================
209 * Structure used to pass context around among the routines.
210 *========================================================================*/
211
212
213struct xfs_attr_list_context;
214
215typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, struct attrnames *,
216 char *, int, int, char *);
217
218typedef struct xfs_attr_list_context {
219 struct xfs_inode *dp; /* inode */
220 struct attrlist_cursor_kern *cursor; /* position in list */
221 struct attrlist *alist; /* output buffer */
222 int seen_enough; /* T/F: seen enough of list? */
223 int count; /* num used entries */
224 int dupcnt; /* count dup hashvals seen */
225 int bufsize; /* total buffer size */
226 int firstu; /* first used byte in buffer */
227 int flags; /* from VOP call */
228 int resynch; /* T/F: resynch with cursor */
229 int put_value; /* T/F: need value for listent */
230 put_listent_func_t put_listent; /* list output fmt function */
231 int index; /* index into output buffer */
232} xfs_attr_list_context_t;
233
234/* 207/*
235 * Used to keep a list of "remote value" extents when unlinking an inode. 208 * Used to keep a list of "remote value" extents when unlinking an inode.
236 */ 209 */
@@ -301,6 +274,4 @@ int xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp,
301 struct xfs_dabuf *leaf2_bp); 274 struct xfs_dabuf *leaf2_bp);
302int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, 275int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,
303 int *local); 276 int *local);
304int xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp);
305
306#endif /* __XFS_ATTR_LEAF_H__ */ 277#endif /* __XFS_ATTR_LEAF_H__ */
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index f67f917803b1..ea22839caed2 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -97,13 +97,9 @@ void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
97void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, 97void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
98 struct xfs_attr_leafblock *leaf); 98 struct xfs_attr_leafblock *leaf);
99void xfs_attr_trace_enter(int type, char *where, 99void xfs_attr_trace_enter(int type, char *where,
100 __psunsigned_t a2, __psunsigned_t a3, 100 struct xfs_attr_list_context *context,
101 __psunsigned_t a4, __psunsigned_t a5, 101 __psunsigned_t a13, __psunsigned_t a14,
102 __psunsigned_t a6, __psunsigned_t a7, 102 __psunsigned_t a15);
103 __psunsigned_t a8, __psunsigned_t a9,
104 __psunsigned_t a10, __psunsigned_t a11,
105 __psunsigned_t a12, __psunsigned_t a13,
106 __psunsigned_t a14, __psunsigned_t a15);
107#else 103#else
108#define xfs_attr_trace_l_c(w,c) 104#define xfs_attr_trace_l_c(w,c)
109#define xfs_attr_trace_l_cn(w,c,n) 105#define xfs_attr_trace_l_cn(w,c,n)
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index fab0b6d5a41b..48228848f5ae 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -25,109 +25,6 @@
25 * XFS bit manipulation routines, used in non-realtime code. 25 * XFS bit manipulation routines, used in non-realtime code.
26 */ 26 */
27 27
28#ifndef HAVE_ARCH_HIGHBIT
29/*
30 * Index of high bit number in byte, -1 for none set, 0..7 otherwise.
31 */
32static const char xfs_highbit[256] = {
33 -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */
34 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */
35 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */
36 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 1f */
37 5, 5, 5, 5, 5, 5, 5, 5, /* 20 .. 27 */
38 5, 5, 5, 5, 5, 5, 5, 5, /* 28 .. 2f */
39 5, 5, 5, 5, 5, 5, 5, 5, /* 30 .. 37 */
40 5, 5, 5, 5, 5, 5, 5, 5, /* 38 .. 3f */
41 6, 6, 6, 6, 6, 6, 6, 6, /* 40 .. 47 */
42 6, 6, 6, 6, 6, 6, 6, 6, /* 48 .. 4f */
43 6, 6, 6, 6, 6, 6, 6, 6, /* 50 .. 57 */
44 6, 6, 6, 6, 6, 6, 6, 6, /* 58 .. 5f */
45 6, 6, 6, 6, 6, 6, 6, 6, /* 60 .. 67 */
46 6, 6, 6, 6, 6, 6, 6, 6, /* 68 .. 6f */
47 6, 6, 6, 6, 6, 6, 6, 6, /* 70 .. 77 */
48 6, 6, 6, 6, 6, 6, 6, 6, /* 78 .. 7f */
49 7, 7, 7, 7, 7, 7, 7, 7, /* 80 .. 87 */
50 7, 7, 7, 7, 7, 7, 7, 7, /* 88 .. 8f */
51 7, 7, 7, 7, 7, 7, 7, 7, /* 90 .. 97 */
52 7, 7, 7, 7, 7, 7, 7, 7, /* 98 .. 9f */
53 7, 7, 7, 7, 7, 7, 7, 7, /* a0 .. a7 */
54 7, 7, 7, 7, 7, 7, 7, 7, /* a8 .. af */
55 7, 7, 7, 7, 7, 7, 7, 7, /* b0 .. b7 */
56 7, 7, 7, 7, 7, 7, 7, 7, /* b8 .. bf */
57 7, 7, 7, 7, 7, 7, 7, 7, /* c0 .. c7 */
58 7, 7, 7, 7, 7, 7, 7, 7, /* c8 .. cf */
59 7, 7, 7, 7, 7, 7, 7, 7, /* d0 .. d7 */
60 7, 7, 7, 7, 7, 7, 7, 7, /* d8 .. df */
61 7, 7, 7, 7, 7, 7, 7, 7, /* e0 .. e7 */
62 7, 7, 7, 7, 7, 7, 7, 7, /* e8 .. ef */
63 7, 7, 7, 7, 7, 7, 7, 7, /* f0 .. f7 */
64 7, 7, 7, 7, 7, 7, 7, 7, /* f8 .. ff */
65};
66#endif
67
68/*
69 * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
70 */
71inline int
72xfs_highbit32(
73 __uint32_t v)
74{
75#ifdef HAVE_ARCH_HIGHBIT
76 return highbit32(v);
77#else
78 int i;
79
80 if (v & 0xffff0000)
81 if (v & 0xff000000)
82 i = 24;
83 else
84 i = 16;
85 else if (v & 0x0000ffff)
86 if (v & 0x0000ff00)
87 i = 8;
88 else
89 i = 0;
90 else
91 return -1;
92 return i + xfs_highbit[(v >> i) & 0xff];
93#endif
94}
95
96/*
97 * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set.
98 */
99int
100xfs_lowbit64(
101 __uint64_t v)
102{
103 __uint32_t w = (__uint32_t)v;
104 int n = 0;
105
106 if (w) { /* lower bits */
107 n = ffs(w);
108 } else { /* upper bits */
109 w = (__uint32_t)(v >> 32);
110 if (w && (n = ffs(w)))
111 n += 32;
112 }
113 return n - 1;
114}
115
116/*
117 * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set.
118 */
119int
120xfs_highbit64(
121 __uint64_t v)
122{
123 __uint32_t h = (__uint32_t)(v >> 32);
124
125 if (h)
126 return xfs_highbit32(h) + 32;
127 return xfs_highbit32((__uint32_t)v);
128}
129
130
131/* 28/*
132 * Return whether bitmap is empty. 29 * Return whether bitmap is empty.
133 * Size is number of words in the bitmap, which is padded to word boundary 30 * Size is number of words in the bitmap, which is padded to word boundary
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 082641a9782c..8e0e463dae2d 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -47,13 +47,39 @@ static inline __uint64_t xfs_mask64lo(int n)
47} 47}
48 48
49/* Get high bit set out of 32-bit argument, -1 if none set */ 49/* Get high bit set out of 32-bit argument, -1 if none set */
50extern int xfs_highbit32(__uint32_t v); 50static inline int xfs_highbit32(__uint32_t v)
51{
52 return fls(v) - 1;
53}
54
55/* Get high bit set out of 64-bit argument, -1 if none set */
56static inline int xfs_highbit64(__uint64_t v)
57{
58 return fls64(v) - 1;
59}
60
61/* Get low bit set out of 32-bit argument, -1 if none set */
62static inline int xfs_lowbit32(__uint32_t v)
63{
64 unsigned long t = v;
65 return (v) ? find_first_bit(&t, 32) : -1;
66}
51 67
52/* Get low bit set out of 64-bit argument, -1 if none set */ 68/* Get low bit set out of 64-bit argument, -1 if none set */
53extern int xfs_lowbit64(__uint64_t v); 69static inline int xfs_lowbit64(__uint64_t v)
70{
71 __uint32_t w = (__uint32_t)v;
72 int n = 0;
54 73
55/* Get high bit set out of 64-bit argument, -1 if none set */ 74 if (w) { /* lower bits */
56extern int xfs_highbit64(__uint64_t); 75 n = ffs(w);
76 } else { /* upper bits */
77 w = (__uint32_t)(v >> 32);
78 if (w && (n = ffs(w)))
79 n += 32;
80 }
81 return n - 1;
82}
57 83
58/* Return whether bitmap is empty (1 == empty) */ 84/* Return whether bitmap is empty (1 == empty) */
59extern int xfs_bitmap_empty(uint *map, uint size); 85extern int xfs_bitmap_empty(uint *map, uint size);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 53c259f5a5af..a1aab9275d5a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -384,14 +384,14 @@ xfs_bmap_count_tree(
384 int levelin, 384 int levelin,
385 int *count); 385 int *count);
386 386
387STATIC int 387STATIC void
388xfs_bmap_count_leaves( 388xfs_bmap_count_leaves(
389 xfs_ifork_t *ifp, 389 xfs_ifork_t *ifp,
390 xfs_extnum_t idx, 390 xfs_extnum_t idx,
391 int numrecs, 391 int numrecs,
392 int *count); 392 int *count);
393 393
394STATIC int 394STATIC void
395xfs_bmap_disk_count_leaves( 395xfs_bmap_disk_count_leaves(
396 xfs_extnum_t idx, 396 xfs_extnum_t idx,
397 xfs_bmbt_block_t *block, 397 xfs_bmbt_block_t *block,
@@ -428,7 +428,8 @@ xfs_bmap_add_attrfork_btree(
428 cur->bc_private.b.firstblock = *firstblock; 428 cur->bc_private.b.firstblock = *firstblock;
429 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) 429 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
430 goto error0; 430 goto error0;
431 ASSERT(stat == 1); /* must be at least one entry */ 431 /* must be at least one entry */
432 XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
432 if ((error = xfs_bmbt_newroot(cur, flags, &stat))) 433 if ((error = xfs_bmbt_newroot(cur, flags, &stat)))
433 goto error0; 434 goto error0;
434 if (stat == 0) { 435 if (stat == 0) {
@@ -816,13 +817,13 @@ xfs_bmap_add_extent_delay_real(
816 RIGHT.br_startblock, 817 RIGHT.br_startblock,
817 RIGHT.br_blockcount, &i))) 818 RIGHT.br_blockcount, &i)))
818 goto done; 819 goto done;
819 ASSERT(i == 1); 820 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
820 if ((error = xfs_bmbt_delete(cur, &i))) 821 if ((error = xfs_bmbt_delete(cur, &i)))
821 goto done; 822 goto done;
822 ASSERT(i == 1); 823 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
823 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 824 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
824 goto done; 825 goto done;
825 ASSERT(i == 1); 826 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
826 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 827 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
827 LEFT.br_startblock, 828 LEFT.br_startblock,
828 LEFT.br_blockcount + 829 LEFT.br_blockcount +
@@ -860,7 +861,7 @@ xfs_bmap_add_extent_delay_real(
860 LEFT.br_startblock, LEFT.br_blockcount, 861 LEFT.br_startblock, LEFT.br_blockcount,
861 &i))) 862 &i)))
862 goto done; 863 goto done;
863 ASSERT(i == 1); 864 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
864 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 865 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
865 LEFT.br_startblock, 866 LEFT.br_startblock,
866 LEFT.br_blockcount + 867 LEFT.br_blockcount +
@@ -895,7 +896,7 @@ xfs_bmap_add_extent_delay_real(
895 RIGHT.br_startblock, 896 RIGHT.br_startblock,
896 RIGHT.br_blockcount, &i))) 897 RIGHT.br_blockcount, &i)))
897 goto done; 898 goto done;
898 ASSERT(i == 1); 899 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
899 if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 900 if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
900 new->br_startblock, 901 new->br_startblock,
901 PREV.br_blockcount + 902 PREV.br_blockcount +
@@ -928,11 +929,11 @@ xfs_bmap_add_extent_delay_real(
928 new->br_startblock, new->br_blockcount, 929 new->br_startblock, new->br_blockcount,
929 &i))) 930 &i)))
930 goto done; 931 goto done;
931 ASSERT(i == 0); 932 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
932 cur->bc_rec.b.br_state = XFS_EXT_NORM; 933 cur->bc_rec.b.br_state = XFS_EXT_NORM;
933 if ((error = xfs_bmbt_insert(cur, &i))) 934 if ((error = xfs_bmbt_insert(cur, &i)))
934 goto done; 935 goto done;
935 ASSERT(i == 1); 936 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
936 } 937 }
937 *dnew = 0; 938 *dnew = 0;
938 /* DELTA: The in-core extent described by new changed type. */ 939 /* DELTA: The in-core extent described by new changed type. */
@@ -963,7 +964,7 @@ xfs_bmap_add_extent_delay_real(
963 LEFT.br_startblock, LEFT.br_blockcount, 964 LEFT.br_startblock, LEFT.br_blockcount,
964 &i))) 965 &i)))
965 goto done; 966 goto done;
966 ASSERT(i == 1); 967 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
967 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 968 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
968 LEFT.br_startblock, 969 LEFT.br_startblock,
969 LEFT.br_blockcount + 970 LEFT.br_blockcount +
@@ -1004,11 +1005,11 @@ xfs_bmap_add_extent_delay_real(
1004 new->br_startblock, new->br_blockcount, 1005 new->br_startblock, new->br_blockcount,
1005 &i))) 1006 &i)))
1006 goto done; 1007 goto done;
1007 ASSERT(i == 0); 1008 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1008 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1009 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1009 if ((error = xfs_bmbt_insert(cur, &i))) 1010 if ((error = xfs_bmbt_insert(cur, &i)))
1010 goto done; 1011 goto done;
1011 ASSERT(i == 1); 1012 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1012 } 1013 }
1013 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1014 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1014 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 1015 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1054,7 +1055,7 @@ xfs_bmap_add_extent_delay_real(
1054 RIGHT.br_startblock, 1055 RIGHT.br_startblock,
1055 RIGHT.br_blockcount, &i))) 1056 RIGHT.br_blockcount, &i)))
1056 goto done; 1057 goto done;
1057 ASSERT(i == 1); 1058 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1058 if ((error = xfs_bmbt_update(cur, new->br_startoff, 1059 if ((error = xfs_bmbt_update(cur, new->br_startoff,
1059 new->br_startblock, 1060 new->br_startblock,
1060 new->br_blockcount + 1061 new->br_blockcount +
@@ -1094,11 +1095,11 @@ xfs_bmap_add_extent_delay_real(
1094 new->br_startblock, new->br_blockcount, 1095 new->br_startblock, new->br_blockcount,
1095 &i))) 1096 &i)))
1096 goto done; 1097 goto done;
1097 ASSERT(i == 0); 1098 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1098 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1099 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1099 if ((error = xfs_bmbt_insert(cur, &i))) 1100 if ((error = xfs_bmbt_insert(cur, &i)))
1100 goto done; 1101 goto done;
1101 ASSERT(i == 1); 1102 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1102 } 1103 }
1103 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1104 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1104 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 1105 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1149,11 +1150,11 @@ xfs_bmap_add_extent_delay_real(
1149 new->br_startblock, new->br_blockcount, 1150 new->br_startblock, new->br_blockcount,
1150 &i))) 1151 &i)))
1151 goto done; 1152 goto done;
1152 ASSERT(i == 0); 1153 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1153 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1154 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1154 if ((error = xfs_bmbt_insert(cur, &i))) 1155 if ((error = xfs_bmbt_insert(cur, &i)))
1155 goto done; 1156 goto done;
1156 ASSERT(i == 1); 1157 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1157 } 1158 }
1158 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1159 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1159 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 1160 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1377,19 +1378,19 @@ xfs_bmap_add_extent_unwritten_real(
1377 RIGHT.br_startblock, 1378 RIGHT.br_startblock,
1378 RIGHT.br_blockcount, &i))) 1379 RIGHT.br_blockcount, &i)))
1379 goto done; 1380 goto done;
1380 ASSERT(i == 1); 1381 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1381 if ((error = xfs_bmbt_delete(cur, &i))) 1382 if ((error = xfs_bmbt_delete(cur, &i)))
1382 goto done; 1383 goto done;
1383 ASSERT(i == 1); 1384 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1384 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1385 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1385 goto done; 1386 goto done;
1386 ASSERT(i == 1); 1387 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1387 if ((error = xfs_bmbt_delete(cur, &i))) 1388 if ((error = xfs_bmbt_delete(cur, &i)))
1388 goto done; 1389 goto done;
1389 ASSERT(i == 1); 1390 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1390 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1391 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1391 goto done; 1392 goto done;
1392 ASSERT(i == 1); 1393 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1393 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 1394 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
1394 LEFT.br_startblock, 1395 LEFT.br_startblock,
1395 LEFT.br_blockcount + PREV.br_blockcount + 1396 LEFT.br_blockcount + PREV.br_blockcount +
@@ -1426,13 +1427,13 @@ xfs_bmap_add_extent_unwritten_real(
1426 PREV.br_startblock, PREV.br_blockcount, 1427 PREV.br_startblock, PREV.br_blockcount,
1427 &i))) 1428 &i)))
1428 goto done; 1429 goto done;
1429 ASSERT(i == 1); 1430 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1430 if ((error = xfs_bmbt_delete(cur, &i))) 1431 if ((error = xfs_bmbt_delete(cur, &i)))
1431 goto done; 1432 goto done;
1432 ASSERT(i == 1); 1433 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1433 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1434 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1434 goto done; 1435 goto done;
1435 ASSERT(i == 1); 1436 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1436 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 1437 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
1437 LEFT.br_startblock, 1438 LEFT.br_startblock,
1438 LEFT.br_blockcount + PREV.br_blockcount, 1439 LEFT.br_blockcount + PREV.br_blockcount,
@@ -1469,13 +1470,13 @@ xfs_bmap_add_extent_unwritten_real(
1469 RIGHT.br_startblock, 1470 RIGHT.br_startblock,
1470 RIGHT.br_blockcount, &i))) 1471 RIGHT.br_blockcount, &i)))
1471 goto done; 1472 goto done;
1472 ASSERT(i == 1); 1473 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1473 if ((error = xfs_bmbt_delete(cur, &i))) 1474 if ((error = xfs_bmbt_delete(cur, &i)))
1474 goto done; 1475 goto done;
1475 ASSERT(i == 1); 1476 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1476 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1477 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1477 goto done; 1478 goto done;
1478 ASSERT(i == 1); 1479 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1479 if ((error = xfs_bmbt_update(cur, new->br_startoff, 1480 if ((error = xfs_bmbt_update(cur, new->br_startoff,
1480 new->br_startblock, 1481 new->br_startblock,
1481 new->br_blockcount + RIGHT.br_blockcount, 1482 new->br_blockcount + RIGHT.br_blockcount,
@@ -1508,7 +1509,7 @@ xfs_bmap_add_extent_unwritten_real(
1508 new->br_startblock, new->br_blockcount, 1509 new->br_startblock, new->br_blockcount,
1509 &i))) 1510 &i)))
1510 goto done; 1511 goto done;
1511 ASSERT(i == 1); 1512 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1512 if ((error = xfs_bmbt_update(cur, new->br_startoff, 1513 if ((error = xfs_bmbt_update(cur, new->br_startoff,
1513 new->br_startblock, new->br_blockcount, 1514 new->br_startblock, new->br_blockcount,
1514 newext))) 1515 newext)))
@@ -1549,7 +1550,7 @@ xfs_bmap_add_extent_unwritten_real(
1549 PREV.br_startblock, PREV.br_blockcount, 1550 PREV.br_startblock, PREV.br_blockcount,
1550 &i))) 1551 &i)))
1551 goto done; 1552 goto done;
1552 ASSERT(i == 1); 1553 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1553 if ((error = xfs_bmbt_update(cur, 1554 if ((error = xfs_bmbt_update(cur,
1554 PREV.br_startoff + new->br_blockcount, 1555 PREV.br_startoff + new->br_blockcount,
1555 PREV.br_startblock + new->br_blockcount, 1556 PREV.br_startblock + new->br_blockcount,
@@ -1596,7 +1597,7 @@ xfs_bmap_add_extent_unwritten_real(
1596 PREV.br_startblock, PREV.br_blockcount, 1597 PREV.br_startblock, PREV.br_blockcount,
1597 &i))) 1598 &i)))
1598 goto done; 1599 goto done;
1599 ASSERT(i == 1); 1600 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1600 if ((error = xfs_bmbt_update(cur, 1601 if ((error = xfs_bmbt_update(cur,
1601 PREV.br_startoff + new->br_blockcount, 1602 PREV.br_startoff + new->br_blockcount,
1602 PREV.br_startblock + new->br_blockcount, 1603 PREV.br_startblock + new->br_blockcount,
@@ -1606,7 +1607,7 @@ xfs_bmap_add_extent_unwritten_real(
1606 cur->bc_rec.b = *new; 1607 cur->bc_rec.b = *new;
1607 if ((error = xfs_bmbt_insert(cur, &i))) 1608 if ((error = xfs_bmbt_insert(cur, &i)))
1608 goto done; 1609 goto done;
1609 ASSERT(i == 1); 1610 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1610 } 1611 }
1611 /* DELTA: One in-core extent is split in two. */ 1612 /* DELTA: One in-core extent is split in two. */
1612 temp = PREV.br_startoff; 1613 temp = PREV.br_startoff;
@@ -1640,7 +1641,7 @@ xfs_bmap_add_extent_unwritten_real(
1640 PREV.br_startblock, 1641 PREV.br_startblock,
1641 PREV.br_blockcount, &i))) 1642 PREV.br_blockcount, &i)))
1642 goto done; 1643 goto done;
1643 ASSERT(i == 1); 1644 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1644 if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 1645 if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
1645 PREV.br_startblock, 1646 PREV.br_startblock,
1646 PREV.br_blockcount - new->br_blockcount, 1647 PREV.br_blockcount - new->br_blockcount,
@@ -1682,7 +1683,7 @@ xfs_bmap_add_extent_unwritten_real(
1682 PREV.br_startblock, PREV.br_blockcount, 1683 PREV.br_startblock, PREV.br_blockcount,
1683 &i))) 1684 &i)))
1684 goto done; 1685 goto done;
1685 ASSERT(i == 1); 1686 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1686 if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 1687 if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
1687 PREV.br_startblock, 1688 PREV.br_startblock,
1688 PREV.br_blockcount - new->br_blockcount, 1689 PREV.br_blockcount - new->br_blockcount,
@@ -1692,11 +1693,11 @@ xfs_bmap_add_extent_unwritten_real(
1692 new->br_startblock, new->br_blockcount, 1693 new->br_startblock, new->br_blockcount,
1693 &i))) 1694 &i)))
1694 goto done; 1695 goto done;
1695 ASSERT(i == 0); 1696 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1696 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1697 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1697 if ((error = xfs_bmbt_insert(cur, &i))) 1698 if ((error = xfs_bmbt_insert(cur, &i)))
1698 goto done; 1699 goto done;
1699 ASSERT(i == 1); 1700 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1700 } 1701 }
1701 /* DELTA: One in-core extent is split in two. */ 1702 /* DELTA: One in-core extent is split in two. */
1702 temp = PREV.br_startoff; 1703 temp = PREV.br_startoff;
@@ -1732,27 +1733,34 @@ xfs_bmap_add_extent_unwritten_real(
1732 PREV.br_startblock, PREV.br_blockcount, 1733 PREV.br_startblock, PREV.br_blockcount,
1733 &i))) 1734 &i)))
1734 goto done; 1735 goto done;
1735 ASSERT(i == 1); 1736 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1736 /* new right extent - oldext */ 1737 /* new right extent - oldext */
1737 if ((error = xfs_bmbt_update(cur, r[1].br_startoff, 1738 if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
1738 r[1].br_startblock, r[1].br_blockcount, 1739 r[1].br_startblock, r[1].br_blockcount,
1739 r[1].br_state))) 1740 r[1].br_state)))
1740 goto done; 1741 goto done;
1741 /* new left extent - oldext */ 1742 /* new left extent - oldext */
1742 PREV.br_blockcount =
1743 new->br_startoff - PREV.br_startoff;
1744 cur->bc_rec.b = PREV; 1743 cur->bc_rec.b = PREV;
1744 cur->bc_rec.b.br_blockcount =
1745 new->br_startoff - PREV.br_startoff;
1745 if ((error = xfs_bmbt_insert(cur, &i))) 1746 if ((error = xfs_bmbt_insert(cur, &i)))
1746 goto done; 1747 goto done;
1747 ASSERT(i == 1); 1748 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1748 if ((error = xfs_bmbt_increment(cur, 0, &i))) 1749 /*
1750 * Reset the cursor to the position of the new extent
1751 * we are about to insert as we can't trust it after
1752 * the previous insert.
1753 */
1754 if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
1755 new->br_startblock, new->br_blockcount,
1756 &i)))
1749 goto done; 1757 goto done;
1750 ASSERT(i == 1); 1758 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1751 /* new middle extent - newext */ 1759 /* new middle extent - newext */
1752 cur->bc_rec.b = *new; 1760 cur->bc_rec.b.br_state = new->br_state;
1753 if ((error = xfs_bmbt_insert(cur, &i))) 1761 if ((error = xfs_bmbt_insert(cur, &i)))
1754 goto done; 1762 goto done;
1755 ASSERT(i == 1); 1763 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1756 } 1764 }
1757 /* DELTA: One in-core extent is split in three. */ 1765 /* DELTA: One in-core extent is split in three. */
1758 temp = PREV.br_startoff; 1766 temp = PREV.br_startoff;
@@ -2097,13 +2105,13 @@ xfs_bmap_add_extent_hole_real(
2097 right.br_startblock, 2105 right.br_startblock,
2098 right.br_blockcount, &i))) 2106 right.br_blockcount, &i)))
2099 goto done; 2107 goto done;
2100 ASSERT(i == 1); 2108 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2101 if ((error = xfs_bmbt_delete(cur, &i))) 2109 if ((error = xfs_bmbt_delete(cur, &i)))
2102 goto done; 2110 goto done;
2103 ASSERT(i == 1); 2111 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2104 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 2112 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
2105 goto done; 2113 goto done;
2106 ASSERT(i == 1); 2114 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2107 if ((error = xfs_bmbt_update(cur, left.br_startoff, 2115 if ((error = xfs_bmbt_update(cur, left.br_startoff,
2108 left.br_startblock, 2116 left.br_startblock,
2109 left.br_blockcount + 2117 left.br_blockcount +
@@ -2139,7 +2147,7 @@ xfs_bmap_add_extent_hole_real(
2139 left.br_startblock, 2147 left.br_startblock,
2140 left.br_blockcount, &i))) 2148 left.br_blockcount, &i)))
2141 goto done; 2149 goto done;
2142 ASSERT(i == 1); 2150 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2143 if ((error = xfs_bmbt_update(cur, left.br_startoff, 2151 if ((error = xfs_bmbt_update(cur, left.br_startoff,
2144 left.br_startblock, 2152 left.br_startblock,
2145 left.br_blockcount + 2153 left.br_blockcount +
@@ -2174,7 +2182,7 @@ xfs_bmap_add_extent_hole_real(
2174 right.br_startblock, 2182 right.br_startblock,
2175 right.br_blockcount, &i))) 2183 right.br_blockcount, &i)))
2176 goto done; 2184 goto done;
2177 ASSERT(i == 1); 2185 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2178 if ((error = xfs_bmbt_update(cur, new->br_startoff, 2186 if ((error = xfs_bmbt_update(cur, new->br_startoff,
2179 new->br_startblock, 2187 new->br_startblock,
2180 new->br_blockcount + 2188 new->br_blockcount +
@@ -2208,11 +2216,11 @@ xfs_bmap_add_extent_hole_real(
2208 new->br_startblock, 2216 new->br_startblock,
2209 new->br_blockcount, &i))) 2217 new->br_blockcount, &i)))
2210 goto done; 2218 goto done;
2211 ASSERT(i == 0); 2219 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
2212 cur->bc_rec.b.br_state = new->br_state; 2220 cur->bc_rec.b.br_state = new->br_state;
2213 if ((error = xfs_bmbt_insert(cur, &i))) 2221 if ((error = xfs_bmbt_insert(cur, &i)))
2214 goto done; 2222 goto done;
2215 ASSERT(i == 1); 2223 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2216 } 2224 }
2217 /* DELTA: A new extent was added in a hole. */ 2225 /* DELTA: A new extent was added in a hole. */
2218 temp = new->br_startoff; 2226 temp = new->br_startoff;
@@ -3131,7 +3139,7 @@ xfs_bmap_del_extent(
3131 got.br_startblock, got.br_blockcount, 3139 got.br_startblock, got.br_blockcount,
3132 &i))) 3140 &i)))
3133 goto done; 3141 goto done;
3134 ASSERT(i == 1); 3142 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3135 } 3143 }
3136 da_old = da_new = 0; 3144 da_old = da_new = 0;
3137 } else { 3145 } else {
@@ -3164,7 +3172,7 @@ xfs_bmap_del_extent(
3164 } 3172 }
3165 if ((error = xfs_bmbt_delete(cur, &i))) 3173 if ((error = xfs_bmbt_delete(cur, &i)))
3166 goto done; 3174 goto done;
3167 ASSERT(i == 1); 3175 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3168 break; 3176 break;
3169 3177
3170 case 2: 3178 case 2:
@@ -3268,7 +3276,7 @@ xfs_bmap_del_extent(
3268 got.br_startblock, 3276 got.br_startblock,
3269 temp, &i))) 3277 temp, &i)))
3270 goto done; 3278 goto done;
3271 ASSERT(i == 1); 3279 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3272 /* 3280 /*
3273 * Update the btree record back 3281 * Update the btree record back
3274 * to the original value. 3282 * to the original value.
@@ -3289,7 +3297,7 @@ xfs_bmap_del_extent(
3289 error = XFS_ERROR(ENOSPC); 3297 error = XFS_ERROR(ENOSPC);
3290 goto done; 3298 goto done;
3291 } 3299 }
3292 ASSERT(i == 1); 3300 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3293 } else 3301 } else
3294 flags |= XFS_ILOG_FEXT(whichfork); 3302 flags |= XFS_ILOG_FEXT(whichfork);
3295 XFS_IFORK_NEXT_SET(ip, whichfork, 3303 XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -3992,7 +4000,7 @@ xfs_bmap_add_attrfork(
3992 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 4000 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
3993 } 4001 }
3994 ASSERT(ip->i_d.di_anextents == 0); 4002 ASSERT(ip->i_d.di_anextents == 0);
3995 VN_HOLD(XFS_ITOV(ip)); 4003 IHOLD(ip);
3996 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4004 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
3997 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 4005 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3998 switch (ip->i_d.di_format) { 4006 switch (ip->i_d.di_format) {
@@ -5970,7 +5978,7 @@ unlock_and_return:
5970 xfs_iunlock_map_shared(ip, lock); 5978 xfs_iunlock_map_shared(ip, lock);
5971 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 5979 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
5972 5980
5973 kmem_free(map, subnex * sizeof(*map)); 5981 kmem_free(map);
5974 5982
5975 return error; 5983 return error;
5976} 5984}
@@ -6088,7 +6096,7 @@ xfs_bmap_get_bp(
6088 tp = cur->bc_tp; 6096 tp = cur->bc_tp;
6089 licp = &tp->t_items; 6097 licp = &tp->t_items;
6090 while (!bp && licp != NULL) { 6098 while (!bp && licp != NULL) {
6091 if (XFS_LIC_ARE_ALL_FREE(licp)) { 6099 if (xfs_lic_are_all_free(licp)) {
6092 licp = licp->lic_next; 6100 licp = licp->lic_next;
6093 continue; 6101 continue;
6094 } 6102 }
@@ -6098,11 +6106,11 @@ xfs_bmap_get_bp(
6098 xfs_buf_log_item_t *bip; 6106 xfs_buf_log_item_t *bip;
6099 xfs_buf_t *lbp; 6107 xfs_buf_t *lbp;
6100 6108
6101 if (XFS_LIC_ISFREE(licp, i)) { 6109 if (xfs_lic_isfree(licp, i)) {
6102 continue; 6110 continue;
6103 } 6111 }
6104 6112
6105 lidp = XFS_LIC_SLOT(licp, i); 6113 lidp = xfs_lic_slot(licp, i);
6106 lip = lidp->lid_item; 6114 lip = lidp->lid_item;
6107 if (lip->li_type != XFS_LI_BUF) 6115 if (lip->li_type != XFS_LI_BUF)
6108 continue; 6116 continue;
@@ -6359,13 +6367,9 @@ xfs_bmap_count_blocks(
6359 mp = ip->i_mount; 6367 mp = ip->i_mount;
6360 ifp = XFS_IFORK_PTR(ip, whichfork); 6368 ifp = XFS_IFORK_PTR(ip, whichfork);
6361 if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { 6369 if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
6362 if (unlikely(xfs_bmap_count_leaves(ifp, 0, 6370 xfs_bmap_count_leaves(ifp, 0,
6363 ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), 6371 ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
6364 count) < 0)) { 6372 count);
6365 XFS_ERROR_REPORT("xfs_bmap_count_blocks(1)",
6366 XFS_ERRLEVEL_LOW, mp);
6367 return XFS_ERROR(EFSCORRUPTED);
6368 }
6369 return 0; 6373 return 0;
6370 } 6374 }
6371 6375
@@ -6446,13 +6450,7 @@ xfs_bmap_count_tree(
6446 for (;;) { 6450 for (;;) {
6447 nextbno = be64_to_cpu(block->bb_rightsib); 6451 nextbno = be64_to_cpu(block->bb_rightsib);
6448 numrecs = be16_to_cpu(block->bb_numrecs); 6452 numrecs = be16_to_cpu(block->bb_numrecs);
6449 if (unlikely(xfs_bmap_disk_count_leaves(0, 6453 xfs_bmap_disk_count_leaves(0, block, numrecs, count);
6450 block, numrecs, count) < 0)) {
6451 xfs_trans_brelse(tp, bp);
6452 XFS_ERROR_REPORT("xfs_bmap_count_tree(2)",
6453 XFS_ERRLEVEL_LOW, mp);
6454 return XFS_ERROR(EFSCORRUPTED);
6455 }
6456 xfs_trans_brelse(tp, bp); 6454 xfs_trans_brelse(tp, bp);
6457 if (nextbno == NULLFSBLOCK) 6455 if (nextbno == NULLFSBLOCK)
6458 break; 6456 break;
@@ -6470,7 +6468,7 @@ xfs_bmap_count_tree(
6470/* 6468/*
6471 * Count leaf blocks given a range of extent records. 6469 * Count leaf blocks given a range of extent records.
6472 */ 6470 */
6473STATIC int 6471STATIC void
6474xfs_bmap_count_leaves( 6472xfs_bmap_count_leaves(
6475 xfs_ifork_t *ifp, 6473 xfs_ifork_t *ifp,
6476 xfs_extnum_t idx, 6474 xfs_extnum_t idx,
@@ -6483,14 +6481,13 @@ xfs_bmap_count_leaves(
6483 xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b); 6481 xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
6484 *count += xfs_bmbt_get_blockcount(frp); 6482 *count += xfs_bmbt_get_blockcount(frp);
6485 } 6483 }
6486 return 0;
6487} 6484}
6488 6485
6489/* 6486/*
6490 * Count leaf blocks given a range of extent records originally 6487 * Count leaf blocks given a range of extent records originally
6491 * in btree format. 6488 * in btree format.
6492 */ 6489 */
6493STATIC int 6490STATIC void
6494xfs_bmap_disk_count_leaves( 6491xfs_bmap_disk_count_leaves(
6495 xfs_extnum_t idx, 6492 xfs_extnum_t idx,
6496 xfs_bmbt_block_t *block, 6493 xfs_bmbt_block_t *block,
@@ -6504,5 +6501,4 @@ xfs_bmap_disk_count_leaves(
6504 frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b); 6501 frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b);
6505 *count += xfs_bmbt_disk_get_blockcount(frp); 6502 *count += xfs_bmbt_disk_get_blockcount(frp);
6506 } 6503 }
6507 return 0;
6508} 6504}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 6ff70cda451c..9f3e3a836d15 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -54,12 +54,23 @@ typedef struct xfs_bmap_free_item
54 54
55/* 55/*
56 * Header for free extent list. 56 * Header for free extent list.
57 *
58 * xbf_low is used by the allocator to activate the lowspace algorithm -
59 * when free space is running low the extent allocator may choose to
60 * allocate an extent from an AG without leaving sufficient space for
61 * a btree split when inserting the new extent. In this case the allocator
62 * will enable the lowspace algorithm which is supposed to allow further
63 * allocations (such as btree splits and newroots) to allocate from
64 * sequential AGs. In order to avoid locking AGs out of order the lowspace
65 * algorithm will start searching for free space from AG 0. If the correct
66 * transaction reservations have been made then this algorithm will eventually
67 * find all the space it needs.
57 */ 68 */
58typedef struct xfs_bmap_free 69typedef struct xfs_bmap_free
59{ 70{
60 xfs_bmap_free_item_t *xbf_first; /* list of to-be-free extents */ 71 xfs_bmap_free_item_t *xbf_first; /* list of to-be-free extents */
61 int xbf_count; /* count of items on list */ 72 int xbf_count; /* count of items on list */
62 int xbf_low; /* kludge: alloc in low mode */ 73 int xbf_low; /* alloc in low mode */
63} xfs_bmap_free_t; 74} xfs_bmap_free_t;
64 75
65#define XFS_BMAP_MAX_NMAP 4 76#define XFS_BMAP_MAX_NMAP 4
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 4f0e849d973e..23efad29a5cd 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1493,12 +1493,27 @@ xfs_bmbt_split(
1493 left = XFS_BUF_TO_BMBT_BLOCK(lbp); 1493 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
1494 args.fsbno = cur->bc_private.b.firstblock; 1494 args.fsbno = cur->bc_private.b.firstblock;
1495 args.firstblock = args.fsbno; 1495 args.firstblock = args.fsbno;
1496 args.minleft = 0;
1496 if (args.fsbno == NULLFSBLOCK) { 1497 if (args.fsbno == NULLFSBLOCK) {
1497 args.fsbno = lbno; 1498 args.fsbno = lbno;
1498 args.type = XFS_ALLOCTYPE_START_BNO; 1499 args.type = XFS_ALLOCTYPE_START_BNO;
1499 } else 1500 /*
1501 * Make sure there is sufficient room left in the AG to
1502 * complete a full tree split for an extent insert. If
1503 * we are converting the middle part of an extent then
1504 * we may need space for two tree splits.
1505 *
1506 * We are relying on the caller to make the correct block
1507 * reservation for this operation to succeed. If the
1508 * reservation amount is insufficient then we may fail a
1509 * block allocation here and corrupt the filesystem.
1510 */
1511 args.minleft = xfs_trans_get_block_res(args.tp);
1512 } else if (cur->bc_private.b.flist->xbf_low)
1513 args.type = XFS_ALLOCTYPE_START_BNO;
1514 else
1500 args.type = XFS_ALLOCTYPE_NEAR_BNO; 1515 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1501 args.mod = args.minleft = args.alignment = args.total = args.isfl = 1516 args.mod = args.alignment = args.total = args.isfl =
1502 args.userdata = args.minalignslop = 0; 1517 args.userdata = args.minalignslop = 0;
1503 args.minlen = args.maxlen = args.prod = 1; 1518 args.minlen = args.maxlen = args.prod = 1;
1504 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; 1519 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
@@ -1510,6 +1525,21 @@ xfs_bmbt_split(
1510 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 1525 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1511 return error; 1526 return error;
1512 } 1527 }
1528 if (args.fsbno == NULLFSBLOCK && args.minleft) {
1529 /*
1530 * Could not find an AG with enough free space to satisfy
1531 * a full btree split. Try again without minleft and if
1532 * successful activate the lowspace algorithm.
1533 */
1534 args.fsbno = 0;
1535 args.type = XFS_ALLOCTYPE_FIRST_AG;
1536 args.minleft = 0;
1537 if ((error = xfs_alloc_vextent(&args))) {
1538 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1539 return error;
1540 }
1541 cur->bc_private.b.flist->xbf_low = 1;
1542 }
1513 if (args.fsbno == NULLFSBLOCK) { 1543 if (args.fsbno == NULLFSBLOCK) {
1514 XFS_BMBT_TRACE_CURSOR(cur, EXIT); 1544 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1515 *stat = 0; 1545 *stat = 0;
@@ -2029,22 +2059,8 @@ xfs_bmbt_increment(
2029 * Insert the current record at the point referenced by cur. 2059 * Insert the current record at the point referenced by cur.
2030 * 2060 *
2031 * A multi-level split of the tree on insert will invalidate the original 2061 * A multi-level split of the tree on insert will invalidate the original
2032 * cursor. It appears, however, that some callers assume that the cursor is 2062 * cursor. All callers of this function should assume that the cursor is
2033 * always valid. Hence if we do a multi-level split we need to revalidate the 2063 * no longer valid and revalidate it.
2034 * cursor.
2035 *
2036 * When a split occurs, we will see a new cursor returned. Use that as a
2037 * trigger to determine if we need to revalidate the original cursor. If we get
2038 * a split, then use the original irec to lookup up the path of the record we
2039 * just inserted.
2040 *
2041 * Note that the fact that the btree root is in the inode means that we can
2042 * have the level of the tree change without a "split" occurring at the root
2043 * level. What happens is that the root is migrated to an allocated block and
2044 * the inode root is pointed to it. This means a single split can change the
2045 * level of the tree (level 2 -> level 3) and invalidate the old cursor. Hence
2046 * the level change should be accounted as a split so as to correctly trigger a
2047 * revalidation of the old cursor.
2048 */ 2064 */
2049int /* error */ 2065int /* error */
2050xfs_bmbt_insert( 2066xfs_bmbt_insert(
@@ -2057,14 +2073,11 @@ xfs_bmbt_insert(
2057 xfs_fsblock_t nbno; 2073 xfs_fsblock_t nbno;
2058 xfs_btree_cur_t *ncur; 2074 xfs_btree_cur_t *ncur;
2059 xfs_bmbt_rec_t nrec; 2075 xfs_bmbt_rec_t nrec;
2060 xfs_bmbt_irec_t oirec; /* original irec */
2061 xfs_btree_cur_t *pcur; 2076 xfs_btree_cur_t *pcur;
2062 int splits = 0;
2063 2077
2064 XFS_BMBT_TRACE_CURSOR(cur, ENTRY); 2078 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2065 level = 0; 2079 level = 0;
2066 nbno = NULLFSBLOCK; 2080 nbno = NULLFSBLOCK;
2067 oirec = cur->bc_rec.b;
2068 xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); 2081 xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
2069 ncur = NULL; 2082 ncur = NULL;
2070 pcur = cur; 2083 pcur = cur;
@@ -2073,13 +2086,11 @@ xfs_bmbt_insert(
2073 &i))) { 2086 &i))) {
2074 if (pcur != cur) 2087 if (pcur != cur)
2075 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 2088 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2076 goto error0; 2089 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2090 return error;
2077 } 2091 }
2078 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 2092 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2079 if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) { 2093 if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
2080 /* allocating a new root is effectively a split */
2081 if (cur->bc_nlevels != pcur->bc_nlevels)
2082 splits++;
2083 cur->bc_nlevels = pcur->bc_nlevels; 2094 cur->bc_nlevels = pcur->bc_nlevels;
2084 cur->bc_private.b.allocated += 2095 cur->bc_private.b.allocated +=
2085 pcur->bc_private.b.allocated; 2096 pcur->bc_private.b.allocated;
@@ -2093,21 +2104,10 @@ xfs_bmbt_insert(
2093 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); 2104 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2094 } 2105 }
2095 if (ncur) { 2106 if (ncur) {
2096 splits++;
2097 pcur = ncur; 2107 pcur = ncur;
2098 ncur = NULL; 2108 ncur = NULL;
2099 } 2109 }
2100 } while (nbno != NULLFSBLOCK); 2110 } while (nbno != NULLFSBLOCK);
2101
2102 if (splits > 1) {
2103 /* revalidate the old cursor as we had a multi-level split */
2104 error = xfs_bmbt_lookup_eq(cur, oirec.br_startoff,
2105 oirec.br_startblock, oirec.br_blockcount, &i);
2106 if (error)
2107 goto error0;
2108 ASSERT(i == 1);
2109 }
2110
2111 XFS_BMBT_TRACE_CURSOR(cur, EXIT); 2111 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2112 *stat = i; 2112 *stat = i;
2113 return 0; 2113 return 0;
@@ -2254,7 +2254,9 @@ xfs_bmbt_newroot(
2254#endif 2254#endif
2255 args.fsbno = be64_to_cpu(*pp); 2255 args.fsbno = be64_to_cpu(*pp);
2256 args.type = XFS_ALLOCTYPE_START_BNO; 2256 args.type = XFS_ALLOCTYPE_START_BNO;
2257 } else 2257 } else if (cur->bc_private.b.flist->xbf_low)
2258 args.type = XFS_ALLOCTYPE_START_BNO;
2259 else
2258 args.type = XFS_ALLOCTYPE_NEAR_BNO; 2260 args.type = XFS_ALLOCTYPE_NEAR_BNO;
2259 if ((error = xfs_alloc_vextent(&args))) { 2261 if ((error = xfs_alloc_vextent(&args))) {
2260 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 2262 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index aeb87ca69fcc..cc593a84c345 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -46,38 +46,11 @@ kmem_zone_t *xfs_btree_cur_zone;
46/* 46/*
47 * Btree magic numbers. 47 * Btree magic numbers.
48 */ 48 */
49const __uint32_t xfs_magics[XFS_BTNUM_MAX] = 49const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
50{
51 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC 50 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
52}; 51};
53 52
54/* 53/*
55 * Prototypes for internal routines.
56 */
57
58/*
59 * Checking routine: return maxrecs for the block.
60 */
61STATIC int /* number of records fitting in block */
62xfs_btree_maxrecs(
63 xfs_btree_cur_t *cur, /* btree cursor */
64 xfs_btree_block_t *block);/* generic btree block pointer */
65
66/*
67 * Internal routines.
68 */
69
70/*
71 * Retrieve the block pointer from the cursor at the given level.
72 * This may be a bmap btree root or from a buffer.
73 */
74STATIC xfs_btree_block_t * /* generic btree block pointer */
75xfs_btree_get_block(
76 xfs_btree_cur_t *cur, /* btree cursor */
77 int level, /* level in btree */
78 struct xfs_buf **bpp); /* buffer containing the block */
79
80/*
81 * Checking routine: return maxrecs for the block. 54 * Checking routine: return maxrecs for the block.
82 */ 55 */
83STATIC int /* number of records fitting in block */ 56STATIC int /* number of records fitting in block */
@@ -457,35 +430,6 @@ xfs_btree_dup_cursor(
457} 430}
458 431
459/* 432/*
460 * Change the cursor to point to the first record at the given level.
461 * Other levels are unaffected.
462 */
463int /* success=1, failure=0 */
464xfs_btree_firstrec(
465 xfs_btree_cur_t *cur, /* btree cursor */
466 int level) /* level to change */
467{
468 xfs_btree_block_t *block; /* generic btree block pointer */
469 xfs_buf_t *bp; /* buffer containing block */
470
471 /*
472 * Get the block pointer for this level.
473 */
474 block = xfs_btree_get_block(cur, level, &bp);
475 xfs_btree_check_block(cur, block, level, bp);
476 /*
477 * It's empty, there is no such record.
478 */
479 if (!block->bb_h.bb_numrecs)
480 return 0;
481 /*
482 * Set the ptr value to 1, that's the first record/key.
483 */
484 cur->bc_ptrs[level] = 1;
485 return 1;
486}
487
488/*
489 * Retrieve the block pointer from the cursor at the given level. 433 * Retrieve the block pointer from the cursor at the given level.
490 * This may be a bmap btree root or from a buffer. 434 * This may be a bmap btree root or from a buffer.
491 */ 435 */
@@ -626,6 +570,13 @@ xfs_btree_init_cursor(
626 cur->bc_private.a.agbp = agbp; 570 cur->bc_private.a.agbp = agbp;
627 cur->bc_private.a.agno = agno; 571 cur->bc_private.a.agno = agno;
628 break; 572 break;
573 case XFS_BTNUM_INO:
574 /*
575 * Inode allocation btree fields.
576 */
577 cur->bc_private.a.agbp = agbp;
578 cur->bc_private.a.agno = agno;
579 break;
629 case XFS_BTNUM_BMAP: 580 case XFS_BTNUM_BMAP:
630 /* 581 /*
631 * Bmap btree fields. 582 * Bmap btree fields.
@@ -638,13 +589,6 @@ xfs_btree_init_cursor(
638 cur->bc_private.b.flags = 0; 589 cur->bc_private.b.flags = 0;
639 cur->bc_private.b.whichfork = whichfork; 590 cur->bc_private.b.whichfork = whichfork;
640 break; 591 break;
641 case XFS_BTNUM_INO:
642 /*
643 * Inode allocation btree fields.
644 */
645 cur->bc_private.i.agbp = agbp;
646 cur->bc_private.i.agno = agno;
647 break;
648 default: 592 default:
649 ASSERT(0); 593 ASSERT(0);
650 } 594 }
@@ -671,6 +615,35 @@ xfs_btree_islastblock(
671} 615}
672 616
673/* 617/*
618 * Change the cursor to point to the first record at the given level.
619 * Other levels are unaffected.
620 */
621int /* success=1, failure=0 */
622xfs_btree_firstrec(
623 xfs_btree_cur_t *cur, /* btree cursor */
624 int level) /* level to change */
625{
626 xfs_btree_block_t *block; /* generic btree block pointer */
627 xfs_buf_t *bp; /* buffer containing block */
628
629 /*
630 * Get the block pointer for this level.
631 */
632 block = xfs_btree_get_block(cur, level, &bp);
633 xfs_btree_check_block(cur, block, level, bp);
634 /*
635 * It's empty, there is no such record.
636 */
637 if (!block->bb_h.bb_numrecs)
638 return 0;
639 /*
640 * Set the ptr value to 1, that's the first record/key.
641 */
642 cur->bc_ptrs[level] = 1;
643 return 1;
644}
645
646/*
674 * Change the cursor to point to the last record in the current block 647 * Change the cursor to point to the last record in the current block
675 * at the given level. Other levels are unaffected. 648 * at the given level. Other levels are unaffected.
676 */ 649 */
@@ -890,12 +863,12 @@ xfs_btree_readahead_core(
890 case XFS_BTNUM_INO: 863 case XFS_BTNUM_INO:
891 i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); 864 i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
892 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) { 865 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) {
893 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, 866 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
894 be32_to_cpu(i->bb_leftsib), 1); 867 be32_to_cpu(i->bb_leftsib), 1);
895 rval++; 868 rval++;
896 } 869 }
897 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) { 870 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) {
898 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, 871 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
899 be32_to_cpu(i->bb_rightsib), 1); 872 be32_to_cpu(i->bb_rightsib), 1);
900 rval++; 873 rval++;
901 } 874 }
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 7440b78f9cec..1f528a2a3754 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -158,8 +158,8 @@ typedef struct xfs_btree_cur
158 __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ 158 __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */
159 xfs_btnum_t bc_btnum; /* identifies which btree type */ 159 xfs_btnum_t bc_btnum; /* identifies which btree type */
160 union { 160 union {
161 struct { /* needed for BNO, CNT */ 161 struct { /* needed for BNO, CNT, INO */
162 struct xfs_buf *agbp; /* agf buffer pointer */ 162 struct xfs_buf *agbp; /* agf/agi buffer pointer */
163 xfs_agnumber_t agno; /* ag number */ 163 xfs_agnumber_t agno; /* ag number */
164 } a; 164 } a;
165 struct { /* needed for BMAP */ 165 struct { /* needed for BMAP */
@@ -172,10 +172,6 @@ typedef struct xfs_btree_cur
172 char flags; /* flags */ 172 char flags; /* flags */
173#define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */ 173#define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */
174 } b; 174 } b;
175 struct { /* needed for INO */
176 struct xfs_buf *agbp; /* agi buffer pointer */
177 xfs_agnumber_t agno; /* ag number */
178 } i;
179 } bc_private; /* per-btree type data */ 175 } bc_private; /* per-btree type data */
180} xfs_btree_cur_t; 176} xfs_btree_cur_t;
181 177
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 53a71c62025d..608c30c3f76b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -737,7 +737,7 @@ xfs_buf_item_init(
737 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); 737 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
738 bip->bli_format.blf_map_size = map_size; 738 bip->bli_format.blf_map_size = map_size;
739#ifdef XFS_BLI_TRACE 739#ifdef XFS_BLI_TRACE
740 bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_SLEEP); 740 bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS);
741#endif 741#endif
742 742
743#ifdef XFS_TRANS_DEBUG 743#ifdef XFS_TRANS_DEBUG
@@ -889,9 +889,9 @@ xfs_buf_item_relse(
889 } 889 }
890 890
891#ifdef XFS_TRANS_DEBUG 891#ifdef XFS_TRANS_DEBUG
892 kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp)); 892 kmem_free(bip->bli_orig);
893 bip->bli_orig = NULL; 893 bip->bli_orig = NULL;
894 kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY); 894 kmem_free(bip->bli_logged);
895 bip->bli_logged = NULL; 895 bip->bli_logged = NULL;
896#endif /* XFS_TRANS_DEBUG */ 896#endif /* XFS_TRANS_DEBUG */
897 897
@@ -1056,7 +1056,7 @@ xfs_buf_iodone_callbacks(
1056 anyway. */ 1056 anyway. */
1057 XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse); 1057 XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse);
1058 XFS_BUF_DONE(bp); 1058 XFS_BUF_DONE(bp);
1059 XFS_BUF_V_IODONESEMA(bp); 1059 XFS_BUF_FINISH_IOWAIT(bp);
1060 } 1060 }
1061 return; 1061 return;
1062 } 1062 }
@@ -1138,9 +1138,9 @@ xfs_buf_iodone(
1138 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); 1138 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
1139 1139
1140#ifdef XFS_TRANS_DEBUG 1140#ifdef XFS_TRANS_DEBUG
1141 kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp)); 1141 kmem_free(bip->bli_orig);
1142 bip->bli_orig = NULL; 1142 bip->bli_orig = NULL;
1143 kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY); 1143 kmem_free(bip->bli_logged);
1144 bip->bli_logged = NULL; 1144 bip->bli_logged = NULL;
1145#endif /* XFS_TRANS_DEBUG */ 1145#endif /* XFS_TRANS_DEBUG */
1146 1146
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index d5d1e60ee224..d2ce5dd70d87 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -78,6 +78,7 @@ struct xfs_mount_args {
78#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */ 78#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */
79#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */ 79#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */
80 /* (osyncisdsync is default) */ 80 /* (osyncisdsync is default) */
81#define XFSMNT_NOATTR2 0x00008000 /* turn off ATTR2 EA format */
81#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32 82#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32
82 * bits of address space */ 83 * bits of address space */
83#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */ 84#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 021a8f7e563f..9e561a9cefca 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1431,7 +1431,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1431 } 1431 }
1432 if (level < 0) { 1432 if (level < 0) {
1433 *result = XFS_ERROR(ENOENT); /* we're out of our tree */ 1433 *result = XFS_ERROR(ENOENT); /* we're out of our tree */
1434 ASSERT(args->oknoent); 1434 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1435 return(0); 1435 return(0);
1436 } 1436 }
1437 1437
@@ -1530,6 +1530,28 @@ xfs_da_hashname(const uchar_t *name, int namelen)
1530 } 1530 }
1531} 1531}
1532 1532
1533enum xfs_dacmp
1534xfs_da_compname(
1535 struct xfs_da_args *args,
1536 const char *name,
1537 int len)
1538{
1539 return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
1540 XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
1541}
1542
1543static xfs_dahash_t
1544xfs_default_hashname(
1545 struct xfs_name *name)
1546{
1547 return xfs_da_hashname(name->name, name->len);
1548}
1549
1550const struct xfs_nameops xfs_default_nameops = {
1551 .hashname = xfs_default_hashname,
1552 .compname = xfs_da_compname
1553};
1554
1533/* 1555/*
1534 * Add a block to the btree ahead of the file. 1556 * Add a block to the btree ahead of the file.
1535 * Return the new block number to the caller. 1557 * Return the new block number to the caller.
@@ -1598,7 +1620,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1598 args->firstblock, args->total, 1620 args->firstblock, args->total,
1599 &mapp[mapi], &nmap, args->flist, 1621 &mapp[mapi], &nmap, args->flist,
1600 NULL))) { 1622 NULL))) {
1601 kmem_free(mapp, sizeof(*mapp) * count); 1623 kmem_free(mapp);
1602 return error; 1624 return error;
1603 } 1625 }
1604 if (nmap < 1) 1626 if (nmap < 1)
@@ -1620,11 +1642,11 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1620 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != 1642 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
1621 bno + count) { 1643 bno + count) {
1622 if (mapp != &map) 1644 if (mapp != &map)
1623 kmem_free(mapp, sizeof(*mapp) * count); 1645 kmem_free(mapp);
1624 return XFS_ERROR(ENOSPC); 1646 return XFS_ERROR(ENOSPC);
1625 } 1647 }
1626 if (mapp != &map) 1648 if (mapp != &map)
1627 kmem_free(mapp, sizeof(*mapp) * count); 1649 kmem_free(mapp);
1628 *new_blkno = (xfs_dablk_t)bno; 1650 *new_blkno = (xfs_dablk_t)bno;
1629 return 0; 1651 return 0;
1630} 1652}
@@ -2090,10 +2112,10 @@ xfs_da_do_buf(
2090 } 2112 }
2091 } 2113 }
2092 if (bplist) { 2114 if (bplist) {
2093 kmem_free(bplist, sizeof(*bplist) * nmap); 2115 kmem_free(bplist);
2094 } 2116 }
2095 if (mapp != &map) { 2117 if (mapp != &map) {
2096 kmem_free(mapp, sizeof(*mapp) * nfsb); 2118 kmem_free(mapp);
2097 } 2119 }
2098 if (bpp) 2120 if (bpp)
2099 *bpp = rbp; 2121 *bpp = rbp;
@@ -2102,11 +2124,11 @@ exit1:
2102 if (bplist) { 2124 if (bplist) {
2103 for (i = 0; i < nbplist; i++) 2125 for (i = 0; i < nbplist; i++)
2104 xfs_trans_brelse(trans, bplist[i]); 2126 xfs_trans_brelse(trans, bplist[i]);
2105 kmem_free(bplist, sizeof(*bplist) * nmap); 2127 kmem_free(bplist);
2106 } 2128 }
2107exit0: 2129exit0:
2108 if (mapp != &map) 2130 if (mapp != &map)
2109 kmem_free(mapp, sizeof(*mapp) * nfsb); 2131 kmem_free(mapp);
2110 if (bpp) 2132 if (bpp)
2111 *bpp = NULL; 2133 *bpp = NULL;
2112 return error; 2134 return error;
@@ -2218,7 +2240,7 @@ xfs_da_state_free(xfs_da_state_t *state)
2218 2240
2219#ifdef XFS_DABUF_DEBUG 2241#ifdef XFS_DABUF_DEBUG
2220xfs_dabuf_t *xfs_dabuf_global_list; 2242xfs_dabuf_t *xfs_dabuf_global_list;
2221spinlock_t xfs_dabuf_global_lock; 2243static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
2222#endif 2244#endif
2223 2245
2224/* 2246/*
@@ -2315,7 +2337,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
2315 if (dabuf->dirty) 2337 if (dabuf->dirty)
2316 xfs_da_buf_clean(dabuf); 2338 xfs_da_buf_clean(dabuf);
2317 if (dabuf->nbuf > 1) 2339 if (dabuf->nbuf > 1)
2318 kmem_free(dabuf->data, BBTOB(dabuf->bbcount)); 2340 kmem_free(dabuf->data);
2319#ifdef XFS_DABUF_DEBUG 2341#ifdef XFS_DABUF_DEBUG
2320 { 2342 {
2321 spin_lock(&xfs_dabuf_global_lock); 2343 spin_lock(&xfs_dabuf_global_lock);
@@ -2332,7 +2354,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
2332 if (dabuf->nbuf == 1) 2354 if (dabuf->nbuf == 1)
2333 kmem_zone_free(xfs_dabuf_zone, dabuf); 2355 kmem_zone_free(xfs_dabuf_zone, dabuf);
2334 else 2356 else
2335 kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf)); 2357 kmem_free(dabuf);
2336} 2358}
2337 2359
2338/* 2360/*
@@ -2403,7 +2425,7 @@ xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2403 for (i = 0; i < nbuf; i++) 2425 for (i = 0; i < nbuf; i++)
2404 xfs_trans_brelse(tp, bplist[i]); 2426 xfs_trans_brelse(tp, bplist[i]);
2405 if (bplist != &bp) 2427 if (bplist != &bp)
2406 kmem_free(bplist, nbuf * sizeof(*bplist)); 2428 kmem_free(bplist);
2407} 2429}
2408 2430
2409/* 2431/*
@@ -2429,7 +2451,7 @@ xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2429 for (i = 0; i < nbuf; i++) 2451 for (i = 0; i < nbuf; i++)
2430 xfs_trans_binval(tp, bplist[i]); 2452 xfs_trans_binval(tp, bplist[i]);
2431 if (bplist != &bp) 2453 if (bplist != &bp)
2432 kmem_free(bplist, nbuf * sizeof(*bplist)); 2454 kmem_free(bplist);
2433} 2455}
2434 2456
2435/* 2457/*
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 7facf86f74f9..8be0b00ede9a 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -99,6 +99,15 @@ typedef struct xfs_da_node_entry xfs_da_node_entry_t;
99 *========================================================================*/ 99 *========================================================================*/
100 100
101/* 101/*
102 * Search comparison results
103 */
104enum xfs_dacmp {
105 XFS_CMP_DIFFERENT, /* names are completely different */
106 XFS_CMP_EXACT, /* names are exactly the same */
107 XFS_CMP_CASE /* names are same but differ in case */
108};
109
110/*
102 * Structure to ease passing around component names. 111 * Structure to ease passing around component names.
103 */ 112 */
104typedef struct xfs_da_args { 113typedef struct xfs_da_args {
@@ -123,13 +132,20 @@ typedef struct xfs_da_args {
123 int index2; /* index of 2nd attr in blk */ 132 int index2; /* index of 2nd attr in blk */
124 xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */ 133 xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */
125 int rmtblkcnt2; /* remote attr value block count */ 134 int rmtblkcnt2; /* remote attr value block count */
126 unsigned char justcheck; /* T/F: check for ok with no space */ 135 int op_flags; /* operation flags */
127 unsigned char rename; /* T/F: this is an atomic rename op */ 136 enum xfs_dacmp cmpresult; /* name compare result for lookups */
128 unsigned char addname; /* T/F: this is an add operation */
129 unsigned char oknoent; /* T/F: ok to return ENOENT, else die */
130} xfs_da_args_t; 137} xfs_da_args_t;
131 138
132/* 139/*
140 * Operation flags:
141 */
142#define XFS_DA_OP_JUSTCHECK 0x0001 /* check for ok with no space */
143#define XFS_DA_OP_RENAME 0x0002 /* this is an atomic rename op */
144#define XFS_DA_OP_ADDNAME 0x0004 /* this is an add operation */
145#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */
146#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */
147
148/*
133 * Structure to describe buffer(s) for a block. 149 * Structure to describe buffer(s) for a block.
134 * This is needed in the directory version 2 format case, when 150 * This is needed in the directory version 2 format case, when
135 * multiple non-contiguous fsblocks might be needed to cover one 151 * multiple non-contiguous fsblocks might be needed to cover one
@@ -201,6 +217,14 @@ typedef struct xfs_da_state {
201 (uint)(XFS_DA_LOGOFF(BASE, ADDR)), \ 217 (uint)(XFS_DA_LOGOFF(BASE, ADDR)), \
202 (uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1) 218 (uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1)
203 219
220/*
221 * Name ops for directory and/or attr name operations
222 */
223struct xfs_nameops {
224 xfs_dahash_t (*hashname)(struct xfs_name *);
225 enum xfs_dacmp (*compname)(struct xfs_da_args *, const char *, int);
226};
227
204 228
205#ifdef __KERNEL__ 229#ifdef __KERNEL__
206/*======================================================================== 230/*========================================================================
@@ -249,6 +273,10 @@ int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
249 xfs_dabuf_t *dead_buf); 273 xfs_dabuf_t *dead_buf);
250 274
251uint xfs_da_hashname(const uchar_t *name_string, int name_length); 275uint xfs_da_hashname(const uchar_t *name_string, int name_length);
276enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
277 const char *name, int len);
278
279
252xfs_da_state_t *xfs_da_state_alloc(void); 280xfs_da_state_t *xfs_da_state_alloc(void);
253void xfs_da_state_free(xfs_da_state_t *state); 281void xfs_da_state_free(xfs_da_state_t *state);
254 282
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5f3647cb9885..760f4c5b5160 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -116,7 +116,7 @@ xfs_swapext(
116 out_put_file: 116 out_put_file:
117 fput(file); 117 fput(file);
118 out_free_sxp: 118 out_free_sxp:
119 kmem_free(sxp, sizeof(xfs_swapext_t)); 119 kmem_free(sxp);
120 out: 120 out:
121 return error; 121 return error;
122} 122}
@@ -128,10 +128,8 @@ xfs_swap_extents(
128 xfs_swapext_t *sxp) 128 xfs_swapext_t *sxp)
129{ 129{
130 xfs_mount_t *mp; 130 xfs_mount_t *mp;
131 xfs_inode_t *ips[2];
132 xfs_trans_t *tp; 131 xfs_trans_t *tp;
133 xfs_bstat_t *sbp = &sxp->sx_stat; 132 xfs_bstat_t *sbp = &sxp->sx_stat;
134 bhv_vnode_t *vp, *tvp;
135 xfs_ifork_t *tempifp, *ifp, *tifp; 133 xfs_ifork_t *tempifp, *ifp, *tifp;
136 int ilf_fields, tilf_fields; 134 int ilf_fields, tilf_fields;
137 static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; 135 static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
@@ -150,19 +148,8 @@ xfs_swap_extents(
150 } 148 }
151 149
152 sbp = &sxp->sx_stat; 150 sbp = &sxp->sx_stat;
153 vp = XFS_ITOV(ip);
154 tvp = XFS_ITOV(tip);
155
156 /* Lock in i_ino order */
157 if (ip->i_ino < tip->i_ino) {
158 ips[0] = ip;
159 ips[1] = tip;
160 } else {
161 ips[0] = tip;
162 ips[1] = ip;
163 }
164 151
165 xfs_lock_inodes(ips, 2, lock_flags); 152 xfs_lock_two_inodes(ip, tip, lock_flags);
166 locked = 1; 153 locked = 1;
167 154
168 /* Verify that both files have the same format */ 155 /* Verify that both files have the same format */
@@ -184,7 +171,7 @@ xfs_swap_extents(
184 goto error0; 171 goto error0;
185 } 172 }
186 173
187 if (VN_CACHED(tvp) != 0) { 174 if (VN_CACHED(VFS_I(tip)) != 0) {
188 xfs_inval_cached_trace(tip, 0, -1, 0, -1); 175 xfs_inval_cached_trace(tip, 0, -1, 0, -1);
189 error = xfs_flushinval_pages(tip, 0, -1, 176 error = xfs_flushinval_pages(tip, 0, -1,
190 FI_REMAPF_LOCKED); 177 FI_REMAPF_LOCKED);
@@ -193,7 +180,7 @@ xfs_swap_extents(
193 } 180 }
194 181
195 /* Verify O_DIRECT for ftmp */ 182 /* Verify O_DIRECT for ftmp */
196 if (VN_CACHED(tvp) != 0) { 183 if (VN_CACHED(VFS_I(tip)) != 0) {
197 error = XFS_ERROR(EINVAL); 184 error = XFS_ERROR(EINVAL);
198 goto error0; 185 goto error0;
199 } 186 }
@@ -237,7 +224,7 @@ xfs_swap_extents(
237 * vop_read (or write in the case of autogrow) they block on the iolock 224 * vop_read (or write in the case of autogrow) they block on the iolock
238 * until we have switched the extents. 225 * until we have switched the extents.
239 */ 226 */
240 if (VN_MAPPED(vp)) { 227 if (VN_MAPPED(VFS_I(ip))) {
241 error = XFS_ERROR(EBUSY); 228 error = XFS_ERROR(EBUSY);
242 goto error0; 229 goto error0;
243 } 230 }
@@ -265,7 +252,7 @@ xfs_swap_extents(
265 locked = 0; 252 locked = 0;
266 goto error0; 253 goto error0;
267 } 254 }
268 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); 255 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
269 256
270 /* 257 /*
271 * Count the number of extended attribute blocks 258 * Count the number of extended attribute blocks
@@ -350,15 +337,11 @@ xfs_swap_extents(
350 break; 337 break;
351 } 338 }
352 339
353 /*
354 * Increment vnode ref counts since xfs_trans_commit &
355 * xfs_trans_cancel will both unlock the inodes and
356 * decrement the associated ref counts.
357 */
358 VN_HOLD(vp);
359 VN_HOLD(tvp);
360 340
341 IHOLD(ip);
361 xfs_trans_ijoin(tp, ip, lock_flags); 342 xfs_trans_ijoin(tp, ip, lock_flags);
343
344 IHOLD(tip);
362 xfs_trans_ijoin(tp, tip, lock_flags); 345 xfs_trans_ijoin(tp, tip, lock_flags);
363 346
364 xfs_trans_log_inode(tp, ip, ilf_fields); 347 xfs_trans_log_inode(tp, ip, ilf_fields);
@@ -381,6 +364,6 @@ xfs_swap_extents(
381 xfs_iunlock(tip, lock_flags); 364 xfs_iunlock(tip, lock_flags);
382 } 365 }
383 if (tempifp != NULL) 366 if (tempifp != NULL)
384 kmem_free(tempifp, sizeof(xfs_ifork_t)); 367 kmem_free(tempifp);
385 return error; 368 return error;
386} 369}
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 7cb26529766b..80e0dc51361c 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -46,6 +46,54 @@
46 46
47struct xfs_name xfs_name_dotdot = {"..", 2}; 47struct xfs_name xfs_name_dotdot = {"..", 2};
48 48
49extern const struct xfs_nameops xfs_default_nameops;
50
51/*
52 * ASCII case-insensitive (ie. A-Z) support for directories that was
53 * used in IRIX.
54 */
55STATIC xfs_dahash_t
56xfs_ascii_ci_hashname(
57 struct xfs_name *name)
58{
59 xfs_dahash_t hash;
60 int i;
61
62 for (i = 0, hash = 0; i < name->len; i++)
63 hash = tolower(name->name[i]) ^ rol32(hash, 7);
64
65 return hash;
66}
67
68STATIC enum xfs_dacmp
69xfs_ascii_ci_compname(
70 struct xfs_da_args *args,
71 const char *name,
72 int len)
73{
74 enum xfs_dacmp result;
75 int i;
76
77 if (args->namelen != len)
78 return XFS_CMP_DIFFERENT;
79
80 result = XFS_CMP_EXACT;
81 for (i = 0; i < len; i++) {
82 if (args->name[i] == name[i])
83 continue;
84 if (tolower(args->name[i]) != tolower(name[i]))
85 return XFS_CMP_DIFFERENT;
86 result = XFS_CMP_CASE;
87 }
88
89 return result;
90}
91
92static struct xfs_nameops xfs_ascii_ci_nameops = {
93 .hashname = xfs_ascii_ci_hashname,
94 .compname = xfs_ascii_ci_compname,
95};
96
49void 97void
50xfs_dir_mount( 98xfs_dir_mount(
51 xfs_mount_t *mp) 99 xfs_mount_t *mp)
@@ -65,6 +113,10 @@ xfs_dir_mount(
65 (mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) / 113 (mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) /
66 (uint)sizeof(xfs_da_node_entry_t); 114 (uint)sizeof(xfs_da_node_entry_t);
67 mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100; 115 mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;
116 if (xfs_sb_version_hasasciici(&mp->m_sb))
117 mp->m_dirnameops = &xfs_ascii_ci_nameops;
118 else
119 mp->m_dirnameops = &xfs_default_nameops;
68} 120}
69 121
70/* 122/*
@@ -162,9 +214,10 @@ xfs_dir_createname(
162 return rval; 214 return rval;
163 XFS_STATS_INC(xs_dir_create); 215 XFS_STATS_INC(xs_dir_create);
164 216
217 memset(&args, 0, sizeof(xfs_da_args_t));
165 args.name = name->name; 218 args.name = name->name;
166 args.namelen = name->len; 219 args.namelen = name->len;
167 args.hashval = xfs_da_hashname(name->name, name->len); 220 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
168 args.inumber = inum; 221 args.inumber = inum;
169 args.dp = dp; 222 args.dp = dp;
170 args.firstblock = first; 223 args.firstblock = first;
@@ -172,8 +225,7 @@ xfs_dir_createname(
172 args.total = total; 225 args.total = total;
173 args.whichfork = XFS_DATA_FORK; 226 args.whichfork = XFS_DATA_FORK;
174 args.trans = tp; 227 args.trans = tp;
175 args.justcheck = 0; 228 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
176 args.addname = args.oknoent = 1;
177 229
178 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 230 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
179 rval = xfs_dir2_sf_addname(&args); 231 rval = xfs_dir2_sf_addname(&args);
@@ -191,14 +243,43 @@ xfs_dir_createname(
191} 243}
192 244
193/* 245/*
246 * If doing a CI lookup and case-insensitive match, dup actual name into
247 * args.value. Return EEXIST for success (ie. name found) or an error.
248 */
249int
250xfs_dir_cilookup_result(
251 struct xfs_da_args *args,
252 const char *name,
253 int len)
254{
255 if (args->cmpresult == XFS_CMP_DIFFERENT)
256 return ENOENT;
257 if (args->cmpresult != XFS_CMP_CASE ||
258 !(args->op_flags & XFS_DA_OP_CILOOKUP))
259 return EEXIST;
260
261 args->value = kmem_alloc(len, KM_MAYFAIL);
262 if (!args->value)
263 return ENOMEM;
264
265 memcpy(args->value, name, len);
266 args->valuelen = len;
267 return EEXIST;
268}
269
270/*
194 * Lookup a name in a directory, give back the inode number. 271 * Lookup a name in a directory, give back the inode number.
272 * If ci_name is not NULL, returns the actual name in ci_name if it differs
273 * to name, or ci_name->name is set to NULL for an exact match.
195 */ 274 */
275
196int 276int
197xfs_dir_lookup( 277xfs_dir_lookup(
198 xfs_trans_t *tp, 278 xfs_trans_t *tp,
199 xfs_inode_t *dp, 279 xfs_inode_t *dp,
200 struct xfs_name *name, 280 struct xfs_name *name,
201 xfs_ino_t *inum) /* out: inode number */ 281 xfs_ino_t *inum, /* out: inode number */
282 struct xfs_name *ci_name) /* out: actual name if CI match */
202{ 283{
203 xfs_da_args_t args; 284 xfs_da_args_t args;
204 int rval; 285 int rval;
@@ -206,15 +287,17 @@ xfs_dir_lookup(
206 287
207 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 288 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
208 XFS_STATS_INC(xs_dir_lookup); 289 XFS_STATS_INC(xs_dir_lookup);
209 memset(&args, 0, sizeof(xfs_da_args_t));
210 290
291 memset(&args, 0, sizeof(xfs_da_args_t));
211 args.name = name->name; 292 args.name = name->name;
212 args.namelen = name->len; 293 args.namelen = name->len;
213 args.hashval = xfs_da_hashname(name->name, name->len); 294 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
214 args.dp = dp; 295 args.dp = dp;
215 args.whichfork = XFS_DATA_FORK; 296 args.whichfork = XFS_DATA_FORK;
216 args.trans = tp; 297 args.trans = tp;
217 args.oknoent = 1; 298 args.op_flags = XFS_DA_OP_OKNOENT;
299 if (ci_name)
300 args.op_flags |= XFS_DA_OP_CILOOKUP;
218 301
219 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 302 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
220 rval = xfs_dir2_sf_lookup(&args); 303 rval = xfs_dir2_sf_lookup(&args);
@@ -230,8 +313,13 @@ xfs_dir_lookup(
230 rval = xfs_dir2_node_lookup(&args); 313 rval = xfs_dir2_node_lookup(&args);
231 if (rval == EEXIST) 314 if (rval == EEXIST)
232 rval = 0; 315 rval = 0;
233 if (rval == 0) 316 if (!rval) {
234 *inum = args.inumber; 317 *inum = args.inumber;
318 if (ci_name) {
319 ci_name->name = args.value;
320 ci_name->len = args.valuelen;
321 }
322 }
235 return rval; 323 return rval;
236} 324}
237 325
@@ -255,9 +343,10 @@ xfs_dir_removename(
255 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 343 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
256 XFS_STATS_INC(xs_dir_remove); 344 XFS_STATS_INC(xs_dir_remove);
257 345
346 memset(&args, 0, sizeof(xfs_da_args_t));
258 args.name = name->name; 347 args.name = name->name;
259 args.namelen = name->len; 348 args.namelen = name->len;
260 args.hashval = xfs_da_hashname(name->name, name->len); 349 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
261 args.inumber = ino; 350 args.inumber = ino;
262 args.dp = dp; 351 args.dp = dp;
263 args.firstblock = first; 352 args.firstblock = first;
@@ -265,7 +354,6 @@ xfs_dir_removename(
265 args.total = total; 354 args.total = total;
266 args.whichfork = XFS_DATA_FORK; 355 args.whichfork = XFS_DATA_FORK;
267 args.trans = tp; 356 args.trans = tp;
268 args.justcheck = args.addname = args.oknoent = 0;
269 357
270 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 358 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
271 rval = xfs_dir2_sf_removename(&args); 359 rval = xfs_dir2_sf_removename(&args);
@@ -338,9 +426,10 @@ xfs_dir_replace(
338 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) 426 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
339 return rval; 427 return rval;
340 428
429 memset(&args, 0, sizeof(xfs_da_args_t));
341 args.name = name->name; 430 args.name = name->name;
342 args.namelen = name->len; 431 args.namelen = name->len;
343 args.hashval = xfs_da_hashname(name->name, name->len); 432 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
344 args.inumber = inum; 433 args.inumber = inum;
345 args.dp = dp; 434 args.dp = dp;
346 args.firstblock = first; 435 args.firstblock = first;
@@ -348,7 +437,6 @@ xfs_dir_replace(
348 args.total = total; 437 args.total = total;
349 args.whichfork = XFS_DATA_FORK; 438 args.whichfork = XFS_DATA_FORK;
350 args.trans = tp; 439 args.trans = tp;
351 args.justcheck = args.addname = args.oknoent = 0;
352 440
353 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 441 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
354 rval = xfs_dir2_sf_replace(&args); 442 rval = xfs_dir2_sf_replace(&args);
@@ -384,15 +472,16 @@ xfs_dir_canenter(
384 return 0; 472 return 0;
385 473
386 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 474 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
387 memset(&args, 0, sizeof(xfs_da_args_t));
388 475
476 memset(&args, 0, sizeof(xfs_da_args_t));
389 args.name = name->name; 477 args.name = name->name;
390 args.namelen = name->len; 478 args.namelen = name->len;
391 args.hashval = xfs_da_hashname(name->name, name->len); 479 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
392 args.dp = dp; 480 args.dp = dp;
393 args.whichfork = XFS_DATA_FORK; 481 args.whichfork = XFS_DATA_FORK;
394 args.trans = tp; 482 args.trans = tp;
395 args.justcheck = args.addname = args.oknoent = 1; 483 args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
484 XFS_DA_OP_OKNOENT;
396 485
397 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 486 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
398 rval = xfs_dir2_sf_addname(&args); 487 rval = xfs_dir2_sf_addname(&args);
@@ -493,7 +582,7 @@ xfs_dir2_grow_inode(
493 args->firstblock, args->total, 582 args->firstblock, args->total,
494 &mapp[mapi], &nmap, args->flist, 583 &mapp[mapi], &nmap, args->flist,
495 NULL))) { 584 NULL))) {
496 kmem_free(mapp, sizeof(*mapp) * count); 585 kmem_free(mapp);
497 return error; 586 return error;
498 } 587 }
499 if (nmap < 1) 588 if (nmap < 1)
@@ -525,14 +614,14 @@ xfs_dir2_grow_inode(
525 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != 614 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
526 bno + count) { 615 bno + count) {
527 if (mapp != &map) 616 if (mapp != &map)
528 kmem_free(mapp, sizeof(*mapp) * count); 617 kmem_free(mapp);
529 return XFS_ERROR(ENOSPC); 618 return XFS_ERROR(ENOSPC);
530 } 619 }
531 /* 620 /*
532 * Done with the temporary mapping table. 621 * Done with the temporary mapping table.
533 */ 622 */
534 if (mapp != &map) 623 if (mapp != &map)
535 kmem_free(mapp, sizeof(*mapp) * count); 624 kmem_free(mapp);
536 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); 625 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
537 /* 626 /*
538 * Update file's size if this is the data space and it grew. 627 * Update file's size if this is the data space and it grew.
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 6392f939029f..1d9ef96f33aa 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -74,7 +74,8 @@ extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
74 xfs_fsblock_t *first, 74 xfs_fsblock_t *first,
75 struct xfs_bmap_free *flist, xfs_extlen_t tot); 75 struct xfs_bmap_free *flist, xfs_extlen_t tot);
76extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, 76extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
77 struct xfs_name *name, xfs_ino_t *inum); 77 struct xfs_name *name, xfs_ino_t *inum,
78 struct xfs_name *ci_name);
78extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, 79extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
79 struct xfs_name *name, xfs_ino_t ino, 80 struct xfs_name *name, xfs_ino_t ino,
80 xfs_fsblock_t *first, 81 xfs_fsblock_t *first,
@@ -99,4 +100,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
99extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, 100extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
100 struct xfs_dabuf *bp); 101 struct xfs_dabuf *bp);
101 102
103extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const char *name,
104 int len);
105
102#endif /* __XFS_DIR2_H__ */ 106#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index fb5a556725b3..e2fa0a1d8e96 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -215,7 +215,7 @@ xfs_dir2_block_addname(
215 /* 215 /*
216 * If this isn't a real add, we're done with the buffer. 216 * If this isn't a real add, we're done with the buffer.
217 */ 217 */
218 if (args->justcheck) 218 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
219 xfs_da_brelse(tp, bp); 219 xfs_da_brelse(tp, bp);
220 /* 220 /*
221 * If we don't have space for the new entry & leaf ... 221 * If we don't have space for the new entry & leaf ...
@@ -225,7 +225,7 @@ xfs_dir2_block_addname(
225 * Not trying to actually do anything, or don't have 225 * Not trying to actually do anything, or don't have
226 * a space reservation: return no-space. 226 * a space reservation: return no-space.
227 */ 227 */
228 if (args->justcheck || args->total == 0) 228 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
229 return XFS_ERROR(ENOSPC); 229 return XFS_ERROR(ENOSPC);
230 /* 230 /*
231 * Convert to the next larger format. 231 * Convert to the next larger format.
@@ -240,7 +240,7 @@ xfs_dir2_block_addname(
240 /* 240 /*
241 * Just checking, and it would work, so say so. 241 * Just checking, and it would work, so say so.
242 */ 242 */
243 if (args->justcheck) 243 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
244 return 0; 244 return 0;
245 needlog = needscan = 0; 245 needlog = needscan = 0;
246 /* 246 /*
@@ -610,14 +610,15 @@ xfs_dir2_block_lookup(
610 /* 610 /*
611 * Get the offset from the leaf entry, to point to the data. 611 * Get the offset from the leaf entry, to point to the data.
612 */ 612 */
613 dep = (xfs_dir2_data_entry_t *) 613 dep = (xfs_dir2_data_entry_t *)((char *)block +
614 ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); 614 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
615 /* 615 /*
616 * Fill in inode number, release the block. 616 * Fill in inode number, CI name if appropriate, release the block.
617 */ 617 */
618 args->inumber = be64_to_cpu(dep->inumber); 618 args->inumber = be64_to_cpu(dep->inumber);
619 error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
619 xfs_da_brelse(args->trans, bp); 620 xfs_da_brelse(args->trans, bp);
620 return XFS_ERROR(EEXIST); 621 return XFS_ERROR(error);
621} 622}
622 623
623/* 624/*
@@ -643,6 +644,7 @@ xfs_dir2_block_lookup_int(
643 int mid; /* binary search current idx */ 644 int mid; /* binary search current idx */
644 xfs_mount_t *mp; /* filesystem mount point */ 645 xfs_mount_t *mp; /* filesystem mount point */
645 xfs_trans_t *tp; /* transaction pointer */ 646 xfs_trans_t *tp; /* transaction pointer */
647 enum xfs_dacmp cmp; /* comparison result */
646 648
647 dp = args->dp; 649 dp = args->dp;
648 tp = args->trans; 650 tp = args->trans;
@@ -673,7 +675,7 @@ xfs_dir2_block_lookup_int(
673 else 675 else
674 high = mid - 1; 676 high = mid - 1;
675 if (low > high) { 677 if (low > high) {
676 ASSERT(args->oknoent); 678 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
677 xfs_da_brelse(tp, bp); 679 xfs_da_brelse(tp, bp);
678 return XFS_ERROR(ENOENT); 680 return XFS_ERROR(ENOENT);
679 } 681 }
@@ -697,20 +699,31 @@ xfs_dir2_block_lookup_int(
697 dep = (xfs_dir2_data_entry_t *) 699 dep = (xfs_dir2_data_entry_t *)
698 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); 700 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
699 /* 701 /*
700 * Compare, if it's right give back buffer & entry number. 702 * Compare name and if it's an exact match, return the index
703 * and buffer. If it's the first case-insensitive match, store
704 * the index and buffer and continue looking for an exact match.
701 */ 705 */
702 if (dep->namelen == args->namelen && 706 cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
703 dep->name[0] == args->name[0] && 707 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
704 memcmp(dep->name, args->name, args->namelen) == 0) { 708 args->cmpresult = cmp;
705 *bpp = bp; 709 *bpp = bp;
706 *entno = mid; 710 *entno = mid;
707 return 0; 711 if (cmp == XFS_CMP_EXACT)
712 return 0;
708 } 713 }
709 } while (++mid < be32_to_cpu(btp->count) && be32_to_cpu(blp[mid].hashval) == hash); 714 } while (++mid < be32_to_cpu(btp->count) &&
715 be32_to_cpu(blp[mid].hashval) == hash);
716
717 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
718 /*
719 * Here, we can only be doing a lookup (not a rename or replace).
720 * If a case-insensitive match was found earlier, return success.
721 */
722 if (args->cmpresult == XFS_CMP_CASE)
723 return 0;
710 /* 724 /*
711 * No match, release the buffer and return ENOENT. 725 * No match, release the buffer and return ENOENT.
712 */ 726 */
713 ASSERT(args->oknoent);
714 xfs_da_brelse(tp, bp); 727 xfs_da_brelse(tp, bp);
715 return XFS_ERROR(ENOENT); 728 return XFS_ERROR(ENOENT);
716} 729}
@@ -1033,6 +1046,7 @@ xfs_dir2_sf_to_block(
1033 xfs_dir2_sf_t *sfp; /* shortform structure */ 1046 xfs_dir2_sf_t *sfp; /* shortform structure */
1034 __be16 *tagp; /* end of data entry */ 1047 __be16 *tagp; /* end of data entry */
1035 xfs_trans_t *tp; /* transaction pointer */ 1048 xfs_trans_t *tp; /* transaction pointer */
1049 struct xfs_name name;
1036 1050
1037 xfs_dir2_trace_args("sf_to_block", args); 1051 xfs_dir2_trace_args("sf_to_block", args);
1038 dp = args->dp; 1052 dp = args->dp;
@@ -1071,7 +1085,7 @@ xfs_dir2_sf_to_block(
1071 */ 1085 */
1072 error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); 1086 error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
1073 if (error) { 1087 if (error) {
1074 kmem_free(buf, buf_len); 1088 kmem_free(buf);
1075 return error; 1089 return error;
1076 } 1090 }
1077 /* 1091 /*
@@ -1079,7 +1093,7 @@ xfs_dir2_sf_to_block(
1079 */ 1093 */
1080 error = xfs_dir2_data_init(args, blkno, &bp); 1094 error = xfs_dir2_data_init(args, blkno, &bp);
1081 if (error) { 1095 if (error) {
1082 kmem_free(buf, buf_len); 1096 kmem_free(buf);
1083 return error; 1097 return error;
1084 } 1098 }
1085 block = bp->data; 1099 block = bp->data;
@@ -1187,8 +1201,10 @@ xfs_dir2_sf_to_block(
1187 tagp = xfs_dir2_data_entry_tag_p(dep); 1201 tagp = xfs_dir2_data_entry_tag_p(dep);
1188 *tagp = cpu_to_be16((char *)dep - (char *)block); 1202 *tagp = cpu_to_be16((char *)dep - (char *)block);
1189 xfs_dir2_data_log_entry(tp, bp, dep); 1203 xfs_dir2_data_log_entry(tp, bp, dep);
1190 blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname( 1204 name.name = sfep->name;
1191 (char *)sfep->name, sfep->namelen)); 1205 name.len = sfep->namelen;
1206 blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
1207 hashname(&name));
1192 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1208 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1193 (char *)dep - (char *)block)); 1209 (char *)dep - (char *)block));
1194 offset = (int)((char *)(tagp + 1) - (char *)block); 1210 offset = (int)((char *)(tagp + 1) - (char *)block);
@@ -1198,7 +1214,7 @@ xfs_dir2_sf_to_block(
1198 sfep = xfs_dir2_sf_nextentry(sfp, sfep); 1214 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
1199 } 1215 }
1200 /* Done with the temporary buffer */ 1216 /* Done with the temporary buffer */
1201 kmem_free(buf, buf_len); 1217 kmem_free(buf);
1202 /* 1218 /*
1203 * Sort the leaf entries by hash value. 1219 * Sort the leaf entries by hash value.
1204 */ 1220 */
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index fb8c9e08b23d..498f8d694330 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -65,6 +65,7 @@ xfs_dir2_data_check(
65 xfs_mount_t *mp; /* filesystem mount point */ 65 xfs_mount_t *mp; /* filesystem mount point */
66 char *p; /* current data position */ 66 char *p; /* current data position */
67 int stale; /* count of stale leaves */ 67 int stale; /* count of stale leaves */
68 struct xfs_name name;
68 69
69 mp = dp->i_mount; 70 mp = dp->i_mount;
70 d = bp->data; 71 d = bp->data;
@@ -140,7 +141,9 @@ xfs_dir2_data_check(
140 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 141 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
141 (xfs_dir2_data_aoff_t) 142 (xfs_dir2_data_aoff_t)
142 ((char *)dep - (char *)d)); 143 ((char *)dep - (char *)d));
143 hash = xfs_da_hashname((char *)dep->name, dep->namelen); 144 name.name = dep->name;
145 name.len = dep->namelen;
146 hash = mp->m_dirnameops->hashname(&name);
144 for (i = 0; i < be32_to_cpu(btp->count); i++) { 147 for (i = 0; i < be32_to_cpu(btp->count); i++) {
145 if (be32_to_cpu(lep[i].address) == addr && 148 if (be32_to_cpu(lep[i].address) == addr &&
146 be32_to_cpu(lep[i].hashval) == hash) 149 be32_to_cpu(lep[i].hashval) == hash)
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index bc52b803d79b..93535992cb60 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -263,20 +263,21 @@ xfs_dir2_leaf_addname(
263 * If we don't have enough free bytes but we can make enough 263 * If we don't have enough free bytes but we can make enough
264 * by compacting out stale entries, we'll do that. 264 * by compacting out stale entries, we'll do that.
265 */ 265 */
266 if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < needbytes && 266 if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
267 be16_to_cpu(leaf->hdr.stale) > 1) { 267 needbytes && be16_to_cpu(leaf->hdr.stale) > 1) {
268 compact = 1; 268 compact = 1;
269 } 269 }
270 /* 270 /*
271 * Otherwise if we don't have enough free bytes we need to 271 * Otherwise if we don't have enough free bytes we need to
272 * convert to node form. 272 * convert to node form.
273 */ 273 */
274 else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < 274 else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(
275 needbytes) { 275 leaf->hdr.count)] < needbytes) {
276 /* 276 /*
277 * Just checking or no space reservation, give up. 277 * Just checking or no space reservation, give up.
278 */ 278 */
279 if (args->justcheck || args->total == 0) { 279 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
280 args->total == 0) {
280 xfs_da_brelse(tp, lbp); 281 xfs_da_brelse(tp, lbp);
281 return XFS_ERROR(ENOSPC); 282 return XFS_ERROR(ENOSPC);
282 } 283 }
@@ -301,7 +302,7 @@ xfs_dir2_leaf_addname(
301 * If just checking, then it will fit unless we needed to allocate 302 * If just checking, then it will fit unless we needed to allocate
302 * a new data block. 303 * a new data block.
303 */ 304 */
304 if (args->justcheck) { 305 if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
305 xfs_da_brelse(tp, lbp); 306 xfs_da_brelse(tp, lbp);
306 return use_block == -1 ? XFS_ERROR(ENOSPC) : 0; 307 return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
307 } 308 }
@@ -1110,7 +1111,7 @@ xfs_dir2_leaf_getdents(
1110 *offset = XFS_DIR2_MAX_DATAPTR; 1111 *offset = XFS_DIR2_MAX_DATAPTR;
1111 else 1112 else
1112 *offset = xfs_dir2_byte_to_dataptr(mp, curoff); 1113 *offset = xfs_dir2_byte_to_dataptr(mp, curoff);
1113 kmem_free(map, map_size * sizeof(*map)); 1114 kmem_free(map);
1114 if (bp) 1115 if (bp)
1115 xfs_da_brelse(NULL, bp); 1116 xfs_da_brelse(NULL, bp);
1116 return error; 1117 return error;
@@ -1298,12 +1299,13 @@ xfs_dir2_leaf_lookup(
1298 ((char *)dbp->data + 1299 ((char *)dbp->data +
1299 xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); 1300 xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1300 /* 1301 /*
1301 * Return the found inode number. 1302 * Return the found inode number & CI name if appropriate
1302 */ 1303 */
1303 args->inumber = be64_to_cpu(dep->inumber); 1304 args->inumber = be64_to_cpu(dep->inumber);
1305 error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1304 xfs_da_brelse(tp, dbp); 1306 xfs_da_brelse(tp, dbp);
1305 xfs_da_brelse(tp, lbp); 1307 xfs_da_brelse(tp, lbp);
1306 return XFS_ERROR(EEXIST); 1308 return XFS_ERROR(error);
1307} 1309}
1308 1310
1309/* 1311/*
@@ -1319,8 +1321,8 @@ xfs_dir2_leaf_lookup_int(
1319 int *indexp, /* out: index in leaf block */ 1321 int *indexp, /* out: index in leaf block */
1320 xfs_dabuf_t **dbpp) /* out: data buffer */ 1322 xfs_dabuf_t **dbpp) /* out: data buffer */
1321{ 1323{
1322 xfs_dir2_db_t curdb; /* current data block number */ 1324 xfs_dir2_db_t curdb = -1; /* current data block number */
1323 xfs_dabuf_t *dbp; /* data buffer */ 1325 xfs_dabuf_t *dbp = NULL; /* data buffer */
1324 xfs_dir2_data_entry_t *dep; /* data entry */ 1326 xfs_dir2_data_entry_t *dep; /* data entry */
1325 xfs_inode_t *dp; /* incore directory inode */ 1327 xfs_inode_t *dp; /* incore directory inode */
1326 int error; /* error return code */ 1328 int error; /* error return code */
@@ -1331,6 +1333,8 @@ xfs_dir2_leaf_lookup_int(
1331 xfs_mount_t *mp; /* filesystem mount point */ 1333 xfs_mount_t *mp; /* filesystem mount point */
1332 xfs_dir2_db_t newdb; /* new data block number */ 1334 xfs_dir2_db_t newdb; /* new data block number */
1333 xfs_trans_t *tp; /* transaction pointer */ 1335 xfs_trans_t *tp; /* transaction pointer */
1336 xfs_dir2_db_t cidb = -1; /* case match data block no. */
1337 enum xfs_dacmp cmp; /* name compare result */
1334 1338
1335 dp = args->dp; 1339 dp = args->dp;
1336 tp = args->trans; 1340 tp = args->trans;
@@ -1338,11 +1342,10 @@ xfs_dir2_leaf_lookup_int(
1338 /* 1342 /*
1339 * Read the leaf block into the buffer. 1343 * Read the leaf block into the buffer.
1340 */ 1344 */
1341 if ((error = 1345 error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
1342 xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp, 1346 XFS_DATA_FORK);
1343 XFS_DATA_FORK))) { 1347 if (error)
1344 return error; 1348 return error;
1345 }
1346 *lbpp = lbp; 1349 *lbpp = lbp;
1347 leaf = lbp->data; 1350 leaf = lbp->data;
1348 xfs_dir2_leaf_check(dp, lbp); 1351 xfs_dir2_leaf_check(dp, lbp);
@@ -1354,9 +1357,9 @@ xfs_dir2_leaf_lookup_int(
1354 * Loop over all the entries with the right hash value 1357 * Loop over all the entries with the right hash value
1355 * looking to match the name. 1358 * looking to match the name.
1356 */ 1359 */
1357 for (lep = &leaf->ents[index], dbp = NULL, curdb = -1; 1360 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
1358 index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval; 1361 be32_to_cpu(lep->hashval) == args->hashval;
1359 lep++, index++) { 1362 lep++, index++) {
1360 /* 1363 /*
1361 * Skip over stale leaf entries. 1364 * Skip over stale leaf entries.
1362 */ 1365 */
@@ -1373,10 +1376,10 @@ xfs_dir2_leaf_lookup_int(
1373 if (newdb != curdb) { 1376 if (newdb != curdb) {
1374 if (dbp) 1377 if (dbp)
1375 xfs_da_brelse(tp, dbp); 1378 xfs_da_brelse(tp, dbp);
1376 if ((error = 1379 error = xfs_da_read_buf(tp, dp,
1377 xfs_da_read_buf(tp, dp, 1380 xfs_dir2_db_to_da(mp, newdb),
1378 xfs_dir2_db_to_da(mp, newdb), -1, &dbp, 1381 -1, &dbp, XFS_DATA_FORK);
1379 XFS_DATA_FORK))) { 1382 if (error) {
1380 xfs_da_brelse(tp, lbp); 1383 xfs_da_brelse(tp, lbp);
1381 return error; 1384 return error;
1382 } 1385 }
@@ -1386,24 +1389,50 @@ xfs_dir2_leaf_lookup_int(
1386 /* 1389 /*
1387 * Point to the data entry. 1390 * Point to the data entry.
1388 */ 1391 */
1389 dep = (xfs_dir2_data_entry_t *) 1392 dep = (xfs_dir2_data_entry_t *)((char *)dbp->data +
1390 ((char *)dbp->data + 1393 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1391 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1392 /* 1394 /*
1393 * If it matches then return it. 1395 * Compare name and if it's an exact match, return the index
1396 * and buffer. If it's the first case-insensitive match, store
1397 * the index and buffer and continue looking for an exact match.
1394 */ 1398 */
1395 if (dep->namelen == args->namelen && 1399 cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
1396 dep->name[0] == args->name[0] && 1400 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
1397 memcmp(dep->name, args->name, args->namelen) == 0) { 1401 args->cmpresult = cmp;
1398 *dbpp = dbp;
1399 *indexp = index; 1402 *indexp = index;
1400 return 0; 1403 /* case exact match: return the current buffer. */
1404 if (cmp == XFS_CMP_EXACT) {
1405 *dbpp = dbp;
1406 return 0;
1407 }
1408 cidb = curdb;
1401 } 1409 }
1402 } 1410 }
1411 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1412 /*
1413 * Here, we can only be doing a lookup (not a rename or remove).
1414 * If a case-insensitive match was found earlier, re-read the
1415 * appropriate data block if required and return it.
1416 */
1417 if (args->cmpresult == XFS_CMP_CASE) {
1418 ASSERT(cidb != -1);
1419 if (cidb != curdb) {
1420 xfs_da_brelse(tp, dbp);
1421 error = xfs_da_read_buf(tp, dp,
1422 xfs_dir2_db_to_da(mp, cidb),
1423 -1, &dbp, XFS_DATA_FORK);
1424 if (error) {
1425 xfs_da_brelse(tp, lbp);
1426 return error;
1427 }
1428 }
1429 *dbpp = dbp;
1430 return 0;
1431 }
1403 /* 1432 /*
1404 * No match found, return ENOENT. 1433 * No match found, return ENOENT.
1405 */ 1434 */
1406 ASSERT(args->oknoent); 1435 ASSERT(cidb == -1);
1407 if (dbp) 1436 if (dbp)
1408 xfs_da_brelse(tp, dbp); 1437 xfs_da_brelse(tp, dbp);
1409 xfs_da_brelse(tp, lbp); 1438 xfs_da_brelse(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 8dade711f099..fa6c3a5ddbc6 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -226,7 +226,7 @@ xfs_dir2_leafn_add(
226 ASSERT(index == be16_to_cpu(leaf->hdr.count) || 226 ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
227 be32_to_cpu(leaf->ents[index].hashval) >= args->hashval); 227 be32_to_cpu(leaf->ents[index].hashval) >= args->hashval);
228 228
229 if (args->justcheck) 229 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
230 return 0; 230 return 0;
231 231
232 /* 232 /*
@@ -387,28 +387,26 @@ xfs_dir2_leafn_lasthash(
387} 387}
388 388
389/* 389/*
390 * Look up a leaf entry in a node-format leaf block. 390 * Look up a leaf entry for space to add a name in a node-format leaf block.
391 * If this is an addname then the extrablk in state is a freespace block, 391 * The extrablk in state is a freespace block.
392 * otherwise it's a data block.
393 */ 392 */
394int 393STATIC int
395xfs_dir2_leafn_lookup_int( 394xfs_dir2_leafn_lookup_for_addname(
396 xfs_dabuf_t *bp, /* leaf buffer */ 395 xfs_dabuf_t *bp, /* leaf buffer */
397 xfs_da_args_t *args, /* operation arguments */ 396 xfs_da_args_t *args, /* operation arguments */
398 int *indexp, /* out: leaf entry index */ 397 int *indexp, /* out: leaf entry index */
399 xfs_da_state_t *state) /* state to fill in */ 398 xfs_da_state_t *state) /* state to fill in */
400{ 399{
401 xfs_dabuf_t *curbp; /* current data/free buffer */ 400 xfs_dabuf_t *curbp = NULL; /* current data/free buffer */
402 xfs_dir2_db_t curdb; /* current data block number */ 401 xfs_dir2_db_t curdb = -1; /* current data block number */
403 xfs_dir2_db_t curfdb; /* current free block number */ 402 xfs_dir2_db_t curfdb = -1; /* current free block number */
404 xfs_dir2_data_entry_t *dep; /* data block entry */
405 xfs_inode_t *dp; /* incore directory inode */ 403 xfs_inode_t *dp; /* incore directory inode */
406 int error; /* error return value */ 404 int error; /* error return value */
407 int fi; /* free entry index */ 405 int fi; /* free entry index */
408 xfs_dir2_free_t *free=NULL; /* free block structure */ 406 xfs_dir2_free_t *free = NULL; /* free block structure */
409 int index; /* leaf entry index */ 407 int index; /* leaf entry index */
410 xfs_dir2_leaf_t *leaf; /* leaf structure */ 408 xfs_dir2_leaf_t *leaf; /* leaf structure */
411 int length=0; /* length of new data entry */ 409 int length; /* length of new data entry */
412 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 410 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
413 xfs_mount_t *mp; /* filesystem mount point */ 411 xfs_mount_t *mp; /* filesystem mount point */
414 xfs_dir2_db_t newdb; /* new data block number */ 412 xfs_dir2_db_t newdb; /* new data block number */
@@ -431,33 +429,20 @@ xfs_dir2_leafn_lookup_int(
431 /* 429 /*
432 * Do we have a buffer coming in? 430 * Do we have a buffer coming in?
433 */ 431 */
434 if (state->extravalid) 432 if (state->extravalid) {
433 /* If so, it's a free block buffer, get the block number. */
435 curbp = state->extrablk.bp; 434 curbp = state->extrablk.bp;
436 else 435 curfdb = state->extrablk.blkno;
437 curbp = NULL; 436 free = curbp->data;
438 /* 437 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
439 * For addname, it's a free block buffer, get the block number.
440 */
441 if (args->addname) {
442 curfdb = curbp ? state->extrablk.blkno : -1;
443 curdb = -1;
444 length = xfs_dir2_data_entsize(args->namelen);
445 if ((free = (curbp ? curbp->data : NULL)))
446 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
447 }
448 /*
449 * For others, it's a data block buffer, get the block number.
450 */
451 else {
452 curfdb = -1;
453 curdb = curbp ? state->extrablk.blkno : -1;
454 } 438 }
439 length = xfs_dir2_data_entsize(args->namelen);
455 /* 440 /*
456 * Loop over leaf entries with the right hash value. 441 * Loop over leaf entries with the right hash value.
457 */ 442 */
458 for (lep = &leaf->ents[index]; 443 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
459 index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval; 444 be32_to_cpu(lep->hashval) == args->hashval;
460 lep++, index++) { 445 lep++, index++) {
461 /* 446 /*
462 * Skip stale leaf entries. 447 * Skip stale leaf entries.
463 */ 448 */
@@ -471,161 +456,244 @@ xfs_dir2_leafn_lookup_int(
471 * For addname, we're looking for a place to put the new entry. 456 * For addname, we're looking for a place to put the new entry.
472 * We want to use a data block with an entry of equal 457 * We want to use a data block with an entry of equal
473 * hash value to ours if there is one with room. 458 * hash value to ours if there is one with room.
459 *
460 * If this block isn't the data block we already have
461 * in hand, take a look at it.
474 */ 462 */
475 if (args->addname) { 463 if (newdb != curdb) {
464 curdb = newdb;
476 /* 465 /*
477 * If this block isn't the data block we already have 466 * Convert the data block to the free block
478 * in hand, take a look at it. 467 * holding its freespace information.
479 */ 468 */
480 if (newdb != curdb) { 469 newfdb = xfs_dir2_db_to_fdb(mp, newdb);
481 curdb = newdb;
482 /*
483 * Convert the data block to the free block
484 * holding its freespace information.
485 */
486 newfdb = xfs_dir2_db_to_fdb(mp, newdb);
487 /*
488 * If it's not the one we have in hand,
489 * read it in.
490 */
491 if (newfdb != curfdb) {
492 /*
493 * If we had one before, drop it.
494 */
495 if (curbp)
496 xfs_da_brelse(tp, curbp);
497 /*
498 * Read the free block.
499 */
500 if ((error = xfs_da_read_buf(tp, dp,
501 xfs_dir2_db_to_da(mp,
502 newfdb),
503 -1, &curbp,
504 XFS_DATA_FORK))) {
505 return error;
506 }
507 free = curbp->data;
508 ASSERT(be32_to_cpu(free->hdr.magic) ==
509 XFS_DIR2_FREE_MAGIC);
510 ASSERT((be32_to_cpu(free->hdr.firstdb) %
511 XFS_DIR2_MAX_FREE_BESTS(mp)) ==
512 0);
513 ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
514 ASSERT(curdb <
515 be32_to_cpu(free->hdr.firstdb) +
516 be32_to_cpu(free->hdr.nvalid));
517 }
518 /*
519 * Get the index for our entry.
520 */
521 fi = xfs_dir2_db_to_fdindex(mp, curdb);
522 /*
523 * If it has room, return it.
524 */
525 if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
526 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
527 XFS_ERRLEVEL_LOW, mp);
528 if (curfdb != newfdb)
529 xfs_da_brelse(tp, curbp);
530 return XFS_ERROR(EFSCORRUPTED);
531 }
532 curfdb = newfdb;
533 if (be16_to_cpu(free->bests[fi]) >= length) {
534 *indexp = index;
535 state->extravalid = 1;
536 state->extrablk.bp = curbp;
537 state->extrablk.blkno = curfdb;
538 state->extrablk.index = fi;
539 state->extrablk.magic =
540 XFS_DIR2_FREE_MAGIC;
541 ASSERT(args->oknoent);
542 return XFS_ERROR(ENOENT);
543 }
544 }
545 }
546 /*
547 * Not adding a new entry, so we really want to find
548 * the name given to us.
549 */
550 else {
551 /* 470 /*
552 * If it's a different data block, go get it. 471 * If it's not the one we have in hand, read it in.
553 */ 472 */
554 if (newdb != curdb) { 473 if (newfdb != curfdb) {
555 /* 474 /*
556 * If we had a block before, drop it. 475 * If we had one before, drop it.
557 */ 476 */
558 if (curbp) 477 if (curbp)
559 xfs_da_brelse(tp, curbp); 478 xfs_da_brelse(tp, curbp);
560 /* 479 /*
561 * Read the data block. 480 * Read the free block.
562 */ 481 */
563 if ((error = 482 error = xfs_da_read_buf(tp, dp,
564 xfs_da_read_buf(tp, dp, 483 xfs_dir2_db_to_da(mp, newfdb),
565 xfs_dir2_db_to_da(mp, newdb), -1, 484 -1, &curbp, XFS_DATA_FORK);
566 &curbp, XFS_DATA_FORK))) { 485 if (error)
567 return error; 486 return error;
568 } 487 free = curbp->data;
569 xfs_dir2_data_check(dp, curbp); 488 ASSERT(be32_to_cpu(free->hdr.magic) ==
570 curdb = newdb; 489 XFS_DIR2_FREE_MAGIC);
490 ASSERT((be32_to_cpu(free->hdr.firstdb) %
491 XFS_DIR2_MAX_FREE_BESTS(mp)) == 0);
492 ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
493 ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
494 be32_to_cpu(free->hdr.nvalid));
571 } 495 }
572 /* 496 /*
573 * Point to the data entry. 497 * Get the index for our entry.
574 */ 498 */
575 dep = (xfs_dir2_data_entry_t *) 499 fi = xfs_dir2_db_to_fdindex(mp, curdb);
576 ((char *)curbp->data +
577 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
578 /* 500 /*
579 * Compare the entry, return it if it matches. 501 * If it has room, return it.
580 */ 502 */
581 if (dep->namelen == args->namelen && 503 if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
582 dep->name[0] == args->name[0] && 504 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
583 memcmp(dep->name, args->name, args->namelen) == 0) { 505 XFS_ERRLEVEL_LOW, mp);
584 args->inumber = be64_to_cpu(dep->inumber); 506 if (curfdb != newfdb)
585 *indexp = index; 507 xfs_da_brelse(tp, curbp);
586 state->extravalid = 1; 508 return XFS_ERROR(EFSCORRUPTED);
587 state->extrablk.bp = curbp;
588 state->extrablk.blkno = curdb;
589 state->extrablk.index =
590 (int)((char *)dep -
591 (char *)curbp->data);
592 state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
593 return XFS_ERROR(EEXIST);
594 } 509 }
510 curfdb = newfdb;
511 if (be16_to_cpu(free->bests[fi]) >= length)
512 goto out;
595 } 513 }
596 } 514 }
515 /* Didn't find any space */
516 fi = -1;
517out:
518 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
519 if (curbp) {
520 /* Giving back a free block. */
521 state->extravalid = 1;
522 state->extrablk.bp = curbp;
523 state->extrablk.index = fi;
524 state->extrablk.blkno = curfdb;
525 state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
526 } else {
527 state->extravalid = 0;
528 }
597 /* 529 /*
598 * Didn't find a match. 530 * Return the index, that will be the insertion point.
599 * If we are holding a buffer, give it back in case our caller
600 * finds it useful.
601 */ 531 */
602 if ((state->extravalid = (curbp != NULL))) { 532 *indexp = index;
603 state->extrablk.bp = curbp; 533 return XFS_ERROR(ENOENT);
604 state->extrablk.index = -1; 534}
535
536/*
537 * Look up a leaf entry in a node-format leaf block.
538 * The extrablk in state a data block.
539 */
540STATIC int
541xfs_dir2_leafn_lookup_for_entry(
542 xfs_dabuf_t *bp, /* leaf buffer */
543 xfs_da_args_t *args, /* operation arguments */
544 int *indexp, /* out: leaf entry index */
545 xfs_da_state_t *state) /* state to fill in */
546{
547 xfs_dabuf_t *curbp = NULL; /* current data/free buffer */
548 xfs_dir2_db_t curdb = -1; /* current data block number */
549 xfs_dir2_data_entry_t *dep; /* data block entry */
550 xfs_inode_t *dp; /* incore directory inode */
551 int error; /* error return value */
552 int index; /* leaf entry index */
553 xfs_dir2_leaf_t *leaf; /* leaf structure */
554 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
555 xfs_mount_t *mp; /* filesystem mount point */
556 xfs_dir2_db_t newdb; /* new data block number */
557 xfs_trans_t *tp; /* transaction pointer */
558 enum xfs_dacmp cmp; /* comparison result */
559
560 dp = args->dp;
561 tp = args->trans;
562 mp = dp->i_mount;
563 leaf = bp->data;
564 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
565#ifdef __KERNEL__
566 ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
567#endif
568 xfs_dir2_leafn_check(dp, bp);
569 /*
570 * Look up the hash value in the leaf entries.
571 */
572 index = xfs_dir2_leaf_search_hash(args, bp);
573 /*
574 * Do we have a buffer coming in?
575 */
576 if (state->extravalid) {
577 curbp = state->extrablk.bp;
578 curdb = state->extrablk.blkno;
579 }
580 /*
581 * Loop over leaf entries with the right hash value.
582 */
583 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
584 be32_to_cpu(lep->hashval) == args->hashval;
585 lep++, index++) {
605 /* 586 /*
606 * For addname, giving back a free block. 587 * Skip stale leaf entries.
607 */ 588 */
608 if (args->addname) { 589 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
609 state->extrablk.blkno = curfdb; 590 continue;
610 state->extrablk.magic = XFS_DIR2_FREE_MAGIC; 591 /*
592 * Pull the data block number from the entry.
593 */
594 newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
595 /*
596 * Not adding a new entry, so we really want to find
597 * the name given to us.
598 *
599 * If it's a different data block, go get it.
600 */
601 if (newdb != curdb) {
602 /*
603 * If we had a block before that we aren't saving
604 * for a CI name, drop it
605 */
606 if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT ||
607 curdb != state->extrablk.blkno))
608 xfs_da_brelse(tp, curbp);
609 /*
610 * If needing the block that is saved with a CI match,
611 * use it otherwise read in the new data block.
612 */
613 if (args->cmpresult != XFS_CMP_DIFFERENT &&
614 newdb == state->extrablk.blkno) {
615 ASSERT(state->extravalid);
616 curbp = state->extrablk.bp;
617 } else {
618 error = xfs_da_read_buf(tp, dp,
619 xfs_dir2_db_to_da(mp, newdb),
620 -1, &curbp, XFS_DATA_FORK);
621 if (error)
622 return error;
623 }
624 xfs_dir2_data_check(dp, curbp);
625 curdb = newdb;
611 } 626 }
612 /* 627 /*
613 * For other callers, giving back a data block. 628 * Point to the data entry.
614 */ 629 */
615 else { 630 dep = (xfs_dir2_data_entry_t *)((char *)curbp->data +
631 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
632 /*
633 * Compare the entry and if it's an exact match, return
634 * EEXIST immediately. If it's the first case-insensitive
635 * match, store the block & inode number and continue looking.
636 */
637 cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
638 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
639 /* If there is a CI match block, drop it */
640 if (args->cmpresult != XFS_CMP_DIFFERENT &&
641 curdb != state->extrablk.blkno)
642 xfs_da_brelse(tp, state->extrablk.bp);
643 args->cmpresult = cmp;
644 args->inumber = be64_to_cpu(dep->inumber);
645 *indexp = index;
646 state->extravalid = 1;
647 state->extrablk.bp = curbp;
616 state->extrablk.blkno = curdb; 648 state->extrablk.blkno = curdb;
649 state->extrablk.index = (int)((char *)dep -
650 (char *)curbp->data);
617 state->extrablk.magic = XFS_DIR2_DATA_MAGIC; 651 state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
652 if (cmp == XFS_CMP_EXACT)
653 return XFS_ERROR(EEXIST);
618 } 654 }
619 } 655 }
620 /* 656 ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
621 * Return the final index, that will be the insertion point. 657 (args->op_flags & XFS_DA_OP_OKNOENT));
622 */ 658 if (curbp) {
659 if (args->cmpresult == XFS_CMP_DIFFERENT) {
660 /* Giving back last used data block. */
661 state->extravalid = 1;
662 state->extrablk.bp = curbp;
663 state->extrablk.index = -1;
664 state->extrablk.blkno = curdb;
665 state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
666 } else {
667 /* If the curbp is not the CI match block, drop it */
668 if (state->extrablk.bp != curbp)
669 xfs_da_brelse(tp, curbp);
670 }
671 } else {
672 state->extravalid = 0;
673 }
623 *indexp = index; 674 *indexp = index;
624 ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent);
625 return XFS_ERROR(ENOENT); 675 return XFS_ERROR(ENOENT);
626} 676}
627 677
628/* 678/*
679 * Look up a leaf entry in a node-format leaf block.
680 * If this is an addname then the extrablk in state is a freespace block,
681 * otherwise it's a data block.
682 */
683int
684xfs_dir2_leafn_lookup_int(
685 xfs_dabuf_t *bp, /* leaf buffer */
686 xfs_da_args_t *args, /* operation arguments */
687 int *indexp, /* out: leaf entry index */
688 xfs_da_state_t *state) /* state to fill in */
689{
690 if (args->op_flags & XFS_DA_OP_ADDNAME)
691 return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp,
692 state);
693 return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state);
694}
695
696/*
629 * Move count leaf entries from source to destination leaf. 697 * Move count leaf entries from source to destination leaf.
630 * Log entries and headers. Stale entries are preserved. 698 * Log entries and headers. Stale entries are preserved.
631 */ 699 */
@@ -823,9 +891,10 @@ xfs_dir2_leafn_rebalance(
823 */ 891 */
824 if (!state->inleaf) 892 if (!state->inleaf)
825 blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count); 893 blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count);
826 894
827 /* 895 /*
828 * Finally sanity check just to make sure we are not returning a negative index 896 * Finally sanity check just to make sure we are not returning a
897 * negative index
829 */ 898 */
830 if(blk2->index < 0) { 899 if(blk2->index < 0) {
831 state->inleaf = 1; 900 state->inleaf = 1;
@@ -1332,7 +1401,7 @@ xfs_dir2_node_addname(
1332 /* 1401 /*
1333 * It worked, fix the hash values up the btree. 1402 * It worked, fix the hash values up the btree.
1334 */ 1403 */
1335 if (!args->justcheck) 1404 if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
1336 xfs_da_fixhashpath(state, &state->path); 1405 xfs_da_fixhashpath(state, &state->path);
1337 } else { 1406 } else {
1338 /* 1407 /*
@@ -1515,7 +1584,8 @@ xfs_dir2_node_addname_int(
1515 /* 1584 /*
1516 * Not allowed to allocate, return failure. 1585 * Not allowed to allocate, return failure.
1517 */ 1586 */
1518 if (args->justcheck || args->total == 0) { 1587 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
1588 args->total == 0) {
1519 /* 1589 /*
1520 * Drop the freespace buffer unless it came from our 1590 * Drop the freespace buffer unless it came from our
1521 * caller. 1591 * caller.
@@ -1661,7 +1731,7 @@ xfs_dir2_node_addname_int(
1661 /* 1731 /*
1662 * If just checking, we succeeded. 1732 * If just checking, we succeeded.
1663 */ 1733 */
1664 if (args->justcheck) { 1734 if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
1665 if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) 1735 if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
1666 xfs_da_buf_done(fbp); 1736 xfs_da_buf_done(fbp);
1667 return 0; 1737 return 0;
@@ -1767,6 +1837,14 @@ xfs_dir2_node_lookup(
1767 error = xfs_da_node_lookup_int(state, &rval); 1837 error = xfs_da_node_lookup_int(state, &rval);
1768 if (error) 1838 if (error)
1769 rval = error; 1839 rval = error;
1840 else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) {
1841 /* If a CI match, dup the actual name and return EEXIST */
1842 xfs_dir2_data_entry_t *dep;
1843
1844 dep = (xfs_dir2_data_entry_t *)((char *)state->extrablk.bp->
1845 data + state->extrablk.index);
1846 rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1847 }
1770 /* 1848 /*
1771 * Release the btree blocks and leaf block. 1849 * Release the btree blocks and leaf block.
1772 */ 1850 */
@@ -1810,9 +1888,8 @@ xfs_dir2_node_removename(
1810 * Look up the entry we're deleting, set up the cursor. 1888 * Look up the entry we're deleting, set up the cursor.
1811 */ 1889 */
1812 error = xfs_da_node_lookup_int(state, &rval); 1890 error = xfs_da_node_lookup_int(state, &rval);
1813 if (error) { 1891 if (error)
1814 rval = error; 1892 rval = error;
1815 }
1816 /* 1893 /*
1817 * Didn't find it, upper layer screwed up. 1894 * Didn't find it, upper layer screwed up.
1818 */ 1895 */
@@ -1829,9 +1906,8 @@ xfs_dir2_node_removename(
1829 */ 1906 */
1830 error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, 1907 error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
1831 &state->extrablk, &rval); 1908 &state->extrablk, &rval);
1832 if (error) { 1909 if (error)
1833 return error; 1910 return error;
1834 }
1835 /* 1911 /*
1836 * Fix the hash values up the btree. 1912 * Fix the hash values up the btree.
1837 */ 1913 */
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 919d275a1cef..b46af0013ec9 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -255,7 +255,7 @@ xfs_dir2_block_to_sf(
255 xfs_dir2_sf_check(args); 255 xfs_dir2_sf_check(args);
256out: 256out:
257 xfs_trans_log_inode(args->trans, dp, logflags); 257 xfs_trans_log_inode(args->trans, dp, logflags);
258 kmem_free(block, mp->m_dirblksize); 258 kmem_free(block);
259 return error; 259 return error;
260} 260}
261 261
@@ -332,7 +332,7 @@ xfs_dir2_sf_addname(
332 /* 332 /*
333 * Just checking or no space reservation, it doesn't fit. 333 * Just checking or no space reservation, it doesn't fit.
334 */ 334 */
335 if (args->justcheck || args->total == 0) 335 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
336 return XFS_ERROR(ENOSPC); 336 return XFS_ERROR(ENOSPC);
337 /* 337 /*
338 * Convert to block form then add the name. 338 * Convert to block form then add the name.
@@ -345,7 +345,7 @@ xfs_dir2_sf_addname(
345 /* 345 /*
346 * Just checking, it fits. 346 * Just checking, it fits.
347 */ 347 */
348 if (args->justcheck) 348 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
349 return 0; 349 return 0;
350 /* 350 /*
351 * Do it the easy way - just add it at the end. 351 * Do it the easy way - just add it at the end.
@@ -512,7 +512,7 @@ xfs_dir2_sf_addname_hard(
512 sfep = xfs_dir2_sf_nextentry(sfp, sfep); 512 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
513 memcpy(sfep, oldsfep, old_isize - nbytes); 513 memcpy(sfep, oldsfep, old_isize - nbytes);
514 } 514 }
515 kmem_free(buf, old_isize); 515 kmem_free(buf);
516 dp->i_d.di_size = new_isize; 516 dp->i_d.di_size = new_isize;
517 xfs_dir2_sf_check(args); 517 xfs_dir2_sf_check(args);
518} 518}
@@ -812,8 +812,11 @@ xfs_dir2_sf_lookup(
812{ 812{
813 xfs_inode_t *dp; /* incore directory inode */ 813 xfs_inode_t *dp; /* incore directory inode */
814 int i; /* entry index */ 814 int i; /* entry index */
815 int error;
815 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 816 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
816 xfs_dir2_sf_t *sfp; /* shortform structure */ 817 xfs_dir2_sf_t *sfp; /* shortform structure */
818 enum xfs_dacmp cmp; /* comparison result */
819 xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */
817 820
818 xfs_dir2_trace_args("sf_lookup", args); 821 xfs_dir2_trace_args("sf_lookup", args);
819 xfs_dir2_sf_check(args); 822 xfs_dir2_sf_check(args);
@@ -836,6 +839,7 @@ xfs_dir2_sf_lookup(
836 */ 839 */
837 if (args->namelen == 1 && args->name[0] == '.') { 840 if (args->namelen == 1 && args->name[0] == '.') {
838 args->inumber = dp->i_ino; 841 args->inumber = dp->i_ino;
842 args->cmpresult = XFS_CMP_EXACT;
839 return XFS_ERROR(EEXIST); 843 return XFS_ERROR(EEXIST);
840 } 844 }
841 /* 845 /*
@@ -844,28 +848,41 @@ xfs_dir2_sf_lookup(
844 if (args->namelen == 2 && 848 if (args->namelen == 2 &&
845 args->name[0] == '.' && args->name[1] == '.') { 849 args->name[0] == '.' && args->name[1] == '.') {
846 args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); 850 args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
851 args->cmpresult = XFS_CMP_EXACT;
847 return XFS_ERROR(EEXIST); 852 return XFS_ERROR(EEXIST);
848 } 853 }
849 /* 854 /*
850 * Loop over all the entries trying to match ours. 855 * Loop over all the entries trying to match ours.
851 */ 856 */
852 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); 857 ci_sfep = NULL;
853 i < sfp->hdr.count; 858 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
854 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 859 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
855 if (sfep->namelen == args->namelen && 860 /*
856 sfep->name[0] == args->name[0] && 861 * Compare name and if it's an exact match, return the inode
857 memcmp(args->name, sfep->name, args->namelen) == 0) { 862 * number. If it's the first case-insensitive match, store the
858 args->inumber = 863 * inode number and continue looking for an exact match.
859 xfs_dir2_sf_get_inumber(sfp, 864 */
860 xfs_dir2_sf_inumberp(sfep)); 865 cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name,
861 return XFS_ERROR(EEXIST); 866 sfep->namelen);
867 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
868 args->cmpresult = cmp;
869 args->inumber = xfs_dir2_sf_get_inumber(sfp,
870 xfs_dir2_sf_inumberp(sfep));
871 if (cmp == XFS_CMP_EXACT)
872 return XFS_ERROR(EEXIST);
873 ci_sfep = sfep;
862 } 874 }
863 } 875 }
876 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
864 /* 877 /*
865 * Didn't find it. 878 * Here, we can only be doing a lookup (not a rename or replace).
879 * If a case-insensitive match was not found, return ENOENT.
866 */ 880 */
867 ASSERT(args->oknoent); 881 if (!ci_sfep)
868 return XFS_ERROR(ENOENT); 882 return XFS_ERROR(ENOENT);
883 /* otherwise process the CI match as required by the caller */
884 error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
885 return XFS_ERROR(error);
869} 886}
870 887
871/* 888/*
@@ -904,24 +921,21 @@ xfs_dir2_sf_removename(
904 * Loop over the old directory entries. 921 * Loop over the old directory entries.
905 * Find the one we're deleting. 922 * Find the one we're deleting.
906 */ 923 */
907 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); 924 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
908 i < sfp->hdr.count; 925 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
909 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 926 if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
910 if (sfep->namelen == args->namelen && 927 XFS_CMP_EXACT) {
911 sfep->name[0] == args->name[0] &&
912 memcmp(sfep->name, args->name, args->namelen) == 0) {
913 ASSERT(xfs_dir2_sf_get_inumber(sfp, 928 ASSERT(xfs_dir2_sf_get_inumber(sfp,
914 xfs_dir2_sf_inumberp(sfep)) == 929 xfs_dir2_sf_inumberp(sfep)) ==
915 args->inumber); 930 args->inumber);
916 break; 931 break;
917 } 932 }
918 } 933 }
919 /* 934 /*
920 * Didn't find it. 935 * Didn't find it.
921 */ 936 */
922 if (i == sfp->hdr.count) { 937 if (i == sfp->hdr.count)
923 return XFS_ERROR(ENOENT); 938 return XFS_ERROR(ENOENT);
924 }
925 /* 939 /*
926 * Calculate sizes. 940 * Calculate sizes.
927 */ 941 */
@@ -1042,11 +1056,10 @@ xfs_dir2_sf_replace(
1042 */ 1056 */
1043 else { 1057 else {
1044 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); 1058 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
1045 i < sfp->hdr.count; 1059 i < sfp->hdr.count;
1046 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 1060 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
1047 if (sfep->namelen == args->namelen && 1061 if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
1048 sfep->name[0] == args->name[0] && 1062 XFS_CMP_EXACT) {
1049 memcmp(args->name, sfep->name, args->namelen) == 0) {
1050#if XFS_BIG_INUMS || defined(DEBUG) 1063#if XFS_BIG_INUMS || defined(DEBUG)
1051 ino = xfs_dir2_sf_get_inumber(sfp, 1064 ino = xfs_dir2_sf_get_inumber(sfp,
1052 xfs_dir2_sf_inumberp(sfep)); 1065 xfs_dir2_sf_inumberp(sfep));
@@ -1061,7 +1074,7 @@ xfs_dir2_sf_replace(
1061 * Didn't find it. 1074 * Didn't find it.
1062 */ 1075 */
1063 if (i == sfp->hdr.count) { 1076 if (i == sfp->hdr.count) {
1064 ASSERT(args->oknoent); 1077 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1065#if XFS_BIG_INUMS 1078#if XFS_BIG_INUMS
1066 if (i8elevated) 1079 if (i8elevated)
1067 xfs_dir2_sf_toino4(args); 1080 xfs_dir2_sf_toino4(args);
@@ -1174,7 +1187,7 @@ xfs_dir2_sf_toino4(
1174 /* 1187 /*
1175 * Clean up the inode. 1188 * Clean up the inode.
1176 */ 1189 */
1177 kmem_free(buf, oldsize); 1190 kmem_free(buf);
1178 dp->i_d.di_size = newsize; 1191 dp->i_d.di_size = newsize;
1179 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 1192 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
1180} 1193}
@@ -1251,7 +1264,7 @@ xfs_dir2_sf_toino8(
1251 /* 1264 /*
1252 * Clean up the inode. 1265 * Clean up the inode.
1253 */ 1266 */
1254 kmem_free(buf, oldsize); 1267 kmem_free(buf);
1255 dp->i_d.di_size = newsize; 1268 dp->i_d.di_size = newsize;
1256 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 1269 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
1257} 1270}
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 005629d702d2..deecc9d238f8 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -62,7 +62,7 @@ typedef union {
62 * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. 62 * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
63 * Only need 16 bits, this is the byte offset into the single block form. 63 * Only need 16 bits, this is the byte offset into the single block form.
64 */ 64 */
65typedef struct { __uint8_t i[2]; } xfs_dir2_sf_off_t; 65typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
66 66
67/* 67/*
68 * The parent directory has a dedicated field, and the self-pointer must 68 * The parent directory has a dedicated field, and the self-pointer must
@@ -76,14 +76,14 @@ typedef struct xfs_dir2_sf_hdr {
76 __uint8_t count; /* count of entries */ 76 __uint8_t count; /* count of entries */
77 __uint8_t i8count; /* count of 8-byte inode #s */ 77 __uint8_t i8count; /* count of 8-byte inode #s */
78 xfs_dir2_inou_t parent; /* parent dir inode number */ 78 xfs_dir2_inou_t parent; /* parent dir inode number */
79} xfs_dir2_sf_hdr_t; 79} __arch_pack xfs_dir2_sf_hdr_t;
80 80
81typedef struct xfs_dir2_sf_entry { 81typedef struct xfs_dir2_sf_entry {
82 __uint8_t namelen; /* actual name length */ 82 __uint8_t namelen; /* actual name length */
83 xfs_dir2_sf_off_t offset; /* saved offset */ 83 xfs_dir2_sf_off_t offset; /* saved offset */
84 __uint8_t name[1]; /* name, variable size */ 84 __uint8_t name[1]; /* name, variable size */
85 xfs_dir2_inou_t inumber; /* inode number, var. offset */ 85 xfs_dir2_inou_t inumber; /* inode number, var. offset */
86} xfs_dir2_sf_entry_t; 86} __arch_pack xfs_dir2_sf_entry_t;
87 87
88typedef struct xfs_dir2_sf { 88typedef struct xfs_dir2_sf {
89 xfs_dir2_sf_hdr_t hdr; /* shortform header */ 89 xfs_dir2_sf_hdr_t hdr; /* shortform header */
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
index f3fb2ffd6f5c..6cc7c0c681ac 100644
--- a/fs/xfs/xfs_dir2_trace.c
+++ b/fs/xfs/xfs_dir2_trace.c
@@ -85,7 +85,8 @@ xfs_dir2_trace_args(
85 (void *)((unsigned long)(args->inumber >> 32)), 85 (void *)((unsigned long)(args->inumber >> 32)),
86 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 86 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
87 (void *)args->dp, (void *)args->trans, 87 (void *)args->dp, (void *)args->trans,
88 (void *)(unsigned long)args->justcheck, NULL, NULL); 88 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
89 NULL, NULL);
89} 90}
90 91
91void 92void
@@ -100,7 +101,7 @@ xfs_dir2_trace_args_b(
100 (void *)((unsigned long)(args->inumber >> 32)), 101 (void *)((unsigned long)(args->inumber >> 32)),
101 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 102 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
102 (void *)args->dp, (void *)args->trans, 103 (void *)args->dp, (void *)args->trans,
103 (void *)(unsigned long)args->justcheck, 104 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
104 (void *)(bp ? bp->bps[0] : NULL), NULL); 105 (void *)(bp ? bp->bps[0] : NULL), NULL);
105} 106}
106 107
@@ -117,7 +118,7 @@ xfs_dir2_trace_args_bb(
117 (void *)((unsigned long)(args->inumber >> 32)), 118 (void *)((unsigned long)(args->inumber >> 32)),
118 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 119 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
119 (void *)args->dp, (void *)args->trans, 120 (void *)args->dp, (void *)args->trans,
120 (void *)(unsigned long)args->justcheck, 121 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
121 (void *)(lbp ? lbp->bps[0] : NULL), 122 (void *)(lbp ? lbp->bps[0] : NULL),
122 (void *)(dbp ? dbp->bps[0] : NULL)); 123 (void *)(dbp ? dbp->bps[0] : NULL));
123} 124}
@@ -157,8 +158,8 @@ xfs_dir2_trace_args_db(
157 (void *)((unsigned long)(args->inumber >> 32)), 158 (void *)((unsigned long)(args->inumber >> 32)),
158 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 159 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
159 (void *)args->dp, (void *)args->trans, 160 (void *)args->dp, (void *)args->trans,
160 (void *)(unsigned long)args->justcheck, (void *)(long)db, 161 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
161 (void *)dbp); 162 (void *)(long)db, (void *)dbp);
162} 163}
163 164
164void 165void
@@ -173,7 +174,7 @@ xfs_dir2_trace_args_i(
173 (void *)((unsigned long)(args->inumber >> 32)), 174 (void *)((unsigned long)(args->inumber >> 32)),
174 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 175 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
175 (void *)args->dp, (void *)args->trans, 176 (void *)args->dp, (void *)args->trans,
176 (void *)(unsigned long)args->justcheck, 177 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
177 (void *)((unsigned long)(i >> 32)), 178 (void *)((unsigned long)(i >> 32)),
178 (void *)((unsigned long)(i & 0xFFFFFFFF))); 179 (void *)((unsigned long)(i & 0xFFFFFFFF)));
179} 180}
@@ -190,7 +191,8 @@ xfs_dir2_trace_args_s(
190 (void *)((unsigned long)(args->inumber >> 32)), 191 (void *)((unsigned long)(args->inumber >> 32)),
191 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 192 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
192 (void *)args->dp, (void *)args->trans, 193 (void *)args->dp, (void *)args->trans,
193 (void *)(unsigned long)args->justcheck, (void *)(long)s, NULL); 194 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
195 (void *)(long)s, NULL);
194} 196}
195 197
196void 198void
@@ -208,7 +210,7 @@ xfs_dir2_trace_args_sb(
208 (void *)((unsigned long)(args->inumber >> 32)), 210 (void *)((unsigned long)(args->inumber >> 32)),
209 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 211 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
210 (void *)args->dp, (void *)args->trans, 212 (void *)args->dp, (void *)args->trans,
211 (void *)(unsigned long)args->justcheck, (void *)(long)s, 213 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
212 (void *)dbp); 214 (void *)(long)s, (void *)dbp);
213} 215}
214#endif /* XFS_DIR2_TRACE */ 216#endif /* XFS_DIR2_TRACE */
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index f71784ab6a60..2813cdd72375 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -18,7 +18,6 @@
18#ifndef __XFS_DMAPI_H__ 18#ifndef __XFS_DMAPI_H__
19#define __XFS_DMAPI_H__ 19#define __XFS_DMAPI_H__
20 20
21#include <linux/version.h>
22/* Values used to define the on-disk version of dm_attrname_t. All 21/* Values used to define the on-disk version of dm_attrname_t. All
23 * on-disk attribute names start with the 8-byte string "SGI_DMI_". 22 * on-disk attribute names start with the 8-byte string "SGI_DMI_".
24 * 23 *
@@ -166,6 +165,6 @@ typedef enum {
166 165
167#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \ 166#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
168 DM_FLAGS_NDELAY : 0) 167 DM_FLAGS_NDELAY : 0)
169#define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0) 168#define AT_DELAY_FLAG(f) ((f & XFS_ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
170 169
171#endif /* __XFS_DMAPI_H__ */ 170#endif /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 05e5365d3c31..f227ecd1a294 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -58,22 +58,11 @@ xfs_error_trap(int e)
58 } 58 }
59 return e; 59 return e;
60} 60}
61#endif
62
63#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
64 61
65int xfs_etest[XFS_NUM_INJECT_ERROR]; 62int xfs_etest[XFS_NUM_INJECT_ERROR];
66int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; 63int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
67char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; 64char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
68 65
69void
70xfs_error_test_init(void)
71{
72 memset(xfs_etest, 0, sizeof(xfs_etest));
73 memset(xfs_etest_fsid, 0, sizeof(xfs_etest_fsid));
74 memset(xfs_etest_fsname, 0, sizeof(xfs_etest_fsname));
75}
76
77int 66int
78xfs_error_test(int error_tag, int *fsidp, char *expression, 67xfs_error_test(int error_tag, int *fsidp, char *expression,
79 int line, char *file, unsigned long randfactor) 68 int line, char *file, unsigned long randfactor)
@@ -150,8 +139,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
150 xfs_etest[i]); 139 xfs_etest[i]);
151 xfs_etest[i] = 0; 140 xfs_etest[i] = 0;
152 xfs_etest_fsid[i] = 0LL; 141 xfs_etest_fsid[i] = 0LL;
153 kmem_free(xfs_etest_fsname[i], 142 kmem_free(xfs_etest_fsname[i]);
154 strlen(xfs_etest_fsname[i]) + 1);
155 xfs_etest_fsname[i] = NULL; 143 xfs_etest_fsname[i] = NULL;
156 } 144 }
157 } 145 }
@@ -163,7 +151,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
163 151
164 return 0; 152 return 0;
165} 153}
166#endif /* DEBUG || INDUCE_IO_ERROR */ 154#endif /* DEBUG */
167 155
168static void 156static void
169xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap) 157xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap)
@@ -175,7 +163,7 @@ xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap)
175 newfmt = kmem_alloc(len, KM_SLEEP); 163 newfmt = kmem_alloc(len, KM_SLEEP);
176 sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt); 164 sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt);
177 icmn_err(level, newfmt, ap); 165 icmn_err(level, newfmt, ap);
178 kmem_free(newfmt, len); 166 kmem_free(newfmt);
179 } else { 167 } else {
180 icmn_err(level, fmt, ap); 168 icmn_err(level, fmt, ap);
181 } 169 }
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 6490d2a9f8e1..11543f10b0c6 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -125,23 +125,14 @@ extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp,
125#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10) 125#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10)
126#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT 126#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
127 127
128#if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) 128#ifdef DEBUG
129extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); 129extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
130extern void xfs_error_test_init(void);
131 130
132#define XFS_NUM_INJECT_ERROR 10 131#define XFS_NUM_INJECT_ERROR 10
133
134#ifdef __ANSI_CPP__
135#define XFS_TEST_ERROR(expr, mp, tag, rf) \
136 ((expr) || \
137 xfs_error_test((tag), (mp)->m_fixedfsid, #expr, __LINE__, __FILE__, \
138 (rf)))
139#else
140#define XFS_TEST_ERROR(expr, mp, tag, rf) \ 132#define XFS_TEST_ERROR(expr, mp, tag, rf) \
141 ((expr) || \ 133 ((expr) || \
142 xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ 134 xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
143 (rf))) 135 (rf)))
144#endif /* __ANSI_CPP__ */
145 136
146extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); 137extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);
147extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); 138extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
@@ -149,7 +140,7 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
149#define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) 140#define XFS_TEST_ERROR(expr, mp, tag, rf) (expr)
150#define xfs_errortag_add(tag, mp) (ENOSYS) 141#define xfs_errortag_add(tag, mp) (ENOSYS)
151#define xfs_errortag_clearall(mp, loud) (ENOSYS) 142#define xfs_errortag_clearall(mp, loud) (ENOSYS)
152#endif /* (DEBUG || INDUCE_IO_ERROR) */ 143#endif /* DEBUG */
153 144
154/* 145/*
155 * XFS panic tags -- allow a call to xfs_cmn_err() be turned into 146 * XFS panic tags -- allow a call to xfs_cmn_err() be turned into
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 132bd07b9bb8..8aa28f751b2a 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -41,8 +41,7 @@ xfs_efi_item_free(xfs_efi_log_item_t *efip)
41 int nexts = efip->efi_format.efi_nextents; 41 int nexts = efip->efi_format.efi_nextents;
42 42
43 if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { 43 if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
44 kmem_free(efip, sizeof(xfs_efi_log_item_t) + 44 kmem_free(efip);
45 (nexts - 1) * sizeof(xfs_extent_t));
46 } else { 45 } else {
47 kmem_zone_free(xfs_efi_zone, efip); 46 kmem_zone_free(xfs_efi_zone, efip);
48 } 47 }
@@ -374,8 +373,7 @@ xfs_efd_item_free(xfs_efd_log_item_t *efdp)
374 int nexts = efdp->efd_format.efd_nextents; 373 int nexts = efdp->efd_format.efd_nextents;
375 374
376 if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { 375 if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
377 kmem_free(efdp, sizeof(xfs_efd_log_item_t) + 376 kmem_free(efdp);
378 (nexts - 1) * sizeof(xfs_extent_t));
379 } else { 377 } else {
380 kmem_zone_free(xfs_efd_zone, efdp); 378 kmem_zone_free(xfs_efd_zone, efdp);
381 } 379 }
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 3f3785b10804..f3bb75da384e 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -397,10 +397,12 @@ int
397xfs_filestream_init(void) 397xfs_filestream_init(void)
398{ 398{
399 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); 399 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
400 if (!item_zone)
401 return -ENOMEM;
400#ifdef XFS_FILESTREAMS_TRACE 402#ifdef XFS_FILESTREAMS_TRACE
401 xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP); 403 xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_NOFS);
402#endif 404#endif
403 return item_zone ? 0 : -ENOMEM; 405 return 0;
404} 406}
405 407
406/* 408/*
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 3bed6433d050..01c0cc88d3f3 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks {
239#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ 239#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */
240#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ 240#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */
241#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ 241#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */
242#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */
242#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ 243#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
243 244
244 245
@@ -371,6 +372,9 @@ typedef struct xfs_fsop_attrlist_handlereq {
371 372
372typedef struct xfs_attr_multiop { 373typedef struct xfs_attr_multiop {
373 __u32 am_opcode; 374 __u32 am_opcode;
375#define ATTR_OP_GET 1 /* return the indicated attr's value */
376#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */
377#define ATTR_OP_REMOVE 3 /* remove the indicated attr */
374 __s32 am_error; 378 __s32 am_error;
375 void __user *am_attrname; 379 void __user *am_attrname;
376 void __user *am_attrvalue; 380 void __user *am_attrvalue;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 381ebda4f7bc..84583cf73db3 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -95,6 +95,8 @@ xfs_fs_geometry(
95 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) | 95 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
96 (xfs_sb_version_hassector(&mp->m_sb) ? 96 (xfs_sb_version_hassector(&mp->m_sb) ?
97 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | 97 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
98 (xfs_sb_version_hasasciici(&mp->m_sb) ?
99 XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) |
98 (xfs_sb_version_haslazysbcount(&mp->m_sb) ? 100 (xfs_sb_version_haslazysbcount(&mp->m_sb) ?
99 XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | 101 XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
100 (xfs_sb_version_hasattr2(&mp->m_sb) ? 102 (xfs_sb_version_hasattr2(&mp->m_sb) ?
@@ -625,7 +627,7 @@ xfs_fs_goingdown(
625 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); 627 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
626 thaw_bdev(sb->s_bdev, sb); 628 thaw_bdev(sb->s_bdev, sb);
627 } 629 }
628 630
629 break; 631 break;
630 } 632 }
631 case XFS_FSOP_GOING_FLAGS_LOGFLUSH: 633 case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index e5310c90e50f..83502f3edef0 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -181,7 +181,7 @@ xfs_inobt_delrec(
181 * then we can get rid of this level. 181 * then we can get rid of this level.
182 */ 182 */
183 if (numrecs == 1 && level > 0) { 183 if (numrecs == 1 && level > 0) {
184 agbp = cur->bc_private.i.agbp; 184 agbp = cur->bc_private.a.agbp;
185 agi = XFS_BUF_TO_AGI(agbp); 185 agi = XFS_BUF_TO_AGI(agbp);
186 /* 186 /*
187 * pp is still set to the first pointer in the block. 187 * pp is still set to the first pointer in the block.
@@ -194,7 +194,7 @@ xfs_inobt_delrec(
194 * Free the block. 194 * Free the block.
195 */ 195 */
196 if ((error = xfs_free_extent(cur->bc_tp, 196 if ((error = xfs_free_extent(cur->bc_tp,
197 XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1))) 197 XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1)))
198 return error; 198 return error;
199 xfs_trans_binval(cur->bc_tp, bp); 199 xfs_trans_binval(cur->bc_tp, bp);
200 xfs_ialloc_log_agi(cur->bc_tp, agbp, 200 xfs_ialloc_log_agi(cur->bc_tp, agbp,
@@ -379,7 +379,7 @@ xfs_inobt_delrec(
379 rrecs = be16_to_cpu(right->bb_numrecs); 379 rrecs = be16_to_cpu(right->bb_numrecs);
380 rbp = bp; 380 rbp = bp;
381 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 381 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
382 cur->bc_private.i.agno, lbno, 0, &lbp, 382 cur->bc_private.a.agno, lbno, 0, &lbp,
383 XFS_INO_BTREE_REF))) 383 XFS_INO_BTREE_REF)))
384 return error; 384 return error;
385 left = XFS_BUF_TO_INOBT_BLOCK(lbp); 385 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
@@ -401,7 +401,7 @@ xfs_inobt_delrec(
401 lrecs = be16_to_cpu(left->bb_numrecs); 401 lrecs = be16_to_cpu(left->bb_numrecs);
402 lbp = bp; 402 lbp = bp;
403 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 403 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
404 cur->bc_private.i.agno, rbno, 0, &rbp, 404 cur->bc_private.a.agno, rbno, 0, &rbp,
405 XFS_INO_BTREE_REF))) 405 XFS_INO_BTREE_REF)))
406 return error; 406 return error;
407 right = XFS_BUF_TO_INOBT_BLOCK(rbp); 407 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
@@ -484,7 +484,7 @@ xfs_inobt_delrec(
484 xfs_buf_t *rrbp; 484 xfs_buf_t *rrbp;
485 485
486 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 486 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
487 cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), 0, 487 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
488 &rrbp, XFS_INO_BTREE_REF))) 488 &rrbp, XFS_INO_BTREE_REF)))
489 return error; 489 return error;
490 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); 490 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
@@ -497,7 +497,7 @@ xfs_inobt_delrec(
497 * Free the deleting block. 497 * Free the deleting block.
498 */ 498 */
499 if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp, 499 if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
500 cur->bc_private.i.agno, rbno), 1))) 500 cur->bc_private.a.agno, rbno), 1)))
501 return error; 501 return error;
502 xfs_trans_binval(cur->bc_tp, rbp); 502 xfs_trans_binval(cur->bc_tp, rbp);
503 /* 503 /*
@@ -854,7 +854,7 @@ xfs_inobt_lookup(
854 { 854 {
855 xfs_agi_t *agi; /* a.g. inode header */ 855 xfs_agi_t *agi; /* a.g. inode header */
856 856
857 agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp); 857 agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
858 agno = be32_to_cpu(agi->agi_seqno); 858 agno = be32_to_cpu(agi->agi_seqno);
859 agbno = be32_to_cpu(agi->agi_root); 859 agbno = be32_to_cpu(agi->agi_root);
860 } 860 }
@@ -1089,7 +1089,7 @@ xfs_inobt_lshift(
1089 * Set up the left neighbor as "left". 1089 * Set up the left neighbor as "left".
1090 */ 1090 */
1091 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, 1091 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1092 cur->bc_private.i.agno, be32_to_cpu(right->bb_leftsib), 1092 cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
1093 0, &lbp, XFS_INO_BTREE_REF))) 1093 0, &lbp, XFS_INO_BTREE_REF)))
1094 return error; 1094 return error;
1095 left = XFS_BUF_TO_INOBT_BLOCK(lbp); 1095 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
@@ -1207,10 +1207,10 @@ xfs_inobt_newroot(
1207 /* 1207 /*
1208 * Get a block & a buffer. 1208 * Get a block & a buffer.
1209 */ 1209 */
1210 agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp); 1210 agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
1211 args.tp = cur->bc_tp; 1211 args.tp = cur->bc_tp;
1212 args.mp = cur->bc_mp; 1212 args.mp = cur->bc_mp;
1213 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, 1213 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno,
1214 be32_to_cpu(agi->agi_root)); 1214 be32_to_cpu(agi->agi_root));
1215 args.mod = args.minleft = args.alignment = args.total = args.wasdel = 1215 args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1216 args.isfl = args.userdata = args.minalignslop = 0; 1216 args.isfl = args.userdata = args.minalignslop = 0;
@@ -1233,7 +1233,7 @@ xfs_inobt_newroot(
1233 */ 1233 */
1234 agi->agi_root = cpu_to_be32(args.agbno); 1234 agi->agi_root = cpu_to_be32(args.agbno);
1235 be32_add_cpu(&agi->agi_level, 1); 1235 be32_add_cpu(&agi->agi_level, 1);
1236 xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp, 1236 xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp,
1237 XFS_AGI_ROOT | XFS_AGI_LEVEL); 1237 XFS_AGI_ROOT | XFS_AGI_LEVEL);
1238 /* 1238 /*
1239 * At the previous root level there are now two blocks: the old 1239 * At the previous root level there are now two blocks: the old
@@ -1376,7 +1376,7 @@ xfs_inobt_rshift(
1376 * Set up the right neighbor as "right". 1376 * Set up the right neighbor as "right".
1377 */ 1377 */
1378 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, 1378 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1379 cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), 1379 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
1380 0, &rbp, XFS_INO_BTREE_REF))) 1380 0, &rbp, XFS_INO_BTREE_REF)))
1381 return error; 1381 return error;
1382 right = XFS_BUF_TO_INOBT_BLOCK(rbp); 1382 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
@@ -1492,7 +1492,7 @@ xfs_inobt_split(
1492 * Allocate the new block. 1492 * Allocate the new block.
1493 * If we can't do it, we're toast. Give up. 1493 * If we can't do it, we're toast. Give up.
1494 */ 1494 */
1495 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno); 1495 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno);
1496 args.mod = args.minleft = args.alignment = args.total = args.wasdel = 1496 args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1497 args.isfl = args.userdata = args.minalignslop = 0; 1497 args.isfl = args.userdata = args.minalignslop = 0;
1498 args.minlen = args.maxlen = args.prod = 1; 1498 args.minlen = args.maxlen = args.prod = 1;
@@ -1725,7 +1725,7 @@ xfs_inobt_decrement(
1725 1725
1726 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); 1726 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1727 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, 1727 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1728 cur->bc_private.i.agno, agbno, 0, &bp, 1728 cur->bc_private.a.agno, agbno, 0, &bp,
1729 XFS_INO_BTREE_REF))) 1729 XFS_INO_BTREE_REF)))
1730 return error; 1730 return error;
1731 lev--; 1731 lev--;
@@ -1897,7 +1897,7 @@ xfs_inobt_increment(
1897 1897
1898 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); 1898 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1899 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, 1899 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1900 cur->bc_private.i.agno, agbno, 0, &bp, 1900 cur->bc_private.a.agno, agbno, 0, &bp,
1901 XFS_INO_BTREE_REF))) 1901 XFS_INO_BTREE_REF)))
1902 return error; 1902 return error;
1903 lev--; 1903 lev--;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index b07604b94d9f..e229e9e001c2 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -216,7 +216,14 @@ finish_inode:
216 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 216 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
217 init_waitqueue_head(&ip->i_ipin_wait); 217 init_waitqueue_head(&ip->i_ipin_wait);
218 atomic_set(&ip->i_pincount, 0); 218 atomic_set(&ip->i_pincount, 0);
219 initnsema(&ip->i_flock, 1, "xfsfino"); 219
220 /*
221 * Because we want to use a counting completion, complete
222 * the flush completion once to allow a single access to
223 * the flush completion without blocking.
224 */
225 init_completion(&ip->i_flush);
226 complete(&ip->i_flush);
220 227
221 if (lock_flags) 228 if (lock_flags)
222 xfs_ilock(ip, lock_flags); 229 xfs_ilock(ip, lock_flags);
@@ -288,10 +295,17 @@ finish_inode:
288 *ipp = ip; 295 *ipp = ip;
289 296
290 /* 297 /*
298 * Set up the Linux with the Linux inode.
299 */
300 ip->i_vnode = inode;
301 inode->i_private = ip;
302
303 /*
291 * If we have a real type for an on-disk inode, we can set ops(&unlock) 304 * If we have a real type for an on-disk inode, we can set ops(&unlock)
292 * now. If it's a new inode being created, xfs_ialloc will handle it. 305 * now. If it's a new inode being created, xfs_ialloc will handle it.
293 */ 306 */
294 xfs_initialize_vnode(mp, inode, ip); 307 if (ip->i_d.di_mode != 0)
308 xfs_setup_inode(ip);
295 return 0; 309 return 0;
296} 310}
297 311
@@ -411,10 +425,11 @@ xfs_iput(xfs_inode_t *ip,
411 * Special iput for brand-new inodes that are still locked 425 * Special iput for brand-new inodes that are still locked
412 */ 426 */
413void 427void
414xfs_iput_new(xfs_inode_t *ip, 428xfs_iput_new(
415 uint lock_flags) 429 xfs_inode_t *ip,
430 uint lock_flags)
416{ 431{
417 struct inode *inode = ip->i_vnode; 432 struct inode *inode = VFS_I(ip);
418 433
419 xfs_itrace_entry(ip); 434 xfs_itrace_entry(ip);
420 435
@@ -775,26 +790,3 @@ xfs_isilocked(
775} 790}
776#endif 791#endif
777 792
778/*
779 * The following three routines simply manage the i_flock
780 * semaphore embedded in the inode. This semaphore synchronizes
781 * processes attempting to flush the in-core inode back to disk.
782 */
783void
784xfs_iflock(xfs_inode_t *ip)
785{
786 psema(&(ip->i_flock), PINOD|PLTWAIT);
787}
788
789int
790xfs_iflock_nowait(xfs_inode_t *ip)
791{
792 return (cpsema(&(ip->i_flock)));
793}
794
795void
796xfs_ifunlock(xfs_inode_t *ip)
797{
798 ASSERT(issemalocked(&(ip->i_flock)));
799 vsema(&(ip->i_flock));
800}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e569bf5d6cf0..00e80df9dd9d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -580,8 +580,8 @@ xfs_iformat_extents(
580 xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); 580 xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
581 for (i = 0; i < nex; i++, dp++) { 581 for (i = 0; i < nex; i++, dp++) {
582 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 582 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
583 ep->l0 = be64_to_cpu(get_unaligned(&dp->l0)); 583 ep->l0 = get_unaligned_be64(&dp->l0);
584 ep->l1 = be64_to_cpu(get_unaligned(&dp->l1)); 584 ep->l1 = get_unaligned_be64(&dp->l1);
585 } 585 }
586 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); 586 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
587 if (whichfork != XFS_DATA_FORK || 587 if (whichfork != XFS_DATA_FORK ||
@@ -835,22 +835,22 @@ xfs_iread(
835 * Do this before xfs_iformat in case it adds entries. 835 * Do this before xfs_iformat in case it adds entries.
836 */ 836 */
837#ifdef XFS_INODE_TRACE 837#ifdef XFS_INODE_TRACE
838 ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_SLEEP); 838 ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
839#endif 839#endif
840#ifdef XFS_BMAP_TRACE 840#ifdef XFS_BMAP_TRACE
841 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP); 841 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
842#endif 842#endif
843#ifdef XFS_BMBT_TRACE 843#ifdef XFS_BMBT_TRACE
844 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP); 844 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
845#endif 845#endif
846#ifdef XFS_RW_TRACE 846#ifdef XFS_RW_TRACE
847 ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP); 847 ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
848#endif 848#endif
849#ifdef XFS_ILOCK_TRACE 849#ifdef XFS_ILOCK_TRACE
850 ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP); 850 ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
851#endif 851#endif
852#ifdef XFS_DIR2_TRACE 852#ifdef XFS_DIR2_TRACE
853 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP); 853 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
854#endif 854#endif
855 855
856 /* 856 /*
@@ -1046,9 +1046,9 @@ xfs_ialloc(
1046{ 1046{
1047 xfs_ino_t ino; 1047 xfs_ino_t ino;
1048 xfs_inode_t *ip; 1048 xfs_inode_t *ip;
1049 bhv_vnode_t *vp;
1050 uint flags; 1049 uint flags;
1051 int error; 1050 int error;
1051 timespec_t tv;
1052 1052
1053 /* 1053 /*
1054 * Call the space management code to pick 1054 * Call the space management code to pick
@@ -1077,13 +1077,12 @@ xfs_ialloc(
1077 } 1077 }
1078 ASSERT(ip != NULL); 1078 ASSERT(ip != NULL);
1079 1079
1080 vp = XFS_ITOV(ip);
1081 ip->i_d.di_mode = (__uint16_t)mode; 1080 ip->i_d.di_mode = (__uint16_t)mode;
1082 ip->i_d.di_onlink = 0; 1081 ip->i_d.di_onlink = 0;
1083 ip->i_d.di_nlink = nlink; 1082 ip->i_d.di_nlink = nlink;
1084 ASSERT(ip->i_d.di_nlink == nlink); 1083 ASSERT(ip->i_d.di_nlink == nlink);
1085 ip->i_d.di_uid = current_fsuid(cr); 1084 ip->i_d.di_uid = current_fsuid();
1086 ip->i_d.di_gid = current_fsgid(cr); 1085 ip->i_d.di_gid = current_fsgid();
1087 ip->i_d.di_projid = prid; 1086 ip->i_d.di_projid = prid;
1088 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 1087 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
1089 1088
@@ -1130,7 +1129,13 @@ xfs_ialloc(
1130 ip->i_size = 0; 1129 ip->i_size = 0;
1131 ip->i_d.di_nextents = 0; 1130 ip->i_d.di_nextents = 0;
1132 ASSERT(ip->i_d.di_nblocks == 0); 1131 ASSERT(ip->i_d.di_nblocks == 0);
1133 xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD); 1132
1133 nanotime(&tv);
1134 ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
1135 ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
1136 ip->i_d.di_atime = ip->i_d.di_mtime;
1137 ip->i_d.di_ctime = ip->i_d.di_mtime;
1138
1134 /* 1139 /*
1135 * di_gen will have been taken care of in xfs_iread. 1140 * di_gen will have been taken care of in xfs_iread.
1136 */ 1141 */
@@ -1220,7 +1225,7 @@ xfs_ialloc(
1220 xfs_trans_log_inode(tp, ip, flags); 1225 xfs_trans_log_inode(tp, ip, flags);
1221 1226
1222 /* now that we have an i_mode we can setup inode ops and unlock */ 1227 /* now that we have an i_mode we can setup inode ops and unlock */
1223 xfs_initialize_vnode(tp->t_mountp, vp, ip); 1228 xfs_setup_inode(ip);
1224 1229
1225 *ipp = ip; 1230 *ipp = ip;
1226 return 0; 1231 return 0;
@@ -1399,7 +1404,6 @@ xfs_itruncate_start(
1399 xfs_fsize_t last_byte; 1404 xfs_fsize_t last_byte;
1400 xfs_off_t toss_start; 1405 xfs_off_t toss_start;
1401 xfs_mount_t *mp; 1406 xfs_mount_t *mp;
1402 bhv_vnode_t *vp;
1403 int error = 0; 1407 int error = 0;
1404 1408
1405 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1409 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
@@ -1408,7 +1412,6 @@ xfs_itruncate_start(
1408 (flags == XFS_ITRUNC_MAYBE)); 1412 (flags == XFS_ITRUNC_MAYBE));
1409 1413
1410 mp = ip->i_mount; 1414 mp = ip->i_mount;
1411 vp = XFS_ITOV(ip);
1412 1415
1413 /* wait for the completion of any pending DIOs */ 1416 /* wait for the completion of any pending DIOs */
1414 if (new_size < ip->i_size) 1417 if (new_size < ip->i_size)
@@ -1457,7 +1460,7 @@ xfs_itruncate_start(
1457 1460
1458#ifdef DEBUG 1461#ifdef DEBUG
1459 if (new_size == 0) { 1462 if (new_size == 0) {
1460 ASSERT(VN_CACHED(vp) == 0); 1463 ASSERT(VN_CACHED(VFS_I(ip)) == 0);
1461 } 1464 }
1462#endif 1465#endif
1463 return error; 1466 return error;
@@ -1763,67 +1766,6 @@ xfs_itruncate_finish(
1763 return 0; 1766 return 0;
1764} 1767}
1765 1768
1766
1767/*
1768 * xfs_igrow_start
1769 *
1770 * Do the first part of growing a file: zero any data in the last
1771 * block that is beyond the old EOF. We need to do this before
1772 * the inode is joined to the transaction to modify the i_size.
1773 * That way we can drop the inode lock and call into the buffer
1774 * cache to get the buffer mapping the EOF.
1775 */
1776int
1777xfs_igrow_start(
1778 xfs_inode_t *ip,
1779 xfs_fsize_t new_size,
1780 cred_t *credp)
1781{
1782 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1783 ASSERT(new_size > ip->i_size);
1784
1785 /*
1786 * Zero any pages that may have been created by
1787 * xfs_write_file() beyond the end of the file
1788 * and any blocks between the old and new file sizes.
1789 */
1790 return xfs_zero_eof(ip, new_size, ip->i_size);
1791}
1792
1793/*
1794 * xfs_igrow_finish
1795 *
1796 * This routine is called to extend the size of a file.
1797 * The inode must have both the iolock and the ilock locked
1798 * for update and it must be a part of the current transaction.
1799 * The xfs_igrow_start() function must have been called previously.
1800 * If the change_flag is not zero, the inode change timestamp will
1801 * be updated.
1802 */
1803void
1804xfs_igrow_finish(
1805 xfs_trans_t *tp,
1806 xfs_inode_t *ip,
1807 xfs_fsize_t new_size,
1808 int change_flag)
1809{
1810 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1811 ASSERT(ip->i_transp == tp);
1812 ASSERT(new_size > ip->i_size);
1813
1814 /*
1815 * Update the file size. Update the inode change timestamp
1816 * if change_flag set.
1817 */
1818 ip->i_d.di_size = new_size;
1819 ip->i_size = new_size;
1820 if (change_flag)
1821 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
1822 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1823
1824}
1825
1826
1827/* 1769/*
1828 * This is called when the inode's link count goes to 0. 1770 * This is called when the inode's link count goes to 0.
1829 * We place the on-disk inode on a list in the AGI. It 1771 * We place the on-disk inode on a list in the AGI. It
@@ -2258,7 +2200,7 @@ xfs_ifree_cluster(
2258 xfs_trans_binval(tp, bp); 2200 xfs_trans_binval(tp, bp);
2259 } 2201 }
2260 2202
2261 kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *)); 2203 kmem_free(ip_found);
2262 xfs_put_perag(mp, pag); 2204 xfs_put_perag(mp, pag);
2263} 2205}
2264 2206
@@ -2470,7 +2412,7 @@ xfs_iroot_realloc(
2470 (int)new_size); 2412 (int)new_size);
2471 memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); 2413 memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
2472 } 2414 }
2473 kmem_free(ifp->if_broot, ifp->if_broot_bytes); 2415 kmem_free(ifp->if_broot);
2474 ifp->if_broot = new_broot; 2416 ifp->if_broot = new_broot;
2475 ifp->if_broot_bytes = (int)new_size; 2417 ifp->if_broot_bytes = (int)new_size;
2476 ASSERT(ifp->if_broot_bytes <= 2418 ASSERT(ifp->if_broot_bytes <=
@@ -2514,7 +2456,7 @@ xfs_idata_realloc(
2514 2456
2515 if (new_size == 0) { 2457 if (new_size == 0) {
2516 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2458 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
2517 kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 2459 kmem_free(ifp->if_u1.if_data);
2518 } 2460 }
2519 ifp->if_u1.if_data = NULL; 2461 ifp->if_u1.if_data = NULL;
2520 real_size = 0; 2462 real_size = 0;
@@ -2529,7 +2471,7 @@ xfs_idata_realloc(
2529 ASSERT(ifp->if_real_bytes != 0); 2471 ASSERT(ifp->if_real_bytes != 0);
2530 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, 2472 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
2531 new_size); 2473 new_size);
2532 kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 2474 kmem_free(ifp->if_u1.if_data);
2533 ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 2475 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
2534 } 2476 }
2535 real_size = 0; 2477 real_size = 0;
@@ -2636,7 +2578,7 @@ xfs_idestroy_fork(
2636 2578
2637 ifp = XFS_IFORK_PTR(ip, whichfork); 2579 ifp = XFS_IFORK_PTR(ip, whichfork);
2638 if (ifp->if_broot != NULL) { 2580 if (ifp->if_broot != NULL) {
2639 kmem_free(ifp->if_broot, ifp->if_broot_bytes); 2581 kmem_free(ifp->if_broot);
2640 ifp->if_broot = NULL; 2582 ifp->if_broot = NULL;
2641 } 2583 }
2642 2584
@@ -2650,7 +2592,7 @@ xfs_idestroy_fork(
2650 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && 2592 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
2651 (ifp->if_u1.if_data != NULL)) { 2593 (ifp->if_u1.if_data != NULL)) {
2652 ASSERT(ifp->if_real_bytes != 0); 2594 ASSERT(ifp->if_real_bytes != 0);
2653 kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 2595 kmem_free(ifp->if_u1.if_data);
2654 ifp->if_u1.if_data = NULL; 2596 ifp->if_u1.if_data = NULL;
2655 ifp->if_real_bytes = 0; 2597 ifp->if_real_bytes = 0;
2656 } 2598 }
@@ -2691,7 +2633,6 @@ xfs_idestroy(
2691 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 2633 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
2692 mrfree(&ip->i_lock); 2634 mrfree(&ip->i_lock);
2693 mrfree(&ip->i_iolock); 2635 mrfree(&ip->i_iolock);
2694 freesema(&ip->i_flock);
2695 2636
2696#ifdef XFS_INODE_TRACE 2637#ifdef XFS_INODE_TRACE
2697 ktrace_free(ip->i_trace); 2638 ktrace_free(ip->i_trace);
@@ -3058,7 +2999,7 @@ xfs_iflush_cluster(
3058 2999
3059out_free: 3000out_free:
3060 read_unlock(&pag->pag_ici_lock); 3001 read_unlock(&pag->pag_ici_lock);
3061 kmem_free(ilist, ilist_size); 3002 kmem_free(ilist);
3062 return 0; 3003 return 0;
3063 3004
3064 3005
@@ -3102,17 +3043,17 @@ cluster_corrupt_out:
3102 * Unlocks the flush lock 3043 * Unlocks the flush lock
3103 */ 3044 */
3104 xfs_iflush_abort(iq); 3045 xfs_iflush_abort(iq);
3105 kmem_free(ilist, ilist_size); 3046 kmem_free(ilist);
3106 return XFS_ERROR(EFSCORRUPTED); 3047 return XFS_ERROR(EFSCORRUPTED);
3107} 3048}
3108 3049
3109/* 3050/*
3110 * xfs_iflush() will write a modified inode's changes out to the 3051 * xfs_iflush() will write a modified inode's changes out to the
3111 * inode's on disk home. The caller must have the inode lock held 3052 * inode's on disk home. The caller must have the inode lock held
3112 * in at least shared mode and the inode flush semaphore must be 3053 * in at least shared mode and the inode flush completion must be
3113 * held as well. The inode lock will still be held upon return from 3054 * active as well. The inode lock will still be held upon return from
3114 * the call and the caller is free to unlock it. 3055 * the call and the caller is free to unlock it.
3115 * The inode flush lock will be unlocked when the inode reaches the disk. 3056 * The inode flush will be completed when the inode reaches the disk.
3116 * The flags indicate how the inode's buffer should be written out. 3057 * The flags indicate how the inode's buffer should be written out.
3117 */ 3058 */
3118int 3059int
@@ -3131,7 +3072,7 @@ xfs_iflush(
3131 XFS_STATS_INC(xs_iflush_count); 3072 XFS_STATS_INC(xs_iflush_count);
3132 3073
3133 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3074 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3134 ASSERT(issemalocked(&(ip->i_flock))); 3075 ASSERT(!completion_done(&ip->i_flush));
3135 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3076 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3136 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3077 ip->i_d.di_nextents > ip->i_df.if_ext_max);
3137 3078
@@ -3143,8 +3084,6 @@ xfs_iflush(
3143 * flush lock and do nothing. 3084 * flush lock and do nothing.
3144 */ 3085 */
3145 if (xfs_inode_clean(ip)) { 3086 if (xfs_inode_clean(ip)) {
3146 ASSERT((iip != NULL) ?
3147 !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
3148 xfs_ifunlock(ip); 3087 xfs_ifunlock(ip);
3149 return 0; 3088 return 0;
3150 } 3089 }
@@ -3296,7 +3235,7 @@ xfs_iflush_int(
3296#endif 3235#endif
3297 3236
3298 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3237 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3299 ASSERT(issemalocked(&(ip->i_flock))); 3238 ASSERT(!completion_done(&ip->i_flush));
3300 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3239 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3301 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3240 ip->i_d.di_nextents > ip->i_df.if_ext_max);
3302 3241
@@ -3528,7 +3467,6 @@ xfs_iflush_all(
3528 xfs_mount_t *mp) 3467 xfs_mount_t *mp)
3529{ 3468{
3530 xfs_inode_t *ip; 3469 xfs_inode_t *ip;
3531 bhv_vnode_t *vp;
3532 3470
3533 again: 3471 again:
3534 XFS_MOUNT_ILOCK(mp); 3472 XFS_MOUNT_ILOCK(mp);
@@ -3543,14 +3481,13 @@ xfs_iflush_all(
3543 continue; 3481 continue;
3544 } 3482 }
3545 3483
3546 vp = XFS_ITOV_NULL(ip); 3484 if (!VFS_I(ip)) {
3547 if (!vp) {
3548 XFS_MOUNT_IUNLOCK(mp); 3485 XFS_MOUNT_IUNLOCK(mp);
3549 xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); 3486 xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
3550 goto again; 3487 goto again;
3551 } 3488 }
3552 3489
3553 ASSERT(vn_count(vp) == 0); 3490 ASSERT(vn_count(VFS_I(ip)) == 0);
3554 3491
3555 ip = ip->i_mnext; 3492 ip = ip->i_mnext;
3556 } while (ip != mp->m_inodes); 3493 } while (ip != mp->m_inodes);
@@ -3770,7 +3707,7 @@ xfs_iext_add_indirect_multi(
3770 * (all extents past */ 3707 * (all extents past */
3771 if (nex2) { 3708 if (nex2) {
3772 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 3709 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
3773 nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP); 3710 nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
3774 memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); 3711 memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
3775 erp->er_extcount -= nex2; 3712 erp->er_extcount -= nex2;
3776 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); 3713 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
@@ -3836,7 +3773,7 @@ xfs_iext_add_indirect_multi(
3836 erp = xfs_iext_irec_new(ifp, erp_idx); 3773 erp = xfs_iext_irec_new(ifp, erp_idx);
3837 } 3774 }
3838 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); 3775 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
3839 kmem_free(nex2_ep, byte_diff); 3776 kmem_free(nex2_ep);
3840 erp->er_extcount += nex2; 3777 erp->er_extcount += nex2;
3841 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); 3778 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
3842 } 3779 }
@@ -4070,8 +4007,7 @@ xfs_iext_realloc_direct(
4070 ifp->if_u1.if_extents = 4007 ifp->if_u1.if_extents =
4071 kmem_realloc(ifp->if_u1.if_extents, 4008 kmem_realloc(ifp->if_u1.if_extents,
4072 rnew_size, 4009 rnew_size,
4073 ifp->if_real_bytes, 4010 ifp->if_real_bytes, KM_NOFS);
4074 KM_SLEEP);
4075 } 4011 }
4076 if (rnew_size > ifp->if_real_bytes) { 4012 if (rnew_size > ifp->if_real_bytes) {
4077 memset(&ifp->if_u1.if_extents[ifp->if_bytes / 4013 memset(&ifp->if_u1.if_extents[ifp->if_bytes /
@@ -4112,7 +4048,7 @@ xfs_iext_direct_to_inline(
4112 */ 4048 */
4113 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, 4049 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
4114 nextents * sizeof(xfs_bmbt_rec_t)); 4050 nextents * sizeof(xfs_bmbt_rec_t));
4115 kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); 4051 kmem_free(ifp->if_u1.if_extents);
4116 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 4052 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
4117 ifp->if_real_bytes = 0; 4053 ifp->if_real_bytes = 0;
4118} 4054}
@@ -4130,7 +4066,7 @@ xfs_iext_inline_to_direct(
4130 xfs_ifork_t *ifp, /* inode fork pointer */ 4066 xfs_ifork_t *ifp, /* inode fork pointer */
4131 int new_size) /* number of extents in file */ 4067 int new_size) /* number of extents in file */
4132{ 4068{
4133 ifp->if_u1.if_extents = kmem_alloc(new_size, KM_SLEEP); 4069 ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
4134 memset(ifp->if_u1.if_extents, 0, new_size); 4070 memset(ifp->if_u1.if_extents, 0, new_size);
4135 if (ifp->if_bytes) { 4071 if (ifp->if_bytes) {
4136 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, 4072 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
@@ -4162,7 +4098,7 @@ xfs_iext_realloc_indirect(
4162 } else { 4098 } else {
4163 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) 4099 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
4164 kmem_realloc(ifp->if_u1.if_ext_irec, 4100 kmem_realloc(ifp->if_u1.if_ext_irec,
4165 new_size, size, KM_SLEEP); 4101 new_size, size, KM_NOFS);
4166 } 4102 }
4167} 4103}
4168 4104
@@ -4186,7 +4122,7 @@ xfs_iext_indirect_to_direct(
4186 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); 4122 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
4187 4123
4188 ep = ifp->if_u1.if_ext_irec->er_extbuf; 4124 ep = ifp->if_u1.if_ext_irec->er_extbuf;
4189 kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t)); 4125 kmem_free(ifp->if_u1.if_ext_irec);
4190 ifp->if_flags &= ~XFS_IFEXTIREC; 4126 ifp->if_flags &= ~XFS_IFEXTIREC;
4191 ifp->if_u1.if_extents = ep; 4127 ifp->if_u1.if_extents = ep;
4192 ifp->if_bytes = size; 4128 ifp->if_bytes = size;
@@ -4212,7 +4148,7 @@ xfs_iext_destroy(
4212 } 4148 }
4213 ifp->if_flags &= ~XFS_IFEXTIREC; 4149 ifp->if_flags &= ~XFS_IFEXTIREC;
4214 } else if (ifp->if_real_bytes) { 4150 } else if (ifp->if_real_bytes) {
4215 kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); 4151 kmem_free(ifp->if_u1.if_extents);
4216 } else if (ifp->if_bytes) { 4152 } else if (ifp->if_bytes) {
4217 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 4153 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
4218 sizeof(xfs_bmbt_rec_t)); 4154 sizeof(xfs_bmbt_rec_t));
@@ -4404,11 +4340,10 @@ xfs_iext_irec_init(
4404 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 4340 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4405 ASSERT(nextents <= XFS_LINEAR_EXTS); 4341 ASSERT(nextents <= XFS_LINEAR_EXTS);
4406 4342
4407 erp = (xfs_ext_irec_t *) 4343 erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
4408 kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP);
4409 4344
4410 if (nextents == 0) { 4345 if (nextents == 0) {
4411 ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); 4346 ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
4412 } else if (!ifp->if_real_bytes) { 4347 } else if (!ifp->if_real_bytes) {
4413 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); 4348 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
4414 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { 4349 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
@@ -4456,7 +4391,7 @@ xfs_iext_irec_new(
4456 4391
4457 /* Initialize new extent record */ 4392 /* Initialize new extent record */
4458 erp = ifp->if_u1.if_ext_irec; 4393 erp = ifp->if_u1.if_ext_irec;
4459 erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); 4394 erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
4460 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 4395 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
4461 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); 4396 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
4462 erp[erp_idx].er_extcount = 0; 4397 erp[erp_idx].er_extcount = 0;
@@ -4483,7 +4418,7 @@ xfs_iext_irec_remove(
4483 if (erp->er_extbuf) { 4418 if (erp->er_extbuf) {
4484 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, 4419 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
4485 -erp->er_extcount); 4420 -erp->er_extcount);
4486 kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ); 4421 kmem_free(erp->er_extbuf);
4487 } 4422 }
4488 /* Compact extent records */ 4423 /* Compact extent records */
4489 erp = ifp->if_u1.if_ext_irec; 4424 erp = ifp->if_u1.if_ext_irec;
@@ -4501,8 +4436,7 @@ xfs_iext_irec_remove(
4501 xfs_iext_realloc_indirect(ifp, 4436 xfs_iext_realloc_indirect(ifp,
4502 nlists * sizeof(xfs_ext_irec_t)); 4437 nlists * sizeof(xfs_ext_irec_t));
4503 } else { 4438 } else {
4504 kmem_free(ifp->if_u1.if_ext_irec, 4439 kmem_free(ifp->if_u1.if_ext_irec);
4505 sizeof(xfs_ext_irec_t));
4506 } 4440 }
4507 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 4441 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
4508} 4442}
@@ -4571,7 +4505,7 @@ xfs_iext_irec_compact_pages(
4571 * so er_extoffs don't get modified in 4505 * so er_extoffs don't get modified in
4572 * xfs_iext_irec_remove. 4506 * xfs_iext_irec_remove.
4573 */ 4507 */
4574 kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ); 4508 kmem_free(erp_next->er_extbuf);
4575 erp_next->er_extbuf = NULL; 4509 erp_next->er_extbuf = NULL;
4576 xfs_iext_irec_remove(ifp, erp_idx + 1); 4510 xfs_iext_irec_remove(ifp, erp_idx + 1);
4577 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4511 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
@@ -4596,40 +4530,63 @@ xfs_iext_irec_compact_full(
4596 int nlists; /* number of irec's (ex lists) */ 4530 int nlists; /* number of irec's (ex lists) */
4597 4531
4598 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4532 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4533
4599 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4534 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4600 erp = ifp->if_u1.if_ext_irec; 4535 erp = ifp->if_u1.if_ext_irec;
4601 ep = &erp->er_extbuf[erp->er_extcount]; 4536 ep = &erp->er_extbuf[erp->er_extcount];
4602 erp_next = erp + 1; 4537 erp_next = erp + 1;
4603 ep_next = erp_next->er_extbuf; 4538 ep_next = erp_next->er_extbuf;
4539
4604 while (erp_idx < nlists - 1) { 4540 while (erp_idx < nlists - 1) {
4541 /*
4542 * Check how many extent records are available in this irec.
4543 * If there is none skip the whole exercise.
4544 */
4605 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; 4545 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
4606 ext_diff = MIN(ext_avail, erp_next->er_extcount); 4546 if (ext_avail) {
4607 memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); 4547
4608 erp->er_extcount += ext_diff;
4609 erp_next->er_extcount -= ext_diff;
4610 /* Remove next page */
4611 if (erp_next->er_extcount == 0) {
4612 /* 4548 /*
4613 * Free page before removing extent record 4549 * Copy over as many as possible extent records into
4614 * so er_extoffs don't get modified in 4550 * the previous page.
4615 * xfs_iext_irec_remove.
4616 */ 4551 */
4617 kmem_free(erp_next->er_extbuf, 4552 ext_diff = MIN(ext_avail, erp_next->er_extcount);
4618 erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); 4553 memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
4619 erp_next->er_extbuf = NULL; 4554 erp->er_extcount += ext_diff;
4620 xfs_iext_irec_remove(ifp, erp_idx + 1); 4555 erp_next->er_extcount -= ext_diff;
4621 erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4556
4622 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4557 /*
4623 /* Update next page */ 4558 * If the next irec is empty now we can simply
4624 } else { 4559 * remove it.
4625 /* Move rest of page up to become next new page */ 4560 */
4626 memmove(erp_next->er_extbuf, ep_next, 4561 if (erp_next->er_extcount == 0) {
4627 erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); 4562 /*
4628 ep_next = erp_next->er_extbuf; 4563 * Free page before removing extent record
4629 memset(&ep_next[erp_next->er_extcount], 0, 4564 * so er_extoffs don't get modified in
4630 (XFS_LINEAR_EXTS - erp_next->er_extcount) * 4565 * xfs_iext_irec_remove.
4631 sizeof(xfs_bmbt_rec_t)); 4566 */
4567 kmem_free(erp_next->er_extbuf);
4568 erp_next->er_extbuf = NULL;
4569 xfs_iext_irec_remove(ifp, erp_idx + 1);
4570 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4571 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4572
4573 /*
4574 * If the next irec is not empty move up the content
4575 * that has not been copied to the previous page to
4576 * the beginning of this one.
4577 */
4578 } else {
4579 memmove(erp_next->er_extbuf, &ep_next[ext_diff],
4580 erp_next->er_extcount *
4581 sizeof(xfs_bmbt_rec_t));
4582 ep_next = erp_next->er_extbuf;
4583 memset(&ep_next[erp_next->er_extcount], 0,
4584 (XFS_LINEAR_EXTS -
4585 erp_next->er_extcount) *
4586 sizeof(xfs_bmbt_rec_t));
4587 }
4632 } 4588 }
4589
4633 if (erp->er_extcount == XFS_LINEAR_EXTS) { 4590 if (erp->er_extcount == XFS_LINEAR_EXTS) {
4634 erp_idx++; 4591 erp_idx++;
4635 if (erp_idx < nlists) 4592 if (erp_idx < nlists)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0a999fee4f03..1420c49674d7 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -87,8 +87,7 @@ typedef struct xfs_ifork {
87 * Flags for xfs_ichgtime(). 87 * Flags for xfs_ichgtime().
88 */ 88 */
89#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ 89#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
90#define XFS_ICHGTIME_ACC 0x2 /* data fork access timestamp */ 90#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
91#define XFS_ICHGTIME_CHG 0x4 /* inode field change timestamp */
92 91
93/* 92/*
94 * Per-fork incore inode flags. 93 * Per-fork incore inode flags.
@@ -204,7 +203,7 @@ typedef struct xfs_inode {
204 struct xfs_inode *i_mprev; /* ptr to prev inode */ 203 struct xfs_inode *i_mprev; /* ptr to prev inode */
205 struct xfs_mount *i_mount; /* fs mount struct ptr */ 204 struct xfs_mount *i_mount; /* fs mount struct ptr */
206 struct list_head i_reclaim; /* reclaim list */ 205 struct list_head i_reclaim; /* reclaim list */
207 bhv_vnode_t *i_vnode; /* vnode backpointer */ 206 struct inode *i_vnode; /* vnode backpointer */
208 struct xfs_dquot *i_udquot; /* user dquot */ 207 struct xfs_dquot *i_udquot; /* user dquot */
209 struct xfs_dquot *i_gdquot; /* group dquot */ 208 struct xfs_dquot *i_gdquot; /* group dquot */
210 209
@@ -223,7 +222,7 @@ typedef struct xfs_inode {
223 struct xfs_inode_log_item *i_itemp; /* logging information */ 222 struct xfs_inode_log_item *i_itemp; /* logging information */
224 mrlock_t i_lock; /* inode lock */ 223 mrlock_t i_lock; /* inode lock */
225 mrlock_t i_iolock; /* inode IO lock */ 224 mrlock_t i_iolock; /* inode IO lock */
226 sema_t i_flock; /* inode flush lock */ 225 struct completion i_flush; /* inode flush completion q */
227 atomic_t i_pincount; /* inode pin count */ 226 atomic_t i_pincount; /* inode pin count */
228 wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ 227 wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
229 spinlock_t i_flags_lock; /* inode i_flags lock */ 228 spinlock_t i_flags_lock; /* inode i_flags lock */
@@ -263,6 +262,18 @@ typedef struct xfs_inode {
263#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ 262#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
264 (ip)->i_size : (ip)->i_d.di_size; 263 (ip)->i_size : (ip)->i_d.di_size;
265 264
265/* Convert from vfs inode to xfs inode */
266static inline struct xfs_inode *XFS_I(struct inode *inode)
267{
268 return (struct xfs_inode *)inode->i_private;
269}
270
271/* convert from xfs inode to vfs inode */
272static inline struct inode *VFS_I(struct xfs_inode *ip)
273{
274 return (struct inode *)ip->i_vnode;
275}
276
266/* 277/*
267 * i_flags helper functions 278 * i_flags helper functions
268 */ 279 */
@@ -439,9 +450,6 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
439#define XFS_ITRUNC_DEFINITE 0x1 450#define XFS_ITRUNC_DEFINITE 0x1
440#define XFS_ITRUNC_MAYBE 0x2 451#define XFS_ITRUNC_MAYBE 0x2
441 452
442#define XFS_ITOV(ip) ((ip)->i_vnode)
443#define XFS_ITOV_NULL(ip) ((ip)->i_vnode)
444
445/* 453/*
446 * For multiple groups support: if S_ISGID bit is set in the parent 454 * For multiple groups support: if S_ISGID bit is set in the parent
447 * directory, group of new file is set to that of the parent, and 455 * directory, group of new file is set to that of the parent, and
@@ -473,11 +481,8 @@ int xfs_ilock_nowait(xfs_inode_t *, uint);
473void xfs_iunlock(xfs_inode_t *, uint); 481void xfs_iunlock(xfs_inode_t *, uint);
474void xfs_ilock_demote(xfs_inode_t *, uint); 482void xfs_ilock_demote(xfs_inode_t *, uint);
475int xfs_isilocked(xfs_inode_t *, uint); 483int xfs_isilocked(xfs_inode_t *, uint);
476void xfs_iflock(xfs_inode_t *);
477int xfs_iflock_nowait(xfs_inode_t *);
478uint xfs_ilock_map_shared(xfs_inode_t *); 484uint xfs_ilock_map_shared(xfs_inode_t *);
479void xfs_iunlock_map_shared(xfs_inode_t *, uint); 485void xfs_iunlock_map_shared(xfs_inode_t *, uint);
480void xfs_ifunlock(xfs_inode_t *);
481void xfs_ireclaim(xfs_inode_t *); 486void xfs_ireclaim(xfs_inode_t *);
482int xfs_finish_reclaim(xfs_inode_t *, int, int); 487int xfs_finish_reclaim(xfs_inode_t *, int, int);
483int xfs_finish_reclaim_all(struct xfs_mount *, int); 488int xfs_finish_reclaim_all(struct xfs_mount *, int);
@@ -507,9 +512,6 @@ int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
507int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, 512int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
508 xfs_fsize_t, int, int); 513 xfs_fsize_t, int, int);
509int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 514int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
510int xfs_igrow_start(xfs_inode_t *, xfs_fsize_t, struct cred *);
511void xfs_igrow_finish(struct xfs_trans *, xfs_inode_t *,
512 xfs_fsize_t, int);
513 515
514void xfs_idestroy_fork(xfs_inode_t *, int); 516void xfs_idestroy_fork(xfs_inode_t *, int);
515void xfs_idestroy(xfs_inode_t *); 517void xfs_idestroy(xfs_inode_t *);
@@ -525,6 +527,7 @@ void xfs_iflush_all(struct xfs_mount *);
525void xfs_ichgtime(xfs_inode_t *, int); 527void xfs_ichgtime(xfs_inode_t *, int);
526xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); 528xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
527void xfs_lock_inodes(xfs_inode_t **, int, uint); 529void xfs_lock_inodes(xfs_inode_t **, int, uint);
530void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
528 531
529void xfs_synchronize_atime(xfs_inode_t *); 532void xfs_synchronize_atime(xfs_inode_t *);
530void xfs_mark_inode_dirty_sync(xfs_inode_t *); 533void xfs_mark_inode_dirty_sync(xfs_inode_t *);
@@ -573,6 +576,26 @@ extern struct kmem_zone *xfs_ifork_zone;
573extern struct kmem_zone *xfs_inode_zone; 576extern struct kmem_zone *xfs_inode_zone;
574extern struct kmem_zone *xfs_ili_zone; 577extern struct kmem_zone *xfs_ili_zone;
575 578
579/*
580 * Manage the i_flush queue embedded in the inode. This completion
581 * queue synchronizes processes attempting to flush the in-core
582 * inode back to disk.
583 */
584static inline void xfs_iflock(xfs_inode_t *ip)
585{
586 wait_for_completion(&ip->i_flush);
587}
588
589static inline int xfs_iflock_nowait(xfs_inode_t *ip)
590{
591 return try_wait_for_completion(&ip->i_flush);
592}
593
594static inline void xfs_ifunlock(xfs_inode_t *ip)
595{
596 complete(&ip->i_flush);
597}
598
576#endif /* __KERNEL__ */ 599#endif /* __KERNEL__ */
577 600
578#endif /* __XFS_INODE_H__ */ 601#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 167b33f15772..97c7452e2620 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -686,7 +686,7 @@ xfs_inode_item_unlock(
686 ASSERT(ip->i_d.di_nextents > 0); 686 ASSERT(ip->i_d.di_nextents > 0);
687 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT); 687 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT);
688 ASSERT(ip->i_df.if_bytes > 0); 688 ASSERT(ip->i_df.if_bytes > 0);
689 kmem_free(iip->ili_extents_buf, ip->i_df.if_bytes); 689 kmem_free(iip->ili_extents_buf);
690 iip->ili_extents_buf = NULL; 690 iip->ili_extents_buf = NULL;
691 } 691 }
692 if (iip->ili_aextents_buf != NULL) { 692 if (iip->ili_aextents_buf != NULL) {
@@ -694,7 +694,7 @@ xfs_inode_item_unlock(
694 ASSERT(ip->i_d.di_anextents > 0); 694 ASSERT(ip->i_d.di_anextents > 0);
695 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT); 695 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
696 ASSERT(ip->i_afp->if_bytes > 0); 696 ASSERT(ip->i_afp->if_bytes > 0);
697 kmem_free(iip->ili_aextents_buf, ip->i_afp->if_bytes); 697 kmem_free(iip->ili_aextents_buf);
698 iip->ili_aextents_buf = NULL; 698 iip->ili_aextents_buf = NULL;
699 } 699 }
700 700
@@ -779,11 +779,10 @@ xfs_inode_item_pushbuf(
779 ASSERT(iip->ili_push_owner == current_pid()); 779 ASSERT(iip->ili_push_owner == current_pid());
780 780
781 /* 781 /*
782 * If flushlock isn't locked anymore, chances are that the 782 * If a flush is not in progress anymore, chances are that the
783 * inode flush completed and the inode was taken off the AIL. 783 * inode was taken off the AIL. So, just get out.
784 * So, just get out.
785 */ 784 */
786 if (!issemalocked(&(ip->i_flock)) || 785 if (completion_done(&ip->i_flush) ||
787 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { 786 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
788 iip->ili_pushbuf_flag = 0; 787 iip->ili_pushbuf_flag = 0;
789 xfs_iunlock(ip, XFS_ILOCK_SHARED); 788 xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -805,7 +804,7 @@ xfs_inode_item_pushbuf(
805 * If not, we can flush it async. 804 * If not, we can flush it async.
806 */ 805 */
807 dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && 806 dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
808 issemalocked(&(ip->i_flock))); 807 !completion_done(&ip->i_flush));
809 iip->ili_pushbuf_flag = 0; 808 iip->ili_pushbuf_flag = 0;
810 xfs_iunlock(ip, XFS_ILOCK_SHARED); 809 xfs_iunlock(ip, XFS_ILOCK_SHARED);
811 xfs_buftrace("INODE ITEM PUSH", bp); 810 xfs_buftrace("INODE ITEM PUSH", bp);
@@ -858,7 +857,7 @@ xfs_inode_item_push(
858 ip = iip->ili_inode; 857 ip = iip->ili_inode;
859 858
860 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 859 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
861 ASSERT(issemalocked(&(ip->i_flock))); 860 ASSERT(!completion_done(&ip->i_flush));
862 /* 861 /*
863 * Since we were able to lock the inode's flush lock and 862 * Since we were able to lock the inode's flush lock and
864 * we found it on the AIL, the inode must be dirty. This 863 * we found it on the AIL, the inode must be dirty. This
@@ -957,8 +956,7 @@ xfs_inode_item_destroy(
957{ 956{
958#ifdef XFS_TRANS_DEBUG 957#ifdef XFS_TRANS_DEBUG
959 if (ip->i_itemp->ili_root_size != 0) { 958 if (ip->i_itemp->ili_root_size != 0) {
960 kmem_free(ip->i_itemp->ili_orig_root, 959 kmem_free(ip->i_itemp->ili_orig_root);
961 ip->i_itemp->ili_root_size);
962 } 960 }
963#endif 961#endif
964 kmem_zone_free(xfs_ili_zone, ip->i_itemp); 962 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7edcde691d1a..67f22b2b44b3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -889,6 +889,16 @@ xfs_iomap_write_unwritten(
889 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 889 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
890 count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb); 890 count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
891 891
892 /*
893 * Reserve enough blocks in this transaction for two complete extent
894 * btree splits. We may be converting the middle part of an unwritten
895 * extent and in this case we will insert two new extents in the btree
896 * each of which could cause a full split.
897 *
898 * This reservation amount will be used in the first call to
899 * xfs_bmbt_split() to select an AG with enough space to satisfy the
900 * rest of the operation.
901 */
892 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; 902 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
893 903
894 do { 904 do {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 419de15aeb43..cf6754a3c5b3 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -59,7 +59,6 @@ xfs_bulkstat_one_iget(
59{ 59{
60 xfs_icdinode_t *dic; /* dinode core info pointer */ 60 xfs_icdinode_t *dic; /* dinode core info pointer */
61 xfs_inode_t *ip; /* incore inode pointer */ 61 xfs_inode_t *ip; /* incore inode pointer */
62 bhv_vnode_t *vp;
63 int error; 62 int error;
64 63
65 error = xfs_iget(mp, NULL, ino, 64 error = xfs_iget(mp, NULL, ino,
@@ -72,7 +71,6 @@ xfs_bulkstat_one_iget(
72 ASSERT(ip != NULL); 71 ASSERT(ip != NULL);
73 ASSERT(ip->i_blkno != (xfs_daddr_t)0); 72 ASSERT(ip->i_blkno != (xfs_daddr_t)0);
74 73
75 vp = XFS_ITOV(ip);
76 dic = &ip->i_d; 74 dic = &ip->i_d;
77 75
78 /* xfs_iget returns the following without needing 76 /* xfs_iget returns the following without needing
@@ -85,7 +83,7 @@ xfs_bulkstat_one_iget(
85 buf->bs_uid = dic->di_uid; 83 buf->bs_uid = dic->di_uid;
86 buf->bs_gid = dic->di_gid; 84 buf->bs_gid = dic->di_gid;
87 buf->bs_size = dic->di_size; 85 buf->bs_size = dic->di_size;
88 vn_atime_to_bstime(vp, &buf->bs_atime); 86 vn_atime_to_bstime(VFS_I(ip), &buf->bs_atime);
89 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; 87 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
90 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; 88 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
91 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; 89 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
@@ -257,7 +255,7 @@ xfs_bulkstat_one(
257 *ubused = error; 255 *ubused = error;
258 256
259 out_free: 257 out_free:
260 kmem_free(buf, sizeof(*buf)); 258 kmem_free(buf);
261 return error; 259 return error;
262} 260}
263 261
@@ -708,7 +706,7 @@ xfs_bulkstat(
708 /* 706 /*
709 * Done, we're either out of filesystem or space to put the data. 707 * Done, we're either out of filesystem or space to put the data.
710 */ 708 */
711 kmem_free(irbuf, irbsize); 709 kmem_free(irbuf);
712 *ubcountp = ubelem; 710 *ubcountp = ubelem;
713 /* 711 /*
714 * Found some inodes, return them now and return the error next time. 712 * Found some inodes, return them now and return the error next time.
@@ -914,7 +912,7 @@ xfs_inumbers(
914 } 912 }
915 *lastino = XFS_AGINO_TO_INO(mp, agno, agino); 913 *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
916 } 914 }
917 kmem_free(buffer, bcount * sizeof(*buffer)); 915 kmem_free(buffer);
918 if (cur) 916 if (cur)
919 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR : 917 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
920 XFS_BTREE_NOERROR)); 918 XFS_BTREE_NOERROR));
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ad3d26ddfe31..ccba14eb9dbe 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -160,7 +160,7 @@ void
160xlog_trace_iclog(xlog_in_core_t *iclog, uint state) 160xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
161{ 161{
162 if (!iclog->ic_trace) 162 if (!iclog->ic_trace)
163 iclog->ic_trace = ktrace_alloc(256, KM_SLEEP); 163 iclog->ic_trace = ktrace_alloc(256, KM_NOFS);
164 ktrace_enter(iclog->ic_trace, 164 ktrace_enter(iclog->ic_trace,
165 (void *)((unsigned long)state), 165 (void *)((unsigned long)state),
166 (void *)((unsigned long)current_pid()), 166 (void *)((unsigned long)current_pid()),
@@ -226,20 +226,24 @@ xlog_grant_sub_space(struct log *log, int bytes)
226static void 226static void
227xlog_grant_add_space_write(struct log *log, int bytes) 227xlog_grant_add_space_write(struct log *log, int bytes)
228{ 228{
229 log->l_grant_write_bytes += bytes; 229 int tmp = log->l_logsize - log->l_grant_write_bytes;
230 if (log->l_grant_write_bytes > log->l_logsize) { 230 if (tmp > bytes)
231 log->l_grant_write_bytes -= log->l_logsize; 231 log->l_grant_write_bytes += bytes;
232 else {
232 log->l_grant_write_cycle++; 233 log->l_grant_write_cycle++;
234 log->l_grant_write_bytes = bytes - tmp;
233 } 235 }
234} 236}
235 237
236static void 238static void
237xlog_grant_add_space_reserve(struct log *log, int bytes) 239xlog_grant_add_space_reserve(struct log *log, int bytes)
238{ 240{
239 log->l_grant_reserve_bytes += bytes; 241 int tmp = log->l_logsize - log->l_grant_reserve_bytes;
240 if (log->l_grant_reserve_bytes > log->l_logsize) { 242 if (tmp > bytes)
241 log->l_grant_reserve_bytes -= log->l_logsize; 243 log->l_grant_reserve_bytes += bytes;
244 else {
242 log->l_grant_reserve_cycle++; 245 log->l_grant_reserve_cycle++;
246 log->l_grant_reserve_bytes = bytes - tmp;
243 } 247 }
244} 248}
245 249
@@ -332,15 +336,12 @@ xfs_log_done(xfs_mount_t *mp,
332 } else { 336 } else {
333 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); 337 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
334 xlog_regrant_reserve_log_space(log, ticket); 338 xlog_regrant_reserve_log_space(log, ticket);
335 } 339 /* If this ticket was a permanent reservation and we aren't
336 340 * trying to release it, reset the inited flags; so next time
337 /* If this ticket was a permanent reservation and we aren't 341 * we write, a start record will be written out.
338 * trying to release it, reset the inited flags; so next time 342 */
339 * we write, a start record will be written out.
340 */
341 if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) &&
342 (flags & XFS_LOG_REL_PERM_RESERV) == 0)
343 ticket->t_flags |= XLOG_TIC_INITED; 343 ticket->t_flags |= XLOG_TIC_INITED;
344 }
344 345
345 return lsn; 346 return lsn;
346} /* xfs_log_done */ 347} /* xfs_log_done */
@@ -353,11 +354,11 @@ xfs_log_done(xfs_mount_t *mp,
353 * Asynchronous forces are implemented by setting the WANT_SYNC 354 * Asynchronous forces are implemented by setting the WANT_SYNC
354 * bit in the appropriate in-core log and then returning. 355 * bit in the appropriate in-core log and then returning.
355 * 356 *
356 * Synchronous forces are implemented with a semaphore. All callers 357 * Synchronous forces are implemented with a signal variable. All callers
357 * to force a given lsn to disk will wait on a semaphore attached to the 358 * to force a given lsn to disk will wait on a the sv attached to the
358 * specific in-core log. When given in-core log finally completes its 359 * specific in-core log. When given in-core log finally completes its
359 * write to disk, that thread will wake up all threads waiting on the 360 * write to disk, that thread will wake up all threads waiting on the
360 * semaphore. 361 * sv.
361 */ 362 */
362int 363int
363_xfs_log_force( 364_xfs_log_force(
@@ -584,12 +585,12 @@ error:
584 * mp - ubiquitous xfs mount point structure 585 * mp - ubiquitous xfs mount point structure
585 */ 586 */
586int 587int
587xfs_log_mount_finish(xfs_mount_t *mp, int mfsi_flags) 588xfs_log_mount_finish(xfs_mount_t *mp)
588{ 589{
589 int error; 590 int error;
590 591
591 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) 592 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
592 error = xlog_recover_finish(mp->m_log, mfsi_flags); 593 error = xlog_recover_finish(mp->m_log);
593 else { 594 else {
594 error = 0; 595 error = 0;
595 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); 596 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
@@ -703,7 +704,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
703 if (!(iclog->ic_state == XLOG_STATE_ACTIVE || 704 if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
704 iclog->ic_state == XLOG_STATE_DIRTY)) { 705 iclog->ic_state == XLOG_STATE_DIRTY)) {
705 if (!XLOG_FORCED_SHUTDOWN(log)) { 706 if (!XLOG_FORCED_SHUTDOWN(log)) {
706 sv_wait(&iclog->ic_forcesema, PMEM, 707 sv_wait(&iclog->ic_force_wait, PMEM,
707 &log->l_icloglock, s); 708 &log->l_icloglock, s);
708 } else { 709 } else {
709 spin_unlock(&log->l_icloglock); 710 spin_unlock(&log->l_icloglock);
@@ -744,7 +745,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
744 || iclog->ic_state == XLOG_STATE_DIRTY 745 || iclog->ic_state == XLOG_STATE_DIRTY
745 || iclog->ic_state == XLOG_STATE_IOERROR) ) { 746 || iclog->ic_state == XLOG_STATE_IOERROR) ) {
746 747
747 sv_wait(&iclog->ic_forcesema, PMEM, 748 sv_wait(&iclog->ic_force_wait, PMEM,
748 &log->l_icloglock, s); 749 &log->l_icloglock, s);
749 } else { 750 } else {
750 spin_unlock(&log->l_icloglock); 751 spin_unlock(&log->l_icloglock);
@@ -834,7 +835,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
834 break; 835 break;
835 tail_lsn = 0; 836 tail_lsn = 0;
836 free_bytes -= tic->t_unit_res; 837 free_bytes -= tic->t_unit_res;
837 sv_signal(&tic->t_sema); 838 sv_signal(&tic->t_wait);
838 tic = tic->t_next; 839 tic = tic->t_next;
839 } while (tic != log->l_write_headq); 840 } while (tic != log->l_write_headq);
840 } 841 }
@@ -855,7 +856,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
855 break; 856 break;
856 tail_lsn = 0; 857 tail_lsn = 0;
857 free_bytes -= need_bytes; 858 free_bytes -= need_bytes;
858 sv_signal(&tic->t_sema); 859 sv_signal(&tic->t_wait);
859 tic = tic->t_next; 860 tic = tic->t_next;
860 } while (tic != log->l_reserve_headq); 861 } while (tic != log->l_reserve_headq);
861 } 862 }
@@ -1228,7 +1229,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1228 1229
1229 spin_lock_init(&log->l_icloglock); 1230 spin_lock_init(&log->l_icloglock);
1230 spin_lock_init(&log->l_grant_lock); 1231 spin_lock_init(&log->l_grant_lock);
1231 initnsema(&log->l_flushsema, 0, "ic-flush"); 1232 sv_init(&log->l_flush_wait, 0, "flush_wait");
1232 1233
1233 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1234 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
1234 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); 1235 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1281,8 +1282,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1281 1282
1282 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); 1283 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
1283 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); 1284 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
1284 sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force"); 1285 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
1285 sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write"); 1286 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
1286 1287
1287 iclogp = &iclog->ic_next; 1288 iclogp = &iclog->ic_next;
1288 } 1289 }
@@ -1561,8 +1562,8 @@ xlog_dealloc_log(xlog_t *log)
1561 1562
1562 iclog = log->l_iclog; 1563 iclog = log->l_iclog;
1563 for (i=0; i<log->l_iclog_bufs; i++) { 1564 for (i=0; i<log->l_iclog_bufs; i++) {
1564 sv_destroy(&iclog->ic_forcesema); 1565 sv_destroy(&iclog->ic_force_wait);
1565 sv_destroy(&iclog->ic_writesema); 1566 sv_destroy(&iclog->ic_write_wait);
1566 xfs_buf_free(iclog->ic_bp); 1567 xfs_buf_free(iclog->ic_bp);
1567#ifdef XFS_LOG_TRACE 1568#ifdef XFS_LOG_TRACE
1568 if (iclog->ic_trace != NULL) { 1569 if (iclog->ic_trace != NULL) {
@@ -1570,10 +1571,9 @@ xlog_dealloc_log(xlog_t *log)
1570 } 1571 }
1571#endif 1572#endif
1572 next_iclog = iclog->ic_next; 1573 next_iclog = iclog->ic_next;
1573 kmem_free(iclog, sizeof(xlog_in_core_t)); 1574 kmem_free(iclog);
1574 iclog = next_iclog; 1575 iclog = next_iclog;
1575 } 1576 }
1576 freesema(&log->l_flushsema);
1577 spinlock_destroy(&log->l_icloglock); 1577 spinlock_destroy(&log->l_icloglock);
1578 spinlock_destroy(&log->l_grant_lock); 1578 spinlock_destroy(&log->l_grant_lock);
1579 1579
@@ -1587,7 +1587,7 @@ xlog_dealloc_log(xlog_t *log)
1587 } 1587 }
1588#endif 1588#endif
1589 log->l_mp->m_log = NULL; 1589 log->l_mp->m_log = NULL;
1590 kmem_free(log, sizeof(xlog_t)); 1590 kmem_free(log);
1591} /* xlog_dealloc_log */ 1591} /* xlog_dealloc_log */
1592 1592
1593/* 1593/*
@@ -1973,7 +1973,7 @@ xlog_write(xfs_mount_t * mp,
1973/* Clean iclogs starting from the head. This ordering must be 1973/* Clean iclogs starting from the head. This ordering must be
1974 * maintained, so an iclog doesn't become ACTIVE beyond one that 1974 * maintained, so an iclog doesn't become ACTIVE beyond one that
1975 * is SYNCING. This is also required to maintain the notion that we use 1975 * is SYNCING. This is also required to maintain the notion that we use
1976 * a counting semaphore to hold off would be writers to the log when every 1976 * a ordered wait queue to hold off would be writers to the log when every
1977 * iclog is trying to sync to disk. 1977 * iclog is trying to sync to disk.
1978 * 1978 *
1979 * State Change: DIRTY -> ACTIVE 1979 * State Change: DIRTY -> ACTIVE
@@ -2097,6 +2097,7 @@ xlog_state_do_callback(
2097 int funcdidcallbacks; /* flag: function did callbacks */ 2097 int funcdidcallbacks; /* flag: function did callbacks */
2098 int repeats; /* for issuing console warnings if 2098 int repeats; /* for issuing console warnings if
2099 * looping too many times */ 2099 * looping too many times */
2100 int wake = 0;
2100 2101
2101 spin_lock(&log->l_icloglock); 2102 spin_lock(&log->l_icloglock);
2102 first_iclog = iclog = log->l_iclog; 2103 first_iclog = iclog = log->l_iclog;
@@ -2236,7 +2237,7 @@ xlog_state_do_callback(
2236 xlog_state_clean_log(log); 2237 xlog_state_clean_log(log);
2237 2238
2238 /* wake up threads waiting in xfs_log_force() */ 2239 /* wake up threads waiting in xfs_log_force() */
2239 sv_broadcast(&iclog->ic_forcesema); 2240 sv_broadcast(&iclog->ic_force_wait);
2240 2241
2241 iclog = iclog->ic_next; 2242 iclog = iclog->ic_next;
2242 } while (first_iclog != iclog); 2243 } while (first_iclog != iclog);
@@ -2278,15 +2279,13 @@ xlog_state_do_callback(
2278 } 2279 }
2279#endif 2280#endif
2280 2281
2281 flushcnt = 0; 2282 if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
2282 if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) { 2283 wake = 1;
2283 flushcnt = log->l_flushcnt;
2284 log->l_flushcnt = 0;
2285 }
2286 spin_unlock(&log->l_icloglock); 2284 spin_unlock(&log->l_icloglock);
2287 while (flushcnt--) 2285
2288 vsema(&log->l_flushsema); 2286 if (wake)
2289} /* xlog_state_do_callback */ 2287 sv_broadcast(&log->l_flush_wait);
2288}
2290 2289
2291 2290
2292/* 2291/*
@@ -2300,8 +2299,7 @@ xlog_state_do_callback(
2300 * the second completion goes through. 2299 * the second completion goes through.
2301 * 2300 *
2302 * Callbacks could take time, so they are done outside the scope of the 2301 * Callbacks could take time, so they are done outside the scope of the
2303 * global state machine log lock. Assume that the calls to cvsema won't 2302 * global state machine log lock.
2304 * take a long time. At least we know it won't sleep.
2305 */ 2303 */
2306STATIC void 2304STATIC void
2307xlog_state_done_syncing( 2305xlog_state_done_syncing(
@@ -2337,7 +2335,7 @@ xlog_state_done_syncing(
2337 * iclog buffer, we wake them all, one will get to do the 2335 * iclog buffer, we wake them all, one will get to do the
2338 * I/O, the others get to wait for the result. 2336 * I/O, the others get to wait for the result.
2339 */ 2337 */
2340 sv_broadcast(&iclog->ic_writesema); 2338 sv_broadcast(&iclog->ic_write_wait);
2341 spin_unlock(&log->l_icloglock); 2339 spin_unlock(&log->l_icloglock);
2342 xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ 2340 xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
2343} /* xlog_state_done_syncing */ 2341} /* xlog_state_done_syncing */
@@ -2345,11 +2343,9 @@ xlog_state_done_syncing(
2345 2343
2346/* 2344/*
2347 * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must 2345 * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
2348 * sleep. The flush semaphore is set to the number of in-core buffers and 2346 * sleep. We wait on the flush queue on the head iclog as that should be
2349 * decremented around disk syncing. Therefore, if all buffers are syncing, 2347 * the first iclog to complete flushing. Hence if all iclogs are syncing,
2350 * this semaphore will cause new writes to sleep until a sync completes. 2348 * we will wait here and all new writes will sleep until a sync completes.
2351 * Otherwise, this code just does p() followed by v(). This approximates
2352 * a sleep/wakeup except we can't race.
2353 * 2349 *
2354 * The in-core logs are used in a circular fashion. They are not used 2350 * The in-core logs are used in a circular fashion. They are not used
2355 * out-of-order even when an iclog past the head is free. 2351 * out-of-order even when an iclog past the head is free.
@@ -2384,16 +2380,15 @@ restart:
2384 } 2380 }
2385 2381
2386 iclog = log->l_iclog; 2382 iclog = log->l_iclog;
2387 if (! (iclog->ic_state == XLOG_STATE_ACTIVE)) { 2383 if (iclog->ic_state != XLOG_STATE_ACTIVE) {
2388 log->l_flushcnt++;
2389 spin_unlock(&log->l_icloglock);
2390 xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH); 2384 xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
2391 XFS_STATS_INC(xs_log_noiclogs); 2385 XFS_STATS_INC(xs_log_noiclogs);
2392 /* Ensure that log writes happen */ 2386
2393 psema(&log->l_flushsema, PINOD); 2387 /* Wait for log writes to have flushed */
2388 sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
2394 goto restart; 2389 goto restart;
2395 } 2390 }
2396 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); 2391
2397 head = &iclog->ic_header; 2392 head = &iclog->ic_header;
2398 2393
2399 atomic_inc(&iclog->ic_refcnt); /* prevents sync */ 2394 atomic_inc(&iclog->ic_refcnt); /* prevents sync */
@@ -2507,7 +2502,7 @@ xlog_grant_log_space(xlog_t *log,
2507 goto error_return; 2502 goto error_return;
2508 2503
2509 XFS_STATS_INC(xs_sleep_logspace); 2504 XFS_STATS_INC(xs_sleep_logspace);
2510 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2505 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2511 /* 2506 /*
2512 * If we got an error, and the filesystem is shutting down, 2507 * If we got an error, and the filesystem is shutting down,
2513 * we'll catch it down below. So just continue... 2508 * we'll catch it down below. So just continue...
@@ -2533,7 +2528,7 @@ redo:
2533 xlog_trace_loggrant(log, tic, 2528 xlog_trace_loggrant(log, tic,
2534 "xlog_grant_log_space: sleep 2"); 2529 "xlog_grant_log_space: sleep 2");
2535 XFS_STATS_INC(xs_sleep_logspace); 2530 XFS_STATS_INC(xs_sleep_logspace);
2536 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2531 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2537 2532
2538 if (XLOG_FORCED_SHUTDOWN(log)) { 2533 if (XLOG_FORCED_SHUTDOWN(log)) {
2539 spin_lock(&log->l_grant_lock); 2534 spin_lock(&log->l_grant_lock);
@@ -2632,7 +2627,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2632 if (free_bytes < ntic->t_unit_res) 2627 if (free_bytes < ntic->t_unit_res)
2633 break; 2628 break;
2634 free_bytes -= ntic->t_unit_res; 2629 free_bytes -= ntic->t_unit_res;
2635 sv_signal(&ntic->t_sema); 2630 sv_signal(&ntic->t_wait);
2636 ntic = ntic->t_next; 2631 ntic = ntic->t_next;
2637 } while (ntic != log->l_write_headq); 2632 } while (ntic != log->l_write_headq);
2638 2633
@@ -2643,7 +2638,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2643 xlog_trace_loggrant(log, tic, 2638 xlog_trace_loggrant(log, tic,
2644 "xlog_regrant_write_log_space: sleep 1"); 2639 "xlog_regrant_write_log_space: sleep 1");
2645 XFS_STATS_INC(xs_sleep_logspace); 2640 XFS_STATS_INC(xs_sleep_logspace);
2646 sv_wait(&tic->t_sema, PINOD|PLTWAIT, 2641 sv_wait(&tic->t_wait, PINOD|PLTWAIT,
2647 &log->l_grant_lock, s); 2642 &log->l_grant_lock, s);
2648 2643
2649 /* If we're shutting down, this tic is already 2644 /* If we're shutting down, this tic is already
@@ -2672,7 +2667,7 @@ redo:
2672 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2667 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2673 xlog_ins_ticketq(&log->l_write_headq, tic); 2668 xlog_ins_ticketq(&log->l_write_headq, tic);
2674 XFS_STATS_INC(xs_sleep_logspace); 2669 XFS_STATS_INC(xs_sleep_logspace);
2675 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2670 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2676 2671
2677 /* If we're shutting down, this tic is already off the queue */ 2672 /* If we're shutting down, this tic is already off the queue */
2678 if (XLOG_FORCED_SHUTDOWN(log)) { 2673 if (XLOG_FORCED_SHUTDOWN(log)) {
@@ -2915,7 +2910,7 @@ xlog_state_switch_iclogs(xlog_t *log,
2915 * 2. the current iclog is drity, and the previous iclog is in the 2910 * 2. the current iclog is drity, and the previous iclog is in the
2916 * active or dirty state. 2911 * active or dirty state.
2917 * 2912 *
2918 * We may sleep (call psema) if: 2913 * We may sleep if:
2919 * 2914 *
2920 * 1. the current iclog is not in the active nor dirty state. 2915 * 1. the current iclog is not in the active nor dirty state.
2921 * 2. the current iclog dirty, and the previous iclog is not in the 2916 * 2. the current iclog dirty, and the previous iclog is not in the
@@ -3012,7 +3007,7 @@ maybe_sleep:
3012 return XFS_ERROR(EIO); 3007 return XFS_ERROR(EIO);
3013 } 3008 }
3014 XFS_STATS_INC(xs_log_force_sleep); 3009 XFS_STATS_INC(xs_log_force_sleep);
3015 sv_wait(&iclog->ic_forcesema, PINOD, &log->l_icloglock, s); 3010 sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s);
3016 /* 3011 /*
3017 * No need to grab the log lock here since we're 3012 * No need to grab the log lock here since we're
3018 * only deciding whether or not to return EIO 3013 * only deciding whether or not to return EIO
@@ -3095,7 +3090,7 @@ try_again:
3095 XLOG_STATE_SYNCING))) { 3090 XLOG_STATE_SYNCING))) {
3096 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); 3091 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
3097 XFS_STATS_INC(xs_log_force_sleep); 3092 XFS_STATS_INC(xs_log_force_sleep);
3098 sv_wait(&iclog->ic_prev->ic_writesema, PSWP, 3093 sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,
3099 &log->l_icloglock, s); 3094 &log->l_icloglock, s);
3100 *log_flushed = 1; 3095 *log_flushed = 1;
3101 already_slept = 1; 3096 already_slept = 1;
@@ -3115,7 +3110,7 @@ try_again:
3115 !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { 3110 !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
3116 3111
3117 /* 3112 /*
3118 * Don't wait on the forcesema if we know that we've 3113 * Don't wait on completion if we know that we've
3119 * gotten a log write error. 3114 * gotten a log write error.
3120 */ 3115 */
3121 if (iclog->ic_state & XLOG_STATE_IOERROR) { 3116 if (iclog->ic_state & XLOG_STATE_IOERROR) {
@@ -3123,7 +3118,7 @@ try_again:
3123 return XFS_ERROR(EIO); 3118 return XFS_ERROR(EIO);
3124 } 3119 }
3125 XFS_STATS_INC(xs_log_force_sleep); 3120 XFS_STATS_INC(xs_log_force_sleep);
3126 sv_wait(&iclog->ic_forcesema, PSWP, &log->l_icloglock, s); 3121 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3127 /* 3122 /*
3128 * No need to grab the log lock here since we're 3123 * No need to grab the log lock here since we're
3129 * only deciding whether or not to return EIO 3124 * only deciding whether or not to return EIO
@@ -3179,7 +3174,7 @@ STATIC void
3179xlog_ticket_put(xlog_t *log, 3174xlog_ticket_put(xlog_t *log,
3180 xlog_ticket_t *ticket) 3175 xlog_ticket_t *ticket)
3181{ 3176{
3182 sv_destroy(&ticket->t_sema); 3177 sv_destroy(&ticket->t_wait);
3183 kmem_zone_free(xfs_log_ticket_zone, ticket); 3178 kmem_zone_free(xfs_log_ticket_zone, ticket);
3184} /* xlog_ticket_put */ 3179} /* xlog_ticket_put */
3185 3180
@@ -3269,7 +3264,7 @@ xlog_ticket_get(xlog_t *log,
3269 tic->t_trans_type = 0; 3264 tic->t_trans_type = 0;
3270 if (xflags & XFS_LOG_PERM_RESERV) 3265 if (xflags & XFS_LOG_PERM_RESERV)
3271 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3266 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3272 sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); 3267 sv_init(&(tic->t_wait), SV_DEFAULT, "logtick");
3273 3268
3274 xlog_tic_reset_res(tic); 3269 xlog_tic_reset_res(tic);
3275 3270
@@ -3556,14 +3551,14 @@ xfs_log_force_umount(
3556 */ 3551 */
3557 if ((tic = log->l_reserve_headq)) { 3552 if ((tic = log->l_reserve_headq)) {
3558 do { 3553 do {
3559 sv_signal(&tic->t_sema); 3554 sv_signal(&tic->t_wait);
3560 tic = tic->t_next; 3555 tic = tic->t_next;
3561 } while (tic != log->l_reserve_headq); 3556 } while (tic != log->l_reserve_headq);
3562 } 3557 }
3563 3558
3564 if ((tic = log->l_write_headq)) { 3559 if ((tic = log->l_write_headq)) {
3565 do { 3560 do {
3566 sv_signal(&tic->t_sema); 3561 sv_signal(&tic->t_wait);
3567 tic = tic->t_next; 3562 tic = tic->t_next;
3568 } while (tic != log->l_write_headq); 3563 } while (tic != log->l_write_headq);
3569 } 3564 }
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index d1d678ecb63e..d47b91f10822 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -149,7 +149,7 @@ int xfs_log_mount(struct xfs_mount *mp,
149 struct xfs_buftarg *log_target, 149 struct xfs_buftarg *log_target,
150 xfs_daddr_t start_block, 150 xfs_daddr_t start_block,
151 int num_bblocks); 151 int num_bblocks);
152int xfs_log_mount_finish(struct xfs_mount *mp, int); 152int xfs_log_mount_finish(struct xfs_mount *mp);
153void xfs_log_move_tail(struct xfs_mount *mp, 153void xfs_log_move_tail(struct xfs_mount *mp,
154 xfs_lsn_t tail_lsn); 154 xfs_lsn_t tail_lsn);
155int xfs_log_notify(struct xfs_mount *mp, 155int xfs_log_notify(struct xfs_mount *mp,
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8952a392b5f3..c8a5b22ee3e3 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -241,7 +241,7 @@ typedef struct xlog_res {
241} xlog_res_t; 241} xlog_res_t;
242 242
243typedef struct xlog_ticket { 243typedef struct xlog_ticket {
244 sv_t t_sema; /* sleep on this semaphore : 20 */ 244 sv_t t_wait; /* ticket wait queue : 20 */
245 struct xlog_ticket *t_next; /* :4|8 */ 245 struct xlog_ticket *t_next; /* :4|8 */
246 struct xlog_ticket *t_prev; /* :4|8 */ 246 struct xlog_ticket *t_prev; /* :4|8 */
247 xlog_tid_t t_tid; /* transaction identifier : 4 */ 247 xlog_tid_t t_tid; /* transaction identifier : 4 */
@@ -314,7 +314,7 @@ typedef struct xlog_rec_ext_header {
314 * xlog_rec_header_t into the reserved space. 314 * xlog_rec_header_t into the reserved space.
315 * - ic_data follows, so a write to disk can start at the beginning of 315 * - ic_data follows, so a write to disk can start at the beginning of
316 * the iclog. 316 * the iclog.
317 * - ic_forcesema is used to implement synchronous forcing of the iclog to disk. 317 * - ic_forcewait is used to implement synchronous forcing of the iclog to disk.
318 * - ic_next is the pointer to the next iclog in the ring. 318 * - ic_next is the pointer to the next iclog in the ring.
319 * - ic_bp is a pointer to the buffer used to write this incore log to disk. 319 * - ic_bp is a pointer to the buffer used to write this incore log to disk.
320 * - ic_log is a pointer back to the global log structure. 320 * - ic_log is a pointer back to the global log structure.
@@ -339,8 +339,8 @@ typedef struct xlog_rec_ext_header {
339 * and move everything else out to subsequent cachelines. 339 * and move everything else out to subsequent cachelines.
340 */ 340 */
341typedef struct xlog_iclog_fields { 341typedef struct xlog_iclog_fields {
342 sv_t ic_forcesema; 342 sv_t ic_force_wait;
343 sv_t ic_writesema; 343 sv_t ic_write_wait;
344 struct xlog_in_core *ic_next; 344 struct xlog_in_core *ic_next;
345 struct xlog_in_core *ic_prev; 345 struct xlog_in_core *ic_prev;
346 struct xfs_buf *ic_bp; 346 struct xfs_buf *ic_bp;
@@ -377,8 +377,8 @@ typedef struct xlog_in_core {
377/* 377/*
378 * Defines to save our code from this glop. 378 * Defines to save our code from this glop.
379 */ 379 */
380#define ic_forcesema hic_fields.ic_forcesema 380#define ic_force_wait hic_fields.ic_force_wait
381#define ic_writesema hic_fields.ic_writesema 381#define ic_write_wait hic_fields.ic_write_wait
382#define ic_next hic_fields.ic_next 382#define ic_next hic_fields.ic_next
383#define ic_prev hic_fields.ic_prev 383#define ic_prev hic_fields.ic_prev
384#define ic_bp hic_fields.ic_bp 384#define ic_bp hic_fields.ic_bp
@@ -423,10 +423,8 @@ typedef struct log {
423 int l_logBBsize; /* size of log in BB chunks */ 423 int l_logBBsize; /* size of log in BB chunks */
424 424
425 /* The following block of fields are changed while holding icloglock */ 425 /* The following block of fields are changed while holding icloglock */
426 sema_t l_flushsema ____cacheline_aligned_in_smp; 426 sv_t l_flush_wait ____cacheline_aligned_in_smp;
427 /* iclog flushing semaphore */ 427 /* waiting for iclog flush */
428 int l_flushcnt; /* # of procs waiting on this
429 * sema */
430 int l_covered_state;/* state of "covering disk 428 int l_covered_state;/* state of "covering disk
431 * log entries" */ 429 * log entries" */
432 xlog_in_core_t *l_iclog; /* head log queue */ 430 xlog_in_core_t *l_iclog; /* head log queue */
@@ -470,7 +468,7 @@ extern int xlog_find_tail(xlog_t *log,
470 xfs_daddr_t *head_blk, 468 xfs_daddr_t *head_blk,
471 xfs_daddr_t *tail_blk); 469 xfs_daddr_t *tail_blk);
472extern int xlog_recover(xlog_t *log); 470extern int xlog_recover(xlog_t *log);
473extern int xlog_recover_finish(xlog_t *log, int mfsi_flags); 471extern int xlog_recover_finish(xlog_t *log);
474extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); 472extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
475extern void xlog_recover_process_iunlinks(xlog_t *log); 473extern void xlog_recover_process_iunlinks(xlog_t *log);
476 474
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index e65ab4af0955..82d46ce69d5f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1715,8 +1715,7 @@ xlog_check_buffer_cancelled(
1715 } else { 1715 } else {
1716 prevp->bc_next = bcp->bc_next; 1716 prevp->bc_next = bcp->bc_next;
1717 } 1717 }
1718 kmem_free(bcp, 1718 kmem_free(bcp);
1719 sizeof(xfs_buf_cancel_t));
1720 } 1719 }
1721 } 1720 }
1722 return 1; 1721 return 1;
@@ -2519,7 +2518,7 @@ write_inode_buffer:
2519 2518
2520error: 2519error:
2521 if (need_free) 2520 if (need_free)
2522 kmem_free(in_f, sizeof(*in_f)); 2521 kmem_free(in_f);
2523 return XFS_ERROR(error); 2522 return XFS_ERROR(error);
2524} 2523}
2525 2524
@@ -2830,16 +2829,14 @@ xlog_recover_free_trans(
2830 item = item->ri_next; 2829 item = item->ri_next;
2831 /* Free the regions in the item. */ 2830 /* Free the regions in the item. */
2832 for (i = 0; i < free_item->ri_cnt; i++) { 2831 for (i = 0; i < free_item->ri_cnt; i++) {
2833 kmem_free(free_item->ri_buf[i].i_addr, 2832 kmem_free(free_item->ri_buf[i].i_addr);
2834 free_item->ri_buf[i].i_len);
2835 } 2833 }
2836 /* Free the item itself */ 2834 /* Free the item itself */
2837 kmem_free(free_item->ri_buf, 2835 kmem_free(free_item->ri_buf);
2838 (free_item->ri_total * sizeof(xfs_log_iovec_t))); 2836 kmem_free(free_item);
2839 kmem_free(free_item, sizeof(xlog_recover_item_t));
2840 } while (first_item != item); 2837 } while (first_item != item);
2841 /* Free the transaction recover structure */ 2838 /* Free the transaction recover structure */
2842 kmem_free(trans, sizeof(xlog_recover_t)); 2839 kmem_free(trans);
2843} 2840}
2844 2841
2845STATIC int 2842STATIC int
@@ -3786,8 +3783,7 @@ xlog_do_log_recovery(
3786 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3783 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
3787 XLOG_RECOVER_PASS1); 3784 XLOG_RECOVER_PASS1);
3788 if (error != 0) { 3785 if (error != 0) {
3789 kmem_free(log->l_buf_cancel_table, 3786 kmem_free(log->l_buf_cancel_table);
3790 XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
3791 log->l_buf_cancel_table = NULL; 3787 log->l_buf_cancel_table = NULL;
3792 return error; 3788 return error;
3793 } 3789 }
@@ -3806,8 +3802,7 @@ xlog_do_log_recovery(
3806 } 3802 }
3807#endif /* DEBUG */ 3803#endif /* DEBUG */
3808 3804
3809 kmem_free(log->l_buf_cancel_table, 3805 kmem_free(log->l_buf_cancel_table);
3810 XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
3811 log->l_buf_cancel_table = NULL; 3806 log->l_buf_cancel_table = NULL;
3812 3807
3813 return error; 3808 return error;
@@ -3945,8 +3940,7 @@ xlog_recover(
3945 */ 3940 */
3946int 3941int
3947xlog_recover_finish( 3942xlog_recover_finish(
3948 xlog_t *log, 3943 xlog_t *log)
3949 int mfsi_flags)
3950{ 3944{
3951 /* 3945 /*
3952 * Now we're ready to do the transactions needed for the 3946 * Now we're ready to do the transactions needed for the
@@ -3974,9 +3968,7 @@ xlog_recover_finish(
3974 xfs_log_force(log->l_mp, (xfs_lsn_t)0, 3968 xfs_log_force(log->l_mp, (xfs_lsn_t)0,
3975 (XFS_LOG_FORCE | XFS_LOG_SYNC)); 3969 (XFS_LOG_FORCE | XFS_LOG_SYNC));
3976 3970
3977 if ( (mfsi_flags & XFS_MFSI_NOUNLINK) == 0 ) { 3971 xlog_recover_process_iunlinks(log);
3978 xlog_recover_process_iunlinks(log);
3979 }
3980 3972
3981 xlog_recover_check_summary(log); 3973 xlog_recover_check_summary(log);
3982 3974
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da3988453b71..a4503f5e9497 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -47,12 +47,10 @@
47 47
48STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t); 48STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t);
49STATIC int xfs_uuid_mount(xfs_mount_t *); 49STATIC int xfs_uuid_mount(xfs_mount_t *);
50STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
51STATIC void xfs_unmountfs_wait(xfs_mount_t *); 50STATIC void xfs_unmountfs_wait(xfs_mount_t *);
52 51
53 52
54#ifdef HAVE_PERCPU_SB 53#ifdef HAVE_PERCPU_SB
55STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
56STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, 54STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
57 int); 55 int);
58STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, 56STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
@@ -63,7 +61,6 @@ STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
63 61
64#else 62#else
65 63
66#define xfs_icsb_destroy_counters(mp) do { } while (0)
67#define xfs_icsb_balance_counter(mp, a, b) do { } while (0) 64#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
68#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) 65#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
69#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) 66#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
@@ -126,34 +123,12 @@ static const struct {
126}; 123};
127 124
128/* 125/*
129 * Return a pointer to an initialized xfs_mount structure.
130 */
131xfs_mount_t *
132xfs_mount_init(void)
133{
134 xfs_mount_t *mp;
135
136 mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
137
138 if (xfs_icsb_init_counters(mp)) {
139 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
140 }
141
142 spin_lock_init(&mp->m_sb_lock);
143 mutex_init(&mp->m_ilock);
144 mutex_init(&mp->m_growlock);
145 atomic_set(&mp->m_active_trans, 0);
146
147 return mp;
148}
149
150/*
151 * Free up the resources associated with a mount structure. Assume that 126 * Free up the resources associated with a mount structure. Assume that
152 * the structure was initially zeroed, so we can tell which fields got 127 * the structure was initially zeroed, so we can tell which fields got
153 * initialized. 128 * initialized.
154 */ 129 */
155void 130STATIC void
156xfs_mount_free( 131xfs_free_perag(
157 xfs_mount_t *mp) 132 xfs_mount_t *mp)
158{ 133{
159 if (mp->m_perag) { 134 if (mp->m_perag) {
@@ -161,28 +136,9 @@ xfs_mount_free(
161 136
162 for (agno = 0; agno < mp->m_maxagi; agno++) 137 for (agno = 0; agno < mp->m_maxagi; agno++)
163 if (mp->m_perag[agno].pagb_list) 138 if (mp->m_perag[agno].pagb_list)
164 kmem_free(mp->m_perag[agno].pagb_list, 139 kmem_free(mp->m_perag[agno].pagb_list);
165 sizeof(xfs_perag_busy_t) * 140 kmem_free(mp->m_perag);
166 XFS_PAGB_NUM_SLOTS);
167 kmem_free(mp->m_perag,
168 sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
169 } 141 }
170
171 spinlock_destroy(&mp->m_ail_lock);
172 spinlock_destroy(&mp->m_sb_lock);
173 mutex_destroy(&mp->m_ilock);
174 mutex_destroy(&mp->m_growlock);
175 if (mp->m_quotainfo)
176 XFS_QM_DONE(mp);
177
178 if (mp->m_fsname != NULL)
179 kmem_free(mp->m_fsname, mp->m_fsname_len);
180 if (mp->m_rtname != NULL)
181 kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
182 if (mp->m_logname != NULL)
183 kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);
184
185 xfs_icsb_destroy_counters(mp);
186} 142}
187 143
188/* 144/*
@@ -288,6 +244,19 @@ xfs_mount_validate_sb(
288 return XFS_ERROR(EFSCORRUPTED); 244 return XFS_ERROR(EFSCORRUPTED);
289 } 245 }
290 246
247 /*
248 * Until this is fixed only page-sized or smaller data blocks work.
249 */
250 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
251 xfs_fs_mount_cmn_err(flags,
252 "file system with blocksize %d bytes",
253 sbp->sb_blocksize);
254 xfs_fs_mount_cmn_err(flags,
255 "only pagesize (%ld) or less will currently work.",
256 PAGE_SIZE);
257 return XFS_ERROR(ENOSYS);
258 }
259
291 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || 260 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
292 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { 261 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
293 xfs_fs_mount_cmn_err(flags, 262 xfs_fs_mount_cmn_err(flags,
@@ -309,19 +278,6 @@ xfs_mount_validate_sb(
309 return XFS_ERROR(ENOSYS); 278 return XFS_ERROR(ENOSYS);
310 } 279 }
311 280
312 /*
313 * Until this is fixed only page-sized or smaller data blocks work.
314 */
315 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
316 xfs_fs_mount_cmn_err(flags,
317 "file system with blocksize %d bytes",
318 sbp->sb_blocksize);
319 xfs_fs_mount_cmn_err(flags,
320 "only pagesize (%ld) or less will currently work.",
321 PAGE_SIZE);
322 return XFS_ERROR(ENOSYS);
323 }
324
325 return 0; 281 return 0;
326} 282}
327 283
@@ -734,11 +690,11 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
734 * Update alignment values based on mount options and sb values 690 * Update alignment values based on mount options and sb values
735 */ 691 */
736STATIC int 692STATIC int
737xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags) 693xfs_update_alignment(xfs_mount_t *mp, __uint64_t *update_flags)
738{ 694{
739 xfs_sb_t *sbp = &(mp->m_sb); 695 xfs_sb_t *sbp = &(mp->m_sb);
740 696
741 if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) { 697 if (mp->m_dalign) {
742 /* 698 /*
743 * If stripe unit and stripe width are not multiples 699 * If stripe unit and stripe width are not multiples
744 * of the fs blocksize turn off alignment. 700 * of the fs blocksize turn off alignment.
@@ -894,7 +850,7 @@ xfs_set_inoalignment(xfs_mount_t *mp)
894 * Check that the data (and log if separate) are an ok size. 850 * Check that the data (and log if separate) are an ok size.
895 */ 851 */
896STATIC int 852STATIC int
897xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) 853xfs_check_sizes(xfs_mount_t *mp)
898{ 854{
899 xfs_buf_t *bp; 855 xfs_buf_t *bp;
900 xfs_daddr_t d; 856 xfs_daddr_t d;
@@ -917,8 +873,7 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags)
917 return error; 873 return error;
918 } 874 }
919 875
920 if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) && 876 if (mp->m_logdev_targp != mp->m_ddev_targp) {
921 mp->m_logdev_targp != mp->m_ddev_targp) {
922 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 877 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
923 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { 878 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
924 cmn_err(CE_WARN, "XFS: size check 3 failed"); 879 cmn_err(CE_WARN, "XFS: size check 3 failed");
@@ -953,15 +908,13 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags)
953 */ 908 */
954int 909int
955xfs_mountfs( 910xfs_mountfs(
956 xfs_mount_t *mp, 911 xfs_mount_t *mp)
957 int mfsi_flags)
958{ 912{
959 xfs_sb_t *sbp = &(mp->m_sb); 913 xfs_sb_t *sbp = &(mp->m_sb);
960 xfs_inode_t *rip; 914 xfs_inode_t *rip;
961 __uint64_t resblks; 915 __uint64_t resblks;
962 __int64_t update_flags = 0LL; 916 __int64_t update_flags = 0LL;
963 uint quotamount, quotaflags; 917 uint quotamount, quotaflags;
964 int agno;
965 int uuid_mounted = 0; 918 int uuid_mounted = 0;
966 int error = 0; 919 int error = 0;
967 920
@@ -994,9 +947,19 @@ xfs_mountfs(
994 * Re-check for ATTR2 in case it was found in bad_features2 947 * Re-check for ATTR2 in case it was found in bad_features2
995 * slot. 948 * slot.
996 */ 949 */
997 if (xfs_sb_version_hasattr2(&mp->m_sb)) 950 if (xfs_sb_version_hasattr2(&mp->m_sb) &&
951 !(mp->m_flags & XFS_MOUNT_NOATTR2))
998 mp->m_flags |= XFS_MOUNT_ATTR2; 952 mp->m_flags |= XFS_MOUNT_ATTR2;
953 }
954
955 if (xfs_sb_version_hasattr2(&mp->m_sb) &&
956 (mp->m_flags & XFS_MOUNT_NOATTR2)) {
957 xfs_sb_version_removeattr2(&mp->m_sb);
958 update_flags |= XFS_SB_FEATURES2;
999 959
960 /* update sb_versionnum for the clearing of the morebits */
961 if (!sbp->sb_features2)
962 update_flags |= XFS_SB_VERSIONNUM;
1000 } 963 }
1001 964
1002 /* 965 /*
@@ -1005,7 +968,7 @@ xfs_mountfs(
1005 * allocator alignment is within an ag, therefore ag has 968 * allocator alignment is within an ag, therefore ag has
1006 * to be aligned at stripe boundary. 969 * to be aligned at stripe boundary.
1007 */ 970 */
1008 error = xfs_update_alignment(mp, mfsi_flags, &update_flags); 971 error = xfs_update_alignment(mp, &update_flags);
1009 if (error) 972 if (error)
1010 goto error1; 973 goto error1;
1011 974
@@ -1024,8 +987,7 @@ xfs_mountfs(
1024 * since a single partition filesystem is identical to a single 987 * since a single partition filesystem is identical to a single
1025 * partition volume/filesystem. 988 * partition volume/filesystem.
1026 */ 989 */
1027 if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && 990 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
1028 (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
1029 if (xfs_uuid_mount(mp)) { 991 if (xfs_uuid_mount(mp)) {
1030 error = XFS_ERROR(EINVAL); 992 error = XFS_ERROR(EINVAL);
1031 goto error1; 993 goto error1;
@@ -1053,7 +1015,7 @@ xfs_mountfs(
1053 /* 1015 /*
1054 * Check that the data (and log if separate) are an ok size. 1016 * Check that the data (and log if separate) are an ok size.
1055 */ 1017 */
1056 error = xfs_check_sizes(mp, mfsi_flags); 1018 error = xfs_check_sizes(mp);
1057 if (error) 1019 if (error)
1058 goto error1; 1020 goto error1;
1059 1021
@@ -1067,13 +1029,6 @@ xfs_mountfs(
1067 } 1029 }
1068 1030
1069 /* 1031 /*
1070 * For client case we are done now
1071 */
1072 if (mfsi_flags & XFS_MFSI_CLIENT) {
1073 return 0;
1074 }
1075
1076 /*
1077 * Copies the low order bits of the timestamp and the randomly 1032 * Copies the low order bits of the timestamp and the randomly
1078 * set "sequence" number out of a UUID. 1033 * set "sequence" number out of a UUID.
1079 */ 1034 */
@@ -1097,8 +1052,10 @@ xfs_mountfs(
1097 * Allocate and initialize the per-ag data. 1052 * Allocate and initialize the per-ag data.
1098 */ 1053 */
1099 init_rwsem(&mp->m_peraglock); 1054 init_rwsem(&mp->m_peraglock);
1100 mp->m_perag = 1055 mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t),
1101 kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP); 1056 KM_MAYFAIL);
1057 if (!mp->m_perag)
1058 goto error1;
1102 1059
1103 mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); 1060 mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
1104 1061
@@ -1210,7 +1167,7 @@ xfs_mountfs(
1210 * delayed until after the root and real-time bitmap inodes 1167 * delayed until after the root and real-time bitmap inodes
1211 * were consistently read in. 1168 * were consistently read in.
1212 */ 1169 */
1213 error = xfs_log_mount_finish(mp, mfsi_flags); 1170 error = xfs_log_mount_finish(mp);
1214 if (error) { 1171 if (error) {
1215 cmn_err(CE_WARN, "XFS: log mount finish failed"); 1172 cmn_err(CE_WARN, "XFS: log mount finish failed");
1216 goto error4; 1173 goto error4;
@@ -1219,7 +1176,7 @@ xfs_mountfs(
1219 /* 1176 /*
1220 * Complete the quota initialisation, post-log-replay component. 1177 * Complete the quota initialisation, post-log-replay component.
1221 */ 1178 */
1222 error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags); 1179 error = XFS_QM_MOUNT(mp, quotamount, quotaflags);
1223 if (error) 1180 if (error)
1224 goto error4; 1181 goto error4;
1225 1182
@@ -1253,31 +1210,25 @@ xfs_mountfs(
1253 error3: 1210 error3:
1254 xfs_log_unmount_dealloc(mp); 1211 xfs_log_unmount_dealloc(mp);
1255 error2: 1212 error2:
1256 for (agno = 0; agno < sbp->sb_agcount; agno++) 1213 xfs_free_perag(mp);
1257 if (mp->m_perag[agno].pagb_list)
1258 kmem_free(mp->m_perag[agno].pagb_list,
1259 sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
1260 kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
1261 mp->m_perag = NULL;
1262 /* FALLTHROUGH */
1263 error1: 1214 error1:
1264 if (uuid_mounted) 1215 if (uuid_mounted)
1265 xfs_uuid_unmount(mp); 1216 uuid_table_remove(&mp->m_sb.sb_uuid);
1266 xfs_freesb(mp);
1267 return error; 1217 return error;
1268} 1218}
1269 1219
1270/* 1220/*
1271 * xfs_unmountfs
1272 *
1273 * This flushes out the inodes,dquots and the superblock, unmounts the 1221 * This flushes out the inodes,dquots and the superblock, unmounts the
1274 * log and makes sure that incore structures are freed. 1222 * log and makes sure that incore structures are freed.
1275 */ 1223 */
1276int 1224void
1277xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) 1225xfs_unmountfs(
1226 struct xfs_mount *mp)
1278{ 1227{
1279 __uint64_t resblks; 1228 __uint64_t resblks;
1280 int error = 0; 1229 int error;
1230
1231 IRELE(mp->m_rootip);
1281 1232
1282 /* 1233 /*
1283 * We can potentially deadlock here if we have an inode cluster 1234 * We can potentially deadlock here if we have an inode cluster
@@ -1334,32 +1285,20 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1334 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1285 xfs_unmountfs_wait(mp); /* wait for async bufs */
1335 xfs_log_unmount(mp); /* Done! No more fs ops. */ 1286 xfs_log_unmount(mp); /* Done! No more fs ops. */
1336 1287
1337 xfs_freesb(mp);
1338
1339 /* 1288 /*
1340 * All inodes from this mount point should be freed. 1289 * All inodes from this mount point should be freed.
1341 */ 1290 */
1342 ASSERT(mp->m_inodes == NULL); 1291 ASSERT(mp->m_inodes == NULL);
1343 1292
1344 xfs_unmountfs_close(mp, cr);
1345 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) 1293 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
1346 xfs_uuid_unmount(mp); 1294 uuid_table_remove(&mp->m_sb.sb_uuid);
1347 1295
1348#if defined(DEBUG) || defined(INDUCE_IO_ERROR) 1296#if defined(DEBUG)
1349 xfs_errortag_clearall(mp, 0); 1297 xfs_errortag_clearall(mp, 0);
1350#endif 1298#endif
1351 xfs_mount_free(mp); 1299 xfs_free_perag(mp);
1352 return 0; 1300 if (mp->m_quotainfo)
1353} 1301 XFS_QM_DONE(mp);
1354
1355void
1356xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
1357{
1358 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
1359 xfs_free_buftarg(mp->m_logdev_targp, 1);
1360 if (mp->m_rtdev_targp)
1361 xfs_free_buftarg(mp->m_rtdev_targp, 1);
1362 xfs_free_buftarg(mp->m_ddev_targp, 0);
1363} 1302}
1364 1303
1365STATIC void 1304STATIC void
@@ -1905,16 +1844,6 @@ xfs_uuid_mount(
1905} 1844}
1906 1845
1907/* 1846/*
1908 * Remove filesystem from the UUID table.
1909 */
1910STATIC void
1911xfs_uuid_unmount(
1912 xfs_mount_t *mp)
1913{
1914 uuid_table_remove(&mp->m_sb.sb_uuid);
1915}
1916
1917/*
1918 * Used to log changes to the superblock unit and width fields which could 1847 * Used to log changes to the superblock unit and width fields which could
1919 * be altered by the mount options, as well as any potential sb_features2 1848 * be altered by the mount options, as well as any potential sb_features2
1920 * fixup. Only the first superblock is updated. 1849 * fixup. Only the first superblock is updated.
@@ -1928,7 +1857,8 @@ xfs_mount_log_sb(
1928 int error; 1857 int error;
1929 1858
1930 ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID | 1859 ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
1931 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2)); 1860 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 |
1861 XFS_SB_VERSIONNUM));
1932 1862
1933 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); 1863 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
1934 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1864 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
@@ -2109,7 +2039,7 @@ xfs_icsb_reinit_counters(
2109 xfs_icsb_unlock(mp); 2039 xfs_icsb_unlock(mp);
2110} 2040}
2111 2041
2112STATIC void 2042void
2113xfs_icsb_destroy_counters( 2043xfs_icsb_destroy_counters(
2114 xfs_mount_t *mp) 2044 xfs_mount_t *mp)
2115{ 2045{
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 63e0693a358a..f3c1024b1241 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -61,6 +61,7 @@ struct xfs_bmap_free;
61struct xfs_extdelta; 61struct xfs_extdelta;
62struct xfs_swapext; 62struct xfs_swapext;
63struct xfs_mru_cache; 63struct xfs_mru_cache;
64struct xfs_nameops;
64 65
65/* 66/*
66 * Prototypes and functions for the Data Migration subsystem. 67 * Prototypes and functions for the Data Migration subsystem.
@@ -113,7 +114,7 @@ struct xfs_dqtrxops;
113struct xfs_quotainfo; 114struct xfs_quotainfo;
114 115
115typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); 116typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *);
116typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint, int); 117typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint);
117typedef int (*xfs_qmunmount_t)(struct xfs_mount *); 118typedef int (*xfs_qmunmount_t)(struct xfs_mount *);
118typedef void (*xfs_qmdone_t)(struct xfs_mount *); 119typedef void (*xfs_qmdone_t)(struct xfs_mount *);
119typedef void (*xfs_dqrele_t)(struct xfs_dquot *); 120typedef void (*xfs_dqrele_t)(struct xfs_dquot *);
@@ -157,8 +158,8 @@ typedef struct xfs_qmops {
157 158
158#define XFS_QM_INIT(mp, mnt, fl) \ 159#define XFS_QM_INIT(mp, mnt, fl) \
159 (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl) 160 (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl)
160#define XFS_QM_MOUNT(mp, mnt, fl, mfsi_flags) \ 161#define XFS_QM_MOUNT(mp, mnt, fl) \
161 (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl, mfsi_flags) 162 (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl)
162#define XFS_QM_UNMOUNT(mp) \ 163#define XFS_QM_UNMOUNT(mp) \
163 (*(mp)->m_qm_ops->xfs_qmunmount)(mp) 164 (*(mp)->m_qm_ops->xfs_qmunmount)(mp)
164#define XFS_QM_DONE(mp) \ 165#define XFS_QM_DONE(mp) \
@@ -210,12 +211,14 @@ typedef struct xfs_icsb_cnts {
210 211
211extern int xfs_icsb_init_counters(struct xfs_mount *); 212extern int xfs_icsb_init_counters(struct xfs_mount *);
212extern void xfs_icsb_reinit_counters(struct xfs_mount *); 213extern void xfs_icsb_reinit_counters(struct xfs_mount *);
214extern void xfs_icsb_destroy_counters(struct xfs_mount *);
213extern void xfs_icsb_sync_counters(struct xfs_mount *, int); 215extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
214extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); 216extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
215 217
216#else 218#else
217#define xfs_icsb_init_counters(mp) (0) 219#define xfs_icsb_init_counters(mp) (0)
218#define xfs_icsb_reinit_counters(mp) do { } while (0) 220#define xfs_icsb_destroy_counters(mp) do { } while (0)
221#define xfs_icsb_reinit_counters(mp) do { } while (0)
219#define xfs_icsb_sync_counters(mp, flags) do { } while (0) 222#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
220#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) 223#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
221#endif 224#endif
@@ -313,6 +316,7 @@ typedef struct xfs_mount {
313 __uint8_t m_inode_quiesce;/* call quiesce on new inodes. 316 __uint8_t m_inode_quiesce;/* call quiesce on new inodes.
314 field governed by m_ilock */ 317 field governed by m_ilock */
315 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ 318 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
319 const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
316 int m_dirblksize; /* directory block sz--bytes */ 320 int m_dirblksize; /* directory block sz--bytes */
317 int m_dirblkfsbs; /* directory block sz--fsbs */ 321 int m_dirblkfsbs; /* directory block sz--fsbs */
318 xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */ 322 xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */
@@ -378,6 +382,7 @@ typedef struct xfs_mount {
378 counters */ 382 counters */
379#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams 383#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams
380 allocator */ 384 allocator */
385#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */
381 386
382 387
383/* 388/*
@@ -437,13 +442,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
437/* 442/*
438 * Flags for xfs_mountfs 443 * Flags for xfs_mountfs
439 */ 444 */
440#define XFS_MFSI_SECOND 0x01 /* Secondary mount -- skip stuff */
441#define XFS_MFSI_CLIENT 0x02 /* Is a client -- skip lots of stuff */
442/* XFS_MFSI_RRINODES */
443#define XFS_MFSI_NOUNLINK 0x08 /* Skip unlinked inode processing in */
444 /* log recovery */
445#define XFS_MFSI_NO_QUOTACHECK 0x10 /* Skip quotacheck processing */
446/* XFS_MFSI_CONVERT_SUNIT */
447#define XFS_MFSI_QUIET 0x40 /* Be silent if mount errors found */ 445#define XFS_MFSI_QUIET 0x40 /* Be silent if mount errors found */
448 446
449#define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d) 447#define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d)
@@ -510,15 +508,12 @@ typedef struct xfs_mod_sb {
510#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) 508#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock))
511#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) 509#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock))
512 510
513extern xfs_mount_t *xfs_mount_init(void);
514extern void xfs_mod_sb(xfs_trans_t *, __int64_t); 511extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
515extern int xfs_log_sbcount(xfs_mount_t *, uint); 512extern int xfs_log_sbcount(xfs_mount_t *, uint);
516extern void xfs_mount_free(xfs_mount_t *mp); 513extern int xfs_mountfs(xfs_mount_t *mp);
517extern int xfs_mountfs(xfs_mount_t *mp, int);
518extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); 514extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
519 515
520extern int xfs_unmountfs(xfs_mount_t *, struct cred *); 516extern void xfs_unmountfs(xfs_mount_t *);
521extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *);
522extern int xfs_unmountfs_writesb(xfs_mount_t *); 517extern int xfs_unmountfs_writesb(xfs_mount_t *);
523extern int xfs_unmount_flush(xfs_mount_t *, int); 518extern int xfs_unmount_flush(xfs_mount_t *, int);
524extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); 519extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
@@ -544,9 +539,6 @@ extern void xfs_qmops_put(struct xfs_mount *);
544 539
545extern struct xfs_dmops xfs_dmcore_xfs; 540extern struct xfs_dmops xfs_dmcore_xfs;
546 541
547extern int xfs_init(void);
548extern void xfs_cleanup(void);
549
550#endif /* __KERNEL__ */ 542#endif /* __KERNEL__ */
551 543
552#endif /* __XFS_MOUNT_H__ */ 544#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index a0b2c0a2589a..afee7eb24323 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -307,15 +307,18 @@ xfs_mru_cache_init(void)
307 xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t), 307 xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
308 "xfs_mru_cache_elem"); 308 "xfs_mru_cache_elem");
309 if (!xfs_mru_elem_zone) 309 if (!xfs_mru_elem_zone)
310 return ENOMEM; 310 goto out;
311 311
312 xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache"); 312 xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
313 if (!xfs_mru_reap_wq) { 313 if (!xfs_mru_reap_wq)
314 kmem_zone_destroy(xfs_mru_elem_zone); 314 goto out_destroy_mru_elem_zone;
315 return ENOMEM;
316 }
317 315
318 return 0; 316 return 0;
317
318 out_destroy_mru_elem_zone:
319 kmem_zone_destroy(xfs_mru_elem_zone);
320 out:
321 return -ENOMEM;
319} 322}
320 323
321void 324void
@@ -382,9 +385,9 @@ xfs_mru_cache_create(
382 385
383exit: 386exit:
384 if (err && mru && mru->lists) 387 if (err && mru && mru->lists)
385 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); 388 kmem_free(mru->lists);
386 if (err && mru) 389 if (err && mru)
387 kmem_free(mru, sizeof(*mru)); 390 kmem_free(mru);
388 391
389 return err; 392 return err;
390} 393}
@@ -424,8 +427,8 @@ xfs_mru_cache_destroy(
424 427
425 xfs_mru_cache_flush(mru); 428 xfs_mru_cache_flush(mru);
426 429
427 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); 430 kmem_free(mru->lists);
428 kmem_free(mru, sizeof(*mru)); 431 kmem_free(mru);
429} 432}
430 433
431/* 434/*
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d8063e1ad298..d700dacdb10e 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -336,22 +336,18 @@ xfs_rename(
336 ASSERT(error != EEXIST); 336 ASSERT(error != EEXIST);
337 if (error) 337 if (error)
338 goto abort_return; 338 goto abort_return;
339 xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
340
341 } else {
342 /*
343 * We always want to hit the ctime on the source inode.
344 * We do it in the if clause above for the 'new_parent &&
345 * src_is_directory' case, and here we get all the other
346 * cases. This isn't strictly required by the standards
347 * since the source inode isn't really being changed,
348 * but old unix file systems did it and some incremental
349 * backup programs won't work without it.
350 */
351 xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
352 } 339 }
353 340
354 /* 341 /*
342 * We always want to hit the ctime on the source inode.
343 *
344 * This isn't strictly required by the standards since the source
345 * inode isn't really being changed, but old unix file systems did
346 * it and some incremental backup programs won't work without it.
347 */
348 xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
349
350 /*
355 * Adjust the link count on src_dp. This is necessary when 351 * Adjust the link count on src_dp. This is necessary when
356 * renaming a directory, either within one parent when 352 * renaming a directory, either within one parent when
357 * the target existed, or across two parent directories. 353 * the target existed, or across two parent directories.
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a0dc6e5bc5b9..e2f68de16159 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -74,18 +74,6 @@ STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int,
74 */ 74 */
75 75
76/* 76/*
77 * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set.
78 */
79STATIC int
80xfs_lowbit32(
81 __uint32_t v)
82{
83 if (v)
84 return ffs(v) - 1;
85 return -1;
86}
87
88/*
89 * Allocate space to the bitmap or summary file, and zero it, for growfs. 77 * Allocate space to the bitmap or summary file, and zero it, for growfs.
90 */ 78 */
91STATIC int /* error */ 79STATIC int /* error */
@@ -450,6 +438,7 @@ xfs_rtallocate_extent_near(
450 } 438 }
451 bbno = XFS_BITTOBLOCK(mp, bno); 439 bbno = XFS_BITTOBLOCK(mp, bno);
452 i = 0; 440 i = 0;
441 ASSERT(minlen != 0);
453 log2len = xfs_highbit32(minlen); 442 log2len = xfs_highbit32(minlen);
454 /* 443 /*
455 * Loop over all bitmap blocks (bbno + i is current block). 444 * Loop over all bitmap blocks (bbno + i is current block).
@@ -618,6 +607,8 @@ xfs_rtallocate_extent_size(
618 xfs_suminfo_t sum; /* summary information for extents */ 607 xfs_suminfo_t sum; /* summary information for extents */
619 608
620 ASSERT(minlen % prod == 0 && maxlen % prod == 0); 609 ASSERT(minlen % prod == 0 && maxlen % prod == 0);
610 ASSERT(maxlen != 0);
611
621 /* 612 /*
622 * Loop over all the levels starting with maxlen. 613 * Loop over all the levels starting with maxlen.
623 * At each level, look at all the bitmap blocks, to see if there 614 * At each level, look at all the bitmap blocks, to see if there
@@ -675,6 +666,9 @@ xfs_rtallocate_extent_size(
675 *rtblock = NULLRTBLOCK; 666 *rtblock = NULLRTBLOCK;
676 return 0; 667 return 0;
677 } 668 }
669 ASSERT(minlen != 0);
670 ASSERT(maxlen != 0);
671
678 /* 672 /*
679 * Loop over sizes, from maxlen down to minlen. 673 * Loop over sizes, from maxlen down to minlen.
680 * This time, when we do the allocations, allow smaller ones 674 * This time, when we do the allocations, allow smaller ones
@@ -1961,6 +1955,7 @@ xfs_growfs_rt(
1961 nsbp->sb_blocksize * nsbp->sb_rextsize); 1955 nsbp->sb_blocksize * nsbp->sb_rextsize);
1962 nsbp->sb_rextents = nsbp->sb_rblocks; 1956 nsbp->sb_rextents = nsbp->sb_rblocks;
1963 do_div(nsbp->sb_rextents, nsbp->sb_rextsize); 1957 do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
1958 ASSERT(nsbp->sb_rextents != 0);
1964 nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents); 1959 nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents);
1965 nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; 1960 nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
1966 nrsumsize = 1961 nrsumsize =
@@ -2062,7 +2057,7 @@ xfs_growfs_rt(
2062 /* 2057 /*
2063 * Free the fake mp structure. 2058 * Free the fake mp structure.
2064 */ 2059 */
2065 kmem_free(nmp, sizeof(*nmp)); 2060 kmem_free(nmp);
2066 2061
2067 return error; 2062 return error;
2068} 2063}
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index b0f31c09a76d..3a82576dde9a 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -314,7 +314,7 @@ xfs_bioerror_relse(
314 * ASYNC buffers. 314 * ASYNC buffers.
315 */ 315 */
316 XFS_BUF_ERROR(bp, EIO); 316 XFS_BUF_ERROR(bp, EIO);
317 XFS_BUF_V_IODONESEMA(bp); 317 XFS_BUF_FINISH_IOWAIT(bp);
318 } else { 318 } else {
319 xfs_buf_relse(bp); 319 xfs_buf_relse(bp);
320 } 320 }
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index d904efe7f871..3f8cf1587f4c 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -46,10 +46,12 @@ struct xfs_mount;
46#define XFS_SB_VERSION_SECTORBIT 0x0800 46#define XFS_SB_VERSION_SECTORBIT 0x0800
47#define XFS_SB_VERSION_EXTFLGBIT 0x1000 47#define XFS_SB_VERSION_EXTFLGBIT 0x1000
48#define XFS_SB_VERSION_DIRV2BIT 0x2000 48#define XFS_SB_VERSION_DIRV2BIT 0x2000
49#define XFS_SB_VERSION_BORGBIT 0x4000 /* ASCII only case-insens. */
49#define XFS_SB_VERSION_MOREBITSBIT 0x8000 50#define XFS_SB_VERSION_MOREBITSBIT 0x8000
50#define XFS_SB_VERSION_OKSASHFBITS \ 51#define XFS_SB_VERSION_OKSASHFBITS \
51 (XFS_SB_VERSION_EXTFLGBIT | \ 52 (XFS_SB_VERSION_EXTFLGBIT | \
52 XFS_SB_VERSION_DIRV2BIT) 53 XFS_SB_VERSION_DIRV2BIT | \
54 XFS_SB_VERSION_BORGBIT)
53#define XFS_SB_VERSION_OKREALFBITS \ 55#define XFS_SB_VERSION_OKREALFBITS \
54 (XFS_SB_VERSION_ATTRBIT | \ 56 (XFS_SB_VERSION_ATTRBIT | \
55 XFS_SB_VERSION_NLINKBIT | \ 57 XFS_SB_VERSION_NLINKBIT | \
@@ -437,6 +439,12 @@ static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
437 ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT); 439 ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
438} 440}
439 441
442static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
443{
444 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
445 (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
446}
447
440static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) 448static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
441{ 449{
442 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 450 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
@@ -473,6 +481,13 @@ static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
473 ((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT))); 481 ((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT)));
474} 482}
475 483
484static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
485{
486 sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
487 if (!sbp->sb_features2)
488 sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
489}
490
476/* 491/*
477 * end of superblock version macros 492 * end of superblock version macros
478 */ 493 */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 140386434aa3..4e1c22a23be5 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -43,6 +43,7 @@
43#include "xfs_quota.h" 43#include "xfs_quota.h"
44#include "xfs_trans_priv.h" 44#include "xfs_trans_priv.h"
45#include "xfs_trans_space.h" 45#include "xfs_trans_space.h"
46#include "xfs_inode_item.h"
46 47
47 48
48STATIC void xfs_trans_apply_sb_deltas(xfs_trans_t *); 49STATIC void xfs_trans_apply_sb_deltas(xfs_trans_t *);
@@ -253,7 +254,7 @@ _xfs_trans_alloc(
253 tp->t_mountp = mp; 254 tp->t_mountp = mp;
254 tp->t_items_free = XFS_LIC_NUM_SLOTS; 255 tp->t_items_free = XFS_LIC_NUM_SLOTS;
255 tp->t_busy_free = XFS_LBC_NUM_SLOTS; 256 tp->t_busy_free = XFS_LBC_NUM_SLOTS;
256 XFS_LIC_INIT(&(tp->t_items)); 257 xfs_lic_init(&(tp->t_items));
257 XFS_LBC_INIT(&(tp->t_busy)); 258 XFS_LBC_INIT(&(tp->t_busy));
258 return tp; 259 return tp;
259} 260}
@@ -282,7 +283,7 @@ xfs_trans_dup(
282 ntp->t_mountp = tp->t_mountp; 283 ntp->t_mountp = tp->t_mountp;
283 ntp->t_items_free = XFS_LIC_NUM_SLOTS; 284 ntp->t_items_free = XFS_LIC_NUM_SLOTS;
284 ntp->t_busy_free = XFS_LBC_NUM_SLOTS; 285 ntp->t_busy_free = XFS_LBC_NUM_SLOTS;
285 XFS_LIC_INIT(&(ntp->t_items)); 286 xfs_lic_init(&(ntp->t_items));
286 XFS_LBC_INIT(&(ntp->t_busy)); 287 XFS_LBC_INIT(&(ntp->t_busy));
287 288
288 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 289 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -889,7 +890,7 @@ shut_us_down:
889 890
890 tp->t_commit_lsn = commit_lsn; 891 tp->t_commit_lsn = commit_lsn;
891 if (nvec > XFS_TRANS_LOGVEC_COUNT) { 892 if (nvec > XFS_TRANS_LOGVEC_COUNT) {
892 kmem_free(log_vector, nvec * sizeof(xfs_log_iovec_t)); 893 kmem_free(log_vector);
893 } 894 }
894 895
895 /* 896 /*
@@ -1169,7 +1170,7 @@ xfs_trans_cancel(
1169 while (licp != NULL) { 1170 while (licp != NULL) {
1170 lidp = licp->lic_descs; 1171 lidp = licp->lic_descs;
1171 for (i = 0; i < licp->lic_unused; i++, lidp++) { 1172 for (i = 0; i < licp->lic_unused; i++, lidp++) {
1172 if (XFS_LIC_ISFREE(licp, i)) { 1173 if (xfs_lic_isfree(licp, i)) {
1173 continue; 1174 continue;
1174 } 1175 }
1175 1176
@@ -1216,6 +1217,68 @@ xfs_trans_free(
1216 kmem_zone_free(xfs_trans_zone, tp); 1217 kmem_zone_free(xfs_trans_zone, tp);
1217} 1218}
1218 1219
1220/*
1221 * Roll from one trans in the sequence of PERMANENT transactions to
1222 * the next: permanent transactions are only flushed out when
1223 * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon
1224 * as possible to let chunks of it go to the log. So we commit the
1225 * chunk we've been working on and get a new transaction to continue.
1226 */
1227int
1228xfs_trans_roll(
1229 struct xfs_trans **tpp,
1230 struct xfs_inode *dp)
1231{
1232 struct xfs_trans *trans;
1233 unsigned int logres, count;
1234 int error;
1235
1236 /*
1237 * Ensure that the inode is always logged.
1238 */
1239 trans = *tpp;
1240 xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
1241
1242 /*
1243 * Copy the critical parameters from one trans to the next.
1244 */
1245 logres = trans->t_log_res;
1246 count = trans->t_log_count;
1247 *tpp = xfs_trans_dup(trans);
1248
1249 /*
1250 * Commit the current transaction.
1251 * If this commit failed, then it'd just unlock those items that
1252 * are not marked ihold. That also means that a filesystem shutdown
1253 * is in progress. The caller takes the responsibility to cancel
1254 * the duplicate transaction that gets returned.
1255 */
1256 error = xfs_trans_commit(trans, 0);
1257 if (error)
1258 return (error);
1259
1260 trans = *tpp;
1261
1262 /*
1263 * Reserve space in the log for th next transaction.
1264 * This also pushes items in the "AIL", the list of logged items,
1265 * out to disk if they are taking up space at the tail of the log
1266 * that we want to use. This requires that either nothing be locked
1267 * across this call, or that anything that is locked be logged in
1268 * the prior and the next transactions.
1269 */
1270 error = xfs_trans_reserve(trans, 0, logres, 0,
1271 XFS_TRANS_PERM_LOG_RES, count);
1272 /*
1273 * Ensure that the inode is in the new transaction and locked.
1274 */
1275 if (error)
1276 return error;
1277
1278 xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
1279 xfs_trans_ihold(trans, dp);
1280 return 0;
1281}
1219 1282
1220/* 1283/*
1221 * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item(). 1284 * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item().
@@ -1253,7 +1316,7 @@ xfs_trans_committed(
1253 * Special case the chunk embedded in the transaction. 1316 * Special case the chunk embedded in the transaction.
1254 */ 1317 */
1255 licp = &(tp->t_items); 1318 licp = &(tp->t_items);
1256 if (!(XFS_LIC_ARE_ALL_FREE(licp))) { 1319 if (!(xfs_lic_are_all_free(licp))) {
1257 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); 1320 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
1258 } 1321 }
1259 1322
@@ -1262,10 +1325,10 @@ xfs_trans_committed(
1262 */ 1325 */
1263 licp = licp->lic_next; 1326 licp = licp->lic_next;
1264 while (licp != NULL) { 1327 while (licp != NULL) {
1265 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 1328 ASSERT(!xfs_lic_are_all_free(licp));
1266 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); 1329 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
1267 next_licp = licp->lic_next; 1330 next_licp = licp->lic_next;
1268 kmem_free(licp, sizeof(xfs_log_item_chunk_t)); 1331 kmem_free(licp);
1269 licp = next_licp; 1332 licp = next_licp;
1270 } 1333 }
1271 1334
@@ -1325,7 +1388,7 @@ xfs_trans_chunk_committed(
1325 1388
1326 lidp = licp->lic_descs; 1389 lidp = licp->lic_descs;
1327 for (i = 0; i < licp->lic_unused; i++, lidp++) { 1390 for (i = 0; i < licp->lic_unused; i++, lidp++) {
1328 if (XFS_LIC_ISFREE(licp, i)) { 1391 if (xfs_lic_isfree(licp, i)) {
1329 continue; 1392 continue;
1330 } 1393 }
1331 1394
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 0804207c7391..74c80bd2b0ec 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -210,62 +210,52 @@ typedef struct xfs_log_item_chunk {
210 * lic_unused to the right value (0 matches all free). The 210 * lic_unused to the right value (0 matches all free). The
211 * lic_descs.lid_index values are set up as each desc is allocated. 211 * lic_descs.lid_index values are set up as each desc is allocated.
212 */ 212 */
213#define XFS_LIC_INIT(cp) xfs_lic_init(cp)
214static inline void xfs_lic_init(xfs_log_item_chunk_t *cp) 213static inline void xfs_lic_init(xfs_log_item_chunk_t *cp)
215{ 214{
216 cp->lic_free = XFS_LIC_FREEMASK; 215 cp->lic_free = XFS_LIC_FREEMASK;
217} 216}
218 217
219#define XFS_LIC_INIT_SLOT(cp,slot) xfs_lic_init_slot(cp, slot)
220static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot) 218static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot)
221{ 219{
222 cp->lic_descs[slot].lid_index = (unsigned char)(slot); 220 cp->lic_descs[slot].lid_index = (unsigned char)(slot);
223} 221}
224 222
225#define XFS_LIC_VACANCY(cp) xfs_lic_vacancy(cp)
226static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp) 223static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp)
227{ 224{
228 return cp->lic_free & XFS_LIC_FREEMASK; 225 return cp->lic_free & XFS_LIC_FREEMASK;
229} 226}
230 227
231#define XFS_LIC_ALL_FREE(cp) xfs_lic_all_free(cp)
232static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp) 228static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp)
233{ 229{
234 cp->lic_free = XFS_LIC_FREEMASK; 230 cp->lic_free = XFS_LIC_FREEMASK;
235} 231}
236 232
237#define XFS_LIC_ARE_ALL_FREE(cp) xfs_lic_are_all_free(cp)
238static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp) 233static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp)
239{ 234{
240 return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK); 235 return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK);
241} 236}
242 237
243#define XFS_LIC_ISFREE(cp,slot) xfs_lic_isfree(cp,slot)
244static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot) 238static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot)
245{ 239{
246 return (cp->lic_free & (1 << slot)); 240 return (cp->lic_free & (1 << slot));
247} 241}
248 242
249#define XFS_LIC_CLAIM(cp,slot) xfs_lic_claim(cp,slot)
250static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot) 243static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot)
251{ 244{
252 cp->lic_free &= ~(1 << slot); 245 cp->lic_free &= ~(1 << slot);
253} 246}
254 247
255#define XFS_LIC_RELSE(cp,slot) xfs_lic_relse(cp,slot)
256static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot) 248static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot)
257{ 249{
258 cp->lic_free |= 1 << slot; 250 cp->lic_free |= 1 << slot;
259} 251}
260 252
261#define XFS_LIC_SLOT(cp,slot) xfs_lic_slot(cp,slot)
262static inline xfs_log_item_desc_t * 253static inline xfs_log_item_desc_t *
263xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot) 254xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot)
264{ 255{
265 return &(cp->lic_descs[slot]); 256 return &(cp->lic_descs[slot]);
266} 257}
267 258
268#define XFS_LIC_DESC_TO_SLOT(dp) xfs_lic_desc_to_slot(dp)
269static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp) 259static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
270{ 260{
271 return (uint)dp->lid_index; 261 return (uint)dp->lid_index;
@@ -278,7 +268,6 @@ static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
278 * All of this yields the address of the chunk, which is 268 * All of this yields the address of the chunk, which is
279 * cast to a chunk pointer. 269 * cast to a chunk pointer.
280 */ 270 */
281#define XFS_LIC_DESC_TO_CHUNK(dp) xfs_lic_desc_to_chunk(dp)
282static inline xfs_log_item_chunk_t * 271static inline xfs_log_item_chunk_t *
283xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) 272xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
284{ 273{
@@ -986,6 +975,7 @@ int _xfs_trans_commit(xfs_trans_t *,
986 int *); 975 int *);
987#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) 976#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL)
988void xfs_trans_cancel(xfs_trans_t *, int); 977void xfs_trans_cancel(xfs_trans_t *, int);
978int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
989int xfs_trans_ail_init(struct xfs_mount *); 979int xfs_trans_ail_init(struct xfs_mount *);
990void xfs_trans_ail_destroy(struct xfs_mount *); 980void xfs_trans_ail_destroy(struct xfs_mount *);
991void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); 981void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index cb0c5839154b..4e855b5ced66 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -1021,16 +1021,16 @@ xfs_trans_buf_item_match(
1021 bp = NULL; 1021 bp = NULL;
1022 len = BBTOB(len); 1022 len = BBTOB(len);
1023 licp = &tp->t_items; 1023 licp = &tp->t_items;
1024 if (!XFS_LIC_ARE_ALL_FREE(licp)) { 1024 if (!xfs_lic_are_all_free(licp)) {
1025 for (i = 0; i < licp->lic_unused; i++) { 1025 for (i = 0; i < licp->lic_unused; i++) {
1026 /* 1026 /*
1027 * Skip unoccupied slots. 1027 * Skip unoccupied slots.
1028 */ 1028 */
1029 if (XFS_LIC_ISFREE(licp, i)) { 1029 if (xfs_lic_isfree(licp, i)) {
1030 continue; 1030 continue;
1031 } 1031 }
1032 1032
1033 lidp = XFS_LIC_SLOT(licp, i); 1033 lidp = xfs_lic_slot(licp, i);
1034 blip = (xfs_buf_log_item_t *)lidp->lid_item; 1034 blip = (xfs_buf_log_item_t *)lidp->lid_item;
1035 if (blip->bli_item.li_type != XFS_LI_BUF) { 1035 if (blip->bli_item.li_type != XFS_LI_BUF) {
1036 continue; 1036 continue;
@@ -1074,7 +1074,7 @@ xfs_trans_buf_item_match_all(
1074 bp = NULL; 1074 bp = NULL;
1075 len = BBTOB(len); 1075 len = BBTOB(len);
1076 for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { 1076 for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
1077 if (XFS_LIC_ARE_ALL_FREE(licp)) { 1077 if (xfs_lic_are_all_free(licp)) {
1078 ASSERT(licp == &tp->t_items); 1078 ASSERT(licp == &tp->t_items);
1079 ASSERT(licp->lic_next == NULL); 1079 ASSERT(licp->lic_next == NULL);
1080 return NULL; 1080 return NULL;
@@ -1083,11 +1083,11 @@ xfs_trans_buf_item_match_all(
1083 /* 1083 /*
1084 * Skip unoccupied slots. 1084 * Skip unoccupied slots.
1085 */ 1085 */
1086 if (XFS_LIC_ISFREE(licp, i)) { 1086 if (xfs_lic_isfree(licp, i)) {
1087 continue; 1087 continue;
1088 } 1088 }
1089 1089
1090 lidp = XFS_LIC_SLOT(licp, i); 1090 lidp = xfs_lic_slot(licp, i);
1091 blip = (xfs_buf_log_item_t *)lidp->lid_item; 1091 blip = (xfs_buf_log_item_t *)lidp->lid_item;
1092 if (blip->bli_item.li_type != XFS_LI_BUF) { 1092 if (blip->bli_item.li_type != XFS_LI_BUF) {
1093 continue; 1093 continue;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 4c70bf5e9985..2a1c0f071f91 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -291,7 +291,7 @@ xfs_trans_inode_broot_debug(
291 iip = ip->i_itemp; 291 iip = ip->i_itemp;
292 if (iip->ili_root_size != 0) { 292 if (iip->ili_root_size != 0) {
293 ASSERT(iip->ili_orig_root != NULL); 293 ASSERT(iip->ili_orig_root != NULL);
294 kmem_free(iip->ili_orig_root, iip->ili_root_size); 294 kmem_free(iip->ili_orig_root);
295 iip->ili_root_size = 0; 295 iip->ili_root_size = 0;
296 iip->ili_orig_root = NULL; 296 iip->ili_orig_root = NULL;
297 } 297 }
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 66a09f0d894b..3c666e8317f8 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -53,11 +53,11 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
53 * Initialize the chunk, and then 53 * Initialize the chunk, and then
54 * claim the first slot in the newly allocated chunk. 54 * claim the first slot in the newly allocated chunk.
55 */ 55 */
56 XFS_LIC_INIT(licp); 56 xfs_lic_init(licp);
57 XFS_LIC_CLAIM(licp, 0); 57 xfs_lic_claim(licp, 0);
58 licp->lic_unused = 1; 58 licp->lic_unused = 1;
59 XFS_LIC_INIT_SLOT(licp, 0); 59 xfs_lic_init_slot(licp, 0);
60 lidp = XFS_LIC_SLOT(licp, 0); 60 lidp = xfs_lic_slot(licp, 0);
61 61
62 /* 62 /*
63 * Link in the new chunk and update the free count. 63 * Link in the new chunk and update the free count.
@@ -88,14 +88,14 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
88 */ 88 */
89 licp = &tp->t_items; 89 licp = &tp->t_items;
90 while (licp != NULL) { 90 while (licp != NULL) {
91 if (XFS_LIC_VACANCY(licp)) { 91 if (xfs_lic_vacancy(licp)) {
92 if (licp->lic_unused <= XFS_LIC_MAX_SLOT) { 92 if (licp->lic_unused <= XFS_LIC_MAX_SLOT) {
93 i = licp->lic_unused; 93 i = licp->lic_unused;
94 ASSERT(XFS_LIC_ISFREE(licp, i)); 94 ASSERT(xfs_lic_isfree(licp, i));
95 break; 95 break;
96 } 96 }
97 for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) { 97 for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) {
98 if (XFS_LIC_ISFREE(licp, i)) 98 if (xfs_lic_isfree(licp, i))
99 break; 99 break;
100 } 100 }
101 ASSERT(i <= XFS_LIC_MAX_SLOT); 101 ASSERT(i <= XFS_LIC_MAX_SLOT);
@@ -108,12 +108,12 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
108 * If we find a free descriptor, claim it, 108 * If we find a free descriptor, claim it,
109 * initialize it, and return it. 109 * initialize it, and return it.
110 */ 110 */
111 XFS_LIC_CLAIM(licp, i); 111 xfs_lic_claim(licp, i);
112 if (licp->lic_unused <= i) { 112 if (licp->lic_unused <= i) {
113 licp->lic_unused = i + 1; 113 licp->lic_unused = i + 1;
114 XFS_LIC_INIT_SLOT(licp, i); 114 xfs_lic_init_slot(licp, i);
115 } 115 }
116 lidp = XFS_LIC_SLOT(licp, i); 116 lidp = xfs_lic_slot(licp, i);
117 tp->t_items_free--; 117 tp->t_items_free--;
118 lidp->lid_item = lip; 118 lidp->lid_item = lip;
119 lidp->lid_flags = 0; 119 lidp->lid_flags = 0;
@@ -136,9 +136,9 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
136 xfs_log_item_chunk_t *licp; 136 xfs_log_item_chunk_t *licp;
137 xfs_log_item_chunk_t **licpp; 137 xfs_log_item_chunk_t **licpp;
138 138
139 slot = XFS_LIC_DESC_TO_SLOT(lidp); 139 slot = xfs_lic_desc_to_slot(lidp);
140 licp = XFS_LIC_DESC_TO_CHUNK(lidp); 140 licp = xfs_lic_desc_to_chunk(lidp);
141 XFS_LIC_RELSE(licp, slot); 141 xfs_lic_relse(licp, slot);
142 lidp->lid_item->li_desc = NULL; 142 lidp->lid_item->li_desc = NULL;
143 tp->t_items_free++; 143 tp->t_items_free++;
144 144
@@ -154,14 +154,14 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
154 * Also decrement the transaction structure's count of free items 154 * Also decrement the transaction structure's count of free items
155 * by the number in a chunk since we are freeing an empty chunk. 155 * by the number in a chunk since we are freeing an empty chunk.
156 */ 156 */
157 if (XFS_LIC_ARE_ALL_FREE(licp) && (licp != &(tp->t_items))) { 157 if (xfs_lic_are_all_free(licp) && (licp != &(tp->t_items))) {
158 licpp = &(tp->t_items.lic_next); 158 licpp = &(tp->t_items.lic_next);
159 while (*licpp != licp) { 159 while (*licpp != licp) {
160 ASSERT(*licpp != NULL); 160 ASSERT(*licpp != NULL);
161 licpp = &((*licpp)->lic_next); 161 licpp = &((*licpp)->lic_next);
162 } 162 }
163 *licpp = licp->lic_next; 163 *licpp = licp->lic_next;
164 kmem_free(licp, sizeof(xfs_log_item_chunk_t)); 164 kmem_free(licp);
165 tp->t_items_free -= XFS_LIC_NUM_SLOTS; 165 tp->t_items_free -= XFS_LIC_NUM_SLOTS;
166 } 166 }
167} 167}
@@ -207,20 +207,20 @@ xfs_trans_first_item(xfs_trans_t *tp)
207 /* 207 /*
208 * If it's not in the first chunk, skip to the second. 208 * If it's not in the first chunk, skip to the second.
209 */ 209 */
210 if (XFS_LIC_ARE_ALL_FREE(licp)) { 210 if (xfs_lic_are_all_free(licp)) {
211 licp = licp->lic_next; 211 licp = licp->lic_next;
212 } 212 }
213 213
214 /* 214 /*
215 * Return the first non-free descriptor in the chunk. 215 * Return the first non-free descriptor in the chunk.
216 */ 216 */
217 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 217 ASSERT(!xfs_lic_are_all_free(licp));
218 for (i = 0; i < licp->lic_unused; i++) { 218 for (i = 0; i < licp->lic_unused; i++) {
219 if (XFS_LIC_ISFREE(licp, i)) { 219 if (xfs_lic_isfree(licp, i)) {
220 continue; 220 continue;
221 } 221 }
222 222
223 return XFS_LIC_SLOT(licp, i); 223 return xfs_lic_slot(licp, i);
224 } 224 }
225 cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item"); 225 cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item");
226 return NULL; 226 return NULL;
@@ -242,18 +242,18 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
242 xfs_log_item_chunk_t *licp; 242 xfs_log_item_chunk_t *licp;
243 int i; 243 int i;
244 244
245 licp = XFS_LIC_DESC_TO_CHUNK(lidp); 245 licp = xfs_lic_desc_to_chunk(lidp);
246 246
247 /* 247 /*
248 * First search the rest of the chunk. The for loop keeps us 248 * First search the rest of the chunk. The for loop keeps us
249 * from referencing things beyond the end of the chunk. 249 * from referencing things beyond the end of the chunk.
250 */ 250 */
251 for (i = (int)XFS_LIC_DESC_TO_SLOT(lidp) + 1; i < licp->lic_unused; i++) { 251 for (i = (int)xfs_lic_desc_to_slot(lidp) + 1; i < licp->lic_unused; i++) {
252 if (XFS_LIC_ISFREE(licp, i)) { 252 if (xfs_lic_isfree(licp, i)) {
253 continue; 253 continue;
254 } 254 }
255 255
256 return XFS_LIC_SLOT(licp, i); 256 return xfs_lic_slot(licp, i);
257 } 257 }
258 258
259 /* 259 /*
@@ -266,13 +266,13 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
266 } 266 }
267 267
268 licp = licp->lic_next; 268 licp = licp->lic_next;
269 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 269 ASSERT(!xfs_lic_are_all_free(licp));
270 for (i = 0; i < licp->lic_unused; i++) { 270 for (i = 0; i < licp->lic_unused; i++) {
271 if (XFS_LIC_ISFREE(licp, i)) { 271 if (xfs_lic_isfree(licp, i)) {
272 continue; 272 continue;
273 } 273 }
274 274
275 return XFS_LIC_SLOT(licp, i); 275 return xfs_lic_slot(licp, i);
276 } 276 }
277 ASSERT(0); 277 ASSERT(0);
278 /* NOTREACHED */ 278 /* NOTREACHED */
@@ -300,9 +300,9 @@ xfs_trans_free_items(
300 /* 300 /*
301 * Special case the embedded chunk so we don't free it below. 301 * Special case the embedded chunk so we don't free it below.
302 */ 302 */
303 if (!XFS_LIC_ARE_ALL_FREE(licp)) { 303 if (!xfs_lic_are_all_free(licp)) {
304 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); 304 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
305 XFS_LIC_ALL_FREE(licp); 305 xfs_lic_all_free(licp);
306 licp->lic_unused = 0; 306 licp->lic_unused = 0;
307 } 307 }
308 licp = licp->lic_next; 308 licp = licp->lic_next;
@@ -311,10 +311,10 @@ xfs_trans_free_items(
311 * Unlock each item in each chunk and free the chunks. 311 * Unlock each item in each chunk and free the chunks.
312 */ 312 */
313 while (licp != NULL) { 313 while (licp != NULL) {
314 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 314 ASSERT(!xfs_lic_are_all_free(licp));
315 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); 315 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
316 next_licp = licp->lic_next; 316 next_licp = licp->lic_next;
317 kmem_free(licp, sizeof(xfs_log_item_chunk_t)); 317 kmem_free(licp);
318 licp = next_licp; 318 licp = next_licp;
319 } 319 }
320 320
@@ -347,7 +347,7 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
347 /* 347 /*
348 * Special case the embedded chunk so we don't free. 348 * Special case the embedded chunk so we don't free.
349 */ 349 */
350 if (!XFS_LIC_ARE_ALL_FREE(licp)) { 350 if (!xfs_lic_are_all_free(licp)) {
351 freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); 351 freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);
352 } 352 }
353 licpp = &(tp->t_items.lic_next); 353 licpp = &(tp->t_items.lic_next);
@@ -358,12 +358,12 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
358 * and free empty chunks. 358 * and free empty chunks.
359 */ 359 */
360 while (licp != NULL) { 360 while (licp != NULL) {
361 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 361 ASSERT(!xfs_lic_are_all_free(licp));
362 freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); 362 freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);
363 next_licp = licp->lic_next; 363 next_licp = licp->lic_next;
364 if (XFS_LIC_ARE_ALL_FREE(licp)) { 364 if (xfs_lic_are_all_free(licp)) {
365 *licpp = next_licp; 365 *licpp = next_licp;
366 kmem_free(licp, sizeof(xfs_log_item_chunk_t)); 366 kmem_free(licp);
367 freed -= XFS_LIC_NUM_SLOTS; 367 freed -= XFS_LIC_NUM_SLOTS;
368 } else { 368 } else {
369 licpp = &(licp->lic_next); 369 licpp = &(licp->lic_next);
@@ -402,7 +402,7 @@ xfs_trans_unlock_chunk(
402 freed = 0; 402 freed = 0;
403 lidp = licp->lic_descs; 403 lidp = licp->lic_descs;
404 for (i = 0; i < licp->lic_unused; i++, lidp++) { 404 for (i = 0; i < licp->lic_unused; i++, lidp++) {
405 if (XFS_LIC_ISFREE(licp, i)) { 405 if (xfs_lic_isfree(licp, i)) {
406 continue; 406 continue;
407 } 407 }
408 lip = lidp->lid_item; 408 lip = lidp->lid_item;
@@ -421,7 +421,7 @@ xfs_trans_unlock_chunk(
421 */ 421 */
422 if (!(freeing_chunk) && 422 if (!(freeing_chunk) &&
423 (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) { 423 (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) {
424 XFS_LIC_RELSE(licp, i); 424 xfs_lic_relse(licp, i);
425 freed++; 425 freed++;
426 } 426 }
427 } 427 }
@@ -530,7 +530,7 @@ xfs_trans_free_busy(xfs_trans_t *tp)
530 lbcp = tp->t_busy.lbc_next; 530 lbcp = tp->t_busy.lbc_next;
531 while (lbcp != NULL) { 531 while (lbcp != NULL) {
532 lbcq = lbcp->lbc_next; 532 lbcq = lbcp->lbc_next;
533 kmem_free(lbcp, sizeof(xfs_log_busy_chunk_t)); 533 kmem_free(lbcp);
534 lbcp = lbcq; 534 lbcp = lbcq;
535 } 535 }
536 536
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 98e5f110ba5f..35d4d414bcc2 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -237,7 +237,7 @@ xfs_droplink(
237 237
238 ASSERT (ip->i_d.di_nlink > 0); 238 ASSERT (ip->i_d.di_nlink > 0);
239 ip->i_d.di_nlink--; 239 ip->i_d.di_nlink--;
240 drop_nlink(ip->i_vnode); 240 drop_nlink(VFS_I(ip));
241 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 241 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
242 242
243 error = 0; 243 error = 0;
@@ -301,7 +301,7 @@ xfs_bumplink(
301 301
302 ASSERT(ip->i_d.di_nlink > 0); 302 ASSERT(ip->i_d.di_nlink > 0);
303 ip->i_d.di_nlink++; 303 ip->i_d.di_nlink++;
304 inc_nlink(ip->i_vnode); 304 inc_nlink(VFS_I(ip));
305 if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) && 305 if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) &&
306 (ip->i_d.di_nlink > XFS_MAXLINK_1)) { 306 (ip->i_d.di_nlink > XFS_MAXLINK_1)) {
307 /* 307 /*
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index f316cb85d8e2..ef321225d269 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -18,9 +18,6 @@
18#ifndef __XFS_UTILS_H__ 18#ifndef __XFS_UTILS_H__
19#define __XFS_UTILS_H__ 19#define __XFS_UTILS_H__
20 20
21#define IRELE(ip) VN_RELE(XFS_ITOV(ip))
22#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip))
23
24extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); 21extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
25extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 22extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
26 xfs_dev_t, cred_t *, prid_t, int, 23 xfs_dev_t, cred_t *, prid_t, int,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 30bacd8bb0e5..439dd3939dda 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -58,586 +58,6 @@
58#include "xfs_utils.h" 58#include "xfs_utils.h"
59 59
60 60
61int __init
62xfs_init(void)
63{
64#ifdef XFS_DABUF_DEBUG
65 extern spinlock_t xfs_dabuf_global_lock;
66 spin_lock_init(&xfs_dabuf_global_lock);
67#endif
68
69 /*
70 * Initialize all of the zone allocators we use.
71 */
72 xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
73 "xfs_log_ticket");
74 xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
75 "xfs_bmap_free_item");
76 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
77 "xfs_btree_cur");
78 xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
79 "xfs_da_state");
80 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
81 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
82 xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
83 xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
84 xfs_mru_cache_init();
85 xfs_filestream_init();
86
87 /*
88 * The size of the zone allocated buf log item is the maximum
89 * size possible under XFS. This wastes a little bit of memory,
90 * but it is much faster.
91 */
92 xfs_buf_item_zone =
93 kmem_zone_init((sizeof(xfs_buf_log_item_t) +
94 (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
95 NBWORD) * sizeof(int))),
96 "xfs_buf_item");
97 xfs_efd_zone =
98 kmem_zone_init((sizeof(xfs_efd_log_item_t) +
99 ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
100 sizeof(xfs_extent_t))),
101 "xfs_efd_item");
102 xfs_efi_zone =
103 kmem_zone_init((sizeof(xfs_efi_log_item_t) +
104 ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
105 sizeof(xfs_extent_t))),
106 "xfs_efi_item");
107
108 /*
109 * These zones warrant special memory allocator hints
110 */
111 xfs_inode_zone =
112 kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
113 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
114 KM_ZONE_SPREAD, NULL);
115 xfs_ili_zone =
116 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
117 KM_ZONE_SPREAD, NULL);
118
119 /*
120 * Allocate global trace buffers.
121 */
122#ifdef XFS_ALLOC_TRACE
123 xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_SLEEP);
124#endif
125#ifdef XFS_BMAP_TRACE
126 xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_SLEEP);
127#endif
128#ifdef XFS_BMBT_TRACE
129 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP);
130#endif
131#ifdef XFS_ATTR_TRACE
132 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP);
133#endif
134#ifdef XFS_DIR2_TRACE
135 xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_SLEEP);
136#endif
137
138 xfs_dir_startup();
139
140#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
141 xfs_error_test_init();
142#endif /* DEBUG || INDUCE_IO_ERROR */
143
144 xfs_init_procfs();
145 xfs_sysctl_register();
146 return 0;
147}
148
149void __exit
150xfs_cleanup(void)
151{
152 extern kmem_zone_t *xfs_inode_zone;
153 extern kmem_zone_t *xfs_efd_zone;
154 extern kmem_zone_t *xfs_efi_zone;
155
156 xfs_cleanup_procfs();
157 xfs_sysctl_unregister();
158 xfs_filestream_uninit();
159 xfs_mru_cache_uninit();
160 xfs_acl_zone_destroy(xfs_acl_zone);
161
162#ifdef XFS_DIR2_TRACE
163 ktrace_free(xfs_dir2_trace_buf);
164#endif
165#ifdef XFS_ATTR_TRACE
166 ktrace_free(xfs_attr_trace_buf);
167#endif
168#ifdef XFS_BMBT_TRACE
169 ktrace_free(xfs_bmbt_trace_buf);
170#endif
171#ifdef XFS_BMAP_TRACE
172 ktrace_free(xfs_bmap_trace_buf);
173#endif
174#ifdef XFS_ALLOC_TRACE
175 ktrace_free(xfs_alloc_trace_buf);
176#endif
177
178 kmem_zone_destroy(xfs_bmap_free_item_zone);
179 kmem_zone_destroy(xfs_btree_cur_zone);
180 kmem_zone_destroy(xfs_inode_zone);
181 kmem_zone_destroy(xfs_trans_zone);
182 kmem_zone_destroy(xfs_da_state_zone);
183 kmem_zone_destroy(xfs_dabuf_zone);
184 kmem_zone_destroy(xfs_buf_item_zone);
185 kmem_zone_destroy(xfs_efd_zone);
186 kmem_zone_destroy(xfs_efi_zone);
187 kmem_zone_destroy(xfs_ifork_zone);
188 kmem_zone_destroy(xfs_ili_zone);
189 kmem_zone_destroy(xfs_log_ticket_zone);
190}
191
192/*
193 * xfs_start_flags
194 *
195 * This function fills in xfs_mount_t fields based on mount args.
196 * Note: the superblock has _not_ yet been read in.
197 */
198STATIC int
199xfs_start_flags(
200 struct xfs_mount_args *ap,
201 struct xfs_mount *mp)
202{
203 /* Values are in BBs */
204 if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
205 /*
206 * At this point the superblock has not been read
207 * in, therefore we do not know the block size.
208 * Before the mount call ends we will convert
209 * these to FSBs.
210 */
211 mp->m_dalign = ap->sunit;
212 mp->m_swidth = ap->swidth;
213 }
214
215 if (ap->logbufs != -1 &&
216 ap->logbufs != 0 &&
217 (ap->logbufs < XLOG_MIN_ICLOGS ||
218 ap->logbufs > XLOG_MAX_ICLOGS)) {
219 cmn_err(CE_WARN,
220 "XFS: invalid logbufs value: %d [not %d-%d]",
221 ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
222 return XFS_ERROR(EINVAL);
223 }
224 mp->m_logbufs = ap->logbufs;
225 if (ap->logbufsize != -1 &&
226 ap->logbufsize != 0 &&
227 (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
228 ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
229 !is_power_of_2(ap->logbufsize))) {
230 cmn_err(CE_WARN,
231 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
232 ap->logbufsize);
233 return XFS_ERROR(EINVAL);
234 }
235 mp->m_logbsize = ap->logbufsize;
236 mp->m_fsname_len = strlen(ap->fsname) + 1;
237 mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
238 strcpy(mp->m_fsname, ap->fsname);
239 if (ap->rtname[0]) {
240 mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
241 strcpy(mp->m_rtname, ap->rtname);
242 }
243 if (ap->logname[0]) {
244 mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
245 strcpy(mp->m_logname, ap->logname);
246 }
247
248 if (ap->flags & XFSMNT_WSYNC)
249 mp->m_flags |= XFS_MOUNT_WSYNC;
250#if XFS_BIG_INUMS
251 if (ap->flags & XFSMNT_INO64) {
252 mp->m_flags |= XFS_MOUNT_INO64;
253 mp->m_inoadd = XFS_INO64_OFFSET;
254 }
255#endif
256 if (ap->flags & XFSMNT_RETERR)
257 mp->m_flags |= XFS_MOUNT_RETERR;
258 if (ap->flags & XFSMNT_NOALIGN)
259 mp->m_flags |= XFS_MOUNT_NOALIGN;
260 if (ap->flags & XFSMNT_SWALLOC)
261 mp->m_flags |= XFS_MOUNT_SWALLOC;
262 if (ap->flags & XFSMNT_OSYNCISOSYNC)
263 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
264 if (ap->flags & XFSMNT_32BITINODES)
265 mp->m_flags |= XFS_MOUNT_32BITINODES;
266
267 if (ap->flags & XFSMNT_IOSIZE) {
268 if (ap->iosizelog > XFS_MAX_IO_LOG ||
269 ap->iosizelog < XFS_MIN_IO_LOG) {
270 cmn_err(CE_WARN,
271 "XFS: invalid log iosize: %d [not %d-%d]",
272 ap->iosizelog, XFS_MIN_IO_LOG,
273 XFS_MAX_IO_LOG);
274 return XFS_ERROR(EINVAL);
275 }
276
277 mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
278 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
279 }
280
281 if (ap->flags & XFSMNT_IKEEP)
282 mp->m_flags |= XFS_MOUNT_IKEEP;
283 if (ap->flags & XFSMNT_DIRSYNC)
284 mp->m_flags |= XFS_MOUNT_DIRSYNC;
285 if (ap->flags & XFSMNT_ATTR2)
286 mp->m_flags |= XFS_MOUNT_ATTR2;
287
288 if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
289 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
290
291 /*
292 * no recovery flag requires a read-only mount
293 */
294 if (ap->flags & XFSMNT_NORECOVERY) {
295 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
296 cmn_err(CE_WARN,
297 "XFS: tried to mount a FS read-write without recovery!");
298 return XFS_ERROR(EINVAL);
299 }
300 mp->m_flags |= XFS_MOUNT_NORECOVERY;
301 }
302
303 if (ap->flags & XFSMNT_NOUUID)
304 mp->m_flags |= XFS_MOUNT_NOUUID;
305 if (ap->flags & XFSMNT_BARRIER)
306 mp->m_flags |= XFS_MOUNT_BARRIER;
307 else
308 mp->m_flags &= ~XFS_MOUNT_BARRIER;
309
310 if (ap->flags2 & XFSMNT2_FILESTREAMS)
311 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
312
313 if (ap->flags & XFSMNT_DMAPI)
314 mp->m_flags |= XFS_MOUNT_DMAPI;
315 return 0;
316}
317
318/*
319 * This function fills in xfs_mount_t fields based on mount args.
320 * Note: the superblock _has_ now been read in.
321 */
322STATIC int
323xfs_finish_flags(
324 struct xfs_mount_args *ap,
325 struct xfs_mount *mp)
326{
327 int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
328
329 /* Fail a mount where the logbuf is smaller then the log stripe */
330 if (xfs_sb_version_haslogv2(&mp->m_sb)) {
331 if ((ap->logbufsize <= 0) &&
332 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
333 mp->m_logbsize = mp->m_sb.sb_logsunit;
334 } else if (ap->logbufsize > 0 &&
335 ap->logbufsize < mp->m_sb.sb_logsunit) {
336 cmn_err(CE_WARN,
337 "XFS: logbuf size must be greater than or equal to log stripe size");
338 return XFS_ERROR(EINVAL);
339 }
340 } else {
341 /* Fail a mount if the logbuf is larger than 32K */
342 if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
343 cmn_err(CE_WARN,
344 "XFS: logbuf size for version 1 logs must be 16K or 32K");
345 return XFS_ERROR(EINVAL);
346 }
347 }
348
349 if (xfs_sb_version_hasattr2(&mp->m_sb))
350 mp->m_flags |= XFS_MOUNT_ATTR2;
351
352 /*
353 * prohibit r/w mounts of read-only filesystems
354 */
355 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
356 cmn_err(CE_WARN,
357 "XFS: cannot mount a read-only filesystem as read-write");
358 return XFS_ERROR(EROFS);
359 }
360
361 /*
362 * check for shared mount.
363 */
364 if (ap->flags & XFSMNT_SHARED) {
365 if (!xfs_sb_version_hasshared(&mp->m_sb))
366 return XFS_ERROR(EINVAL);
367
368 /*
369 * For IRIX 6.5, shared mounts must have the shared
370 * version bit set, have the persistent readonly
371 * field set, must be version 0 and can only be mounted
372 * read-only.
373 */
374 if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
375 (mp->m_sb.sb_shared_vn != 0))
376 return XFS_ERROR(EINVAL);
377
378 mp->m_flags |= XFS_MOUNT_SHARED;
379
380 /*
381 * Shared XFS V0 can't deal with DMI. Return EINVAL.
382 */
383 if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
384 return XFS_ERROR(EINVAL);
385 }
386
387 if (ap->flags & XFSMNT_UQUOTA) {
388 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
389 if (ap->flags & XFSMNT_UQUOTAENF)
390 mp->m_qflags |= XFS_UQUOTA_ENFD;
391 }
392
393 if (ap->flags & XFSMNT_GQUOTA) {
394 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
395 if (ap->flags & XFSMNT_GQUOTAENF)
396 mp->m_qflags |= XFS_OQUOTA_ENFD;
397 } else if (ap->flags & XFSMNT_PQUOTA) {
398 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
399 if (ap->flags & XFSMNT_PQUOTAENF)
400 mp->m_qflags |= XFS_OQUOTA_ENFD;
401 }
402
403 return 0;
404}
405
406/*
407 * xfs_mount
408 *
409 * The file system configurations are:
410 * (1) device (partition) with data and internal log
411 * (2) logical volume with data and log subvolumes.
412 * (3) logical volume with data, log, and realtime subvolumes.
413 *
414 * We only have to handle opening the log and realtime volumes here if
415 * they are present. The data subvolume has already been opened by
416 * get_sb_bdev() and is stored in vfsp->vfs_super->s_bdev.
417 */
418int
419xfs_mount(
420 struct xfs_mount *mp,
421 struct xfs_mount_args *args,
422 cred_t *credp)
423{
424 struct block_device *ddev, *logdev, *rtdev;
425 int flags = 0, error;
426
427 ddev = mp->m_super->s_bdev;
428 logdev = rtdev = NULL;
429
 /*
  * Pick up the DMAPI and quota operation vectors for this mount.
  * NOTE(review): the early "return error" paths below (and the blkdev
  * failure paths) do not undo these _get() calls, unlike the error0
  * teardown at the bottom which does xfs_qmops_put()/xfs_dmops_put()
  * -- confirm this asymmetry is intentional.
  */
430 error = xfs_dmops_get(mp, args);
431 if (error)
432 return error;
433 error = xfs_qmops_get(mp, args);
434 if (error)
435 return error;
436
437 if (args->flags & XFSMNT_QUIET)
438 flags |= XFS_MFSI_QUIET;
439
440 /*
441 * Open real time and log devices - order is important.
442 */
443 if (args->logname[0]) {
444 error = xfs_blkdev_get(mp, args->logname, &logdev);
445 if (error)
446 return error;
447 }
448 if (args->rtname[0]) {
449 error = xfs_blkdev_get(mp, args->rtname, &rtdev);
450 if (error) {
451 xfs_blkdev_put(logdev);
452 return error;
453 }
454
 /* The realtime device must be distinct from both data and log devices. */
455 if (rtdev == ddev || rtdev == logdev) {
456 cmn_err(CE_WARN,
457 "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
458 xfs_blkdev_put(logdev);
459 xfs_blkdev_put(rtdev);
460 return EINVAL; /* positive errno, XFS convention */
461 }
462 }
463
464 /*
465 * Setup xfs_mount buffer target pointers
 * ENOMEM is the assumed error for any xfs_alloc_buftarg() failure below.
466 */
467 error = ENOMEM;
468 mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
469 if (!mp->m_ddev_targp) {
470 xfs_blkdev_put(logdev);
471 xfs_blkdev_put(rtdev);
472 return error;
473 }
474 if (rtdev) {
475 mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
476 if (!mp->m_rtdev_targp) {
477 xfs_blkdev_put(logdev);
478 xfs_blkdev_put(rtdev);
479 goto error0;
480 }
481 }
 /* An external log gets its own buftarg; otherwise the log shares the data device's target. */
482 mp->m_logdev_targp = (logdev && logdev != ddev) ?
483 xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp;
484 if (!mp->m_logdev_targp) {
485 xfs_blkdev_put(logdev);
486 xfs_blkdev_put(rtdev);
487 goto error0;
488 }
489
490 /*
491 * Setup flags based on mount(2) options and then the superblock
492 */
493 error = xfs_start_flags(args, mp);
494 if (error)
495 goto error1;
496 error = xfs_readsb(mp, flags);
497 if (error)
498 goto error1;
499 error = xfs_finish_flags(args, mp);
500 if (error)
501 goto error2;
502
503 /*
504 * Setup xfs_mount buffer target pointers based on superblock
 * Re-size each buftarg now that the on-disk block/sector sizes are known.
505 */
506 error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
507 mp->m_sb.sb_sectsize);
508 if (!error && logdev && logdev != ddev) {
509 unsigned int log_sector_size = BBSIZE;
510
511 if (xfs_sb_version_hassector(&mp->m_sb))
512 log_sector_size = mp->m_sb.sb_logsectsize;
513 error = xfs_setsize_buftarg(mp->m_logdev_targp,
514 mp->m_sb.sb_blocksize,
515 log_sector_size);
516 }
517 if (!error && rtdev)
518 error = xfs_setsize_buftarg(mp->m_rtdev_targp,
519 mp->m_sb.sb_blocksize,
520 mp->m_sb.sb_sectsize);
521 if (error)
522 goto error2;
523
524 if (mp->m_flags & XFS_MOUNT_BARRIER)
525 xfs_mountfs_check_barriers(mp);
526
527 if ((error = xfs_filestream_mount(mp)))
528 goto error2;
529
530 error = xfs_mountfs(mp, flags);
531 if (error)
532 goto error2;
533
534 XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
535
536 return 0;
537
 /*
  * Teardown in reverse order of setup: superblock buffer, then the
  * per-device buffer caches, then the devices and ops references.
  */
538error2:
539 if (mp->m_sb_bp)
540 xfs_freesb(mp);
541error1:
542 xfs_binval(mp->m_ddev_targp);
543 if (logdev && logdev != ddev)
544 xfs_binval(mp->m_logdev_targp);
545 if (rtdev)
546 xfs_binval(mp->m_rtdev_targp);
547error0:
548 xfs_unmountfs_close(mp, credp);
549 xfs_qmops_put(mp);
550 xfs_dmops_put(mp);
551 return error;
552}
553
/*
 * Tear down a mounted XFS filesystem: flush the special (root, realtime
 * bitmap/summary) inodes, drop the root inode reference, optionally send
 * DMAPI unmount events, and finally free the xfs_mount via xfs_unmountfs().
 */
554int
555xfs_unmount(
556 xfs_mount_t *mp,
557 int flags,
558 cred_t *credp)
559{
560 xfs_inode_t *rip;
561 bhv_vnode_t *rvp;
562 int unmount_event_wanted = 0;
563 int unmount_event_flags = 0;
564 int xfs_unmountfs_needed = 0;
565 int error;
566
567 rip = mp->m_rootip;
568 rvp = XFS_ITOV(rip);
569
570#ifdef HAVE_DMAPI
571 if (mp->m_flags & XFS_MOUNT_DMAPI) {
572 error = XFS_SEND_PREUNMOUNT(mp,
573 rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
574 NULL, NULL, 0, 0,
575 (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
576 0:DM_FLAGS_UNWANTED);
577 if (error)
578 return XFS_ERROR(error);
579 unmount_event_wanted = 1;
580 unmount_event_flags = (mp->m_dmevmask & (1<<DM_EVENT_UNMOUNT))?
581 0 : DM_FLAGS_UNWANTED;
582 }
 /* unmount_event_wanted/_flags stay 0 unless HAVE_DMAPI set them above. */
583#endif
584
585 /*
586 * Blow away any referenced inode in the filestreams cache.
587 * This can and will cause log traffic as inodes go inactive
588 * here.
589 */
590 xfs_filestream_unmount(mp);
591
592 XFS_bflush(mp->m_ddev_targp);
593 error = xfs_unmount_flush(mp, 0);
594 if (error)
595 goto out;
596
 /* At this point the root vnode should hold the only remaining reference. */
597 ASSERT(vn_count(rvp) == 1);
598
599 /*
600 * Drop the reference count
601 */
602 IRELE(rip);
603
604 /*
605 * If we're forcing a shutdown, typically because of a media error,
606 * we want to make sure we invalidate dirty pages that belong to
607 * referenced vnodes as well.
608 */
609 if (XFS_FORCED_SHUTDOWN(mp)) {
610 error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
611 ASSERT(error != EFSCORRUPTED);
612 }
613 xfs_unmountfs_needed = 1;
614
615out:
616 /* Send DMAPI event, if required.
617 * Then do xfs_unmountfs() if needed.
618 * Then return error (or zero).
619 */
620 if (unmount_event_wanted) {
621 /* Note: mp structure must still exist for
622 * XFS_SEND_UNMOUNT() call.
623 */
624 XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL,
625 DM_RIGHT_NULL, 0, error, unmount_event_flags);
626 }
627 if (xfs_unmountfs_needed) {
628 /*
629 * Call common unmount function to flush to disk
630 * and free the super block buffer & mount structures.
 * mp is freed below; it must not be touched afterwards.
631 */
632 xfs_unmountfs(mp, credp);
633 xfs_qmops_put(mp);
634 xfs_dmops_put(mp);
635 kmem_free(mp, sizeof(xfs_mount_t));
636 }
637
638 return XFS_ERROR(error);
639}
640
641STATIC void 61STATIC void
642xfs_quiesce_fs( 62xfs_quiesce_fs(
643 xfs_mount_t *mp) 63 xfs_mount_t *mp)
@@ -694,30 +114,6 @@ xfs_attr_quiesce(
694 xfs_unmountfs_writesb(mp); 114 xfs_unmountfs_writesb(mp);
695} 115}
696 116
/*
 * Remount handling: toggle the filesystem's read-only state and barrier
 * usage according to the new MS_RDONLY flag and the XFSMNT_BARRIER
 * mount argument.  Always returns 0.
 */
697int
698xfs_mntupdate(
699 struct xfs_mount *mp,
700 int *flags,
701 struct xfs_mount_args *args)
702{
703 if (!(*flags & MS_RDONLY)) { /* rw/ro -> rw */
704 if (mp->m_flags & XFS_MOUNT_RDONLY)
705 mp->m_flags &= ~XFS_MOUNT_RDONLY;
706 if (args->flags & XFSMNT_BARRIER) {
707 mp->m_flags |= XFS_MOUNT_BARRIER;
708 xfs_mountfs_check_barriers(mp);
709 } else {
710 mp->m_flags &= ~XFS_MOUNT_BARRIER;
711 }
712 } else if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { /* rw -> ro */
 /* Quiesce data and attributes before flipping to read-only. */
713 xfs_filestream_flush(mp);
714 xfs_sync(mp, SYNC_DATA_QUIESCE);
715 xfs_attr_quiesce(mp);
716 mp->m_flags |= XFS_MOUNT_RDONLY;
717 }
718 return 0;
719}
720
721/* 117/*
722 * xfs_unmount_flush implements a set of flush operation on special 118 * xfs_unmount_flush implements a set of flush operation on special
723 * inodes, which are needed as a separate set of operations so that 119 * inodes, which are needed as a separate set of operations so that
@@ -732,7 +128,6 @@ xfs_unmount_flush(
732 xfs_inode_t *rip = mp->m_rootip; 128 xfs_inode_t *rip = mp->m_rootip;
733 xfs_inode_t *rbmip; 129 xfs_inode_t *rbmip;
734 xfs_inode_t *rsumip = NULL; 130 xfs_inode_t *rsumip = NULL;
735 bhv_vnode_t *rvp = XFS_ITOV(rip);
736 int error; 131 int error;
737 132
738 xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 133 xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
@@ -750,7 +145,7 @@ xfs_unmount_flush(
750 if (error == EFSCORRUPTED) 145 if (error == EFSCORRUPTED)
751 goto fscorrupt_out; 146 goto fscorrupt_out;
752 147
753 ASSERT(vn_count(XFS_ITOV(rbmip)) == 1); 148 ASSERT(vn_count(VFS_I(rbmip)) == 1);
754 149
755 rsumip = mp->m_rsumip; 150 rsumip = mp->m_rsumip;
756 xfs_ilock(rsumip, XFS_ILOCK_EXCL); 151 xfs_ilock(rsumip, XFS_ILOCK_EXCL);
@@ -761,7 +156,7 @@ xfs_unmount_flush(
761 if (error == EFSCORRUPTED) 156 if (error == EFSCORRUPTED)
762 goto fscorrupt_out; 157 goto fscorrupt_out;
763 158
764 ASSERT(vn_count(XFS_ITOV(rsumip)) == 1); 159 ASSERT(vn_count(VFS_I(rsumip)) == 1);
765 } 160 }
766 161
767 /* 162 /*
@@ -771,7 +166,7 @@ xfs_unmount_flush(
771 if (error == EFSCORRUPTED) 166 if (error == EFSCORRUPTED)
772 goto fscorrupt_out2; 167 goto fscorrupt_out2;
773 168
774 if (vn_count(rvp) != 1 && !relocation) { 169 if (vn_count(VFS_I(rip)) != 1 && !relocation) {
775 xfs_iunlock(rip, XFS_ILOCK_EXCL); 170 xfs_iunlock(rip, XFS_ILOCK_EXCL);
776 return XFS_ERROR(EBUSY); 171 return XFS_ERROR(EBUSY);
777 } 172 }
@@ -888,7 +283,7 @@ xfs_sync_inodes(
888 int *bypassed) 283 int *bypassed)
889{ 284{
890 xfs_inode_t *ip = NULL; 285 xfs_inode_t *ip = NULL;
891 bhv_vnode_t *vp = NULL; 286 struct inode *vp = NULL;
892 int error; 287 int error;
893 int last_error; 288 int last_error;
894 uint64_t fflag; 289 uint64_t fflag;
@@ -1008,7 +403,7 @@ xfs_sync_inodes(
1008 continue; 403 continue;
1009 } 404 }
1010 405
1011 vp = XFS_ITOV_NULL(ip); 406 vp = VFS_I(ip);
1012 407
1013 /* 408 /*
1014 * If the vnode is gone then this is being torn down, 409 * If the vnode is gone then this is being torn down,
@@ -1048,7 +443,7 @@ xfs_sync_inodes(
1048 443
1049 if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { 444 if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
1050 XFS_MOUNT_IUNLOCK(mp); 445 XFS_MOUNT_IUNLOCK(mp);
1051 kmem_free(ipointer, sizeof(xfs_iptr_t)); 446 kmem_free(ipointer);
1052 return 0; 447 return 0;
1053 } 448 }
1054 449
@@ -1083,7 +478,7 @@ xfs_sync_inodes(
1083 IPOINTER_INSERT(ip, mp); 478 IPOINTER_INSERT(ip, mp);
1084 xfs_ilock(ip, lock_flags); 479 xfs_ilock(ip, lock_flags);
1085 480
1086 ASSERT(vp == XFS_ITOV(ip)); 481 ASSERT(vp == VFS_I(ip));
1087 ASSERT(ip->i_mount == mp); 482 ASSERT(ip->i_mount == mp);
1088 483
1089 vnode_refed = B_TRUE; 484 vnode_refed = B_TRUE;
@@ -1194,7 +589,7 @@ xfs_sync_inodes(
1194 } 589 }
1195 XFS_MOUNT_IUNLOCK(mp); 590 XFS_MOUNT_IUNLOCK(mp);
1196 ASSERT(ipointer_in == B_FALSE); 591 ASSERT(ipointer_in == B_FALSE);
1197 kmem_free(ipointer, sizeof(xfs_iptr_t)); 592 kmem_free(ipointer);
1198 return XFS_ERROR(error); 593 return XFS_ERROR(error);
1199 } 594 }
1200 595
@@ -1224,7 +619,7 @@ xfs_sync_inodes(
1224 619
1225 ASSERT(ipointer_in == B_FALSE); 620 ASSERT(ipointer_in == B_FALSE);
1226 621
1227 kmem_free(ipointer, sizeof(xfs_iptr_t)); 622 kmem_free(ipointer);
1228 return XFS_ERROR(last_error); 623 return XFS_ERROR(last_error);
1229} 624}
1230 625
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index 1688817c55ed..a74b05087da4 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -8,11 +8,6 @@ struct kstatfs;
8struct xfs_mount; 8struct xfs_mount;
9struct xfs_mount_args; 9struct xfs_mount_args;
10 10
11int xfs_mount(struct xfs_mount *mp, struct xfs_mount_args *args,
12 struct cred *credp);
13int xfs_unmount(struct xfs_mount *mp, int flags, struct cred *credp);
14int xfs_mntupdate(struct xfs_mount *mp, int *flags,
15 struct xfs_mount_args *args);
16int xfs_sync(struct xfs_mount *mp, int flags); 11int xfs_sync(struct xfs_mount *mp, int flags);
17void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, 12void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
18 int lnnum); 13 int lnnum);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e475e3717eb3..aa238c8fbd7a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -75,26 +75,23 @@ xfs_open(
75 return 0; 75 return 0;
76} 76}
77 77
78/*
79 * xfs_setattr
80 */
81int 78int
82xfs_setattr( 79xfs_setattr(
83 xfs_inode_t *ip, 80 struct xfs_inode *ip,
84 bhv_vattr_t *vap, 81 struct iattr *iattr,
85 int flags, 82 int flags,
86 cred_t *credp) 83 cred_t *credp)
87{ 84{
88 xfs_mount_t *mp = ip->i_mount; 85 xfs_mount_t *mp = ip->i_mount;
86 struct inode *inode = VFS_I(ip);
87 int mask = iattr->ia_valid;
89 xfs_trans_t *tp; 88 xfs_trans_t *tp;
90 int mask;
91 int code; 89 int code;
92 uint lock_flags; 90 uint lock_flags;
93 uint commit_flags=0; 91 uint commit_flags=0;
94 uid_t uid=0, iuid=0; 92 uid_t uid=0, iuid=0;
95 gid_t gid=0, igid=0; 93 gid_t gid=0, igid=0;
96 int timeflags = 0; 94 int timeflags = 0;
97 xfs_prid_t projid=0, iprojid=0;
98 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 95 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
99 int file_owner; 96 int file_owner;
100 int need_iolock = 1; 97 int need_iolock = 1;
@@ -104,30 +101,9 @@ xfs_setattr(
104 if (mp->m_flags & XFS_MOUNT_RDONLY) 101 if (mp->m_flags & XFS_MOUNT_RDONLY)
105 return XFS_ERROR(EROFS); 102 return XFS_ERROR(EROFS);
106 103
107 /*
108 * Cannot set certain attributes.
109 */
110 mask = vap->va_mask;
111 if (mask & XFS_AT_NOSET) {
112 return XFS_ERROR(EINVAL);
113 }
114
115 if (XFS_FORCED_SHUTDOWN(mp)) 104 if (XFS_FORCED_SHUTDOWN(mp))
116 return XFS_ERROR(EIO); 105 return XFS_ERROR(EIO);
117 106
118 /*
119 * Timestamps do not need to be logged and hence do not
120 * need to be done within a transaction.
121 */
122 if (mask & XFS_AT_UPDTIMES) {
123 ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
124 timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) |
125 ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) |
126 ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
127 xfs_ichgtime(ip, timeflags);
128 return 0;
129 }
130
131 olddquot1 = olddquot2 = NULL; 107 olddquot1 = olddquot2 = NULL;
132 udqp = gdqp = NULL; 108 udqp = gdqp = NULL;
133 109
@@ -139,28 +115,22 @@ xfs_setattr(
139 * If the IDs do change before we take the ilock, we're covered 115 * If the IDs do change before we take the ilock, we're covered
140 * because the i_*dquot fields will get updated anyway. 116 * because the i_*dquot fields will get updated anyway.
141 */ 117 */
142 if (XFS_IS_QUOTA_ON(mp) && 118 if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
143 (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
144 uint qflags = 0; 119 uint qflags = 0;
145 120
146 if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { 121 if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
147 uid = vap->va_uid; 122 uid = iattr->ia_uid;
148 qflags |= XFS_QMOPT_UQUOTA; 123 qflags |= XFS_QMOPT_UQUOTA;
149 } else { 124 } else {
150 uid = ip->i_d.di_uid; 125 uid = ip->i_d.di_uid;
151 } 126 }
152 if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) { 127 if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
153 gid = vap->va_gid; 128 gid = iattr->ia_gid;
154 qflags |= XFS_QMOPT_GQUOTA; 129 qflags |= XFS_QMOPT_GQUOTA;
155 } else { 130 } else {
156 gid = ip->i_d.di_gid; 131 gid = ip->i_d.di_gid;
157 } 132 }
158 if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) { 133
159 projid = vap->va_projid;
160 qflags |= XFS_QMOPT_PQUOTA;
161 } else {
162 projid = ip->i_d.di_projid;
163 }
164 /* 134 /*
165 * We take a reference when we initialize udqp and gdqp, 135 * We take a reference when we initialize udqp and gdqp,
166 * so it is important that we never blindly double trip on 136 * so it is important that we never blindly double trip on
@@ -168,8 +138,8 @@ xfs_setattr(
168 */ 138 */
169 ASSERT(udqp == NULL); 139 ASSERT(udqp == NULL);
170 ASSERT(gdqp == NULL); 140 ASSERT(gdqp == NULL);
171 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags, 141 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
172 &udqp, &gdqp); 142 qflags, &udqp, &gdqp);
173 if (code) 143 if (code)
174 return code; 144 return code;
175 } 145 }
@@ -180,10 +150,10 @@ xfs_setattr(
180 */ 150 */
181 tp = NULL; 151 tp = NULL;
182 lock_flags = XFS_ILOCK_EXCL; 152 lock_flags = XFS_ILOCK_EXCL;
183 if (flags & ATTR_NOLOCK) 153 if (flags & XFS_ATTR_NOLOCK)
184 need_iolock = 0; 154 need_iolock = 0;
185 if (!(mask & XFS_AT_SIZE)) { 155 if (!(mask & ATTR_SIZE)) {
186 if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) || 156 if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) ||
187 (mp->m_flags & XFS_MOUNT_WSYNC)) { 157 (mp->m_flags & XFS_MOUNT_WSYNC)) {
188 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 158 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
189 commit_flags = 0; 159 commit_flags = 0;
@@ -196,10 +166,10 @@ xfs_setattr(
196 } 166 }
197 } else { 167 } else {
198 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && 168 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
199 !(flags & ATTR_DMI)) { 169 !(flags & XFS_ATTR_DMI)) {
200 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; 170 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
201 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip, 171 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
202 vap->va_size, 0, dmflags, NULL); 172 iattr->ia_size, 0, dmflags, NULL);
203 if (code) { 173 if (code) {
204 lock_flags = 0; 174 lock_flags = 0;
205 goto error_return; 175 goto error_return;
@@ -212,16 +182,14 @@ xfs_setattr(
212 xfs_ilock(ip, lock_flags); 182 xfs_ilock(ip, lock_flags);
213 183
214 /* boolean: are we the file owner? */ 184 /* boolean: are we the file owner? */
215 file_owner = (current_fsuid(credp) == ip->i_d.di_uid); 185 file_owner = (current_fsuid() == ip->i_d.di_uid);
216 186
217 /* 187 /*
218 * Change various properties of a file. 188 * Change various properties of a file.
219 * Only the owner or users with CAP_FOWNER 189 * Only the owner or users with CAP_FOWNER
220 * capability may do these things. 190 * capability may do these things.
221 */ 191 */
222 if (mask & 192 if (mask & (ATTR_MODE|ATTR_UID|ATTR_GID)) {
223 (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
224 XFS_AT_GID|XFS_AT_PROJID)) {
225 /* 193 /*
226 * CAP_FOWNER overrides the following restrictions: 194 * CAP_FOWNER overrides the following restrictions:
227 * 195 *
@@ -245,21 +213,21 @@ xfs_setattr(
245 * IDs of the calling process shall match the group owner of 213 * IDs of the calling process shall match the group owner of
246 * the file when setting the set-group-ID bit on that file 214 * the file when setting the set-group-ID bit on that file
247 */ 215 */
248 if (mask & XFS_AT_MODE) { 216 if (mask & ATTR_MODE) {
249 mode_t m = 0; 217 mode_t m = 0;
250 218
251 if ((vap->va_mode & S_ISUID) && !file_owner) 219 if ((iattr->ia_mode & S_ISUID) && !file_owner)
252 m |= S_ISUID; 220 m |= S_ISUID;
253 if ((vap->va_mode & S_ISGID) && 221 if ((iattr->ia_mode & S_ISGID) &&
254 !in_group_p((gid_t)ip->i_d.di_gid)) 222 !in_group_p((gid_t)ip->i_d.di_gid))
255 m |= S_ISGID; 223 m |= S_ISGID;
256#if 0 224#if 0
257 /* Linux allows this, Irix doesn't. */ 225 /* Linux allows this, Irix doesn't. */
258 if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode)) 226 if ((iattr->ia_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
259 m |= S_ISVTX; 227 m |= S_ISVTX;
260#endif 228#endif
261 if (m && !capable(CAP_FSETID)) 229 if (m && !capable(CAP_FSETID))
262 vap->va_mode &= ~m; 230 iattr->ia_mode &= ~m;
263 } 231 }
264 } 232 }
265 233
@@ -270,7 +238,7 @@ xfs_setattr(
270 * and can change the group id only to a group of which he 238 * and can change the group id only to a group of which he
271 * or she is a member. 239 * or she is a member.
272 */ 240 */
273 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 241 if (mask & (ATTR_UID|ATTR_GID)) {
274 /* 242 /*
275 * These IDs could have changed since we last looked at them. 243 * These IDs could have changed since we last looked at them.
276 * But, we're assured that if the ownership did change 244 * But, we're assured that if the ownership did change
@@ -278,12 +246,9 @@ xfs_setattr(
278 * would have changed also. 246 * would have changed also.
279 */ 247 */
280 iuid = ip->i_d.di_uid; 248 iuid = ip->i_d.di_uid;
281 iprojid = ip->i_d.di_projid;
282 igid = ip->i_d.di_gid; 249 igid = ip->i_d.di_gid;
283 gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; 250 gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
284 uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; 251 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
285 projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
286 iprojid;
287 252
288 /* 253 /*
289 * CAP_CHOWN overrides the following restrictions: 254 * CAP_CHOWN overrides the following restrictions:
@@ -303,11 +268,10 @@ xfs_setattr(
303 goto error_return; 268 goto error_return;
304 } 269 }
305 /* 270 /*
306 * Do a quota reservation only if uid/projid/gid is actually 271 * Do a quota reservation only if uid/gid is actually
307 * going to change. 272 * going to change.
308 */ 273 */
309 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || 274 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
310 (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
311 (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { 275 (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
312 ASSERT(tp); 276 ASSERT(tp);
313 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, 277 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
@@ -321,13 +285,13 @@ xfs_setattr(
321 /* 285 /*
322 * Truncate file. Must have write permission and not be a directory. 286 * Truncate file. Must have write permission and not be a directory.
323 */ 287 */
324 if (mask & XFS_AT_SIZE) { 288 if (mask & ATTR_SIZE) {
325 /* Short circuit the truncate case for zero length files */ 289 /* Short circuit the truncate case for zero length files */
326 if ((vap->va_size == 0) && 290 if (iattr->ia_size == 0 &&
327 (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) { 291 ip->i_size == 0 && ip->i_d.di_nextents == 0) {
328 xfs_iunlock(ip, XFS_ILOCK_EXCL); 292 xfs_iunlock(ip, XFS_ILOCK_EXCL);
329 lock_flags &= ~XFS_ILOCK_EXCL; 293 lock_flags &= ~XFS_ILOCK_EXCL;
330 if (mask & XFS_AT_CTIME) 294 if (mask & ATTR_CTIME)
331 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 295 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
332 code = 0; 296 code = 0;
333 goto error_return; 297 goto error_return;
@@ -350,9 +314,9 @@ xfs_setattr(
350 /* 314 /*
351 * Change file access or modified times. 315 * Change file access or modified times.
352 */ 316 */
353 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 317 if (mask & (ATTR_ATIME|ATTR_MTIME)) {
354 if (!file_owner) { 318 if (!file_owner) {
355 if ((flags & ATTR_UTIME) && 319 if ((mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)) &&
356 !capable(CAP_FOWNER)) { 320 !capable(CAP_FOWNER)) {
357 code = XFS_ERROR(EPERM); 321 code = XFS_ERROR(EPERM);
358 goto error_return; 322 goto error_return;
@@ -361,90 +325,23 @@ xfs_setattr(
361 } 325 }
362 326
363 /* 327 /*
364 * Change extent size or realtime flag.
365 */
366 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
367 /*
368 * Can't change extent size if any extents are allocated.
369 */
370 if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) &&
371 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
372 vap->va_extsize) ) {
373 code = XFS_ERROR(EINVAL); /* EFBIG? */
374 goto error_return;
375 }
376
377 /*
378 * Can't change realtime flag if any extents are allocated.
379 */
380 if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
381 (mask & XFS_AT_XFLAGS) &&
382 (XFS_IS_REALTIME_INODE(ip)) !=
383 (vap->va_xflags & XFS_XFLAG_REALTIME)) {
384 code = XFS_ERROR(EINVAL); /* EFBIG? */
385 goto error_return;
386 }
387 /*
388 * Extent size must be a multiple of the appropriate block
389 * size, if set at all.
390 */
391 if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
392 xfs_extlen_t size;
393
394 if (XFS_IS_REALTIME_INODE(ip) ||
395 ((mask & XFS_AT_XFLAGS) &&
396 (vap->va_xflags & XFS_XFLAG_REALTIME))) {
397 size = mp->m_sb.sb_rextsize <<
398 mp->m_sb.sb_blocklog;
399 } else {
400 size = mp->m_sb.sb_blocksize;
401 }
402 if (vap->va_extsize % size) {
403 code = XFS_ERROR(EINVAL);
404 goto error_return;
405 }
406 }
407 /*
408 * If realtime flag is set then must have realtime data.
409 */
410 if ((mask & XFS_AT_XFLAGS) &&
411 (vap->va_xflags & XFS_XFLAG_REALTIME)) {
412 if ((mp->m_sb.sb_rblocks == 0) ||
413 (mp->m_sb.sb_rextsize == 0) ||
414 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
415 code = XFS_ERROR(EINVAL);
416 goto error_return;
417 }
418 }
419
420 /*
421 * Can't modify an immutable/append-only file unless
422 * we have appropriate permission.
423 */
424 if ((mask & XFS_AT_XFLAGS) &&
425 (ip->i_d.di_flags &
426 (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
427 (vap->va_xflags &
428 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
429 !capable(CAP_LINUX_IMMUTABLE)) {
430 code = XFS_ERROR(EPERM);
431 goto error_return;
432 }
433 }
434
435 /*
436 * Now we can make the changes. Before we join the inode 328 * Now we can make the changes. Before we join the inode
437 * to the transaction, if XFS_AT_SIZE is set then take care of 329 * to the transaction, if ATTR_SIZE is set then take care of
438 * the part of the truncation that must be done without the 330 * the part of the truncation that must be done without the
439 * inode lock. This needs to be done before joining the inode 331 * inode lock. This needs to be done before joining the inode
440 * to the transaction, because the inode cannot be unlocked 332 * to the transaction, because the inode cannot be unlocked
441 * once it is a part of the transaction. 333 * once it is a part of the transaction.
442 */ 334 */
443 if (mask & XFS_AT_SIZE) { 335 if (mask & ATTR_SIZE) {
444 code = 0; 336 code = 0;
445 if ((vap->va_size > ip->i_size) && 337 if (iattr->ia_size > ip->i_size) {
446 (flags & ATTR_NOSIZETOK) == 0) { 338 /*
447 code = xfs_igrow_start(ip, vap->va_size, credp); 339 * Do the first part of growing a file: zero any data
340 * in the last block that is beyond the old EOF. We
341 * need to do this before the inode is joined to the
342 * transaction to modify the i_size.
343 */
344 code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
448 } 345 }
449 xfs_iunlock(ip, XFS_ILOCK_EXCL); 346 xfs_iunlock(ip, XFS_ILOCK_EXCL);
450 347
@@ -461,10 +358,10 @@ xfs_setattr(
461 * not within the range we care about here. 358 * not within the range we care about here.
462 */ 359 */
463 if (!code && 360 if (!code &&
464 (ip->i_size != ip->i_d.di_size) && 361 ip->i_size != ip->i_d.di_size &&
465 (vap->va_size > ip->i_d.di_size)) { 362 iattr->ia_size > ip->i_d.di_size) {
466 code = xfs_flush_pages(ip, 363 code = xfs_flush_pages(ip,
467 ip->i_d.di_size, vap->va_size, 364 ip->i_d.di_size, iattr->ia_size,
468 XFS_B_ASYNC, FI_NONE); 365 XFS_B_ASYNC, FI_NONE);
469 } 366 }
470 367
@@ -472,7 +369,7 @@ xfs_setattr(
472 vn_iowait(ip); 369 vn_iowait(ip);
473 370
474 if (!code) 371 if (!code)
475 code = xfs_itruncate_data(ip, vap->va_size); 372 code = xfs_itruncate_data(ip, iattr->ia_size);
476 if (code) { 373 if (code) {
477 ASSERT(tp == NULL); 374 ASSERT(tp == NULL);
478 lock_flags &= ~XFS_ILOCK_EXCL; 375 lock_flags &= ~XFS_ILOCK_EXCL;
@@ -501,28 +398,30 @@ xfs_setattr(
501 /* 398 /*
502 * Truncate file. Must have write permission and not be a directory. 399 * Truncate file. Must have write permission and not be a directory.
503 */ 400 */
504 if (mask & XFS_AT_SIZE) { 401 if (mask & ATTR_SIZE) {
505 /* 402 /*
506 * Only change the c/mtime if we are changing the size 403 * Only change the c/mtime if we are changing the size
507 * or we are explicitly asked to change it. This handles 404 * or we are explicitly asked to change it. This handles
508 * the semantic difference between truncate() and ftruncate() 405 * the semantic difference between truncate() and ftruncate()
509 * as implemented in the VFS. 406 * as implemented in the VFS.
510 */ 407 */
511 if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME)) 408 if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME))
512 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 409 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
513 410
514 if (vap->va_size > ip->i_size) { 411 if (iattr->ia_size > ip->i_size) {
515 xfs_igrow_finish(tp, ip, vap->va_size, 412 ip->i_d.di_size = iattr->ia_size;
516 !(flags & ATTR_DMI)); 413 ip->i_size = iattr->ia_size;
517 } else if ((vap->va_size <= ip->i_size) || 414 if (!(flags & XFS_ATTR_DMI))
518 ((vap->va_size == 0) && ip->i_d.di_nextents)) { 415 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
416 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
417 } else if (iattr->ia_size <= ip->i_size ||
418 (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
519 /* 419 /*
520 * signal a sync transaction unless 420 * signal a sync transaction unless
521 * we're truncating an already unlinked 421 * we're truncating an already unlinked
522 * file on a wsync filesystem 422 * file on a wsync filesystem
523 */ 423 */
524 code = xfs_itruncate_finish(&tp, ip, 424 code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
525 (xfs_fsize_t)vap->va_size,
526 XFS_DATA_FORK, 425 XFS_DATA_FORK,
527 ((ip->i_d.di_nlink != 0 || 426 ((ip->i_d.di_nlink != 0 ||
528 !(mp->m_flags & XFS_MOUNT_WSYNC)) 427 !(mp->m_flags & XFS_MOUNT_WSYNC))
@@ -544,9 +443,12 @@ xfs_setattr(
544 /* 443 /*
545 * Change file access modes. 444 * Change file access modes.
546 */ 445 */
547 if (mask & XFS_AT_MODE) { 446 if (mask & ATTR_MODE) {
548 ip->i_d.di_mode &= S_IFMT; 447 ip->i_d.di_mode &= S_IFMT;
549 ip->i_d.di_mode |= vap->va_mode & ~S_IFMT; 448 ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT;
449
450 inode->i_mode &= S_IFMT;
451 inode->i_mode |= iattr->ia_mode & ~S_IFMT;
550 452
551 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 453 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
552 timeflags |= XFS_ICHGTIME_CHG; 454 timeflags |= XFS_ICHGTIME_CHG;
@@ -559,7 +461,7 @@ xfs_setattr(
559 * and can change the group id only to a group of which he 461 * and can change the group id only to a group of which he
560 * or she is a member. 462 * or she is a member.
561 */ 463 */
562 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 464 if (mask & (ATTR_UID|ATTR_GID)) {
563 /* 465 /*
564 * CAP_FSETID overrides the following restrictions: 466 * CAP_FSETID overrides the following restrictions:
565 * 467 *
@@ -577,39 +479,24 @@ xfs_setattr(
577 */ 479 */
578 if (iuid != uid) { 480 if (iuid != uid) {
579 if (XFS_IS_UQUOTA_ON(mp)) { 481 if (XFS_IS_UQUOTA_ON(mp)) {
580 ASSERT(mask & XFS_AT_UID); 482 ASSERT(mask & ATTR_UID);
581 ASSERT(udqp); 483 ASSERT(udqp);
582 olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 484 olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
583 &ip->i_udquot, udqp); 485 &ip->i_udquot, udqp);
584 } 486 }
585 ip->i_d.di_uid = uid; 487 ip->i_d.di_uid = uid;
488 inode->i_uid = uid;
586 } 489 }
587 if (igid != gid) { 490 if (igid != gid) {
588 if (XFS_IS_GQUOTA_ON(mp)) { 491 if (XFS_IS_GQUOTA_ON(mp)) {
589 ASSERT(!XFS_IS_PQUOTA_ON(mp)); 492 ASSERT(!XFS_IS_PQUOTA_ON(mp));
590 ASSERT(mask & XFS_AT_GID); 493 ASSERT(mask & ATTR_GID);
591 ASSERT(gdqp); 494 ASSERT(gdqp);
592 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 495 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
593 &ip->i_gdquot, gdqp); 496 &ip->i_gdquot, gdqp);
594 } 497 }
595 ip->i_d.di_gid = gid; 498 ip->i_d.di_gid = gid;
596 } 499 inode->i_gid = gid;
597 if (iprojid != projid) {
598 if (XFS_IS_PQUOTA_ON(mp)) {
599 ASSERT(!XFS_IS_GQUOTA_ON(mp));
600 ASSERT(mask & XFS_AT_PROJID);
601 ASSERT(gdqp);
602 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
603 &ip->i_gdquot, gdqp);
604 }
605 ip->i_d.di_projid = projid;
606 /*
607 * We may have to rev the inode as well as
608 * the superblock version number since projids didn't
609 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
610 */
611 if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
612 xfs_bump_ino_vers2(tp, ip);
613 } 500 }
614 501
615 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 502 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
@@ -620,82 +507,33 @@ xfs_setattr(
620 /* 507 /*
621 * Change file access or modified times. 508 * Change file access or modified times.
622 */ 509 */
623 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 510 if (mask & (ATTR_ATIME|ATTR_MTIME)) {
624 if (mask & XFS_AT_ATIME) { 511 if (mask & ATTR_ATIME) {
625 ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec; 512 inode->i_atime = iattr->ia_atime;
626 ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec; 513 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
514 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
627 ip->i_update_core = 1; 515 ip->i_update_core = 1;
628 timeflags &= ~XFS_ICHGTIME_ACC;
629 } 516 }
630 if (mask & XFS_AT_MTIME) { 517 if (mask & ATTR_MTIME) {
631 ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec; 518 inode->i_mtime = iattr->ia_mtime;
632 ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec; 519 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
520 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
633 timeflags &= ~XFS_ICHGTIME_MOD; 521 timeflags &= ~XFS_ICHGTIME_MOD;
634 timeflags |= XFS_ICHGTIME_CHG; 522 timeflags |= XFS_ICHGTIME_CHG;
635 } 523 }
636 if (tp && (flags & ATTR_UTIME)) 524 if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
637 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 525 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
638 } 526 }
639 527
640 /* 528 /*
641 * Change XFS-added attributes. 529 * Change file inode change time only if ATTR_CTIME set
642 */
643 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
644 if (mask & XFS_AT_EXTSIZE) {
645 /*
646 * Converting bytes to fs blocks.
647 */
648 ip->i_d.di_extsize = vap->va_extsize >>
649 mp->m_sb.sb_blocklog;
650 }
651 if (mask & XFS_AT_XFLAGS) {
652 uint di_flags;
653
654 /* can't set PREALLOC this way, just preserve it */
655 di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
656 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
657 di_flags |= XFS_DIFLAG_IMMUTABLE;
658 if (vap->va_xflags & XFS_XFLAG_APPEND)
659 di_flags |= XFS_DIFLAG_APPEND;
660 if (vap->va_xflags & XFS_XFLAG_SYNC)
661 di_flags |= XFS_DIFLAG_SYNC;
662 if (vap->va_xflags & XFS_XFLAG_NOATIME)
663 di_flags |= XFS_DIFLAG_NOATIME;
664 if (vap->va_xflags & XFS_XFLAG_NODUMP)
665 di_flags |= XFS_DIFLAG_NODUMP;
666 if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
667 di_flags |= XFS_DIFLAG_PROJINHERIT;
668 if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
669 di_flags |= XFS_DIFLAG_NODEFRAG;
670 if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
671 di_flags |= XFS_DIFLAG_FILESTREAM;
672 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
673 if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
674 di_flags |= XFS_DIFLAG_RTINHERIT;
675 if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
676 di_flags |= XFS_DIFLAG_NOSYMLINKS;
677 if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
678 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
679 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
680 if (vap->va_xflags & XFS_XFLAG_REALTIME)
681 di_flags |= XFS_DIFLAG_REALTIME;
682 if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
683 di_flags |= XFS_DIFLAG_EXTSIZE;
684 }
685 ip->i_d.di_flags = di_flags;
686 }
687 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
688 timeflags |= XFS_ICHGTIME_CHG;
689 }
690
691 /*
692 * Change file inode change time only if XFS_AT_CTIME set
693 * AND we have been called by a DMI function. 530 * AND we have been called by a DMI function.
694 */ 531 */
695 532
696 if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) { 533 if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
697 ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec; 534 inode->i_ctime = iattr->ia_ctime;
698 ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec; 535 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
536 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
699 ip->i_update_core = 1; 537 ip->i_update_core = 1;
700 timeflags &= ~XFS_ICHGTIME_CHG; 538 timeflags &= ~XFS_ICHGTIME_CHG;
701 } 539 }
@@ -704,7 +542,7 @@ xfs_setattr(
704 * Send out timestamp changes that need to be set to the 542 * Send out timestamp changes that need to be set to the
705 * current time. Not done when called by a DMI function. 543 * current time. Not done when called by a DMI function.
706 */ 544 */
707 if (timeflags && !(flags & ATTR_DMI)) 545 if (timeflags && !(flags & XFS_ATTR_DMI))
708 xfs_ichgtime(ip, timeflags); 546 xfs_ichgtime(ip, timeflags);
709 547
710 XFS_STATS_INC(xs_ig_attrchg); 548 XFS_STATS_INC(xs_ig_attrchg);
@@ -742,7 +580,7 @@ xfs_setattr(
742 } 580 }
743 581
744 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && 582 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
745 !(flags & ATTR_DMI)) { 583 !(flags & XFS_ATTR_DMI)) {
746 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL, 584 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
747 NULL, DM_RIGHT_NULL, NULL, NULL, 585 NULL, DM_RIGHT_NULL, NULL, NULL,
748 0, 0, AT_DELAY_FLAG(flags)); 586 0, 0, AT_DELAY_FLAG(flags));
@@ -875,7 +713,7 @@ xfs_fsync(
875 return XFS_ERROR(EIO); 713 return XFS_ERROR(EIO);
876 714
877 /* capture size updates in I/O completion before writing the inode. */ 715 /* capture size updates in I/O completion before writing the inode. */
878 error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); 716 error = filemap_fdatawait(VFS_I(ip)->i_mapping);
879 if (error) 717 if (error)
880 return XFS_ERROR(error); 718 return XFS_ERROR(error);
881 719
@@ -1321,7 +1159,6 @@ int
1321xfs_release( 1159xfs_release(
1322 xfs_inode_t *ip) 1160 xfs_inode_t *ip)
1323{ 1161{
1324 bhv_vnode_t *vp = XFS_ITOV(ip);
1325 xfs_mount_t *mp = ip->i_mount; 1162 xfs_mount_t *mp = ip->i_mount;
1326 int error; 1163 int error;
1327 1164
@@ -1356,13 +1193,13 @@ xfs_release(
1356 * be exposed to that problem. 1193 * be exposed to that problem.
1357 */ 1194 */
1358 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); 1195 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
1359 if (truncated && VN_DIRTY(vp) && ip->i_delayed_blks > 0) 1196 if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
1360 xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); 1197 xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
1361 } 1198 }
1362 1199
1363 if (ip->i_d.di_nlink != 0) { 1200 if (ip->i_d.di_nlink != 0) {
1364 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1201 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1365 ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || 1202 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
1366 ip->i_delayed_blks > 0)) && 1203 ip->i_delayed_blks > 0)) &&
1367 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1204 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
1368 (!(ip->i_d.di_flags & 1205 (!(ip->i_d.di_flags &
@@ -1388,7 +1225,6 @@ int
1388xfs_inactive( 1225xfs_inactive(
1389 xfs_inode_t *ip) 1226 xfs_inode_t *ip)
1390{ 1227{
1391 bhv_vnode_t *vp = XFS_ITOV(ip);
1392 xfs_bmap_free_t free_list; 1228 xfs_bmap_free_t free_list;
1393 xfs_fsblock_t first_block; 1229 xfs_fsblock_t first_block;
1394 int committed; 1230 int committed;
@@ -1403,7 +1239,7 @@ xfs_inactive(
1403 * If the inode is already free, then there can be nothing 1239 * If the inode is already free, then there can be nothing
1404 * to clean up here. 1240 * to clean up here.
1405 */ 1241 */
1406 if (ip->i_d.di_mode == 0 || VN_BAD(vp)) { 1242 if (ip->i_d.di_mode == 0 || VN_BAD(VFS_I(ip))) {
1407 ASSERT(ip->i_df.if_real_bytes == 0); 1243 ASSERT(ip->i_df.if_real_bytes == 0);
1408 ASSERT(ip->i_df.if_broot_bytes == 0); 1244 ASSERT(ip->i_df.if_broot_bytes == 0);
1409 return VN_INACTIVE_CACHE; 1245 return VN_INACTIVE_CACHE;
@@ -1433,7 +1269,7 @@ xfs_inactive(
1433 1269
1434 if (ip->i_d.di_nlink != 0) { 1270 if (ip->i_d.di_nlink != 0) {
1435 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1271 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1436 ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || 1272 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
1437 ip->i_delayed_blks > 0)) && 1273 ip->i_delayed_blks > 0)) &&
1438 (ip->i_df.if_flags & XFS_IFEXTENTS) && 1274 (ip->i_df.if_flags & XFS_IFEXTENTS) &&
1439 (!(ip->i_d.di_flags & 1275 (!(ip->i_d.di_flags &
@@ -1601,12 +1437,18 @@ xfs_inactive(
1601 return VN_INACTIVE_CACHE; 1437 return VN_INACTIVE_CACHE;
1602} 1438}
1603 1439
1604 1440/*
1441 * Lookups up an inode from "name". If ci_name is not NULL, then a CI match
1442 * is allowed, otherwise it has to be an exact match. If a CI match is found,
1443 * ci_name->name will point to a the actual name (caller must free) or
1444 * will be set to NULL if an exact match is found.
1445 */
1605int 1446int
1606xfs_lookup( 1447xfs_lookup(
1607 xfs_inode_t *dp, 1448 xfs_inode_t *dp,
1608 struct xfs_name *name, 1449 struct xfs_name *name,
1609 xfs_inode_t **ipp) 1450 xfs_inode_t **ipp,
1451 struct xfs_name *ci_name)
1610{ 1452{
1611 xfs_ino_t inum; 1453 xfs_ino_t inum;
1612 int error; 1454 int error;
@@ -1618,7 +1460,7 @@ xfs_lookup(
1618 return XFS_ERROR(EIO); 1460 return XFS_ERROR(EIO);
1619 1461
1620 lock_mode = xfs_ilock_map_shared(dp); 1462 lock_mode = xfs_ilock_map_shared(dp);
1621 error = xfs_dir_lookup(NULL, dp, name, &inum); 1463 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
1622 xfs_iunlock_map_shared(dp, lock_mode); 1464 xfs_iunlock_map_shared(dp, lock_mode);
1623 1465
1624 if (error) 1466 if (error)
@@ -1626,12 +1468,15 @@ xfs_lookup(
1626 1468
1627 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0); 1469 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
1628 if (error) 1470 if (error)
1629 goto out; 1471 goto out_free_name;
1630 1472
1631 xfs_itrace_ref(*ipp); 1473 xfs_itrace_ref(*ipp);
1632 return 0; 1474 return 0;
1633 1475
1634 out: 1476out_free_name:
1477 if (ci_name)
1478 kmem_free(ci_name->name);
1479out:
1635 *ipp = NULL; 1480 *ipp = NULL;
1636 return error; 1481 return error;
1637} 1482}
@@ -1688,7 +1533,7 @@ xfs_create(
1688 * Make sure that we have allocated dquot(s) on disk. 1533 * Make sure that we have allocated dquot(s) on disk.
1689 */ 1534 */
1690 error = XFS_QM_DQVOPALLOC(mp, dp, 1535 error = XFS_QM_DQVOPALLOC(mp, dp,
1691 current_fsuid(credp), current_fsgid(credp), prid, 1536 current_fsuid(), current_fsgid(), prid,
1692 XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp); 1537 XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
1693 if (error) 1538 if (error)
1694 goto std_return; 1539 goto std_return;
@@ -1860,111 +1705,6 @@ std_return:
1860} 1705}
1861 1706
1862#ifdef DEBUG 1707#ifdef DEBUG
1863/*
1864 * Some counters to see if (and how often) we are hitting some deadlock
1865 * prevention code paths.
1866 */
1867
1868int xfs_rm_locks;
1869int xfs_rm_lock_delays;
1870int xfs_rm_attempts;
1871#endif
1872
1873/*
1874 * The following routine will lock the inodes associated with the
1875 * directory and the named entry in the directory. The locks are
1876 * acquired in increasing inode number.
1877 *
1878 * If the entry is "..", then only the directory is locked. The
1879 * vnode ref count will still include that from the .. entry in
1880 * this case.
1881 *
1882 * There is a deadlock we need to worry about. If the locked directory is
1883 * in the AIL, it might be blocking up the log. The next inode we lock
1884 * could be already locked by another thread waiting for log space (e.g
1885 * a permanent log reservation with a long running transaction (see
1886 * xfs_itruncate_finish)). To solve this, we must check if the directory
1887 * is in the ail and use lock_nowait. If we can't lock, we need to
1888 * drop the inode lock on the directory and try again. xfs_iunlock will
1889 * potentially push the tail if we were holding up the log.
1890 */
1891STATIC int
1892xfs_lock_dir_and_entry(
1893 xfs_inode_t *dp,
1894 xfs_inode_t *ip) /* inode of entry 'name' */
1895{
1896 int attempts;
1897 xfs_ino_t e_inum;
1898 xfs_inode_t *ips[2];
1899 xfs_log_item_t *lp;
1900
1901#ifdef DEBUG
1902 xfs_rm_locks++;
1903#endif
1904 attempts = 0;
1905
1906again:
1907 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1908
1909 e_inum = ip->i_ino;
1910
1911 xfs_itrace_ref(ip);
1912
1913 /*
1914 * We want to lock in increasing inum. Since we've already
1915 * acquired the lock on the directory, we may need to release
1916 * if if the inum of the entry turns out to be less.
1917 */
1918 if (e_inum > dp->i_ino) {
1919 /*
1920 * We are already in the right order, so just
1921 * lock on the inode of the entry.
1922 * We need to use nowait if dp is in the AIL.
1923 */
1924
1925 lp = (xfs_log_item_t *)dp->i_itemp;
1926 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
1927 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
1928 attempts++;
1929#ifdef DEBUG
1930 xfs_rm_attempts++;
1931#endif
1932
1933 /*
1934 * Unlock dp and try again.
1935 * xfs_iunlock will try to push the tail
1936 * if the inode is in the AIL.
1937 */
1938
1939 xfs_iunlock(dp, XFS_ILOCK_EXCL);
1940
1941 if ((attempts % 5) == 0) {
1942 delay(1); /* Don't just spin the CPU */
1943#ifdef DEBUG
1944 xfs_rm_lock_delays++;
1945#endif
1946 }
1947 goto again;
1948 }
1949 } else {
1950 xfs_ilock(ip, XFS_ILOCK_EXCL);
1951 }
1952 } else if (e_inum < dp->i_ino) {
1953 xfs_iunlock(dp, XFS_ILOCK_EXCL);
1954
1955 ips[0] = ip;
1956 ips[1] = dp;
1957 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
1958 }
1959 /* else e_inum == dp->i_ino */
1960 /* This can happen if we're asked to lock /x/..
1961 * the entry is "..", which is also the parent directory.
1962 */
1963
1964 return 0;
1965}
1966
1967#ifdef DEBUG
1968int xfs_locked_n; 1708int xfs_locked_n;
1969int xfs_small_retries; 1709int xfs_small_retries;
1970int xfs_middle_retries; 1710int xfs_middle_retries;
@@ -2098,12 +1838,44 @@ again:
2098#endif 1838#endif
2099} 1839}
2100 1840
2101#ifdef DEBUG 1841void
2102#define REMOVE_DEBUG_TRACE(x) {remove_which_error_return = (x);} 1842xfs_lock_two_inodes(
2103int remove_which_error_return = 0; 1843 xfs_inode_t *ip0,
2104#else /* ! DEBUG */ 1844 xfs_inode_t *ip1,
2105#define REMOVE_DEBUG_TRACE(x) 1845 uint lock_mode)
2106#endif /* ! DEBUG */ 1846{
1847 xfs_inode_t *temp;
1848 int attempts = 0;
1849 xfs_log_item_t *lp;
1850
1851 ASSERT(ip0->i_ino != ip1->i_ino);
1852
1853 if (ip0->i_ino > ip1->i_ino) {
1854 temp = ip0;
1855 ip0 = ip1;
1856 ip1 = temp;
1857 }
1858
1859 again:
1860 xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
1861
1862 /*
1863 * If the first lock we have locked is in the AIL, we must TRY to get
1864 * the second lock. If we can't get it, we must release the first one
1865 * and try again.
1866 */
1867 lp = (xfs_log_item_t *)ip0->i_itemp;
1868 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
1869 if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
1870 xfs_iunlock(ip0, lock_mode);
1871 if ((++attempts % 5) == 0)
1872 delay(1); /* Don't just spin the CPU */
1873 goto again;
1874 }
1875 } else {
1876 xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
1877 }
1878}
2107 1879
2108int 1880int
2109xfs_remove( 1881xfs_remove(
@@ -2113,6 +1885,7 @@ xfs_remove(
2113{ 1885{
2114 xfs_mount_t *mp = dp->i_mount; 1886 xfs_mount_t *mp = dp->i_mount;
2115 xfs_trans_t *tp = NULL; 1887 xfs_trans_t *tp = NULL;
1888 int is_dir = S_ISDIR(ip->i_d.di_mode);
2116 int error = 0; 1889 int error = 0;
2117 xfs_bmap_free_t free_list; 1890 xfs_bmap_free_t free_list;
2118 xfs_fsblock_t first_block; 1891 xfs_fsblock_t first_block;
@@ -2120,8 +1893,10 @@ xfs_remove(
2120 int committed; 1893 int committed;
2121 int link_zero; 1894 int link_zero;
2122 uint resblks; 1895 uint resblks;
1896 uint log_count;
2123 1897
2124 xfs_itrace_entry(dp); 1898 xfs_itrace_entry(dp);
1899 xfs_itrace_entry(ip);
2125 1900
2126 if (XFS_FORCED_SHUTDOWN(mp)) 1901 if (XFS_FORCED_SHUTDOWN(mp))
2127 return XFS_ERROR(EIO); 1902 return XFS_ERROR(EIO);
@@ -2134,19 +1909,23 @@ xfs_remove(
2134 return error; 1909 return error;
2135 } 1910 }
2136 1911
2137 xfs_itrace_entry(ip);
2138 xfs_itrace_ref(ip);
2139
2140 error = XFS_QM_DQATTACH(mp, dp, 0); 1912 error = XFS_QM_DQATTACH(mp, dp, 0);
2141 if (!error) 1913 if (error)
2142 error = XFS_QM_DQATTACH(mp, ip, 0);
2143 if (error) {
2144 REMOVE_DEBUG_TRACE(__LINE__);
2145 goto std_return; 1914 goto std_return;
2146 }
2147 1915
2148 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); 1916 error = XFS_QM_DQATTACH(mp, ip, 0);
1917 if (error)
1918 goto std_return;
1919
1920 if (is_dir) {
1921 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
1922 log_count = XFS_DEFAULT_LOG_COUNT;
1923 } else {
1924 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
1925 log_count = XFS_REMOVE_LOG_COUNT;
1926 }
2149 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1927 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1928
2150 /* 1929 /*
2151 * We try to get the real space reservation first, 1930 * We try to get the real space reservation first,
2152 * allowing for directory btree deletion(s) implying 1931 * allowing for directory btree deletion(s) implying
@@ -2158,25 +1937,19 @@ xfs_remove(
2158 */ 1937 */
2159 resblks = XFS_REMOVE_SPACE_RES(mp); 1938 resblks = XFS_REMOVE_SPACE_RES(mp);
2160 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 1939 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
2161 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 1940 XFS_TRANS_PERM_LOG_RES, log_count);
2162 if (error == ENOSPC) { 1941 if (error == ENOSPC) {
2163 resblks = 0; 1942 resblks = 0;
2164 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 1943 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
2165 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 1944 XFS_TRANS_PERM_LOG_RES, log_count);
2166 } 1945 }
2167 if (error) { 1946 if (error) {
2168 ASSERT(error != ENOSPC); 1947 ASSERT(error != ENOSPC);
2169 REMOVE_DEBUG_TRACE(__LINE__); 1948 cancel_flags = 0;
2170 xfs_trans_cancel(tp, 0); 1949 goto out_trans_cancel;
2171 return error;
2172 } 1950 }
2173 1951
2174 error = xfs_lock_dir_and_entry(dp, ip); 1952 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
2175 if (error) {
2176 REMOVE_DEBUG_TRACE(__LINE__);
2177 xfs_trans_cancel(tp, cancel_flags);
2178 goto std_return;
2179 }
2180 1953
2181 /* 1954 /*
2182 * At this point, we've gotten both the directory and the entry 1955 * At this point, we've gotten both the directory and the entry
@@ -2189,46 +1962,83 @@ xfs_remove(
2189 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1962 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2190 1963
2191 /* 1964 /*
2192 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 1965 * If we're removing a directory perform some additional validation.
2193 */ 1966 */
1967 if (is_dir) {
1968 ASSERT(ip->i_d.di_nlink >= 2);
1969 if (ip->i_d.di_nlink != 2) {
1970 error = XFS_ERROR(ENOTEMPTY);
1971 goto out_trans_cancel;
1972 }
1973 if (!xfs_dir_isempty(ip)) {
1974 error = XFS_ERROR(ENOTEMPTY);
1975 goto out_trans_cancel;
1976 }
1977 }
1978
2194 XFS_BMAP_INIT(&free_list, &first_block); 1979 XFS_BMAP_INIT(&free_list, &first_block);
2195 error = xfs_dir_removename(tp, dp, name, ip->i_ino, 1980 error = xfs_dir_removename(tp, dp, name, ip->i_ino,
2196 &first_block, &free_list, resblks); 1981 &first_block, &free_list, resblks);
2197 if (error) { 1982 if (error) {
2198 ASSERT(error != ENOENT); 1983 ASSERT(error != ENOENT);
2199 REMOVE_DEBUG_TRACE(__LINE__); 1984 goto out_bmap_cancel;
2200 goto error1;
2201 } 1985 }
2202 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1986 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2203 1987
1988 /*
1989 * Bump the in memory generation count on the parent
1990 * directory so that other can know that it has changed.
1991 */
2204 dp->i_gen++; 1992 dp->i_gen++;
2205 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1993 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2206 1994
2207 error = xfs_droplink(tp, ip); 1995 if (is_dir) {
2208 if (error) { 1996 /*
2209 REMOVE_DEBUG_TRACE(__LINE__); 1997 * Drop the link from ip's "..".
2210 goto error1; 1998 */
1999 error = xfs_droplink(tp, dp);
2000 if (error)
2001 goto out_bmap_cancel;
2002
2003 /*
2004 * Drop the link from dp to ip.
2005 */
2006 error = xfs_droplink(tp, ip);
2007 if (error)
2008 goto out_bmap_cancel;
2009 } else {
2010 /*
2011 * When removing a non-directory we need to log the parent
2012 * inode here for the i_gen update. For a directory this is
2013 * done implicitly by the xfs_droplink call for the ".." entry.
2014 */
2015 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2211 } 2016 }
2212 2017
2213 /* Determine if this is the last link while 2018 /*
2019 * Drop the "." link from ip to self.
2020 */
2021 error = xfs_droplink(tp, ip);
2022 if (error)
2023 goto out_bmap_cancel;
2024
2025 /*
2026 * Determine if this is the last link while
2214 * we are in the transaction. 2027 * we are in the transaction.
2215 */ 2028 */
2216 link_zero = (ip)->i_d.di_nlink==0; 2029 link_zero = (ip->i_d.di_nlink == 0);
2217 2030
2218 /* 2031 /*
2219 * If this is a synchronous mount, make sure that the 2032 * If this is a synchronous mount, make sure that the
2220 * remove transaction goes to disk before returning to 2033 * remove transaction goes to disk before returning to
2221 * the user. 2034 * the user.
2222 */ 2035 */
2223 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2036 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2224 xfs_trans_set_sync(tp); 2037 xfs_trans_set_sync(tp);
2225 }
2226 2038
2227 error = xfs_bmap_finish(&tp, &free_list, &committed); 2039 error = xfs_bmap_finish(&tp, &free_list, &committed);
2228 if (error) { 2040 if (error)
2229 REMOVE_DEBUG_TRACE(__LINE__); 2041 goto out_bmap_cancel;
2230 goto error_rele;
2231 }
2232 2042
2233 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2043 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2234 if (error) 2044 if (error)
@@ -2240,38 +2050,26 @@ xfs_remove(
2240 * will get killed on last close in xfs_close() so we don't 2050 * will get killed on last close in xfs_close() so we don't
2241 * have to worry about that. 2051 * have to worry about that.
2242 */ 2052 */
2243 if (link_zero && xfs_inode_is_filestream(ip)) 2053 if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
2244 xfs_filestream_deassociate(ip); 2054 xfs_filestream_deassociate(ip);
2245 2055
2246 xfs_itrace_exit(ip); 2056 xfs_itrace_exit(ip);
2057 xfs_itrace_exit(dp);
2247 2058
2248/* Fall through to std_return with error = 0 */
2249 std_return: 2059 std_return:
2250 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 2060 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
2251 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 2061 XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
2252 dp, DM_RIGHT_NULL, 2062 NULL, DM_RIGHT_NULL, name->name, NULL,
2253 NULL, DM_RIGHT_NULL, 2063 ip->i_d.di_mode, error, 0);
2254 name->name, NULL, ip->i_d.di_mode, error, 0);
2255 } 2064 }
2256 return error;
2257 2065
2258 error1: 2066 return error;
2259 xfs_bmap_cancel(&free_list);
2260 cancel_flags |= XFS_TRANS_ABORT;
2261 xfs_trans_cancel(tp, cancel_flags);
2262 goto std_return;
2263 2067
2264 error_rele: 2068 out_bmap_cancel:
2265 /*
2266 * In this case make sure to not release the inode until after
2267 * the current transaction is aborted. Releasing it beforehand
2268 * can cause us to go to xfs_inactive and start a recursive
2269 * transaction which can easily deadlock with the current one.
2270 */
2271 xfs_bmap_cancel(&free_list); 2069 xfs_bmap_cancel(&free_list);
2272 cancel_flags |= XFS_TRANS_ABORT; 2070 cancel_flags |= XFS_TRANS_ABORT;
2071 out_trans_cancel:
2273 xfs_trans_cancel(tp, cancel_flags); 2072 xfs_trans_cancel(tp, cancel_flags);
2274
2275 goto std_return; 2073 goto std_return;
2276} 2074}
2277 2075
@@ -2283,7 +2081,6 @@ xfs_link(
2283{ 2081{
2284 xfs_mount_t *mp = tdp->i_mount; 2082 xfs_mount_t *mp = tdp->i_mount;
2285 xfs_trans_t *tp; 2083 xfs_trans_t *tp;
2286 xfs_inode_t *ips[2];
2287 int error; 2084 int error;
2288 xfs_bmap_free_t free_list; 2085 xfs_bmap_free_t free_list;
2289 xfs_fsblock_t first_block; 2086 xfs_fsblock_t first_block;
@@ -2331,15 +2128,7 @@ xfs_link(
2331 goto error_return; 2128 goto error_return;
2332 } 2129 }
2333 2130
2334 if (sip->i_ino < tdp->i_ino) { 2131 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
2335 ips[0] = sip;
2336 ips[1] = tdp;
2337 } else {
2338 ips[0] = tdp;
2339 ips[1] = sip;
2340 }
2341
2342 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
2343 2132
2344 /* 2133 /*
2345 * Increment vnode ref counts since xfs_trans_commit & 2134 * Increment vnode ref counts since xfs_trans_commit &
@@ -2480,7 +2269,7 @@ xfs_mkdir(
2480 * Make sure that we have allocated dquot(s) on disk. 2269 * Make sure that we have allocated dquot(s) on disk.
2481 */ 2270 */
2482 error = XFS_QM_DQVOPALLOC(mp, dp, 2271 error = XFS_QM_DQVOPALLOC(mp, dp,
2483 current_fsuid(credp), current_fsgid(credp), prid, 2272 current_fsuid(), current_fsgid(), prid,
2484 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 2273 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
2485 if (error) 2274 if (error)
2486 goto std_return; 2275 goto std_return;
@@ -2638,186 +2427,6 @@ std_return:
2638} 2427}
2639 2428
2640int 2429int
2641xfs_rmdir(
2642 xfs_inode_t *dp,
2643 struct xfs_name *name,
2644 xfs_inode_t *cdp)
2645{
2646 xfs_mount_t *mp = dp->i_mount;
2647 xfs_trans_t *tp;
2648 int error;
2649 xfs_bmap_free_t free_list;
2650 xfs_fsblock_t first_block;
2651 int cancel_flags;
2652 int committed;
2653 int last_cdp_link;
2654 uint resblks;
2655
2656 xfs_itrace_entry(dp);
2657
2658 if (XFS_FORCED_SHUTDOWN(mp))
2659 return XFS_ERROR(EIO);
2660
2661 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
2662 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
2663 dp, DM_RIGHT_NULL,
2664 NULL, DM_RIGHT_NULL, name->name,
2665 NULL, cdp->i_d.di_mode, 0, 0);
2666 if (error)
2667 return XFS_ERROR(error);
2668 }
2669
2670 /*
2671 * Get the dquots for the inodes.
2672 */
2673 error = XFS_QM_DQATTACH(mp, dp, 0);
2674 if (!error)
2675 error = XFS_QM_DQATTACH(mp, cdp, 0);
2676 if (error) {
2677 REMOVE_DEBUG_TRACE(__LINE__);
2678 goto std_return;
2679 }
2680
2681 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
2682 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2683 /*
2684 * We try to get the real space reservation first,
2685 * allowing for directory btree deletion(s) implying
2686 * possible bmap insert(s). If we can't get the space
2687 * reservation then we use 0 instead, and avoid the bmap
2688 * btree insert(s) in the directory code by, if the bmap
2689 * insert tries to happen, instead trimming the LAST
2690 * block from the directory.
2691 */
2692 resblks = XFS_REMOVE_SPACE_RES(mp);
2693 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
2694 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
2695 if (error == ENOSPC) {
2696 resblks = 0;
2697 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
2698 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
2699 }
2700 if (error) {
2701 ASSERT(error != ENOSPC);
2702 cancel_flags = 0;
2703 goto error_return;
2704 }
2705 XFS_BMAP_INIT(&free_list, &first_block);
2706
2707 /*
2708 * Now lock the child directory inode and the parent directory
2709 * inode in the proper order. This will take care of validating
2710 * that the directory entry for the child directory inode has
2711 * not changed while we were obtaining a log reservation.
2712 */
2713 error = xfs_lock_dir_and_entry(dp, cdp);
2714 if (error) {
2715 xfs_trans_cancel(tp, cancel_flags);
2716 goto std_return;
2717 }
2718
2719 IHOLD(dp);
2720 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2721
2722 IHOLD(cdp);
2723 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
2724
2725 ASSERT(cdp->i_d.di_nlink >= 2);
2726 if (cdp->i_d.di_nlink != 2) {
2727 error = XFS_ERROR(ENOTEMPTY);
2728 goto error_return;
2729 }
2730 if (!xfs_dir_isempty(cdp)) {
2731 error = XFS_ERROR(ENOTEMPTY);
2732 goto error_return;
2733 }
2734
2735 error = xfs_dir_removename(tp, dp, name, cdp->i_ino,
2736 &first_block, &free_list, resblks);
2737 if (error)
2738 goto error1;
2739
2740 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2741
2742 /*
2743 * Bump the in memory generation count on the parent
2744 * directory so that other can know that it has changed.
2745 */
2746 dp->i_gen++;
2747
2748 /*
2749 * Drop the link from cdp's "..".
2750 */
2751 error = xfs_droplink(tp, dp);
2752 if (error) {
2753 goto error1;
2754 }
2755
2756 /*
2757 * Drop the link from dp to cdp.
2758 */
2759 error = xfs_droplink(tp, cdp);
2760 if (error) {
2761 goto error1;
2762 }
2763
2764 /*
2765 * Drop the "." link from cdp to self.
2766 */
2767 error = xfs_droplink(tp, cdp);
2768 if (error) {
2769 goto error1;
2770 }
2771
2772 /* Determine these before committing transaction */
2773 last_cdp_link = (cdp)->i_d.di_nlink==0;
2774
2775 /*
2776 * If this is a synchronous mount, make sure that the
2777 * rmdir transaction goes to disk before returning to
2778 * the user.
2779 */
2780 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
2781 xfs_trans_set_sync(tp);
2782 }
2783
2784 error = xfs_bmap_finish (&tp, &free_list, &committed);
2785 if (error) {
2786 xfs_bmap_cancel(&free_list);
2787 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
2788 XFS_TRANS_ABORT));
2789 goto std_return;
2790 }
2791
2792 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2793 if (error) {
2794 goto std_return;
2795 }
2796
2797
2798 /* Fall through to std_return with error = 0 or the errno
2799 * from xfs_trans_commit. */
2800 std_return:
2801 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
2802 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
2803 dp, DM_RIGHT_NULL,
2804 NULL, DM_RIGHT_NULL,
2805 name->name, NULL, cdp->i_d.di_mode,
2806 error, 0);
2807 }
2808 return error;
2809
2810 error1:
2811 xfs_bmap_cancel(&free_list);
2812 cancel_flags |= XFS_TRANS_ABORT;
2813 /* FALLTHROUGH */
2814
2815 error_return:
2816 xfs_trans_cancel(tp, cancel_flags);
2817 goto std_return;
2818}
2819
2820int
2821xfs_symlink( 2430xfs_symlink(
2822 xfs_inode_t *dp, 2431 xfs_inode_t *dp,
2823 struct xfs_name *link_name, 2432 struct xfs_name *link_name,
@@ -2886,7 +2495,7 @@ xfs_symlink(
2886 * Make sure that we have allocated dquot(s) on disk. 2495 * Make sure that we have allocated dquot(s) on disk.
2887 */ 2496 */
2888 error = XFS_QM_DQVOPALLOC(mp, dp, 2497 error = XFS_QM_DQVOPALLOC(mp, dp,
2889 current_fsuid(credp), current_fsgid(credp), prid, 2498 current_fsuid(), current_fsgid(), prid,
2890 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 2499 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
2891 if (error) 2500 if (error)
2892 goto std_return; 2501 goto std_return;
@@ -3181,14 +2790,13 @@ int
3181xfs_reclaim( 2790xfs_reclaim(
3182 xfs_inode_t *ip) 2791 xfs_inode_t *ip)
3183{ 2792{
3184 bhv_vnode_t *vp = XFS_ITOV(ip);
3185 2793
3186 xfs_itrace_entry(ip); 2794 xfs_itrace_entry(ip);
3187 2795
3188 ASSERT(!VN_MAPPED(vp)); 2796 ASSERT(!VN_MAPPED(VFS_I(ip)));
3189 2797
3190 /* bad inode, get out here ASAP */ 2798 /* bad inode, get out here ASAP */
3191 if (VN_BAD(vp)) { 2799 if (VN_BAD(VFS_I(ip))) {
3192 xfs_ireclaim(ip); 2800 xfs_ireclaim(ip);
3193 return 0; 2801 return 0;
3194 } 2802 }
@@ -3225,7 +2833,7 @@ xfs_reclaim(
3225 XFS_MOUNT_ILOCK(mp); 2833 XFS_MOUNT_ILOCK(mp);
3226 spin_lock(&ip->i_flags_lock); 2834 spin_lock(&ip->i_flags_lock);
3227 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 2835 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
3228 vn_to_inode(vp)->i_private = NULL; 2836 VFS_I(ip)->i_private = NULL;
3229 ip->i_vnode = NULL; 2837 ip->i_vnode = NULL;
3230 spin_unlock(&ip->i_flags_lock); 2838 spin_unlock(&ip->i_flags_lock);
3231 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); 2839 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
@@ -3241,8 +2849,7 @@ xfs_finish_reclaim(
3241 int sync_mode) 2849 int sync_mode)
3242{ 2850{
3243 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); 2851 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
3244 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 2852 struct inode *vp = VFS_I(ip);
3245 int error;
3246 2853
3247 if (vp && VN_BAD(vp)) 2854 if (vp && VN_BAD(vp))
3248 goto reclaim; 2855 goto reclaim;
@@ -3285,29 +2892,16 @@ xfs_finish_reclaim(
3285 xfs_iflock(ip); 2892 xfs_iflock(ip);
3286 } 2893 }
3287 2894
3288 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 2895 /*
3289 if (ip->i_update_core || 2896 * In the case of a forced shutdown we rely on xfs_iflush() to
3290 ((ip->i_itemp != NULL) && 2897 * wait for the inode to be unpinned before returning an error.
3291 (ip->i_itemp->ili_format.ilf_fields != 0))) { 2898 */
3292 error = xfs_iflush(ip, sync_mode); 2899 if (xfs_iflush(ip, sync_mode) == 0) {
3293 /* 2900 /* synchronize with xfs_iflush_done */
3294 * If we hit an error, typically because of filesystem 2901 xfs_iflock(ip);
3295 * shutdown, we don't need to let vn_reclaim to know 2902 xfs_ifunlock(ip);
3296 * because we're gonna reclaim the inode anyway.
3297 */
3298 if (error) {
3299 xfs_iunlock(ip, XFS_ILOCK_EXCL);
3300 goto reclaim;
3301 }
3302 xfs_iflock(ip); /* synchronize with xfs_iflush_done */
3303 }
3304
3305 ASSERT(ip->i_update_core == 0);
3306 ASSERT(ip->i_itemp == NULL ||
3307 ip->i_itemp->ili_format.ilf_fields == 0);
3308 } 2903 }
3309 2904
3310 xfs_ifunlock(ip);
3311 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2905 xfs_iunlock(ip, XFS_ILOCK_EXCL);
3312 2906
3313 reclaim: 2907 reclaim:
@@ -3418,7 +3012,7 @@ xfs_alloc_file_space(
3418 3012
3419 /* Generate a DMAPI event if needed. */ 3013 /* Generate a DMAPI event if needed. */
3420 if (alloc_type != 0 && offset < ip->i_size && 3014 if (alloc_type != 0 && offset < ip->i_size &&
3421 (attr_flags&ATTR_DMI) == 0 && 3015 (attr_flags & XFS_ATTR_DMI) == 0 &&
3422 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { 3016 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
3423 xfs_off_t end_dmi_offset; 3017 xfs_off_t end_dmi_offset;
3424 3018
@@ -3532,7 +3126,7 @@ retry:
3532 allocatesize_fsb -= allocated_fsb; 3126 allocatesize_fsb -= allocated_fsb;
3533 } 3127 }
3534dmapi_enospc_check: 3128dmapi_enospc_check:
3535 if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 && 3129 if (error == ENOSPC && (attr_flags & XFS_ATTR_DMI) == 0 &&
3536 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { 3130 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
3537 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, 3131 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
3538 ip, DM_RIGHT_NULL, 3132 ip, DM_RIGHT_NULL,
@@ -3643,7 +3237,6 @@ xfs_free_file_space(
3643 xfs_off_t len, 3237 xfs_off_t len,
3644 int attr_flags) 3238 int attr_flags)
3645{ 3239{
3646 bhv_vnode_t *vp;
3647 int committed; 3240 int committed;
3648 int done; 3241 int done;
3649 xfs_off_t end_dmi_offset; 3242 xfs_off_t end_dmi_offset;
@@ -3663,7 +3256,6 @@ xfs_free_file_space(
3663 xfs_trans_t *tp; 3256 xfs_trans_t *tp;
3664 int need_iolock = 1; 3257 int need_iolock = 1;
3665 3258
3666 vp = XFS_ITOV(ip);
3667 mp = ip->i_mount; 3259 mp = ip->i_mount;
3668 3260
3669 xfs_itrace_entry(ip); 3261 xfs_itrace_entry(ip);
@@ -3679,7 +3271,7 @@ xfs_free_file_space(
3679 end_dmi_offset = offset + len; 3271 end_dmi_offset = offset + len;
3680 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); 3272 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
3681 3273
3682 if (offset < ip->i_size && (attr_flags & ATTR_DMI) == 0 && 3274 if (offset < ip->i_size && (attr_flags & XFS_ATTR_DMI) == 0 &&
3683 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { 3275 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
3684 if (end_dmi_offset > ip->i_size) 3276 if (end_dmi_offset > ip->i_size)
3685 end_dmi_offset = ip->i_size; 3277 end_dmi_offset = ip->i_size;
@@ -3690,7 +3282,7 @@ xfs_free_file_space(
3690 return error; 3282 return error;
3691 } 3283 }
3692 3284
3693 if (attr_flags & ATTR_NOLOCK) 3285 if (attr_flags & XFS_ATTR_NOLOCK)
3694 need_iolock = 0; 3286 need_iolock = 0;
3695 if (need_iolock) { 3287 if (need_iolock) {
3696 xfs_ilock(ip, XFS_IOLOCK_EXCL); 3288 xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -3700,7 +3292,7 @@ xfs_free_file_space(
3700 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); 3292 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
3701 ioffset = offset & ~(rounding - 1); 3293 ioffset = offset & ~(rounding - 1);
3702 3294
3703 if (VN_CACHED(vp) != 0) { 3295 if (VN_CACHED(VFS_I(ip)) != 0) {
3704 xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1); 3296 xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1);
3705 error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); 3297 error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
3706 if (error) 3298 if (error)
@@ -3867,7 +3459,7 @@ xfs_change_file_space(
3867 xfs_off_t startoffset; 3459 xfs_off_t startoffset;
3868 xfs_off_t llen; 3460 xfs_off_t llen;
3869 xfs_trans_t *tp; 3461 xfs_trans_t *tp;
3870 bhv_vattr_t va; 3462 struct iattr iattr;
3871 3463
3872 xfs_itrace_entry(ip); 3464 xfs_itrace_entry(ip);
3873 3465
@@ -3941,10 +3533,10 @@ xfs_change_file_space(
3941 break; 3533 break;
3942 } 3534 }
3943 3535
3944 va.va_mask = XFS_AT_SIZE; 3536 iattr.ia_valid = ATTR_SIZE;
3945 va.va_size = startoffset; 3537 iattr.ia_size = startoffset;
3946 3538
3947 error = xfs_setattr(ip, &va, attr_flags, credp); 3539 error = xfs_setattr(ip, &iattr, attr_flags, credp);
3948 3540
3949 if (error) 3541 if (error)
3950 return error; 3542 return error;
@@ -3974,7 +3566,7 @@ xfs_change_file_space(
3974 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3566 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
3975 xfs_trans_ihold(tp, ip); 3567 xfs_trans_ihold(tp, ip);
3976 3568
3977 if ((attr_flags & ATTR_DMI) == 0) { 3569 if ((attr_flags & XFS_ATTR_DMI) == 0) {
3978 ip->i_d.di_mode &= ~S_ISUID; 3570 ip->i_d.di_mode &= ~S_ISUID;
3979 3571
3980 /* 3572 /*
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 57335ba4ce53..e932a96bec54 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -2,9 +2,9 @@
2#define _XFS_VNODEOPS_H 1 2#define _XFS_VNODEOPS_H 1
3 3
4struct attrlist_cursor_kern; 4struct attrlist_cursor_kern;
5struct bhv_vattr;
6struct cred; 5struct cred;
7struct file; 6struct file;
7struct iattr;
8struct inode; 8struct inode;
9struct iovec; 9struct iovec;
10struct kiocb; 10struct kiocb;
@@ -15,14 +15,18 @@ struct xfs_iomap;
15 15
16 16
17int xfs_open(struct xfs_inode *ip); 17int xfs_open(struct xfs_inode *ip);
18int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, 18int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags,
19 struct cred *credp); 19 struct cred *credp);
20#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
21#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
22#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
23
20int xfs_readlink(struct xfs_inode *ip, char *link); 24int xfs_readlink(struct xfs_inode *ip, char *link);
21int xfs_fsync(struct xfs_inode *ip); 25int xfs_fsync(struct xfs_inode *ip);
22int xfs_release(struct xfs_inode *ip); 26int xfs_release(struct xfs_inode *ip);
23int xfs_inactive(struct xfs_inode *ip); 27int xfs_inactive(struct xfs_inode *ip);
24int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, 28int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
25 struct xfs_inode **ipp); 29 struct xfs_inode **ipp, struct xfs_name *ci_name);
26int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, 30int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
27 xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp); 31 xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp);
28int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 32int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
@@ -31,8 +35,6 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
31 struct xfs_name *target_name); 35 struct xfs_name *target_name);
32int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name, 36int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
33 mode_t mode, struct xfs_inode **ipp, struct cred *credp); 37 mode_t mode, struct xfs_inode **ipp, struct cred *credp);
34int xfs_rmdir(struct xfs_inode *dp, struct xfs_name *name,
35 struct xfs_inode *cdp);
36int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, 38int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
37 xfs_off_t *offset, filldir_t filldir); 39 xfs_off_t *offset, filldir_t filldir);
38int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, 40int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,