aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw2@infradead.org>2008-04-22 07:34:25 -0400
committerDavid Woodhouse <dwmw2@infradead.org>2008-04-22 07:34:25 -0400
commitf838bad1b3be8ca0c785ee0e0c570dfda74cf377 (patch)
tree5a842a8056a708cfad55a20fa8ab733dd94b0903 /fs
parentdd919660aacdf4adfcd279556aa03e595f7f0fc2 (diff)
parent807501475fce0ebe68baedf87f202c3e4ee0d12c (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/fid.c1
-rw-r--r--fs/Kconfig34
-rw-r--r--fs/Kconfig.binfmt2
-rw-r--r--fs/afs/cell.c16
-rw-r--r--fs/afs/internal.h3
-rw-r--r--fs/afs/main.c2
-rw-r--r--fs/afs/mntpt.c8
-rw-r--r--fs/afs/super.c1
-rw-r--r--fs/aio.c26
-rw-r--r--fs/anon_inodes.c18
-rw-r--r--fs/binfmt_elf.c15
-rw-r--r--fs/bio.c162
-rw-r--r--fs/block_dev.c201
-rw-r--r--fs/buffer.c37
-rw-r--r--fs/cifs/CHANGES4
-rw-r--r--fs/cifs/README2
-rw-r--r--fs/cifs/cifs_debug.c17
-rw-r--r--fs/cifs/cifs_debug.h9
-rw-r--r--fs/cifs/cifs_dfs_ref.c21
-rw-r--r--fs/cifs/cifs_spnego.c2
-rw-r--r--fs/cifs/cifs_unicode.c4
-rw-r--r--fs/cifs/cifs_unicode.h9
-rw-r--r--fs/cifs/cifsacl.c86
-rw-r--r--fs/cifs/cifsfs.c15
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/cifsproto.h31
-rw-r--r--fs/cifs/cifssmb.c218
-rw-r--r--fs/cifs/connect.c50
-rw-r--r--fs/cifs/dir.c25
-rw-r--r--fs/cifs/dns_resolve.c8
-rw-r--r--fs/cifs/dns_resolve.h2
-rw-r--r--fs/cifs/fcntl.c6
-rw-r--r--fs/cifs/file.c37
-rw-r--r--fs/cifs/inode.c548
-rw-r--r--fs/cifs/ioctl.c2
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/cifs/md4.c4
-rw-r--r--fs/cifs/md5.c9
-rw-r--r--fs/cifs/misc.c14
-rw-r--r--fs/cifs/netmisc.c23
-rw-r--r--fs/cifs/readdir.c54
-rw-r--r--fs/cifs/sess.c4
-rw-r--r--fs/cifs/smbdes.c22
-rw-r--r--fs/cifs/transport.c11
-rw-r--r--fs/cifs/xattr.c9
-rw-r--r--fs/cramfs/inode.c1
-rw-r--r--fs/debugfs/inode.c4
-rw-r--r--fs/dlm/dlm_internal.h1
-rw-r--r--fs/dlm/rcom.c2
-rw-r--r--fs/dquot.c2
-rw-r--r--fs/ecryptfs/dentry.c2
-rw-r--r--fs/ecryptfs/mmap.c102
-rw-r--r--fs/efs/dir.c2
-rw-r--r--fs/efs/efs.h140
-rw-r--r--fs/efs/file.c2
-rw-r--r--fs/efs/inode.c6
-rw-r--r--fs/efs/namei.c2
-rw-r--r--fs/efs/super.c7
-rw-r--r--fs/efs/symlink.c2
-rw-r--r--fs/exec.c10
-rw-r--r--fs/ext2/ialloc.c2
-rw-r--r--fs/ext2/inode.c4
-rw-r--r--fs/ext2/ioctl.c57
-rw-r--r--fs/ext2/xattr.c2
-rw-r--r--fs/ext3/acl.c8
-rw-r--r--fs/ext3/ialloc.c2
-rw-r--r--fs/ext3/inode.c6
-rw-r--r--fs/ext3/ioctl.c103
-rw-r--r--fs/ext3/resize.c4
-rw-r--r--fs/ext3/super.c2
-rw-r--r--fs/ext3/xattr.c6
-rw-r--r--fs/ext4/dir.c2
-rw-r--r--fs/ext4/extents.c59
-rw-r--r--fs/ext4/ialloc.c24
-rw-r--r--fs/ext4/inode.c62
-rw-r--r--fs/ext4/ioctl.c86
-rw-r--r--fs/ext4/mballoc.c80
-rw-r--r--fs/ext4/migrate.c5
-rw-r--r--fs/ext4/namei.c18
-rw-r--r--fs/ext4/resize.c1
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/fat/file.c12
-rw-r--r--fs/file_table.c48
-rw-r--r--fs/fs-writeback.c6
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/gfs2/Kconfig2
-rw-r--r--fs/gfs2/Makefile2
-rw-r--r--fs/gfs2/acl.c6
-rw-r--r--fs/gfs2/bmap.c670
-rw-r--r--fs/gfs2/dir.c84
-rw-r--r--fs/gfs2/eattr.c58
-rw-r--r--fs/gfs2/glock.c188
-rw-r--r--fs/gfs2/glock.h14
-rw-r--r--fs/gfs2/glops.c10
-rw-r--r--fs/gfs2/incore.h40
-rw-r--r--fs/gfs2/inode.c72
-rw-r--r--fs/gfs2/inode.h22
-rw-r--r--fs/gfs2/lm.c210
-rw-r--r--fs/gfs2/lm.h42
-rw-r--r--fs/gfs2/locking/dlm/lock.c7
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h5
-rw-r--r--fs/gfs2/locking/dlm/main.c2
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c2
-rw-r--r--fs/gfs2/locking/dlm/thread.c10
-rw-r--r--fs/gfs2/locking/nolock/main.c2
-rw-r--r--fs/gfs2/log.c19
-rw-r--r--fs/gfs2/lops.c21
-rw-r--r--fs/gfs2/lops.h11
-rw-r--r--fs/gfs2/main.c10
-rw-r--r--fs/gfs2/ops_address.c44
-rw-r--r--fs/gfs2/ops_dentry.c4
-rw-r--r--fs/gfs2/ops_export.c2
-rw-r--r--fs/gfs2/ops_file.c37
-rw-r--r--fs/gfs2/ops_fstype.c80
-rw-r--r--fs/gfs2/ops_inode.c42
-rw-r--r--fs/gfs2/ops_inode.h1
-rw-r--r--fs/gfs2/ops_super.c1
-rw-r--r--fs/gfs2/quota.c74
-rw-r--r--fs/gfs2/quota.h17
-rw-r--r--fs/gfs2/recovery.c15
-rw-r--r--fs/gfs2/rgrp.c370
-rw-r--r--fs/gfs2/rgrp.h8
-rw-r--r--fs/gfs2/super.c6
-rw-r--r--fs/gfs2/super.h1
-rw-r--r--fs/gfs2/sys.c7
-rw-r--r--fs/gfs2/trans.c25
-rw-r--r--fs/gfs2/trans.h2
-rw-r--r--fs/gfs2/util.c24
-rw-r--r--fs/gfs2/util.h2
-rw-r--r--fs/hfs/brec.c18
-rw-r--r--fs/hfsplus/dir.c23
-rw-r--r--fs/hfsplus/ioctl.c40
-rw-r--r--fs/hppfs/hppfs_kern.c435
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inode.c51
-rw-r--r--fs/isofs/compress.c11
-rw-r--r--fs/jbd/journal.c9
-rw-r--r--fs/jbd/recovery.c2
-rw-r--r--fs/jbd/revoke.c4
-rw-r--r--fs/jbd/transaction.c25
-rw-r--r--fs/jbd2/journal.c12
-rw-r--r--fs/jbd2/recovery.c2
-rw-r--r--fs/jbd2/revoke.c4
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/jffs2_fs_i.h2
-rw-r--r--fs/jffs2/jffs2_fs_sb.h2
-rw-r--r--fs/jfs/ioctl.c33
-rw-r--r--fs/jfs/jfs_dmap.c11
-rw-r--r--fs/jfs/jfs_dmap.h2
-rw-r--r--fs/jfs/jfs_imap.c15
-rw-r--r--fs/jfs/jfs_xtree.c26
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/locks.c53
-rw-r--r--fs/mbcache.c4
-rw-r--r--fs/mpage.c11
-rw-r--r--fs/namei.c344
-rw-r--r--fs/namespace.c516
-rw-r--r--fs/ncpfs/ioctl.c54
-rw-r--r--fs/nfs/callback.c3
-rw-r--r--fs/nfs/callback_xdr.c6
-rw-r--r--fs/nfs/delegation.c2
-rw-r--r--fs/nfs/dir.c5
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/idmap.c2
-rw-r--r--fs/nfs/inode.c7
-rw-r--r--fs/nfs/internal.h3
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/read.c5
-rw-r--r--fs/nfs/super.c68
-rw-r--r--fs/nfs/write.c10
-rw-r--r--fs/nfsd/nfs4proc.c7
-rw-r--r--fs/nfsd/nfs4recover.c16
-rw-r--r--fs/nfsd/nfs4state.c3
-rw-r--r--fs/nfsd/nfsfh.c6
-rw-r--r--fs/nfsd/vfs.c72
-rw-r--r--fs/ocfs2/Makefile14
-rw-r--r--fs/ocfs2/alloc.c465
-rw-r--r--fs/ocfs2/aops.c8
-rw-r--r--fs/ocfs2/cluster/Makefile2
-rw-r--r--fs/ocfs2/cluster/netdebug.c441
-rw-r--r--fs/ocfs2/cluster/nodemanager.c5
-rw-r--r--fs/ocfs2/cluster/sys.c9
-rw-r--r--fs/ocfs2/cluster/tcp.c165
-rw-r--r--fs/ocfs2/cluster/tcp.h32
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h26
-rw-r--r--fs/ocfs2/dir.c5
-rw-r--r--fs/ocfs2/dlm/Makefile2
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h70
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c2
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c911
-rw-r--r--fs/ocfs2/dlm/dlmdebug.h86
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c173
-rw-r--r--fs/ocfs2/dlm/dlmlock.c22
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c224
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c57
-rw-r--r--fs/ocfs2/dlm/dlmthread.c6
-rw-r--r--fs/ocfs2/dlmglue.c655
-rw-r--r--fs/ocfs2/dlmglue.h7
-rw-r--r--fs/ocfs2/file.c4
-rw-r--r--fs/ocfs2/heartbeat.c202
-rw-r--r--fs/ocfs2/heartbeat.h22
-rw-r--r--fs/ocfs2/ioctl.c24
-rw-r--r--fs/ocfs2/ioctl.h3
-rw-r--r--fs/ocfs2/journal.c211
-rw-r--r--fs/ocfs2/journal.h4
-rw-r--r--fs/ocfs2/localalloc.c10
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/ocfs2/ocfs2.h77
-rw-r--r--fs/ocfs2/ocfs2_fs.h79
-rw-r--r--fs/ocfs2/ocfs2_lockid.h2
-rw-r--r--fs/ocfs2/resize.c2
-rw-r--r--fs/ocfs2/slot_map.c454
-rw-r--r--fs/ocfs2/slot_map.h32
-rw-r--r--fs/ocfs2/stack_o2cb.c420
-rw-r--r--fs/ocfs2/stack_user.c883
-rw-r--r--fs/ocfs2/stackglue.c568
-rw-r--r--fs/ocfs2/stackglue.h261
-rw-r--r--fs/ocfs2/suballoc.c103
-rw-r--r--fs/ocfs2/suballoc.h1
-rw-r--r--fs/ocfs2/super.c208
-rw-r--r--fs/open.c169
-rw-r--r--fs/partitions/check.c4
-rw-r--r--fs/pipe.c19
-rw-r--r--fs/pnode.c2
-rw-r--r--fs/proc/base.c53
-rw-r--r--fs/proc/generic.c26
-rw-r--r--fs/proc/internal.h7
-rw-r--r--fs/proc/proc_misc.c3
-rw-r--r--fs/proc/proc_net.c123
-rw-r--r--fs/proc/task_mmu.c68
-rw-r--r--fs/reiserfs/do_balan.c8
-rw-r--r--fs/reiserfs/fix_node.c8
-rw-r--r--fs/reiserfs/ioctl.c63
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/lbalance.c2
-rw-r--r--fs/reiserfs/namei.c2
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/reiserfs/xattr.c111
-rw-r--r--fs/romfs/inode.c30
-rw-r--r--fs/select.c2
-rw-r--r--fs/signalfd.c7
-rw-r--r--fs/smbfs/smbiod.c2
-rw-r--r--fs/splice.c45
-rw-r--r--fs/super.c35
-rw-r--r--fs/sysfs/dir.c1
-rw-r--r--fs/sysfs/file.c14
-rw-r--r--fs/sysfs/symlink.c9
-rw-r--r--fs/ufs/balloc.c4
-rw-r--r--fs/ufs/util.h2
-rw-r--r--fs/utimes.c18
-rw-r--r--fs/xattr.c40
-rw-r--r--fs/xfs/Kbuild6
-rw-r--r--fs/xfs/Kconfig12
-rw-r--r--fs/xfs/Makefile118
-rw-r--r--fs/xfs/Makefile-linux-2.6117
-rw-r--r--fs/xfs/linux-2.6/kmem.c6
-rw-r--r--fs/xfs/linux-2.6/sema.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_cred.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c13
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c36
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c689
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c230
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c79
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c41
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h30
-rw-r--r--fs/xfs/quota/xfs_dquot.c20
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c14
-rw-r--r--fs/xfs/quota/xfs_qm.c82
-rw-r--r--fs/xfs/quota/xfs_qm.h2
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/quota/xfs_qm_stats.h4
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c48
-rw-r--r--fs/xfs/support/ktrace.c37
-rw-r--r--fs/xfs/support/ktrace.h3
-rw-r--r--fs/xfs/xfs.h2
-rw-r--r--fs/xfs/xfs_acl.c16
-rw-r--r--fs/xfs/xfs_alloc.c65
-rw-r--r--fs/xfs/xfs_attr.c10
-rw-r--r--fs/xfs/xfs_attr_leaf.c8
-rw-r--r--fs/xfs/xfs_bit.c103
-rw-r--r--fs/xfs/xfs_bit.h27
-rw-r--r--fs/xfs/xfs_bmap.c77
-rw-r--r--fs/xfs/xfs_bmap.h2
-rw-r--r--fs/xfs/xfs_bmap_btree.c54
-rw-r--r--fs/xfs/xfs_bmap_btree.h2
-rw-r--r--fs/xfs/xfs_buf_item.c7
-rw-r--r--fs/xfs/xfs_clnt.h2
-rw-r--r--fs/xfs/xfs_dir2.c64
-rw-r--r--fs/xfs/xfs_dir2.h12
-rw-r--r--fs/xfs/xfs_filestream.c2
-rw-r--r--fs/xfs/xfs_fsops.c24
-rw-r--r--fs/xfs/xfs_ialloc.c48
-rw-r--r--fs/xfs/xfs_iget.c50
-rw-r--r--fs/xfs/xfs_inode.c829
-rw-r--r--fs/xfs/xfs_inode.h23
-rw-r--r--fs/xfs/xfs_inode_item.c12
-rw-r--r--fs/xfs/xfs_inode_item.h8
-rw-r--r--fs/xfs/xfs_iomap.c7
-rw-r--r--fs/xfs/xfs_itable.c9
-rw-r--r--fs/xfs/xfs_log.c273
-rw-r--r--fs/xfs/xfs_log.h5
-rw-r--r--fs/xfs/xfs_log_priv.h97
-rw-r--r--fs/xfs/xfs_log_recover.c139
-rw-r--r--fs/xfs/xfs_mount.c121
-rw-r--r--fs/xfs/xfs_mount.h32
-rw-r--r--fs/xfs/xfs_rename.c121
-rw-r--r--fs/xfs/xfs_rtalloc.c60
-rw-r--r--fs/xfs/xfs_rw.c8
-rw-r--r--fs/xfs/xfs_sb.h107
-rw-r--r--fs/xfs/xfs_trans.h8
-rw-r--r--fs/xfs/xfs_trans_ail.c168
-rw-r--r--fs/xfs/xfs_trans_buf.c15
-rw-r--r--fs/xfs/xfs_types.h5
-rw-r--r--fs/xfs/xfs_utils.c32
-rw-r--r--fs/xfs/xfs_utils.h15
-rw-r--r--fs/xfs/xfs_vfsops.c89
-rw-r--r--fs/xfs/xfs_vnodeops.c507
-rw-r--r--fs/xfs/xfs_vnodeops.h33
327 files changed, 12512 insertions, 7151 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index dfebdbe7440e..3031e3233dd6 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -26,7 +26,6 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/idr.h> 28#include <linux/idr.h>
29#include <asm/semaphore.h>
30#include <net/9p/9p.h> 29#include <net/9p/9p.h>
31#include <net/9p/client.h> 30#include <net/9p/client.h>
32 31
diff --git a/fs/Kconfig b/fs/Kconfig
index d7312825592b..028ae38ecc52 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -444,6 +444,32 @@ config OCFS2_FS
444 For more information on OCFS2, see the file 444 For more information on OCFS2, see the file
445 <file:Documentation/filesystems/ocfs2.txt>. 445 <file:Documentation/filesystems/ocfs2.txt>.
446 446
447config OCFS2_FS_O2CB
448 tristate "O2CB Kernelspace Clustering"
449 depends on OCFS2_FS
450 default y
451 help
452 OCFS2 includes a simple kernelspace clustering package, the OCFS2
453 Cluster Base. It only requires a very small userspace component
454 to configure it. This comes with the standard ocfs2-tools package.
455 O2CB is limited to maintaining a cluster for OCFS2 file systems.
456 It cannot manage any other cluster applications.
457
458 It is always safe to say Y here, as the clustering method is
459 run-time selectable.
460
461config OCFS2_FS_USERSPACE_CLUSTER
462 tristate "OCFS2 Userspace Clustering"
463 depends on OCFS2_FS && DLM
464 default y
465 help
466 This option will allow OCFS2 to use userspace clustering services
467 in conjunction with the DLM in fs/dlm. If you are using a
468 userspace cluster manager, say Y here.
469
470 It is safe to say Y, as the clustering method is run-time
471 selectable.
472
447config OCFS2_DEBUG_MASKLOG 473config OCFS2_DEBUG_MASKLOG
448 bool "OCFS2 logging support" 474 bool "OCFS2 logging support"
449 depends on OCFS2_FS 475 depends on OCFS2_FS
@@ -1744,10 +1770,10 @@ config ROOT_NFS
1744 If you want your Linux box to mount its whole root file system (the 1770 If you want your Linux box to mount its whole root file system (the
1745 one containing the directory /) from some other computer over the 1771 one containing the directory /) from some other computer over the
1746 net via NFS (presumably because your box doesn't have a hard disk), 1772 net via NFS (presumably because your box doesn't have a hard disk),
1747 say Y. Read <file:Documentation/nfsroot.txt> for details. It is 1773 say Y. Read <file:Documentation/filesystems/nfsroot.txt> for
1748 likely that in this case, you also want to say Y to "Kernel level IP 1774 details. It is likely that in this case, you also want to say Y to
1749 autoconfiguration" so that your box can discover its network address 1775 "Kernel level IP autoconfiguration" so that your box can discover
1750 at boot time. 1776 its network address at boot time.
1751 1777
1752 Most people say N here. 1778 Most people say N here.
1753 1779
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index b5c3b6114add..853845abcca6 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -62,7 +62,7 @@ config BINFMT_SHARED_FLAT
62config BINFMT_AOUT 62config BINFMT_AOUT
63 tristate "Kernel support for a.out and ECOFF binaries" 63 tristate "Kernel support for a.out and ECOFF binaries"
64 depends on ARCH_SUPPORTS_AOUT && \ 64 depends on ARCH_SUPPORTS_AOUT && \
65 (X86_32 || ALPHA || ARM || M68K || SPARC32) 65 (X86_32 || ALPHA || ARM || M68K)
66 ---help--- 66 ---help---
67 A.out (Assembler.OUTput) is a set of formats for libraries and 67 A.out (Assembler.OUTput) is a set of formats for libraries and
68 executables used in the earliest versions of UNIX. Linux used 68 executables used in the earliest versions of UNIX. Linux used
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 970d38f30565..584bb0f9c36a 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -127,14 +127,21 @@ struct afs_cell *afs_cell_create(const char *name, char *vllist)
127 127
128 _enter("%s,%s", name, vllist); 128 _enter("%s,%s", name, vllist);
129 129
130 down_write(&afs_cells_sem);
131 read_lock(&afs_cells_lock);
132 list_for_each_entry(cell, &afs_cells, link) {
133 if (strcasecmp(cell->name, name) == 0)
134 goto duplicate_name;
135 }
136 read_unlock(&afs_cells_lock);
137
130 cell = afs_cell_alloc(name, vllist); 138 cell = afs_cell_alloc(name, vllist);
131 if (IS_ERR(cell)) { 139 if (IS_ERR(cell)) {
132 _leave(" = %ld", PTR_ERR(cell)); 140 _leave(" = %ld", PTR_ERR(cell));
141 up_write(&afs_cells_sem);
133 return cell; 142 return cell;
134 } 143 }
135 144
136 down_write(&afs_cells_sem);
137
138 /* add a proc directory for this cell */ 145 /* add a proc directory for this cell */
139 ret = afs_proc_cell_setup(cell); 146 ret = afs_proc_cell_setup(cell);
140 if (ret < 0) 147 if (ret < 0)
@@ -167,6 +174,11 @@ error:
167 kfree(cell); 174 kfree(cell);
168 _leave(" = %d", ret); 175 _leave(" = %d", ret);
169 return ERR_PTR(ret); 176 return ERR_PTR(ret);
177
178duplicate_name:
179 read_unlock(&afs_cells_lock);
180 up_write(&afs_cells_sem);
181 return ERR_PTR(-EEXIST);
170} 182}
171 183
172/* 184/*
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5ca3625cd39e..eec41c76de72 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -573,7 +573,6 @@ extern const struct file_operations afs_mntpt_file_operations;
573 573
574extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); 574extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
575extern void afs_mntpt_kill_timer(void); 575extern void afs_mntpt_kill_timer(void);
576extern void afs_umount_begin(struct vfsmount *, int);
577 576
578/* 577/*
579 * proc.c 578 * proc.c
@@ -750,7 +749,7 @@ extern int afs_fsync(struct file *, struct dentry *, int);
750extern unsigned afs_debug; 749extern unsigned afs_debug;
751 750
752#define dbgprintk(FMT,...) \ 751#define dbgprintk(FMT,...) \
753 printk("[%x%-6.6s] "FMT"\n", smp_processor_id(), current->comm ,##__VA_ARGS__) 752 printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__)
754 753
755/* make sure we maintain the format strings, even when debugging is disabled */ 754/* make sure we maintain the format strings, even when debugging is disabled */
756static inline __attribute__((format(printf,1,2))) 755static inline __attribute__((format(printf,1,2)))
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 0f60f6b35769..2d3e5d4fb9f7 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -22,7 +22,7 @@ MODULE_LICENSE("GPL");
22 22
23unsigned afs_debug; 23unsigned afs_debug;
24module_param_named(debug, afs_debug, uint, S_IWUSR | S_IRUGO); 24module_param_named(debug, afs_debug, uint, S_IWUSR | S_IRUGO);
25MODULE_PARM_DESC(afs_debug, "AFS debugging mask"); 25MODULE_PARM_DESC(debug, "AFS debugging mask");
26 26
27static char *rootcell; 27static char *rootcell;
28 28
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index a3510b8ba3e7..2f5503902c37 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -283,11 +283,3 @@ void afs_mntpt_kill_timer(void)
283 cancel_delayed_work(&afs_mntpt_expiry_timer); 283 cancel_delayed_work(&afs_mntpt_expiry_timer);
284 flush_scheduled_work(); 284 flush_scheduled_work();
285} 285}
286
287/*
288 * begin unmount by attempting to remove all automounted mountpoints we added
289 */
290void afs_umount_begin(struct vfsmount *vfsmnt, int flags)
291{
292 shrink_submounts(vfsmnt, &afs_vfsmounts);
293}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 36bbce45f44b..4b572b801d8d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -50,7 +50,6 @@ static const struct super_operations afs_super_ops = {
50 .write_inode = afs_write_inode, 50 .write_inode = afs_write_inode,
51 .destroy_inode = afs_destroy_inode, 51 .destroy_inode = afs_destroy_inode,
52 .clear_inode = afs_clear_inode, 52 .clear_inode = afs_clear_inode,
53 .umount_begin = afs_umount_begin,
54 .put_super = afs_put_super, 53 .put_super = afs_put_super,
55 .show_options = generic_show_options, 54 .show_options = generic_show_options,
56}; 55};
diff --git a/fs/aio.c b/fs/aio.c
index b74c567383bc..228368610dfa 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -936,14 +936,6 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
936 return 1; 936 return 1;
937 } 937 }
938 938
939 /*
940 * Check if the user asked us to deliver the result through an
941 * eventfd. The eventfd_signal() function is safe to be called
942 * from IRQ context.
943 */
944 if (!IS_ERR(iocb->ki_eventfd))
945 eventfd_signal(iocb->ki_eventfd, 1);
946
947 info = &ctx->ring_info; 939 info = &ctx->ring_info;
948 940
949 /* add a completion event to the ring buffer. 941 /* add a completion event to the ring buffer.
@@ -992,10 +984,27 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
992 kunmap_atomic(ring, KM_IRQ1); 984 kunmap_atomic(ring, KM_IRQ1);
993 985
994 pr_debug("added to ring %p at [%lu]\n", iocb, tail); 986 pr_debug("added to ring %p at [%lu]\n", iocb, tail);
987
988 /*
989 * Check if the user asked us to deliver the result through an
990 * eventfd. The eventfd_signal() function is safe to be called
991 * from IRQ context.
992 */
993 if (!IS_ERR(iocb->ki_eventfd))
994 eventfd_signal(iocb->ki_eventfd, 1);
995
995put_rq: 996put_rq:
996 /* everything turned out well, dispose of the aiocb. */ 997 /* everything turned out well, dispose of the aiocb. */
997 ret = __aio_put_req(ctx, iocb); 998 ret = __aio_put_req(ctx, iocb);
998 999
1000 /*
1001 * We have to order our ring_info tail store above and test
1002 * of the wait list below outside the wait lock. This is
1003 * like in wake_up_bit() where clearing a bit has to be
1004 * ordered with the unlocked test.
1005 */
1006 smp_mb();
1007
999 if (waitqueue_active(&ctx->wait)) 1008 if (waitqueue_active(&ctx->wait))
1000 wake_up(&ctx->wait); 1009 wake_up(&ctx->wait);
1001 1010
@@ -1782,6 +1791,7 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id,
1782 put_ioctx(ioctx); 1791 put_ioctx(ioctx);
1783 } 1792 }
1784 1793
1794 asmlinkage_protect(5, ret, ctx_id, min_nr, nr, events, timeout);
1785 return ret; 1795 return ret;
1786} 1796}
1787 1797
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 23321889d9b0..f42be069e085 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -81,13 +81,10 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
81 81
82 if (IS_ERR(anon_inode_inode)) 82 if (IS_ERR(anon_inode_inode))
83 return -ENODEV; 83 return -ENODEV;
84 file = get_empty_filp();
85 if (!file)
86 return -ENFILE;
87 84
88 error = get_unused_fd(); 85 error = get_unused_fd();
89 if (error < 0) 86 if (error < 0)
90 goto err_put_filp; 87 return error;
91 fd = error; 88 fd = error;
92 89
93 /* 90 /*
@@ -114,14 +111,15 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
114 dentry->d_flags &= ~DCACHE_UNHASHED; 111 dentry->d_flags &= ~DCACHE_UNHASHED;
115 d_instantiate(dentry, anon_inode_inode); 112 d_instantiate(dentry, anon_inode_inode);
116 113
117 file->f_path.mnt = mntget(anon_inode_mnt); 114 error = -ENFILE;
118 file->f_path.dentry = dentry; 115 file = alloc_file(anon_inode_mnt, dentry,
116 FMODE_READ | FMODE_WRITE, fops);
117 if (!file)
118 goto err_dput;
119 file->f_mapping = anon_inode_inode->i_mapping; 119 file->f_mapping = anon_inode_inode->i_mapping;
120 120
121 file->f_pos = 0; 121 file->f_pos = 0;
122 file->f_flags = O_RDWR; 122 file->f_flags = O_RDWR;
123 file->f_op = fops;
124 file->f_mode = FMODE_READ | FMODE_WRITE;
125 file->f_version = 0; 123 file->f_version = 0;
126 file->private_data = priv; 124 file->private_data = priv;
127 125
@@ -132,10 +130,10 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
132 *pfile = file; 130 *pfile = file;
133 return 0; 131 return 0;
134 132
133err_dput:
134 dput(dentry);
135err_put_unused_fd: 135err_put_unused_fd:
136 put_unused_fd(fd); 136 put_unused_fd(fd);
137err_put_filp:
138 put_filp(file);
139 return error; 137 return error;
140} 138}
141EXPORT_SYMBOL_GPL(anon_inode_getfd); 139EXPORT_SYMBOL_GPL(anon_inode_getfd);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 41a958a7585e..5e1a4fb5cacb 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1424,6 +1424,18 @@ struct elf_note_info {
1424 int thread_notes; 1424 int thread_notes;
1425}; 1425};
1426 1426
1427/*
1428 * When a regset has a writeback hook, we call it on each thread before
1429 * dumping user memory. On register window machines, this makes sure the
1430 * user memory backing the register data is up to date before we read it.
1431 */
1432static void do_thread_regset_writeback(struct task_struct *task,
1433 const struct user_regset *regset)
1434{
1435 if (regset->writeback)
1436 regset->writeback(task, regset, 1);
1437}
1438
1427static int fill_thread_core_info(struct elf_thread_core_info *t, 1439static int fill_thread_core_info(struct elf_thread_core_info *t,
1428 const struct user_regset_view *view, 1440 const struct user_regset_view *view,
1429 long signr, size_t *total) 1441 long signr, size_t *total)
@@ -1445,6 +1457,8 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
1445 sizeof(t->prstatus), &t->prstatus); 1457 sizeof(t->prstatus), &t->prstatus);
1446 *total += notesize(&t->notes[0]); 1458 *total += notesize(&t->notes[0]);
1447 1459
1460 do_thread_regset_writeback(t->task, &view->regsets[0]);
1461
1448 /* 1462 /*
1449 * Each other regset might generate a note too. For each regset 1463 * Each other regset might generate a note too. For each regset
1450 * that has no core_note_type or is inactive, we leave t->notes[i] 1464 * that has no core_note_type or is inactive, we leave t->notes[i]
@@ -1452,6 +1466,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
1452 */ 1466 */
1453 for (i = 1; i < view->n; ++i) { 1467 for (i = 1; i < view->n; ++i) {
1454 const struct user_regset *regset = &view->regsets[i]; 1468 const struct user_regset *regset = &view->regsets[i];
1469 do_thread_regset_writeback(t->task, regset);
1455 if (regset->core_note_type && 1470 if (regset->core_note_type &&
1456 (!regset->active || regset->active(t->task, regset))) { 1471 (!regset->active || regset->active(t->task, regset))) {
1457 int ret; 1472 int ret;
diff --git a/fs/bio.c b/fs/bio.c
index 242e409dab4b..6e0b6f66df03 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -444,22 +444,27 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
444 444
445struct bio_map_data { 445struct bio_map_data {
446 struct bio_vec *iovecs; 446 struct bio_vec *iovecs;
447 void __user *userptr; 447 int nr_sgvecs;
448 struct sg_iovec *sgvecs;
448}; 449};
449 450
450static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio) 451static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
452 struct sg_iovec *iov, int iov_count)
451{ 453{
452 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); 454 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
455 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
456 bmd->nr_sgvecs = iov_count;
453 bio->bi_private = bmd; 457 bio->bi_private = bmd;
454} 458}
455 459
456static void bio_free_map_data(struct bio_map_data *bmd) 460static void bio_free_map_data(struct bio_map_data *bmd)
457{ 461{
458 kfree(bmd->iovecs); 462 kfree(bmd->iovecs);
463 kfree(bmd->sgvecs);
459 kfree(bmd); 464 kfree(bmd);
460} 465}
461 466
462static struct bio_map_data *bio_alloc_map_data(int nr_segs) 467static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count)
463{ 468{
464 struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL); 469 struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL);
465 470
@@ -467,13 +472,71 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs)
467 return NULL; 472 return NULL;
468 473
469 bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL); 474 bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL);
470 if (bmd->iovecs) 475 if (!bmd->iovecs) {
476 kfree(bmd);
477 return NULL;
478 }
479
480 bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL);
481 if (bmd->sgvecs)
471 return bmd; 482 return bmd;
472 483
484 kfree(bmd->iovecs);
473 kfree(bmd); 485 kfree(bmd);
474 return NULL; 486 return NULL;
475} 487}
476 488
489static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
490 int uncopy)
491{
492 int ret = 0, i;
493 struct bio_vec *bvec;
494 int iov_idx = 0;
495 unsigned int iov_off = 0;
496 int read = bio_data_dir(bio) == READ;
497
498 __bio_for_each_segment(bvec, bio, i, 0) {
499 char *bv_addr = page_address(bvec->bv_page);
500 unsigned int bv_len = bvec->bv_len;
501
502 while (bv_len && iov_idx < iov_count) {
503 unsigned int bytes;
504 char *iov_addr;
505
506 bytes = min_t(unsigned int,
507 iov[iov_idx].iov_len - iov_off, bv_len);
508 iov_addr = iov[iov_idx].iov_base + iov_off;
509
510 if (!ret) {
511 if (!read && !uncopy)
512 ret = copy_from_user(bv_addr, iov_addr,
513 bytes);
514 if (read && uncopy)
515 ret = copy_to_user(iov_addr, bv_addr,
516 bytes);
517
518 if (ret)
519 ret = -EFAULT;
520 }
521
522 bv_len -= bytes;
523 bv_addr += bytes;
524 iov_addr += bytes;
525 iov_off += bytes;
526
527 if (iov[iov_idx].iov_len == iov_off) {
528 iov_idx++;
529 iov_off = 0;
530 }
531 }
532
533 if (uncopy)
534 __free_page(bvec->bv_page);
535 }
536
537 return ret;
538}
539
477/** 540/**
478 * bio_uncopy_user - finish previously mapped bio 541 * bio_uncopy_user - finish previously mapped bio
479 * @bio: bio being terminated 542 * @bio: bio being terminated
@@ -484,55 +547,56 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs)
484int bio_uncopy_user(struct bio *bio) 547int bio_uncopy_user(struct bio *bio)
485{ 548{
486 struct bio_map_data *bmd = bio->bi_private; 549 struct bio_map_data *bmd = bio->bi_private;
487 const int read = bio_data_dir(bio) == READ; 550 int ret;
488 struct bio_vec *bvec;
489 int i, ret = 0;
490 551
491 __bio_for_each_segment(bvec, bio, i, 0) { 552 ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1);
492 char *addr = page_address(bvec->bv_page);
493 unsigned int len = bmd->iovecs[i].bv_len;
494 553
495 if (read && !ret && copy_to_user(bmd->userptr, addr, len))
496 ret = -EFAULT;
497
498 __free_page(bvec->bv_page);
499 bmd->userptr += len;
500 }
501 bio_free_map_data(bmd); 554 bio_free_map_data(bmd);
502 bio_put(bio); 555 bio_put(bio);
503 return ret; 556 return ret;
504} 557}
505 558
506/** 559/**
507 * bio_copy_user - copy user data to bio 560 * bio_copy_user_iov - copy user data to bio
508 * @q: destination block queue 561 * @q: destination block queue
509 * @uaddr: start of user address 562 * @iov: the iovec.
510 * @len: length in bytes 563 * @iov_count: number of elements in the iovec
511 * @write_to_vm: bool indicating writing to pages or not 564 * @write_to_vm: bool indicating writing to pages or not
512 * 565 *
513 * Prepares and returns a bio for indirect user io, bouncing data 566 * Prepares and returns a bio for indirect user io, bouncing data
514 * to/from kernel pages as necessary. Must be paired with 567 * to/from kernel pages as necessary. Must be paired with
515 * call bio_uncopy_user() on io completion. 568 * call bio_uncopy_user() on io completion.
516 */ 569 */
517struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, 570struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
518 unsigned int len, int write_to_vm) 571 int iov_count, int write_to_vm)
519{ 572{
520 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
521 unsigned long start = uaddr >> PAGE_SHIFT;
522 struct bio_map_data *bmd; 573 struct bio_map_data *bmd;
523 struct bio_vec *bvec; 574 struct bio_vec *bvec;
524 struct page *page; 575 struct page *page;
525 struct bio *bio; 576 struct bio *bio;
526 int i, ret; 577 int i, ret;
578 int nr_pages = 0;
579 unsigned int len = 0;
527 580
528 bmd = bio_alloc_map_data(end - start); 581 for (i = 0; i < iov_count; i++) {
582 unsigned long uaddr;
583 unsigned long end;
584 unsigned long start;
585
586 uaddr = (unsigned long)iov[i].iov_base;
587 end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
588 start = uaddr >> PAGE_SHIFT;
589
590 nr_pages += end - start;
591 len += iov[i].iov_len;
592 }
593
594 bmd = bio_alloc_map_data(nr_pages, iov_count);
529 if (!bmd) 595 if (!bmd)
530 return ERR_PTR(-ENOMEM); 596 return ERR_PTR(-ENOMEM);
531 597
532 bmd->userptr = (void __user *) uaddr;
533
534 ret = -ENOMEM; 598 ret = -ENOMEM;
535 bio = bio_alloc(GFP_KERNEL, end - start); 599 bio = bio_alloc(GFP_KERNEL, nr_pages);
536 if (!bio) 600 if (!bio)
537 goto out_bmd; 601 goto out_bmd;
538 602
@@ -564,22 +628,12 @@ struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr,
564 * success 628 * success
565 */ 629 */
566 if (!write_to_vm) { 630 if (!write_to_vm) {
567 char __user *p = (char __user *) uaddr; 631 ret = __bio_copy_iov(bio, iov, iov_count, 0);
568 632 if (ret)
569 /* 633 goto cleanup;
570 * for a write, copy in data to kernel pages
571 */
572 ret = -EFAULT;
573 bio_for_each_segment(bvec, bio, i) {
574 char *addr = page_address(bvec->bv_page);
575
576 if (copy_from_user(addr, p, bvec->bv_len))
577 goto cleanup;
578 p += bvec->bv_len;
579 }
580 } 634 }
581 635
582 bio_set_map_data(bmd, bio); 636 bio_set_map_data(bmd, bio, iov, iov_count);
583 return bio; 637 return bio;
584cleanup: 638cleanup:
585 bio_for_each_segment(bvec, bio, i) 639 bio_for_each_segment(bvec, bio, i)
@@ -591,6 +645,28 @@ out_bmd:
591 return ERR_PTR(ret); 645 return ERR_PTR(ret);
592} 646}
593 647
648/**
649 * bio_copy_user - copy user data to bio
650 * @q: destination block queue
651 * @uaddr: start of user address
652 * @len: length in bytes
653 * @write_to_vm: bool indicating writing to pages or not
654 *
655 * Prepares and returns a bio for indirect user io, bouncing data
656 * to/from kernel pages as necessary. Must be paired with
657 * call bio_uncopy_user() on io completion.
658 */
659struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr,
660 unsigned int len, int write_to_vm)
661{
662 struct sg_iovec iov;
663
664 iov.iov_base = (void __user *)uaddr;
665 iov.iov_len = len;
666
667 return bio_copy_user_iov(q, &iov, 1, write_to_vm);
668}
669
594static struct bio *__bio_map_user_iov(struct request_queue *q, 670static struct bio *__bio_map_user_iov(struct request_queue *q,
595 struct block_device *bdev, 671 struct block_device *bdev,
596 struct sg_iovec *iov, int iov_count, 672 struct sg_iovec *iov, int iov_count,
@@ -903,7 +979,7 @@ void bio_set_pages_dirty(struct bio *bio)
903 } 979 }
904} 980}
905 981
906void bio_release_pages(struct bio *bio) 982static void bio_release_pages(struct bio *bio)
907{ 983{
908 struct bio_vec *bvec = bio->bi_io_vec; 984 struct bio_vec *bvec = bio->bi_io_vec;
909 int i; 985 int i;
@@ -1194,6 +1270,8 @@ EXPORT_SYMBOL(bio_hw_segments);
1194EXPORT_SYMBOL(bio_add_page); 1270EXPORT_SYMBOL(bio_add_page);
1195EXPORT_SYMBOL(bio_add_pc_page); 1271EXPORT_SYMBOL(bio_add_pc_page);
1196EXPORT_SYMBOL(bio_get_nr_vecs); 1272EXPORT_SYMBOL(bio_get_nr_vecs);
1273EXPORT_SYMBOL(bio_map_user);
1274EXPORT_SYMBOL(bio_unmap_user);
1197EXPORT_SYMBOL(bio_map_kern); 1275EXPORT_SYMBOL(bio_map_kern);
1198EXPORT_SYMBOL(bio_pair_release); 1276EXPORT_SYMBOL(bio_pair_release);
1199EXPORT_SYMBOL(bio_split); 1277EXPORT_SYMBOL(bio_split);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 67fe72ce6ac7..7d822fae7765 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -31,6 +31,8 @@ struct bdev_inode {
31 struct inode vfs_inode; 31 struct inode vfs_inode;
32}; 32};
33 33
34static const struct address_space_operations def_blk_aops;
35
34static inline struct bdev_inode *BDEV_I(struct inode *inode) 36static inline struct bdev_inode *BDEV_I(struct inode *inode)
35{ 37{
36 return container_of(inode, struct bdev_inode, vfs_inode); 38 return container_of(inode, struct bdev_inode, vfs_inode);
@@ -171,203 +173,6 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
171 iov, offset, nr_segs, blkdev_get_blocks, NULL); 173 iov, offset, nr_segs, blkdev_get_blocks, NULL);
172} 174}
173 175
174#if 0
175static void blk_end_aio(struct bio *bio, int error)
176{
177 struct kiocb *iocb = bio->bi_private;
178 atomic_t *bio_count = &iocb->ki_bio_count;
179
180 if (bio_data_dir(bio) == READ)
181 bio_check_pages_dirty(bio);
182 else {
183 bio_release_pages(bio);
184 bio_put(bio);
185 }
186
187 /* iocb->ki_nbytes stores error code from LLDD */
188 if (error)
189 iocb->ki_nbytes = -EIO;
190
191 if (atomic_dec_and_test(bio_count)) {
192 if ((long)iocb->ki_nbytes < 0)
193 aio_complete(iocb, iocb->ki_nbytes, 0);
194 else
195 aio_complete(iocb, iocb->ki_left, 0);
196 }
197
198 return 0;
199}
200
201#define VEC_SIZE 16
202struct pvec {
203 unsigned short nr;
204 unsigned short idx;
205 struct page *page[VEC_SIZE];
206};
207
208#define PAGES_SPANNED(addr, len) \
209 (DIV_ROUND_UP((addr) + (len), PAGE_SIZE) - (addr) / PAGE_SIZE);
210
211/*
212 * get page pointer for user addr, we internally cache struct page array for
213 * (addr, count) range in pvec to avoid frequent call to get_user_pages. If
214 * internal page list is exhausted, a batch count of up to VEC_SIZE is used
215 * to get next set of page struct.
216 */
217static struct page *blk_get_page(unsigned long addr, size_t count, int rw,
218 struct pvec *pvec)
219{
220 int ret, nr_pages;
221 if (pvec->idx == pvec->nr) {
222 nr_pages = PAGES_SPANNED(addr, count);
223 nr_pages = min(nr_pages, VEC_SIZE);
224 down_read(&current->mm->mmap_sem);
225 ret = get_user_pages(current, current->mm, addr, nr_pages,
226 rw == READ, 0, pvec->page, NULL);
227 up_read(&current->mm->mmap_sem);
228 if (ret < 0)
229 return ERR_PTR(ret);
230 pvec->nr = ret;
231 pvec->idx = 0;
232 }
233 return pvec->page[pvec->idx++];
234}
235
236/* return a page back to pvec array */
237static void blk_unget_page(struct page *page, struct pvec *pvec)
238{
239 pvec->page[--pvec->idx] = page;
240}
241
242static ssize_t
243blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
244 loff_t pos, unsigned long nr_segs)
245{
246 struct inode *inode = iocb->ki_filp->f_mapping->host;
247 unsigned blkbits = blksize_bits(bdev_hardsect_size(I_BDEV(inode)));
248 unsigned blocksize_mask = (1 << blkbits) - 1;
249 unsigned long seg = 0; /* iov segment iterator */
250 unsigned long nvec; /* number of bio vec needed */
251 unsigned long cur_off; /* offset into current page */
252 unsigned long cur_len; /* I/O len of current page, up to PAGE_SIZE */
253
254 unsigned long addr; /* user iovec address */
255 size_t count; /* user iovec len */
256 size_t nbytes = iocb->ki_nbytes = iocb->ki_left; /* total xfer size */
257 loff_t size; /* size of block device */
258 struct bio *bio;
259 atomic_t *bio_count = &iocb->ki_bio_count;
260 struct page *page;
261 struct pvec pvec;
262
263 pvec.nr = 0;
264 pvec.idx = 0;
265
266 if (pos & blocksize_mask)
267 return -EINVAL;
268
269 size = i_size_read(inode);
270 if (pos + nbytes > size) {
271 nbytes = size - pos;
272 iocb->ki_left = nbytes;
273 }
274
275 /*
276 * check first non-zero iov alignment, the remaining
277 * iov alignment is checked inside bio loop below.
278 */
279 do {
280 addr = (unsigned long) iov[seg].iov_base;
281 count = min(iov[seg].iov_len, nbytes);
282 if (addr & blocksize_mask || count & blocksize_mask)
283 return -EINVAL;
284 } while (!count && ++seg < nr_segs);
285 atomic_set(bio_count, 1);
286
287 while (nbytes) {
288 /* roughly estimate number of bio vec needed */
289 nvec = (nbytes + PAGE_SIZE - 1) / PAGE_SIZE;
290 nvec = max(nvec, nr_segs - seg);
291 nvec = min(nvec, (unsigned long) BIO_MAX_PAGES);
292
293 /* bio_alloc should not fail with GFP_KERNEL flag */
294 bio = bio_alloc(GFP_KERNEL, nvec);
295 bio->bi_bdev = I_BDEV(inode);
296 bio->bi_end_io = blk_end_aio;
297 bio->bi_private = iocb;
298 bio->bi_sector = pos >> blkbits;
299same_bio:
300 cur_off = addr & ~PAGE_MASK;
301 cur_len = PAGE_SIZE - cur_off;
302 if (count < cur_len)
303 cur_len = count;
304
305 page = blk_get_page(addr, count, rw, &pvec);
306 if (unlikely(IS_ERR(page)))
307 goto backout;
308
309 if (bio_add_page(bio, page, cur_len, cur_off)) {
310 pos += cur_len;
311 addr += cur_len;
312 count -= cur_len;
313 nbytes -= cur_len;
314
315 if (count)
316 goto same_bio;
317 while (++seg < nr_segs) {
318 addr = (unsigned long) iov[seg].iov_base;
319 count = iov[seg].iov_len;
320 if (!count)
321 continue;
322 if (unlikely(addr & blocksize_mask ||
323 count & blocksize_mask)) {
324 page = ERR_PTR(-EINVAL);
325 goto backout;
326 }
327 count = min(count, nbytes);
328 goto same_bio;
329 }
330 } else {
331 blk_unget_page(page, &pvec);
332 }
333
334 /* bio is ready, submit it */
335 if (rw == READ)
336 bio_set_pages_dirty(bio);
337 atomic_inc(bio_count);
338 submit_bio(rw, bio);
339 }
340
341completion:
342 iocb->ki_left -= nbytes;
343 nbytes = iocb->ki_left;
344 iocb->ki_pos += nbytes;
345
346 blk_run_address_space(inode->i_mapping);
347 if (atomic_dec_and_test(bio_count))
348 aio_complete(iocb, nbytes, 0);
349
350 return -EIOCBQUEUED;
351
352backout:
353 /*
354 * back out nbytes count constructed so far for this bio,
355 * we will throw away current bio.
356 */
357 nbytes += bio->bi_size;
358 bio_release_pages(bio);
359 bio_put(bio);
360
361 /*
362 * if no bio was submmitted, return the error code.
363 * otherwise, proceed with pending I/O completion.
364 */
365 if (atomic_read(bio_count) == 1)
366 return PTR_ERR(page);
367 goto completion;
368}
369#endif
370
371static int blkdev_writepage(struct page *page, struct writeback_control *wbc) 176static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
372{ 177{
373 return block_write_full_page(page, blkdev_get_block, wbc); 178 return block_write_full_page(page, blkdev_get_block, wbc);
@@ -1334,7 +1139,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1334 return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); 1139 return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
1335} 1140}
1336 1141
1337const struct address_space_operations def_blk_aops = { 1142static const struct address_space_operations def_blk_aops = {
1338 .readpage = blkdev_readpage, 1143 .readpage = blkdev_readpage,
1339 .writepage = blkdev_writepage, 1144 .writepage = blkdev_writepage,
1340 .sync_page = block_sync_page, 1145 .sync_page = block_sync_page,
diff --git a/fs/buffer.c b/fs/buffer.c
index 3ebccf4aa7e3..39ff14403d13 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -627,8 +627,7 @@ repeat:
627} 627}
628 628
629/** 629/**
630 * sync_mapping_buffers - write out and wait upon a mapping's "associated" 630 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
631 * buffers
632 * @mapping: the mapping which wants those buffers written 631 * @mapping: the mapping which wants those buffers written
633 * 632 *
634 * Starts I/O against the buffers at mapping->private_list, and waits upon 633 * Starts I/O against the buffers at mapping->private_list, and waits upon
@@ -836,7 +835,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
836 smp_mb(); 835 smp_mb();
837 if (buffer_dirty(bh)) { 836 if (buffer_dirty(bh)) {
838 list_add(&bh->b_assoc_buffers, 837 list_add(&bh->b_assoc_buffers,
839 &bh->b_assoc_map->private_list); 838 &mapping->private_list);
840 bh->b_assoc_map = mapping; 839 bh->b_assoc_map = mapping;
841 } 840 }
842 spin_unlock(lock); 841 spin_unlock(lock);
@@ -1182,7 +1181,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
1182void mark_buffer_dirty(struct buffer_head *bh) 1181void mark_buffer_dirty(struct buffer_head *bh)
1183{ 1182{
1184 WARN_ON_ONCE(!buffer_uptodate(bh)); 1183 WARN_ON_ONCE(!buffer_uptodate(bh));
1185 if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) 1184
1185 /*
1186 * Very *carefully* optimize the it-is-already-dirty case.
1187 *
1188 * Don't let the final "is it dirty" escape to before we
1189 * perhaps modified the buffer.
1190 */
1191 if (buffer_dirty(bh)) {
1192 smp_mb();
1193 if (buffer_dirty(bh))
1194 return;
1195 }
1196
1197 if (!test_set_buffer_dirty(bh))
1186 __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0); 1198 __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
1187} 1199}
1188 1200
@@ -2565,14 +2577,13 @@ int nobh_write_end(struct file *file, struct address_space *mapping,
2565 struct inode *inode = page->mapping->host; 2577 struct inode *inode = page->mapping->host;
2566 struct buffer_head *head = fsdata; 2578 struct buffer_head *head = fsdata;
2567 struct buffer_head *bh; 2579 struct buffer_head *bh;
2580 BUG_ON(fsdata != NULL && page_has_buffers(page));
2568 2581
2569 if (!PageMappedToDisk(page)) { 2582 if (unlikely(copied < len) && !page_has_buffers(page))
2570 if (unlikely(copied < len) && !page_has_buffers(page)) 2583 attach_nobh_buffers(page, head);
2571 attach_nobh_buffers(page, head); 2584 if (page_has_buffers(page))
2572 if (page_has_buffers(page)) 2585 return generic_write_end(file, mapping, pos, len,
2573 return generic_write_end(file, mapping, pos, len, 2586 copied, page, fsdata);
2574 copied, page, fsdata);
2575 }
2576 2587
2577 SetPageUptodate(page); 2588 SetPageUptodate(page);
2578 set_page_dirty(page); 2589 set_page_dirty(page);
@@ -3214,7 +3225,7 @@ static int buffer_cpu_notify(struct notifier_block *self,
3214} 3225}
3215 3226
3216/** 3227/**
3217 * bh_uptodate_or_lock: Test whether the buffer is uptodate 3228 * bh_uptodate_or_lock - Test whether the buffer is uptodate
3218 * @bh: struct buffer_head 3229 * @bh: struct buffer_head
3219 * 3230 *
3220 * Return true if the buffer is up-to-date and false, 3231 * Return true if the buffer is up-to-date and false,
@@ -3233,7 +3244,7 @@ int bh_uptodate_or_lock(struct buffer_head *bh)
3233EXPORT_SYMBOL(bh_uptodate_or_lock); 3244EXPORT_SYMBOL(bh_uptodate_or_lock);
3234 3245
3235/** 3246/**
3236 * bh_submit_read: Submit a locked buffer for reading 3247 * bh_submit_read - Submit a locked buffer for reading
3237 * @bh: struct buffer_head 3248 * @bh: struct buffer_head
3238 * 3249 *
3239 * Returns zero on success and -EIO on error. 3250 * Returns zero on success and -EIO on error.
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index edd248367b36..dbd91461853c 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -6,7 +6,9 @@ and sync so that events like out of disk space get reported properly on
6cached files. Fix setxattr failure to certain Samba versions. Fix mount 6cached files. Fix setxattr failure to certain Samba versions. Fix mount
7of second share to disconnected server session (autoreconnect on this). 7of second share to disconnected server session (autoreconnect on this).
8Add ability to modify cifs acls for handling chmod (when mounted with 8Add ability to modify cifs acls for handling chmod (when mounted with
9cifsacl flag). 9cifsacl flag). Fix prefixpath path separator so we can handle mounts
10with prefixpaths longer than one directory (one path component) when
11mounted to Windows servers.
10 12
11Version 1.51 13Version 1.51
12------------ 14------------
diff --git a/fs/cifs/README b/fs/cifs/README
index c623e2f9c5db..50306229b0f9 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -461,7 +461,7 @@ A partial list of the supported mount options follows:
461 cifsacl Report mode bits (e.g. on stat) based on the Windows ACL for 461 cifsacl Report mode bits (e.g. on stat) based on the Windows ACL for
462 the file. (EXPERIMENTAL) 462 the file. (EXPERIMENTAL)
463 servern Specify the server 's netbios name (RFC1001 name) to use 463 servern Specify the server 's netbios name (RFC1001 name) to use
464 when attempting to setup a session to the server. This is 464 when attempting to setup a session to the server.
465 This is needed for mounting to some older servers (such 465 This is needed for mounting to some older servers (such
466 as OS/2 or Windows 98 and Windows ME) since they do not 466 as OS/2 or Windows 98 and Windows ME) since they do not
467 support a default server name. A server name can be up 467 support a default server name. A server name can be up
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 73c4c419663c..0228ed06069e 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -98,8 +98,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
98 if (mid_entry->resp_buf) { 98 if (mid_entry->resp_buf) {
99 cifs_dump_detail(mid_entry->resp_buf); 99 cifs_dump_detail(mid_entry->resp_buf);
100 cifs_dump_mem("existing buf: ", 100 cifs_dump_mem("existing buf: ",
101 mid_entry->resp_buf, 101 mid_entry->resp_buf, 62);
102 62 /* fixme */);
103 } 102 }
104 } 103 }
105 } 104 }
@@ -439,7 +438,7 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
439 438
440 return length; 439 return length;
441} 440}
442#endif 441#endif /* STATS */
443 442
444static struct proc_dir_entry *proc_fs_cifs; 443static struct proc_dir_entry *proc_fs_cifs;
445read_proc_t cifs_txanchor_read; 444read_proc_t cifs_txanchor_read;
@@ -482,7 +481,7 @@ cifs_proc_init(void)
482 cifs_stats_read, NULL); 481 cifs_stats_read, NULL);
483 if (pde) 482 if (pde)
484 pde->write_proc = cifs_stats_write; 483 pde->write_proc = cifs_stats_write;
485#endif 484#endif /* STATS */
486 pde = create_proc_read_entry("cifsFYI", 0, proc_fs_cifs, 485 pde = create_proc_read_entry("cifsFYI", 0, proc_fs_cifs,
487 cifsFYI_read, NULL); 486 cifsFYI_read, NULL);
488 if (pde) 487 if (pde)
@@ -918,4 +917,12 @@ security_flags_write(struct file *file, const char __user *buffer,
918 /* BB should we turn on MAY flags for other MUST options? */ 917 /* BB should we turn on MAY flags for other MUST options? */
919 return count; 918 return count;
920} 919}
921#endif 920#else
921inline void cifs_proc_init(void)
922{
923}
924
925inline void cifs_proc_clean(void)
926{
927}
928#endif /* PROC_FS */
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index c26cd0d2c6d5..5eb3b83bbfa7 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -25,8 +25,11 @@
25 25
26void cifs_dump_mem(char *label, void *data, int length); 26void cifs_dump_mem(char *label, void *data, int length);
27#ifdef CONFIG_CIFS_DEBUG2 27#ifdef CONFIG_CIFS_DEBUG2
28#define DBG2 2
28void cifs_dump_detail(struct smb_hdr *); 29void cifs_dump_detail(struct smb_hdr *);
29void cifs_dump_mids(struct TCP_Server_Info *); 30void cifs_dump_mids(struct TCP_Server_Info *);
31#else
32#define DBG2 0
30#endif 33#endif
31extern int traceSMB; /* flag which enables the function below */ 34extern int traceSMB; /* flag which enables the function below */
32void dump_smb(struct smb_hdr *, int); 35void dump_smb(struct smb_hdr *, int);
@@ -64,10 +67,10 @@ extern int cifsERROR;
64 * --------- 67 * ---------
65 */ 68 */
66#else /* _CIFS_DEBUG */ 69#else /* _CIFS_DEBUG */
67#define cERROR(button,prspec) 70#define cERROR(button, prspec)
68#define cEVENT(format,arg...) 71#define cEVENT(format, arg...)
69#define cFYI(button, prspec) 72#define cFYI(button, prspec)
70#define cifserror(format,arg...) 73#define cifserror(format, arg...)
71#endif /* _CIFS_DEBUG */ 74#endif /* _CIFS_DEBUG */
72 75
73#endif /* _H_CIFS_DEBUG */ 76#endif /* _H_CIFS_DEBUG */
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 6ad447529961..56c924033b78 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -33,7 +33,6 @@ void dfs_shrink_umount_helper(struct vfsmount *vfsmnt)
33{ 33{
34 mark_mounts_for_expiry(&cifs_dfs_automount_list); 34 mark_mounts_for_expiry(&cifs_dfs_automount_list);
35 mark_mounts_for_expiry(&cifs_dfs_automount_list); 35 mark_mounts_for_expiry(&cifs_dfs_automount_list);
36 shrink_submounts(vfsmnt, &cifs_dfs_automount_list);
37} 36}
38 37
39/** 38/**
@@ -74,7 +73,7 @@ static char *cifs_get_share_name(const char *node_name)
74 pSep = memchr(UNC+2, '\\', len-2); 73 pSep = memchr(UNC+2, '\\', len-2);
75 if (!pSep) { 74 if (!pSep) {
76 cERROR(1, ("%s: no server name end in node name: %s", 75 cERROR(1, ("%s: no server name end in node name: %s",
77 __FUNCTION__, node_name)); 76 __func__, node_name));
78 kfree(UNC); 77 kfree(UNC);
79 return NULL; 78 return NULL;
80 } 79 }
@@ -84,7 +83,7 @@ static char *cifs_get_share_name(const char *node_name)
84 pSep = memchr(UNC+(pSep-UNC), '\\', len-(pSep-UNC)); 83 pSep = memchr(UNC+(pSep-UNC), '\\', len-(pSep-UNC));
85 if (!pSep) { 84 if (!pSep) {
86 cERROR(1, ("%s:2 cant find share name in node name: %s", 85 cERROR(1, ("%s:2 cant find share name in node name: %s",
87 __FUNCTION__, node_name)); 86 __func__, node_name));
88 kfree(UNC); 87 kfree(UNC);
89 return NULL; 88 return NULL;
90 } 89 }
@@ -127,7 +126,7 @@ static char *compose_mount_options(const char *sb_mountdata,
127 rc = dns_resolve_server_name_to_ip(*devname, &srvIP); 126 rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
128 if (rc != 0) { 127 if (rc != 0) {
129 cERROR(1, ("%s: Failed to resolve server part of %s to IP", 128 cERROR(1, ("%s: Failed to resolve server part of %s to IP",
130 __FUNCTION__, *devname)); 129 __func__, *devname));
131 mountdata = ERR_PTR(rc); 130 mountdata = ERR_PTR(rc);
132 goto compose_mount_options_out; 131 goto compose_mount_options_out;
133 } 132 }
@@ -181,8 +180,8 @@ static char *compose_mount_options(const char *sb_mountdata,
181 } 180 }
182 } 181 }
183 182
184 /*cFYI(1,("%s: parent mountdata: %s", __FUNCTION__,sb_mountdata));*/ 183 /*cFYI(1,("%s: parent mountdata: %s", __func__,sb_mountdata));*/
185 /*cFYI(1, ("%s: submount mountdata: %s", __FUNCTION__, mountdata ));*/ 184 /*cFYI(1, ("%s: submount mountdata: %s", __func__, mountdata ));*/
186 185
187compose_mount_options_out: 186compose_mount_options_out:
188 kfree(srvIP); 187 kfree(srvIP);
@@ -286,7 +285,7 @@ static void dump_referral(const struct dfs_info3_param *ref)
286 cFYI(1, ("DFS: node path: %s", ref->node_name)); 285 cFYI(1, ("DFS: node path: %s", ref->node_name));
287 cFYI(1, ("DFS: fl: %hd, srv_type: %hd", ref->flags, ref->server_type)); 286 cFYI(1, ("DFS: fl: %hd, srv_type: %hd", ref->flags, ref->server_type));
288 cFYI(1, ("DFS: ref_flags: %hd, path_consumed: %hd", ref->ref_flag, 287 cFYI(1, ("DFS: ref_flags: %hd, path_consumed: %hd", ref->ref_flag,
289 ref->PathConsumed)); 288 ref->path_consumed));
290} 289}
291 290
292 291
@@ -302,7 +301,7 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
302 int rc = 0; 301 int rc = 0;
303 struct vfsmount *mnt = ERR_PTR(-ENOENT); 302 struct vfsmount *mnt = ERR_PTR(-ENOENT);
304 303
305 cFYI(1, ("in %s", __FUNCTION__)); 304 cFYI(1, ("in %s", __func__));
306 BUG_ON(IS_ROOT(dentry)); 305 BUG_ON(IS_ROOT(dentry));
307 306
308 xid = GetXid(); 307 xid = GetXid();
@@ -336,7 +335,7 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
336 len = strlen(referrals[i].node_name); 335 len = strlen(referrals[i].node_name);
337 if (len < 2) { 336 if (len < 2) {
338 cERROR(1, ("%s: Net Address path too short: %s", 337 cERROR(1, ("%s: Net Address path too short: %s",
339 __FUNCTION__, referrals[i].node_name)); 338 __func__, referrals[i].node_name));
340 rc = -EINVAL; 339 rc = -EINVAL;
341 goto out_err; 340 goto out_err;
342 } 341 }
@@ -344,7 +343,7 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
344 nd->path.dentry, 343 nd->path.dentry,
345 referrals[i].node_name); 344 referrals[i].node_name);
346 cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p", 345 cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p",
347 __FUNCTION__, 346 __func__,
348 referrals[i].node_name, mnt)); 347 referrals[i].node_name, mnt));
349 348
350 /* complete mount procedure if we accured submount */ 349 /* complete mount procedure if we accured submount */
@@ -365,7 +364,7 @@ out:
365 FreeXid(xid); 364 FreeXid(xid);
366 free_dfs_info_array(referrals, num_referrals); 365 free_dfs_info_array(referrals, num_referrals);
367 kfree(full_path); 366 kfree(full_path);
368 cFYI(1, ("leaving %s" , __FUNCTION__)); 367 cFYI(1, ("leaving %s" , __func__));
369 return ERR_PTR(rc); 368 return ERR_PTR(rc);
370out_err: 369out_err:
371 path_put(&nd->path); 370 path_put(&nd->path);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index d543accc10dd..6653e29637a7 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -125,7 +125,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
125#ifdef CONFIG_CIFS_DEBUG2 125#ifdef CONFIG_CIFS_DEBUG2
126 if (cifsFYI && !IS_ERR(spnego_key)) { 126 if (cifsFYI && !IS_ERR(spnego_key)) {
127 struct cifs_spnego_msg *msg = spnego_key->payload.data; 127 struct cifs_spnego_msg *msg = spnego_key->payload.data;
128 cifs_dump_mem("SPNEGO reply blob:", msg->data, min(1024, 128 cifs_dump_mem("SPNEGO reply blob:", msg->data, min(1024U,
129 msg->secblob_len + msg->sesskey_len)); 129 msg->secblob_len + msg->sesskey_len));
130 } 130 }
131#endif /* CONFIG_CIFS_DEBUG2 */ 131#endif /* CONFIG_CIFS_DEBUG2 */
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index b5903b89250d..7d75272a6b3f 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -32,7 +32,7 @@
32 * 32 *
33 */ 33 */
34int 34int
35cifs_strfromUCS_le(char *to, const __le16 * from, 35cifs_strfromUCS_le(char *to, const __le16 *from,
36 int len, const struct nls_table *codepage) 36 int len, const struct nls_table *codepage)
37{ 37{
38 int i; 38 int i;
@@ -61,7 +61,7 @@ cifs_strfromUCS_le(char *to, const __le16 * from,
61 * 61 *
62 */ 62 */
63int 63int
64cifs_strtoUCS(__le16 * to, const char *from, int len, 64cifs_strtoUCS(__le16 *to, const char *from, int len,
65 const struct nls_table *codepage) 65 const struct nls_table *codepage)
66{ 66{
67 int charlen; 67 int charlen;
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 614c11fcdcb6..14eb9a2395d3 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -254,7 +254,8 @@ UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
254 const wchar_t *anchor2 = ucs2; 254 const wchar_t *anchor2 = ucs2;
255 255
256 while (*ucs1) { 256 while (*ucs1) {
257 if (*ucs1 == *ucs2) { /* Partial match found */ 257 if (*ucs1 == *ucs2) {
258 /* Partial match found */
258 ucs1++; 259 ucs1++;
259 ucs2++; 260 ucs2++;
260 } else { 261 } else {
@@ -279,7 +280,8 @@ UniToupper(register wchar_t uc)
279{ 280{
280 register const struct UniCaseRange *rp; 281 register const struct UniCaseRange *rp;
281 282
282 if (uc < sizeof (CifsUniUpperTable)) { /* Latin characters */ 283 if (uc < sizeof(CifsUniUpperTable)) {
284 /* Latin characters */
283 return uc + CifsUniUpperTable[uc]; /* Use base tables */ 285 return uc + CifsUniUpperTable[uc]; /* Use base tables */
284 } else { 286 } else {
285 rp = CifsUniUpperRange; /* Use range tables */ 287 rp = CifsUniUpperRange; /* Use range tables */
@@ -320,7 +322,8 @@ UniTolower(wchar_t uc)
320{ 322{
321 register struct UniCaseRange *rp; 323 register struct UniCaseRange *rp;
322 324
323 if (uc < sizeof (UniLowerTable)) { /* Latin characters */ 325 if (uc < sizeof(UniLowerTable)) {
326 /* Latin characters */
324 return uc + UniLowerTable[uc]; /* Use base tables */ 327 return uc + UniLowerTable[uc]; /* Use base tables */
325 } else { 328 } else {
326 rp = UniLowerRange; /* Use range tables */ 329 rp = UniLowerRange; /* Use range tables */
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index a7035bd18e4e..1cb5b0a9f2ac 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifsacl.c 2 * fs/cifs/cifsacl.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2007 4 * Copyright (C) International Business Machines Corp., 2007,2008
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * Contains the routines for mapping CIFS/NTFS ACLs 7 * Contains the routines for mapping CIFS/NTFS ACLs
@@ -46,8 +46,7 @@ static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
46static const struct cifs_sid sid_everyone = { 46static const struct cifs_sid sid_everyone = {
47 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; 47 1, 1, {0, 0, 0, 0, 0, 1}, {0} };
48/* group users */ 48/* group users */
49static const struct cifs_sid sid_user = 49static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
50 {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
51 50
52 51
53int match_sid(struct cifs_sid *ctsid) 52int match_sid(struct cifs_sid *ctsid)
@@ -195,9 +194,9 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
195 /* For deny ACEs we change the mask so that subsequent allow access 194 /* For deny ACEs we change the mask so that subsequent allow access
196 control entries do not turn on the bits we are denying */ 195 control entries do not turn on the bits we are denying */
197 if (type == ACCESS_DENIED) { 196 if (type == ACCESS_DENIED) {
198 if (flags & GENERIC_ALL) { 197 if (flags & GENERIC_ALL)
199 *pbits_to_set &= ~S_IRWXUGO; 198 *pbits_to_set &= ~S_IRWXUGO;
200 } 199
201 if ((flags & GENERIC_WRITE) || 200 if ((flags & GENERIC_WRITE) ||
202 ((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS)) 201 ((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS))
203 *pbits_to_set &= ~S_IWUGO; 202 *pbits_to_set &= ~S_IWUGO;
@@ -216,9 +215,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
216 215
217 if (flags & GENERIC_ALL) { 216 if (flags & GENERIC_ALL) {
218 *pmode |= (S_IRWXUGO & (*pbits_to_set)); 217 *pmode |= (S_IRWXUGO & (*pbits_to_set));
219#ifdef CONFIG_CIFS_DEBUG2 218 cFYI(DBG2, ("all perms"));
220 cFYI(1, ("all perms"));
221#endif
222 return; 219 return;
223 } 220 }
224 if ((flags & GENERIC_WRITE) || 221 if ((flags & GENERIC_WRITE) ||
@@ -231,9 +228,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
231 ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) 228 ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS))
232 *pmode |= (S_IXUGO & (*pbits_to_set)); 229 *pmode |= (S_IXUGO & (*pbits_to_set));
233 230
234#ifdef CONFIG_CIFS_DEBUG2 231 cFYI(DBG2, ("access flags 0x%x mode now 0x%x", flags, *pmode));
235 cFYI(1, ("access flags 0x%x mode now 0x%x", flags, *pmode));
236#endif
237 return; 232 return;
238} 233}
239 234
@@ -262,13 +257,11 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
262 if (mode & S_IXUGO) 257 if (mode & S_IXUGO)
263 *pace_flags |= SET_FILE_EXEC_RIGHTS; 258 *pace_flags |= SET_FILE_EXEC_RIGHTS;
264 259
265#ifdef CONFIG_CIFS_DEBUG2 260 cFYI(DBG2, ("mode: 0x%x, access flags now 0x%x", mode, *pace_flags));
266 cFYI(1, ("mode: 0x%x, access flags now 0x%x", mode, *pace_flags));
267#endif
268 return; 261 return;
269} 262}
270 263
271static __le16 fill_ace_for_sid(struct cifs_ace *pntace, 264static __u16 fill_ace_for_sid(struct cifs_ace *pntace,
272 const struct cifs_sid *psid, __u64 nmode, umode_t bits) 265 const struct cifs_sid *psid, __u64 nmode, umode_t bits)
273{ 266{
274 int i; 267 int i;
@@ -358,11 +351,9 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
358 return; 351 return;
359 } 352 }
360 353
361#ifdef CONFIG_CIFS_DEBUG2 354 cFYI(DBG2, ("DACL revision %d size %d num aces %d",
362 cFYI(1, ("DACL revision %d size %d num aces %d",
363 le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size), 355 le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
364 le32_to_cpu(pdacl->num_aces))); 356 le32_to_cpu(pdacl->num_aces)));
365#endif
366 357
367 /* reset rwx permissions for user/group/other. 358 /* reset rwx permissions for user/group/other.
368 Also, if num_aces is 0 i.e. DACL has no ACEs, 359 Also, if num_aces is 0 i.e. DACL has no ACEs,
@@ -381,10 +372,6 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
381 ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), 372 ppace = kmalloc(num_aces * sizeof(struct cifs_ace *),
382 GFP_KERNEL); 373 GFP_KERNEL);
383 374
384/* cifscred->cecount = pdacl->num_aces;
385 cifscred->aces = kmalloc(num_aces *
386 sizeof(struct cifs_ace *), GFP_KERNEL);*/
387
388 for (i = 0; i < num_aces; ++i) { 375 for (i = 0; i < num_aces; ++i) {
389 ppace[i] = (struct cifs_ace *) (acl_base + acl_size); 376 ppace[i] = (struct cifs_ace *) (acl_base + acl_size);
390#ifdef CONFIG_CIFS_DEBUG2 377#ifdef CONFIG_CIFS_DEBUG2
@@ -424,7 +411,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
424static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid, 411static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
425 struct cifs_sid *pgrpsid, __u64 nmode) 412 struct cifs_sid *pgrpsid, __u64 nmode)
426{ 413{
427 __le16 size = 0; 414 u16 size = 0;
428 struct cifs_acl *pnndacl; 415 struct cifs_acl *pnndacl;
429 416
430 pnndacl = (struct cifs_acl *)((char *)pndacl + sizeof(struct cifs_acl)); 417 pnndacl = (struct cifs_acl *)((char *)pndacl + sizeof(struct cifs_acl));
@@ -437,7 +424,7 @@ static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
437 &sid_everyone, nmode, S_IRWXO); 424 &sid_everyone, nmode, S_IRWXO);
438 425
439 pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl)); 426 pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl));
440 pndacl->num_aces = 3; 427 pndacl->num_aces = cpu_to_le32(3);
441 428
442 return (0); 429 return (0);
443} 430}
@@ -495,13 +482,11 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
495 le32_to_cpu(pntsd->gsidoffset)); 482 le32_to_cpu(pntsd->gsidoffset));
496 dacloffset = le32_to_cpu(pntsd->dacloffset); 483 dacloffset = le32_to_cpu(pntsd->dacloffset);
497 dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset); 484 dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset);
498#ifdef CONFIG_CIFS_DEBUG2 485 cFYI(DBG2, ("revision %d type 0x%x ooffset 0x%x goffset 0x%x "
499 cFYI(1, ("revision %d type 0x%x ooffset 0x%x goffset 0x%x "
500 "sacloffset 0x%x dacloffset 0x%x", 486 "sacloffset 0x%x dacloffset 0x%x",
501 pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset), 487 pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset),
502 le32_to_cpu(pntsd->gsidoffset), 488 le32_to_cpu(pntsd->gsidoffset),
503 le32_to_cpu(pntsd->sacloffset), dacloffset)); 489 le32_to_cpu(pntsd->sacloffset), dacloffset));
504#endif
505/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */ 490/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */
506 rc = parse_sid(owner_sid_ptr, end_of_acl); 491 rc = parse_sid(owner_sid_ptr, end_of_acl);
507 if (rc) 492 if (rc)
@@ -571,9 +556,9 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
571 556
572/* Retrieve an ACL from the server */ 557/* Retrieve an ACL from the server */
573static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode, 558static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode,
574 const char *path) 559 const char *path, const __u16 *pfid)
575{ 560{
576 struct cifsFileInfo *open_file; 561 struct cifsFileInfo *open_file = NULL;
577 int unlock_file = FALSE; 562 int unlock_file = FALSE;
578 int xid; 563 int xid;
579 int rc = -EIO; 564 int rc = -EIO;
@@ -588,7 +573,11 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode,
588 return NULL; 573 return NULL;
589 574
590 xid = GetXid(); 575 xid = GetXid();
591 open_file = find_readable_file(CIFS_I(inode)); 576 if (pfid == NULL)
577 open_file = find_readable_file(CIFS_I(inode));
578 else
579 fid = *pfid;
580
592 sb = inode->i_sb; 581 sb = inode->i_sb;
593 if (sb == NULL) { 582 if (sb == NULL) {
594 FreeXid(xid); 583 FreeXid(xid);
@@ -599,7 +588,7 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode,
599 if (open_file) { 588 if (open_file) {
600 unlock_file = TRUE; 589 unlock_file = TRUE;
601 fid = open_file->netfid; 590 fid = open_file->netfid;
602 } else { 591 } else if (pfid == NULL) {
603 int oplock = FALSE; 592 int oplock = FALSE;
604 /* open file */ 593 /* open file */
605 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, 594 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN,
@@ -615,10 +604,11 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode,
615 604
616 rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); 605 rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen);
617 cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); 606 cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen));
618 if (unlock_file == TRUE) 607 if (unlock_file == TRUE) /* find_readable_file increments ref count */
619 atomic_dec(&open_file->wrtPending); 608 atomic_dec(&open_file->wrtPending);
620 else 609 else if (pfid == NULL) /* if opened above we have to close the handle */
621 CIFSSMBClose(xid, cifs_sb->tcon, fid); 610 CIFSSMBClose(xid, cifs_sb->tcon, fid);
611 /* else handle was passed in by caller */
622 612
623 FreeXid(xid); 613 FreeXid(xid);
624 return pntsd; 614 return pntsd;
@@ -636,9 +626,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
636 struct super_block *sb; 626 struct super_block *sb;
637 struct cifs_sb_info *cifs_sb; 627 struct cifs_sb_info *cifs_sb;
638 628
639#ifdef CONFIG_CIFS_DEBUG2 629 cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode));
640 cFYI(1, ("set ACL for %s from mode 0x%x", path, inode->i_mode));
641#endif
642 630
643 if (!inode) 631 if (!inode)
644 return (rc); 632 return (rc);
@@ -669,9 +657,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
669 } 657 }
670 658
671 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); 659 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
672#ifdef CONFIG_CIFS_DEBUG2 660 cFYI(DBG2, ("SetCIFSACL rc = %d", rc));
673 cFYI(1, ("SetCIFSACL rc = %d", rc));
674#endif
675 if (unlock_file == TRUE) 661 if (unlock_file == TRUE)
676 atomic_dec(&open_file->wrtPending); 662 atomic_dec(&open_file->wrtPending);
677 else 663 else
@@ -683,16 +669,14 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
683} 669}
684 670
685/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ 671/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
686void acl_to_uid_mode(struct inode *inode, const char *path) 672void acl_to_uid_mode(struct inode *inode, const char *path, const __u16 *pfid)
687{ 673{
688 struct cifs_ntsd *pntsd = NULL; 674 struct cifs_ntsd *pntsd = NULL;
689 u32 acllen = 0; 675 u32 acllen = 0;
690 int rc = 0; 676 int rc = 0;
691 677
692#ifdef CONFIG_CIFS_DEBUG2 678 cFYI(DBG2, ("converting ACL to mode for %s", path));
693 cFYI(1, ("converting ACL to mode for %s", path)); 679 pntsd = get_cifs_acl(&acllen, inode, path, pfid);
694#endif
695 pntsd = get_cifs_acl(&acllen, inode, path);
696 680
697 /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ 681 /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
698 if (pntsd) 682 if (pntsd)
@@ -712,12 +696,10 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
712 struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */ 696 struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */
713 struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ 697 struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
714 698
715#ifdef CONFIG_CIFS_DEBUG2 699 cFYI(DBG2, ("set ACL from mode for %s", path));
716 cFYI(1, ("set ACL from mode for %s", path));
717#endif
718 700
719 /* Get the security descriptor */ 701 /* Get the security descriptor */
720 pntsd = get_cifs_acl(&acllen, inode, path); 702 pntsd = get_cifs_acl(&acllen, inode, path, NULL);
721 703
722 /* Add three ACEs for owner, group, everyone getting rid of 704 /* Add three ACEs for owner, group, everyone getting rid of
723 other ACEs as chmod disables ACEs and set the security descriptor */ 705 other ACEs as chmod disables ACEs and set the security descriptor */
@@ -736,16 +718,12 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
736 718
737 rc = build_sec_desc(pntsd, pnntsd, acllen, inode, nmode); 719 rc = build_sec_desc(pntsd, pnntsd, acllen, inode, nmode);
738 720
739#ifdef CONFIG_CIFS_DEBUG2 721 cFYI(DBG2, ("build_sec_desc rc: %d", rc));
740 cFYI(1, ("build_sec_desc rc: %d", rc));
741#endif
742 722
743 if (!rc) { 723 if (!rc) {
744 /* Set the security descriptor */ 724 /* Set the security descriptor */
745 rc = set_cifs_acl(pnntsd, acllen, inode, path); 725 rc = set_cifs_acl(pnntsd, acllen, inode, path);
746#ifdef CONFIG_CIFS_DEBUG2 726 cFYI(DBG2, ("set_cifs_acl rc: %d", rc));
747 cFYI(1, ("set_cifs_acl rc: %d", rc));
748#endif
749 } 727 }
750 728
751 kfree(pnntsd); 729 kfree(pnntsd);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index fcc434227691..a04b17e5a9d0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -204,9 +204,8 @@ cifs_put_super(struct super_block *sb)
204 return; 204 return;
205 } 205 }
206 rc = cifs_umount(sb, cifs_sb); 206 rc = cifs_umount(sb, cifs_sb);
207 if (rc) { 207 if (rc)
208 cERROR(1, ("cifs_umount failed with return code %d", rc)); 208 cERROR(1, ("cifs_umount failed with return code %d", rc));
209 }
210#ifdef CONFIG_CIFS_DFS_UPCALL 209#ifdef CONFIG_CIFS_DFS_UPCALL
211 if (cifs_sb->mountdata) { 210 if (cifs_sb->mountdata) {
212 kfree(cifs_sb->mountdata); 211 kfree(cifs_sb->mountdata);
@@ -461,7 +460,7 @@ int cifs_xstate_get(struct super_block *sb, struct fs_quota_stat *qstats)
461 460
462static struct quotactl_ops cifs_quotactl_ops = { 461static struct quotactl_ops cifs_quotactl_ops = {
463 .set_xquota = cifs_xquota_set, 462 .set_xquota = cifs_xquota_set,
464 .get_xquota = cifs_xquota_set, 463 .get_xquota = cifs_xquota_get,
465 .set_xstate = cifs_xstate_set, 464 .set_xstate = cifs_xstate_set,
466 .get_xstate = cifs_xstate_get, 465 .get_xstate = cifs_xstate_get,
467}; 466};
@@ -472,9 +471,7 @@ static void cifs_umount_begin(struct vfsmount *vfsmnt, int flags)
472 struct cifs_sb_info *cifs_sb; 471 struct cifs_sb_info *cifs_sb;
473 struct cifsTconInfo *tcon; 472 struct cifsTconInfo *tcon;
474 473
475#ifdef CONFIG_CIFS_DFS_UPCALL
476 dfs_shrink_umount_helper(vfsmnt); 474 dfs_shrink_umount_helper(vfsmnt);
477#endif /* CONFIG CIFS_DFS_UPCALL */
478 475
479 if (!(flags & MNT_FORCE)) 476 if (!(flags & MNT_FORCE))
480 return; 477 return;
@@ -992,9 +989,7 @@ static int __init
992init_cifs(void) 989init_cifs(void)
993{ 990{
994 int rc = 0; 991 int rc = 0;
995#ifdef CONFIG_PROC_FS
996 cifs_proc_init(); 992 cifs_proc_init();
997#endif
998/* INIT_LIST_HEAD(&GlobalServerList);*/ /* BB not implemented yet */ 993/* INIT_LIST_HEAD(&GlobalServerList);*/ /* BB not implemented yet */
999 INIT_LIST_HEAD(&GlobalSMBSessionList); 994 INIT_LIST_HEAD(&GlobalSMBSessionList);
1000 INIT_LIST_HEAD(&GlobalTreeConnectionList); 995 INIT_LIST_HEAD(&GlobalTreeConnectionList);
@@ -1095,19 +1090,15 @@ init_cifs(void)
1095 out_destroy_inodecache: 1090 out_destroy_inodecache:
1096 cifs_destroy_inodecache(); 1091 cifs_destroy_inodecache();
1097 out_clean_proc: 1092 out_clean_proc:
1098#ifdef CONFIG_PROC_FS
1099 cifs_proc_clean(); 1093 cifs_proc_clean();
1100#endif
1101 return rc; 1094 return rc;
1102} 1095}
1103 1096
1104static void __exit 1097static void __exit
1105exit_cifs(void) 1098exit_cifs(void)
1106{ 1099{
1107 cFYI(0, ("exit_cifs")); 1100 cFYI(DBG2, ("exit_cifs"));
1108#ifdef CONFIG_PROC_FS
1109 cifs_proc_clean(); 1101 cifs_proc_clean();
1110#endif
1111#ifdef CONFIG_CIFS_DFS_UPCALL 1102#ifdef CONFIG_CIFS_DFS_UPCALL
1112 unregister_key_type(&key_type_dns_resolver); 1103 unregister_key_type(&key_type_dns_resolver);
1113#endif 1104#endif
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 5d32d8ddc82e..69a2e1942542 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -454,7 +454,7 @@ struct dir_notify_req {
454 454
455struct dfs_info3_param { 455struct dfs_info3_param {
456 int flags; /* DFSREF_REFERRAL_SERVER, DFSREF_STORAGE_SERVER*/ 456 int flags; /* DFSREF_REFERRAL_SERVER, DFSREF_STORAGE_SERVER*/
457 int PathConsumed; 457 int path_consumed;
458 int server_type; 458 int server_type;
459 int ref_flag; 459 int ref_flag;
460 char *path_name; 460 char *path_name;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 2f09f565a3d9..7e5e0e78cd72 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -39,8 +39,8 @@ extern int smb_send(struct socket *, struct smb_hdr *,
39 unsigned int /* length */ , struct sockaddr *); 39 unsigned int /* length */ , struct sockaddr *);
40extern unsigned int _GetXid(void); 40extern unsigned int _GetXid(void);
41extern void _FreeXid(unsigned int); 41extern void _FreeXid(unsigned int);
42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__FUNCTION__, xid,current->fsuid)); 42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid));
43#define FreeXid(curr_xid) {_FreeXid(curr_xid); cFYI(1,("CIFS VFS: leaving %s (xid = %d) rc = %d",__FUNCTION__,curr_xid,(int)rc));} 43#define FreeXid(curr_xid) {_FreeXid(curr_xid); cFYI(1,("CIFS VFS: leaving %s (xid = %d) rc = %d",__func__,curr_xid,(int)rc));}
44extern char *build_path_from_dentry(struct dentry *); 44extern char *build_path_from_dentry(struct dentry *);
45extern char *build_wildcard_path_from_dentry(struct dentry *direntry); 45extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
46/* extern void renew_parental_timestamps(struct dentry *direntry);*/ 46/* extern void renew_parental_timestamps(struct dentry *direntry);*/
@@ -53,11 +53,11 @@ extern int SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
53extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *, 53extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *,
54 struct kvec *, int /* nvec to send */, 54 struct kvec *, int /* nvec to send */,
55 int * /* type of buf returned */ , const int flags); 55 int * /* type of buf returned */ , const int flags);
56extern int SendReceiveBlockingLock(const unsigned int /* xid */ , 56extern int SendReceiveBlockingLock(const unsigned int xid,
57 struct cifsTconInfo *, 57 struct cifsTconInfo *ptcon,
58 struct smb_hdr * /* input */ , 58 struct smb_hdr *in_buf ,
59 struct smb_hdr * /* out */ , 59 struct smb_hdr *out_buf,
60 int * /* bytes returned */); 60 int *bytes_returned);
61extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); 61extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length);
62extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); 62extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *);
63extern int is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); 63extern int is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
@@ -84,7 +84,7 @@ extern __u16 GetNextMid(struct TCP_Server_Info *server);
84extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16, 84extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16,
85 struct cifsTconInfo *); 85 struct cifsTconInfo *);
86extern void DeleteOplockQEntry(struct oplock_q_entry *); 86extern void DeleteOplockQEntry(struct oplock_q_entry *);
87extern struct timespec cifs_NTtimeToUnix(u64 /* utc nanoseconds since 1601 */ ); 87extern struct timespec cifs_NTtimeToUnix(u64 utc_nanoseconds_since_1601);
88extern u64 cifs_UnixTimeToNT(struct timespec); 88extern u64 cifs_UnixTimeToNT(struct timespec);
89extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time); 89extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time);
90extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time); 90extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time);
@@ -92,11 +92,12 @@ extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time);
92extern int cifs_get_inode_info(struct inode **pinode, 92extern int cifs_get_inode_info(struct inode **pinode,
93 const unsigned char *search_path, 93 const unsigned char *search_path,
94 FILE_ALL_INFO * pfile_info, 94 FILE_ALL_INFO * pfile_info,
95 struct super_block *sb, int xid); 95 struct super_block *sb, int xid, const __u16 *pfid);
96extern int cifs_get_inode_info_unix(struct inode **pinode, 96extern int cifs_get_inode_info_unix(struct inode **pinode,
97 const unsigned char *search_path, 97 const unsigned char *search_path,
98 struct super_block *sb, int xid); 98 struct super_block *sb, int xid);
99extern void acl_to_uid_mode(struct inode *inode, const char *search_path); 99extern void acl_to_uid_mode(struct inode *inode, const char *path,
100 const __u16 *pfid);
100extern int mode_to_acl(struct inode *inode, const char *path, __u64); 101extern int mode_to_acl(struct inode *inode, const char *path, __u64);
101 102
102extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, 103extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
@@ -104,7 +105,11 @@ extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
104extern int cifs_umount(struct super_block *, struct cifs_sb_info *); 105extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
105#ifdef CONFIG_CIFS_DFS_UPCALL 106#ifdef CONFIG_CIFS_DFS_UPCALL
106extern void dfs_shrink_umount_helper(struct vfsmount *vfsmnt); 107extern void dfs_shrink_umount_helper(struct vfsmount *vfsmnt);
107#endif 108#else
109static inline void dfs_shrink_umount_helper(struct vfsmount *vfsmnt)
110{
111}
112#endif /* DFS_UPCALL */
108void cifs_proc_init(void); 113void cifs_proc_init(void);
109void cifs_proc_clean(void); 114void cifs_proc_clean(void);
110 115
@@ -175,11 +180,11 @@ extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon,
175 struct kstatfs *FSData); 180 struct kstatfs *FSData);
176 181
177extern int CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, 182extern int CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon,
178 const char *fileName, const FILE_BASIC_INFO * data, 183 const char *fileName, const FILE_BASIC_INFO *data,
179 const struct nls_table *nls_codepage, 184 const struct nls_table *nls_codepage,
180 int remap_special_chars); 185 int remap_special_chars);
181extern int CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, 186extern int CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
182 const FILE_BASIC_INFO * data, __u16 fid); 187 const FILE_BASIC_INFO *data, __u16 fid);
183#if 0 188#if 0
184extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, 189extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon,
185 char *fileName, __u16 dos_attributes, 190 char *fileName, __u16 dos_attributes,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 9409524e4bf8..30bbe448e260 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifssmb.c 2 * fs/cifs/cifssmb.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2007 4 * Copyright (C) International Business Machines Corp., 2002,2008
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * Contains the routines for constructing the SMB PDUs themselves 7 * Contains the routines for constructing the SMB PDUs themselves
@@ -102,10 +102,12 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
102 to this tcon */ 102 to this tcon */
103} 103}
104 104
105/* If the return code is zero, this function must fill in request_buf pointer */ 105/* Allocate and return pointer to an SMB request buffer, and set basic
106 SMB information in the SMB header. If the return code is zero, this
107 function must have filled in request_buf pointer */
106static int 108static int
107small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, 109small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
108 void **request_buf /* returned */) 110 void **request_buf)
109{ 111{
110 int rc = 0; 112 int rc = 0;
111 113
@@ -363,7 +365,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
363 *response_buf = *request_buf; 365 *response_buf = *request_buf;
364 366
365 header_assemble((struct smb_hdr *) *request_buf, smb_command, tcon, 367 header_assemble((struct smb_hdr *) *request_buf, smb_command, tcon,
366 wct /*wct */ ); 368 wct);
367 369
368 if (tcon != NULL) 370 if (tcon != NULL)
369 cifs_stats_inc(&tcon->num_smbs_sent); 371 cifs_stats_inc(&tcon->num_smbs_sent);
@@ -523,7 +525,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
523 if (remain >= (MIN_TZ_ADJ / 2)) 525 if (remain >= (MIN_TZ_ADJ / 2))
524 result += MIN_TZ_ADJ; 526 result += MIN_TZ_ADJ;
525 if (val < 0) 527 if (val < 0)
526 result = - result; 528 result = -result;
527 server->timeAdj = result; 529 server->timeAdj = result;
528 } else { 530 } else {
529 server->timeAdj = (int)tmp; 531 server->timeAdj = (int)tmp;
@@ -600,7 +602,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
600 server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize), 602 server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize),
601 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); 603 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
602 server->maxRw = le32_to_cpu(pSMBr->MaxRawSize); 604 server->maxRw = le32_to_cpu(pSMBr->MaxRawSize);
603 cFYI(0, ("Max buf = %d", ses->server->maxBuf)); 605 cFYI(DBG2, ("Max buf = %d", ses->server->maxBuf));
604 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey); 606 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
605 server->capabilities = le32_to_cpu(pSMBr->Capabilities); 607 server->capabilities = le32_to_cpu(pSMBr->Capabilities);
606 server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); 608 server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
@@ -868,9 +870,8 @@ PsxDelete:
868 pSMB->ByteCount = cpu_to_le16(byte_count); 870 pSMB->ByteCount = cpu_to_le16(byte_count);
869 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 871 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
870 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 872 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
871 if (rc) { 873 if (rc)
872 cFYI(1, ("Posix delete returned %d", rc)); 874 cFYI(1, ("Posix delete returned %d", rc));
873 }
874 cifs_buf_release(pSMB); 875 cifs_buf_release(pSMB);
875 876
876 cifs_stats_inc(&tcon->num_deletes); 877 cifs_stats_inc(&tcon->num_deletes);
@@ -916,9 +917,8 @@ DelFileRetry:
916 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 917 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
917 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 918 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
918 cifs_stats_inc(&tcon->num_deletes); 919 cifs_stats_inc(&tcon->num_deletes);
919 if (rc) { 920 if (rc)
920 cFYI(1, ("Error in RMFile = %d", rc)); 921 cFYI(1, ("Error in RMFile = %d", rc));
921 }
922 922
923 cifs_buf_release(pSMB); 923 cifs_buf_release(pSMB);
924 if (rc == -EAGAIN) 924 if (rc == -EAGAIN)
@@ -961,9 +961,8 @@ RmDirRetry:
961 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 961 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
962 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 962 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
963 cifs_stats_inc(&tcon->num_rmdirs); 963 cifs_stats_inc(&tcon->num_rmdirs);
964 if (rc) { 964 if (rc)
965 cFYI(1, ("Error in RMDir = %d", rc)); 965 cFYI(1, ("Error in RMDir = %d", rc));
966 }
967 966
968 cifs_buf_release(pSMB); 967 cifs_buf_release(pSMB);
969 if (rc == -EAGAIN) 968 if (rc == -EAGAIN)
@@ -1005,9 +1004,8 @@ MkDirRetry:
1005 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 1004 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
1006 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 1005 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
1007 cifs_stats_inc(&tcon->num_mkdirs); 1006 cifs_stats_inc(&tcon->num_mkdirs);
1008 if (rc) { 1007 if (rc)
1009 cFYI(1, ("Error in Mkdir = %d", rc)); 1008 cFYI(1, ("Error in Mkdir = %d", rc));
1010 }
1011 1009
1012 cifs_buf_release(pSMB); 1010 cifs_buf_release(pSMB);
1013 if (rc == -EAGAIN) 1011 if (rc == -EAGAIN)
@@ -1017,7 +1015,7 @@ MkDirRetry:
1017 1015
1018int 1016int
1019CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, __u32 posix_flags, 1017CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, __u32 posix_flags,
1020 __u64 mode, __u16 * netfid, FILE_UNIX_BASIC_INFO *pRetData, 1018 __u64 mode, __u16 *netfid, FILE_UNIX_BASIC_INFO *pRetData,
1021 __u32 *pOplock, const char *name, 1019 __u32 *pOplock, const char *name,
1022 const struct nls_table *nls_codepage, int remap) 1020 const struct nls_table *nls_codepage, int remap)
1023{ 1021{
@@ -1027,8 +1025,8 @@ CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, __u32 posix_flags,
1027 int rc = 0; 1025 int rc = 0;
1028 int bytes_returned = 0; 1026 int bytes_returned = 0;
1029 __u16 params, param_offset, offset, byte_count, count; 1027 __u16 params, param_offset, offset, byte_count, count;
1030 OPEN_PSX_REQ * pdata; 1028 OPEN_PSX_REQ *pdata;
1031 OPEN_PSX_RSP * psx_rsp; 1029 OPEN_PSX_RSP *psx_rsp;
1032 1030
1033 cFYI(1, ("In POSIX Create")); 1031 cFYI(1, ("In POSIX Create"));
1034PsxCreat: 1032PsxCreat:
@@ -1110,9 +1108,7 @@ PsxCreat:
1110 /* check to make sure response data is there */ 1108 /* check to make sure response data is there */
1111 if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC)) { 1109 if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC)) {
1112 pRetData->Type = cpu_to_le32(-1); /* unknown */ 1110 pRetData->Type = cpu_to_le32(-1); /* unknown */
1113#ifdef CONFIG_CIFS_DEBUG2 1111 cFYI(DBG2, ("unknown type"));
1114 cFYI(1, ("unknown type"));
1115#endif
1116 } else { 1112 } else {
1117 if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP) 1113 if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP)
1118 + sizeof(FILE_UNIX_BASIC_INFO)) { 1114 + sizeof(FILE_UNIX_BASIC_INFO)) {
@@ -1169,8 +1165,8 @@ static __u16 convert_disposition(int disposition)
1169int 1165int
1170SMBLegacyOpen(const int xid, struct cifsTconInfo *tcon, 1166SMBLegacyOpen(const int xid, struct cifsTconInfo *tcon,
1171 const char *fileName, const int openDisposition, 1167 const char *fileName, const int openDisposition,
1172 const int access_flags, const int create_options, __u16 * netfid, 1168 const int access_flags, const int create_options, __u16 *netfid,
1173 int *pOplock, FILE_ALL_INFO * pfile_info, 1169 int *pOplock, FILE_ALL_INFO *pfile_info,
1174 const struct nls_table *nls_codepage, int remap) 1170 const struct nls_table *nls_codepage, int remap)
1175{ 1171{
1176 int rc = -EACCES; 1172 int rc = -EACCES;
@@ -1221,8 +1217,8 @@ OldOpenRetry:
1221 1217
1222 if (create_options & CREATE_OPTION_SPECIAL) 1218 if (create_options & CREATE_OPTION_SPECIAL)
1223 pSMB->FileAttributes = cpu_to_le16(ATTR_SYSTEM); 1219 pSMB->FileAttributes = cpu_to_le16(ATTR_SYSTEM);
1224 else 1220 else /* BB FIXME BB */
1225 pSMB->FileAttributes = cpu_to_le16(0/*ATTR_NORMAL*/); /* BB FIXME */ 1221 pSMB->FileAttributes = cpu_to_le16(0/*ATTR_NORMAL*/);
1226 1222
1227 /* if ((omode & S_IWUGO) == 0) 1223 /* if ((omode & S_IWUGO) == 0)
1228 pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);*/ 1224 pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);*/
@@ -1284,8 +1280,8 @@ OldOpenRetry:
1284int 1280int
1285CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon, 1281CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon,
1286 const char *fileName, const int openDisposition, 1282 const char *fileName, const int openDisposition,
1287 const int access_flags, const int create_options, __u16 * netfid, 1283 const int access_flags, const int create_options, __u16 *netfid,
1288 int *pOplock, FILE_ALL_INFO * pfile_info, 1284 int *pOplock, FILE_ALL_INFO *pfile_info,
1289 const struct nls_table *nls_codepage, int remap) 1285 const struct nls_table *nls_codepage, int remap)
1290{ 1286{
1291 int rc = -EACCES; 1287 int rc = -EACCES;
@@ -1556,9 +1552,9 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1556 } /* else setting file size with write of zero bytes */ 1552 } /* else setting file size with write of zero bytes */
1557 if (wct == 14) 1553 if (wct == 14)
1558 byte_count = bytes_sent + 1; /* pad */ 1554 byte_count = bytes_sent + 1; /* pad */
1559 else /* wct == 12 */ { 1555 else /* wct == 12 */
1560 byte_count = bytes_sent + 5; /* bigger pad, smaller smb hdr */ 1556 byte_count = bytes_sent + 5; /* bigger pad, smaller smb hdr */
1561 } 1557
1562 pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF); 1558 pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF);
1563 pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16); 1559 pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16);
1564 pSMB->hdr.smb_buf_length += byte_count; 1560 pSMB->hdr.smb_buf_length += byte_count;
@@ -1663,7 +1659,7 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1663 rc = -EIO; 1659 rc = -EIO;
1664 *nbytes = 0; 1660 *nbytes = 0;
1665 } else { 1661 } else {
1666 WRITE_RSP * pSMBr = (WRITE_RSP *)iov[0].iov_base; 1662 WRITE_RSP *pSMBr = (WRITE_RSP *)iov[0].iov_base;
1667 *nbytes = le16_to_cpu(pSMBr->CountHigh); 1663 *nbytes = le16_to_cpu(pSMBr->CountHigh);
1668 *nbytes = (*nbytes) << 16; 1664 *nbytes = (*nbytes) << 16;
1669 *nbytes += le16_to_cpu(pSMBr->Count); 1665 *nbytes += le16_to_cpu(pSMBr->Count);
@@ -1744,9 +1740,8 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1744 /* SMB buffer freed by function above */ 1740 /* SMB buffer freed by function above */
1745 } 1741 }
1746 cifs_stats_inc(&tcon->num_locks); 1742 cifs_stats_inc(&tcon->num_locks);
1747 if (rc) { 1743 if (rc)
1748 cFYI(1, ("Send error in Lock = %d", rc)); 1744 cFYI(1, ("Send error in Lock = %d", rc));
1749 }
1750 1745
1751 /* Note: On -EAGAIN error only caller can retry on handle based calls 1746 /* Note: On -EAGAIN error only caller can retry on handle based calls
1752 since file handle passed in no longer valid */ 1747 since file handle passed in no longer valid */
@@ -1791,7 +1786,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1791 1786
1792 count = sizeof(struct cifs_posix_lock); 1787 count = sizeof(struct cifs_posix_lock);
1793 pSMB->MaxParameterCount = cpu_to_le16(2); 1788 pSMB->MaxParameterCount = cpu_to_le16(2);
1794 pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB PDU from sess */ 1789 pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB from sess */
1795 pSMB->SetupCount = 1; 1790 pSMB->SetupCount = 1;
1796 pSMB->Reserved3 = 0; 1791 pSMB->Reserved3 = 0;
1797 if (get_flag) 1792 if (get_flag)
@@ -1972,9 +1967,8 @@ renameRetry:
1972 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 1967 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
1973 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 1968 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
1974 cifs_stats_inc(&tcon->num_renames); 1969 cifs_stats_inc(&tcon->num_renames);
1975 if (rc) { 1970 if (rc)
1976 cFYI(1, ("Send error in rename = %d", rc)); 1971 cFYI(1, ("Send error in rename = %d", rc));
1977 }
1978 1972
1979 cifs_buf_release(pSMB); 1973 cifs_buf_release(pSMB);
1980 1974
@@ -2016,7 +2010,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
2016 data_offset = (char *) (&pSMB->hdr.Protocol) + offset; 2010 data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
2017 rename_info = (struct set_file_rename *) data_offset; 2011 rename_info = (struct set_file_rename *) data_offset;
2018 pSMB->MaxParameterCount = cpu_to_le16(2); 2012 pSMB->MaxParameterCount = cpu_to_le16(2);
2019 pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB PDU from sess */ 2013 pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB from sess */
2020 pSMB->SetupCount = 1; 2014 pSMB->SetupCount = 1;
2021 pSMB->Reserved3 = 0; 2015 pSMB->Reserved3 = 0;
2022 pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION); 2016 pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION);
@@ -2052,9 +2046,8 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
2052 rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB, 2046 rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB,
2053 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2047 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2054 cifs_stats_inc(&pTcon->num_t2renames); 2048 cifs_stats_inc(&pTcon->num_t2renames);
2055 if (rc) { 2049 if (rc)
2056 cFYI(1, ("Send error in Rename (by file handle) = %d", rc)); 2050 cFYI(1, ("Send error in Rename (by file handle) = %d", rc));
2057 }
2058 2051
2059 cifs_buf_release(pSMB); 2052 cifs_buf_release(pSMB);
2060 2053
@@ -2211,9 +2204,8 @@ createSymLinkRetry:
2211 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2204 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2212 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2205 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2213 cifs_stats_inc(&tcon->num_symlinks); 2206 cifs_stats_inc(&tcon->num_symlinks);
2214 if (rc) { 2207 if (rc)
2215 cFYI(1, ("Send error in SetPathInfo create symlink = %d", rc)); 2208 cFYI(1, ("Send error in SetPathInfo create symlink = %d", rc));
2216 }
2217 2209
2218 if (pSMB) 2210 if (pSMB)
2219 cifs_buf_release(pSMB); 2211 cifs_buf_release(pSMB);
@@ -2299,9 +2291,8 @@ createHardLinkRetry:
2299 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2291 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2300 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2292 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2301 cifs_stats_inc(&tcon->num_hardlinks); 2293 cifs_stats_inc(&tcon->num_hardlinks);
2302 if (rc) { 2294 if (rc)
2303 cFYI(1, ("Send error in SetPathInfo (hard link) = %d", rc)); 2295 cFYI(1, ("Send error in SetPathInfo (hard link) = %d", rc));
2304 }
2305 2296
2306 cifs_buf_release(pSMB); 2297 cifs_buf_release(pSMB);
2307 if (rc == -EAGAIN) 2298 if (rc == -EAGAIN)
@@ -2370,9 +2361,9 @@ winCreateHardLinkRetry:
2370 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2361 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2371 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2362 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2372 cifs_stats_inc(&tcon->num_hardlinks); 2363 cifs_stats_inc(&tcon->num_hardlinks);
2373 if (rc) { 2364 if (rc)
2374 cFYI(1, ("Send error in hard link (NT rename) = %d", rc)); 2365 cFYI(1, ("Send error in hard link (NT rename) = %d", rc));
2375 } 2366
2376 cifs_buf_release(pSMB); 2367 cifs_buf_release(pSMB);
2377 if (rc == -EAGAIN) 2368 if (rc == -EAGAIN)
2378 goto winCreateHardLinkRetry; 2369 goto winCreateHardLinkRetry;
@@ -2968,9 +2959,8 @@ setAclRetry:
2968 pSMB->ByteCount = cpu_to_le16(byte_count); 2959 pSMB->ByteCount = cpu_to_le16(byte_count);
2969 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2960 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2970 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2961 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2971 if (rc) { 2962 if (rc)
2972 cFYI(1, ("Set POSIX ACL returned %d", rc)); 2963 cFYI(1, ("Set POSIX ACL returned %d", rc));
2973 }
2974 2964
2975setACLerrorExit: 2965setACLerrorExit:
2976 cifs_buf_release(pSMB); 2966 cifs_buf_release(pSMB);
@@ -2982,7 +2972,7 @@ setACLerrorExit:
2982/* BB fix tabs in this function FIXME BB */ 2972/* BB fix tabs in this function FIXME BB */
2983int 2973int
2984CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon, 2974CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon,
2985 const int netfid, __u64 * pExtAttrBits, __u64 *pMask) 2975 const int netfid, __u64 *pExtAttrBits, __u64 *pMask)
2986{ 2976{
2987 int rc = 0; 2977 int rc = 0;
2988 struct smb_t2_qfi_req *pSMB = NULL; 2978 struct smb_t2_qfi_req *pSMB = NULL;
@@ -3000,7 +2990,7 @@ GetExtAttrRetry:
3000 if (rc) 2990 if (rc)
3001 return rc; 2991 return rc;
3002 2992
3003 params = 2 /* level */ +2 /* fid */; 2993 params = 2 /* level */ + 2 /* fid */;
3004 pSMB->t2.TotalDataCount = 0; 2994 pSMB->t2.TotalDataCount = 0;
3005 pSMB->t2.MaxParameterCount = cpu_to_le16(4); 2995 pSMB->t2.MaxParameterCount = cpu_to_le16(4);
3006 /* BB find exact max data count below from sess structure BB */ 2996 /* BB find exact max data count below from sess structure BB */
@@ -3071,7 +3061,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3071{ 3061{
3072 int rc = 0; 3062 int rc = 0;
3073 int buf_type = 0; 3063 int buf_type = 0;
3074 QUERY_SEC_DESC_REQ * pSMB; 3064 QUERY_SEC_DESC_REQ *pSMB;
3075 struct kvec iov[1]; 3065 struct kvec iov[1];
3076 3066
3077 cFYI(1, ("GetCifsACL")); 3067 cFYI(1, ("GetCifsACL"));
@@ -3101,7 +3091,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3101 if (rc) { 3091 if (rc) {
3102 cFYI(1, ("Send error in QuerySecDesc = %d", rc)); 3092 cFYI(1, ("Send error in QuerySecDesc = %d", rc));
3103 } else { /* decode response */ 3093 } else { /* decode response */
3104 __le32 * parm; 3094 __le32 *parm;
3105 __u32 parm_len; 3095 __u32 parm_len;
3106 __u32 acl_len; 3096 __u32 acl_len;
3107 struct smb_com_ntransact_rsp *pSMBr; 3097 struct smb_com_ntransact_rsp *pSMBr;
@@ -3230,8 +3220,8 @@ int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon,
3230 FILE_ALL_INFO *pFinfo, 3220 FILE_ALL_INFO *pFinfo,
3231 const struct nls_table *nls_codepage, int remap) 3221 const struct nls_table *nls_codepage, int remap)
3232{ 3222{
3233 QUERY_INFORMATION_REQ * pSMB; 3223 QUERY_INFORMATION_REQ *pSMB;
3234 QUERY_INFORMATION_RSP * pSMBr; 3224 QUERY_INFORMATION_RSP *pSMBr;
3235 int rc = 0; 3225 int rc = 0;
3236 int bytes_returned; 3226 int bytes_returned;
3237 int name_len; 3227 int name_len;
@@ -3263,9 +3253,11 @@ QInfRetry:
3263 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3253 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3264 if (rc) { 3254 if (rc) {
3265 cFYI(1, ("Send error in QueryInfo = %d", rc)); 3255 cFYI(1, ("Send error in QueryInfo = %d", rc));
3266 } else if (pFinfo) { /* decode response */ 3256 } else if (pFinfo) {
3267 struct timespec ts; 3257 struct timespec ts;
3268 __u32 time = le32_to_cpu(pSMBr->last_write_time); 3258 __u32 time = le32_to_cpu(pSMBr->last_write_time);
3259
3260 /* decode response */
3269 /* BB FIXME - add time zone adjustment BB */ 3261 /* BB FIXME - add time zone adjustment BB */
3270 memset(pFinfo, 0, sizeof(FILE_ALL_INFO)); 3262 memset(pFinfo, 0, sizeof(FILE_ALL_INFO));
3271 ts.tv_nsec = 0; 3263 ts.tv_nsec = 0;
@@ -3296,7 +3288,7 @@ QInfRetry:
3296int 3288int
3297CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon, 3289CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon,
3298 const unsigned char *searchName, 3290 const unsigned char *searchName,
3299 FILE_ALL_INFO * pFindData, 3291 FILE_ALL_INFO *pFindData,
3300 int legacy /* old style infolevel */, 3292 int legacy /* old style infolevel */,
3301 const struct nls_table *nls_codepage, int remap) 3293 const struct nls_table *nls_codepage, int remap)
3302{ 3294{
@@ -3371,10 +3363,12 @@ QPathInfoRetry:
3371 else if (pFindData) { 3363 else if (pFindData) {
3372 int size; 3364 int size;
3373 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 3365 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
3374 if (legacy) /* we do not read the last field, EAsize, 3366
3375 fortunately since it varies by subdialect 3367 /* On legacy responses we do not read the last field,
3376 and on Set vs. Get, is two bytes or 4 3368 EAsize, fortunately since it varies by subdialect and
3377 bytes depending but we don't care here */ 3369 also note it differs on Set vs. Get, ie two bytes or 4
3370 bytes depending but we don't care here */
3371 if (legacy)
3378 size = sizeof(FILE_INFO_STANDARD); 3372 size = sizeof(FILE_INFO_STANDARD);
3379 else 3373 else
3380 size = sizeof(FILE_ALL_INFO); 3374 size = sizeof(FILE_ALL_INFO);
@@ -3476,85 +3470,6 @@ UnixQPathInfoRetry:
3476 return rc; 3470 return rc;
3477} 3471}
3478 3472
3479#if 0 /* function unused at present */
3480int CIFSFindSingle(const int xid, struct cifsTconInfo *tcon,
3481 const char *searchName, FILE_ALL_INFO * findData,
3482 const struct nls_table *nls_codepage)
3483{
3484/* level 257 SMB_ */
3485 TRANSACTION2_FFIRST_REQ *pSMB = NULL;
3486 TRANSACTION2_FFIRST_RSP *pSMBr = NULL;
3487 int rc = 0;
3488 int bytes_returned;
3489 int name_len;
3490 __u16 params, byte_count;
3491
3492 cFYI(1, ("In FindUnique"));
3493findUniqueRetry:
3494 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
3495 (void **) &pSMBr);
3496 if (rc)
3497 return rc;
3498
3499 if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
3500 name_len =
3501 cifsConvertToUCS((__le16 *) pSMB->FileName, searchName,
3502 PATH_MAX, nls_codepage);
3503 name_len++; /* trailing null */
3504 name_len *= 2;
3505 } else { /* BB improve the check for buffer overruns BB */
3506 name_len = strnlen(searchName, PATH_MAX);
3507 name_len++; /* trailing null */
3508 strncpy(pSMB->FileName, searchName, name_len);
3509 }
3510
3511 params = 12 + name_len /* includes null */ ;
3512 pSMB->TotalDataCount = 0; /* no EAs */
3513 pSMB->MaxParameterCount = cpu_to_le16(2);
3514 pSMB->MaxDataCount = cpu_to_le16(4000); /* BB find exact max SMB PDU from sess structure BB */
3515 pSMB->MaxSetupCount = 0;
3516 pSMB->Reserved = 0;
3517 pSMB->Flags = 0;
3518 pSMB->Timeout = 0;
3519 pSMB->Reserved2 = 0;
3520 pSMB->ParameterOffset = cpu_to_le16(
3521 offsetof(struct smb_com_transaction2_ffirst_req, InformationLevel)-4);
3522 pSMB->DataCount = 0;
3523 pSMB->DataOffset = 0;
3524 pSMB->SetupCount = 1; /* one byte, no need to le convert */
3525 pSMB->Reserved3 = 0;
3526 pSMB->SubCommand = cpu_to_le16(TRANS2_FIND_FIRST);
3527 byte_count = params + 1 /* pad */ ;
3528 pSMB->TotalParameterCount = cpu_to_le16(params);
3529 pSMB->ParameterCount = pSMB->TotalParameterCount;
3530 pSMB->SearchAttributes =
3531 cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM |
3532 ATTR_DIRECTORY);
3533 pSMB->SearchCount = cpu_to_le16(16); /* BB increase */
3534 pSMB->SearchFlags = cpu_to_le16(1);
3535 pSMB->InformationLevel = cpu_to_le16(SMB_FIND_FILE_DIRECTORY_INFO);
3536 pSMB->SearchStorageType = 0; /* BB what should we set this to? BB */
3537 pSMB->hdr.smb_buf_length += byte_count;
3538 pSMB->ByteCount = cpu_to_le16(byte_count);
3539
3540 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3541 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3542
3543 if (rc) {
3544 cFYI(1, ("Send error in FindFileDirInfo = %d", rc));
3545 } else { /* decode response */
3546 cifs_stats_inc(&tcon->num_ffirst);
3547 /* BB fill in */
3548 }
3549
3550 cifs_buf_release(pSMB);
3551 if (rc == -EAGAIN)
3552 goto findUniqueRetry;
3553
3554 return rc;
3555}
3556#endif /* end unused (temporarily) function */
3557
3558/* xid, tcon, searchName and codepage are input parms, rest are returned */ 3473/* xid, tcon, searchName and codepage are input parms, rest are returned */
3559int 3474int
3560CIFSFindFirst(const int xid, struct cifsTconInfo *tcon, 3475CIFSFindFirst(const int xid, struct cifsTconInfo *tcon,
@@ -3566,7 +3481,7 @@ CIFSFindFirst(const int xid, struct cifsTconInfo *tcon,
3566/* level 257 SMB_ */ 3481/* level 257 SMB_ */
3567 TRANSACTION2_FFIRST_REQ *pSMB = NULL; 3482 TRANSACTION2_FFIRST_REQ *pSMB = NULL;
3568 TRANSACTION2_FFIRST_RSP *pSMBr = NULL; 3483 TRANSACTION2_FFIRST_RSP *pSMBr = NULL;
3569 T2_FFIRST_RSP_PARMS * parms; 3484 T2_FFIRST_RSP_PARMS *parms;
3570 int rc = 0; 3485 int rc = 0;
3571 int bytes_returned = 0; 3486 int bytes_returned = 0;
3572 int name_len; 3487 int name_len;
@@ -3697,7 +3612,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3697{ 3612{
3698 TRANSACTION2_FNEXT_REQ *pSMB = NULL; 3613 TRANSACTION2_FNEXT_REQ *pSMB = NULL;
3699 TRANSACTION2_FNEXT_RSP *pSMBr = NULL; 3614 TRANSACTION2_FNEXT_RSP *pSMBr = NULL;
3700 T2_FNEXT_RSP_PARMS * parms; 3615 T2_FNEXT_RSP_PARMS *parms;
3701 char *response_data; 3616 char *response_data;
3702 int rc = 0; 3617 int rc = 0;
3703 int bytes_returned, name_len; 3618 int bytes_returned, name_len;
@@ -3836,9 +3751,9 @@ CIFSFindClose(const int xid, struct cifsTconInfo *tcon,
3836 pSMB->FileID = searchHandle; 3751 pSMB->FileID = searchHandle;
3837 pSMB->ByteCount = 0; 3752 pSMB->ByteCount = 0;
3838 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 3753 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
3839 if (rc) { 3754 if (rc)
3840 cERROR(1, ("Send error in FindClose = %d", rc)); 3755 cERROR(1, ("Send error in FindClose = %d", rc));
3841 } 3756
3842 cifs_stats_inc(&tcon->num_fclose); 3757 cifs_stats_inc(&tcon->num_fclose);
3843 3758
3844 /* Since session is dead, search handle closed on server already */ 3759 /* Since session is dead, search handle closed on server already */
@@ -3851,7 +3766,7 @@ CIFSFindClose(const int xid, struct cifsTconInfo *tcon,
3851int 3766int
3852CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon, 3767CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
3853 const unsigned char *searchName, 3768 const unsigned char *searchName,
3854 __u64 * inode_number, 3769 __u64 *inode_number,
3855 const struct nls_table *nls_codepage, int remap) 3770 const struct nls_table *nls_codepage, int remap)
3856{ 3771{
3857 int rc = 0; 3772 int rc = 0;
@@ -4560,9 +4475,8 @@ SETFSUnixRetry:
4560 cERROR(1, ("Send error in SETFSUnixInfo = %d", rc)); 4475 cERROR(1, ("Send error in SETFSUnixInfo = %d", rc));
4561 } else { /* decode response */ 4476 } else { /* decode response */
4562 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4477 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4563 if (rc) { 4478 if (rc)
4564 rc = -EIO; /* bad smb */ 4479 rc = -EIO; /* bad smb */
4565 }
4566 } 4480 }
4567 cifs_buf_release(pSMB); 4481 cifs_buf_release(pSMB);
4568 4482
@@ -4744,9 +4658,8 @@ SetEOFRetry:
4744 pSMB->ByteCount = cpu_to_le16(byte_count); 4658 pSMB->ByteCount = cpu_to_le16(byte_count);
4745 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4659 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4746 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4660 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4747 if (rc) { 4661 if (rc)
4748 cFYI(1, ("SetPathInfo (file size) returned %d", rc)); 4662 cFYI(1, ("SetPathInfo (file size) returned %d", rc));
4749 }
4750 4663
4751 cifs_buf_release(pSMB); 4664 cifs_buf_release(pSMB);
4752 4665
@@ -4897,9 +4810,8 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
4897 pSMB->ByteCount = cpu_to_le16(byte_count); 4810 pSMB->ByteCount = cpu_to_le16(byte_count);
4898 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); 4811 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
4899 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 4812 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
4900 if (rc) { 4813 if (rc)
4901 cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc)); 4814 cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc));
4902 }
4903 4815
4904 /* Note: On -EAGAIN error only caller can retry on handle based calls 4816 /* Note: On -EAGAIN error only caller can retry on handle based calls
4905 since file handle passed in no longer valid */ 4817 since file handle passed in no longer valid */
@@ -4975,9 +4887,8 @@ SetTimesRetry:
4975 pSMB->ByteCount = cpu_to_le16(byte_count); 4887 pSMB->ByteCount = cpu_to_le16(byte_count);
4976 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4888 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4977 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4889 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4978 if (rc) { 4890 if (rc)
4979 cFYI(1, ("SetPathInfo (times) returned %d", rc)); 4891 cFYI(1, ("SetPathInfo (times) returned %d", rc));
4980 }
4981 4892
4982 cifs_buf_release(pSMB); 4893 cifs_buf_release(pSMB);
4983 4894
@@ -5027,9 +4938,8 @@ SetAttrLgcyRetry:
5027 pSMB->ByteCount = cpu_to_le16(name_len + 1); 4938 pSMB->ByteCount = cpu_to_le16(name_len + 1);
5028 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4939 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5029 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4940 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
5030 if (rc) { 4941 if (rc)
5031 cFYI(1, ("Error in LegacySetAttr = %d", rc)); 4942 cFYI(1, ("Error in LegacySetAttr = %d", rc));
5032 }
5033 4943
5034 cifs_buf_release(pSMB); 4944 cifs_buf_release(pSMB);
5035 4945
@@ -5138,9 +5048,8 @@ setPermsRetry:
5138 pSMB->ByteCount = cpu_to_le16(byte_count); 5048 pSMB->ByteCount = cpu_to_le16(byte_count);
5139 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5049 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5140 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5050 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
5141 if (rc) { 5051 if (rc)
5142 cFYI(1, ("SetPathInfo (perms) returned %d", rc)); 5052 cFYI(1, ("SetPathInfo (perms) returned %d", rc));
5143 }
5144 5053
5145 if (pSMB) 5054 if (pSMB)
5146 cifs_buf_release(pSMB); 5055 cifs_buf_release(pSMB);
@@ -5615,9 +5524,8 @@ SetEARetry:
5615 pSMB->ByteCount = cpu_to_le16(byte_count); 5524 pSMB->ByteCount = cpu_to_le16(byte_count);
5616 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5525 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5617 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5526 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
5618 if (rc) { 5527 if (rc)
5619 cFYI(1, ("SetPathInfo (EA) returned %d", rc)); 5528 cFYI(1, ("SetPathInfo (EA) returned %d", rc));
5620 }
5621 5529
5622 cifs_buf_release(pSMB); 5530 cifs_buf_release(pSMB);
5623 5531
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 65d0ba72e78f..8dbfa97cd18c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1722,8 +1722,15 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
1722 originally at mount time */ 1722 originally at mount time */
1723 if ((saved_cap & CIFS_UNIX_POSIX_ACL_CAP) == 0) 1723 if ((saved_cap & CIFS_UNIX_POSIX_ACL_CAP) == 0)
1724 cap &= ~CIFS_UNIX_POSIX_ACL_CAP; 1724 cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
1725 if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) 1725 if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) {
1726 if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP)
1727 cERROR(1, ("POSIXPATH support change"));
1726 cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; 1728 cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
1729 } else if ((cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) {
1730 cERROR(1, ("possible reconnect error"));
1731 cERROR(1,
1732 ("server disabled POSIX path support"));
1733 }
1727 } 1734 }
1728 1735
1729 cap &= CIFS_UNIX_CAP_MASK; 1736 cap &= CIFS_UNIX_CAP_MASK;
@@ -1753,9 +1760,8 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
1753 if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) { 1760 if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) {
1754 if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) { 1761 if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) {
1755 CIFS_SB(sb)->rsize = 127 * 1024; 1762 CIFS_SB(sb)->rsize = 127 * 1024;
1756#ifdef CONFIG_CIFS_DEBUG2 1763 cFYI(DBG2,
1757 cFYI(1, ("larger reads not supported by srv")); 1764 ("larger reads not supported by srv"));
1758#endif
1759 } 1765 }
1760 } 1766 }
1761 1767
@@ -1792,6 +1798,26 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
1792 } 1798 }
1793} 1799}
1794 1800
1801static void
1802convert_delimiter(char *path, char delim)
1803{
1804 int i;
1805 char old_delim;
1806
1807 if (path == NULL)
1808 return;
1809
1810 if (delim == '/')
1811 old_delim = '\\';
1812 else
1813 old_delim = '/';
1814
1815 for (i = 0; path[i] != '\0'; i++) {
1816 if (path[i] == old_delim)
1817 path[i] = delim;
1818 }
1819}
1820
1795int 1821int
1796cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 1822cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1797 char *mount_data, const char *devname) 1823 char *mount_data, const char *devname)
@@ -2057,7 +2083,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2057 cifs_sb->prepath = volume_info.prepath; 2083 cifs_sb->prepath = volume_info.prepath;
2058 if (cifs_sb->prepath) { 2084 if (cifs_sb->prepath) {
2059 cifs_sb->prepathlen = strlen(cifs_sb->prepath); 2085 cifs_sb->prepathlen = strlen(cifs_sb->prepath);
2060 cifs_sb->prepath[0] = CIFS_DIR_SEP(cifs_sb); 2086 /* we can not convert the / to \ in the path
2087 separators in the prefixpath yet because we do not
2088 know (until reset_cifs_unix_caps is called later)
2089 whether POSIX PATH CAP is available. We normalize
2090 the / to \ after reset_cifs_unix_caps is called */
2061 volume_info.prepath = NULL; 2091 volume_info.prepath = NULL;
2062 } else 2092 } else
2063 cifs_sb->prepathlen = 0; 2093 cifs_sb->prepathlen = 0;
@@ -2225,11 +2255,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2225 else 2255 else
2226 tcon->unix_ext = 0; /* server does not support them */ 2256 tcon->unix_ext = 0; /* server does not support them */
2227 2257
2258 /* convert forward to back slashes in prepath here if needed */
2259 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
2260 convert_delimiter(cifs_sb->prepath,
2261 CIFS_DIR_SEP(cifs_sb));
2262
2228 if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) { 2263 if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) {
2229 cifs_sb->rsize = 1024 * 127; 2264 cifs_sb->rsize = 1024 * 127;
2230#ifdef CONFIG_CIFS_DEBUG2 2265 cFYI(DBG2,
2231 cFYI(1, ("no very large read support, rsize now 127K")); 2266 ("no very large read support, rsize now 127K"));
2232#endif
2233 } 2267 }
2234 if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X)) 2268 if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X))
2235 cifs_sb->wsize = min(cifs_sb->wsize, 2269 cifs_sb->wsize = min(cifs_sb->wsize,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 699ec1198409..0f5c62ba4038 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * vfs operations that deal with dentries 4 * vfs operations that deal with dentries
5 * 5 *
6 * Copyright (C) International Business Machines Corp., 2002,2007 6 * Copyright (C) International Business Machines Corp., 2002,2008
7 * Author(s): Steve French (sfrench@us.ibm.com) 7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * 8 *
9 * This library is free software; you can redistribute it and/or modify 9 * This library is free software; you can redistribute it and/or modify
@@ -111,16 +111,6 @@ cifs_bp_rename_retry:
111 return full_path; 111 return full_path;
112} 112}
113 113
114/* char * build_wildcard_path_from_dentry(struct dentry *direntry)
115{
116 if(full_path == NULL)
117 return full_path;
118
119 full_path[namelen] = '\\';
120 full_path[namelen+1] = '*';
121 full_path[namelen+2] = 0;
122BB remove above eight lines BB */
123
124/* Inode operations in similar order to how they appear in Linux file fs.h */ 114/* Inode operations in similar order to how they appear in Linux file fs.h */
125 115
126int 116int
@@ -171,9 +161,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
171 disposition = FILE_OVERWRITE_IF; 161 disposition = FILE_OVERWRITE_IF;
172 else if ((oflags & O_CREAT) == O_CREAT) 162 else if ((oflags & O_CREAT) == O_CREAT)
173 disposition = FILE_OPEN_IF; 163 disposition = FILE_OPEN_IF;
174 else { 164 else
175 cFYI(1, ("Create flag not set in create function")); 165 cFYI(1, ("Create flag not set in create function"));
176 }
177 } 166 }
178 167
179 /* BB add processing to set equivalent of mode - e.g. via CreateX with 168 /* BB add processing to set equivalent of mode - e.g. via CreateX with
@@ -240,7 +229,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
240 inode->i_sb, xid); 229 inode->i_sb, xid);
241 else { 230 else {
242 rc = cifs_get_inode_info(&newinode, full_path, 231 rc = cifs_get_inode_info(&newinode, full_path,
243 buf, inode->i_sb, xid); 232 buf, inode->i_sb, xid,
233 &fileHandle);
244 if (newinode) { 234 if (newinode) {
245 newinode->i_mode = mode; 235 newinode->i_mode = mode;
246 if ((oplock & CIFS_CREATE_ACTION) && 236 if ((oplock & CIFS_CREATE_ACTION) &&
@@ -367,7 +357,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
367 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { 357 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
368 int oplock = 0; 358 int oplock = 0;
369 u16 fileHandle; 359 u16 fileHandle;
370 FILE_ALL_INFO * buf; 360 FILE_ALL_INFO *buf;
371 361
372 cFYI(1, ("sfu compat create special file")); 362 cFYI(1, ("sfu compat create special file"));
373 363
@@ -494,7 +484,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
494 parent_dir_inode->i_sb, xid); 484 parent_dir_inode->i_sb, xid);
495 else 485 else
496 rc = cifs_get_inode_info(&newInode, full_path, NULL, 486 rc = cifs_get_inode_info(&newInode, full_path, NULL,
497 parent_dir_inode->i_sb, xid); 487 parent_dir_inode->i_sb, xid, NULL);
498 488
499 if ((rc == 0) && (newInode != NULL)) { 489 if ((rc == 0) && (newInode != NULL)) {
500 if (pTcon->nocase) 490 if (pTcon->nocase)
@@ -534,9 +524,8 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
534 int isValid = 1; 524 int isValid = 1;
535 525
536 if (direntry->d_inode) { 526 if (direntry->d_inode) {
537 if (cifs_revalidate(direntry)) { 527 if (cifs_revalidate(direntry))
538 return 0; 528 return 0;
539 }
540 } else { 529 } else {
541 cFYI(1, ("neg dentry 0x%p name = %s", 530 cFYI(1, ("neg dentry 0x%p name = %s",
542 direntry, direntry->d_name.name)); 531 direntry, direntry->d_name.name));
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index ef7f43824347..7cc86c418182 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -77,14 +77,14 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
77 /* search for server name delimiter */ 77 /* search for server name delimiter */
78 len = strlen(unc); 78 len = strlen(unc);
79 if (len < 3) { 79 if (len < 3) {
80 cFYI(1, ("%s: unc is too short: %s", __FUNCTION__, unc)); 80 cFYI(1, ("%s: unc is too short: %s", __func__, unc));
81 return -EINVAL; 81 return -EINVAL;
82 } 82 }
83 len -= 2; 83 len -= 2;
84 name = memchr(unc+2, '\\', len); 84 name = memchr(unc+2, '\\', len);
85 if (!name) { 85 if (!name) {
86 cFYI(1, ("%s: probably server name is whole unc: %s", 86 cFYI(1, ("%s: probably server name is whole unc: %s",
87 __FUNCTION__, unc)); 87 __func__, unc));
88 } else { 88 } else {
89 len = (name - unc) - 2/* leading // */; 89 len = (name - unc) - 2/* leading // */;
90 } 90 }
@@ -104,7 +104,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
104 if (*ip_addr) { 104 if (*ip_addr) {
105 memcpy(*ip_addr, rkey->payload.data, len); 105 memcpy(*ip_addr, rkey->payload.data, len);
106 (*ip_addr)[len] = '\0'; 106 (*ip_addr)[len] = '\0';
107 cFYI(1, ("%s: resolved: %s to %s", __FUNCTION__, 107 cFYI(1, ("%s: resolved: %s to %s", __func__,
108 rkey->description, 108 rkey->description,
109 *ip_addr 109 *ip_addr
110 )); 110 ));
@@ -114,7 +114,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
114 } 114 }
115 key_put(rkey); 115 key_put(rkey);
116 } else { 116 } else {
117 cERROR(1, ("%s: unable to resolve: %s", __FUNCTION__, name)); 117 cERROR(1, ("%s: unable to resolve: %s", __func__, name));
118 } 118 }
119 119
120 kfree(name); 120 kfree(name);
diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h
index 073fdc3db419..966e9288930b 100644
--- a/fs/cifs/dns_resolve.h
+++ b/fs/cifs/dns_resolve.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/dns_resolve.h -- DNS Resolver upcall management for CIFS DFS 2 * fs/cifs/dns_resolve.h -- DNS Resolver upcall management for CIFS DFS
3 * Handles host name to IP address resolution 3 * Handles host name to IP address resolution
4 * 4 *
5 * Copyright (c) International Business Machines Corp., 2008 5 * Copyright (c) International Business Machines Corp., 2008
6 * Author(s): Steve French (sfrench@us.ibm.com) 6 * Author(s): Steve French (sfrench@us.ibm.com)
7 * 7 *
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index 995474c90885..7d1d5aa4c430 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -35,9 +35,8 @@ static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags)
35 35
36 /* No way on Linux VFS to ask to monitor xattr 36 /* No way on Linux VFS to ask to monitor xattr
37 changes (and no stream support either */ 37 changes (and no stream support either */
38 if (fcntl_notify_flags & DN_ACCESS) { 38 if (fcntl_notify_flags & DN_ACCESS)
39 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS; 39 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
40 }
41 if (fcntl_notify_flags & DN_MODIFY) { 40 if (fcntl_notify_flags & DN_MODIFY) {
42 /* What does this mean on directories? */ 41 /* What does this mean on directories? */
43 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE | 42 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE |
@@ -47,9 +46,8 @@ static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags)
47 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION | 46 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION |
48 FILE_NOTIFY_CHANGE_LAST_WRITE; 47 FILE_NOTIFY_CHANGE_LAST_WRITE;
49 } 48 }
50 if (fcntl_notify_flags & DN_DELETE) { 49 if (fcntl_notify_flags & DN_DELETE)
51 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE; 50 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE;
52 }
53 if (fcntl_notify_flags & DN_RENAME) { 51 if (fcntl_notify_flags & DN_RENAME) {
54 /* BB review this - checking various server behaviors */ 52 /* BB review this - checking various server behaviors */
55 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME | 53 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5f7c374ae89c..40b690073fc1 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -145,7 +145,7 @@ client_can_cache:
145 full_path, inode->i_sb, xid); 145 full_path, inode->i_sb, xid);
146 else 146 else
147 rc = cifs_get_inode_info(&file->f_path.dentry->d_inode, 147 rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
148 full_path, buf, inode->i_sb, xid); 148 full_path, buf, inode->i_sb, xid, NULL);
149 149
150 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) { 150 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
151 pCifsInode->clientCanCacheAll = TRUE; 151 pCifsInode->clientCanCacheAll = TRUE;
@@ -353,9 +353,9 @@ static int cifs_reopen_file(struct file *file, int can_flush)
353 int disposition = FILE_OPEN; 353 int disposition = FILE_OPEN;
354 __u16 netfid; 354 __u16 netfid;
355 355
356 if (file->private_data) { 356 if (file->private_data)
357 pCifsFile = (struct cifsFileInfo *)file->private_data; 357 pCifsFile = (struct cifsFileInfo *)file->private_data;
358 } else 358 else
359 return -EBADF; 359 return -EBADF;
360 360
361 xid = GetXid(); 361 xid = GetXid();
@@ -440,7 +440,7 @@ reopen_error_exit:
440 else 440 else
441 rc = cifs_get_inode_info(&inode, 441 rc = cifs_get_inode_info(&inode,
442 full_path, NULL, inode->i_sb, 442 full_path, NULL, inode->i_sb,
443 xid); 443 xid, NULL);
444 } /* else we are writing out data to server already 444 } /* else we are writing out data to server already
445 and could deadlock if we tried to flush data, and 445 and could deadlock if we tried to flush data, and
446 since we do not know if we have data that would 446 since we do not know if we have data that would
@@ -499,9 +499,8 @@ int cifs_close(struct inode *inode, struct file *file)
499 the struct would be in each open file, 499 the struct would be in each open file,
500 but this should give enough time to 500 but this should give enough time to
501 clear the socket */ 501 clear the socket */
502#ifdef CONFIG_CIFS_DEBUG2 502 cFYI(DBG2,
503 cFYI(1, ("close delay, write pending")); 503 ("close delay, write pending"));
504#endif /* DEBUG2 */
505 msleep(timeout); 504 msleep(timeout);
506 timeout *= 4; 505 timeout *= 4;
507 } 506 }
@@ -1423,9 +1422,8 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1423 xid = GetXid(); 1422 xid = GetXid();
1424/* BB add check for wbc flags */ 1423/* BB add check for wbc flags */
1425 page_cache_get(page); 1424 page_cache_get(page);
1426 if (!PageUptodate(page)) { 1425 if (!PageUptodate(page))
1427 cFYI(1, ("ppw - page not up to date")); 1426 cFYI(1, ("ppw - page not up to date"));
1428 }
1429 1427
1430 /* 1428 /*
1431 * Set the "writeback" flag, and clear "dirty" in the radix tree. 1429 * Set the "writeback" flag, and clear "dirty" in the radix tree.
@@ -1460,9 +1458,9 @@ static int cifs_commit_write(struct file *file, struct page *page,
1460 cFYI(1, ("commit write for page %p up to position %lld for %d", 1458 cFYI(1, ("commit write for page %p up to position %lld for %d",
1461 page, position, to)); 1459 page, position, to));
1462 spin_lock(&inode->i_lock); 1460 spin_lock(&inode->i_lock);
1463 if (position > inode->i_size) { 1461 if (position > inode->i_size)
1464 i_size_write(inode, position); 1462 i_size_write(inode, position);
1465 } 1463
1466 spin_unlock(&inode->i_lock); 1464 spin_unlock(&inode->i_lock);
1467 if (!PageUptodate(page)) { 1465 if (!PageUptodate(page)) {
1468 position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + offset; 1466 position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + offset;
@@ -1596,9 +1594,9 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1596 } 1594 }
1597 open_file = (struct cifsFileInfo *)file->private_data; 1595 open_file = (struct cifsFileInfo *)file->private_data;
1598 1596
1599 if ((file->f_flags & O_ACCMODE) == O_WRONLY) { 1597 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1600 cFYI(1, ("attempting read on write only file instance")); 1598 cFYI(1, ("attempting read on write only file instance"));
1601 } 1599
1602 for (total_read = 0, current_offset = read_data; 1600 for (total_read = 0, current_offset = read_data;
1603 read_size > total_read; 1601 read_size > total_read;
1604 total_read += bytes_read, current_offset += bytes_read) { 1602 total_read += bytes_read, current_offset += bytes_read) {
@@ -1625,9 +1623,8 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1625 smb_read_data + 1623 smb_read_data +
1626 4 /* RFC1001 length field */ + 1624 4 /* RFC1001 length field */ +
1627 le16_to_cpu(pSMBr->DataOffset), 1625 le16_to_cpu(pSMBr->DataOffset),
1628 bytes_read)) { 1626 bytes_read))
1629 rc = -EFAULT; 1627 rc = -EFAULT;
1630 }
1631 1628
1632 if (buf_type == CIFS_SMALL_BUFFER) 1629 if (buf_type == CIFS_SMALL_BUFFER)
1633 cifs_small_buf_release(smb_read_data); 1630 cifs_small_buf_release(smb_read_data);
@@ -1814,9 +1811,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1814 pTcon = cifs_sb->tcon; 1811 pTcon = cifs_sb->tcon;
1815 1812
1816 pagevec_init(&lru_pvec, 0); 1813 pagevec_init(&lru_pvec, 0);
1817#ifdef CONFIG_CIFS_DEBUG2 1814 cFYI(DBG2, ("rpages: num pages %d", num_pages));
1818 cFYI(1, ("rpages: num pages %d", num_pages));
1819#endif
1820 for (i = 0; i < num_pages; ) { 1815 for (i = 0; i < num_pages; ) {
1821 unsigned contig_pages; 1816 unsigned contig_pages;
1822 struct page *tmp_page; 1817 struct page *tmp_page;
@@ -1849,10 +1844,8 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1849 /* Read size needs to be in multiples of one page */ 1844 /* Read size needs to be in multiples of one page */
1850 read_size = min_t(const unsigned int, read_size, 1845 read_size = min_t(const unsigned int, read_size,
1851 cifs_sb->rsize & PAGE_CACHE_MASK); 1846 cifs_sb->rsize & PAGE_CACHE_MASK);
1852#ifdef CONFIG_CIFS_DEBUG2 1847 cFYI(DBG2, ("rpages: read size 0x%x contiguous pages %d",
1853 cFYI(1, ("rpages: read size 0x%x contiguous pages %d",
1854 read_size, contig_pages)); 1848 read_size, contig_pages));
1855#endif
1856 rc = -EAGAIN; 1849 rc = -EAGAIN;
1857 while (rc == -EAGAIN) { 1850 while (rc == -EAGAIN) {
1858 if ((open_file->invalidHandle) && 1851 if ((open_file->invalidHandle) &&
@@ -2026,7 +2019,7 @@ int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2026 struct cifs_sb_info *cifs_sb; 2019 struct cifs_sb_info *cifs_sb;
2027 2020
2028 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb); 2021 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2029 if ( cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO ) { 2022 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2030 /* since no page cache to corrupt on directio 2023 /* since no page cache to corrupt on directio
2031 we can change size safely */ 2024 we can change size safely */
2032 return 1; 2025 return 1;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b1a4a65eaa08..bc673c8c1e6b 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -29,6 +29,162 @@
29#include "cifs_debug.h" 29#include "cifs_debug.h"
30#include "cifs_fs_sb.h" 30#include "cifs_fs_sb.h"
31 31
32
33static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
34{
35 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
36
37 switch (inode->i_mode & S_IFMT) {
38 case S_IFREG:
39 inode->i_op = &cifs_file_inode_ops;
40 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
41 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
42 inode->i_fop = &cifs_file_direct_nobrl_ops;
43 else
44 inode->i_fop = &cifs_file_direct_ops;
45 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
46 inode->i_fop = &cifs_file_nobrl_ops;
47 else { /* not direct, send byte range locks */
48 inode->i_fop = &cifs_file_ops;
49 }
50
51
52 /* check if server can support readpages */
53 if (cifs_sb->tcon->ses->server->maxBuf <
54 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
55 inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
56 else
57 inode->i_data.a_ops = &cifs_addr_ops;
58 break;
59 case S_IFDIR:
60#ifdef CONFIG_CIFS_DFS_UPCALL
61 if (is_dfs_referral) {
62 inode->i_op = &cifs_dfs_referral_inode_operations;
63 } else {
64#else /* NO DFS support, treat as a directory */
65 {
66#endif
67 inode->i_op = &cifs_dir_inode_ops;
68 inode->i_fop = &cifs_dir_ops;
69 }
70 break;
71 case S_IFLNK:
72 inode->i_op = &cifs_symlink_inode_ops;
73 break;
74 default:
75 init_special_inode(inode, inode->i_mode, inode->i_rdev);
76 break;
77 }
78}
79
80static void cifs_unix_info_to_inode(struct inode *inode,
81 FILE_UNIX_BASIC_INFO *info, int force_uid_gid)
82{
83 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
84 struct cifsInodeInfo *cifsInfo = CIFS_I(inode);
85 __u64 num_of_bytes = le64_to_cpu(info->NumOfBytes);
86 __u64 end_of_file = le64_to_cpu(info->EndOfFile);
87
88 inode->i_atime = cifs_NTtimeToUnix(le64_to_cpu(info->LastAccessTime));
89 inode->i_mtime =
90 cifs_NTtimeToUnix(le64_to_cpu(info->LastModificationTime));
91 inode->i_ctime = cifs_NTtimeToUnix(le64_to_cpu(info->LastStatusChange));
92 inode->i_mode = le64_to_cpu(info->Permissions);
93
94 /*
95 * Since we set the inode type below we need to mask off
96 * to avoid strange results if bits set above.
97 */
98 inode->i_mode &= ~S_IFMT;
99 switch (le32_to_cpu(info->Type)) {
100 case UNIX_FILE:
101 inode->i_mode |= S_IFREG;
102 break;
103 case UNIX_SYMLINK:
104 inode->i_mode |= S_IFLNK;
105 break;
106 case UNIX_DIR:
107 inode->i_mode |= S_IFDIR;
108 break;
109 case UNIX_CHARDEV:
110 inode->i_mode |= S_IFCHR;
111 inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor),
112 le64_to_cpu(info->DevMinor) & MINORMASK);
113 break;
114 case UNIX_BLOCKDEV:
115 inode->i_mode |= S_IFBLK;
116 inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor),
117 le64_to_cpu(info->DevMinor) & MINORMASK);
118 break;
119 case UNIX_FIFO:
120 inode->i_mode |= S_IFIFO;
121 break;
122 case UNIX_SOCKET:
123 inode->i_mode |= S_IFSOCK;
124 break;
125 default:
126 /* safest to call it a file if we do not know */
127 inode->i_mode |= S_IFREG;
128 cFYI(1, ("unknown type %d", le32_to_cpu(info->Type)));
129 break;
130 }
131
132 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) &&
133 !force_uid_gid)
134 inode->i_uid = cifs_sb->mnt_uid;
135 else
136 inode->i_uid = le64_to_cpu(info->Uid);
137
138 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) &&
139 !force_uid_gid)
140 inode->i_gid = cifs_sb->mnt_gid;
141 else
142 inode->i_gid = le64_to_cpu(info->Gid);
143
144 inode->i_nlink = le64_to_cpu(info->Nlinks);
145
146 spin_lock(&inode->i_lock);
147 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
148 /*
149 * We can not safely change the file size here if the client
150 * is writing to it due to potential races.
151 */
152 i_size_write(inode, end_of_file);
153
154 /*
155 * i_blocks is not related to (i_size / i_blksize),
156 * but instead 512 byte (2**9) size is required for
157 * calculating num blocks.
158 */
159 inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
160 }
161 spin_unlock(&inode->i_lock);
162}
163
164static const unsigned char *cifs_get_search_path(struct cifsTconInfo *pTcon,
165 const char *search_path)
166{
167 int tree_len;
168 int path_len;
169 char *tmp_path;
170
171 if (!(pTcon->Flags & SMB_SHARE_IS_IN_DFS))
172 return search_path;
173
174 /* use full path name for working with DFS */
175 tree_len = strnlen(pTcon->treeName, MAX_TREE_SIZE + 1);
176 path_len = strnlen(search_path, MAX_PATHCONF);
177
178 tmp_path = kmalloc(tree_len+path_len+1, GFP_KERNEL);
179 if (tmp_path == NULL)
180 return search_path;
181
182 strncpy(tmp_path, pTcon->treeName, tree_len);
183 strncpy(tmp_path+tree_len, search_path, path_len);
184 tmp_path[tree_len+path_len] = 0;
185 return tmp_path;
186}
187
32int cifs_get_inode_info_unix(struct inode **pinode, 188int cifs_get_inode_info_unix(struct inode **pinode,
33 const unsigned char *search_path, struct super_block *sb, int xid) 189 const unsigned char *search_path, struct super_block *sb, int xid)
34{ 190{
@@ -37,52 +193,43 @@ int cifs_get_inode_info_unix(struct inode **pinode,
37 struct cifsTconInfo *pTcon; 193 struct cifsTconInfo *pTcon;
38 struct inode *inode; 194 struct inode *inode;
39 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 195 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
40 char *tmp_path; 196 const unsigned char *full_path;
197 bool is_dfs_referral = false;
41 198
42 pTcon = cifs_sb->tcon; 199 pTcon = cifs_sb->tcon;
43 cFYI(1, ("Getting info on %s", search_path)); 200 cFYI(1, ("Getting info on %s", search_path));
201
202 full_path = cifs_get_search_path(pTcon, search_path);
203
204try_again_CIFSSMBUnixQPathInfo:
44 /* could have done a find first instead but this returns more info */ 205 /* could have done a find first instead but this returns more info */
45 rc = CIFSSMBUnixQPathInfo(xid, pTcon, search_path, &findData, 206 rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &findData,
46 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 207 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
47 CIFS_MOUNT_MAP_SPECIAL_CHR); 208 CIFS_MOUNT_MAP_SPECIAL_CHR);
48/* dump_mem("\nUnixQPathInfo return data", &findData, 209/* dump_mem("\nUnixQPathInfo return data", &findData,
49 sizeof(findData)); */ 210 sizeof(findData)); */
50 if (rc) { 211 if (rc) {
51 if (rc == -EREMOTE) { 212 if (rc == -EREMOTE && !is_dfs_referral) {
52 tmp_path = 213 is_dfs_referral = true;
53 kmalloc(strnlen(pTcon->treeName, 214 if (full_path != search_path) {
54 MAX_TREE_SIZE + 1) + 215 kfree(full_path);
55 strnlen(search_path, MAX_PATHCONF) + 1, 216 full_path = search_path;
56 GFP_KERNEL); 217 }
57 if (tmp_path == NULL) 218 goto try_again_CIFSSMBUnixQPathInfo;
58 return -ENOMEM;
59
60 /* have to skip first of the double backslash of
61 UNC name */
62 strncpy(tmp_path, pTcon->treeName, MAX_TREE_SIZE);
63 strncat(tmp_path, search_path, MAX_PATHCONF);
64 rc = connect_to_dfs_path(xid, pTcon->ses,
65 /* treename + */ tmp_path,
66 cifs_sb->local_nls,
67 cifs_sb->mnt_cifs_flags &
68 CIFS_MOUNT_MAP_SPECIAL_CHR);
69 kfree(tmp_path);
70
71 /* BB fix up inode etc. */
72 } else if (rc) {
73 return rc;
74 } 219 }
220 goto cgiiu_exit;
75 } else { 221 } else {
76 struct cifsInodeInfo *cifsInfo; 222 struct cifsInodeInfo *cifsInfo;
77 __u32 type = le32_to_cpu(findData.Type);
78 __u64 num_of_bytes = le64_to_cpu(findData.NumOfBytes); 223 __u64 num_of_bytes = le64_to_cpu(findData.NumOfBytes);
79 __u64 end_of_file = le64_to_cpu(findData.EndOfFile); 224 __u64 end_of_file = le64_to_cpu(findData.EndOfFile);
80 225
81 /* get new inode */ 226 /* get new inode */
82 if (*pinode == NULL) { 227 if (*pinode == NULL) {
83 *pinode = new_inode(sb); 228 *pinode = new_inode(sb);
84 if (*pinode == NULL) 229 if (*pinode == NULL) {
85 return -ENOMEM; 230 rc = -ENOMEM;
231 goto cgiiu_exit;
232 }
86 /* Is an i_ino of zero legal? */ 233 /* Is an i_ino of zero legal? */
87 /* Are there sanity checks we can use to ensure that 234 /* Are there sanity checks we can use to ensure that
88 the server is really filling in that field? */ 235 the server is really filling in that field? */
@@ -105,113 +252,20 @@ int cifs_get_inode_info_unix(struct inode **pinode,
105 /* this is ok to set on every inode revalidate */ 252 /* this is ok to set on every inode revalidate */
106 atomic_set(&cifsInfo->inUse, 1); 253 atomic_set(&cifsInfo->inUse, 1);
107 254
108 inode->i_atime = 255 cifs_unix_info_to_inode(inode, &findData, 0);
109 cifs_NTtimeToUnix(le64_to_cpu(findData.LastAccessTime));
110 inode->i_mtime =
111 cifs_NTtimeToUnix(le64_to_cpu
112 (findData.LastModificationTime));
113 inode->i_ctime =
114 cifs_NTtimeToUnix(le64_to_cpu(findData.LastStatusChange));
115 inode->i_mode = le64_to_cpu(findData.Permissions);
116 /* since we set the inode type below we need to mask off
117 to avoid strange results if bits set above */
118 inode->i_mode &= ~S_IFMT;
119 if (type == UNIX_FILE) {
120 inode->i_mode |= S_IFREG;
121 } else if (type == UNIX_SYMLINK) {
122 inode->i_mode |= S_IFLNK;
123 } else if (type == UNIX_DIR) {
124 inode->i_mode |= S_IFDIR;
125 } else if (type == UNIX_CHARDEV) {
126 inode->i_mode |= S_IFCHR;
127 inode->i_rdev = MKDEV(le64_to_cpu(findData.DevMajor),
128 le64_to_cpu(findData.DevMinor) & MINORMASK);
129 } else if (type == UNIX_BLOCKDEV) {
130 inode->i_mode |= S_IFBLK;
131 inode->i_rdev = MKDEV(le64_to_cpu(findData.DevMajor),
132 le64_to_cpu(findData.DevMinor) & MINORMASK);
133 } else if (type == UNIX_FIFO) {
134 inode->i_mode |= S_IFIFO;
135 } else if (type == UNIX_SOCKET) {
136 inode->i_mode |= S_IFSOCK;
137 } else {
138 /* safest to call it a file if we do not know */
139 inode->i_mode |= S_IFREG;
140 cFYI(1, ("unknown type %d", type));
141 }
142
143 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
144 inode->i_uid = cifs_sb->mnt_uid;
145 else
146 inode->i_uid = le64_to_cpu(findData.Uid);
147
148 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
149 inode->i_gid = cifs_sb->mnt_gid;
150 else
151 inode->i_gid = le64_to_cpu(findData.Gid);
152
153 inode->i_nlink = le64_to_cpu(findData.Nlinks);
154 256
155 spin_lock(&inode->i_lock);
156 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
157 /* can not safely change the file size here if the
158 client is writing to it due to potential races */
159 i_size_write(inode, end_of_file);
160
161 /* blksize needs to be multiple of two. So safer to default to
162 blksize and blkbits set in superblock so 2**blkbits and blksize
163 will match rather than setting to:
164 (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/
165
166 /* This seems incredibly stupid but it turns out that i_blocks
167 is not related to (i_size / i_blksize), instead 512 byte size
168 is required for calculating num blocks */
169
170 /* 512 bytes (2**9) is the fake blocksize that must be used */
171 /* for this calculation */
172 inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
173 }
174 spin_unlock(&inode->i_lock);
175 257
176 if (num_of_bytes < end_of_file) 258 if (num_of_bytes < end_of_file)
177 cFYI(1, ("allocation size less than end of file")); 259 cFYI(1, ("allocation size less than end of file"));
178 cFYI(1, ("Size %ld and blocks %llu", 260 cFYI(1, ("Size %ld and blocks %llu",
179 (unsigned long) inode->i_size, 261 (unsigned long) inode->i_size,
180 (unsigned long long)inode->i_blocks)); 262 (unsigned long long)inode->i_blocks));
181 if (S_ISREG(inode->i_mode)) { 263
182 cFYI(1, ("File inode")); 264 cifs_set_ops(inode, is_dfs_referral);
183 inode->i_op = &cifs_file_inode_ops;
184 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
185 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
186 inode->i_fop =
187 &cifs_file_direct_nobrl_ops;
188 else
189 inode->i_fop = &cifs_file_direct_ops;
190 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
191 inode->i_fop = &cifs_file_nobrl_ops;
192 else /* not direct, send byte range locks */
193 inode->i_fop = &cifs_file_ops;
194
195 /* check if server can support readpages */
196 if (pTcon->ses->server->maxBuf <
197 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
198 inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
199 else
200 inode->i_data.a_ops = &cifs_addr_ops;
201 } else if (S_ISDIR(inode->i_mode)) {
202 cFYI(1, ("Directory inode"));
203 inode->i_op = &cifs_dir_inode_ops;
204 inode->i_fop = &cifs_dir_ops;
205 } else if (S_ISLNK(inode->i_mode)) {
206 cFYI(1, ("Symbolic Link inode"));
207 inode->i_op = &cifs_symlink_inode_ops;
208 /* tmp_inode->i_fop = */ /* do not need to set to anything */
209 } else {
210 cFYI(1, ("Init special inode"));
211 init_special_inode(inode, inode->i_mode,
212 inode->i_rdev);
213 }
214 } 265 }
266cgiiu_exit:
267 if (full_path != search_path)
268 kfree(full_path);
215 return rc; 269 return rc;
216} 270}
217 271
@@ -320,15 +374,16 @@ static int get_sfu_mode(struct inode *inode,
320 374
321int cifs_get_inode_info(struct inode **pinode, 375int cifs_get_inode_info(struct inode **pinode,
322 const unsigned char *search_path, FILE_ALL_INFO *pfindData, 376 const unsigned char *search_path, FILE_ALL_INFO *pfindData,
323 struct super_block *sb, int xid) 377 struct super_block *sb, int xid, const __u16 *pfid)
324{ 378{
325 int rc = 0; 379 int rc = 0;
326 struct cifsTconInfo *pTcon; 380 struct cifsTconInfo *pTcon;
327 struct inode *inode; 381 struct inode *inode;
328 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 382 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
329 char *tmp_path; 383 const unsigned char *full_path = NULL;
330 char *buf = NULL; 384 char *buf = NULL;
331 int adjustTZ = FALSE; 385 int adjustTZ = FALSE;
386 bool is_dfs_referral = false;
332 387
333 pTcon = cifs_sb->tcon; 388 pTcon = cifs_sb->tcon;
334 cFYI(1, ("Getting info on %s", search_path)); 389 cFYI(1, ("Getting info on %s", search_path));
@@ -346,8 +401,12 @@ int cifs_get_inode_info(struct inode **pinode,
346 if (buf == NULL) 401 if (buf == NULL)
347 return -ENOMEM; 402 return -ENOMEM;
348 pfindData = (FILE_ALL_INFO *)buf; 403 pfindData = (FILE_ALL_INFO *)buf;
404
405 full_path = cifs_get_search_path(pTcon, search_path);
406
407try_again_CIFSSMBQPathInfo:
349 /* could do find first instead but this returns more info */ 408 /* could do find first instead but this returns more info */
350 rc = CIFSSMBQPathInfo(xid, pTcon, search_path, pfindData, 409 rc = CIFSSMBQPathInfo(xid, pTcon, full_path, pfindData,
351 0 /* not legacy */, 410 0 /* not legacy */,
352 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 411 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
353 CIFS_MOUNT_MAP_SPECIAL_CHR); 412 CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -355,7 +414,7 @@ int cifs_get_inode_info(struct inode **pinode,
355 when server claims no NT SMB support and the above call 414 when server claims no NT SMB support and the above call
356 failed at least once - set flag in tcon or mount */ 415 failed at least once - set flag in tcon or mount */
357 if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { 416 if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) {
358 rc = SMBQueryInformation(xid, pTcon, search_path, 417 rc = SMBQueryInformation(xid, pTcon, full_path,
359 pfindData, cifs_sb->local_nls, 418 pfindData, cifs_sb->local_nls,
360 cifs_sb->mnt_cifs_flags & 419 cifs_sb->mnt_cifs_flags &
361 CIFS_MOUNT_MAP_SPECIAL_CHR); 420 CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -364,31 +423,15 @@ int cifs_get_inode_info(struct inode **pinode,
364 } 423 }
365 /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */ 424 /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */
366 if (rc) { 425 if (rc) {
367 if (rc == -EREMOTE) { 426 if (rc == -EREMOTE && !is_dfs_referral) {
368 tmp_path = 427 is_dfs_referral = true;
369 kmalloc(strnlen 428 if (full_path != search_path) {
370 (pTcon->treeName, 429 kfree(full_path);
371 MAX_TREE_SIZE + 1) + 430 full_path = search_path;
372 strnlen(search_path, MAX_PATHCONF) + 1,
373 GFP_KERNEL);
374 if (tmp_path == NULL) {
375 kfree(buf);
376 return -ENOMEM;
377 } 431 }
378 432 goto try_again_CIFSSMBQPathInfo;
379 strncpy(tmp_path, pTcon->treeName, MAX_TREE_SIZE);
380 strncat(tmp_path, search_path, MAX_PATHCONF);
381 rc = connect_to_dfs_path(xid, pTcon->ses,
382 /* treename + */ tmp_path,
383 cifs_sb->local_nls,
384 cifs_sb->mnt_cifs_flags &
385 CIFS_MOUNT_MAP_SPECIAL_CHR);
386 kfree(tmp_path);
387 /* BB fix up inode etc. */
388 } else if (rc) {
389 kfree(buf);
390 return rc;
391 } 433 }
434 goto cgii_exit;
392 } else { 435 } else {
393 struct cifsInodeInfo *cifsInfo; 436 struct cifsInodeInfo *cifsInfo;
394 __u32 attr = le32_to_cpu(pfindData->Attributes); 437 __u32 attr = le32_to_cpu(pfindData->Attributes);
@@ -397,8 +440,8 @@ int cifs_get_inode_info(struct inode **pinode,
397 if (*pinode == NULL) { 440 if (*pinode == NULL) {
398 *pinode = new_inode(sb); 441 *pinode = new_inode(sb);
399 if (*pinode == NULL) { 442 if (*pinode == NULL) {
400 kfree(buf); 443 rc = -ENOMEM;
401 return -ENOMEM; 444 goto cgii_exit;
402 } 445 }
403 /* Is an i_ino of zero legal? Can we use that to check 446 /* Is an i_ino of zero legal? Can we use that to check
404 if the server supports returning inode numbers? Are 447 if the server supports returning inode numbers? Are
@@ -490,9 +533,9 @@ int cifs_get_inode_info(struct inode **pinode,
490 if (decode_sfu_inode(inode, 533 if (decode_sfu_inode(inode,
491 le64_to_cpu(pfindData->EndOfFile), 534 le64_to_cpu(pfindData->EndOfFile),
492 search_path, 535 search_path,
493 cifs_sb, xid)) { 536 cifs_sb, xid))
494 cFYI(1, ("Unrecognized sfu inode type")); 537 cFYI(1, ("Unrecognized sfu inode type"));
495 } 538
496 cFYI(1, ("sfu mode 0%o", inode->i_mode)); 539 cFYI(1, ("sfu mode 0%o", inode->i_mode));
497 } else { 540 } else {
498 inode->i_mode |= S_IFREG; 541 inode->i_mode |= S_IFREG;
@@ -532,7 +575,7 @@ int cifs_get_inode_info(struct inode **pinode,
532 /* fill in 0777 bits from ACL */ 575 /* fill in 0777 bits from ACL */
533 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 576 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
534 cFYI(1, ("Getting mode bits from ACL")); 577 cFYI(1, ("Getting mode bits from ACL"));
535 acl_to_uid_mode(inode, search_path); 578 acl_to_uid_mode(inode, search_path, pfid);
536 } 579 }
537#endif 580#endif
538 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { 581 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
@@ -546,37 +589,11 @@ int cifs_get_inode_info(struct inode **pinode,
546 atomic_set(&cifsInfo->inUse, 1); 589 atomic_set(&cifsInfo->inUse, 1);
547 } 590 }
548 591
549 if (S_ISREG(inode->i_mode)) { 592 cifs_set_ops(inode, is_dfs_referral);
550 cFYI(1, ("File inode"));
551 inode->i_op = &cifs_file_inode_ops;
552 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
553 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
554 inode->i_fop =
555 &cifs_file_direct_nobrl_ops;
556 else
557 inode->i_fop = &cifs_file_direct_ops;
558 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
559 inode->i_fop = &cifs_file_nobrl_ops;
560 else /* not direct, send byte range locks */
561 inode->i_fop = &cifs_file_ops;
562
563 if (pTcon->ses->server->maxBuf <
564 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
565 inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
566 else
567 inode->i_data.a_ops = &cifs_addr_ops;
568 } else if (S_ISDIR(inode->i_mode)) {
569 cFYI(1, ("Directory inode"));
570 inode->i_op = &cifs_dir_inode_ops;
571 inode->i_fop = &cifs_dir_ops;
572 } else if (S_ISLNK(inode->i_mode)) {
573 cFYI(1, ("Symbolic Link inode"));
574 inode->i_op = &cifs_symlink_inode_ops;
575 } else {
576 init_special_inode(inode, inode->i_mode,
577 inode->i_rdev);
578 }
579 } 593 }
594cgii_exit:
595 if (full_path != search_path)
596 kfree(full_path);
580 kfree(buf); 597 kfree(buf);
581 return rc; 598 return rc;
582} 599}
@@ -605,7 +622,8 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
605 if (cifs_sb->tcon->unix_ext) 622 if (cifs_sb->tcon->unix_ext)
606 rc = cifs_get_inode_info_unix(&inode, "", inode->i_sb, xid); 623 rc = cifs_get_inode_info_unix(&inode, "", inode->i_sb, xid);
607 else 624 else
608 rc = cifs_get_inode_info(&inode, "", NULL, inode->i_sb, xid); 625 rc = cifs_get_inode_info(&inode, "", NULL, inode->i_sb, xid,
626 NULL);
609 if (rc && cifs_sb->tcon->ipc) { 627 if (rc && cifs_sb->tcon->ipc) {
610 cFYI(1, ("ipc connection - fake read inode")); 628 cFYI(1, ("ipc connection - fake read inode"));
611 inode->i_mode |= S_IFDIR; 629 inode->i_mode |= S_IFDIR;
@@ -792,17 +810,12 @@ psx_del_no_retry:
792} 810}
793 811
794static void posix_fill_in_inode(struct inode *tmp_inode, 812static void posix_fill_in_inode(struct inode *tmp_inode,
795 FILE_UNIX_BASIC_INFO *pData, int *pobject_type, int isNewInode) 813 FILE_UNIX_BASIC_INFO *pData, int isNewInode)
796{ 814{
815 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
797 loff_t local_size; 816 loff_t local_size;
798 struct timespec local_mtime; 817 struct timespec local_mtime;
799 818
800 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
801 struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
802
803 __u32 type = le32_to_cpu(pData->Type);
804 __u64 num_of_bytes = le64_to_cpu(pData->NumOfBytes);
805 __u64 end_of_file = le64_to_cpu(pData->EndOfFile);
806 cifsInfo->time = jiffies; 819 cifsInfo->time = jiffies;
807 atomic_inc(&cifsInfo->inUse); 820 atomic_inc(&cifsInfo->inUse);
808 821
@@ -810,115 +823,27 @@ static void posix_fill_in_inode(struct inode *tmp_inode,
810 local_mtime = tmp_inode->i_mtime; 823 local_mtime = tmp_inode->i_mtime;
811 local_size = tmp_inode->i_size; 824 local_size = tmp_inode->i_size;
812 825
813 tmp_inode->i_atime = 826 cifs_unix_info_to_inode(tmp_inode, pData, 1);
814 cifs_NTtimeToUnix(le64_to_cpu(pData->LastAccessTime)); 827 cifs_set_ops(tmp_inode, false);
815 tmp_inode->i_mtime =
816 cifs_NTtimeToUnix(le64_to_cpu(pData->LastModificationTime));
817 tmp_inode->i_ctime =
818 cifs_NTtimeToUnix(le64_to_cpu(pData->LastStatusChange));
819
820 tmp_inode->i_mode = le64_to_cpu(pData->Permissions);
821 /* since we set the inode type below we need to mask off type
822 to avoid strange results if bits above were corrupt */
823 tmp_inode->i_mode &= ~S_IFMT;
824 if (type == UNIX_FILE) {
825 *pobject_type = DT_REG;
826 tmp_inode->i_mode |= S_IFREG;
827 } else if (type == UNIX_SYMLINK) {
828 *pobject_type = DT_LNK;
829 tmp_inode->i_mode |= S_IFLNK;
830 } else if (type == UNIX_DIR) {
831 *pobject_type = DT_DIR;
832 tmp_inode->i_mode |= S_IFDIR;
833 } else if (type == UNIX_CHARDEV) {
834 *pobject_type = DT_CHR;
835 tmp_inode->i_mode |= S_IFCHR;
836 tmp_inode->i_rdev = MKDEV(le64_to_cpu(pData->DevMajor),
837 le64_to_cpu(pData->DevMinor) & MINORMASK);
838 } else if (type == UNIX_BLOCKDEV) {
839 *pobject_type = DT_BLK;
840 tmp_inode->i_mode |= S_IFBLK;
841 tmp_inode->i_rdev = MKDEV(le64_to_cpu(pData->DevMajor),
842 le64_to_cpu(pData->DevMinor) & MINORMASK);
843 } else if (type == UNIX_FIFO) {
844 *pobject_type = DT_FIFO;
845 tmp_inode->i_mode |= S_IFIFO;
846 } else if (type == UNIX_SOCKET) {
847 *pobject_type = DT_SOCK;
848 tmp_inode->i_mode |= S_IFSOCK;
849 } else {
850 /* safest to just call it a file */
851 *pobject_type = DT_REG;
852 tmp_inode->i_mode |= S_IFREG;
853 cFYI(1, ("unknown inode type %d", type));
854 }
855
856#ifdef CONFIG_CIFS_DEBUG2
857 cFYI(1, ("object type: %d", type));
858#endif
859 tmp_inode->i_uid = le64_to_cpu(pData->Uid);
860 tmp_inode->i_gid = le64_to_cpu(pData->Gid);
861 tmp_inode->i_nlink = le64_to_cpu(pData->Nlinks);
862
863 spin_lock(&tmp_inode->i_lock);
864 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
865 /* can not safely change the file size here if the
866 client is writing to it due to potential races */
867 i_size_write(tmp_inode, end_of_file);
868
869 /* 512 bytes (2**9) is the fake blocksize that must be used */
870 /* for this calculation, not the real blocksize */
871 tmp_inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
872 }
873 spin_unlock(&tmp_inode->i_lock);
874
875 if (S_ISREG(tmp_inode->i_mode)) {
876 cFYI(1, ("File inode"));
877 tmp_inode->i_op = &cifs_file_inode_ops;
878
879 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
880 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
881 tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
882 else
883 tmp_inode->i_fop = &cifs_file_direct_ops;
884 828
885 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 829 if (!S_ISREG(tmp_inode->i_mode))
886 tmp_inode->i_fop = &cifs_file_nobrl_ops; 830 return;
887 else
888 tmp_inode->i_fop = &cifs_file_ops;
889 831
890 if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) && 832 /*
891 (cifs_sb->tcon->ses->server->maxBuf < 833 * No sense invalidating pages for new inode
892 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)) 834 * since we we have not started caching
893 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf; 835 * readahead file data yet.
894 else 836 */
895 tmp_inode->i_data.a_ops = &cifs_addr_ops; 837 if (isNewInode)
896 838 return;
897 if (isNewInode)
898 return; /* No sense invalidating pages for new inode
899 since we we have not started caching
900 readahead file data yet */
901 839
902 if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) && 840 if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
903 (local_size == tmp_inode->i_size)) { 841 (local_size == tmp_inode->i_size)) {
904 cFYI(1, ("inode exists but unchanged")); 842 cFYI(1, ("inode exists but unchanged"));
905 } else {
906 /* file may have changed on server */
907 cFYI(1, ("invalidate inode, readdir detected change"));
908 invalidate_remote_inode(tmp_inode);
909 }
910 } else if (S_ISDIR(tmp_inode->i_mode)) {
911 cFYI(1, ("Directory inode"));
912 tmp_inode->i_op = &cifs_dir_inode_ops;
913 tmp_inode->i_fop = &cifs_dir_ops;
914 } else if (S_ISLNK(tmp_inode->i_mode)) {
915 cFYI(1, ("Symbolic Link inode"));
916 tmp_inode->i_op = &cifs_symlink_inode_ops;
917/* tmp_inode->i_fop = *//* do not need to set to anything */
918 } else { 843 } else {
919 cFYI(1, ("Special inode")); 844 /* file may have changed on server */
920 init_special_inode(tmp_inode, tmp_inode->i_mode, 845 cFYI(1, ("invalidate inode, readdir detected change"));
921 tmp_inode->i_rdev); 846 invalidate_remote_inode(tmp_inode);
922 } 847 }
923} 848}
924 849
@@ -968,7 +893,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
968 cFYI(1, ("posix mkdir returned 0x%x", rc)); 893 cFYI(1, ("posix mkdir returned 0x%x", rc));
969 d_drop(direntry); 894 d_drop(direntry);
970 } else { 895 } else {
971 int obj_type;
972 if (pInfo->Type == cpu_to_le32(-1)) { 896 if (pInfo->Type == cpu_to_le32(-1)) {
973 /* no return info, go query for it */ 897 /* no return info, go query for it */
974 kfree(pInfo); 898 kfree(pInfo);
@@ -1004,7 +928,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1004 /* we already checked in POSIXCreate whether 928 /* we already checked in POSIXCreate whether
1005 frame was long enough */ 929 frame was long enough */
1006 posix_fill_in_inode(direntry->d_inode, 930 posix_fill_in_inode(direntry->d_inode,
1007 pInfo, &obj_type, 1 /* NewInode */); 931 pInfo, 1 /* NewInode */);
1008#ifdef CONFIG_CIFS_DEBUG2 932#ifdef CONFIG_CIFS_DEBUG2
1009 cFYI(1, ("instantiated dentry %p %s to inode %p", 933 cFYI(1, ("instantiated dentry %p %s to inode %p",
1010 direntry, direntry->d_name.name, newinode)); 934 direntry, direntry->d_name.name, newinode));
@@ -1032,7 +956,7 @@ mkdir_get_info:
1032 inode->i_sb, xid); 956 inode->i_sb, xid);
1033 else 957 else
1034 rc = cifs_get_inode_info(&newinode, full_path, NULL, 958 rc = cifs_get_inode_info(&newinode, full_path, NULL,
1035 inode->i_sb, xid); 959 inode->i_sb, xid, NULL);
1036 960
1037 if (pTcon->nocase) 961 if (pTcon->nocase)
1038 direntry->d_op = &cifs_ci_dentry_ops; 962 direntry->d_op = &cifs_ci_dentry_ops;
@@ -1214,9 +1138,8 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
1214 } /* if we can not get memory just leave rc as EEXIST */ 1138 } /* if we can not get memory just leave rc as EEXIST */
1215 } 1139 }
1216 1140
1217 if (rc) { 1141 if (rc)
1218 cFYI(1, ("rename rc %d", rc)); 1142 cFYI(1, ("rename rc %d", rc));
1219 }
1220 1143
1221 if ((rc == -EIO) || (rc == -EEXIST)) { 1144 if ((rc == -EIO) || (rc == -EEXIST)) {
1222 int oplock = FALSE; 1145 int oplock = FALSE;
@@ -1315,7 +1238,7 @@ int cifs_revalidate(struct dentry *direntry)
1315 } 1238 }
1316 } else { 1239 } else {
1317 rc = cifs_get_inode_info(&direntry->d_inode, full_path, NULL, 1240 rc = cifs_get_inode_info(&direntry->d_inode, full_path, NULL,
1318 direntry->d_sb, xid); 1241 direntry->d_sb, xid, NULL);
1319 if (rc) { 1242 if (rc) {
1320 cFYI(1, ("error on getting revalidate info %d", rc)); 1243 cFYI(1, ("error on getting revalidate info %d", rc));
1321/* if (rc != -ENOENT) 1244/* if (rc != -ENOENT)
@@ -1504,11 +1427,10 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1504 } 1427 }
1505 cifsInode = CIFS_I(direntry->d_inode); 1428 cifsInode = CIFS_I(direntry->d_inode);
1506 1429
1507 /* BB check if we need to refresh inode from server now ? BB */ 1430 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
1508
1509 if (attrs->ia_valid & ATTR_SIZE) {
1510 /* 1431 /*
1511 Flush data before changing file size on server. If the 1432 Flush data before changing file size or changing the last
1433 write time of the file on the server. If the
1512 flush returns error, store it to report later and continue. 1434 flush returns error, store it to report later and continue.
1513 BB: This should be smarter. Why bother flushing pages that 1435 BB: This should be smarter. Why bother flushing pages that
1514 will be truncated anyway? Also, should we error out here if 1436 will be truncated anyway? Also, should we error out here if
@@ -1519,7 +1441,9 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1519 CIFS_I(direntry->d_inode)->write_behind_rc = rc; 1441 CIFS_I(direntry->d_inode)->write_behind_rc = rc;
1520 rc = 0; 1442 rc = 0;
1521 } 1443 }
1444 }
1522 1445
1446 if (attrs->ia_valid & ATTR_SIZE) {
1523 /* To avoid spurious oplock breaks from server, in the case of 1447 /* To avoid spurious oplock breaks from server, in the case of
1524 inodes that we already have open, avoid doing path based 1448 inodes that we already have open, avoid doing path based
1525 setting of file size if we can do it by handle. 1449 setting of file size if we can do it by handle.
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index d24fe6880a04..5c792df13d62 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -30,7 +30,7 @@
30 30
31#define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2) 31#define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2)
32 32
33int cifs_ioctl (struct inode *inode, struct file *filep, 33int cifs_ioctl(struct inode *inode, struct file *filep,
34 unsigned int command, unsigned long arg) 34 unsigned int command, unsigned long arg)
35{ 35{
36 int rc = -ENOTTY; /* strange error - but the precedent */ 36 int rc = -ENOTTY; /* strange error - but the precedent */
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 1d6fb01b8e6d..d4e7ec93285f 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -205,7 +205,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
205 inode->i_sb, xid); 205 inode->i_sb, xid);
206 else 206 else
207 rc = cifs_get_inode_info(&newinode, full_path, NULL, 207 rc = cifs_get_inode_info(&newinode, full_path, NULL,
208 inode->i_sb, xid); 208 inode->i_sb, xid, NULL);
209 209
210 if (rc != 0) { 210 if (rc != 0) {
211 cFYI(1, ("Create symlink ok, getinodeinfo fail rc = %d", 211 cFYI(1, ("Create symlink ok, getinodeinfo fail rc = %d",
diff --git a/fs/cifs/md4.c b/fs/cifs/md4.c
index a2415c1a14db..a725c2609d67 100644
--- a/fs/cifs/md4.c
+++ b/fs/cifs/md4.c
@@ -56,7 +56,7 @@ lshift(__u32 x, int s)
56 56
57/* this applies md4 to 64 byte chunks */ 57/* this applies md4 to 64 byte chunks */
58static void 58static void
59mdfour64(__u32 * M, __u32 * A, __u32 *B, __u32 * C, __u32 *D) 59mdfour64(__u32 *M, __u32 *A, __u32 *B, __u32 *C, __u32 *D)
60{ 60{
61 int j; 61 int j;
62 __u32 AA, BB, CC, DD; 62 __u32 AA, BB, CC, DD;
@@ -137,7 +137,7 @@ mdfour64(__u32 * M, __u32 * A, __u32 *B, __u32 * C, __u32 *D)
137} 137}
138 138
139static void 139static void
140copy64(__u32 * M, unsigned char *in) 140copy64(__u32 *M, unsigned char *in)
141{ 141{
142 int i; 142 int i;
143 143
diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c
index f13f96d42fcf..462bbfefd4b6 100644
--- a/fs/cifs/md5.c
+++ b/fs/cifs/md5.c
@@ -161,7 +161,7 @@ MD5Final(unsigned char digest[16], struct MD5Context *ctx)
161 161
162/* This is the central step in the MD5 algorithm. */ 162/* This is the central step in the MD5 algorithm. */
163#define MD5STEP(f, w, x, y, z, data, s) \ 163#define MD5STEP(f, w, x, y, z, data, s) \
164 ( w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x ) 164 (w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x)
165 165
166/* 166/*
167 * The core of the MD5 algorithm, this alters an existing MD5 hash to 167 * The core of the MD5 algorithm, this alters an existing MD5 hash to
@@ -302,9 +302,8 @@ hmac_md5_init_limK_to_64(const unsigned char *key, int key_len,
302 int i; 302 int i;
303 303
304 /* if key is longer than 64 bytes truncate it */ 304 /* if key is longer than 64 bytes truncate it */
305 if (key_len > 64) { 305 if (key_len > 64)
306 key_len = 64; 306 key_len = 64;
307 }
308 307
309 /* start out by storing key in pads */ 308 /* start out by storing key in pads */
310 memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad)); 309 memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad));
@@ -359,9 +358,9 @@ hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
359{ 358{
360 struct HMACMD5Context ctx; 359 struct HMACMD5Context ctx;
361 hmac_md5_init_limK_to_64(key, 16, &ctx); 360 hmac_md5_init_limK_to_64(key, 16, &ctx);
362 if (data_len != 0) { 361 if (data_len != 0)
363 hmac_md5_update(data, data_len, &ctx); 362 hmac_md5_update(data, data_len, &ctx);
364 } 363
365 hmac_md5_final(digest, &ctx); 364 hmac_md5_final(digest, &ctx);
366} 365}
367#endif 366#endif
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 15546c2354c5..2a42d9fedbb2 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/misc.c 2 * fs/cifs/misc.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2007 4 * Copyright (C) International Business Machines Corp., 2002,2008
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -320,9 +320,9 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
320 if (treeCon->ses) { 320 if (treeCon->ses) {
321 if (treeCon->ses->capabilities & CAP_UNICODE) 321 if (treeCon->ses->capabilities & CAP_UNICODE)
322 buffer->Flags2 |= SMBFLG2_UNICODE; 322 buffer->Flags2 |= SMBFLG2_UNICODE;
323 if (treeCon->ses->capabilities & CAP_STATUS32) { 323 if (treeCon->ses->capabilities & CAP_STATUS32)
324 buffer->Flags2 |= SMBFLG2_ERR_STATUS; 324 buffer->Flags2 |= SMBFLG2_ERR_STATUS;
325 } 325
326 /* Uid is not converted */ 326 /* Uid is not converted */
327 buffer->Uid = treeCon->ses->Suid; 327 buffer->Uid = treeCon->ses->Suid;
328 buffer->Mid = GetNextMid(treeCon->ses->server); 328 buffer->Mid = GetNextMid(treeCon->ses->server);
@@ -610,7 +610,8 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length)
610 610
611 buffer = (unsigned char *) smb_buf; 611 buffer = (unsigned char *) smb_buf;
612 for (i = 0, j = 0; i < smb_buf_length; i++, j++) { 612 for (i = 0, j = 0; i < smb_buf_length; i++, j++) {
613 if (i % 8 == 0) { /* have reached the beginning of line */ 613 if (i % 8 == 0) {
614 /* have reached the beginning of line */
614 printk(KERN_DEBUG "| "); 615 printk(KERN_DEBUG "| ");
615 j = 0; 616 j = 0;
616 } 617 }
@@ -621,7 +622,8 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length)
621 else 622 else
622 debug_line[1 + (2 * j)] = '_'; 623 debug_line[1 + (2 * j)] = '_';
623 624
624 if (i % 8 == 7) { /* reached end of line, time to print ascii */ 625 if (i % 8 == 7) {
626 /* reached end of line, time to print ascii */
625 debug_line[16] = 0; 627 debug_line[16] = 0;
626 printk(" | %s\n", debug_line); 628 printk(" | %s\n", debug_line);
627 } 629 }
@@ -631,7 +633,7 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length)
631 debug_line[2 * j] = ' '; 633 debug_line[2 * j] = ' ';
632 debug_line[1 + (2 * j)] = ' '; 634 debug_line[1 + (2 * j)] = ' ';
633 } 635 }
634 printk( " | %s\n", debug_line); 636 printk(" | %s\n", debug_line);
635 return; 637 return;
636} 638}
637 639
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 646e1f06941b..3b5a5ce882b6 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/netmisc.c 2 * fs/cifs/netmisc.c
3 * 3 *
4 * Copyright (c) International Business Machines Corp., 2002 4 * Copyright (c) International Business Machines Corp., 2002,2008
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * Error mapping routines from Samba libsmb/errormap.c 7 * Error mapping routines from Samba libsmb/errormap.c
@@ -150,9 +150,7 @@ static int canonicalize_unc(char *cp)
150 if (cp[i] == '\\') 150 if (cp[i] == '\\')
151 break; 151 break;
152 if (cp[i] == '/') { 152 if (cp[i] == '/') {
153#ifdef CONFIG_CIFS_DEBUG2 153 cFYI(DBG2, ("change slash to \\ in malformed UNC"));
154 cFYI(1, ("change slash to backslash in malformed UNC"));
155#endif
156 cp[i] = '\\'; 154 cp[i] = '\\';
157 return 1; 155 return 1;
158 } 156 }
@@ -178,9 +176,7 @@ cifs_inet_pton(int address_family, char *cp, void *dst)
178 } else if (address_family == AF_INET6) { 176 } else if (address_family == AF_INET6) {
179 ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL); 177 ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL);
180 } 178 }
181#ifdef CONFIG_CIFS_DEBUG2 179 cFYI(DBG2, ("address conversion returned %d for %s", ret, cp));
182 cFYI(1, ("address conversion returned %d for %s", ret, cp));
183#endif
184 if (ret > 0) 180 if (ret > 0)
185 ret = 1; 181 ret = 1;
186 return ret; 182 return ret;
@@ -253,7 +249,8 @@ static const struct {
253 ERRDOS, 87, NT_STATUS_INVALID_PARAMETER_MIX}, { 249 ERRDOS, 87, NT_STATUS_INVALID_PARAMETER_MIX}, {
254 ERRHRD, ERRgeneral, NT_STATUS_INVALID_QUOTA_LOWER}, { 250 ERRHRD, ERRgeneral, NT_STATUS_INVALID_QUOTA_LOWER}, {
255 ERRHRD, ERRgeneral, NT_STATUS_DISK_CORRUPT_ERROR}, { 251 ERRHRD, ERRgeneral, NT_STATUS_DISK_CORRUPT_ERROR}, {
256 ERRDOS, ERRbadfile, NT_STATUS_OBJECT_NAME_INVALID}, { /* mapping changed since shell does lookup on * and expects file not found */ 252 /* mapping changed since shell does lookup on * expects FileNotFound */
253 ERRDOS, ERRbadfile, NT_STATUS_OBJECT_NAME_INVALID}, {
257 ERRDOS, ERRbadfile, NT_STATUS_OBJECT_NAME_NOT_FOUND}, { 254 ERRDOS, ERRbadfile, NT_STATUS_OBJECT_NAME_NOT_FOUND}, {
258 ERRDOS, ERRalreadyexists, NT_STATUS_OBJECT_NAME_COLLISION}, { 255 ERRDOS, ERRalreadyexists, NT_STATUS_OBJECT_NAME_COLLISION}, {
259 ERRHRD, ERRgeneral, NT_STATUS_HANDLE_NOT_WAITABLE}, { 256 ERRHRD, ERRgeneral, NT_STATUS_HANDLE_NOT_WAITABLE}, {
@@ -820,7 +817,8 @@ map_smb_to_linux_error(struct smb_hdr *smb, int logErr)
820 /* old style errors */ 817 /* old style errors */
821 818
822 /* DOS class smb error codes - map DOS */ 819 /* DOS class smb error codes - map DOS */
823 if (smberrclass == ERRDOS) { /* 1 byte field no need to byte reverse */ 820 if (smberrclass == ERRDOS) {
821 /* 1 byte field no need to byte reverse */
824 for (i = 0; 822 for (i = 0;
825 i < 823 i <
826 sizeof(mapping_table_ERRDOS) / 824 sizeof(mapping_table_ERRDOS) /
@@ -834,7 +832,8 @@ map_smb_to_linux_error(struct smb_hdr *smb, int logErr)
834 } 832 }
835 /* else try next error mapping one to see if match */ 833 /* else try next error mapping one to see if match */
836 } 834 }
837 } else if (smberrclass == ERRSRV) { /* server class of error codes */ 835 } else if (smberrclass == ERRSRV) {
836 /* server class of error codes */
838 for (i = 0; 837 for (i = 0;
839 i < 838 i <
840 sizeof(mapping_table_ERRSRV) / 839 sizeof(mapping_table_ERRSRV) /
@@ -922,8 +921,8 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
922{ 921{
923 struct timespec ts; 922 struct timespec ts;
924 int sec, min, days, month, year; 923 int sec, min, days, month, year;
925 SMB_TIME * st = (SMB_TIME *)&time; 924 SMB_TIME *st = (SMB_TIME *)&time;
926 SMB_DATE * sd = (SMB_DATE *)&date; 925 SMB_DATE *sd = (SMB_DATE *)&date;
927 926
928 cFYI(1, ("date %d time %d", date, time)); 927 cFYI(1, ("date %d time %d", date, time));
929 928
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 0f22def4bdff..32b445edc882 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Directory search handling 4 * Directory search handling
5 * 5 *
6 * Copyright (C) International Business Machines Corp., 2004, 2007 6 * Copyright (C) International Business Machines Corp., 2004, 2008
7 * Author(s): Steve French (sfrench@us.ibm.com) 7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * 8 *
9 * This library is free software; you can redistribute it and/or modify 9 * This library is free software; you can redistribute it and/or modify
@@ -42,17 +42,18 @@ static void dump_cifs_file_struct(struct file *file, char *label)
42 cFYI(1, ("empty cifs private file data")); 42 cFYI(1, ("empty cifs private file data"));
43 return; 43 return;
44 } 44 }
45 if (cf->invalidHandle) { 45 if (cf->invalidHandle)
46 cFYI(1, ("invalid handle")); 46 cFYI(1, ("invalid handle"));
47 } 47 if (cf->srch_inf.endOfSearch)
48 if (cf->srch_inf.endOfSearch) {
49 cFYI(1, ("end of search")); 48 cFYI(1, ("end of search"));
50 } 49 if (cf->srch_inf.emptyDir)
51 if (cf->srch_inf.emptyDir) {
52 cFYI(1, ("empty dir")); 50 cFYI(1, ("empty dir"));
53 }
54 } 51 }
55} 52}
53#else
54static inline void dump_cifs_file_struct(struct file *file, char *label)
55{
56}
56#endif /* DEBUG2 */ 57#endif /* DEBUG2 */
57 58
58/* Returns one if new inode created (which therefore needs to be hashed) */ 59/* Returns one if new inode created (which therefore needs to be hashed) */
@@ -150,7 +151,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
150 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime)); 151 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
151 } else { /* legacy, OS2 and DOS style */ 152 } else { /* legacy, OS2 and DOS style */
152/* struct timespec ts;*/ 153/* struct timespec ts;*/
153 FIND_FILE_STANDARD_INFO * pfindData = 154 FIND_FILE_STANDARD_INFO *pfindData =
154 (FIND_FILE_STANDARD_INFO *)buf; 155 (FIND_FILE_STANDARD_INFO *)buf;
155 156
156 tmp_inode->i_mtime = cnvrtDosUnixTm( 157 tmp_inode->i_mtime = cnvrtDosUnixTm(
@@ -198,9 +199,8 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
198 if (attr & ATTR_DIRECTORY) { 199 if (attr & ATTR_DIRECTORY) {
199 *pobject_type = DT_DIR; 200 *pobject_type = DT_DIR;
200 /* override default perms since we do not lock dirs */ 201 /* override default perms since we do not lock dirs */
201 if (atomic_read(&cifsInfo->inUse) == 0) { 202 if (atomic_read(&cifsInfo->inUse) == 0)
202 tmp_inode->i_mode = cifs_sb->mnt_dir_mode; 203 tmp_inode->i_mode = cifs_sb->mnt_dir_mode;
203 }
204 tmp_inode->i_mode |= S_IFDIR; 204 tmp_inode->i_mode |= S_IFDIR;
205 } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && 205 } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) &&
206 (attr & ATTR_SYSTEM)) { 206 (attr & ATTR_SYSTEM)) {
@@ -231,9 +231,8 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
231 } /* could add code here - to validate if device or weird share type? */ 231 } /* could add code here - to validate if device or weird share type? */
232 232
233 /* can not fill in nlink here as in qpathinfo version and Unx search */ 233 /* can not fill in nlink here as in qpathinfo version and Unx search */
234 if (atomic_read(&cifsInfo->inUse) == 0) { 234 if (atomic_read(&cifsInfo->inUse) == 0)
235 atomic_set(&cifsInfo->inUse, 1); 235 atomic_set(&cifsInfo->inUse, 1);
236 }
237 236
238 spin_lock(&tmp_inode->i_lock); 237 spin_lock(&tmp_inode->i_lock);
239 if (is_size_safe_to_change(cifsInfo, end_of_file)) { 238 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
@@ -461,9 +460,8 @@ static int initiate_cifs_search(const int xid, struct file *file)
461 460
462 full_path = build_path_from_dentry(file->f_path.dentry); 461 full_path = build_path_from_dentry(file->f_path.dentry);
463 462
464 if (full_path == NULL) { 463 if (full_path == NULL)
465 return -ENOMEM; 464 return -ENOMEM;
466 }
467 465
468 cFYI(1, ("Full path: %s start at: %lld", full_path, file->f_pos)); 466 cFYI(1, ("Full path: %s start at: %lld", full_path, file->f_pos));
469 467
@@ -471,9 +469,9 @@ ffirst_retry:
471 /* test for Unix extensions */ 469 /* test for Unix extensions */
472 /* but now check for them on the share/mount not on the SMB session */ 470 /* but now check for them on the share/mount not on the SMB session */
473/* if (pTcon->ses->capabilities & CAP_UNIX) { */ 471/* if (pTcon->ses->capabilities & CAP_UNIX) { */
474 if (pTcon->unix_ext) { 472 if (pTcon->unix_ext)
475 cifsFile->srch_inf.info_level = SMB_FIND_FILE_UNIX; 473 cifsFile->srch_inf.info_level = SMB_FIND_FILE_UNIX;
476 } else if ((pTcon->ses->capabilities & 474 else if ((pTcon->ses->capabilities &
477 (CAP_NT_SMBS | CAP_NT_FIND)) == 0) { 475 (CAP_NT_SMBS | CAP_NT_FIND)) == 0) {
478 cifsFile->srch_inf.info_level = SMB_FIND_FILE_INFO_STANDARD; 476 cifsFile->srch_inf.info_level = SMB_FIND_FILE_INFO_STANDARD;
479 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 477 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
@@ -514,10 +512,10 @@ static int cifs_unicode_bytelen(char *str)
514static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level) 512static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level)
515{ 513{
516 char *new_entry; 514 char *new_entry;
517 FILE_DIRECTORY_INFO * pDirInfo = (FILE_DIRECTORY_INFO *)old_entry; 515 FILE_DIRECTORY_INFO *pDirInfo = (FILE_DIRECTORY_INFO *)old_entry;
518 516
519 if (level == SMB_FIND_FILE_INFO_STANDARD) { 517 if (level == SMB_FIND_FILE_INFO_STANDARD) {
520 FIND_FILE_STANDARD_INFO * pfData; 518 FIND_FILE_STANDARD_INFO *pfData;
521 pfData = (FIND_FILE_STANDARD_INFO *)pDirInfo; 519 pfData = (FIND_FILE_STANDARD_INFO *)pDirInfo;
522 520
523 new_entry = old_entry + sizeof(FIND_FILE_STANDARD_INFO) + 521 new_entry = old_entry + sizeof(FIND_FILE_STANDARD_INFO) +
@@ -553,7 +551,7 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
553 int len = 0; 551 int len = 0;
554 552
555 if (cfile->srch_inf.info_level == SMB_FIND_FILE_UNIX) { 553 if (cfile->srch_inf.info_level == SMB_FIND_FILE_UNIX) {
556 FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry; 554 FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry;
557 filename = &pFindData->FileName[0]; 555 filename = &pFindData->FileName[0];
558 if (cfile->srch_inf.unicode) { 556 if (cfile->srch_inf.unicode) {
559 len = cifs_unicode_bytelen(filename); 557 len = cifs_unicode_bytelen(filename);
@@ -562,30 +560,30 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
562 len = strnlen(filename, 5); 560 len = strnlen(filename, 5);
563 } 561 }
564 } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_DIRECTORY_INFO) { 562 } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_DIRECTORY_INFO) {
565 FILE_DIRECTORY_INFO * pFindData = 563 FILE_DIRECTORY_INFO *pFindData =
566 (FILE_DIRECTORY_INFO *)current_entry; 564 (FILE_DIRECTORY_INFO *)current_entry;
567 filename = &pFindData->FileName[0]; 565 filename = &pFindData->FileName[0];
568 len = le32_to_cpu(pFindData->FileNameLength); 566 len = le32_to_cpu(pFindData->FileNameLength);
569 } else if (cfile->srch_inf.info_level == 567 } else if (cfile->srch_inf.info_level ==
570 SMB_FIND_FILE_FULL_DIRECTORY_INFO) { 568 SMB_FIND_FILE_FULL_DIRECTORY_INFO) {
571 FILE_FULL_DIRECTORY_INFO * pFindData = 569 FILE_FULL_DIRECTORY_INFO *pFindData =
572 (FILE_FULL_DIRECTORY_INFO *)current_entry; 570 (FILE_FULL_DIRECTORY_INFO *)current_entry;
573 filename = &pFindData->FileName[0]; 571 filename = &pFindData->FileName[0];
574 len = le32_to_cpu(pFindData->FileNameLength); 572 len = le32_to_cpu(pFindData->FileNameLength);
575 } else if (cfile->srch_inf.info_level == 573 } else if (cfile->srch_inf.info_level ==
576 SMB_FIND_FILE_ID_FULL_DIR_INFO) { 574 SMB_FIND_FILE_ID_FULL_DIR_INFO) {
577 SEARCH_ID_FULL_DIR_INFO * pFindData = 575 SEARCH_ID_FULL_DIR_INFO *pFindData =
578 (SEARCH_ID_FULL_DIR_INFO *)current_entry; 576 (SEARCH_ID_FULL_DIR_INFO *)current_entry;
579 filename = &pFindData->FileName[0]; 577 filename = &pFindData->FileName[0];
580 len = le32_to_cpu(pFindData->FileNameLength); 578 len = le32_to_cpu(pFindData->FileNameLength);
581 } else if (cfile->srch_inf.info_level == 579 } else if (cfile->srch_inf.info_level ==
582 SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { 580 SMB_FIND_FILE_BOTH_DIRECTORY_INFO) {
583 FILE_BOTH_DIRECTORY_INFO * pFindData = 581 FILE_BOTH_DIRECTORY_INFO *pFindData =
584 (FILE_BOTH_DIRECTORY_INFO *)current_entry; 582 (FILE_BOTH_DIRECTORY_INFO *)current_entry;
585 filename = &pFindData->FileName[0]; 583 filename = &pFindData->FileName[0];
586 len = le32_to_cpu(pFindData->FileNameLength); 584 len = le32_to_cpu(pFindData->FileNameLength);
587 } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) { 585 } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) {
588 FIND_FILE_STANDARD_INFO * pFindData = 586 FIND_FILE_STANDARD_INFO *pFindData =
589 (FIND_FILE_STANDARD_INFO *)current_entry; 587 (FIND_FILE_STANDARD_INFO *)current_entry;
590 filename = &pFindData->FileName[0]; 588 filename = &pFindData->FileName[0];
591 len = pFindData->FileNameLength; 589 len = pFindData->FileNameLength;
@@ -666,9 +664,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
666 . and .. for the root of a drive and for those we need 664 . and .. for the root of a drive and for those we need
667 to start two entries earlier */ 665 to start two entries earlier */
668 666
669#ifdef CONFIG_CIFS_DEBUG2
670 dump_cifs_file_struct(file, "In fce "); 667 dump_cifs_file_struct(file, "In fce ");
671#endif
672 if (((index_to_find < cifsFile->srch_inf.index_of_last_entry) && 668 if (((index_to_find < cifsFile->srch_inf.index_of_last_entry) &&
673 is_dir_changed(file)) || 669 is_dir_changed(file)) ||
674 (index_to_find < first_entry_in_buffer)) { 670 (index_to_find < first_entry_in_buffer)) {
@@ -718,7 +714,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
718 pos_in_buf = index_to_find - first_entry_in_buffer; 714 pos_in_buf = index_to_find - first_entry_in_buffer;
719 cFYI(1, ("found entry - pos_in_buf %d", pos_in_buf)); 715 cFYI(1, ("found entry - pos_in_buf %d", pos_in_buf));
720 716
721 for (i=0; (i < (pos_in_buf)) && (current_entry != NULL); i++) { 717 for (i = 0; (i < (pos_in_buf)) && (current_entry != NULL); i++) {
722 /* go entry by entry figuring out which is first */ 718 /* go entry by entry figuring out which is first */
723 current_entry = nxt_dir_entry(current_entry, end_of_smb, 719 current_entry = nxt_dir_entry(current_entry, end_of_smb,
724 cifsFile->srch_inf.info_level); 720 cifsFile->srch_inf.info_level);
@@ -793,7 +789,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
793 filename = &pFindData->FileName[0]; 789 filename = &pFindData->FileName[0];
794 len = le32_to_cpu(pFindData->FileNameLength); 790 len = le32_to_cpu(pFindData->FileNameLength);
795 } else if (level == SMB_FIND_FILE_INFO_STANDARD) { 791 } else if (level == SMB_FIND_FILE_INFO_STANDARD) {
796 FIND_FILE_STANDARD_INFO * pFindData = 792 FIND_FILE_STANDARD_INFO *pFindData =
797 (FIND_FILE_STANDARD_INFO *)current_entry; 793 (FIND_FILE_STANDARD_INFO *)current_entry;
798 filename = &pFindData->FileName[0]; 794 filename = &pFindData->FileName[0];
799 /* one byte length, no name conversion */ 795 /* one byte length, no name conversion */
@@ -928,7 +924,7 @@ static int cifs_save_resume_key(const char *current_entry,
928 level = cifsFile->srch_inf.info_level; 924 level = cifsFile->srch_inf.info_level;
929 925
930 if (level == SMB_FIND_FILE_UNIX) { 926 if (level == SMB_FIND_FILE_UNIX) {
931 FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry; 927 FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry;
932 928
933 filename = &pFindData->FileName[0]; 929 filename = &pFindData->FileName[0];
934 if (cifsFile->srch_inf.unicode) { 930 if (cifsFile->srch_inf.unicode) {
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index d2153abcba6d..ed150efbe27c 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -417,10 +417,6 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
417 417
418 calc_lanman_hash(ses, lnm_session_key); 418 calc_lanman_hash(ses, lnm_session_key);
419 ses->flags |= CIFS_SES_LANMAN; 419 ses->flags |= CIFS_SES_LANMAN;
420/* #ifdef CONFIG_CIFS_DEBUG2
421 cifs_dump_mem("cryptkey: ",ses->server->cryptKey,
422 CIFS_SESS_KEY_SIZE);
423#endif */
424 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE); 420 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE);
425 bcc_ptr += CIFS_SESS_KEY_SIZE; 421 bcc_ptr += CIFS_SESS_KEY_SIZE;
426 422
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index cfa6d21fb4e8..04943c976f98 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -114,42 +114,42 @@ static uchar sbox[8][4][16] = {
114 {{14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7}, 114 {{14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7},
115 {0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8}, 115 {0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8},
116 {4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0}, 116 {4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0},
117 {15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13}}, 117 {15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13} },
118 118
119 {{15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10}, 119 {{15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10},
120 {3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5}, 120 {3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5},
121 {0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15}, 121 {0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15},
122 {13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9}}, 122 {13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9} },
123 123
124 {{10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8}, 124 {{10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8},
125 {13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1}, 125 {13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1},
126 {13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7}, 126 {13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7},
127 {1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12}}, 127 {1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12} },
128 128
129 {{7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15}, 129 {{7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15},
130 {13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9}, 130 {13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9},
131 {10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4}, 131 {10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4},
132 {3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14}}, 132 {3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14} },
133 133
134 {{2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9}, 134 {{2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9},
135 {14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6}, 135 {14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6},
136 {4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14}, 136 {4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14},
137 {11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3}}, 137 {11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3} },
138 138
139 {{12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11}, 139 {{12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11},
140 {10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8}, 140 {10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8},
141 {9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6}, 141 {9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6},
142 {4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13}}, 142 {4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13} },
143 143
144 {{4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1}, 144 {{4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1},
145 {13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6}, 145 {13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6},
146 {1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2}, 146 {1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2},
147 {6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12}}, 147 {6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12} },
148 148
149 {{13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7}, 149 {{13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7},
150 {1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2}, 150 {1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2},
151 {7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8}, 151 {7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8},
152 {2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11}} 152 {2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11} }
153}; 153};
154 154
155static void 155static void
@@ -313,9 +313,8 @@ str_to_key(unsigned char *str, unsigned char *key)
313 key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6); 313 key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
314 key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7); 314 key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
315 key[7] = str[6] & 0x7F; 315 key[7] = str[6] & 0x7F;
316 for (i = 0; i < 8; i++) { 316 for (i = 0; i < 8; i++)
317 key[i] = (key[i] << 1); 317 key[i] = (key[i] << 1);
318 }
319} 318}
320 319
321static void 320static void
@@ -344,9 +343,8 @@ smbhash(unsigned char *out, unsigned char *in, unsigned char *key, int forw)
344 343
345 dohash(outb, inb, keyb, forw); 344 dohash(outb, inb, keyb, forw);
346 345
347 for (i = 0; i < 8; i++) { 346 for (i = 0; i < 8; i++)
348 out[i] = 0; 347 out[i] = 0;
349 }
350 348
351 for (i = 0; i < 64; i++) { 349 for (i = 0; i < 64; i++) {
352 if (outb[i]) 350 if (outb[i])
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 50b623ad9320..3612d6c0a0bb 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/transport.c 2 * fs/cifs/transport.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2007 4 * Copyright (C) International Business Machines Corp., 2002,2008
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * Jeremy Allison (jra@samba.org) 2006. 6 * Jeremy Allison (jra@samba.org) 2006.
7 * 7 *
@@ -358,9 +358,9 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf,
358 } else if (ses->status != CifsGood) { 358 } else if (ses->status != CifsGood) {
359 /* check if SMB session is bad because we are setting it up */ 359 /* check if SMB session is bad because we are setting it up */
360 if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && 360 if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
361 (in_buf->Command != SMB_COM_NEGOTIATE)) { 361 (in_buf->Command != SMB_COM_NEGOTIATE))
362 return -EAGAIN; 362 return -EAGAIN;
363 } /* else ok - we are setting up session */ 363 /* else ok - we are setting up session */
364 } 364 }
365 *ppmidQ = AllocMidQEntry(in_buf, ses); 365 *ppmidQ = AllocMidQEntry(in_buf, ses);
366 if (*ppmidQ == NULL) 366 if (*ppmidQ == NULL)
@@ -437,9 +437,8 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
437 iov[0].iov_len = in_buf->smb_buf_length + 4; 437 iov[0].iov_len = in_buf->smb_buf_length + 4;
438 flags |= CIFS_NO_RESP; 438 flags |= CIFS_NO_RESP;
439 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags); 439 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
440#ifdef CONFIG_CIFS_DEBUG2 440 cFYI(DBG2, ("SendRcvNoRsp flags %d rc %d", flags, rc));
441 cFYI(1, ("SendRcvNoR flags %d rc %d", flags, rc)); 441
442#endif
443 return rc; 442 return rc;
444} 443}
445 444
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 54e8ef96cb79..8cd6a445b017 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -139,9 +139,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
139 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) { 139 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) {
140 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 140 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
141 goto set_ea_exit; 141 goto set_ea_exit;
142 if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) { 142 if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0)
143 cFYI(1, ("attempt to set cifs inode metadata")); 143 cFYI(1, ("attempt to set cifs inode metadata"));
144 } 144
145 ea_name += 5; /* skip past user. prefix */ 145 ea_name += 5; /* skip past user. prefix */
146 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, 146 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
147 (__u16)value_size, cifs_sb->local_nls, 147 (__u16)value_size, cifs_sb->local_nls,
@@ -262,7 +262,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
262 cifs_sb->mnt_cifs_flags & 262 cifs_sb->mnt_cifs_flags &
263 CIFS_MOUNT_MAP_SPECIAL_CHR); 263 CIFS_MOUNT_MAP_SPECIAL_CHR);
264#ifdef CONFIG_CIFS_EXPERIMENTAL 264#ifdef CONFIG_CIFS_EXPERIMENTAL
265 else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 265 else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
266 __u16 fid; 266 __u16 fid;
267 int oplock = FALSE; 267 int oplock = FALSE;
268 struct cifs_ntsd *pacl = NULL; 268 struct cifs_ntsd *pacl = NULL;
@@ -303,11 +303,10 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
303 } else if (strncmp(ea_name, 303 } else if (strncmp(ea_name,
304 CIFS_XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) { 304 CIFS_XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) {
305 cFYI(1, ("Security xattr namespace not supported yet")); 305 cFYI(1, ("Security xattr namespace not supported yet"));
306 } else { 306 } else
307 cFYI(1, 307 cFYI(1,
308 ("illegal xattr request %s (only user namespace supported)", 308 ("illegal xattr request %s (only user namespace supported)",
309 ea_name)); 309 ea_name));
310 }
311 310
312 /* We could add an additional check for streams ie 311 /* We could add an additional check for streams ie
313 if proc/fs/cifs/streamstoxattr is set then 312 if proc/fs/cifs/streamstoxattr is set then
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 350680fd7da7..0c3b618c15b3 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -23,7 +23,6 @@
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/vfs.h> 24#include <linux/vfs.h>
25#include <linux/mutex.h> 25#include <linux/mutex.h>
26#include <asm/semaphore.h>
27 26
28#include <asm/uaccess.h> 27#include <asm/uaccess.h>
29 28
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index d26e2826ba5b..e9602d85c11d 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -29,10 +29,6 @@
29 29
30#define DEBUGFS_MAGIC 0x64626720 30#define DEBUGFS_MAGIC 0x64626720
31 31
32/* declared over in file.c */
33extern struct file_operations debugfs_file_operations;
34extern struct inode_operations debugfs_link_operations;
35
36static struct vfsmount *debugfs_mount; 32static struct vfsmount *debugfs_mount;
37static int debugfs_mount_count; 33static int debugfs_mount_count;
38 34
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index d30ea8b433a2..7a8824f475f2 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -37,7 +37,6 @@
37#include <linux/jhash.h> 37#include <linux/jhash.h>
38#include <linux/miscdevice.h> 38#include <linux/miscdevice.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <asm/semaphore.h>
41#include <asm/uaccess.h> 40#include <asm/uaccess.h>
42 41
43#include <linux/dlm.h> 42#include <linux/dlm.h>
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 035e6f9990b0..67522c268c14 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -215,6 +215,8 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
215 ls->ls_recover_nodeid = nodeid; 215 ls->ls_recover_nodeid = nodeid;
216 216
217 if (nodeid == dlm_our_nodeid()) { 217 if (nodeid == dlm_our_nodeid()) {
218 ls->ls_recover_buf->rc_header.h_length =
219 dlm_config.ci_buffer_size;
218 dlm_copy_master_names(ls, last_name, last_len, 220 dlm_copy_master_names(ls, last_name, last_len,
219 ls->ls_recover_buf->rc_buf, 221 ls->ls_recover_buf->rc_buf,
220 max_size, nodeid); 222 max_size, nodeid);
diff --git a/fs/dquot.c b/fs/dquot.c
index 9c7feb62eed1..41b9dbd68b0e 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1522,8 +1522,8 @@ int vfs_quota_off(struct super_block *sb, int type)
1522 truncate_inode_pages(&toputinode[cnt]->i_data, 0); 1522 truncate_inode_pages(&toputinode[cnt]->i_data, 0);
1523 mutex_unlock(&toputinode[cnt]->i_mutex); 1523 mutex_unlock(&toputinode[cnt]->i_mutex);
1524 mark_inode_dirty(toputinode[cnt]); 1524 mark_inode_dirty(toputinode[cnt]);
1525 iput(toputinode[cnt]);
1526 } 1525 }
1526 iput(toputinode[cnt]);
1527 mutex_unlock(&dqopt->dqonoff_mutex); 1527 mutex_unlock(&dqopt->dqonoff_mutex);
1528 } 1528 }
1529 if (sb->s_bdev) 1529 if (sb->s_bdev)
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 841a032050a7..5e596583946c 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -80,8 +80,8 @@ static void ecryptfs_d_release(struct dentry *dentry)
80{ 80{
81 if (ecryptfs_dentry_to_private(dentry)) { 81 if (ecryptfs_dentry_to_private(dentry)) {
82 if (ecryptfs_dentry_to_lower(dentry)) { 82 if (ecryptfs_dentry_to_lower(dentry)) {
83 mntput(ecryptfs_dentry_to_lower_mnt(dentry));
84 dput(ecryptfs_dentry_to_lower(dentry)); 83 dput(ecryptfs_dentry_to_lower(dentry));
84 mntput(ecryptfs_dentry_to_lower_mnt(dentry));
85 } 85 }
86 kmem_cache_free(ecryptfs_dentry_info_cache, 86 kmem_cache_free(ecryptfs_dentry_info_cache,
87 ecryptfs_dentry_to_private(dentry)); 87 ecryptfs_dentry_to_private(dentry));
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index dc74b186145d..6df1debdccce 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -263,52 +263,102 @@ out:
263 return 0; 263 return 0;
264} 264}
265 265
266/* This function must zero any hole we create */ 266/**
267 * ecryptfs_prepare_write
268 * @file: The eCryptfs file
269 * @page: The eCryptfs page
270 * @from: The start byte from which we will write
271 * @to: The end byte to which we will write
272 *
273 * This function must zero any hole we create
274 *
275 * Returns zero on success; non-zero otherwise
276 */
267static int ecryptfs_prepare_write(struct file *file, struct page *page, 277static int ecryptfs_prepare_write(struct file *file, struct page *page,
268 unsigned from, unsigned to) 278 unsigned from, unsigned to)
269{ 279{
270 int rc = 0;
271 loff_t prev_page_end_size; 280 loff_t prev_page_end_size;
281 int rc = 0;
272 282
273 if (!PageUptodate(page)) { 283 if (!PageUptodate(page)) {
274 rc = ecryptfs_read_lower_page_segment(page, page->index, 0, 284 struct ecryptfs_crypt_stat *crypt_stat =
275 PAGE_CACHE_SIZE, 285 &ecryptfs_inode_to_private(
276 page->mapping->host); 286 file->f_path.dentry->d_inode)->crypt_stat;
277 if (rc) { 287
278 printk(KERN_ERR "%s: Error attemping to read lower " 288 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)
279 "page segment; rc = [%d]\n", __FUNCTION__, rc); 289 || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) {
280 ClearPageUptodate(page); 290 rc = ecryptfs_read_lower_page_segment(
281 goto out; 291 page, page->index, 0, PAGE_CACHE_SIZE,
282 } else 292 page->mapping->host);
293 if (rc) {
294 printk(KERN_ERR "%s: Error attemping to read "
295 "lower page segment; rc = [%d]\n",
296 __FUNCTION__, rc);
297 ClearPageUptodate(page);
298 goto out;
299 } else
300 SetPageUptodate(page);
301 } else if (crypt_stat->flags & ECRYPTFS_VIEW_AS_ENCRYPTED) {
302 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
303 rc = ecryptfs_copy_up_encrypted_with_header(
304 page, crypt_stat);
305 if (rc) {
306 printk(KERN_ERR "%s: Error attempting "
307 "to copy the encrypted content "
308 "from the lower file whilst "
309 "inserting the metadata from "
310 "the xattr into the header; rc "
311 "= [%d]\n", __FUNCTION__, rc);
312 ClearPageUptodate(page);
313 goto out;
314 }
315 SetPageUptodate(page);
316 } else {
317 rc = ecryptfs_read_lower_page_segment(
318 page, page->index, 0, PAGE_CACHE_SIZE,
319 page->mapping->host);
320 if (rc) {
321 printk(KERN_ERR "%s: Error reading "
322 "page; rc = [%d]\n",
323 __FUNCTION__, rc);
324 ClearPageUptodate(page);
325 goto out;
326 }
327 SetPageUptodate(page);
328 }
329 } else {
330 rc = ecryptfs_decrypt_page(page);
331 if (rc) {
332 printk(KERN_ERR "%s: Error decrypting page "
333 "at index [%ld]; rc = [%d]\n",
334 __FUNCTION__, page->index, rc);
335 ClearPageUptodate(page);
336 goto out;
337 }
283 SetPageUptodate(page); 338 SetPageUptodate(page);
339 }
284 } 340 }
285
286 prev_page_end_size = ((loff_t)page->index << PAGE_CACHE_SHIFT); 341 prev_page_end_size = ((loff_t)page->index << PAGE_CACHE_SHIFT);
287 342 /* If creating a page or more of holes, zero them out via truncate.
288 /* 343 * Note, this will increase i_size. */
289 * If creating a page or more of holes, zero them out via truncate.
290 * Note, this will increase i_size.
291 */
292 if (page->index != 0) { 344 if (page->index != 0) {
293 if (prev_page_end_size > i_size_read(page->mapping->host)) { 345 if (prev_page_end_size > i_size_read(page->mapping->host)) {
294 rc = ecryptfs_truncate(file->f_path.dentry, 346 rc = ecryptfs_truncate(file->f_path.dentry,
295 prev_page_end_size); 347 prev_page_end_size);
296 if (rc) { 348 if (rc) {
297 printk(KERN_ERR "Error on attempt to " 349 printk(KERN_ERR "%s: Error on attempt to "
298 "truncate to (higher) offset [%lld];" 350 "truncate to (higher) offset [%lld];"
299 " rc = [%d]\n", prev_page_end_size, rc); 351 " rc = [%d]\n", __FUNCTION__,
352 prev_page_end_size, rc);
300 goto out; 353 goto out;
301 } 354 }
302 } 355 }
303 } 356 }
304 /* 357 /* Writing to a new page, and creating a small hole from start
305 * Writing to a new page, and creating a small hole from start of page? 358 * of page? Zero it out. */
306 * Zero it out. 359 if ((i_size_read(page->mapping->host) == prev_page_end_size)
307 */ 360 && (from != 0))
308 if ((i_size_read(page->mapping->host) == prev_page_end_size) &&
309 (from != 0)) {
310 zero_user(page, 0, PAGE_CACHE_SIZE); 361 zero_user(page, 0, PAGE_CACHE_SIZE);
311 }
312out: 362out:
313 return rc; 363 return rc;
314} 364}
diff --git a/fs/efs/dir.c b/fs/efs/dir.c
index dfb5cb400217..49308a29798a 100644
--- a/fs/efs/dir.c
+++ b/fs/efs/dir.c
@@ -5,8 +5,8 @@
5 */ 5 */
6 6
7#include <linux/buffer_head.h> 7#include <linux/buffer_head.h>
8#include <linux/efs_fs.h>
9#include <linux/smp_lock.h> 8#include <linux/smp_lock.h>
9#include "efs.h"
10 10
11static int efs_readdir(struct file *, void *, filldir_t); 11static int efs_readdir(struct file *, void *, filldir_t);
12 12
diff --git a/fs/efs/efs.h b/fs/efs/efs.h
new file mode 100644
index 000000000000..d8305b582ab0
--- /dev/null
+++ b/fs/efs/efs.h
@@ -0,0 +1,140 @@
1/*
2 * Copyright (c) 1999 Al Smith
3 *
4 * Portions derived from work (c) 1995,1996 Christian Vogelgsang.
5 * Portions derived from IRIX header files (c) 1988 Silicon Graphics
6 */
7#ifndef _EFS_EFS_H_
8#define _EFS_EFS_H_
9
10#include <linux/fs.h>
11#include <asm/uaccess.h>
12
13#define EFS_VERSION "1.0a"
14
15static const char cprt[] = "EFS: "EFS_VERSION" - (c) 1999 Al Smith <Al.Smith@aeschi.ch.eu.org>";
16
17
18/* 1 block is 512 bytes */
19#define EFS_BLOCKSIZE_BITS 9
20#define EFS_BLOCKSIZE (1 << EFS_BLOCKSIZE_BITS)
21
22typedef int32_t efs_block_t;
23typedef uint32_t efs_ino_t;
24
25#define EFS_DIRECTEXTENTS 12
26
27/*
28 * layout of an extent, in memory and on disk. 8 bytes exactly.
29 */
30typedef union extent_u {
31 unsigned char raw[8];
32 struct extent_s {
33 unsigned int ex_magic:8; /* magic # (zero) */
34 unsigned int ex_bn:24; /* basic block */
35 unsigned int ex_length:8; /* numblocks in this extent */
36 unsigned int ex_offset:24; /* logical offset into file */
37 } cooked;
38} efs_extent;
39
40typedef struct edevs {
41 __be16 odev;
42 __be32 ndev;
43} efs_devs;
44
45/*
46 * extent based filesystem inode as it appears on disk. The efs inode
47 * is exactly 128 bytes long.
48 */
49struct efs_dinode {
50 __be16 di_mode; /* mode and type of file */
51 __be16 di_nlink; /* number of links to file */
52 __be16 di_uid; /* owner's user id */
53 __be16 di_gid; /* owner's group id */
54 __be32 di_size; /* number of bytes in file */
55 __be32 di_atime; /* time last accessed */
56 __be32 di_mtime; /* time last modified */
57 __be32 di_ctime; /* time created */
58 __be32 di_gen; /* generation number */
59 __be16 di_numextents; /* # of extents */
60 u_char di_version; /* version of inode */
61 u_char di_spare; /* spare - used by AFS */
62 union di_addr {
63 efs_extent di_extents[EFS_DIRECTEXTENTS];
64 efs_devs di_dev; /* device for IFCHR/IFBLK */
65 } di_u;
66};
67
68/* efs inode storage in memory */
69struct efs_inode_info {
70 int numextents;
71 int lastextent;
72
73 efs_extent extents[EFS_DIRECTEXTENTS];
74 struct inode vfs_inode;
75};
76
77#include <linux/efs_fs_sb.h>
78
79#define EFS_DIRBSIZE_BITS EFS_BLOCKSIZE_BITS
80#define EFS_DIRBSIZE (1 << EFS_DIRBSIZE_BITS)
81
82struct efs_dentry {
83 __be32 inode;
84 unsigned char namelen;
85 char name[3];
86};
87
88#define EFS_DENTSIZE (sizeof(struct efs_dentry) - 3 + 1)
89#define EFS_MAXNAMELEN ((1 << (sizeof(char) * 8)) - 1)
90
91#define EFS_DIRBLK_HEADERSIZE 4
92#define EFS_DIRBLK_MAGIC 0xbeef /* moo */
93
94struct efs_dir {
95 __be16 magic;
96 unsigned char firstused;
97 unsigned char slots;
98
99 unsigned char space[EFS_DIRBSIZE - EFS_DIRBLK_HEADERSIZE];
100};
101
102#define EFS_MAXENTS \
103 ((EFS_DIRBSIZE - EFS_DIRBLK_HEADERSIZE) / \
104 (EFS_DENTSIZE + sizeof(char)))
105
106#define EFS_SLOTAT(dir, slot) EFS_REALOFF((dir)->space[slot])
107
108#define EFS_REALOFF(offset) ((offset << 1))
109
110
111static inline struct efs_inode_info *INODE_INFO(struct inode *inode)
112{
113 return container_of(inode, struct efs_inode_info, vfs_inode);
114}
115
116static inline struct efs_sb_info *SUPER_INFO(struct super_block *sb)
117{
118 return sb->s_fs_info;
119}
120
121struct statfs;
122struct fid;
123
124extern const struct inode_operations efs_dir_inode_operations;
125extern const struct file_operations efs_dir_operations;
126extern const struct address_space_operations efs_symlink_aops;
127
128extern struct inode *efs_iget(struct super_block *, unsigned long);
129extern efs_block_t efs_map_block(struct inode *, efs_block_t);
130extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int);
131
132extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *);
133extern struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid,
134 int fh_len, int fh_type);
135extern struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid,
136 int fh_len, int fh_type);
137extern struct dentry *efs_get_parent(struct dentry *);
138extern int efs_bmap(struct inode *, int);
139
140#endif /* _EFS_EFS_H_ */
diff --git a/fs/efs/file.c b/fs/efs/file.c
index 5db20129681e..1ccb364ffa63 100644
--- a/fs/efs/file.c
+++ b/fs/efs/file.c
@@ -7,7 +7,7 @@
7 */ 7 */
8 8
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10#include <linux/efs_fs.h> 10#include "efs.h"
11 11
12int efs_get_block(struct inode *inode, sector_t iblock, 12int efs_get_block(struct inode *inode, sector_t iblock,
13 struct buffer_head *bh_result, int create) 13 struct buffer_head *bh_result, int create)
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 627c3026946d..a8e7797b9477 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -7,11 +7,11 @@
7 * and from work (c) 1998 Mike Shaver. 7 * and from work (c) 1998 Mike Shaver.
8 */ 8 */
9 9
10#include <linux/efs_fs.h>
11#include <linux/efs_fs_sb.h>
12#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
13#include <linux/module.h> 11#include <linux/module.h>
14#include <linux/fs.h> 12#include <linux/fs.h>
13#include "efs.h"
14#include <linux/efs_fs_sb.h>
15 15
16static int efs_readpage(struct file *file, struct page *page) 16static int efs_readpage(struct file *file, struct page *page)
17{ 17{
@@ -140,7 +140,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
140 brelse(bh); 140 brelse(bh);
141 141
142#ifdef DEBUG 142#ifdef DEBUG
143 printk(KERN_DEBUG "EFS: read_inode(): inode %lu, extents %d, mode %o\n", 143 printk(KERN_DEBUG "EFS: efs_iget(): inode %lu, extents %d, mode %o\n",
144 inode->i_ino, in->numextents, inode->i_mode); 144 inode->i_ino, in->numextents, inode->i_mode);
145#endif 145#endif
146 146
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index e26704742d41..3a404e7fad53 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -8,9 +8,9 @@
8 8
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10#include <linux/string.h> 10#include <linux/string.h>
11#include <linux/efs_fs.h>
12#include <linux/smp_lock.h> 11#include <linux/smp_lock.h>
13#include <linux/exportfs.h> 12#include <linux/exportfs.h>
13#include "efs.h"
14 14
15 15
16static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) { 16static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) {
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 14082405cdd1..d733531b55e2 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -8,14 +8,15 @@
8 8
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/efs_fs.h>
12#include <linux/efs_vh.h>
13#include <linux/efs_fs_sb.h>
14#include <linux/exportfs.h> 11#include <linux/exportfs.h>
15#include <linux/slab.h> 12#include <linux/slab.h>
16#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
17#include <linux/vfs.h> 14#include <linux/vfs.h>
18 15
16#include "efs.h"
17#include <linux/efs_vh.h>
18#include <linux/efs_fs_sb.h>
19
19static int efs_statfs(struct dentry *dentry, struct kstatfs *buf); 20static int efs_statfs(struct dentry *dentry, struct kstatfs *buf);
20static int efs_fill_super(struct super_block *s, void *d, int silent); 21static int efs_fill_super(struct super_block *s, void *d, int silent);
21 22
diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c
index 1d30d2ff440f..41911ec83aaf 100644
--- a/fs/efs/symlink.c
+++ b/fs/efs/symlink.c
@@ -7,10 +7,10 @@
7 */ 7 */
8 8
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/efs_fs.h>
11#include <linux/pagemap.h> 10#include <linux/pagemap.h>
12#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
13#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
13#include "efs.h"
14 14
15static int efs_symlink_readpage(struct file *file, struct page *page) 15static int efs_symlink_readpage(struct file *file, struct page *page)
16{ 16{
diff --git a/fs/exec.c b/fs/exec.c
index a44b142fb460..54a0a557b678 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -173,8 +173,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
173 return NULL; 173 return NULL;
174 174
175 if (write) { 175 if (write) {
176 struct rlimit *rlim = current->signal->rlim;
177 unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; 176 unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
177 struct rlimit *rlim;
178
179 /*
180 * We've historically supported up to 32 pages (ARG_MAX)
181 * of argument strings even with small stacks
182 */
183 if (size <= ARG_MAX)
184 return page;
178 185
179 /* 186 /*
180 * Limit to 1/4-th the stack size for the argv+env strings. 187 * Limit to 1/4-th the stack size for the argv+env strings.
@@ -183,6 +190,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
183 * - the program will have a reasonable amount of stack left 190 * - the program will have a reasonable amount of stack left
184 * to work from. 191 * to work from.
185 */ 192 */
193 rlim = current->signal->rlim;
186 if (size > rlim[RLIMIT_STACK].rlim_cur / 4) { 194 if (size > rlim[RLIMIT_STACK].rlim_cur / 4) {
187 put_page(page); 195 put_page(page);
188 return NULL; 196 return NULL;
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 5deb8b74e649..08f647d8188d 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -253,7 +253,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
253 * it has too few free inodes left (min_inodes) or 253 * it has too few free inodes left (min_inodes) or
254 * it has too few free blocks left (min_blocks) or 254 * it has too few free blocks left (min_blocks) or
255 * it's already running too large debt (max_debt). 255 * it's already running too large debt (max_debt).
256 * Parent's group is prefered, if it doesn't satisfy these 256 * Parent's group is preferred, if it doesn't satisfy these
257 * conditions we search cyclically through the rest. If none 257 * conditions we search cyclically through the rest. If none
258 * of the groups look good we just look for a group with more 258 * of the groups look good we just look for a group with more
259 * free inodes than average (starting at parent's group). 259 * free inodes than average (starting at parent's group).
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index c62006805427..b8a2990bab83 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -239,7 +239,7 @@ no_block:
239 * @inode: owner 239 * @inode: owner
240 * @ind: descriptor of indirect block. 240 * @ind: descriptor of indirect block.
241 * 241 *
242 * This function returns the prefered place for block allocation. 242 * This function returns the preferred place for block allocation.
243 * It is used when heuristic for sequential allocation fails. 243 * It is used when heuristic for sequential allocation fails.
244 * Rules are: 244 * Rules are:
245 * + if there is a block to the left of our position - allocate near it. 245 * + if there is a block to the left of our position - allocate near it.
@@ -283,7 +283,7 @@ static unsigned long ext2_find_near(struct inode *inode, Indirect *ind)
283} 283}
284 284
285/** 285/**
286 * ext2_find_goal - find a prefered place for allocation. 286 * ext2_find_goal - find a preferred place for allocation.
287 * @inode: owner 287 * @inode: owner
288 * @block: block we want 288 * @block: block we want
289 * @partial: pointer to the last triple within a chain 289 * @partial: pointer to the last triple within a chain
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index b8ea11fee5c6..de876fa793e1 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -12,6 +12,7 @@
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/compat.h> 14#include <linux/compat.h>
15#include <linux/mount.h>
15#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
16#include <asm/current.h> 17#include <asm/current.h>
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
@@ -23,6 +24,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
23 struct ext2_inode_info *ei = EXT2_I(inode); 24 struct ext2_inode_info *ei = EXT2_I(inode);
24 unsigned int flags; 25 unsigned int flags;
25 unsigned short rsv_window_size; 26 unsigned short rsv_window_size;
27 int ret;
26 28
27 ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); 29 ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg);
28 30
@@ -34,14 +36,19 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
34 case EXT2_IOC_SETFLAGS: { 36 case EXT2_IOC_SETFLAGS: {
35 unsigned int oldflags; 37 unsigned int oldflags;
36 38
37 if (IS_RDONLY(inode)) 39 ret = mnt_want_write(filp->f_path.mnt);
38 return -EROFS; 40 if (ret)
41 return ret;
39 42
40 if (!is_owner_or_cap(inode)) 43 if (!is_owner_or_cap(inode)) {
41 return -EACCES; 44 ret = -EACCES;
45 goto setflags_out;
46 }
42 47
43 if (get_user(flags, (int __user *) arg)) 48 if (get_user(flags, (int __user *) arg)) {
44 return -EFAULT; 49 ret = -EFAULT;
50 goto setflags_out;
51 }
45 52
46 if (!S_ISDIR(inode->i_mode)) 53 if (!S_ISDIR(inode->i_mode))
47 flags &= ~EXT2_DIRSYNC_FL; 54 flags &= ~EXT2_DIRSYNC_FL;
@@ -50,7 +57,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
50 /* Is it quota file? Do not allow user to mess with it */ 57 /* Is it quota file? Do not allow user to mess with it */
51 if (IS_NOQUOTA(inode)) { 58 if (IS_NOQUOTA(inode)) {
52 mutex_unlock(&inode->i_mutex); 59 mutex_unlock(&inode->i_mutex);
53 return -EPERM; 60 ret = -EPERM;
61 goto setflags_out;
54 } 62 }
55 oldflags = ei->i_flags; 63 oldflags = ei->i_flags;
56 64
@@ -63,7 +71,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
63 if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { 71 if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
64 if (!capable(CAP_LINUX_IMMUTABLE)) { 72 if (!capable(CAP_LINUX_IMMUTABLE)) {
65 mutex_unlock(&inode->i_mutex); 73 mutex_unlock(&inode->i_mutex);
66 return -EPERM; 74 ret = -EPERM;
75 goto setflags_out;
67 } 76 }
68 } 77 }
69 78
@@ -75,20 +84,26 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
75 ext2_set_inode_flags(inode); 84 ext2_set_inode_flags(inode);
76 inode->i_ctime = CURRENT_TIME_SEC; 85 inode->i_ctime = CURRENT_TIME_SEC;
77 mark_inode_dirty(inode); 86 mark_inode_dirty(inode);
78 return 0; 87setflags_out:
88 mnt_drop_write(filp->f_path.mnt);
89 return ret;
79 } 90 }
80 case EXT2_IOC_GETVERSION: 91 case EXT2_IOC_GETVERSION:
81 return put_user(inode->i_generation, (int __user *) arg); 92 return put_user(inode->i_generation, (int __user *) arg);
82 case EXT2_IOC_SETVERSION: 93 case EXT2_IOC_SETVERSION:
83 if (!is_owner_or_cap(inode)) 94 if (!is_owner_or_cap(inode))
84 return -EPERM; 95 return -EPERM;
85 if (IS_RDONLY(inode)) 96 ret = mnt_want_write(filp->f_path.mnt);
86 return -EROFS; 97 if (ret)
87 if (get_user(inode->i_generation, (int __user *) arg)) 98 return ret;
88 return -EFAULT; 99 if (get_user(inode->i_generation, (int __user *) arg)) {
89 inode->i_ctime = CURRENT_TIME_SEC; 100 ret = -EFAULT;
90 mark_inode_dirty(inode); 101 } else {
91 return 0; 102 inode->i_ctime = CURRENT_TIME_SEC;
103 mark_inode_dirty(inode);
104 }
105 mnt_drop_write(filp->f_path.mnt);
106 return ret;
92 case EXT2_IOC_GETRSVSZ: 107 case EXT2_IOC_GETRSVSZ:
93 if (test_opt(inode->i_sb, RESERVATION) 108 if (test_opt(inode->i_sb, RESERVATION)
94 && S_ISREG(inode->i_mode) 109 && S_ISREG(inode->i_mode)
@@ -102,15 +117,16 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
102 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 117 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
103 return -ENOTTY; 118 return -ENOTTY;
104 119
105 if (IS_RDONLY(inode)) 120 if (!is_owner_or_cap(inode))
106 return -EROFS;
107
108 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
109 return -EACCES; 121 return -EACCES;
110 122
111 if (get_user(rsv_window_size, (int __user *)arg)) 123 if (get_user(rsv_window_size, (int __user *)arg))
112 return -EFAULT; 124 return -EFAULT;
113 125
126 ret = mnt_want_write(filp->f_path.mnt);
127 if (ret)
128 return ret;
129
114 if (rsv_window_size > EXT2_MAX_RESERVE_BLOCKS) 130 if (rsv_window_size > EXT2_MAX_RESERVE_BLOCKS)
115 rsv_window_size = EXT2_MAX_RESERVE_BLOCKS; 131 rsv_window_size = EXT2_MAX_RESERVE_BLOCKS;
116 132
@@ -131,6 +147,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
131 rsv->rsv_goal_size = rsv_window_size; 147 rsv->rsv_goal_size = rsv_window_size;
132 } 148 }
133 mutex_unlock(&ei->truncate_mutex); 149 mutex_unlock(&ei->truncate_mutex);
150 mnt_drop_write(filp->f_path.mnt);
134 return 0; 151 return 0;
135 } 152 }
136 default: 153 default:
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 3e8683dbb13f..a99d46f3b26e 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -835,7 +835,7 @@ ext2_xattr_cache_insert(struct buffer_head *bh)
835 struct mb_cache_entry *ce; 835 struct mb_cache_entry *ce;
836 int error; 836 int error;
837 837
838 ce = mb_cache_entry_alloc(ext2_xattr_cache); 838 ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS);
839 if (!ce) 839 if (!ce)
840 return -ENOMEM; 840 return -ENOMEM;
841 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); 841 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index d34e9967430a..a754d1848173 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -37,7 +37,7 @@ ext3_acl_from_disk(const void *value, size_t size)
37 return ERR_PTR(-EINVAL); 37 return ERR_PTR(-EINVAL);
38 if (count == 0) 38 if (count == 0)
39 return NULL; 39 return NULL;
40 acl = posix_acl_alloc(count, GFP_KERNEL); 40 acl = posix_acl_alloc(count, GFP_NOFS);
41 if (!acl) 41 if (!acl)
42 return ERR_PTR(-ENOMEM); 42 return ERR_PTR(-ENOMEM);
43 for (n=0; n < count; n++) { 43 for (n=0; n < count; n++) {
@@ -91,7 +91,7 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size)
91 91
92 *size = ext3_acl_size(acl->a_count); 92 *size = ext3_acl_size(acl->a_count);
93 ext_acl = kmalloc(sizeof(ext3_acl_header) + acl->a_count * 93 ext_acl = kmalloc(sizeof(ext3_acl_header) + acl->a_count *
94 sizeof(ext3_acl_entry), GFP_KERNEL); 94 sizeof(ext3_acl_entry), GFP_NOFS);
95 if (!ext_acl) 95 if (!ext_acl)
96 return ERR_PTR(-ENOMEM); 96 return ERR_PTR(-ENOMEM);
97 ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION); 97 ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION);
@@ -187,7 +187,7 @@ ext3_get_acl(struct inode *inode, int type)
187 } 187 }
188 retval = ext3_xattr_get(inode, name_index, "", NULL, 0); 188 retval = ext3_xattr_get(inode, name_index, "", NULL, 0);
189 if (retval > 0) { 189 if (retval > 0) {
190 value = kmalloc(retval, GFP_KERNEL); 190 value = kmalloc(retval, GFP_NOFS);
191 if (!value) 191 if (!value)
192 return ERR_PTR(-ENOMEM); 192 return ERR_PTR(-ENOMEM);
193 retval = ext3_xattr_get(inode, name_index, "", value, retval); 193 retval = ext3_xattr_get(inode, name_index, "", value, retval);
@@ -335,7 +335,7 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
335 if (error) 335 if (error)
336 goto cleanup; 336 goto cleanup;
337 } 337 }
338 clone = posix_acl_clone(acl, GFP_KERNEL); 338 clone = posix_acl_clone(acl, GFP_NOFS);
339 error = -ENOMEM; 339 error = -ENOMEM;
340 if (!clone) 340 if (!clone)
341 goto cleanup; 341 goto cleanup;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 4f4020c54683..96dd5573e49b 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -239,7 +239,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
239 * it has too few free inodes left (min_inodes) or 239 * it has too few free inodes left (min_inodes) or
240 * it has too few free blocks left (min_blocks) or 240 * it has too few free blocks left (min_blocks) or
241 * it's already running too large debt (max_debt). 241 * it's already running too large debt (max_debt).
242 * Parent's group is prefered, if it doesn't satisfy these 242 * Parent's group is preferred, if it doesn't satisfy these
243 * conditions we search cyclically through the rest. If none 243 * conditions we search cyclically through the rest. If none
244 * of the groups look good we just look for a group with more 244 * of the groups look good we just look for a group with more
245 * free inodes than average (starting at parent's group). 245 * free inodes than average (starting at parent's group).
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index eb95670a27eb..c683609b0e3a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -392,7 +392,7 @@ no_block:
392 * @inode: owner 392 * @inode: owner
393 * @ind: descriptor of indirect block. 393 * @ind: descriptor of indirect block.
394 * 394 *
395 * This function returns the prefered place for block allocation. 395 * This function returns the preferred place for block allocation.
396 * It is used when heuristic for sequential allocation fails. 396 * It is used when heuristic for sequential allocation fails.
397 * Rules are: 397 * Rules are:
398 * + if there is a block to the left of our position - allocate near it. 398 * + if there is a block to the left of our position - allocate near it.
@@ -436,12 +436,12 @@ static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind)
436} 436}
437 437
438/** 438/**
439 * ext3_find_goal - find a prefered place for allocation. 439 * ext3_find_goal - find a preferred place for allocation.
440 * @inode: owner 440 * @inode: owner
441 * @block: block we want 441 * @block: block we want
442 * @partial: pointer to the last triple within a chain 442 * @partial: pointer to the last triple within a chain
443 * 443 *
444 * Normally this function find the prefered place for block allocation, 444 * Normally this function find the preferred place for block allocation,
445 * returns it. 445 * returns it.
446 */ 446 */
447 447
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 023a070f55f1..0d0c70151642 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -12,6 +12,7 @@
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/ext3_fs.h> 13#include <linux/ext3_fs.h>
14#include <linux/ext3_jbd.h> 14#include <linux/ext3_jbd.h>
15#include <linux/mount.h>
15#include <linux/time.h> 16#include <linux/time.h>
16#include <linux/compat.h> 17#include <linux/compat.h>
17#include <linux/smp_lock.h> 18#include <linux/smp_lock.h>
@@ -38,14 +39,19 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
38 unsigned int oldflags; 39 unsigned int oldflags;
39 unsigned int jflag; 40 unsigned int jflag;
40 41
41 if (IS_RDONLY(inode)) 42 err = mnt_want_write(filp->f_path.mnt);
42 return -EROFS; 43 if (err)
44 return err;
43 45
44 if (!is_owner_or_cap(inode)) 46 if (!is_owner_or_cap(inode)) {
45 return -EACCES; 47 err = -EACCES;
48 goto flags_out;
49 }
46 50
47 if (get_user(flags, (int __user *) arg)) 51 if (get_user(flags, (int __user *) arg)) {
48 return -EFAULT; 52 err = -EFAULT;
53 goto flags_out;
54 }
49 55
50 if (!S_ISDIR(inode->i_mode)) 56 if (!S_ISDIR(inode->i_mode))
51 flags &= ~EXT3_DIRSYNC_FL; 57 flags &= ~EXT3_DIRSYNC_FL;
@@ -54,7 +60,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
54 /* Is it quota file? Do not allow user to mess with it */ 60 /* Is it quota file? Do not allow user to mess with it */
55 if (IS_NOQUOTA(inode)) { 61 if (IS_NOQUOTA(inode)) {
56 mutex_unlock(&inode->i_mutex); 62 mutex_unlock(&inode->i_mutex);
57 return -EPERM; 63 err = -EPERM;
64 goto flags_out;
58 } 65 }
59 oldflags = ei->i_flags; 66 oldflags = ei->i_flags;
60 67
@@ -70,7 +77,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
70 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { 77 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
71 if (!capable(CAP_LINUX_IMMUTABLE)) { 78 if (!capable(CAP_LINUX_IMMUTABLE)) {
72 mutex_unlock(&inode->i_mutex); 79 mutex_unlock(&inode->i_mutex);
73 return -EPERM; 80 err = -EPERM;
81 goto flags_out;
74 } 82 }
75 } 83 }
76 84
@@ -81,7 +89,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
81 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { 89 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
82 if (!capable(CAP_SYS_RESOURCE)) { 90 if (!capable(CAP_SYS_RESOURCE)) {
83 mutex_unlock(&inode->i_mutex); 91 mutex_unlock(&inode->i_mutex);
84 return -EPERM; 92 err = -EPERM;
93 goto flags_out;
85 } 94 }
86 } 95 }
87 96
@@ -89,7 +98,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
89 handle = ext3_journal_start(inode, 1); 98 handle = ext3_journal_start(inode, 1);
90 if (IS_ERR(handle)) { 99 if (IS_ERR(handle)) {
91 mutex_unlock(&inode->i_mutex); 100 mutex_unlock(&inode->i_mutex);
92 return PTR_ERR(handle); 101 err = PTR_ERR(handle);
102 goto flags_out;
93 } 103 }
94 if (IS_SYNC(inode)) 104 if (IS_SYNC(inode))
95 handle->h_sync = 1; 105 handle->h_sync = 1;
@@ -115,6 +125,8 @@ flags_err:
115 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) 125 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
116 err = ext3_change_inode_journal_flag(inode, jflag); 126 err = ext3_change_inode_journal_flag(inode, jflag);
117 mutex_unlock(&inode->i_mutex); 127 mutex_unlock(&inode->i_mutex);
128flags_out:
129 mnt_drop_write(filp->f_path.mnt);
118 return err; 130 return err;
119 } 131 }
120 case EXT3_IOC_GETVERSION: 132 case EXT3_IOC_GETVERSION:
@@ -129,14 +141,18 @@ flags_err:
129 141
130 if (!is_owner_or_cap(inode)) 142 if (!is_owner_or_cap(inode))
131 return -EPERM; 143 return -EPERM;
132 if (IS_RDONLY(inode)) 144 err = mnt_want_write(filp->f_path.mnt);
133 return -EROFS; 145 if (err)
134 if (get_user(generation, (int __user *) arg)) 146 return err;
135 return -EFAULT; 147 if (get_user(generation, (int __user *) arg)) {
136 148 err = -EFAULT;
149 goto setversion_out;
150 }
137 handle = ext3_journal_start(inode, 1); 151 handle = ext3_journal_start(inode, 1);
138 if (IS_ERR(handle)) 152 if (IS_ERR(handle)) {
139 return PTR_ERR(handle); 153 err = PTR_ERR(handle);
154 goto setversion_out;
155 }
140 err = ext3_reserve_inode_write(handle, inode, &iloc); 156 err = ext3_reserve_inode_write(handle, inode, &iloc);
141 if (err == 0) { 157 if (err == 0) {
142 inode->i_ctime = CURRENT_TIME_SEC; 158 inode->i_ctime = CURRENT_TIME_SEC;
@@ -144,6 +160,8 @@ flags_err:
144 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 160 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
145 } 161 }
146 ext3_journal_stop(handle); 162 ext3_journal_stop(handle);
163setversion_out:
164 mnt_drop_write(filp->f_path.mnt);
147 return err; 165 return err;
148 } 166 }
149#ifdef CONFIG_JBD_DEBUG 167#ifdef CONFIG_JBD_DEBUG
@@ -179,18 +197,24 @@ flags_err:
179 } 197 }
180 return -ENOTTY; 198 return -ENOTTY;
181 case EXT3_IOC_SETRSVSZ: { 199 case EXT3_IOC_SETRSVSZ: {
200 int err;
182 201
183 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 202 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
184 return -ENOTTY; 203 return -ENOTTY;
185 204
186 if (IS_RDONLY(inode)) 205 err = mnt_want_write(filp->f_path.mnt);
187 return -EROFS; 206 if (err)
207 return err;
188 208
189 if (!is_owner_or_cap(inode)) 209 if (!is_owner_or_cap(inode)) {
190 return -EACCES; 210 err = -EACCES;
211 goto setrsvsz_out;
212 }
191 213
192 if (get_user(rsv_window_size, (int __user *)arg)) 214 if (get_user(rsv_window_size, (int __user *)arg)) {
193 return -EFAULT; 215 err = -EFAULT;
216 goto setrsvsz_out;
217 }
194 218
195 if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS) 219 if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS)
196 rsv_window_size = EXT3_MAX_RESERVE_BLOCKS; 220 rsv_window_size = EXT3_MAX_RESERVE_BLOCKS;
@@ -208,7 +232,9 @@ flags_err:
208 rsv->rsv_goal_size = rsv_window_size; 232 rsv->rsv_goal_size = rsv_window_size;
209 } 233 }
210 mutex_unlock(&ei->truncate_mutex); 234 mutex_unlock(&ei->truncate_mutex);
211 return 0; 235setrsvsz_out:
236 mnt_drop_write(filp->f_path.mnt);
237 return err;
212 } 238 }
213 case EXT3_IOC_GROUP_EXTEND: { 239 case EXT3_IOC_GROUP_EXTEND: {
214 ext3_fsblk_t n_blocks_count; 240 ext3_fsblk_t n_blocks_count;
@@ -218,17 +244,20 @@ flags_err:
218 if (!capable(CAP_SYS_RESOURCE)) 244 if (!capable(CAP_SYS_RESOURCE))
219 return -EPERM; 245 return -EPERM;
220 246
221 if (IS_RDONLY(inode)) 247 err = mnt_want_write(filp->f_path.mnt);
222 return -EROFS; 248 if (err)
223 249 return err;
224 if (get_user(n_blocks_count, (__u32 __user *)arg))
225 return -EFAULT;
226 250
251 if (get_user(n_blocks_count, (__u32 __user *)arg)) {
252 err = -EFAULT;
253 goto group_extend_out;
254 }
227 err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); 255 err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
228 journal_lock_updates(EXT3_SB(sb)->s_journal); 256 journal_lock_updates(EXT3_SB(sb)->s_journal);
229 journal_flush(EXT3_SB(sb)->s_journal); 257 journal_flush(EXT3_SB(sb)->s_journal);
230 journal_unlock_updates(EXT3_SB(sb)->s_journal); 258 journal_unlock_updates(EXT3_SB(sb)->s_journal);
231 259group_extend_out:
260 mnt_drop_write(filp->f_path.mnt);
232 return err; 261 return err;
233 } 262 }
234 case EXT3_IOC_GROUP_ADD: { 263 case EXT3_IOC_GROUP_ADD: {
@@ -239,18 +268,22 @@ flags_err:
239 if (!capable(CAP_SYS_RESOURCE)) 268 if (!capable(CAP_SYS_RESOURCE))
240 return -EPERM; 269 return -EPERM;
241 270
242 if (IS_RDONLY(inode)) 271 err = mnt_want_write(filp->f_path.mnt);
243 return -EROFS; 272 if (err)
273 return err;
244 274
245 if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, 275 if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg,
246 sizeof(input))) 276 sizeof(input))) {
247 return -EFAULT; 277 err = -EFAULT;
278 goto group_add_out;
279 }
248 280
249 err = ext3_group_add(sb, &input); 281 err = ext3_group_add(sb, &input);
250 journal_lock_updates(EXT3_SB(sb)->s_journal); 282 journal_lock_updates(EXT3_SB(sb)->s_journal);
251 journal_flush(EXT3_SB(sb)->s_journal); 283 journal_flush(EXT3_SB(sb)->s_journal);
252 journal_unlock_updates(EXT3_SB(sb)->s_journal); 284 journal_unlock_updates(EXT3_SB(sb)->s_journal);
253 285group_add_out:
286 mnt_drop_write(filp->f_path.mnt);
254 return err; 287 return err;
255 } 288 }
256 289
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 9397d779c43d..0e97b6e07cb0 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -485,7 +485,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
485 goto exit_dindj; 485 goto exit_dindj;
486 486
487 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), 487 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
488 GFP_KERNEL); 488 GFP_NOFS);
489 if (!n_group_desc) { 489 if (!n_group_desc) {
490 err = -ENOMEM; 490 err = -ENOMEM;
491 ext3_warning (sb, __FUNCTION__, 491 ext3_warning (sb, __FUNCTION__,
@@ -568,7 +568,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
568 int res, i; 568 int res, i;
569 int err; 569 int err;
570 570
571 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); 571 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
572 if (!primary) 572 if (!primary)
573 return -ENOMEM; 573 return -ENOMEM;
574 574
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 18769cc32377..ad5360664082 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -806,8 +806,8 @@ static match_table_t tokens = {
806 {Opt_quota, "quota"}, 806 {Opt_quota, "quota"},
807 {Opt_usrquota, "usrquota"}, 807 {Opt_usrquota, "usrquota"},
808 {Opt_barrier, "barrier=%u"}, 808 {Opt_barrier, "barrier=%u"},
809 {Opt_err, NULL},
810 {Opt_resize, "resize"}, 809 {Opt_resize, "resize"},
810 {Opt_err, NULL},
811}; 811};
812 812
813static ext3_fsblk_t get_sb_block(void **data) 813static ext3_fsblk_t get_sb_block(void **data)
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index fb89c299bece..42856541e9a5 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -728,7 +728,7 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode,
728 ce = NULL; 728 ce = NULL;
729 } 729 }
730 ea_bdebug(bs->bh, "cloning"); 730 ea_bdebug(bs->bh, "cloning");
731 s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); 731 s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
732 error = -ENOMEM; 732 error = -ENOMEM;
733 if (s->base == NULL) 733 if (s->base == NULL)
734 goto cleanup; 734 goto cleanup;
@@ -740,7 +740,7 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode,
740 } 740 }
741 } else { 741 } else {
742 /* Allocate a buffer where we construct the new block. */ 742 /* Allocate a buffer where we construct the new block. */
743 s->base = kzalloc(sb->s_blocksize, GFP_KERNEL); 743 s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
744 /* assert(header == s->base) */ 744 /* assert(header == s->base) */
745 error = -ENOMEM; 745 error = -ENOMEM;
746 if (s->base == NULL) 746 if (s->base == NULL)
@@ -1126,7 +1126,7 @@ ext3_xattr_cache_insert(struct buffer_head *bh)
1126 struct mb_cache_entry *ce; 1126 struct mb_cache_entry *ce;
1127 int error; 1127 int error;
1128 1128
1129 ce = mb_cache_entry_alloc(ext3_xattr_cache); 1129 ce = mb_cache_entry_alloc(ext3_xattr_cache, GFP_NOFS);
1130 if (!ce) { 1130 if (!ce) {
1131 ea_bdebug(bh, "out of memory"); 1131 ea_bdebug(bh, "out of memory");
1132 return; 1132 return;
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 33888bb58144..2c23bade9aa6 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -46,7 +46,7 @@ const struct file_operations ext4_dir_operations = {
46#ifdef CONFIG_COMPAT 46#ifdef CONFIG_COMPAT
47 .compat_ioctl = ext4_compat_ioctl, 47 .compat_ioctl = ext4_compat_ioctl,
48#endif 48#endif
49 .fsync = ext4_sync_file, /* BKL held */ 49 .fsync = ext4_sync_file,
50 .release = ext4_release_dir, 50 .release = ext4_release_dir,
51}; 51};
52 52
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bc7081f1fbe8..9ae6e67090cd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -148,6 +148,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
148{ 148{
149 struct ext4_inode_info *ei = EXT4_I(inode); 149 struct ext4_inode_info *ei = EXT4_I(inode);
150 ext4_fsblk_t bg_start; 150 ext4_fsblk_t bg_start;
151 ext4_fsblk_t last_block;
151 ext4_grpblk_t colour; 152 ext4_grpblk_t colour;
152 int depth; 153 int depth;
153 154
@@ -169,8 +170,13 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
169 /* OK. use inode's group */ 170 /* OK. use inode's group */
170 bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + 171 bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
171 le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); 172 le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
172 colour = (current->pid % 16) * 173 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
174
175 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
176 colour = (current->pid % 16) *
173 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); 177 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
178 else
179 colour = (current->pid % 16) * ((last_block - bg_start) / 16);
174 return bg_start + colour + block; 180 return bg_start + colour + block;
175} 181}
176 182
@@ -349,7 +355,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
349#define ext4_ext_show_leaf(inode,path) 355#define ext4_ext_show_leaf(inode,path)
350#endif 356#endif
351 357
352static void ext4_ext_drop_refs(struct ext4_ext_path *path) 358void ext4_ext_drop_refs(struct ext4_ext_path *path)
353{ 359{
354 int depth = path->p_depth; 360 int depth = path->p_depth;
355 int i; 361 int i;
@@ -2168,6 +2174,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2168 newblock = iblock - ee_block + ext_pblock(ex); 2174 newblock = iblock - ee_block + ext_pblock(ex);
2169 ex2 = ex; 2175 ex2 = ex;
2170 2176
2177 err = ext4_ext_get_access(handle, inode, path + depth);
2178 if (err)
2179 goto out;
2180
2171 /* ex1: ee_block to iblock - 1 : uninitialized */ 2181 /* ex1: ee_block to iblock - 1 : uninitialized */
2172 if (iblock > ee_block) { 2182 if (iblock > ee_block) {
2173 ex1 = ex; 2183 ex1 = ex;
@@ -2200,16 +2210,20 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2200 newdepth = ext_depth(inode); 2210 newdepth = ext_depth(inode);
2201 if (newdepth != depth) { 2211 if (newdepth != depth) {
2202 depth = newdepth; 2212 depth = newdepth;
2203 path = ext4_ext_find_extent(inode, iblock, NULL); 2213 ext4_ext_drop_refs(path);
2214 path = ext4_ext_find_extent(inode, iblock, path);
2204 if (IS_ERR(path)) { 2215 if (IS_ERR(path)) {
2205 err = PTR_ERR(path); 2216 err = PTR_ERR(path);
2206 path = NULL;
2207 goto out; 2217 goto out;
2208 } 2218 }
2209 eh = path[depth].p_hdr; 2219 eh = path[depth].p_hdr;
2210 ex = path[depth].p_ext; 2220 ex = path[depth].p_ext;
2211 if (ex2 != &newex) 2221 if (ex2 != &newex)
2212 ex2 = ex; 2222 ex2 = ex;
2223
2224 err = ext4_ext_get_access(handle, inode, path + depth);
2225 if (err)
2226 goto out;
2213 } 2227 }
2214 allocated = max_blocks; 2228 allocated = max_blocks;
2215 } 2229 }
@@ -2230,9 +2244,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2230 ex2->ee_len = cpu_to_le16(allocated); 2244 ex2->ee_len = cpu_to_le16(allocated);
2231 if (ex2 != ex) 2245 if (ex2 != ex)
2232 goto insert; 2246 goto insert;
2233 err = ext4_ext_get_access(handle, inode, path + depth);
2234 if (err)
2235 goto out;
2236 /* 2247 /*
2237 * New (initialized) extent starts from the first block 2248 * New (initialized) extent starts from the first block
2238 * in the current extent. i.e., ex2 == ex 2249 * in the current extent. i.e., ex2 == ex
@@ -2276,9 +2287,22 @@ out:
2276} 2287}
2277 2288
2278/* 2289/*
2290 * Block allocation/map/preallocation routine for extents based files
2291 *
2292 *
2279 * Need to be called with 2293 * Need to be called with
2280 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block 2294 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
2281 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) 2295 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
2296 *
2297 * return > 0, number of of blocks already mapped/allocated
2298 * if create == 0 and these are pre-allocated blocks
2299 * buffer head is unmapped
2300 * otherwise blocks are mapped
2301 *
2302 * return = 0, if plain look up failed (blocks have not been allocated)
2303 * buffer head is unmapped
2304 *
2305 * return < 0, error case.
2282 */ 2306 */
2283int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 2307int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2284 ext4_lblk_t iblock, 2308 ext4_lblk_t iblock,
@@ -2623,7 +2647,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2623 * modify 1 super block, 1 block bitmap and 1 group descriptor. 2647 * modify 1 super block, 1 block bitmap and 1 group descriptor.
2624 */ 2648 */
2625 credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; 2649 credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
2626 down_write((&EXT4_I(inode)->i_data_sem)); 2650 mutex_lock(&inode->i_mutex);
2627retry: 2651retry:
2628 while (ret >= 0 && ret < max_blocks) { 2652 while (ret >= 0 && ret < max_blocks) {
2629 block = block + ret; 2653 block = block + ret;
@@ -2634,16 +2658,17 @@ retry:
2634 break; 2658 break;
2635 } 2659 }
2636 2660
2637 ret = ext4_ext_get_blocks(handle, inode, block, 2661 ret = ext4_get_blocks_wrap(handle, inode, block,
2638 max_blocks, &map_bh, 2662 max_blocks, &map_bh,
2639 EXT4_CREATE_UNINITIALIZED_EXT, 0); 2663 EXT4_CREATE_UNINITIALIZED_EXT, 0);
2640 WARN_ON(ret <= 0);
2641 if (ret <= 0) { 2664 if (ret <= 0) {
2642 ext4_error(inode->i_sb, "ext4_fallocate", 2665#ifdef EXT4FS_DEBUG
2643 "ext4_ext_get_blocks returned error: " 2666 WARN_ON(ret <= 0);
2644 "inode#%lu, block=%u, max_blocks=%lu", 2667 printk(KERN_ERR "%s: ext4_ext_get_blocks "
2668 "returned error inode#%lu, block=%u, "
2669 "max_blocks=%lu", __func__,
2645 inode->i_ino, block, max_blocks); 2670 inode->i_ino, block, max_blocks);
2646 ret = -EIO; 2671#endif
2647 ext4_mark_inode_dirty(handle, inode); 2672 ext4_mark_inode_dirty(handle, inode);
2648 ret2 = ext4_journal_stop(handle); 2673 ret2 = ext4_journal_stop(handle);
2649 break; 2674 break;
@@ -2680,7 +2705,6 @@ retry:
2680 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2705 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
2681 goto retry; 2706 goto retry;
2682 2707
2683 up_write((&EXT4_I(inode)->i_data_sem));
2684 /* 2708 /*
2685 * Time to update the file size. 2709 * Time to update the file size.
2686 * Update only when preallocation was requested beyond the file size. 2710 * Update only when preallocation was requested beyond the file size.
@@ -2692,21 +2716,18 @@ retry:
2692 * if no error, we assume preallocation succeeded 2716 * if no error, we assume preallocation succeeded
2693 * completely 2717 * completely
2694 */ 2718 */
2695 mutex_lock(&inode->i_mutex);
2696 i_size_write(inode, offset + len); 2719 i_size_write(inode, offset + len);
2697 EXT4_I(inode)->i_disksize = i_size_read(inode); 2720 EXT4_I(inode)->i_disksize = i_size_read(inode);
2698 mutex_unlock(&inode->i_mutex);
2699 } else if (ret < 0 && nblocks) { 2721 } else if (ret < 0 && nblocks) {
2700 /* Handle partial allocation scenario */ 2722 /* Handle partial allocation scenario */
2701 loff_t newsize; 2723 loff_t newsize;
2702 2724
2703 mutex_lock(&inode->i_mutex);
2704 newsize = (nblocks << blkbits) + i_size_read(inode); 2725 newsize = (nblocks << blkbits) + i_size_read(inode);
2705 i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits)); 2726 i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
2706 EXT4_I(inode)->i_disksize = i_size_read(inode); 2727 EXT4_I(inode)->i_disksize = i_size_read(inode);
2707 mutex_unlock(&inode->i_mutex);
2708 } 2728 }
2709 } 2729 }
2710 2730
2731 mutex_unlock(&inode->i_mutex);
2711 return ret > 0 ? ret2 : ret; 2732 return ret > 0 ? ret2 : ret;
2712} 2733}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index da18a74b966a..486e46a3918d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -305,7 +305,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
305 * it has too few free inodes left (min_inodes) or 305 * it has too few free inodes left (min_inodes) or
306 * it has too few free blocks left (min_blocks) or 306 * it has too few free blocks left (min_blocks) or
307 * it's already running too large debt (max_debt). 307 * it's already running too large debt (max_debt).
308 * Parent's group is prefered, if it doesn't satisfy these 308 * Parent's group is preferred, if it doesn't satisfy these
309 * conditions we search cyclically through the rest. If none 309 * conditions we search cyclically through the rest. If none
310 * of the groups look good we just look for a group with more 310 * of the groups look good we just look for a group with more
311 * free inodes than average (starting at parent's group). 311 * free inodes than average (starting at parent's group).
@@ -702,7 +702,12 @@ got:
702 ei->i_dir_start_lookup = 0; 702 ei->i_dir_start_lookup = 0;
703 ei->i_disksize = 0; 703 ei->i_disksize = 0;
704 704
705 ei->i_flags = EXT4_I(dir)->i_flags & ~EXT4_INDEX_FL; 705 /*
706 * Don't inherit extent flag from directory. We set extent flag on
707 * newly created directory and file only if -o extent mount option is
708 * specified
709 */
710 ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL);
706 if (S_ISLNK(mode)) 711 if (S_ISLNK(mode))
707 ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL); 712 ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
708 /* dirsync only applies to directories */ 713 /* dirsync only applies to directories */
@@ -745,12 +750,15 @@ got:
745 goto fail_free_drop; 750 goto fail_free_drop;
746 } 751 }
747 if (test_opt(sb, EXTENTS)) { 752 if (test_opt(sb, EXTENTS)) {
748 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; 753 /* set extent flag only for directory and file */
749 ext4_ext_tree_init(handle, inode); 754 if (S_ISDIR(mode) || S_ISREG(mode)) {
750 err = ext4_update_incompat_feature(handle, sb, 755 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
751 EXT4_FEATURE_INCOMPAT_EXTENTS); 756 ext4_ext_tree_init(handle, inode);
752 if (err) 757 err = ext4_update_incompat_feature(handle, sb,
753 goto fail; 758 EXT4_FEATURE_INCOMPAT_EXTENTS);
759 if (err)
760 goto fail;
761 }
754 } 762 }
755 763
756 ext4_debug("allocating inode %lu\n", inode->i_ino); 764 ext4_debug("allocating inode %lu\n", inode->i_ino);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7dd9b50d5ebc..8fab233cb05f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -382,7 +382,7 @@ no_block:
382 * @inode: owner 382 * @inode: owner
383 * @ind: descriptor of indirect block. 383 * @ind: descriptor of indirect block.
384 * 384 *
385 * This function returns the prefered place for block allocation. 385 * This function returns the preferred place for block allocation.
386 * It is used when heuristic for sequential allocation fails. 386 * It is used when heuristic for sequential allocation fails.
387 * Rules are: 387 * Rules are:
388 * + if there is a block to the left of our position - allocate near it. 388 * + if there is a block to the left of our position - allocate near it.
@@ -403,6 +403,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
403 __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; 403 __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
404 __le32 *p; 404 __le32 *p;
405 ext4_fsblk_t bg_start; 405 ext4_fsblk_t bg_start;
406 ext4_fsblk_t last_block;
406 ext4_grpblk_t colour; 407 ext4_grpblk_t colour;
407 408
408 /* Try to find previous block */ 409 /* Try to find previous block */
@@ -420,18 +421,23 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
420 * into the same cylinder group then. 421 * into the same cylinder group then.
421 */ 422 */
422 bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group); 423 bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group);
423 colour = (current->pid % 16) * 424 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
425
426 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
427 colour = (current->pid % 16) *
424 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); 428 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
429 else
430 colour = (current->pid % 16) * ((last_block - bg_start) / 16);
425 return bg_start + colour; 431 return bg_start + colour;
426} 432}
427 433
428/** 434/**
429 * ext4_find_goal - find a prefered place for allocation. 435 * ext4_find_goal - find a preferred place for allocation.
430 * @inode: owner 436 * @inode: owner
431 * @block: block we want 437 * @block: block we want
432 * @partial: pointer to the last triple within a chain 438 * @partial: pointer to the last triple within a chain
433 * 439 *
434 * Normally this function find the prefered place for block allocation, 440 * Normally this function find the preferred place for block allocation,
435 * returns it. 441 * returns it.
436 */ 442 */
437static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, 443static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
@@ -768,7 +774,6 @@ err_out:
768 * 774 *
769 * `handle' can be NULL if create == 0. 775 * `handle' can be NULL if create == 0.
770 * 776 *
771 * The BKL may not be held on entry here. Be sure to take it early.
772 * return > 0, # of blocks mapped or allocated. 777 * return > 0, # of blocks mapped or allocated.
773 * return = 0, if plain lookup failed. 778 * return = 0, if plain lookup failed.
774 * return < 0, error case. 779 * return < 0, error case.
@@ -903,11 +908,38 @@ out:
903 */ 908 */
904#define DIO_CREDITS 25 909#define DIO_CREDITS 25
905 910
911
912/*
913 *
914 *
915 * ext4_ext4 get_block() wrapper function
916 * It will do a look up first, and returns if the blocks already mapped.
917 * Otherwise it takes the write lock of the i_data_sem and allocate blocks
918 * and store the allocated blocks in the result buffer head and mark it
919 * mapped.
920 *
921 * If file type is extents based, it will call ext4_ext_get_blocks(),
922 * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping
923 * based files
924 *
925 * On success, it returns the number of blocks being mapped or allocate.
926 * if create==0 and the blocks are pre-allocated and uninitialized block,
927 * the result buffer head is unmapped. If the create ==1, it will make sure
928 * the buffer head is mapped.
929 *
930 * It returns 0 if plain look up failed (blocks have not been allocated), in
931 * that casem, buffer head is unmapped
932 *
933 * It returns the error in case of allocation failure.
934 */
906int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, 935int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
907 unsigned long max_blocks, struct buffer_head *bh, 936 unsigned long max_blocks, struct buffer_head *bh,
908 int create, int extend_disksize) 937 int create, int extend_disksize)
909{ 938{
910 int retval; 939 int retval;
940
941 clear_buffer_mapped(bh);
942
911 /* 943 /*
912 * Try to see if we can get the block without requesting 944 * Try to see if we can get the block without requesting
913 * for new file system block. 945 * for new file system block.
@@ -921,12 +953,26 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
921 inode, block, max_blocks, bh, 0, 0); 953 inode, block, max_blocks, bh, 0, 0);
922 } 954 }
923 up_read((&EXT4_I(inode)->i_data_sem)); 955 up_read((&EXT4_I(inode)->i_data_sem));
924 if (!create || (retval > 0)) 956
957 /* If it is only a block(s) look up */
958 if (!create)
959 return retval;
960
961 /*
962 * Returns if the blocks have already allocated
963 *
964 * Note that if blocks have been preallocated
965 * ext4_ext_get_block() returns th create = 0
966 * with buffer head unmapped.
967 */
968 if (retval > 0 && buffer_mapped(bh))
925 return retval; 969 return retval;
926 970
927 /* 971 /*
928 * We need to allocate new blocks which will result 972 * New blocks allocate and/or writing to uninitialized extent
929 * in i_data update 973 * will possibly result in updating i_data, so we take
974 * the write lock of i_data_sem, and call get_blocks()
975 * with create == 1 flag.
930 */ 976 */
931 down_write((&EXT4_I(inode)->i_data_sem)); 977 down_write((&EXT4_I(inode)->i_data_sem));
932 /* 978 /*
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 2ed7c37f897e..25b13ede8086 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -15,6 +15,7 @@
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/compat.h> 16#include <linux/compat.h>
17#include <linux/smp_lock.h> 17#include <linux/smp_lock.h>
18#include <linux/mount.h>
18#include <asm/uaccess.h> 19#include <asm/uaccess.h>
19 20
20int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, 21int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
@@ -38,24 +39,25 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
38 unsigned int oldflags; 39 unsigned int oldflags;
39 unsigned int jflag; 40 unsigned int jflag;
40 41
41 if (IS_RDONLY(inode))
42 return -EROFS;
43
44 if (!is_owner_or_cap(inode)) 42 if (!is_owner_or_cap(inode))
45 return -EACCES; 43 return -EACCES;
46 44
47 if (get_user(flags, (int __user *) arg)) 45 if (get_user(flags, (int __user *) arg))
48 return -EFAULT; 46 return -EFAULT;
49 47
48 err = mnt_want_write(filp->f_path.mnt);
49 if (err)
50 return err;
51
50 if (!S_ISDIR(inode->i_mode)) 52 if (!S_ISDIR(inode->i_mode))
51 flags &= ~EXT4_DIRSYNC_FL; 53 flags &= ~EXT4_DIRSYNC_FL;
52 54
55 err = -EPERM;
53 mutex_lock(&inode->i_mutex); 56 mutex_lock(&inode->i_mutex);
54 /* Is it quota file? Do not allow user to mess with it */ 57 /* Is it quota file? Do not allow user to mess with it */
55 if (IS_NOQUOTA(inode)) { 58 if (IS_NOQUOTA(inode))
56 mutex_unlock(&inode->i_mutex); 59 goto flags_out;
57 return -EPERM; 60
58 }
59 oldflags = ei->i_flags; 61 oldflags = ei->i_flags;
60 62
61 /* The JOURNAL_DATA flag is modifiable only by root */ 63 /* The JOURNAL_DATA flag is modifiable only by root */
@@ -68,10 +70,8 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
68 * This test looks nicer. Thanks to Pauline Middelink 70 * This test looks nicer. Thanks to Pauline Middelink
69 */ 71 */
70 if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { 72 if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
71 if (!capable(CAP_LINUX_IMMUTABLE)) { 73 if (!capable(CAP_LINUX_IMMUTABLE))
72 mutex_unlock(&inode->i_mutex); 74 goto flags_out;
73 return -EPERM;
74 }
75 } 75 }
76 76
77 /* 77 /*
@@ -79,17 +79,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
79 * the relevant capability. 79 * the relevant capability.
80 */ 80 */
81 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { 81 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
82 if (!capable(CAP_SYS_RESOURCE)) { 82 if (!capable(CAP_SYS_RESOURCE))
83 mutex_unlock(&inode->i_mutex); 83 goto flags_out;
84 return -EPERM;
85 }
86 } 84 }
87 85
88
89 handle = ext4_journal_start(inode, 1); 86 handle = ext4_journal_start(inode, 1);
90 if (IS_ERR(handle)) { 87 if (IS_ERR(handle)) {
91 mutex_unlock(&inode->i_mutex); 88 err = PTR_ERR(handle);
92 return PTR_ERR(handle); 89 goto flags_out;
93 } 90 }
94 if (IS_SYNC(inode)) 91 if (IS_SYNC(inode))
95 handle->h_sync = 1; 92 handle->h_sync = 1;
@@ -107,14 +104,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
107 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 104 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
108flags_err: 105flags_err:
109 ext4_journal_stop(handle); 106 ext4_journal_stop(handle);
110 if (err) { 107 if (err)
111 mutex_unlock(&inode->i_mutex); 108 goto flags_out;
112 return err;
113 }
114 109
115 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) 110 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
116 err = ext4_change_inode_journal_flag(inode, jflag); 111 err = ext4_change_inode_journal_flag(inode, jflag);
112flags_out:
117 mutex_unlock(&inode->i_mutex); 113 mutex_unlock(&inode->i_mutex);
114 mnt_drop_write(filp->f_path.mnt);
118 return err; 115 return err;
119 } 116 }
120 case EXT4_IOC_GETVERSION: 117 case EXT4_IOC_GETVERSION:
@@ -129,14 +126,20 @@ flags_err:
129 126
130 if (!is_owner_or_cap(inode)) 127 if (!is_owner_or_cap(inode))
131 return -EPERM; 128 return -EPERM;
132 if (IS_RDONLY(inode)) 129
133 return -EROFS; 130 err = mnt_want_write(filp->f_path.mnt);
134 if (get_user(generation, (int __user *) arg)) 131 if (err)
135 return -EFAULT; 132 return err;
133 if (get_user(generation, (int __user *) arg)) {
134 err = -EFAULT;
135 goto setversion_out;
136 }
136 137
137 handle = ext4_journal_start(inode, 1); 138 handle = ext4_journal_start(inode, 1);
138 if (IS_ERR(handle)) 139 if (IS_ERR(handle)) {
139 return PTR_ERR(handle); 140 err = PTR_ERR(handle);
141 goto setversion_out;
142 }
140 err = ext4_reserve_inode_write(handle, inode, &iloc); 143 err = ext4_reserve_inode_write(handle, inode, &iloc);
141 if (err == 0) { 144 if (err == 0) {
142 inode->i_ctime = ext4_current_time(inode); 145 inode->i_ctime = ext4_current_time(inode);
@@ -144,6 +147,8 @@ flags_err:
144 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 147 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
145 } 148 }
146 ext4_journal_stop(handle); 149 ext4_journal_stop(handle);
150setversion_out:
151 mnt_drop_write(filp->f_path.mnt);
147 return err; 152 return err;
148 } 153 }
149#ifdef CONFIG_JBD2_DEBUG 154#ifdef CONFIG_JBD2_DEBUG
@@ -179,19 +184,21 @@ flags_err:
179 } 184 }
180 return -ENOTTY; 185 return -ENOTTY;
181 case EXT4_IOC_SETRSVSZ: { 186 case EXT4_IOC_SETRSVSZ: {
187 int err;
182 188
183 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 189 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
184 return -ENOTTY; 190 return -ENOTTY;
185 191
186 if (IS_RDONLY(inode))
187 return -EROFS;
188
189 if (!is_owner_or_cap(inode)) 192 if (!is_owner_or_cap(inode))
190 return -EACCES; 193 return -EACCES;
191 194
192 if (get_user(rsv_window_size, (int __user *)arg)) 195 if (get_user(rsv_window_size, (int __user *)arg))
193 return -EFAULT; 196 return -EFAULT;
194 197
198 err = mnt_want_write(filp->f_path.mnt);
199 if (err)
200 return err;
201
195 if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) 202 if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS)
196 rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; 203 rsv_window_size = EXT4_MAX_RESERVE_BLOCKS;
197 204
@@ -208,6 +215,7 @@ flags_err:
208 rsv->rsv_goal_size = rsv_window_size; 215 rsv->rsv_goal_size = rsv_window_size;
209 } 216 }
210 up_write(&ei->i_data_sem); 217 up_write(&ei->i_data_sem);
218 mnt_drop_write(filp->f_path.mnt);
211 return 0; 219 return 0;
212 } 220 }
213 case EXT4_IOC_GROUP_EXTEND: { 221 case EXT4_IOC_GROUP_EXTEND: {
@@ -218,16 +226,18 @@ flags_err:
218 if (!capable(CAP_SYS_RESOURCE)) 226 if (!capable(CAP_SYS_RESOURCE))
219 return -EPERM; 227 return -EPERM;
220 228
221 if (IS_RDONLY(inode))
222 return -EROFS;
223
224 if (get_user(n_blocks_count, (__u32 __user *)arg)) 229 if (get_user(n_blocks_count, (__u32 __user *)arg))
225 return -EFAULT; 230 return -EFAULT;
226 231
232 err = mnt_want_write(filp->f_path.mnt);
233 if (err)
234 return err;
235
227 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); 236 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
228 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 237 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
229 jbd2_journal_flush(EXT4_SB(sb)->s_journal); 238 jbd2_journal_flush(EXT4_SB(sb)->s_journal);
230 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 239 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
240 mnt_drop_write(filp->f_path.mnt);
231 241
232 return err; 242 return err;
233 } 243 }
@@ -239,17 +249,19 @@ flags_err:
239 if (!capable(CAP_SYS_RESOURCE)) 249 if (!capable(CAP_SYS_RESOURCE))
240 return -EPERM; 250 return -EPERM;
241 251
242 if (IS_RDONLY(inode))
243 return -EROFS;
244
245 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, 252 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
246 sizeof(input))) 253 sizeof(input)))
247 return -EFAULT; 254 return -EFAULT;
248 255
256 err = mnt_want_write(filp->f_path.mnt);
257 if (err)
258 return err;
259
249 err = ext4_group_add(sb, &input); 260 err = ext4_group_add(sb, &input);
250 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 261 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
251 jbd2_journal_flush(EXT4_SB(sb)->s_journal); 262 jbd2_journal_flush(EXT4_SB(sb)->s_journal);
252 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 263 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
264 mnt_drop_write(filp->f_path.mnt);
253 265
254 return err; 266 return err;
255 } 267 }
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index dd0fcfcb35ce..ef97f19c2f9d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -627,21 +627,19 @@ static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
627 return block; 627 return block;
628} 628}
629 629
630static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
631{
630#if BITS_PER_LONG == 64 632#if BITS_PER_LONG == 64
631#define mb_correct_addr_and_bit(bit, addr) \ 633 *bit += ((unsigned long) addr & 7UL) << 3;
632{ \ 634 addr = (void *) ((unsigned long) addr & ~7UL);
633 bit += ((unsigned long) addr & 7UL) << 3; \
634 addr = (void *) ((unsigned long) addr & ~7UL); \
635}
636#elif BITS_PER_LONG == 32 635#elif BITS_PER_LONG == 32
637#define mb_correct_addr_and_bit(bit, addr) \ 636 *bit += ((unsigned long) addr & 3UL) << 3;
638{ \ 637 addr = (void *) ((unsigned long) addr & ~3UL);
639 bit += ((unsigned long) addr & 3UL) << 3; \
640 addr = (void *) ((unsigned long) addr & ~3UL); \
641}
642#else 638#else
643#error "how many bits you are?!" 639#error "how many bits you are?!"
644#endif 640#endif
641 return addr;
642}
645 643
646static inline int mb_test_bit(int bit, void *addr) 644static inline int mb_test_bit(int bit, void *addr)
647{ 645{
@@ -649,34 +647,54 @@ static inline int mb_test_bit(int bit, void *addr)
649 * ext4_test_bit on architecture like powerpc 647 * ext4_test_bit on architecture like powerpc
650 * needs unsigned long aligned address 648 * needs unsigned long aligned address
651 */ 649 */
652 mb_correct_addr_and_bit(bit, addr); 650 addr = mb_correct_addr_and_bit(&bit, addr);
653 return ext4_test_bit(bit, addr); 651 return ext4_test_bit(bit, addr);
654} 652}
655 653
656static inline void mb_set_bit(int bit, void *addr) 654static inline void mb_set_bit(int bit, void *addr)
657{ 655{
658 mb_correct_addr_and_bit(bit, addr); 656 addr = mb_correct_addr_and_bit(&bit, addr);
659 ext4_set_bit(bit, addr); 657 ext4_set_bit(bit, addr);
660} 658}
661 659
662static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr) 660static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
663{ 661{
664 mb_correct_addr_and_bit(bit, addr); 662 addr = mb_correct_addr_and_bit(&bit, addr);
665 ext4_set_bit_atomic(lock, bit, addr); 663 ext4_set_bit_atomic(lock, bit, addr);
666} 664}
667 665
668static inline void mb_clear_bit(int bit, void *addr) 666static inline void mb_clear_bit(int bit, void *addr)
669{ 667{
670 mb_correct_addr_and_bit(bit, addr); 668 addr = mb_correct_addr_and_bit(&bit, addr);
671 ext4_clear_bit(bit, addr); 669 ext4_clear_bit(bit, addr);
672} 670}
673 671
674static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr) 672static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
675{ 673{
676 mb_correct_addr_and_bit(bit, addr); 674 addr = mb_correct_addr_and_bit(&bit, addr);
677 ext4_clear_bit_atomic(lock, bit, addr); 675 ext4_clear_bit_atomic(lock, bit, addr);
678} 676}
679 677
678static inline int mb_find_next_zero_bit(void *addr, int max, int start)
679{
680 int fix = 0;
681 addr = mb_correct_addr_and_bit(&fix, addr);
682 max += fix;
683 start += fix;
684
685 return ext4_find_next_zero_bit(addr, max, start) - fix;
686}
687
688static inline int mb_find_next_bit(void *addr, int max, int start)
689{
690 int fix = 0;
691 addr = mb_correct_addr_and_bit(&fix, addr);
692 max += fix;
693 start += fix;
694
695 return ext4_find_next_bit(addr, max, start) - fix;
696}
697
680static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) 698static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
681{ 699{
682 char *bb; 700 char *bb;
@@ -906,7 +924,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
906 unsigned short chunk; 924 unsigned short chunk;
907 unsigned short border; 925 unsigned short border;
908 926
909 BUG_ON(len >= EXT4_BLOCKS_PER_GROUP(sb)); 927 BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
910 928
911 border = 2 << sb->s_blocksize_bits; 929 border = 2 << sb->s_blocksize_bits;
912 930
@@ -946,12 +964,12 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
946 964
947 /* initialize buddy from bitmap which is aggregation 965 /* initialize buddy from bitmap which is aggregation
948 * of on-disk bitmap and preallocations */ 966 * of on-disk bitmap and preallocations */
949 i = ext4_find_next_zero_bit(bitmap, max, 0); 967 i = mb_find_next_zero_bit(bitmap, max, 0);
950 grp->bb_first_free = i; 968 grp->bb_first_free = i;
951 while (i < max) { 969 while (i < max) {
952 fragments++; 970 fragments++;
953 first = i; 971 first = i;
954 i = ext4_find_next_bit(bitmap, max, i); 972 i = mb_find_next_bit(bitmap, max, i);
955 len = i - first; 973 len = i - first;
956 free += len; 974 free += len;
957 if (len > 1) 975 if (len > 1)
@@ -959,7 +977,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
959 else 977 else
960 grp->bb_counters[0]++; 978 grp->bb_counters[0]++;
961 if (i < max) 979 if (i < max)
962 i = ext4_find_next_zero_bit(bitmap, max, i); 980 i = mb_find_next_zero_bit(bitmap, max, i);
963 } 981 }
964 grp->bb_fragments = fragments; 982 grp->bb_fragments = fragments;
965 983
@@ -967,6 +985,10 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
967 ext4_error(sb, __FUNCTION__, 985 ext4_error(sb, __FUNCTION__,
968 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", 986 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
969 group, free, grp->bb_free); 987 group, free, grp->bb_free);
988 /*
989 * If we intend to continue, we consider group descriptor
990 * corrupt and update bb_free using bitmap value
991 */
970 grp->bb_free = free; 992 grp->bb_free = free;
971 } 993 }
972 994
@@ -1778,7 +1800,7 @@ static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1778 buddy = mb_find_buddy(e4b, i, &max); 1800 buddy = mb_find_buddy(e4b, i, &max);
1779 BUG_ON(buddy == NULL); 1801 BUG_ON(buddy == NULL);
1780 1802
1781 k = ext4_find_next_zero_bit(buddy, max, 0); 1803 k = mb_find_next_zero_bit(buddy, max, 0);
1782 BUG_ON(k >= max); 1804 BUG_ON(k >= max);
1783 1805
1784 ac->ac_found++; 1806 ac->ac_found++;
@@ -1818,11 +1840,11 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1818 i = e4b->bd_info->bb_first_free; 1840 i = e4b->bd_info->bb_first_free;
1819 1841
1820 while (free && ac->ac_status == AC_STATUS_CONTINUE) { 1842 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
1821 i = ext4_find_next_zero_bit(bitmap, 1843 i = mb_find_next_zero_bit(bitmap,
1822 EXT4_BLOCKS_PER_GROUP(sb), i); 1844 EXT4_BLOCKS_PER_GROUP(sb), i);
1823 if (i >= EXT4_BLOCKS_PER_GROUP(sb)) { 1845 if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
1824 /* 1846 /*
1825 * IF we corrupt the bitmap we won't find any 1847 * IF we have corrupt bitmap, we won't find any
1826 * free blocks even though group info says we 1848 * free blocks even though group info says we
1827 * have free blocks 1849 * have free blocks
1828 */ 1850 */
@@ -1838,6 +1860,12 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1838 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1860 ext4_error(sb, __FUNCTION__, "%d free blocks as per "
1839 "group info. But got %d blocks\n", 1861 "group info. But got %d blocks\n",
1840 free, ex.fe_len); 1862 free, ex.fe_len);
1863 /*
1864 * The number of free blocks differs. This mostly
1865 * indicates that the bitmap is corrupt. So exit
1866 * without claiming the space.
1867 */
1868 break;
1841 } 1869 }
1842 1870
1843 ext4_mb_measure_extent(ac, &ex, e4b); 1871 ext4_mb_measure_extent(ac, &ex, e4b);
@@ -3740,10 +3768,10 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3740 } 3768 }
3741 3769
3742 while (bit < end) { 3770 while (bit < end) {
3743 bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit); 3771 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3744 if (bit >= end) 3772 if (bit >= end)
3745 break; 3773 break;
3746 next = ext4_find_next_bit(bitmap_bh->b_data, end, bit); 3774 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3747 if (next > end) 3775 if (next > end)
3748 next = end; 3776 next = end;
3749 start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + 3777 start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
@@ -3771,6 +3799,10 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3771 (unsigned long) pa->pa_len); 3799 (unsigned long) pa->pa_len);
3772 ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", 3800 ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n",
3773 free, pa->pa_free); 3801 free, pa->pa_free);
3802 /*
3803 * pa is already deleted so we use the value obtained
3804 * from the bitmap and continue.
3805 */
3774 } 3806 }
3775 atomic_add(free, &sbi->s_mb_discarded); 3807 atomic_add(free, &sbi->s_mb_discarded);
3776 if (ac) 3808 if (ac)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 8c6c685b9d22..5c1e27de7755 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -43,6 +43,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
43 43
44 if (IS_ERR(path)) { 44 if (IS_ERR(path)) {
45 retval = PTR_ERR(path); 45 retval = PTR_ERR(path);
46 path = NULL;
46 goto err_out; 47 goto err_out;
47 } 48 }
48 49
@@ -74,6 +75,10 @@ static int finish_range(handle_t *handle, struct inode *inode,
74 } 75 }
75 retval = ext4_ext_insert_extent(handle, inode, path, &newext); 76 retval = ext4_ext_insert_extent(handle, inode, path, &newext);
76err_out: 77err_out:
78 if (path) {
79 ext4_ext_drop_refs(path);
80 kfree(path);
81 }
77 lb->first_pblock = 0; 82 lb->first_pblock = 0;
78 return retval; 83 return retval;
79} 84}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a9347fb43bcc..28aa2ed4297e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1804,12 +1804,8 @@ retry:
1804 inode->i_fop = &ext4_dir_operations; 1804 inode->i_fop = &ext4_dir_operations;
1805 inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; 1805 inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
1806 dir_block = ext4_bread (handle, inode, 0, 1, &err); 1806 dir_block = ext4_bread (handle, inode, 0, 1, &err);
1807 if (!dir_block) { 1807 if (!dir_block)
1808 ext4_dec_count(handle, inode); /* is this nlink == 0? */ 1808 goto out_clear_inode;
1809 ext4_mark_inode_dirty(handle, inode);
1810 iput (inode);
1811 goto out_stop;
1812 }
1813 BUFFER_TRACE(dir_block, "get_write_access"); 1809 BUFFER_TRACE(dir_block, "get_write_access");
1814 ext4_journal_get_write_access(handle, dir_block); 1810 ext4_journal_get_write_access(handle, dir_block);
1815 de = (struct ext4_dir_entry_2 *) dir_block->b_data; 1811 de = (struct ext4_dir_entry_2 *) dir_block->b_data;
@@ -1832,7 +1828,8 @@ retry:
1832 ext4_mark_inode_dirty(handle, inode); 1828 ext4_mark_inode_dirty(handle, inode);
1833 err = ext4_add_entry (handle, dentry, inode); 1829 err = ext4_add_entry (handle, dentry, inode);
1834 if (err) { 1830 if (err) {
1835 inode->i_nlink = 0; 1831out_clear_inode:
1832 clear_nlink(inode);
1836 ext4_mark_inode_dirty(handle, inode); 1833 ext4_mark_inode_dirty(handle, inode);
1837 iput (inode); 1834 iput (inode);
1838 goto out_stop; 1835 goto out_stop;
@@ -2164,7 +2161,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
2164 dir->i_ctime = dir->i_mtime = ext4_current_time(dir); 2161 dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
2165 ext4_update_dx_flag(dir); 2162 ext4_update_dx_flag(dir);
2166 ext4_mark_inode_dirty(handle, dir); 2163 ext4_mark_inode_dirty(handle, dir);
2167 ext4_dec_count(handle, inode); 2164 drop_nlink(inode);
2168 if (!inode->i_nlink) 2165 if (!inode->i_nlink)
2169 ext4_orphan_add(handle, inode); 2166 ext4_orphan_add(handle, inode);
2170 inode->i_ctime = ext4_current_time(inode); 2167 inode->i_ctime = ext4_current_time(inode);
@@ -2214,7 +2211,7 @@ retry:
2214 err = __page_symlink(inode, symname, l, 2211 err = __page_symlink(inode, symname, l,
2215 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 2212 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
2216 if (err) { 2213 if (err) {
2217 ext4_dec_count(handle, inode); 2214 clear_nlink(inode);
2218 ext4_mark_inode_dirty(handle, inode); 2215 ext4_mark_inode_dirty(handle, inode);
2219 iput (inode); 2216 iput (inode);
2220 goto out_stop; 2217 goto out_stop;
@@ -2223,7 +2220,6 @@ retry:
2223 inode->i_op = &ext4_fast_symlink_inode_operations; 2220 inode->i_op = &ext4_fast_symlink_inode_operations;
2224 memcpy((char*)&EXT4_I(inode)->i_data,symname,l); 2221 memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
2225 inode->i_size = l-1; 2222 inode->i_size = l-1;
2226 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
2227 } 2223 }
2228 EXT4_I(inode)->i_disksize = inode->i_size; 2224 EXT4_I(inode)->i_disksize = inode->i_size;
2229 err = ext4_add_nondir(handle, dentry, inode); 2225 err = ext4_add_nondir(handle, dentry, inode);
@@ -2407,7 +2403,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2407 ext4_dec_count(handle, old_dir); 2403 ext4_dec_count(handle, old_dir);
2408 if (new_inode) { 2404 if (new_inode) {
2409 /* checked empty_dir above, can't have another parent, 2405 /* checked empty_dir above, can't have another parent,
2410 * ext3_dec_count() won't work for many-linked dirs */ 2406 * ext4_dec_count() won't work for many-linked dirs */
2411 new_inode->i_nlink = 0; 2407 new_inode->i_nlink = 0;
2412 } else { 2408 } else {
2413 ext4_inc_count(handle, new_dir); 2409 ext4_inc_count(handle, new_dir);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9477a2bd6ff2..e29efa0f9d62 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1037,6 +1037,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1037 ext4_warning(sb, __FUNCTION__, 1037 ext4_warning(sb, __FUNCTION__,
1038 "multiple resizers run on filesystem!"); 1038 "multiple resizers run on filesystem!");
1039 unlock_super(sb); 1039 unlock_super(sb);
1040 ext4_journal_stop(handle);
1040 err = -EBUSY; 1041 err = -EBUSY;
1041 goto exit_put; 1042 goto exit_put;
1042 } 1043 }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index d7962139c010..e9054c1c7d93 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1386,7 +1386,7 @@ ext4_xattr_cache_insert(struct buffer_head *bh)
1386 struct mb_cache_entry *ce; 1386 struct mb_cache_entry *ce;
1387 int error; 1387 int error;
1388 1388
1389 ce = mb_cache_entry_alloc(ext4_xattr_cache); 1389 ce = mb_cache_entry_alloc(ext4_xattr_cache, GFP_NOFS);
1390 if (!ce) { 1390 if (!ce) {
1391 ea_bdebug(bh, "out of memory"); 1391 ea_bdebug(bh, "out of memory");
1392 return; 1392 return;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index c614175876e0..2a3bed967041 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/capability.h> 9#include <linux/capability.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/mount.h>
11#include <linux/time.h> 12#include <linux/time.h>
12#include <linux/msdos_fs.h> 13#include <linux/msdos_fs.h>
13#include <linux/smp_lock.h> 14#include <linux/smp_lock.h>
@@ -46,10 +47,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
46 47
47 mutex_lock(&inode->i_mutex); 48 mutex_lock(&inode->i_mutex);
48 49
49 if (IS_RDONLY(inode)) { 50 err = mnt_want_write(filp->f_path.mnt);
50 err = -EROFS; 51 if (err)
51 goto up; 52 goto up_no_drop_write;
52 }
53 53
54 /* 54 /*
55 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also 55 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -105,7 +105,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
105 105
106 MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; 106 MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED;
107 mark_inode_dirty(inode); 107 mark_inode_dirty(inode);
108 up: 108up:
109 mnt_drop_write(filp->f_path.mnt);
110up_no_drop_write:
109 mutex_unlock(&inode->i_mutex); 111 mutex_unlock(&inode->i_mutex);
110 return err; 112 return err;
111 } 113 }
diff --git a/fs/file_table.c b/fs/file_table.c
index 6d27befe2d48..7a0a9b872251 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -42,6 +42,7 @@ static inline void file_free_rcu(struct rcu_head *head)
42static inline void file_free(struct file *f) 42static inline void file_free(struct file *f)
43{ 43{
44 percpu_counter_dec(&nr_files); 44 percpu_counter_dec(&nr_files);
45 file_check_state(f);
45 call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); 46 call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
46} 47}
47 48
@@ -83,6 +84,12 @@ int proc_nr_files(ctl_table *table, int write, struct file *filp,
83/* Find an unused file structure and return a pointer to it. 84/* Find an unused file structure and return a pointer to it.
84 * Returns NULL, if there are no more free file structures or 85 * Returns NULL, if there are no more free file structures or
85 * we run out of memory. 86 * we run out of memory.
87 *
88 * Be very careful using this. You are responsible for
89 * getting write access to any mount that you might assign
90 * to this filp, if it is opened for write. If this is not
91 * done, you will imbalance in the mount's writer count
92 * and a warning at __fput() time.
86 */ 93 */
87struct file *get_empty_filp(void) 94struct file *get_empty_filp(void)
88{ 95{
@@ -193,6 +200,18 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
193 file->f_mapping = dentry->d_inode->i_mapping; 200 file->f_mapping = dentry->d_inode->i_mapping;
194 file->f_mode = mode; 201 file->f_mode = mode;
195 file->f_op = fop; 202 file->f_op = fop;
203
204 /*
205 * These mounts don't really matter in practice
206 * for r/o bind mounts. They aren't userspace-
207 * visible. We do this for consistency, and so
208 * that we can do debugging checks at __fput()
209 */
210 if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
211 file_take_write(file);
212 error = mnt_want_write(mnt);
213 WARN_ON(error);
214 }
196 return error; 215 return error;
197} 216}
198EXPORT_SYMBOL(init_file); 217EXPORT_SYMBOL(init_file);
@@ -205,6 +224,31 @@ void fput(struct file *file)
205 224
206EXPORT_SYMBOL(fput); 225EXPORT_SYMBOL(fput);
207 226
227/**
228 * drop_file_write_access - give up ability to write to a file
229 * @file: the file to which we will stop writing
230 *
231 * This is a central place which will give up the ability
232 * to write to @file, along with access to write through
233 * its vfsmount.
234 */
235void drop_file_write_access(struct file *file)
236{
237 struct vfsmount *mnt = file->f_path.mnt;
238 struct dentry *dentry = file->f_path.dentry;
239 struct inode *inode = dentry->d_inode;
240
241 put_write_access(inode);
242
243 if (special_file(inode->i_mode))
244 return;
245 if (file_check_writeable(file) != 0)
246 return;
247 mnt_drop_write(mnt);
248 file_release_write(file);
249}
250EXPORT_SYMBOL_GPL(drop_file_write_access);
251
208/* __fput is called from task context when aio completion releases the last 252/* __fput is called from task context when aio completion releases the last
209 * last use of a struct file *. Do not use otherwise. 253 * last use of a struct file *. Do not use otherwise.
210 */ 254 */
@@ -230,10 +274,10 @@ void __fput(struct file *file)
230 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) 274 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
231 cdev_put(inode->i_cdev); 275 cdev_put(inode->i_cdev);
232 fops_put(file->f_op); 276 fops_put(file->f_op);
233 if (file->f_mode & FMODE_WRITE)
234 put_write_access(inode);
235 put_pid(file->f_owner.pid); 277 put_pid(file->f_owner.pid);
236 file_kill(file); 278 file_kill(file);
279 if (file->f_mode & FMODE_WRITE)
280 drop_file_write_access(file);
237 file->f_path.dentry = NULL; 281 file->f_path.dentry = NULL;
238 file->f_path.mnt = NULL; 282 file->f_path.mnt = NULL;
239 file_free(file); 283 file_free(file);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c0076077d338..06557679ca41 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -751,7 +751,7 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int
751EXPORT_SYMBOL(generic_osync_inode); 751EXPORT_SYMBOL(generic_osync_inode);
752 752
753/** 753/**
754 * writeback_acquire: attempt to get exclusive writeback access to a device 754 * writeback_acquire - attempt to get exclusive writeback access to a device
755 * @bdi: the device's backing_dev_info structure 755 * @bdi: the device's backing_dev_info structure
756 * 756 *
757 * It is a waste of resources to have more than one pdflush thread blocked on 757 * It is a waste of resources to have more than one pdflush thread blocked on
@@ -768,7 +768,7 @@ int writeback_acquire(struct backing_dev_info *bdi)
768} 768}
769 769
770/** 770/**
771 * writeback_in_progress: determine whether there is writeback in progress 771 * writeback_in_progress - determine whether there is writeback in progress
772 * @bdi: the device's backing_dev_info structure. 772 * @bdi: the device's backing_dev_info structure.
773 * 773 *
774 * Determine whether there is writeback in progress against a backing device. 774 * Determine whether there is writeback in progress against a backing device.
@@ -779,7 +779,7 @@ int writeback_in_progress(struct backing_dev_info *bdi)
779} 779}
780 780
781/** 781/**
782 * writeback_release: relinquish exclusive writeback access against a device. 782 * writeback_release - relinquish exclusive writeback access against a device.
783 * @bdi: the device's backing_dev_info structure 783 * @bdi: the device's backing_dev_info structure
784 */ 784 */
785void writeback_release(struct backing_dev_info *bdi) 785void writeback_release(struct backing_dev_info *bdi)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 7fb514b6d852..c4807b3fc8a3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -906,7 +906,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
906 } 906 }
907 907
908 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { 908 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
909 int err = generic_permission(inode, mask, NULL); 909 err = generic_permission(inode, mask, NULL);
910 910
911 /* If permission is denied, try to refresh file 911 /* If permission is denied, try to refresh file
912 attributes. This is also needed, because the root 912 attributes. This is also needed, because the root
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index de8e64c03f73..7f7947e3dfbb 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -1,6 +1,6 @@
1config GFS2_FS 1config GFS2_FS
2 tristate "GFS2 file system support" 2 tristate "GFS2 file system support"
3 depends on EXPERIMENTAL 3 depends on EXPERIMENTAL && (64BIT || (LSF && LBD))
4 select FS_POSIX_ACL 4 select FS_POSIX_ACL
5 select CRC32 5 select CRC32
6 help 6 help
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 8fff11058cee..e2350df02a07 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,6 +1,6 @@
1obj-$(CONFIG_GFS2_FS) += gfs2.o 1obj-$(CONFIG_GFS2_FS) += gfs2.o
2gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ 2gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
3 glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ 3 glops.o inode.o log.o lops.o locking.o main.o meta_io.o \
4 mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ 4 mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
5 ops_fstype.o ops_inode.o ops_super.o quota.o \ 5 ops_fstype.o ops_inode.o ops_super.o quota.o \
6 recovery.o rgrp.o super.o sys.o trans.o util.o 6 recovery.o rgrp.o super.o sys.o trans.o util.o
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 1047a8c7226a..3e9bd46f27e3 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -116,7 +116,7 @@ static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
116 goto out; 116 goto out;
117 117
118 er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea); 118 er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea);
119 er.er_data = kmalloc(er.er_data_len, GFP_KERNEL); 119 er.er_data = kmalloc(er.er_data_len, GFP_NOFS);
120 error = -ENOMEM; 120 error = -ENOMEM;
121 if (!er.er_data) 121 if (!er.er_data)
122 goto out; 122 goto out;
@@ -222,7 +222,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
222 return error; 222 return error;
223 } 223 }
224 224
225 clone = posix_acl_clone(acl, GFP_KERNEL); 225 clone = posix_acl_clone(acl, GFP_NOFS);
226 error = -ENOMEM; 226 error = -ENOMEM;
227 if (!clone) 227 if (!clone)
228 goto out; 228 goto out;
@@ -272,7 +272,7 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
272 if (!acl) 272 if (!acl)
273 return gfs2_setattr_simple(ip, attr); 273 return gfs2_setattr_simple(ip, attr);
274 274
275 clone = posix_acl_clone(acl, GFP_KERNEL); 275 clone = posix_acl_clone(acl, GFP_NOFS);
276 error = -ENOMEM; 276 error = -ENOMEM;
277 if (!clone) 277 if (!clone)
278 goto out; 278 goto out;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index e9456ebd3bb6..c19184f2e70e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -33,6 +33,7 @@
33 * keep it small. 33 * keep it small.
34 */ 34 */
35struct metapath { 35struct metapath {
36 struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
36 __u16 mp_list[GFS2_MAX_META_HEIGHT]; 37 __u16 mp_list[GFS2_MAX_META_HEIGHT];
37}; 38};
38 39
@@ -135,9 +136,10 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
135 /* Get a free block, fill it with the stuffed data, 136 /* Get a free block, fill it with the stuffed data,
136 and write it out to disk */ 137 and write it out to disk */
137 138
139 unsigned int n = 1;
140 block = gfs2_alloc_block(ip, &n);
138 if (isdir) { 141 if (isdir) {
139 block = gfs2_alloc_meta(ip); 142 gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
140
141 error = gfs2_dir_get_new_buffer(ip, block, &bh); 143 error = gfs2_dir_get_new_buffer(ip, block, &bh);
142 if (error) 144 if (error)
143 goto out_brelse; 145 goto out_brelse;
@@ -145,8 +147,6 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
145 dibh, sizeof(struct gfs2_dinode)); 147 dibh, sizeof(struct gfs2_dinode));
146 brelse(bh); 148 brelse(bh);
147 } else { 149 } else {
148 block = gfs2_alloc_data(ip);
149
150 error = gfs2_unstuffer_page(ip, dibh, block, page); 150 error = gfs2_unstuffer_page(ip, dibh, block, page);
151 if (error) 151 if (error)
152 goto out_brelse; 152 goto out_brelse;
@@ -161,12 +161,11 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
161 161
162 if (ip->i_di.di_size) { 162 if (ip->i_di.di_size) {
163 *(__be64 *)(di + 1) = cpu_to_be64(block); 163 *(__be64 *)(di + 1) = cpu_to_be64(block);
164 ip->i_di.di_blocks++; 164 gfs2_add_inode_blocks(&ip->i_inode, 1);
165 gfs2_set_inode_blocks(&ip->i_inode); 165 di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
166 di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
167 } 166 }
168 167
169 ip->i_di.di_height = 1; 168 ip->i_height = 1;
170 di->di_height = cpu_to_be16(1); 169 di->di_height = cpu_to_be16(1);
171 170
172out_brelse: 171out_brelse:
@@ -176,114 +175,13 @@ out:
176 return error; 175 return error;
177} 176}
178 177
179/**
180 * calc_tree_height - Calculate the height of a metadata tree
181 * @ip: The GFS2 inode
182 * @size: The proposed size of the file
183 *
184 * Work out how tall a metadata tree needs to be in order to accommodate a
185 * file of a particular size. If size is less than the current size of
186 * the inode, then the current size of the inode is used instead of the
187 * supplied one.
188 *
189 * Returns: the height the tree should be
190 */
191
192static unsigned int calc_tree_height(struct gfs2_inode *ip, u64 size)
193{
194 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
195 u64 *arr;
196 unsigned int max, height;
197
198 if (ip->i_di.di_size > size)
199 size = ip->i_di.di_size;
200
201 if (gfs2_is_dir(ip)) {
202 arr = sdp->sd_jheightsize;
203 max = sdp->sd_max_jheight;
204 } else {
205 arr = sdp->sd_heightsize;
206 max = sdp->sd_max_height;
207 }
208
209 for (height = 0; height < max; height++)
210 if (arr[height] >= size)
211 break;
212
213 return height;
214}
215
216/**
217 * build_height - Build a metadata tree of the requested height
218 * @ip: The GFS2 inode
219 * @height: The height to build to
220 *
221 *
222 * Returns: errno
223 */
224
225static int build_height(struct inode *inode, unsigned height)
226{
227 struct gfs2_inode *ip = GFS2_I(inode);
228 unsigned new_height = height - ip->i_di.di_height;
229 struct buffer_head *dibh;
230 struct buffer_head *blocks[GFS2_MAX_META_HEIGHT];
231 struct gfs2_dinode *di;
232 int error;
233 __be64 *bp;
234 u64 bn;
235 unsigned n;
236
237 if (height <= ip->i_di.di_height)
238 return 0;
239
240 error = gfs2_meta_inode_buffer(ip, &dibh);
241 if (error)
242 return error;
243
244 for(n = 0; n < new_height; n++) {
245 bn = gfs2_alloc_meta(ip);
246 blocks[n] = gfs2_meta_new(ip->i_gl, bn);
247 gfs2_trans_add_bh(ip->i_gl, blocks[n], 1);
248 }
249
250 n = 0;
251 bn = blocks[0]->b_blocknr;
252 if (new_height > 1) {
253 for(; n < new_height-1; n++) {
254 gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN,
255 GFS2_FORMAT_IN);
256 gfs2_buffer_clear_tail(blocks[n],
257 sizeof(struct gfs2_meta_header));
258 bp = (__be64 *)(blocks[n]->b_data +
259 sizeof(struct gfs2_meta_header));
260 *bp = cpu_to_be64(blocks[n+1]->b_blocknr);
261 brelse(blocks[n]);
262 blocks[n] = NULL;
263 }
264 }
265 gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
266 gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header),
267 dibh, sizeof(struct gfs2_dinode));
268 brelse(blocks[n]);
269 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
270 di = (struct gfs2_dinode *)dibh->b_data;
271 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
272 *(__be64 *)(di + 1) = cpu_to_be64(bn);
273 ip->i_di.di_height += new_height;
274 ip->i_di.di_blocks += new_height;
275 gfs2_set_inode_blocks(&ip->i_inode);
276 di->di_height = cpu_to_be16(ip->i_di.di_height);
277 di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
278 brelse(dibh);
279 return error;
280}
281 178
282/** 179/**
283 * find_metapath - Find path through the metadata tree 180 * find_metapath - Find path through the metadata tree
284 * @ip: The inode pointer 181 * @sdp: The superblock
285 * @mp: The metapath to return the result in 182 * @mp: The metapath to return the result in
286 * @block: The disk block to look up 183 * @block: The disk block to look up
184 * @height: The pre-calculated height of the metadata tree
287 * 185 *
288 * This routine returns a struct metapath structure that defines a path 186 * This routine returns a struct metapath structure that defines a path
289 * through the metadata of inode "ip" to get to block "block". 187 * through the metadata of inode "ip" to get to block "block".
@@ -338,21 +236,29 @@ static int build_height(struct inode *inode, unsigned height)
338 * 236 *
339 */ 237 */
340 238
341static void find_metapath(struct gfs2_inode *ip, u64 block, 239static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
342 struct metapath *mp) 240 struct metapath *mp, unsigned int height)
343{ 241{
344 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
345 u64 b = block;
346 unsigned int i; 242 unsigned int i;
347 243
348 for (i = ip->i_di.di_height; i--;) 244 for (i = height; i--;)
349 mp->mp_list[i] = do_div(b, sdp->sd_inptrs); 245 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
246
247}
350 248
249static inline unsigned int zero_metapath_length(const struct metapath *mp,
250 unsigned height)
251{
252 unsigned int i;
253 for (i = 0; i < height - 1; i++) {
254 if (mp->mp_list[i] != 0)
255 return i;
256 }
257 return height;
351} 258}
352 259
353/** 260/**
354 * metapointer - Return pointer to start of metadata in a buffer 261 * metapointer - Return pointer to start of metadata in a buffer
355 * @bh: The buffer
356 * @height: The metadata height (0 = dinode) 262 * @height: The metadata height (0 = dinode)
357 * @mp: The metapath 263 * @mp: The metapath
358 * 264 *
@@ -361,93 +267,302 @@ static void find_metapath(struct gfs2_inode *ip, u64 block,
361 * metadata tree. 267 * metadata tree.
362 */ 268 */
363 269
364static inline __be64 *metapointer(struct buffer_head *bh, int *boundary, 270static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
365 unsigned int height, const struct metapath *mp)
366{ 271{
272 struct buffer_head *bh = mp->mp_bh[height];
367 unsigned int head_size = (height > 0) ? 273 unsigned int head_size = (height > 0) ?
368 sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); 274 sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
369 __be64 *ptr; 275 return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height];
370 *boundary = 0;
371 ptr = ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height];
372 if (ptr + 1 == (__be64 *)(bh->b_data + bh->b_size))
373 *boundary = 1;
374 return ptr;
375} 276}
376 277
377/** 278/**
378 * lookup_block - Get the next metadata block in metadata tree 279 * lookup_metapath - Walk the metadata tree to a specific point
379 * @ip: The GFS2 inode 280 * @ip: The inode
380 * @bh: Buffer containing the pointers to metadata blocks
381 * @height: The height of the tree (0 = dinode)
382 * @mp: The metapath 281 * @mp: The metapath
383 * @create: Non-zero if we may create a new meatdata block
384 * @new: Used to indicate if we did create a new metadata block
385 * @block: the returned disk block number
386 * 282 *
387 * Given a metatree, complete to a particular height, checks to see if the next 283 * Assumes that the inode's buffer has already been looked up and
388 * height of the tree exists. If not the next height of the tree is created. 284 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
389 * The block number of the next height of the metadata tree is returned. 285 * by find_metapath().
286 *
287 * If this function encounters part of the tree which has not been
288 * allocated, it returns the current height of the tree at the point
289 * at which it found the unallocated block. Blocks which are found are
290 * added to the mp->mp_bh[] list.
390 * 291 *
292 * Returns: error or height of metadata tree
391 */ 293 */
392 294
393static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh, 295static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
394 unsigned int height, struct metapath *mp, int create,
395 int *new, u64 *block)
396{ 296{
397 int boundary; 297 unsigned int end_of_metadata = ip->i_height - 1;
398 __be64 *ptr = metapointer(bh, &boundary, height, mp); 298 unsigned int x;
299 __be64 *ptr;
300 u64 dblock;
301 int ret;
399 302
400 if (*ptr) { 303 for (x = 0; x < end_of_metadata; x++) {
401 *block = be64_to_cpu(*ptr); 304 ptr = metapointer(x, mp);
402 return boundary; 305 dblock = be64_to_cpu(*ptr);
403 } 306 if (!dblock)
307 return x + 1;
404 308
405 *block = 0; 309 ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, 0, &mp->mp_bh[x+1]);
310 if (ret)
311 return ret;
312 }
406 313
407 if (!create) 314 return ip->i_height;
408 return 0; 315}
409 316
410 if (height == ip->i_di.di_height - 1 && !gfs2_is_dir(ip)) 317static inline void release_metapath(struct metapath *mp)
411 *block = gfs2_alloc_data(ip); 318{
412 else 319 int i;
413 *block = gfs2_alloc_meta(ip);
414 320
415 gfs2_trans_add_bh(ip->i_gl, bh, 1); 321 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
322 if (mp->mp_bh[i] == NULL)
323 break;
324 brelse(mp->mp_bh[i]);
325 }
326}
416 327
417 *ptr = cpu_to_be64(*block); 328/**
418 ip->i_di.di_blocks++; 329 * gfs2_extent_length - Returns length of an extent of blocks
419 gfs2_set_inode_blocks(&ip->i_inode); 330 * @start: Start of the buffer
331 * @len: Length of the buffer in bytes
332 * @ptr: Current position in the buffer
333 * @limit: Max extent length to return (0 = unlimited)
334 * @eob: Set to 1 if we hit "end of block"
335 *
336 * If the first block is zero (unallocated) it will return the number of
337 * unallocated blocks in the extent, otherwise it will return the number
338 * of contiguous blocks in the extent.
339 *
340 * Returns: The length of the extent (minimum of one block)
341 */
420 342
421 *new = 1; 343static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, unsigned limit, int *eob)
422 return 0; 344{
345 const __be64 *end = (start + len);
346 const __be64 *first = ptr;
347 u64 d = be64_to_cpu(*ptr);
348
349 *eob = 0;
350 do {
351 ptr++;
352 if (ptr >= end)
353 break;
354 if (limit && --limit == 0)
355 break;
356 if (d)
357 d++;
358 } while(be64_to_cpu(*ptr) == d);
359 if (ptr >= end)
360 *eob = 1;
361 return (ptr - first);
423} 362}
424 363
425static inline void bmap_lock(struct inode *inode, int create) 364static inline void bmap_lock(struct gfs2_inode *ip, int create)
426{ 365{
427 struct gfs2_inode *ip = GFS2_I(inode);
428 if (create) 366 if (create)
429 down_write(&ip->i_rw_mutex); 367 down_write(&ip->i_rw_mutex);
430 else 368 else
431 down_read(&ip->i_rw_mutex); 369 down_read(&ip->i_rw_mutex);
432} 370}
433 371
434static inline void bmap_unlock(struct inode *inode, int create) 372static inline void bmap_unlock(struct gfs2_inode *ip, int create)
435{ 373{
436 struct gfs2_inode *ip = GFS2_I(inode);
437 if (create) 374 if (create)
438 up_write(&ip->i_rw_mutex); 375 up_write(&ip->i_rw_mutex);
439 else 376 else
440 up_read(&ip->i_rw_mutex); 377 up_read(&ip->i_rw_mutex);
441} 378}
442 379
380static inline __be64 *gfs2_indirect_init(struct metapath *mp,
381 struct gfs2_glock *gl, unsigned int i,
382 unsigned offset, u64 bn)
383{
384 __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
385 ((i > 1) ? sizeof(struct gfs2_meta_header) :
386 sizeof(struct gfs2_dinode)));
387 BUG_ON(i < 1);
388 BUG_ON(mp->mp_bh[i] != NULL);
389 mp->mp_bh[i] = gfs2_meta_new(gl, bn);
390 gfs2_trans_add_bh(gl, mp->mp_bh[i], 1);
391 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
392 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
393 ptr += offset;
394 *ptr = cpu_to_be64(bn);
395 return ptr;
396}
397
398enum alloc_state {
399 ALLOC_DATA = 0,
400 ALLOC_GROW_DEPTH = 1,
401 ALLOC_GROW_HEIGHT = 2,
402 /* ALLOC_UNSTUFF = 3, TBD and rather complicated */
403};
404
405/**
406 * gfs2_bmap_alloc - Build a metadata tree of the requested height
407 * @inode: The GFS2 inode
408 * @lblock: The logical starting block of the extent
409 * @bh_map: This is used to return the mapping details
410 * @mp: The metapath
411 * @sheight: The starting height (i.e. whats already mapped)
412 * @height: The height to build to
413 * @maxlen: The max number of data blocks to alloc
414 *
415 * In this routine we may have to alloc:
416 * i) Indirect blocks to grow the metadata tree height
417 * ii) Indirect blocks to fill in lower part of the metadata tree
418 * iii) Data blocks
419 *
420 * The function is in two parts. The first part works out the total
421 * number of blocks which we need. The second part does the actual
422 * allocation asking for an extent at a time (if enough contiguous free
423 * blocks are available, there will only be one request per bmap call)
424 * and uses the state machine to initialise the blocks in order.
425 *
426 * Returns: errno on error
427 */
428
429static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
430 struct buffer_head *bh_map, struct metapath *mp,
431 const unsigned int sheight,
432 const unsigned int height,
433 const unsigned int maxlen)
434{
435 struct gfs2_inode *ip = GFS2_I(inode);
436 struct gfs2_sbd *sdp = GFS2_SB(inode);
437 struct buffer_head *dibh = mp->mp_bh[0];
438 u64 bn, dblock = 0;
439 unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0;
440 unsigned dblks = 0;
441 unsigned ptrs_per_blk;
442 const unsigned end_of_metadata = height - 1;
443 int eob = 0;
444 enum alloc_state state;
445 __be64 *ptr;
446 __be64 zero_bn = 0;
447
448 BUG_ON(sheight < 1);
449 BUG_ON(dibh == NULL);
450
451 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
452
453 if (height == sheight) {
454 struct buffer_head *bh;
455 /* Bottom indirect block exists, find unalloced extent size */
456 ptr = metapointer(end_of_metadata, mp);
457 bh = mp->mp_bh[end_of_metadata];
458 dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen,
459 &eob);
460 BUG_ON(dblks < 1);
461 state = ALLOC_DATA;
462 } else {
463 /* Need to allocate indirect blocks */
464 ptrs_per_blk = height > 1 ? sdp->sd_inptrs : sdp->sd_diptrs;
465 dblks = min(maxlen, ptrs_per_blk - mp->mp_list[end_of_metadata]);
466 if (height == ip->i_height) {
467 /* Writing into existing tree, extend tree down */
468 iblks = height - sheight;
469 state = ALLOC_GROW_DEPTH;
470 } else {
471 /* Building up tree height */
472 state = ALLOC_GROW_HEIGHT;
473 iblks = height - ip->i_height;
474 zmpl = zero_metapath_length(mp, height);
475 iblks -= zmpl;
476 iblks += height;
477 }
478 }
479
480 /* start of the second part of the function (state machine) */
481
482 blks = dblks + iblks;
483 i = sheight;
484 do {
485 n = blks - alloced;
486 bn = gfs2_alloc_block(ip, &n);
487 alloced += n;
488 if (state != ALLOC_DATA || gfs2_is_jdata(ip))
489 gfs2_trans_add_unrevoke(sdp, bn, n);
490 switch (state) {
491 /* Growing height of tree */
492 case ALLOC_GROW_HEIGHT:
493 if (i == 1) {
494 ptr = (__be64 *)(dibh->b_data +
495 sizeof(struct gfs2_dinode));
496 zero_bn = *ptr;
497 }
498 for (; i - 1 < height - ip->i_height && n > 0; i++, n--)
499 gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
500 if (i - 1 == height - ip->i_height) {
501 i--;
502 gfs2_buffer_copy_tail(mp->mp_bh[i],
503 sizeof(struct gfs2_meta_header),
504 dibh, sizeof(struct gfs2_dinode));
505 gfs2_buffer_clear_tail(dibh,
506 sizeof(struct gfs2_dinode) +
507 sizeof(__be64));
508 ptr = (__be64 *)(mp->mp_bh[i]->b_data +
509 sizeof(struct gfs2_meta_header));
510 *ptr = zero_bn;
511 state = ALLOC_GROW_DEPTH;
512 for(i = zmpl; i < height; i++) {
513 if (mp->mp_bh[i] == NULL)
514 break;
515 brelse(mp->mp_bh[i]);
516 mp->mp_bh[i] = NULL;
517 }
518 i = zmpl;
519 }
520 if (n == 0)
521 break;
522 /* Branching from existing tree */
523 case ALLOC_GROW_DEPTH:
524 if (i > 1 && i < height)
525 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[i-1], 1);
526 for (; i < height && n > 0; i++, n--)
527 gfs2_indirect_init(mp, ip->i_gl, i,
528 mp->mp_list[i-1], bn++);
529 if (i == height)
530 state = ALLOC_DATA;
531 if (n == 0)
532 break;
533 /* Tree complete, adding data blocks */
534 case ALLOC_DATA:
535 BUG_ON(n > dblks);
536 BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
537 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[end_of_metadata], 1);
538 dblks = n;
539 ptr = metapointer(end_of_metadata, mp);
540 dblock = bn;
541 while (n-- > 0)
542 *ptr++ = cpu_to_be64(bn++);
543 break;
544 }
545 } while (state != ALLOC_DATA);
546
547 ip->i_height = height;
548 gfs2_add_inode_blocks(&ip->i_inode, alloced);
549 gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
550 map_bh(bh_map, inode->i_sb, dblock);
551 bh_map->b_size = dblks << inode->i_blkbits;
552 set_buffer_new(bh_map);
553 return 0;
554}
555
443/** 556/**
444 * gfs2_block_map - Map a block from an inode to a disk block 557 * gfs2_block_map - Map a block from an inode to a disk block
445 * @inode: The inode 558 * @inode: The inode
446 * @lblock: The logical block number 559 * @lblock: The logical block number
447 * @bh_map: The bh to be mapped 560 * @bh_map: The bh to be mapped
561 * @create: True if its ok to alloc blocks to satify the request
448 * 562 *
449 * Find the block number on the current device which corresponds to an 563 * Sets buffer_mapped() if successful, sets buffer_boundary() if a
450 * inode's block. If the block had to be created, "new" will be set. 564 * read of metadata will be required before the next block can be
565 * mapped. Sets buffer_new() if new blocks were allocated.
451 * 566 *
452 * Returns: errno 567 * Returns: errno
453 */ 568 */
@@ -457,97 +572,78 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
457{ 572{
458 struct gfs2_inode *ip = GFS2_I(inode); 573 struct gfs2_inode *ip = GFS2_I(inode);
459 struct gfs2_sbd *sdp = GFS2_SB(inode); 574 struct gfs2_sbd *sdp = GFS2_SB(inode);
460 struct buffer_head *bh; 575 unsigned int bsize = sdp->sd_sb.sb_bsize;
461 unsigned int bsize; 576 const unsigned int maxlen = bh_map->b_size >> inode->i_blkbits;
462 unsigned int height; 577 const u64 *arr = sdp->sd_heightsize;
463 unsigned int end_of_metadata; 578 __be64 *ptr;
464 unsigned int x;
465 int error = 0;
466 int new = 0;
467 u64 dblock = 0;
468 int boundary;
469 unsigned int maxlen = bh_map->b_size >> inode->i_blkbits;
470 struct metapath mp;
471 u64 size; 579 u64 size;
472 struct buffer_head *dibh = NULL; 580 struct metapath mp;
581 int ret;
582 int eob;
583 unsigned int len;
584 struct buffer_head *bh;
585 u8 height;
473 586
474 BUG_ON(maxlen == 0); 587 BUG_ON(maxlen == 0);
475 588
476 if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) 589 memset(mp.mp_bh, 0, sizeof(mp.mp_bh));
477 return 0; 590 bmap_lock(ip, create);
478
479 bmap_lock(inode, create);
480 clear_buffer_mapped(bh_map); 591 clear_buffer_mapped(bh_map);
481 clear_buffer_new(bh_map); 592 clear_buffer_new(bh_map);
482 clear_buffer_boundary(bh_map); 593 clear_buffer_boundary(bh_map);
483 bsize = gfs2_is_dir(ip) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; 594 if (gfs2_is_dir(ip)) {
484 size = (lblock + 1) * bsize; 595 bsize = sdp->sd_jbsize;
485 596 arr = sdp->sd_jheightsize;
486 if (size > ip->i_di.di_size) {
487 height = calc_tree_height(ip, size);
488 if (ip->i_di.di_height < height) {
489 if (!create)
490 goto out_ok;
491
492 error = build_height(inode, height);
493 if (error)
494 goto out_fail;
495 }
496 } 597 }
497 598
498 find_metapath(ip, lblock, &mp); 599 ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
499 end_of_metadata = ip->i_di.di_height - 1; 600 if (ret)
500 error = gfs2_meta_inode_buffer(ip, &bh); 601 goto out;
501 if (error)
502 goto out_fail;
503 dibh = bh;
504 get_bh(dibh);
505 602
506 for (x = 0; x < end_of_metadata; x++) { 603 height = ip->i_height;
507 lookup_block(ip, bh, x, &mp, create, &new, &dblock); 604 size = (lblock + 1) * bsize;
508 brelse(bh); 605 while (size > arr[height])
509 if (!dblock) 606 height++;
510 goto out_ok; 607 find_metapath(sdp, lblock, &mp, height);
608 ret = 1;
609 if (height > ip->i_height || gfs2_is_stuffed(ip))
610 goto do_alloc;
611 ret = lookup_metapath(ip, &mp);
612 if (ret < 0)
613 goto out;
614 if (ret != ip->i_height)
615 goto do_alloc;
616 ptr = metapointer(ip->i_height - 1, &mp);
617 if (*ptr == 0)
618 goto do_alloc;
619 map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr));
620 bh = mp.mp_bh[ip->i_height - 1];
621 len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, &eob);
622 bh_map->b_size = (len << inode->i_blkbits);
623 if (eob)
624 set_buffer_boundary(bh_map);
625 ret = 0;
626out:
627 release_metapath(&mp);
628 bmap_unlock(ip, create);
629 return ret;
511 630
512 error = gfs2_meta_indirect_buffer(ip, x+1, dblock, new, &bh); 631do_alloc:
513 if (error) 632 /* All allocations are done here, firstly check create flag */
514 goto out_fail; 633 if (!create) {
634 BUG_ON(gfs2_is_stuffed(ip));
635 ret = 0;
636 goto out;
515 } 637 }
516 638
517 boundary = lookup_block(ip, bh, end_of_metadata, &mp, create, &new, &dblock); 639 /* At this point ret is the tree depth of already allocated blocks */
518 if (dblock) { 640 ret = gfs2_bmap_alloc(inode, lblock, bh_map, &mp, ret, height, maxlen);
519 map_bh(bh_map, inode->i_sb, dblock); 641 goto out;
520 if (boundary)
521 set_buffer_boundary(bh_map);
522 if (new) {
523 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
524 gfs2_dinode_out(ip, dibh->b_data);
525 set_buffer_new(bh_map);
526 goto out_brelse;
527 }
528 while(--maxlen && !buffer_boundary(bh_map)) {
529 u64 eblock;
530
531 mp.mp_list[end_of_metadata]++;
532 boundary = lookup_block(ip, bh, end_of_metadata, &mp, 0, &new, &eblock);
533 if (eblock != ++dblock)
534 break;
535 bh_map->b_size += (1 << inode->i_blkbits);
536 if (boundary)
537 set_buffer_boundary(bh_map);
538 }
539 }
540out_brelse:
541 brelse(bh);
542out_ok:
543 error = 0;
544out_fail:
545 if (dibh)
546 brelse(dibh);
547 bmap_unlock(inode, create);
548 return error;
549} 642}
550 643
644/*
645 * Deprecated: do not use in new code
646 */
551int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen) 647int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
552{ 648{
553 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 }; 649 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
@@ -558,7 +654,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
558 BUG_ON(!dblock); 654 BUG_ON(!dblock);
559 BUG_ON(!new); 655 BUG_ON(!new);
560 656
561 bh.b_size = 1 << (inode->i_blkbits + 5); 657 bh.b_size = 1 << (inode->i_blkbits + (create ? 0 : 5));
562 ret = gfs2_block_map(inode, lblock, &bh, create); 658 ret = gfs2_block_map(inode, lblock, &bh, create);
563 *extlen = bh.b_size >> inode->i_blkbits; 659 *extlen = bh.b_size >> inode->i_blkbits;
564 *dblock = bh.b_blocknr; 660 *dblock = bh.b_blocknr;
@@ -621,7 +717,7 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
621 if (error) 717 if (error)
622 goto out; 718 goto out;
623 719
624 if (height < ip->i_di.di_height - 1) 720 if (height < ip->i_height - 1)
625 for (; top < bottom; top++, first = 0) { 721 for (; top < bottom; top++, first = 0) {
626 if (!*top) 722 if (!*top)
627 continue; 723 continue;
@@ -679,7 +775,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
679 sm->sm_first = 0; 775 sm->sm_first = 0;
680 } 776 }
681 777
682 metadata = (height != ip->i_di.di_height - 1); 778 metadata = (height != ip->i_height - 1);
683 if (metadata) 779 if (metadata)
684 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; 780 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
685 781
@@ -713,7 +809,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
713 else 809 else
714 goto out; /* Nothing to do */ 810 goto out; /* Nothing to do */
715 811
716 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); 812 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
717 813
718 for (x = 0; x < rlist.rl_rgrps; x++) { 814 for (x = 0; x < rlist.rl_rgrps; x++) {
719 struct gfs2_rgrpd *rgd; 815 struct gfs2_rgrpd *rgd;
@@ -760,10 +856,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
760 } 856 }
761 857
762 *p = 0; 858 *p = 0;
763 if (!ip->i_di.di_blocks) 859 gfs2_add_inode_blocks(&ip->i_inode, -1);
764 gfs2_consist_inode(ip);
765 ip->i_di.di_blocks--;
766 gfs2_set_inode_blocks(&ip->i_inode);
767 } 860 }
768 if (bstart) { 861 if (bstart) {
769 if (metadata) 862 if (metadata)
@@ -804,19 +897,16 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
804 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 897 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
805 struct gfs2_alloc *al; 898 struct gfs2_alloc *al;
806 struct buffer_head *dibh; 899 struct buffer_head *dibh;
807 unsigned int h;
808 int error; 900 int error;
809 901
810 al = gfs2_alloc_get(ip); 902 al = gfs2_alloc_get(ip);
903 if (!al)
904 return -ENOMEM;
811 905
812 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 906 error = gfs2_quota_lock_check(ip);
813 if (error) 907 if (error)
814 goto out; 908 goto out;
815 909
816 error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
817 if (error)
818 goto out_gunlock_q;
819
820 al->al_requested = sdp->sd_max_height + RES_DATA; 910 al->al_requested = sdp->sd_max_height + RES_DATA;
821 911
822 error = gfs2_inplace_reserve(ip); 912 error = gfs2_inplace_reserve(ip);
@@ -829,34 +919,25 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
829 if (error) 919 if (error)
830 goto out_ipres; 920 goto out_ipres;
831 921
922 error = gfs2_meta_inode_buffer(ip, &dibh);
923 if (error)
924 goto out_end_trans;
925
832 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { 926 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
833 if (gfs2_is_stuffed(ip)) { 927 if (gfs2_is_stuffed(ip)) {
834 error = gfs2_unstuff_dinode(ip, NULL); 928 error = gfs2_unstuff_dinode(ip, NULL);
835 if (error) 929 if (error)
836 goto out_end_trans; 930 goto out_brelse;
837 }
838
839 h = calc_tree_height(ip, size);
840 if (ip->i_di.di_height < h) {
841 down_write(&ip->i_rw_mutex);
842 error = build_height(&ip->i_inode, h);
843 up_write(&ip->i_rw_mutex);
844 if (error)
845 goto out_end_trans;
846 } 931 }
847 } 932 }
848 933
849 ip->i_di.di_size = size; 934 ip->i_di.di_size = size;
850 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 935 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
851
852 error = gfs2_meta_inode_buffer(ip, &dibh);
853 if (error)
854 goto out_end_trans;
855
856 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 936 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
857 gfs2_dinode_out(ip, dibh->b_data); 937 gfs2_dinode_out(ip, dibh->b_data);
858 brelse(dibh);
859 938
939out_brelse:
940 brelse(dibh);
860out_end_trans: 941out_end_trans:
861 gfs2_trans_end(sdp); 942 gfs2_trans_end(sdp);
862out_ipres: 943out_ipres:
@@ -986,7 +1067,8 @@ out:
986 1067
987static int trunc_dealloc(struct gfs2_inode *ip, u64 size) 1068static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
988{ 1069{
989 unsigned int height = ip->i_di.di_height; 1070 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1071 unsigned int height = ip->i_height;
990 u64 lblock; 1072 u64 lblock;
991 struct metapath mp; 1073 struct metapath mp;
992 int error; 1074 int error;
@@ -994,10 +1076,11 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
994 if (!size) 1076 if (!size)
995 lblock = 0; 1077 lblock = 0;
996 else 1078 else
997 lblock = (size - 1) >> GFS2_SB(&ip->i_inode)->sd_sb.sb_bsize_shift; 1079 lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
998 1080
999 find_metapath(ip, lblock, &mp); 1081 find_metapath(sdp, lblock, &mp, ip->i_height);
1000 gfs2_alloc_get(ip); 1082 if (!gfs2_alloc_get(ip))
1083 return -ENOMEM;
1001 1084
1002 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1085 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1003 if (error) 1086 if (error)
@@ -1037,10 +1120,8 @@ static int trunc_end(struct gfs2_inode *ip)
1037 goto out; 1120 goto out;
1038 1121
1039 if (!ip->i_di.di_size) { 1122 if (!ip->i_di.di_size) {
1040 ip->i_di.di_height = 0; 1123 ip->i_height = 0;
1041 ip->i_di.di_goal_meta = 1124 ip->i_goal = ip->i_no_addr;
1042 ip->i_di.di_goal_data =
1043 ip->i_no_addr;
1044 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 1125 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1045 } 1126 }
1046 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1127 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
@@ -1197,10 +1278,9 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1197 unsigned int len, int *alloc_required) 1278 unsigned int len, int *alloc_required)
1198{ 1279{
1199 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1280 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1200 u64 lblock, lblock_stop, dblock; 1281 struct buffer_head bh;
1201 u32 extlen; 1282 unsigned int shift;
1202 int new = 0; 1283 u64 lblock, lblock_stop, size;
1203 int error = 0;
1204 1284
1205 *alloc_required = 0; 1285 *alloc_required = 0;
1206 1286
@@ -1214,6 +1294,8 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1214 return 0; 1294 return 0;
1215 } 1295 }
1216 1296
1297 *alloc_required = 1;
1298 shift = sdp->sd_sb.sb_bsize_shift;
1217 if (gfs2_is_dir(ip)) { 1299 if (gfs2_is_dir(ip)) {
1218 unsigned int bsize = sdp->sd_jbsize; 1300 unsigned int bsize = sdp->sd_jbsize;
1219 lblock = offset; 1301 lblock = offset;
@@ -1221,27 +1303,25 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1221 lblock_stop = offset + len + bsize - 1; 1303 lblock_stop = offset + len + bsize - 1;
1222 do_div(lblock_stop, bsize); 1304 do_div(lblock_stop, bsize);
1223 } else { 1305 } else {
1224 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1225 u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift; 1306 u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift;
1226 lblock = offset >> shift; 1307 lblock = offset >> shift;
1227 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; 1308 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1228 if (lblock_stop > end_of_file) { 1309 if (lblock_stop > end_of_file)
1229 *alloc_required = 1;
1230 return 0; 1310 return 0;
1231 }
1232 } 1311 }
1233 1312
1234 for (; lblock < lblock_stop; lblock += extlen) { 1313 size = (lblock_stop - lblock) << shift;
1235 error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen); 1314 do {
1236 if (error) 1315 bh.b_state = 0;
1237 return error; 1316 bh.b_size = size;
1238 1317 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
1239 if (!dblock) { 1318 if (!buffer_mapped(&bh))
1240 *alloc_required = 1;
1241 return 0; 1319 return 0;
1242 } 1320 size -= bh.b_size;
1243 } 1321 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1322 } while(size > 0);
1244 1323
1324 *alloc_required = 0;
1245 return 0; 1325 return 0;
1246} 1326}
1247 1327
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index c34709512b19..eed040d8ba3a 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -159,6 +159,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
159 unsigned int o; 159 unsigned int o;
160 int copied = 0; 160 int copied = 0;
161 int error = 0; 161 int error = 0;
162 int new = 0;
162 163
163 if (!size) 164 if (!size)
164 return 0; 165 return 0;
@@ -183,7 +184,6 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
183 while (copied < size) { 184 while (copied < size) {
184 unsigned int amount; 185 unsigned int amount;
185 struct buffer_head *bh; 186 struct buffer_head *bh;
186 int new = 0;
187 187
188 amount = size - copied; 188 amount = size - copied;
189 if (amount > sdp->sd_sb.sb_bsize - o) 189 if (amount > sdp->sd_sb.sb_bsize - o)
@@ -757,7 +757,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
757 757
758 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { 758 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
759 struct gfs2_leaf *leaf; 759 struct gfs2_leaf *leaf;
760 unsigned hsize = 1 << ip->i_di.di_depth; 760 unsigned hsize = 1 << ip->i_depth;
761 unsigned index; 761 unsigned index;
762 u64 ln; 762 u64 ln;
763 if (hsize * sizeof(u64) != ip->i_di.di_size) { 763 if (hsize * sizeof(u64) != ip->i_di.di_size) {
@@ -765,7 +765,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
765 return ERR_PTR(-EIO); 765 return ERR_PTR(-EIO);
766 } 766 }
767 767
768 index = name->hash >> (32 - ip->i_di.di_depth); 768 index = name->hash >> (32 - ip->i_depth);
769 error = get_first_leaf(ip, index, &bh); 769 error = get_first_leaf(ip, index, &bh);
770 if (error) 770 if (error)
771 return ERR_PTR(error); 771 return ERR_PTR(error);
@@ -803,14 +803,15 @@ got_dent:
803static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth) 803static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth)
804{ 804{
805 struct gfs2_inode *ip = GFS2_I(inode); 805 struct gfs2_inode *ip = GFS2_I(inode);
806 u64 bn = gfs2_alloc_meta(ip); 806 unsigned int n = 1;
807 u64 bn = gfs2_alloc_block(ip, &n);
807 struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn); 808 struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn);
808 struct gfs2_leaf *leaf; 809 struct gfs2_leaf *leaf;
809 struct gfs2_dirent *dent; 810 struct gfs2_dirent *dent;
810 struct qstr name = { .name = "", .len = 0, .hash = 0 }; 811 struct qstr name = { .name = "", .len = 0, .hash = 0 };
811 if (!bh) 812 if (!bh)
812 return NULL; 813 return NULL;
813 814 gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1);
814 gfs2_trans_add_bh(ip->i_gl, bh, 1); 815 gfs2_trans_add_bh(ip->i_gl, bh, 1);
815 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); 816 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
816 leaf = (struct gfs2_leaf *)bh->b_data; 817 leaf = (struct gfs2_leaf *)bh->b_data;
@@ -905,12 +906,11 @@ static int dir_make_exhash(struct inode *inode)
905 *lp = cpu_to_be64(bn); 906 *lp = cpu_to_be64(bn);
906 907
907 dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2; 908 dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
908 dip->i_di.di_blocks++; 909 gfs2_add_inode_blocks(&dip->i_inode, 1);
909 gfs2_set_inode_blocks(&dip->i_inode);
910 dip->i_di.di_flags |= GFS2_DIF_EXHASH; 910 dip->i_di.di_flags |= GFS2_DIF_EXHASH;
911 911
912 for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; 912 for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
913 dip->i_di.di_depth = y; 913 dip->i_depth = y;
914 914
915 gfs2_dinode_out(dip, dibh->b_data); 915 gfs2_dinode_out(dip, dibh->b_data);
916 916
@@ -941,7 +941,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
941 int x, moved = 0; 941 int x, moved = 0;
942 int error; 942 int error;
943 943
944 index = name->hash >> (32 - dip->i_di.di_depth); 944 index = name->hash >> (32 - dip->i_depth);
945 error = get_leaf_nr(dip, index, &leaf_no); 945 error = get_leaf_nr(dip, index, &leaf_no);
946 if (error) 946 if (error)
947 return error; 947 return error;
@@ -952,7 +952,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
952 return error; 952 return error;
953 953
954 oleaf = (struct gfs2_leaf *)obh->b_data; 954 oleaf = (struct gfs2_leaf *)obh->b_data;
955 if (dip->i_di.di_depth == be16_to_cpu(oleaf->lf_depth)) { 955 if (dip->i_depth == be16_to_cpu(oleaf->lf_depth)) {
956 brelse(obh); 956 brelse(obh);
957 return 1; /* can't split */ 957 return 1; /* can't split */
958 } 958 }
@@ -967,10 +967,10 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
967 bn = nbh->b_blocknr; 967 bn = nbh->b_blocknr;
968 968
969 /* Compute the start and len of leaf pointers in the hash table. */ 969 /* Compute the start and len of leaf pointers in the hash table. */
970 len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth)); 970 len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth));
971 half_len = len >> 1; 971 half_len = len >> 1;
972 if (!half_len) { 972 if (!half_len) {
973 printk(KERN_WARNING "di_depth %u lf_depth %u index %u\n", dip->i_di.di_depth, be16_to_cpu(oleaf->lf_depth), index); 973 printk(KERN_WARNING "i_depth %u lf_depth %u index %u\n", dip->i_depth, be16_to_cpu(oleaf->lf_depth), index);
974 gfs2_consist_inode(dip); 974 gfs2_consist_inode(dip);
975 error = -EIO; 975 error = -EIO;
976 goto fail_brelse; 976 goto fail_brelse;
@@ -997,7 +997,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
997 kfree(lp); 997 kfree(lp);
998 998
999 /* Compute the divider */ 999 /* Compute the divider */
1000 divider = (start + half_len) << (32 - dip->i_di.di_depth); 1000 divider = (start + half_len) << (32 - dip->i_depth);
1001 1001
1002 /* Copy the entries */ 1002 /* Copy the entries */
1003 dirent_first(dip, obh, &dent); 1003 dirent_first(dip, obh, &dent);
@@ -1021,13 +1021,13 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
1021 1021
1022 new->de_inum = dent->de_inum; /* No endian worries */ 1022 new->de_inum = dent->de_inum; /* No endian worries */
1023 new->de_type = dent->de_type; /* No endian worries */ 1023 new->de_type = dent->de_type; /* No endian worries */
1024 nleaf->lf_entries = cpu_to_be16(be16_to_cpu(nleaf->lf_entries)+1); 1024 be16_add_cpu(&nleaf->lf_entries, 1);
1025 1025
1026 dirent_del(dip, obh, prev, dent); 1026 dirent_del(dip, obh, prev, dent);
1027 1027
1028 if (!oleaf->lf_entries) 1028 if (!oleaf->lf_entries)
1029 gfs2_consist_inode(dip); 1029 gfs2_consist_inode(dip);
1030 oleaf->lf_entries = cpu_to_be16(be16_to_cpu(oleaf->lf_entries)-1); 1030 be16_add_cpu(&oleaf->lf_entries, -1);
1031 1031
1032 if (!prev) 1032 if (!prev)
1033 prev = dent; 1033 prev = dent;
@@ -1044,8 +1044,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
1044 error = gfs2_meta_inode_buffer(dip, &dibh); 1044 error = gfs2_meta_inode_buffer(dip, &dibh);
1045 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { 1045 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
1046 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1046 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1047 dip->i_di.di_blocks++; 1047 gfs2_add_inode_blocks(&dip->i_inode, 1);
1048 gfs2_set_inode_blocks(&dip->i_inode);
1049 gfs2_dinode_out(dip, dibh->b_data); 1048 gfs2_dinode_out(dip, dibh->b_data);
1050 brelse(dibh); 1049 brelse(dibh);
1051 } 1050 }
@@ -1082,7 +1081,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
1082 int x; 1081 int x;
1083 int error = 0; 1082 int error = 0;
1084 1083
1085 hsize = 1 << dip->i_di.di_depth; 1084 hsize = 1 << dip->i_depth;
1086 if (hsize * sizeof(u64) != dip->i_di.di_size) { 1085 if (hsize * sizeof(u64) != dip->i_di.di_size) {
1087 gfs2_consist_inode(dip); 1086 gfs2_consist_inode(dip);
1088 return -EIO; 1087 return -EIO;
@@ -1090,7 +1089,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
1090 1089
1091 /* Allocate both the "from" and "to" buffers in one big chunk */ 1090 /* Allocate both the "from" and "to" buffers in one big chunk */
1092 1091
1093 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL); 1092 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS | __GFP_NOFAIL);
1094 1093
1095 for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) { 1094 for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
1096 error = gfs2_dir_read_data(dip, (char *)buf, 1095 error = gfs2_dir_read_data(dip, (char *)buf,
@@ -1125,7 +1124,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
1125 1124
1126 error = gfs2_meta_inode_buffer(dip, &dibh); 1125 error = gfs2_meta_inode_buffer(dip, &dibh);
1127 if (!gfs2_assert_withdraw(sdp, !error)) { 1126 if (!gfs2_assert_withdraw(sdp, !error)) {
1128 dip->i_di.di_depth++; 1127 dip->i_depth++;
1129 gfs2_dinode_out(dip, dibh->b_data); 1128 gfs2_dinode_out(dip, dibh->b_data);
1130 brelse(dibh); 1129 brelse(dibh);
1131 } 1130 }
@@ -1370,16 +1369,16 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1370 int error = 0; 1369 int error = 0;
1371 unsigned depth = 0; 1370 unsigned depth = 0;
1372 1371
1373 hsize = 1 << dip->i_di.di_depth; 1372 hsize = 1 << dip->i_depth;
1374 if (hsize * sizeof(u64) != dip->i_di.di_size) { 1373 if (hsize * sizeof(u64) != dip->i_di.di_size) {
1375 gfs2_consist_inode(dip); 1374 gfs2_consist_inode(dip);
1376 return -EIO; 1375 return -EIO;
1377 } 1376 }
1378 1377
1379 hash = gfs2_dir_offset2hash(*offset); 1378 hash = gfs2_dir_offset2hash(*offset);
1380 index = hash >> (32 - dip->i_di.di_depth); 1379 index = hash >> (32 - dip->i_depth);
1381 1380
1382 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL); 1381 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1383 if (!lp) 1382 if (!lp)
1384 return -ENOMEM; 1383 return -ENOMEM;
1385 1384
@@ -1405,7 +1404,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1405 if (error) 1404 if (error)
1406 break; 1405 break;
1407 1406
1408 len = 1 << (dip->i_di.di_depth - depth); 1407 len = 1 << (dip->i_depth - depth);
1409 index = (index & ~(len - 1)) + len; 1408 index = (index & ~(len - 1)) + len;
1410 } 1409 }
1411 1410
@@ -1444,7 +1443,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1444 1443
1445 error = -ENOMEM; 1444 error = -ENOMEM;
1446 /* 96 is max number of dirents which can be stuffed into an inode */ 1445 /* 96 is max number of dirents which can be stuffed into an inode */
1447 darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_KERNEL); 1446 darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_NOFS);
1448 if (darr) { 1447 if (darr) {
1449 g.pdent = darr; 1448 g.pdent = darr;
1450 g.offset = 0; 1449 g.offset = 0;
@@ -1549,7 +1548,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1549 u32 index; 1548 u32 index;
1550 u64 bn; 1549 u64 bn;
1551 1550
1552 index = name->hash >> (32 - ip->i_di.di_depth); 1551 index = name->hash >> (32 - ip->i_depth);
1553 error = get_first_leaf(ip, index, &obh); 1552 error = get_first_leaf(ip, index, &obh);
1554 if (error) 1553 if (error)
1555 return error; 1554 return error;
@@ -1579,8 +1578,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1579 if (error) 1578 if (error)
1580 return error; 1579 return error;
1581 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1580 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1582 ip->i_di.di_blocks++; 1581 gfs2_add_inode_blocks(&ip->i_inode, 1);
1583 gfs2_set_inode_blocks(&ip->i_inode);
1584 gfs2_dinode_out(ip, bh->b_data); 1582 gfs2_dinode_out(ip, bh->b_data);
1585 brelse(bh); 1583 brelse(bh);
1586 return 0; 1584 return 0;
@@ -1616,7 +1614,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1616 dent->de_type = cpu_to_be16(type); 1614 dent->de_type = cpu_to_be16(type);
1617 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { 1615 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
1618 leaf = (struct gfs2_leaf *)bh->b_data; 1616 leaf = (struct gfs2_leaf *)bh->b_data;
1619 leaf->lf_entries = cpu_to_be16(be16_to_cpu(leaf->lf_entries) + 1); 1617 be16_add_cpu(&leaf->lf_entries, 1);
1620 } 1618 }
1621 brelse(bh); 1619 brelse(bh);
1622 error = gfs2_meta_inode_buffer(ip, &bh); 1620 error = gfs2_meta_inode_buffer(ip, &bh);
@@ -1641,7 +1639,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1641 continue; 1639 continue;
1642 if (error < 0) 1640 if (error < 0)
1643 break; 1641 break;
1644 if (ip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) { 1642 if (ip->i_depth < GFS2_DIR_MAX_DEPTH) {
1645 error = dir_double_exhash(ip); 1643 error = dir_double_exhash(ip);
1646 if (error) 1644 if (error)
1647 break; 1645 break;
@@ -1785,13 +1783,13 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
1785 u64 leaf_no; 1783 u64 leaf_no;
1786 int error = 0; 1784 int error = 0;
1787 1785
1788 hsize = 1 << dip->i_di.di_depth; 1786 hsize = 1 << dip->i_depth;
1789 if (hsize * sizeof(u64) != dip->i_di.di_size) { 1787 if (hsize * sizeof(u64) != dip->i_di.di_size) {
1790 gfs2_consist_inode(dip); 1788 gfs2_consist_inode(dip);
1791 return -EIO; 1789 return -EIO;
1792 } 1790 }
1793 1791
1794 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL); 1792 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1795 if (!lp) 1793 if (!lp)
1796 return -ENOMEM; 1794 return -ENOMEM;
1797 1795
@@ -1817,7 +1815,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
1817 if (error) 1815 if (error)
1818 goto out; 1816 goto out;
1819 leaf = (struct gfs2_leaf *)bh->b_data; 1817 leaf = (struct gfs2_leaf *)bh->b_data;
1820 len = 1 << (dip->i_di.di_depth - be16_to_cpu(leaf->lf_depth)); 1818 len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
1821 brelse(bh); 1819 brelse(bh);
1822 1820
1823 error = lc(dip, index, len, leaf_no, data); 1821 error = lc(dip, index, len, leaf_no, data);
@@ -1866,15 +1864,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1866 1864
1867 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); 1865 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1868 1866
1869 ht = kzalloc(size, GFP_KERNEL); 1867 ht = kzalloc(size, GFP_NOFS);
1870 if (!ht) 1868 if (!ht)
1871 return -ENOMEM; 1869 return -ENOMEM;
1872 1870
1873 gfs2_alloc_get(dip); 1871 if (!gfs2_alloc_get(dip)) {
1872 error = -ENOMEM;
1873 goto out;
1874 }
1874 1875
1875 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1876 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1876 if (error) 1877 if (error)
1877 goto out; 1878 goto out_put;
1878 1879
1879 error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh); 1880 error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh);
1880 if (error) 1881 if (error)
@@ -1894,7 +1895,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1894 l_blocks++; 1895 l_blocks++;
1895 } 1896 }
1896 1897
1897 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); 1898 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
1898 1899
1899 for (x = 0; x < rlist.rl_rgrps; x++) { 1900 for (x = 0; x < rlist.rl_rgrps; x++) {
1900 struct gfs2_rgrpd *rgd; 1901 struct gfs2_rgrpd *rgd;
@@ -1921,11 +1922,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1921 brelse(bh); 1922 brelse(bh);
1922 1923
1923 gfs2_free_meta(dip, blk, 1); 1924 gfs2_free_meta(dip, blk, 1);
1924 1925 gfs2_add_inode_blocks(&dip->i_inode, -1);
1925 if (!dip->i_di.di_blocks)
1926 gfs2_consist_inode(dip);
1927 dip->i_di.di_blocks--;
1928 gfs2_set_inode_blocks(&dip->i_inode);
1929 } 1926 }
1930 1927
1931 error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size); 1928 error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size);
@@ -1952,8 +1949,9 @@ out_rlist:
1952 gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh); 1949 gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh);
1953out_qs: 1950out_qs:
1954 gfs2_quota_unhold(dip); 1951 gfs2_quota_unhold(dip);
1955out: 1952out_put:
1956 gfs2_alloc_put(dip); 1953 gfs2_alloc_put(dip);
1954out:
1957 kfree(ht); 1955 kfree(ht);
1958 return error; 1956 return error;
1959} 1957}
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index bee99704ea10..e3f76f451b0a 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -277,10 +277,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
277 } 277 }
278 278
279 *dataptrs = 0; 279 *dataptrs = 0;
280 if (!ip->i_di.di_blocks) 280 gfs2_add_inode_blocks(&ip->i_inode, -1);
281 gfs2_consist_inode(ip);
282 ip->i_di.di_blocks--;
283 gfs2_set_inode_blocks(&ip->i_inode);
284 } 281 }
285 if (bstart) 282 if (bstart)
286 gfs2_free_meta(ip, bstart, blen); 283 gfs2_free_meta(ip, bstart, blen);
@@ -321,6 +318,8 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
321 int error; 318 int error;
322 319
323 al = gfs2_alloc_get(ip); 320 al = gfs2_alloc_get(ip);
321 if (!al)
322 return -ENOMEM;
324 323
325 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 324 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
326 if (error) 325 if (error)
@@ -449,7 +448,7 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
449 unsigned int x; 448 unsigned int x;
450 int error = 0; 449 int error = 0;
451 450
452 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL); 451 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS);
453 if (!bh) 452 if (!bh)
454 return -ENOMEM; 453 return -ENOMEM;
455 454
@@ -582,10 +581,11 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
582{ 581{
583 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 582 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
584 struct gfs2_ea_header *ea; 583 struct gfs2_ea_header *ea;
584 unsigned int n = 1;
585 u64 block; 585 u64 block;
586 586
587 block = gfs2_alloc_meta(ip); 587 block = gfs2_alloc_block(ip, &n);
588 588 gfs2_trans_add_unrevoke(sdp, block, 1);
589 *bhp = gfs2_meta_new(ip->i_gl, block); 589 *bhp = gfs2_meta_new(ip->i_gl, block);
590 gfs2_trans_add_bh(ip->i_gl, *bhp, 1); 590 gfs2_trans_add_bh(ip->i_gl, *bhp, 1);
591 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); 591 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
@@ -597,8 +597,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
597 ea->ea_flags = GFS2_EAFLAG_LAST; 597 ea->ea_flags = GFS2_EAFLAG_LAST;
598 ea->ea_num_ptrs = 0; 598 ea->ea_num_ptrs = 0;
599 599
600 ip->i_di.di_blocks++; 600 gfs2_add_inode_blocks(&ip->i_inode, 1);
601 gfs2_set_inode_blocks(&ip->i_inode);
602 601
603 return 0; 602 return 0;
604} 603}
@@ -642,15 +641,15 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
642 struct buffer_head *bh; 641 struct buffer_head *bh;
643 u64 block; 642 u64 block;
644 int mh_size = sizeof(struct gfs2_meta_header); 643 int mh_size = sizeof(struct gfs2_meta_header);
644 unsigned int n = 1;
645 645
646 block = gfs2_alloc_meta(ip); 646 block = gfs2_alloc_block(ip, &n);
647 647 gfs2_trans_add_unrevoke(sdp, block, 1);
648 bh = gfs2_meta_new(ip->i_gl, block); 648 bh = gfs2_meta_new(ip->i_gl, block);
649 gfs2_trans_add_bh(ip->i_gl, bh, 1); 649 gfs2_trans_add_bh(ip->i_gl, bh, 1);
650 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); 650 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
651 651
652 ip->i_di.di_blocks++; 652 gfs2_add_inode_blocks(&ip->i_inode, 1);
653 gfs2_set_inode_blocks(&ip->i_inode);
654 653
655 copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize : 654 copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize :
656 data_len; 655 data_len;
@@ -684,15 +683,13 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
684 int error; 683 int error;
685 684
686 al = gfs2_alloc_get(ip); 685 al = gfs2_alloc_get(ip);
686 if (!al)
687 return -ENOMEM;
687 688
688 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 689 error = gfs2_quota_lock_check(ip);
689 if (error) 690 if (error)
690 goto out; 691 goto out;
691 692
692 error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
693 if (error)
694 goto out_gunlock_q;
695
696 al->al_requested = blks; 693 al->al_requested = blks;
697 694
698 error = gfs2_inplace_reserve(ip); 695 error = gfs2_inplace_reserve(ip);
@@ -966,9 +963,9 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
966 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 963 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
967 } else { 964 } else {
968 u64 blk; 965 u64 blk;
969 966 unsigned int n = 1;
970 blk = gfs2_alloc_meta(ip); 967 blk = gfs2_alloc_block(ip, &n);
971 968 gfs2_trans_add_unrevoke(sdp, blk, 1);
972 indbh = gfs2_meta_new(ip->i_gl, blk); 969 indbh = gfs2_meta_new(ip->i_gl, blk);
973 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 970 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
974 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); 971 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
@@ -978,8 +975,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
978 *eablk = cpu_to_be64(ip->i_di.di_eattr); 975 *eablk = cpu_to_be64(ip->i_di.di_eattr);
979 ip->i_di.di_eattr = blk; 976 ip->i_di.di_eattr = blk;
980 ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT; 977 ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
981 ip->i_di.di_blocks++; 978 gfs2_add_inode_blocks(&ip->i_inode, 1);
982 gfs2_set_inode_blocks(&ip->i_inode);
983 979
984 eablk++; 980 eablk++;
985 } 981 }
@@ -1210,7 +1206,7 @@ static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
1210 unsigned int x; 1206 unsigned int x;
1211 int error; 1207 int error;
1212 1208
1213 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL); 1209 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS);
1214 if (!bh) 1210 if (!bh)
1215 return -ENOMEM; 1211 return -ENOMEM;
1216 1212
@@ -1347,7 +1343,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1347 else 1343 else
1348 goto out; 1344 goto out;
1349 1345
1350 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); 1346 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
1351 1347
1352 for (x = 0; x < rlist.rl_rgrps; x++) { 1348 for (x = 0; x < rlist.rl_rgrps; x++) {
1353 struct gfs2_rgrpd *rgd; 1349 struct gfs2_rgrpd *rgd;
@@ -1387,10 +1383,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1387 } 1383 }
1388 1384
1389 *eablk = 0; 1385 *eablk = 0;
1390 if (!ip->i_di.di_blocks) 1386 gfs2_add_inode_blocks(&ip->i_inode, -1);
1391 gfs2_consist_inode(ip);
1392 ip->i_di.di_blocks--;
1393 gfs2_set_inode_blocks(&ip->i_inode);
1394 } 1387 }
1395 if (bstart) 1388 if (bstart)
1396 gfs2_free_meta(ip, bstart, blen); 1389 gfs2_free_meta(ip, bstart, blen);
@@ -1442,10 +1435,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1442 gfs2_free_meta(ip, ip->i_di.di_eattr, 1); 1435 gfs2_free_meta(ip, ip->i_di.di_eattr, 1);
1443 1436
1444 ip->i_di.di_eattr = 0; 1437 ip->i_di.di_eattr = 0;
1445 if (!ip->i_di.di_blocks) 1438 gfs2_add_inode_blocks(&ip->i_inode, -1);
1446 gfs2_consist_inode(ip);
1447 ip->i_di.di_blocks--;
1448 gfs2_set_inode_blocks(&ip->i_inode);
1449 1439
1450 error = gfs2_meta_inode_buffer(ip, &dibh); 1440 error = gfs2_meta_inode_buffer(ip, &dibh);
1451 if (!error) { 1441 if (!error) {
@@ -1474,6 +1464,8 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
1474 int error; 1464 int error;
1475 1465
1476 al = gfs2_alloc_get(ip); 1466 al = gfs2_alloc_get(ip);
1467 if (!al)
1468 return -ENOMEM;
1477 1469
1478 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1470 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1479 if (error) 1471 if (error)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 7175a4d06435..d636b3e80f5d 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -35,7 +35,6 @@
35#include "glock.h" 35#include "glock.h"
36#include "glops.h" 36#include "glops.h"
37#include "inode.h" 37#include "inode.h"
38#include "lm.h"
39#include "lops.h" 38#include "lops.h"
40#include "meta_io.h" 39#include "meta_io.h"
41#include "quota.h" 40#include "quota.h"
@@ -183,7 +182,8 @@ static void glock_free(struct gfs2_glock *gl)
183 struct gfs2_sbd *sdp = gl->gl_sbd; 182 struct gfs2_sbd *sdp = gl->gl_sbd;
184 struct inode *aspace = gl->gl_aspace; 183 struct inode *aspace = gl->gl_aspace;
185 184
186 gfs2_lm_put_lock(sdp, gl->gl_lock); 185 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
186 sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock);
187 187
188 if (aspace) 188 if (aspace)
189 gfs2_aspace_put(aspace); 189 gfs2_aspace_put(aspace);
@@ -197,7 +197,7 @@ static void glock_free(struct gfs2_glock *gl)
197 * 197 *
198 */ 198 */
199 199
200void gfs2_glock_hold(struct gfs2_glock *gl) 200static void gfs2_glock_hold(struct gfs2_glock *gl)
201{ 201{
202 atomic_inc(&gl->gl_ref); 202 atomic_inc(&gl->gl_ref);
203} 203}
@@ -293,6 +293,16 @@ static void glock_work_func(struct work_struct *work)
293 gfs2_glock_put(gl); 293 gfs2_glock_put(gl);
294} 294}
295 295
296static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
297 void **lockp)
298{
299 int error = -EIO;
300 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
301 error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
302 sdp->sd_lockstruct.ls_lockspace, name, lockp);
303 return error;
304}
305
296/** 306/**
297 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist 307 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
298 * @sdp: The GFS2 superblock 308 * @sdp: The GFS2 superblock
@@ -338,8 +348,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
338 gl->gl_ip = 0; 348 gl->gl_ip = 0;
339 gl->gl_ops = glops; 349 gl->gl_ops = glops;
340 gl->gl_req_gh = NULL; 350 gl->gl_req_gh = NULL;
341 gl->gl_req_bh = NULL;
342 gl->gl_vn = 0;
343 gl->gl_stamp = jiffies; 351 gl->gl_stamp = jiffies;
344 gl->gl_tchange = jiffies; 352 gl->gl_tchange = jiffies;
345 gl->gl_object = NULL; 353 gl->gl_object = NULL;
@@ -595,11 +603,12 @@ static void run_queue(struct gfs2_glock *gl)
595 blocked = rq_mutex(gh); 603 blocked = rq_mutex(gh);
596 } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { 604 } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
597 blocked = rq_demote(gl); 605 blocked = rq_demote(gl);
598 if (gl->gl_waiters2 && !blocked) { 606 if (test_bit(GLF_WAITERS2, &gl->gl_flags) &&
607 !blocked) {
599 set_bit(GLF_DEMOTE, &gl->gl_flags); 608 set_bit(GLF_DEMOTE, &gl->gl_flags);
600 gl->gl_demote_state = LM_ST_UNLOCKED; 609 gl->gl_demote_state = LM_ST_UNLOCKED;
601 } 610 }
602 gl->gl_waiters2 = 0; 611 clear_bit(GLF_WAITERS2, &gl->gl_flags);
603 } else if (!list_empty(&gl->gl_waiters3)) { 612 } else if (!list_empty(&gl->gl_waiters3)) {
604 gh = list_entry(gl->gl_waiters3.next, 613 gh = list_entry(gl->gl_waiters3.next,
605 struct gfs2_holder, gh_list); 614 struct gfs2_holder, gh_list);
@@ -710,7 +719,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
710 } else if (gl->gl_demote_state != LM_ST_UNLOCKED && 719 } else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
711 gl->gl_demote_state != state) { 720 gl->gl_demote_state != state) {
712 if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) 721 if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
713 gl->gl_waiters2 = 1; 722 set_bit(GLF_WAITERS2, &gl->gl_flags);
714 else 723 else
715 gl->gl_demote_state = LM_ST_UNLOCKED; 724 gl->gl_demote_state = LM_ST_UNLOCKED;
716 } 725 }
@@ -743,6 +752,43 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
743} 752}
744 753
745/** 754/**
755 * drop_bh - Called after a lock module unlock completes
756 * @gl: the glock
757 * @ret: the return status
758 *
759 * Doesn't wake up the process waiting on the struct gfs2_holder (if any)
760 * Doesn't drop the reference on the glock the top half took out
761 *
762 */
763
764static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
765{
766 struct gfs2_sbd *sdp = gl->gl_sbd;
767 struct gfs2_holder *gh = gl->gl_req_gh;
768
769 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
770 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
771 gfs2_assert_warn(sdp, !ret);
772
773 state_change(gl, LM_ST_UNLOCKED);
774
775 if (test_and_clear_bit(GLF_CONV_DEADLK, &gl->gl_flags)) {
776 spin_lock(&gl->gl_spin);
777 gh->gh_error = 0;
778 spin_unlock(&gl->gl_spin);
779 gfs2_glock_xmote_th(gl, gl->gl_req_gh);
780 gfs2_glock_put(gl);
781 return;
782 }
783
784 spin_lock(&gl->gl_spin);
785 gfs2_demote_wake(gl);
786 clear_bit(GLF_LOCK, &gl->gl_flags);
787 spin_unlock(&gl->gl_spin);
788 gfs2_glock_put(gl);
789}
790
791/**
746 * xmote_bh - Called after the lock module is done acquiring a lock 792 * xmote_bh - Called after the lock module is done acquiring a lock
747 * @gl: The glock in question 793 * @gl: The glock in question
748 * @ret: the int returned from the lock module 794 * @ret: the int returned from the lock module
@@ -754,25 +800,19 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
754 struct gfs2_sbd *sdp = gl->gl_sbd; 800 struct gfs2_sbd *sdp = gl->gl_sbd;
755 const struct gfs2_glock_operations *glops = gl->gl_ops; 801 const struct gfs2_glock_operations *glops = gl->gl_ops;
756 struct gfs2_holder *gh = gl->gl_req_gh; 802 struct gfs2_holder *gh = gl->gl_req_gh;
757 int prev_state = gl->gl_state;
758 int op_done = 1; 803 int op_done = 1;
759 804
805 if (!gh && (ret & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) {
806 drop_bh(gl, ret);
807 return;
808 }
809
760 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); 810 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
761 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); 811 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
762 gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); 812 gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));
763 813
764 state_change(gl, ret & LM_OUT_ST_MASK); 814 state_change(gl, ret & LM_OUT_ST_MASK);
765 815
766 if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
767 if (glops->go_inval)
768 glops->go_inval(gl, DIO_METADATA);
769 } else if (gl->gl_state == LM_ST_DEFERRED) {
770 /* We might not want to do this here.
771 Look at moving to the inode glops. */
772 if (glops->go_inval)
773 glops->go_inval(gl, 0);
774 }
775
776 /* Deal with each possible exit condition */ 816 /* Deal with each possible exit condition */
777 817
778 if (!gh) { 818 if (!gh) {
@@ -782,7 +822,6 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
782 } else { 822 } else {
783 spin_lock(&gl->gl_spin); 823 spin_lock(&gl->gl_spin);
784 if (gl->gl_state != gl->gl_demote_state) { 824 if (gl->gl_state != gl->gl_demote_state) {
785 gl->gl_req_bh = NULL;
786 spin_unlock(&gl->gl_spin); 825 spin_unlock(&gl->gl_spin);
787 gfs2_glock_drop_th(gl); 826 gfs2_glock_drop_th(gl);
788 gfs2_glock_put(gl); 827 gfs2_glock_put(gl);
@@ -793,6 +832,14 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
793 } 832 }
794 } else { 833 } else {
795 spin_lock(&gl->gl_spin); 834 spin_lock(&gl->gl_spin);
835 if (ret & LM_OUT_CONV_DEADLK) {
836 gh->gh_error = 0;
837 set_bit(GLF_CONV_DEADLK, &gl->gl_flags);
838 spin_unlock(&gl->gl_spin);
839 gfs2_glock_drop_th(gl);
840 gfs2_glock_put(gl);
841 return;
842 }
796 list_del_init(&gh->gh_list); 843 list_del_init(&gh->gh_list);
797 gh->gh_error = -EIO; 844 gh->gh_error = -EIO;
798 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 845 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
@@ -824,7 +871,6 @@ out:
824 if (op_done) { 871 if (op_done) {
825 spin_lock(&gl->gl_spin); 872 spin_lock(&gl->gl_spin);
826 gl->gl_req_gh = NULL; 873 gl->gl_req_gh = NULL;
827 gl->gl_req_bh = NULL;
828 clear_bit(GLF_LOCK, &gl->gl_flags); 874 clear_bit(GLF_LOCK, &gl->gl_flags);
829 spin_unlock(&gl->gl_spin); 875 spin_unlock(&gl->gl_spin);
830 } 876 }
@@ -835,6 +881,17 @@ out:
835 gfs2_holder_wake(gh); 881 gfs2_holder_wake(gh);
836} 882}
837 883
884static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
885 unsigned int cur_state, unsigned int req_state,
886 unsigned int flags)
887{
888 int ret = 0;
889 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
890 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state,
891 req_state, flags);
892 return ret;
893}
894
838/** 895/**
839 * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock 896 * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock
840 * @gl: The glock in question 897 * @gl: The glock in question
@@ -856,6 +913,8 @@ static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
856 913
857 if (glops->go_xmote_th) 914 if (glops->go_xmote_th)
858 glops->go_xmote_th(gl); 915 glops->go_xmote_th(gl);
916 if (state == LM_ST_DEFERRED && glops->go_inval)
917 glops->go_inval(gl, DIO_METADATA);
859 918
860 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); 919 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
861 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); 920 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
@@ -863,7 +922,6 @@ static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
863 gfs2_assert_warn(sdp, state != gl->gl_state); 922 gfs2_assert_warn(sdp, state != gl->gl_state);
864 923
865 gfs2_glock_hold(gl); 924 gfs2_glock_hold(gl);
866 gl->gl_req_bh = xmote_bh;
867 925
868 lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags); 926 lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags);
869 927
@@ -876,49 +934,13 @@ static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
876 xmote_bh(gl, lck_ret); 934 xmote_bh(gl, lck_ret);
877} 935}
878 936
879/** 937static unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock,
880 * drop_bh - Called after a lock module unlock completes 938 unsigned int cur_state)
881 * @gl: the glock
882 * @ret: the return status
883 *
884 * Doesn't wake up the process waiting on the struct gfs2_holder (if any)
885 * Doesn't drop the reference on the glock the top half took out
886 *
887 */
888
889static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
890{ 939{
891 struct gfs2_sbd *sdp = gl->gl_sbd; 940 int ret = 0;
892 const struct gfs2_glock_operations *glops = gl->gl_ops; 941 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
893 struct gfs2_holder *gh = gl->gl_req_gh; 942 ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state);
894 943 return ret;
895 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
896 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
897 gfs2_assert_warn(sdp, !ret);
898
899 state_change(gl, LM_ST_UNLOCKED);
900
901 if (glops->go_inval)
902 glops->go_inval(gl, DIO_METADATA);
903
904 if (gh) {
905 spin_lock(&gl->gl_spin);
906 list_del_init(&gh->gh_list);
907 gh->gh_error = 0;
908 spin_unlock(&gl->gl_spin);
909 }
910
911 spin_lock(&gl->gl_spin);
912 gfs2_demote_wake(gl);
913 gl->gl_req_gh = NULL;
914 gl->gl_req_bh = NULL;
915 clear_bit(GLF_LOCK, &gl->gl_flags);
916 spin_unlock(&gl->gl_spin);
917
918 gfs2_glock_put(gl);
919
920 if (gh)
921 gfs2_holder_wake(gh);
922} 944}
923 945
924/** 946/**
@@ -935,13 +957,14 @@ static void gfs2_glock_drop_th(struct gfs2_glock *gl)
935 957
936 if (glops->go_xmote_th) 958 if (glops->go_xmote_th)
937 glops->go_xmote_th(gl); 959 glops->go_xmote_th(gl);
960 if (glops->go_inval)
961 glops->go_inval(gl, DIO_METADATA);
938 962
939 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); 963 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
940 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); 964 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
941 gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); 965 gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
942 966
943 gfs2_glock_hold(gl); 967 gfs2_glock_hold(gl);
944 gl->gl_req_bh = drop_bh;
945 968
946 ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state); 969 ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state);
947 970
@@ -964,16 +987,17 @@ static void gfs2_glock_drop_th(struct gfs2_glock *gl)
964static void do_cancels(struct gfs2_holder *gh) 987static void do_cancels(struct gfs2_holder *gh)
965{ 988{
966 struct gfs2_glock *gl = gh->gh_gl; 989 struct gfs2_glock *gl = gh->gh_gl;
990 struct gfs2_sbd *sdp = gl->gl_sbd;
967 991
968 spin_lock(&gl->gl_spin); 992 spin_lock(&gl->gl_spin);
969 993
970 while (gl->gl_req_gh != gh && 994 while (gl->gl_req_gh != gh &&
971 !test_bit(HIF_HOLDER, &gh->gh_iflags) && 995 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
972 !list_empty(&gh->gh_list)) { 996 !list_empty(&gh->gh_list)) {
973 if (gl->gl_req_bh && !(gl->gl_req_gh && 997 if (!(gl->gl_req_gh && (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) {
974 (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) {
975 spin_unlock(&gl->gl_spin); 998 spin_unlock(&gl->gl_spin);
976 gfs2_lm_cancel(gl->gl_sbd, gl->gl_lock); 999 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
1000 sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock);
977 msleep(100); 1001 msleep(100);
978 spin_lock(&gl->gl_spin); 1002 spin_lock(&gl->gl_spin);
979 } else { 1003 } else {
@@ -1041,7 +1065,6 @@ static int glock_wait_internal(struct gfs2_holder *gh)
1041 1065
1042 spin_lock(&gl->gl_spin); 1066 spin_lock(&gl->gl_spin);
1043 gl->gl_req_gh = NULL; 1067 gl->gl_req_gh = NULL;
1044 gl->gl_req_bh = NULL;
1045 clear_bit(GLF_LOCK, &gl->gl_flags); 1068 clear_bit(GLF_LOCK, &gl->gl_flags);
1046 run_queue(gl); 1069 run_queue(gl);
1047 spin_unlock(&gl->gl_spin); 1070 spin_unlock(&gl->gl_spin);
@@ -1428,6 +1451,14 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1428 gfs2_glock_dq_uninit(&ghs[x]); 1451 gfs2_glock_dq_uninit(&ghs[x]);
1429} 1452}
1430 1453
1454static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp)
1455{
1456 int error = -EIO;
1457 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
1458 error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
1459 return error;
1460}
1461
1431/** 1462/**
1432 * gfs2_lvb_hold - attach a LVB from a glock 1463 * gfs2_lvb_hold - attach a LVB from a glock
1433 * @gl: The glock in question 1464 * @gl: The glock in question
@@ -1463,12 +1494,15 @@ int gfs2_lvb_hold(struct gfs2_glock *gl)
1463 1494
1464void gfs2_lvb_unhold(struct gfs2_glock *gl) 1495void gfs2_lvb_unhold(struct gfs2_glock *gl)
1465{ 1496{
1497 struct gfs2_sbd *sdp = gl->gl_sbd;
1498
1466 gfs2_glock_hold(gl); 1499 gfs2_glock_hold(gl);
1467 gfs2_glmutex_lock(gl); 1500 gfs2_glmutex_lock(gl);
1468 1501
1469 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0); 1502 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
1470 if (atomic_dec_and_test(&gl->gl_lvb_count)) { 1503 if (atomic_dec_and_test(&gl->gl_lvb_count)) {
1471 gfs2_lm_unhold_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb); 1504 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
1505 sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb);
1472 gl->gl_lvb = NULL; 1506 gl->gl_lvb = NULL;
1473 gfs2_glock_put(gl); 1507 gfs2_glock_put(gl);
1474 } 1508 }
@@ -1534,8 +1568,7 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
1534 gl = gfs2_glock_find(sdp, &async->lc_name); 1568 gl = gfs2_glock_find(sdp, &async->lc_name);
1535 if (gfs2_assert_warn(sdp, gl)) 1569 if (gfs2_assert_warn(sdp, gl))
1536 return; 1570 return;
1537 if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) 1571 xmote_bh(gl, async->lc_ret);
1538 gl->gl_req_bh(gl, async->lc_ret);
1539 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 1572 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1540 gfs2_glock_put(gl); 1573 gfs2_glock_put(gl);
1541 up_read(&gfs2_umount_flush_sem); 1574 up_read(&gfs2_umount_flush_sem);
@@ -1594,10 +1627,10 @@ void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
1594 gfs2_glock_hold(gl); 1627 gfs2_glock_hold(gl);
1595 list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list); 1628 list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
1596 atomic_inc(&sdp->sd_reclaim_count); 1629 atomic_inc(&sdp->sd_reclaim_count);
1597 } 1630 spin_unlock(&sdp->sd_reclaim_lock);
1598 spin_unlock(&sdp->sd_reclaim_lock); 1631 wake_up(&sdp->sd_reclaim_wq);
1599 1632 } else
1600 wake_up(&sdp->sd_reclaim_wq); 1633 spin_unlock(&sdp->sd_reclaim_lock);
1601} 1634}
1602 1635
1603/** 1636/**
@@ -1897,7 +1930,6 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
1897 print_dbg(gi, " gl_owner = -1\n"); 1930 print_dbg(gi, " gl_owner = -1\n");
1898 print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); 1931 print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip);
1899 print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); 1932 print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
1900 print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
1901 print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); 1933 print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
1902 print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); 1934 print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no");
1903 print_dbg(gi, " reclaim = %s\n", 1935 print_dbg(gi, " reclaim = %s\n",
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 2f9c6d136b37..cdad3e6f8150 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -32,24 +32,23 @@
32#define GLR_TRYFAILED 13 32#define GLR_TRYFAILED 13
33#define GLR_CANCELED 14 33#define GLR_CANCELED 14
34 34
35static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) 35static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
36{ 36{
37 struct gfs2_holder *gh; 37 struct gfs2_holder *gh;
38 int locked = 0;
39 struct pid *pid; 38 struct pid *pid;
40 39
41 /* Look in glock's list of holders for one with current task as owner */ 40 /* Look in glock's list of holders for one with current task as owner */
42 spin_lock(&gl->gl_spin); 41 spin_lock(&gl->gl_spin);
43 pid = task_pid(current); 42 pid = task_pid(current);
44 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 43 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
45 if (gh->gh_owner_pid == pid) { 44 if (gh->gh_owner_pid == pid)
46 locked = 1; 45 goto out;
47 break;
48 }
49 } 46 }
47 gh = NULL;
48out:
50 spin_unlock(&gl->gl_spin); 49 spin_unlock(&gl->gl_spin);
51 50
52 return locked; 51 return gh;
53} 52}
54 53
55static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl) 54static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
@@ -79,7 +78,6 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
79int gfs2_glock_get(struct gfs2_sbd *sdp, 78int gfs2_glock_get(struct gfs2_sbd *sdp,
80 u64 number, const struct gfs2_glock_operations *glops, 79 u64 number, const struct gfs2_glock_operations *glops,
81 int create, struct gfs2_glock **glp); 80 int create, struct gfs2_glock **glp);
82void gfs2_glock_hold(struct gfs2_glock *gl);
83int gfs2_glock_put(struct gfs2_glock *gl); 81int gfs2_glock_put(struct gfs2_glock *gl);
84void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, 82void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
85 struct gfs2_holder *gh); 83 struct gfs2_holder *gh);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index c663b7a0f410..d31badadef8f 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -126,7 +126,13 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags)
126 return; 126 return;
127 127
128 gfs2_meta_inval(gl); 128 gfs2_meta_inval(gl);
129 gl->gl_vn++; 129 if (gl->gl_object == GFS2_I(gl->gl_sbd->sd_rindex))
130 gl->gl_sbd->sd_rindex_uptodate = 0;
131 else if (gl->gl_ops == &gfs2_rgrp_glops && gl->gl_object) {
132 struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
133
134 rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
135 }
130} 136}
131 137
132/** 138/**
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 525dcae352d6..9c2c0b90b22a 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -44,7 +44,6 @@ struct gfs2_log_header_host {
44 44
45struct gfs2_log_operations { 45struct gfs2_log_operations {
46 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le); 46 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le);
47 void (*lo_incore_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
48 void (*lo_before_commit) (struct gfs2_sbd *sdp); 47 void (*lo_before_commit) (struct gfs2_sbd *sdp);
49 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); 48 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
50 void (*lo_before_scan) (struct gfs2_jdesc *jd, 49 void (*lo_before_scan) (struct gfs2_jdesc *jd,
@@ -70,7 +69,6 @@ struct gfs2_bitmap {
70}; 69};
71 70
72struct gfs2_rgrp_host { 71struct gfs2_rgrp_host {
73 u32 rg_flags;
74 u32 rg_free; 72 u32 rg_free;
75 u32 rg_dinodes; 73 u32 rg_dinodes;
76 u64 rg_igeneration; 74 u64 rg_igeneration;
@@ -87,17 +85,17 @@ struct gfs2_rgrpd {
87 u32 rd_data; /* num of data blocks in rgrp */ 85 u32 rd_data; /* num of data blocks in rgrp */
88 u32 rd_bitbytes; /* number of bytes in data bitmaps */ 86 u32 rd_bitbytes; /* number of bytes in data bitmaps */
89 struct gfs2_rgrp_host rd_rg; 87 struct gfs2_rgrp_host rd_rg;
90 u64 rd_rg_vn;
91 struct gfs2_bitmap *rd_bits; 88 struct gfs2_bitmap *rd_bits;
92 unsigned int rd_bh_count; 89 unsigned int rd_bh_count;
93 struct mutex rd_mutex; 90 struct mutex rd_mutex;
94 u32 rd_free_clone; 91 u32 rd_free_clone;
95 struct gfs2_log_element rd_le; 92 struct gfs2_log_element rd_le;
96 u32 rd_last_alloc_data; 93 u32 rd_last_alloc;
97 u32 rd_last_alloc_meta;
98 struct gfs2_sbd *rd_sbd; 94 struct gfs2_sbd *rd_sbd;
99 unsigned long rd_flags; 95 unsigned char rd_flags;
100#define GFS2_RDF_CHECK 0x0001 /* Need to check for unlinked inodes */ 96#define GFS2_RDF_CHECK 0x01 /* Need to check for unlinked inodes */
97#define GFS2_RDF_NOALLOC 0x02 /* rg prohibits allocation */
98#define GFS2_RDF_UPTODATE 0x04 /* rg is up to date */
101}; 99};
102 100
103enum gfs2_state_bits { 101enum gfs2_state_bits {
@@ -168,6 +166,8 @@ enum {
168 GLF_DIRTY = 5, 166 GLF_DIRTY = 5,
169 GLF_DEMOTE_IN_PROGRESS = 6, 167 GLF_DEMOTE_IN_PROGRESS = 6,
170 GLF_LFLUSH = 7, 168 GLF_LFLUSH = 7,
169 GLF_WAITERS2 = 8,
170 GLF_CONV_DEADLK = 9,
171}; 171};
172 172
173struct gfs2_glock { 173struct gfs2_glock {
@@ -187,18 +187,15 @@ struct gfs2_glock {
187 struct list_head gl_holders; 187 struct list_head gl_holders;
188 struct list_head gl_waiters1; /* HIF_MUTEX */ 188 struct list_head gl_waiters1; /* HIF_MUTEX */
189 struct list_head gl_waiters3; /* HIF_PROMOTE */ 189 struct list_head gl_waiters3; /* HIF_PROMOTE */
190 int gl_waiters2; /* GIF_DEMOTE */
191 190
192 const struct gfs2_glock_operations *gl_ops; 191 const struct gfs2_glock_operations *gl_ops;
193 192
194 struct gfs2_holder *gl_req_gh; 193 struct gfs2_holder *gl_req_gh;
195 gfs2_glop_bh_t gl_req_bh;
196 194
197 void *gl_lock; 195 void *gl_lock;
198 char *gl_lvb; 196 char *gl_lvb;
199 atomic_t gl_lvb_count; 197 atomic_t gl_lvb_count;
200 198
201 u64 gl_vn;
202 unsigned long gl_stamp; 199 unsigned long gl_stamp;
203 unsigned long gl_tchange; 200 unsigned long gl_tchange;
204 void *gl_object; 201 void *gl_object;
@@ -213,6 +210,8 @@ struct gfs2_glock {
213 struct delayed_work gl_work; 210 struct delayed_work gl_work;
214}; 211};
215 212
213#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
214
216struct gfs2_alloc { 215struct gfs2_alloc {
217 /* Quota stuff */ 216 /* Quota stuff */
218 217
@@ -241,14 +240,9 @@ enum {
241 240
242struct gfs2_dinode_host { 241struct gfs2_dinode_host {
243 u64 di_size; /* number of bytes in file */ 242 u64 di_size; /* number of bytes in file */
244 u64 di_blocks; /* number of blocks in file */
245 u64 di_goal_meta; /* rgrp to alloc from next */
246 u64 di_goal_data; /* data block goal */
247 u64 di_generation; /* generation number for NFS */ 243 u64 di_generation; /* generation number for NFS */
248 u32 di_flags; /* GFS2_DIF_... */ 244 u32 di_flags; /* GFS2_DIF_... */
249 u16 di_height; /* height of metadata */
250 /* These only apply to directories */ 245 /* These only apply to directories */
251 u16 di_depth; /* Number of bits in the table */
252 u32 di_entries; /* The number of entries in the directory */ 246 u32 di_entries; /* The number of entries in the directory */
253 u64 di_eattr; /* extended attribute block number */ 247 u64 di_eattr; /* extended attribute block number */
254}; 248};
@@ -265,9 +259,10 @@ struct gfs2_inode {
265 struct gfs2_holder i_iopen_gh; 259 struct gfs2_holder i_iopen_gh;
266 struct gfs2_holder i_gh; /* for prepare/commit_write only */ 260 struct gfs2_holder i_gh; /* for prepare/commit_write only */
267 struct gfs2_alloc *i_alloc; 261 struct gfs2_alloc *i_alloc;
268 u64 i_last_rg_alloc; 262 u64 i_goal; /* goal block for allocations */
269
270 struct rw_semaphore i_rw_mutex; 263 struct rw_semaphore i_rw_mutex;
264 u8 i_height;
265 u8 i_depth;
271}; 266};
272 267
273/* 268/*
@@ -490,9 +485,9 @@ struct gfs2_sbd {
490 u32 sd_qc_per_block; 485 u32 sd_qc_per_block;
491 u32 sd_max_dirres; /* Max blocks needed to add a directory entry */ 486 u32 sd_max_dirres; /* Max blocks needed to add a directory entry */
492 u32 sd_max_height; /* Max height of a file's metadata tree */ 487 u32 sd_max_height; /* Max height of a file's metadata tree */
493 u64 sd_heightsize[GFS2_MAX_META_HEIGHT]; 488 u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1];
494 u32 sd_max_jheight; /* Max height of journaled file's meta tree */ 489 u32 sd_max_jheight; /* Max height of journaled file's meta tree */
495 u64 sd_jheightsize[GFS2_MAX_META_HEIGHT]; 490 u64 sd_jheightsize[GFS2_MAX_META_HEIGHT + 1];
496 491
497 struct gfs2_args sd_args; /* Mount arguments */ 492 struct gfs2_args sd_args; /* Mount arguments */
498 struct gfs2_tune sd_tune; /* Filesystem tuning structure */ 493 struct gfs2_tune sd_tune; /* Filesystem tuning structure */
@@ -533,7 +528,7 @@ struct gfs2_sbd {
533 528
534 /* Resource group stuff */ 529 /* Resource group stuff */
535 530
536 u64 sd_rindex_vn; 531 int sd_rindex_uptodate;
537 spinlock_t sd_rindex_spin; 532 spinlock_t sd_rindex_spin;
538 struct mutex sd_rindex_mutex; 533 struct mutex sd_rindex_mutex;
539 struct list_head sd_rindex_list; 534 struct list_head sd_rindex_list;
@@ -637,9 +632,6 @@ struct gfs2_sbd {
637 632
638 /* Counters */ 633 /* Counters */
639 634
640 atomic_t sd_glock_count;
641 atomic_t sd_glock_held_count;
642 atomic_t sd_inode_count;
643 atomic_t sd_reclaimed; 635 atomic_t sd_reclaimed;
644 636
645 char sd_fsname[GFS2_FSNAME_LEN]; 637 char sd_fsname[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 37725ade3c51..3a9ef526c308 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -149,7 +149,8 @@ void gfs2_set_iop(struct inode *inode)
149 } else if (S_ISLNK(mode)) { 149 } else if (S_ISLNK(mode)) {
150 inode->i_op = &gfs2_symlink_iops; 150 inode->i_op = &gfs2_symlink_iops;
151 } else { 151 } else {
152 inode->i_op = &gfs2_dev_iops; 152 inode->i_op = &gfs2_file_iops;
153 init_special_inode(inode, inode->i_mode, inode->i_rdev);
153 } 154 }
154 155
155 unlock_new_inode(inode); 156 unlock_new_inode(inode);
@@ -248,12 +249,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
248{ 249{
249 struct gfs2_dinode_host *di = &ip->i_di; 250 struct gfs2_dinode_host *di = &ip->i_di;
250 const struct gfs2_dinode *str = buf; 251 const struct gfs2_dinode *str = buf;
252 u16 height, depth;
251 253
252 if (ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)) { 254 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
253 if (gfs2_consist_inode(ip)) 255 goto corrupt;
254 gfs2_dinode_print(ip);
255 return -EIO;
256 }
257 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); 256 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
258 ip->i_inode.i_mode = be32_to_cpu(str->di_mode); 257 ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
259 ip->i_inode.i_rdev = 0; 258 ip->i_inode.i_rdev = 0;
@@ -275,8 +274,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
275 ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); 274 ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
276 di->di_size = be64_to_cpu(str->di_size); 275 di->di_size = be64_to_cpu(str->di_size);
277 i_size_write(&ip->i_inode, di->di_size); 276 i_size_write(&ip->i_inode, di->di_size);
278 di->di_blocks = be64_to_cpu(str->di_blocks); 277 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
279 gfs2_set_inode_blocks(&ip->i_inode);
280 ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); 278 ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
281 ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); 279 ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
282 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); 280 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
@@ -284,15 +282,20 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
284 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); 282 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
285 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); 283 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
286 284
287 di->di_goal_meta = be64_to_cpu(str->di_goal_meta); 285 ip->i_goal = be64_to_cpu(str->di_goal_meta);
288 di->di_goal_data = be64_to_cpu(str->di_goal_data);
289 di->di_generation = be64_to_cpu(str->di_generation); 286 di->di_generation = be64_to_cpu(str->di_generation);
290 287
291 di->di_flags = be32_to_cpu(str->di_flags); 288 di->di_flags = be32_to_cpu(str->di_flags);
292 gfs2_set_inode_flags(&ip->i_inode); 289 gfs2_set_inode_flags(&ip->i_inode);
293 di->di_height = be16_to_cpu(str->di_height); 290 height = be16_to_cpu(str->di_height);
294 291 if (unlikely(height > GFS2_MAX_META_HEIGHT))
295 di->di_depth = be16_to_cpu(str->di_depth); 292 goto corrupt;
293 ip->i_height = (u8)height;
294
295 depth = be16_to_cpu(str->di_depth);
296 if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
297 goto corrupt;
298 ip->i_depth = (u8)depth;
296 di->di_entries = be32_to_cpu(str->di_entries); 299 di->di_entries = be32_to_cpu(str->di_entries);
297 300
298 di->di_eattr = be64_to_cpu(str->di_eattr); 301 di->di_eattr = be64_to_cpu(str->di_eattr);
@@ -300,6 +303,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
300 gfs2_set_aops(&ip->i_inode); 303 gfs2_set_aops(&ip->i_inode);
301 304
302 return 0; 305 return 0;
306corrupt:
307 if (gfs2_consist_inode(ip))
308 gfs2_dinode_print(ip);
309 return -EIO;
303} 310}
304 311
305/** 312/**
@@ -337,13 +344,15 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
337 struct gfs2_rgrpd *rgd; 344 struct gfs2_rgrpd *rgd;
338 int error; 345 int error;
339 346
340 if (ip->i_di.di_blocks != 1) { 347 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
341 if (gfs2_consist_inode(ip)) 348 if (gfs2_consist_inode(ip))
342 gfs2_dinode_print(ip); 349 gfs2_dinode_print(ip);
343 return -EIO; 350 return -EIO;
344 } 351 }
345 352
346 al = gfs2_alloc_get(ip); 353 al = gfs2_alloc_get(ip);
354 if (!al)
355 return -ENOMEM;
347 356
348 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 357 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
349 if (error) 358 if (error)
@@ -487,7 +496,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
487 return dir; 496 return dir;
488 } 497 }
489 498
490 if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) { 499 if (gfs2_glock_is_locked_by_me(dip->i_gl) == NULL) {
491 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 500 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
492 if (error) 501 if (error)
493 return ERR_PTR(error); 502 return ERR_PTR(error);
@@ -818,7 +827,8 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
818 int error; 827 int error;
819 828
820 munge_mode_uid_gid(dip, &mode, &uid, &gid); 829 munge_mode_uid_gid(dip, &mode, &uid, &gid);
821 gfs2_alloc_get(dip); 830 if (!gfs2_alloc_get(dip))
831 return -ENOMEM;
822 832
823 error = gfs2_quota_lock(dip, uid, gid); 833 error = gfs2_quota_lock(dip, uid, gid);
824 if (error) 834 if (error)
@@ -853,6 +863,8 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
853 int error; 863 int error;
854 864
855 al = gfs2_alloc_get(dip); 865 al = gfs2_alloc_get(dip);
866 if (!al)
867 return -ENOMEM;
856 868
857 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 869 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
858 if (error) 870 if (error)
@@ -1219,7 +1231,7 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
1219 1231
1220 x = ip->i_di.di_size + 1; 1232 x = ip->i_di.di_size + 1;
1221 if (x > *len) { 1233 if (x > *len) {
1222 *buf = kmalloc(x, GFP_KERNEL); 1234 *buf = kmalloc(x, GFP_NOFS);
1223 if (!*buf) { 1235 if (!*buf) {
1224 error = -ENOMEM; 1236 error = -ENOMEM;
1225 goto out_brelse; 1237 goto out_brelse;
@@ -1391,21 +1403,21 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
1391 str->di_gid = cpu_to_be32(ip->i_inode.i_gid); 1403 str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
1392 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); 1404 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
1393 str->di_size = cpu_to_be64(di->di_size); 1405 str->di_size = cpu_to_be64(di->di_size);
1394 str->di_blocks = cpu_to_be64(di->di_blocks); 1406 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
1395 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); 1407 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
1396 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); 1408 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
1397 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); 1409 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
1398 1410
1399 str->di_goal_meta = cpu_to_be64(di->di_goal_meta); 1411 str->di_goal_meta = cpu_to_be64(ip->i_goal);
1400 str->di_goal_data = cpu_to_be64(di->di_goal_data); 1412 str->di_goal_data = cpu_to_be64(ip->i_goal);
1401 str->di_generation = cpu_to_be64(di->di_generation); 1413 str->di_generation = cpu_to_be64(di->di_generation);
1402 1414
1403 str->di_flags = cpu_to_be32(di->di_flags); 1415 str->di_flags = cpu_to_be32(di->di_flags);
1404 str->di_height = cpu_to_be16(di->di_height); 1416 str->di_height = cpu_to_be16(ip->i_height);
1405 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && 1417 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
1406 !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ? 1418 !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
1407 GFS2_FORMAT_DE : 0); 1419 GFS2_FORMAT_DE : 0);
1408 str->di_depth = cpu_to_be16(di->di_depth); 1420 str->di_depth = cpu_to_be16(ip->i_depth);
1409 str->di_entries = cpu_to_be32(di->di_entries); 1421 str->di_entries = cpu_to_be32(di->di_entries);
1410 1422
1411 str->di_eattr = cpu_to_be64(di->di_eattr); 1423 str->di_eattr = cpu_to_be64(di->di_eattr);
@@ -1423,15 +1435,13 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
1423 printk(KERN_INFO " no_addr = %llu\n", 1435 printk(KERN_INFO " no_addr = %llu\n",
1424 (unsigned long long)ip->i_no_addr); 1436 (unsigned long long)ip->i_no_addr);
1425 printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size); 1437 printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size);
1426 printk(KERN_INFO " di_blocks = %llu\n", 1438 printk(KERN_INFO " blocks = %llu\n",
1427 (unsigned long long)di->di_blocks); 1439 (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode));
1428 printk(KERN_INFO " di_goal_meta = %llu\n", 1440 printk(KERN_INFO " i_goal = %llu\n",
1429 (unsigned long long)di->di_goal_meta); 1441 (unsigned long long)ip->i_goal);
1430 printk(KERN_INFO " di_goal_data = %llu\n",
1431 (unsigned long long)di->di_goal_data);
1432 printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags); 1442 printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags);
1433 printk(KERN_INFO " di_height = %u\n", di->di_height); 1443 printk(KERN_INFO " i_height = %u\n", ip->i_height);
1434 printk(KERN_INFO " di_depth = %u\n", di->di_depth); 1444 printk(KERN_INFO " i_depth = %u\n", ip->i_depth);
1435 printk(KERN_INFO " di_entries = %u\n", di->di_entries); 1445 printk(KERN_INFO " di_entries = %u\n", di->di_entries);
1436 printk(KERN_INFO " di_eattr = %llu\n", 1446 printk(KERN_INFO " di_eattr = %llu\n",
1437 (unsigned long long)di->di_eattr); 1447 (unsigned long long)di->di_eattr);
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index d44650662615..580da454b38f 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -10,9 +10,11 @@
10#ifndef __INODE_DOT_H__ 10#ifndef __INODE_DOT_H__
11#define __INODE_DOT_H__ 11#define __INODE_DOT_H__
12 12
13#include "util.h"
14
13static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) 15static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
14{ 16{
15 return !ip->i_di.di_height; 17 return !ip->i_height;
16} 18}
17 19
18static inline int gfs2_is_jdata(const struct gfs2_inode *ip) 20static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
@@ -37,13 +39,25 @@ static inline int gfs2_is_dir(const struct gfs2_inode *ip)
37 return S_ISDIR(ip->i_inode.i_mode); 39 return S_ISDIR(ip->i_inode.i_mode);
38} 40}
39 41
40static inline void gfs2_set_inode_blocks(struct inode *inode) 42static inline void gfs2_set_inode_blocks(struct inode *inode, u64 blocks)
43{
44 inode->i_blocks = blocks <<
45 (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
46}
47
48static inline u64 gfs2_get_inode_blocks(const struct inode *inode)
41{ 49{
42 struct gfs2_inode *ip = GFS2_I(inode); 50 return inode->i_blocks >>
43 inode->i_blocks = ip->i_di.di_blocks <<
44 (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); 51 (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
45} 52}
46 53
54static inline void gfs2_add_inode_blocks(struct inode *inode, s64 change)
55{
56 gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks > -change));
57 change *= (GFS2_SB(inode)->sd_sb.sb_bsize/GFS2_BASIC_BLOCK);
58 inode->i_blocks += change;
59}
60
47static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr, 61static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr,
48 u64 no_formal_ino) 62 u64 no_formal_ino)
49{ 63{
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c
deleted file mode 100644
index cfcc39b86a53..000000000000
--- a/fs/gfs2/lm.c
+++ /dev/null
@@ -1,210 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/slab.h>
11#include <linux/spinlock.h>
12#include <linux/completion.h>
13#include <linux/buffer_head.h>
14#include <linux/delay.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17
18#include "gfs2.h"
19#include "incore.h"
20#include "glock.h"
21#include "lm.h"
22#include "super.h"
23#include "util.h"
24
25/**
26 * gfs2_lm_mount - mount a locking protocol
27 * @sdp: the filesystem
28 * @args: mount arguements
29 * @silent: if 1, don't complain if the FS isn't a GFS2 fs
30 *
31 * Returns: errno
32 */
33
34int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
35{
36 char *proto = sdp->sd_proto_name;
37 char *table = sdp->sd_table_name;
38 int flags = 0;
39 int error;
40
41 if (sdp->sd_args.ar_spectator)
42 flags |= LM_MFLAG_SPECTATOR;
43
44 fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
45
46 error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata,
47 gfs2_glock_cb, sdp,
48 GFS2_MIN_LVB_SIZE, flags,
49 &sdp->sd_lockstruct, &sdp->sd_kobj);
50 if (error) {
51 fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
52 proto, table, sdp->sd_args.ar_hostdata);
53 goto out;
54 }
55
56 if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) ||
57 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
58 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >=
59 GFS2_MIN_LVB_SIZE)) {
60 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
61 goto out;
62 }
63
64 if (sdp->sd_args.ar_spectator)
65 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
66 else
67 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
68 sdp->sd_lockstruct.ls_jid);
69
70 fs_info(sdp, "Joined cluster. Now mounting FS...\n");
71
72 if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
73 !sdp->sd_args.ar_ignore_local_fs) {
74 sdp->sd_args.ar_localflocks = 1;
75 sdp->sd_args.ar_localcaching = 1;
76 }
77
78out:
79 return error;
80}
81
82void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
83{
84 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
85 sdp->sd_lockstruct.ls_ops->lm_others_may_mount(
86 sdp->sd_lockstruct.ls_lockspace);
87}
88
89void gfs2_lm_unmount(struct gfs2_sbd *sdp)
90{
91 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
92 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
93}
94
95int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
96{
97 va_list args;
98
99 if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
100 return 0;
101
102 va_start(args, fmt);
103 vprintk(fmt, args);
104 va_end(args);
105
106 fs_err(sdp, "about to withdraw this file system\n");
107 BUG_ON(sdp->sd_args.ar_debug);
108
109 fs_err(sdp, "telling LM to withdraw\n");
110 gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
111 fs_err(sdp, "withdrawn\n");
112 dump_stack();
113
114 return -1;
115}
116
117int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
118 void **lockp)
119{
120 int error = -EIO;
121 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
122 error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
123 sdp->sd_lockstruct.ls_lockspace, name, lockp);
124 return error;
125}
126
127void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock)
128{
129 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
130 sdp->sd_lockstruct.ls_ops->lm_put_lock(lock);
131}
132
133unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
134 unsigned int cur_state, unsigned int req_state,
135 unsigned int flags)
136{
137 int ret = 0;
138 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
139 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state,
140 req_state, flags);
141 return ret;
142}
143
144unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock,
145 unsigned int cur_state)
146{
147 int ret = 0;
148 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
149 ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state);
150 return ret;
151}
152
153void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock)
154{
155 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
156 sdp->sd_lockstruct.ls_ops->lm_cancel(lock);
157}
158
159int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp)
160{
161 int error = -EIO;
162 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
163 error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
164 return error;
165}
166
167void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb)
168{
169 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
170 sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(lock, lvb);
171}
172
173int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
174 struct file *file, struct file_lock *fl)
175{
176 int error = -EIO;
177 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
178 error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
179 sdp->sd_lockstruct.ls_lockspace, name, file, fl);
180 return error;
181}
182
183int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
184 struct file *file, int cmd, struct file_lock *fl)
185{
186 int error = -EIO;
187 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
188 error = sdp->sd_lockstruct.ls_ops->lm_plock(
189 sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl);
190 return error;
191}
192
193int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
194 struct file *file, struct file_lock *fl)
195{
196 int error = -EIO;
197 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
198 error = sdp->sd_lockstruct.ls_ops->lm_punlock(
199 sdp->sd_lockstruct.ls_lockspace, name, file, fl);
200 return error;
201}
202
203void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
204 unsigned int message)
205{
206 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
207 sdp->sd_lockstruct.ls_ops->lm_recovery_done(
208 sdp->sd_lockstruct.ls_lockspace, jid, message);
209}
210
diff --git a/fs/gfs2/lm.h b/fs/gfs2/lm.h
deleted file mode 100644
index 21cdc30ee08c..000000000000
--- a/fs/gfs2/lm.h
+++ /dev/null
@@ -1,42 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __LM_DOT_H__
11#define __LM_DOT_H__
12
13struct gfs2_sbd;
14
15#define GFS2_MIN_LVB_SIZE 32
16
17int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent);
18void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp);
19void gfs2_lm_unmount(struct gfs2_sbd *sdp);
20int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
21 __attribute__ ((format(printf, 2, 3)));
22int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
23 void **lockp);
24void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock);
25unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
26 unsigned int cur_state, unsigned int req_state,
27 unsigned int flags);
28unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock,
29 unsigned int cur_state);
30void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock);
31int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp);
32void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb);
33int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
34 struct file *file, struct file_lock *fl);
35int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
36 struct file *file, int cmd, struct file_lock *fl);
37int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
38 struct file *file, struct file_lock *fl);
39void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
40 unsigned int message);
41
42#endif /* __LM_DOT_H__ */
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
index 542a797ac89a..cf7ea8abec87 100644
--- a/fs/gfs2/locking/dlm/lock.c
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -137,7 +137,8 @@ static inline unsigned int make_flags(struct gdlm_lock *lp,
137 137
138 /* Conversion deadlock avoidance by DLM */ 138 /* Conversion deadlock avoidance by DLM */
139 139
140 if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) && 140 if (!(lp->ls->fsflags & LM_MFLAG_CONV_NODROP) &&
141 !test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
141 !(lkf & DLM_LKF_NOQUEUE) && 142 !(lkf & DLM_LKF_NOQUEUE) &&
142 cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req) 143 cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
143 lkf |= DLM_LKF_CONVDEADLK; 144 lkf |= DLM_LKF_CONVDEADLK;
@@ -164,7 +165,7 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
164{ 165{
165 struct gdlm_lock *lp; 166 struct gdlm_lock *lp;
166 167
167 lp = kzalloc(sizeof(struct gdlm_lock), GFP_KERNEL); 168 lp = kzalloc(sizeof(struct gdlm_lock), GFP_NOFS);
168 if (!lp) 169 if (!lp)
169 return -ENOMEM; 170 return -ENOMEM;
170 171
@@ -382,7 +383,7 @@ static int gdlm_add_lvb(struct gdlm_lock *lp)
382{ 383{
383 char *lvb; 384 char *lvb;
384 385
385 lvb = kzalloc(GDLM_LVB_SIZE, GFP_KERNEL); 386 lvb = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
386 if (!lvb) 387 if (!lvb)
387 return -ENOMEM; 388 return -ENOMEM;
388 389
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index 9e8265d28377..58fcf8c5bf39 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -183,5 +183,10 @@ int gdlm_plock_get(void *, struct lm_lockname *, struct file *,
183 struct file_lock *); 183 struct file_lock *);
184int gdlm_punlock(void *, struct lm_lockname *, struct file *, 184int gdlm_punlock(void *, struct lm_lockname *, struct file *,
185 struct file_lock *); 185 struct file_lock *);
186
187/* mount.c */
188
189extern const struct lm_lockops gdlm_ops;
190
186#endif 191#endif
187 192
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
index a0e7eda643ed..36a225850bd8 100644
--- a/fs/gfs2/locking/dlm/main.c
+++ b/fs/gfs2/locking/dlm/main.c
@@ -11,8 +11,6 @@
11 11
12#include "lock_dlm.h" 12#include "lock_dlm.h"
13 13
14extern struct lm_lockops gdlm_ops;
15
16static int __init init_lock_dlm(void) 14static int __init init_lock_dlm(void)
17{ 15{
18 int error; 16 int error;
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index a87b09839761..8479da47049c 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -12,8 +12,6 @@
12 12
13#include "lock_dlm.h" 13#include "lock_dlm.h"
14 14
15extern struct lm_lockops gdlm_ops;
16
17static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf) 15static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf)
18{ 16{
19 return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name); 17 return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name);
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
index 521694fc19d6..e53db6fd28ab 100644
--- a/fs/gfs2/locking/dlm/thread.c
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -135,7 +135,15 @@ static void process_complete(struct gdlm_lock *lp)
135 lp->lksb.sb_status, lp->lockname.ln_type, 135 lp->lksb.sb_status, lp->lockname.ln_type,
136 (unsigned long long)lp->lockname.ln_number, 136 (unsigned long long)lp->lockname.ln_number,
137 lp->flags); 137 lp->flags);
138 return; 138 if (lp->lksb.sb_status == -EDEADLOCK &&
139 lp->ls->fsflags & LM_MFLAG_CONV_NODROP) {
140 lp->req = lp->cur;
141 acb.lc_ret |= LM_OUT_CONV_DEADLK;
142 if (lp->cur == DLM_LOCK_IV)
143 lp->lksb.sb_lkid = 0;
144 goto out;
145 } else
146 return;
139 } 147 }
140 148
141 /* 149 /*
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
index d3b8ce6fbbe3..284a5ece8d94 100644
--- a/fs/gfs2/locking/nolock/main.c
+++ b/fs/gfs2/locking/nolock/main.c
@@ -140,7 +140,7 @@ static int nolock_hold_lvb(void *lock, char **lvbp)
140 struct nolock_lockspace *nl = lock; 140 struct nolock_lockspace *nl = lock;
141 int error = 0; 141 int error = 0;
142 142
143 *lvbp = kzalloc(nl->nl_lvb_size, GFP_KERNEL); 143 *lvbp = kzalloc(nl->nl_lvb_size, GFP_NOFS);
144 if (!*lvbp) 144 if (!*lvbp)
145 error = -ENOMEM; 145 error = -ENOMEM;
146 146
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 161ab6f2058e..548264b1836d 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -769,8 +769,8 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
769 sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; 769 sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
770 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); 770 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
771 reserved = calc_reserved(sdp); 771 reserved = calc_reserved(sdp);
772 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved);
772 unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; 773 unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved;
773 gfs2_assert_withdraw(sdp, unused >= 0);
774 atomic_add(unused, &sdp->sd_log_blks_free); 774 atomic_add(unused, &sdp->sd_log_blks_free);
775 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= 775 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
776 sdp->sd_jdesc->jd_blocks); 776 sdp->sd_jdesc->jd_blocks);
@@ -779,6 +779,21 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
779 gfs2_log_unlock(sdp); 779 gfs2_log_unlock(sdp);
780} 780}
781 781
782static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
783{
784 struct list_head *head = &tr->tr_list_buf;
785 struct gfs2_bufdata *bd;
786
787 gfs2_log_lock(sdp);
788 while (!list_empty(head)) {
789 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
790 list_del_init(&bd->bd_list_tr);
791 tr->tr_num_buf--;
792 }
793 gfs2_log_unlock(sdp);
794 gfs2_assert_warn(sdp, !tr->tr_num_buf);
795}
796
782/** 797/**
783 * gfs2_log_commit - Commit a transaction to the log 798 * gfs2_log_commit - Commit a transaction to the log
784 * @sdp: the filesystem 799 * @sdp: the filesystem
@@ -790,7 +805,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
790void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) 805void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
791{ 806{
792 log_refund(sdp, tr); 807 log_refund(sdp, tr);
793 lops_incore_commit(sdp, tr); 808 buf_lo_incore_commit(sdp, tr);
794 809
795 sdp->sd_vfs->s_dirt = 1; 810 sdp->sd_vfs->s_dirt = 1;
796 up_read(&sdp->sd_log_flush_lock); 811 up_read(&sdp->sd_log_flush_lock);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index fae59d69d01a..4390f6f4047d 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -152,21 +152,6 @@ out:
152 unlock_buffer(bd->bd_bh); 152 unlock_buffer(bd->bd_bh);
153} 153}
154 154
155static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
156{
157 struct list_head *head = &tr->tr_list_buf;
158 struct gfs2_bufdata *bd;
159
160 gfs2_log_lock(sdp);
161 while (!list_empty(head)) {
162 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
163 list_del_init(&bd->bd_list_tr);
164 tr->tr_num_buf--;
165 }
166 gfs2_log_unlock(sdp);
167 gfs2_assert_warn(sdp, !tr->tr_num_buf);
168}
169
170static void buf_lo_before_commit(struct gfs2_sbd *sdp) 155static void buf_lo_before_commit(struct gfs2_sbd *sdp)
171{ 156{
172 struct buffer_head *bh; 157 struct buffer_head *bh;
@@ -419,8 +404,10 @@ static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
419 blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); 404 blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
420 405
421 error = gfs2_revoke_add(sdp, blkno, start); 406 error = gfs2_revoke_add(sdp, blkno, start);
422 if (error < 0) 407 if (error < 0) {
408 brelse(bh);
423 return error; 409 return error;
410 }
424 else if (error) 411 else if (error)
425 sdp->sd_found_revokes++; 412 sdp->sd_found_revokes++;
426 413
@@ -737,7 +724,6 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
737 724
738const struct gfs2_log_operations gfs2_buf_lops = { 725const struct gfs2_log_operations gfs2_buf_lops = {
739 .lo_add = buf_lo_add, 726 .lo_add = buf_lo_add,
740 .lo_incore_commit = buf_lo_incore_commit,
741 .lo_before_commit = buf_lo_before_commit, 727 .lo_before_commit = buf_lo_before_commit,
742 .lo_after_commit = buf_lo_after_commit, 728 .lo_after_commit = buf_lo_after_commit,
743 .lo_before_scan = buf_lo_before_scan, 729 .lo_before_scan = buf_lo_before_scan,
@@ -763,7 +749,6 @@ const struct gfs2_log_operations gfs2_rg_lops = {
763 749
764const struct gfs2_log_operations gfs2_databuf_lops = { 750const struct gfs2_log_operations gfs2_databuf_lops = {
765 .lo_add = databuf_lo_add, 751 .lo_add = databuf_lo_add,
766 .lo_incore_commit = buf_lo_incore_commit,
767 .lo_before_commit = databuf_lo_before_commit, 752 .lo_before_commit = databuf_lo_before_commit,
768 .lo_after_commit = databuf_lo_after_commit, 753 .lo_after_commit = databuf_lo_after_commit,
769 .lo_scan_elements = databuf_lo_scan_elements, 754 .lo_scan_elements = databuf_lo_scan_elements,
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 41a00df75587..3c0b2737658a 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -57,15 +57,6 @@ static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
57 le->le_ops->lo_add(sdp, le); 57 le->le_ops->lo_add(sdp, le);
58} 58}
59 59
60static inline void lops_incore_commit(struct gfs2_sbd *sdp,
61 struct gfs2_trans *tr)
62{
63 int x;
64 for (x = 0; gfs2_log_ops[x]; x++)
65 if (gfs2_log_ops[x]->lo_incore_commit)
66 gfs2_log_ops[x]->lo_incore_commit(sdp, tr);
67}
68
69static inline void lops_before_commit(struct gfs2_sbd *sdp) 60static inline void lops_before_commit(struct gfs2_sbd *sdp)
70{ 61{
71 int x; 62 int x;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 9c7765c12d62..053e2ebbbd50 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -89,6 +89,12 @@ static int __init init_gfs2_fs(void)
89 if (!gfs2_bufdata_cachep) 89 if (!gfs2_bufdata_cachep)
90 goto fail; 90 goto fail;
91 91
92 gfs2_rgrpd_cachep = kmem_cache_create("gfs2_rgrpd",
93 sizeof(struct gfs2_rgrpd),
94 0, 0, NULL);
95 if (!gfs2_rgrpd_cachep)
96 goto fail;
97
92 error = register_filesystem(&gfs2_fs_type); 98 error = register_filesystem(&gfs2_fs_type);
93 if (error) 99 if (error)
94 goto fail; 100 goto fail;
@@ -108,6 +114,9 @@ fail_unregister:
108fail: 114fail:
109 gfs2_glock_exit(); 115 gfs2_glock_exit();
110 116
117 if (gfs2_rgrpd_cachep)
118 kmem_cache_destroy(gfs2_rgrpd_cachep);
119
111 if (gfs2_bufdata_cachep) 120 if (gfs2_bufdata_cachep)
112 kmem_cache_destroy(gfs2_bufdata_cachep); 121 kmem_cache_destroy(gfs2_bufdata_cachep);
113 122
@@ -133,6 +142,7 @@ static void __exit exit_gfs2_fs(void)
133 unregister_filesystem(&gfs2_fs_type); 142 unregister_filesystem(&gfs2_fs_type);
134 unregister_filesystem(&gfs2meta_fs_type); 143 unregister_filesystem(&gfs2meta_fs_type);
135 144
145 kmem_cache_destroy(gfs2_rgrpd_cachep);
136 kmem_cache_destroy(gfs2_bufdata_cachep); 146 kmem_cache_destroy(gfs2_bufdata_cachep);
137 kmem_cache_destroy(gfs2_inode_cachep); 147 kmem_cache_destroy(gfs2_inode_cachep);
138 kmem_cache_destroy(gfs2_glock_cachep); 148 kmem_cache_destroy(gfs2_glock_cachep);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index ac772b6d9dbb..90a04a6e3789 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -21,7 +21,6 @@
21#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
22#include <linux/lm_interface.h> 22#include <linux/lm_interface.h>
23#include <linux/backing-dev.h> 23#include <linux/backing-dev.h>
24#include <linux/pagevec.h>
25 24
26#include "gfs2.h" 25#include "gfs2.h"
27#include "incore.h" 26#include "incore.h"
@@ -104,11 +103,9 @@ static int gfs2_writepage_common(struct page *page,
104 loff_t i_size = i_size_read(inode); 103 loff_t i_size = i_size_read(inode);
105 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 104 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
106 unsigned offset; 105 unsigned offset;
107 int ret = -EIO;
108 106
109 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) 107 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
110 goto out; 108 goto out;
111 ret = 0;
112 if (current->journal_info) 109 if (current->journal_info)
113 goto redirty; 110 goto redirty;
114 /* Is the page fully outside i_size? (truncate in progress) */ 111 /* Is the page fully outside i_size? (truncate in progress) */
@@ -280,7 +277,7 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping,
280 int i; 277 int i;
281 int ret; 278 int ret;
282 279
283 ret = gfs2_trans_begin(sdp, nrblocks, 0); 280 ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
284 if (ret < 0) 281 if (ret < 0)
285 return ret; 282 return ret;
286 283
@@ -510,23 +507,26 @@ static int __gfs2_readpage(void *file, struct page *page)
510static int gfs2_readpage(struct file *file, struct page *page) 507static int gfs2_readpage(struct file *file, struct page *page)
511{ 508{
512 struct gfs2_inode *ip = GFS2_I(page->mapping->host); 509 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
513 struct gfs2_holder gh; 510 struct gfs2_holder *gh;
514 int error; 511 int error;
515 512
516 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); 513 gh = gfs2_glock_is_locked_by_me(ip->i_gl);
517 error = gfs2_glock_nq_atime(&gh); 514 if (!gh) {
518 if (unlikely(error)) { 515 gh = kmalloc(sizeof(struct gfs2_holder), GFP_NOFS);
516 if (!gh)
517 return -ENOBUFS;
518 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, gh);
519 unlock_page(page); 519 unlock_page(page);
520 goto out; 520 error = gfs2_glock_nq_atime(gh);
521 if (likely(error != 0))
522 goto out;
523 return AOP_TRUNCATED_PAGE;
521 } 524 }
522 error = __gfs2_readpage(file, page); 525 error = __gfs2_readpage(file, page);
523 gfs2_glock_dq(&gh); 526 gfs2_glock_dq(gh);
524out: 527out:
525 gfs2_holder_uninit(&gh); 528 gfs2_holder_uninit(gh);
526 if (error == GLR_TRYFAILED) { 529 kfree(gh);
527 yield();
528 return AOP_TRUNCATED_PAGE;
529 }
530 return error; 530 return error;
531} 531}
532 532
@@ -648,15 +648,15 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
648 648
649 if (alloc_required) { 649 if (alloc_required) {
650 al = gfs2_alloc_get(ip); 650 al = gfs2_alloc_get(ip);
651 if (!al) {
652 error = -ENOMEM;
653 goto out_unlock;
654 }
651 655
652 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 656 error = gfs2_quota_lock_check(ip);
653 if (error) 657 if (error)
654 goto out_alloc_put; 658 goto out_alloc_put;
655 659
656 error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
657 if (error)
658 goto out_qunlock;
659
660 al->al_requested = data_blocks + ind_blocks; 660 al->al_requested = data_blocks + ind_blocks;
661 error = gfs2_inplace_reserve(ip); 661 error = gfs2_inplace_reserve(ip);
662 if (error) 662 if (error)
@@ -828,7 +828,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
828 unsigned int to = from + len; 828 unsigned int to = from + len;
829 int ret; 829 int ret;
830 830
831 BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == 0); 831 BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
832 832
833 ret = gfs2_meta_inode_buffer(ip, &dibh); 833 ret = gfs2_meta_inode_buffer(ip, &dibh);
834 if (unlikely(ret)) { 834 if (unlikely(ret)) {
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index 793e334d098e..4a5e676b4420 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -43,7 +43,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
43 struct gfs2_holder d_gh; 43 struct gfs2_holder d_gh;
44 struct gfs2_inode *ip = NULL; 44 struct gfs2_inode *ip = NULL;
45 int error; 45 int error;
46 int had_lock=0; 46 int had_lock = 0;
47 47
48 if (inode) { 48 if (inode) {
49 if (is_bad_inode(inode)) 49 if (is_bad_inode(inode))
@@ -54,7 +54,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
54 if (sdp->sd_args.ar_localcaching) 54 if (sdp->sd_args.ar_localcaching)
55 goto valid; 55 goto valid;
56 56
57 had_lock = gfs2_glock_is_locked_by_me(dip->i_gl); 57 had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL);
58 if (!had_lock) { 58 if (!had_lock) {
59 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 59 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
60 if (error) 60 if (error)
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 334c7f85351b..990d9f4bc463 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -204,8 +204,6 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
204 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, 204 inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
205 inum->no_addr, 205 inum->no_addr,
206 0, 0); 206 0, 0);
207 if (!inode)
208 goto fail;
209 if (IS_ERR(inode)) { 207 if (IS_ERR(inode)) {
210 error = PTR_ERR(inode); 208 error = PTR_ERR(inode);
211 goto fail; 209 goto fail;
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index f4842f2548cd..e1b7d525a066 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -30,7 +30,6 @@
30#include "glock.h" 30#include "glock.h"
31#include "glops.h" 31#include "glops.h"
32#include "inode.h" 32#include "inode.h"
33#include "lm.h"
34#include "log.h" 33#include "log.h"
35#include "meta_io.h" 34#include "meta_io.h"
36#include "quota.h" 35#include "quota.h"
@@ -39,6 +38,7 @@
39#include "util.h" 38#include "util.h"
40#include "eaops.h" 39#include "eaops.h"
41#include "ops_address.h" 40#include "ops_address.h"
41#include "ops_inode.h"
42 42
43/** 43/**
44 * gfs2_llseek - seek to a location in a file 44 * gfs2_llseek - seek to a location in a file
@@ -369,12 +369,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
369 if (al == NULL) 369 if (al == NULL)
370 goto out_unlock; 370 goto out_unlock;
371 371
372 ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 372 ret = gfs2_quota_lock_check(ip);
373 if (ret) 373 if (ret)
374 goto out_alloc_put; 374 goto out_alloc_put;
375 ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
376 if (ret)
377 goto out_quota_unlock;
378 al->al_requested = data_blocks + ind_blocks; 375 al->al_requested = data_blocks + ind_blocks;
379 ret = gfs2_inplace_reserve(ip); 376 ret = gfs2_inplace_reserve(ip);
380 if (ret) 377 if (ret)
@@ -596,6 +593,36 @@ static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl)
596 return generic_setlease(file, arg, fl); 593 return generic_setlease(file, arg, fl);
597} 594}
598 595
596static int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
597 struct file *file, struct file_lock *fl)
598{
599 int error = -EIO;
600 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
601 error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
602 sdp->sd_lockstruct.ls_lockspace, name, file, fl);
603 return error;
604}
605
606static int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
607 struct file *file, int cmd, struct file_lock *fl)
608{
609 int error = -EIO;
610 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
611 error = sdp->sd_lockstruct.ls_ops->lm_plock(
612 sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl);
613 return error;
614}
615
616static int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
617 struct file *file, struct file_lock *fl)
618{
619 int error = -EIO;
620 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
621 error = sdp->sd_lockstruct.ls_ops->lm_punlock(
622 sdp->sd_lockstruct.ls_lockspace, name, file, fl);
623 return error;
624}
625
599/** 626/**
600 * gfs2_lock - acquire/release a posix lock on a file 627 * gfs2_lock - acquire/release a posix lock on a file
601 * @file: the file pointer 628 * @file: the file pointer
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 4bee6aa845e4..ef9c6c4f80f6 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -26,7 +26,6 @@
26#include "glock.h" 26#include "glock.h"
27#include "glops.h" 27#include "glops.h"
28#include "inode.h" 28#include "inode.h"
29#include "lm.h"
30#include "mount.h" 29#include "mount.h"
31#include "ops_fstype.h" 30#include "ops_fstype.h"
32#include "ops_dentry.h" 31#include "ops_dentry.h"
@@ -363,6 +362,13 @@ static int map_journal_extents(struct gfs2_sbd *sdp)
363 return rc; 362 return rc;
364} 363}
365 364
365static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
366{
367 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
368 sdp->sd_lockstruct.ls_ops->lm_others_may_mount(
369 sdp->sd_lockstruct.ls_lockspace);
370}
371
366static int init_journal(struct gfs2_sbd *sdp, int undo) 372static int init_journal(struct gfs2_sbd *sdp, int undo)
367{ 373{
368 struct gfs2_holder ji_gh; 374 struct gfs2_holder ji_gh;
@@ -542,7 +548,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
542 } 548 }
543 ip = GFS2_I(sdp->sd_rindex); 549 ip = GFS2_I(sdp->sd_rindex);
544 set_bit(GLF_STICKY, &ip->i_gl->gl_flags); 550 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
545 sdp->sd_rindex_vn = ip->i_gl->gl_vn - 1; 551 sdp->sd_rindex_uptodate = 0;
546 552
547 /* Read in the quota inode */ 553 /* Read in the quota inode */
548 sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota"); 554 sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota");
@@ -705,6 +711,69 @@ fail:
705} 711}
706 712
707/** 713/**
714 * gfs2_lm_mount - mount a locking protocol
715 * @sdp: the filesystem
716 * @args: mount arguements
717 * @silent: if 1, don't complain if the FS isn't a GFS2 fs
718 *
719 * Returns: errno
720 */
721
722static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
723{
724 char *proto = sdp->sd_proto_name;
725 char *table = sdp->sd_table_name;
726 int flags = LM_MFLAG_CONV_NODROP;
727 int error;
728
729 if (sdp->sd_args.ar_spectator)
730 flags |= LM_MFLAG_SPECTATOR;
731
732 fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
733
734 error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata,
735 gfs2_glock_cb, sdp,
736 GFS2_MIN_LVB_SIZE, flags,
737 &sdp->sd_lockstruct, &sdp->sd_kobj);
738 if (error) {
739 fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
740 proto, table, sdp->sd_args.ar_hostdata);
741 goto out;
742 }
743
744 if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) ||
745 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
746 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >=
747 GFS2_MIN_LVB_SIZE)) {
748 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
749 goto out;
750 }
751
752 if (sdp->sd_args.ar_spectator)
753 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
754 else
755 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
756 sdp->sd_lockstruct.ls_jid);
757
758 fs_info(sdp, "Joined cluster. Now mounting FS...\n");
759
760 if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
761 !sdp->sd_args.ar_ignore_local_fs) {
762 sdp->sd_args.ar_localflocks = 1;
763 sdp->sd_args.ar_localcaching = 1;
764 }
765
766out:
767 return error;
768}
769
770void gfs2_lm_unmount(struct gfs2_sbd *sdp)
771{
772 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
773 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
774}
775
776/**
708 * fill_super - Read in superblock 777 * fill_super - Read in superblock
709 * @sb: The VFS superblock 778 * @sb: The VFS superblock
710 * @data: Mount options 779 * @data: Mount options
@@ -874,7 +943,6 @@ static struct super_block* get_gfs2_sb(const char *dev_name)
874{ 943{
875 struct kstat stat; 944 struct kstat stat;
876 struct nameidata nd; 945 struct nameidata nd;
877 struct file_system_type *fstype;
878 struct super_block *sb = NULL, *s; 946 struct super_block *sb = NULL, *s;
879 int error; 947 int error;
880 948
@@ -886,8 +954,7 @@ static struct super_block* get_gfs2_sb(const char *dev_name)
886 } 954 }
887 error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat); 955 error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat);
888 956
889 fstype = get_fs_type("gfs2"); 957 list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) {
890 list_for_each_entry(s, &fstype->fs_supers, s_instances) {
891 if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || 958 if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) ||
892 (S_ISDIR(stat.mode) && 959 (S_ISDIR(stat.mode) &&
893 s == nd.path.dentry->d_inode->i_sb)) { 960 s == nd.path.dentry->d_inode->i_sb)) {
@@ -931,7 +998,6 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
931 error = PTR_ERR(new); 998 error = PTR_ERR(new);
932 goto error; 999 goto error;
933 } 1000 }
934 module_put(fs_type->owner);
935 new->s_flags = flags; 1001 new->s_flags = flags;
936 strlcpy(new->s_id, sb->s_id, sizeof(new->s_id)); 1002 strlcpy(new->s_id, sb->s_id, sizeof(new->s_id));
937 sb_set_blocksize(new, sb->s_blocksize); 1003 sb_set_blocksize(new, sb->s_blocksize);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index e87412902bed..2686ad4c0029 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -200,15 +200,15 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
200 200
201 if (alloc_required) { 201 if (alloc_required) {
202 struct gfs2_alloc *al = gfs2_alloc_get(dip); 202 struct gfs2_alloc *al = gfs2_alloc_get(dip);
203 if (!al) {
204 error = -ENOMEM;
205 goto out_gunlock;
206 }
203 207
204 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 208 error = gfs2_quota_lock_check(dip);
205 if (error) 209 if (error)
206 goto out_alloc; 210 goto out_alloc;
207 211
208 error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid);
209 if (error)
210 goto out_gunlock_q;
211
212 al->al_requested = sdp->sd_max_dirres; 212 al->al_requested = sdp->sd_max_dirres;
213 213
214 error = gfs2_inplace_reserve(dip); 214 error = gfs2_inplace_reserve(dip);
@@ -716,15 +716,15 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
716 716
717 if (alloc_required) { 717 if (alloc_required) {
718 struct gfs2_alloc *al = gfs2_alloc_get(ndip); 718 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
719 if (!al) {
720 error = -ENOMEM;
721 goto out_gunlock;
722 }
719 723
720 error = gfs2_quota_lock(ndip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 724 error = gfs2_quota_lock_check(ndip);
721 if (error) 725 if (error)
722 goto out_alloc; 726 goto out_alloc;
723 727
724 error = gfs2_quota_check(ndip, ndip->i_inode.i_uid, ndip->i_inode.i_gid);
725 if (error)
726 goto out_gunlock_q;
727
728 al->al_requested = sdp->sd_max_dirres; 728 al->al_requested = sdp->sd_max_dirres;
729 729
730 error = gfs2_inplace_reserve(ndip); 730 error = gfs2_inplace_reserve(ndip);
@@ -898,7 +898,7 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
898 int error; 898 int error;
899 int unlock = 0; 899 int unlock = 0;
900 900
901 if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) { 901 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
902 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 902 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
903 if (error) 903 if (error)
904 return error; 904 return error;
@@ -953,7 +953,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
953 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) 953 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
954 ogid = ngid = NO_QUOTA_CHANGE; 954 ogid = ngid = NO_QUOTA_CHANGE;
955 955
956 gfs2_alloc_get(ip); 956 if (!gfs2_alloc_get(ip))
957 return -ENOMEM;
957 958
958 error = gfs2_quota_lock(ip, nuid, ngid); 959 error = gfs2_quota_lock(ip, nuid, ngid);
959 if (error) 960 if (error)
@@ -981,8 +982,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
981 brelse(dibh); 982 brelse(dibh);
982 983
983 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { 984 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
984 gfs2_quota_change(ip, -ip->i_di.di_blocks, ouid, ogid); 985 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
985 gfs2_quota_change(ip, ip->i_di.di_blocks, nuid, ngid); 986 gfs2_quota_change(ip, -blocks, ouid, ogid);
987 gfs2_quota_change(ip, blocks, nuid, ngid);
986 } 988 }
987 989
988out_end_trans: 990out_end_trans:
@@ -1064,7 +1066,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1064 int error; 1066 int error;
1065 int unlock = 0; 1067 int unlock = 0;
1066 1068
1067 if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) { 1069 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1068 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); 1070 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1069 if (error) 1071 if (error)
1070 return error; 1072 return error;
@@ -1148,16 +1150,6 @@ const struct inode_operations gfs2_file_iops = {
1148 .removexattr = gfs2_removexattr, 1150 .removexattr = gfs2_removexattr,
1149}; 1151};
1150 1152
1151const struct inode_operations gfs2_dev_iops = {
1152 .permission = gfs2_permission,
1153 .setattr = gfs2_setattr,
1154 .getattr = gfs2_getattr,
1155 .setxattr = gfs2_setxattr,
1156 .getxattr = gfs2_getxattr,
1157 .listxattr = gfs2_listxattr,
1158 .removexattr = gfs2_removexattr,
1159};
1160
1161const struct inode_operations gfs2_dir_iops = { 1153const struct inode_operations gfs2_dir_iops = {
1162 .create = gfs2_create, 1154 .create = gfs2_create,
1163 .lookup = gfs2_lookup, 1155 .lookup = gfs2_lookup,
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
index fd8cee231e1d..14b4b797622a 100644
--- a/fs/gfs2/ops_inode.h
+++ b/fs/gfs2/ops_inode.h
@@ -15,7 +15,6 @@
15extern const struct inode_operations gfs2_file_iops; 15extern const struct inode_operations gfs2_file_iops;
16extern const struct inode_operations gfs2_dir_iops; 16extern const struct inode_operations gfs2_dir_iops;
17extern const struct inode_operations gfs2_symlink_iops; 17extern const struct inode_operations gfs2_symlink_iops;
18extern const struct inode_operations gfs2_dev_iops;
19extern const struct file_operations gfs2_file_fops; 18extern const struct file_operations gfs2_file_fops;
20extern const struct file_operations gfs2_dir_fops; 19extern const struct file_operations gfs2_dir_fops;
21extern const struct file_operations gfs2_file_fops_nolock; 20extern const struct file_operations gfs2_file_fops_nolock;
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 5e524217944a..2278c68b7e35 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -25,7 +25,6 @@
25#include "incore.h" 25#include "incore.h"
26#include "glock.h" 26#include "glock.h"
27#include "inode.h" 27#include "inode.h"
28#include "lm.h"
29#include "log.h" 28#include "log.h"
30#include "mount.h" 29#include "mount.h"
31#include "ops_super.h" 30#include "ops_super.h"
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a08dabd6ce90..56aaf915c59a 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -94,7 +94,7 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
94 struct gfs2_quota_data *qd; 94 struct gfs2_quota_data *qd;
95 int error; 95 int error;
96 96
97 qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_KERNEL); 97 qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_NOFS);
98 if (!qd) 98 if (!qd)
99 return -ENOMEM; 99 return -ENOMEM;
100 100
@@ -616,16 +616,9 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
616 s64 value; 616 s64 value;
617 int err = -EIO; 617 int err = -EIO;
618 618
619 if (gfs2_is_stuffed(ip)) { 619 if (gfs2_is_stuffed(ip))
620 struct gfs2_alloc *al = NULL;
621 al = gfs2_alloc_get(ip);
622 /* just request 1 blk */
623 al->al_requested = 1;
624 gfs2_inplace_reserve(ip);
625 gfs2_unstuff_dinode(ip, NULL); 620 gfs2_unstuff_dinode(ip, NULL);
626 gfs2_inplace_release(ip); 621
627 gfs2_alloc_put(ip);
628 }
629 page = grab_cache_page(mapping, index); 622 page = grab_cache_page(mapping, index);
630 if (!page) 623 if (!page)
631 return -ENOMEM; 624 return -ENOMEM;
@@ -690,14 +683,14 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
690 unsigned int qx, x; 683 unsigned int qx, x;
691 struct gfs2_quota_data *qd; 684 struct gfs2_quota_data *qd;
692 loff_t offset; 685 loff_t offset;
693 unsigned int nalloc = 0; 686 unsigned int nalloc = 0, blocks;
694 struct gfs2_alloc *al = NULL; 687 struct gfs2_alloc *al = NULL;
695 int error; 688 int error;
696 689
697 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), 690 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
698 &data_blocks, &ind_blocks); 691 &data_blocks, &ind_blocks);
699 692
700 ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_KERNEL); 693 ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_NOFS);
701 if (!ghs) 694 if (!ghs)
702 return -ENOMEM; 695 return -ENOMEM;
703 696
@@ -727,30 +720,33 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
727 nalloc++; 720 nalloc++;
728 } 721 }
729 722
730 if (nalloc) { 723 al = gfs2_alloc_get(ip);
731 al = gfs2_alloc_get(ip); 724 if (!al) {
725 error = -ENOMEM;
726 goto out_gunlock;
727 }
728 /*
729 * 1 blk for unstuffing inode if stuffed. We add this extra
730 * block to the reservation unconditionally. If the inode
731 * doesn't need unstuffing, the block will be released to the
732 * rgrp since it won't be allocated during the transaction
733 */
734 al->al_requested = 1;
735 /* +1 in the end for block requested above for unstuffing */
736 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 1;
732 737
733 al->al_requested = nalloc * (data_blocks + ind_blocks); 738 if (nalloc)
739 al->al_requested += nalloc * (data_blocks + ind_blocks);
740 error = gfs2_inplace_reserve(ip);
741 if (error)
742 goto out_alloc;
734 743
735 error = gfs2_inplace_reserve(ip); 744 if (nalloc)
736 if (error) 745 blocks += al->al_rgd->rd_length + nalloc * ind_blocks + RES_STATFS;
737 goto out_alloc; 746
738 747 error = gfs2_trans_begin(sdp, blocks, 0);
739 error = gfs2_trans_begin(sdp, 748 if (error)
740 al->al_rgd->rd_length + 749 goto out_ipres;
741 num_qd * data_blocks +
742 nalloc * ind_blocks +
743 RES_DINODE + num_qd +
744 RES_STATFS, 0);
745 if (error)
746 goto out_ipres;
747 } else {
748 error = gfs2_trans_begin(sdp,
749 num_qd * data_blocks +
750 RES_DINODE + num_qd, 0);
751 if (error)
752 goto out_gunlock;
753 }
754 750
755 for (x = 0; x < num_qd; x++) { 751 for (x = 0; x < num_qd; x++) {
756 qd = qda[x]; 752 qd = qda[x];
@@ -769,11 +765,9 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
769out_end_trans: 765out_end_trans:
770 gfs2_trans_end(sdp); 766 gfs2_trans_end(sdp);
771out_ipres: 767out_ipres:
772 if (nalloc) 768 gfs2_inplace_release(ip);
773 gfs2_inplace_release(ip);
774out_alloc: 769out_alloc:
775 if (nalloc) 770 gfs2_alloc_put(ip);
776 gfs2_alloc_put(ip);
777out_gunlock: 771out_gunlock:
778 gfs2_glock_dq_uninit(&i_gh); 772 gfs2_glock_dq_uninit(&i_gh);
779out: 773out:
@@ -1124,12 +1118,12 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
1124 error = -ENOMEM; 1118 error = -ENOMEM;
1125 1119
1126 sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks, 1120 sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks,
1127 sizeof(unsigned char *), GFP_KERNEL); 1121 sizeof(unsigned char *), GFP_NOFS);
1128 if (!sdp->sd_quota_bitmap) 1122 if (!sdp->sd_quota_bitmap)
1129 return error; 1123 return error;
1130 1124
1131 for (x = 0; x < sdp->sd_quota_chunks; x++) { 1125 for (x = 0; x < sdp->sd_quota_chunks; x++) {
1132 sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL); 1126 sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_NOFS);
1133 if (!sdp->sd_quota_bitmap[x]) 1127 if (!sdp->sd_quota_bitmap[x])
1134 goto fail; 1128 goto fail;
1135 } 1129 }
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index a8be1417051f..3b7f4b0e5dfe 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -32,4 +32,21 @@ int gfs2_quota_init(struct gfs2_sbd *sdp);
32void gfs2_quota_scan(struct gfs2_sbd *sdp); 32void gfs2_quota_scan(struct gfs2_sbd *sdp);
33void gfs2_quota_cleanup(struct gfs2_sbd *sdp); 33void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
34 34
35static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
36{
37 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
38 int ret;
39 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
40 return 0;
41 ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
42 if (ret)
43 return ret;
44 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
45 return 0;
46 ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
47 if (ret)
48 gfs2_quota_unlock(ip);
49 return ret;
50}
51
35#endif /* __QUOTA_DOT_H__ */ 52#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 6fb07d67ca8a..2888e4b4b1c5 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -20,7 +20,6 @@
20#include "bmap.h" 20#include "bmap.h"
21#include "glock.h" 21#include "glock.h"
22#include "glops.h" 22#include "glops.h"
23#include "lm.h"
24#include "lops.h" 23#include "lops.h"
25#include "meta_io.h" 24#include "meta_io.h"
26#include "recovery.h" 25#include "recovery.h"
@@ -69,7 +68,7 @@ int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
69 return 0; 68 return 0;
70 } 69 }
71 70
72 rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL); 71 rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS);
73 if (!rr) 72 if (!rr)
74 return -ENOMEM; 73 return -ENOMEM;
75 74
@@ -150,7 +149,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
150 struct gfs2_log_header_host *head) 149 struct gfs2_log_header_host *head)
151{ 150{
152 struct buffer_head *bh; 151 struct buffer_head *bh;
153 struct gfs2_log_header_host lh; 152 struct gfs2_log_header_host uninitialized_var(lh);
154 const u32 nothing = 0; 153 const u32 nothing = 0;
155 u32 hash; 154 u32 hash;
156 int error; 155 int error;
@@ -425,6 +424,16 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
425 return error; 424 return error;
426} 425}
427 426
427
428static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
429 unsigned int message)
430{
431 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
432 sdp->sd_lockstruct.ls_ops->lm_recovery_done(
433 sdp->sd_lockstruct.ls_lockspace, jid, message);
434}
435
436
428/** 437/**
429 * gfs2_recover_journal - recovery a given journal 438 * gfs2_recover_journal - recovery a given journal
430 * @jd: the struct gfs2_jdesc describing the journal 439 * @jd: the struct gfs2_jdesc describing the journal
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3552110b2e5f..7e8f0b1d6c6e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -14,6 +14,7 @@
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h> 16#include <linux/lm_interface.h>
17#include <linux/prefetch.h>
17 18
18#include "gfs2.h" 19#include "gfs2.h"
19#include "incore.h" 20#include "incore.h"
@@ -33,6 +34,16 @@
33#define BFITNOENT ((u32)~0) 34#define BFITNOENT ((u32)~0)
34#define NO_BLOCK ((u64)~0) 35#define NO_BLOCK ((u64)~0)
35 36
37#if BITS_PER_LONG == 32
38#define LBITMASK (0x55555555UL)
39#define LBITSKIP55 (0x55555555UL)
40#define LBITSKIP00 (0x00000000UL)
41#else
42#define LBITMASK (0x5555555555555555UL)
43#define LBITSKIP55 (0x5555555555555555UL)
44#define LBITSKIP00 (0x0000000000000000UL)
45#endif
46
36/* 47/*
37 * These routines are used by the resource group routines (rgrp.c) 48 * These routines are used by the resource group routines (rgrp.c)
38 * to keep track of block allocation. Each block is represented by two 49 * to keep track of block allocation. Each block is represented by two
@@ -53,7 +64,8 @@ static const char valid_change[16] = {
53}; 64};
54 65
55static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 66static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
56 unsigned char old_state, unsigned char new_state); 67 unsigned char old_state, unsigned char new_state,
68 unsigned int *n);
57 69
58/** 70/**
59 * gfs2_setbit - Set a bit in the bitmaps 71 * gfs2_setbit - Set a bit in the bitmaps
@@ -64,26 +76,32 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
64 * 76 *
65 */ 77 */
66 78
67static void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, 79static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
68 unsigned int buflen, u32 block, 80 unsigned char *buf2, unsigned int offset,
69 unsigned char new_state) 81 unsigned int buflen, u32 block,
82 unsigned char new_state)
70{ 83{
71 unsigned char *byte, *end, cur_state; 84 unsigned char *byte1, *byte2, *end, cur_state;
72 unsigned int bit; 85 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
73 86
74 byte = buffer + (block / GFS2_NBBY); 87 byte1 = buf1 + offset + (block / GFS2_NBBY);
75 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 88 end = buf1 + offset + buflen;
76 end = buffer + buflen;
77 89
78 gfs2_assert(rgd->rd_sbd, byte < end); 90 BUG_ON(byte1 >= end);
79 91
80 cur_state = (*byte >> bit) & GFS2_BIT_MASK; 92 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
81 93
82 if (valid_change[new_state * 4 + cur_state]) { 94 if (unlikely(!valid_change[new_state * 4 + cur_state])) {
83 *byte ^= cur_state << bit;
84 *byte |= new_state << bit;
85 } else
86 gfs2_consist_rgrpd(rgd); 95 gfs2_consist_rgrpd(rgd);
96 return;
97 }
98 *byte1 ^= (cur_state ^ new_state) << bit;
99
100 if (buf2) {
101 byte2 = buf2 + offset + (block / GFS2_NBBY);
102 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
103 *byte2 ^= (cur_state ^ new_state) << bit;
104 }
87} 105}
88 106
89/** 107/**
@@ -94,10 +112,12 @@ static void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
94 * 112 *
95 */ 113 */
96 114
97static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, 115static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
98 unsigned int buflen, u32 block) 116 const unsigned char *buffer,
117 unsigned int buflen, u32 block)
99{ 118{
100 unsigned char *byte, *end, cur_state; 119 const unsigned char *byte, *end;
120 unsigned char cur_state;
101 unsigned int bit; 121 unsigned int bit;
102 122
103 byte = buffer + (block / GFS2_NBBY); 123 byte = buffer + (block / GFS2_NBBY);
@@ -126,47 +146,66 @@ static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
126 * Return: the block number (bitmap buffer scope) that was found 146 * Return: the block number (bitmap buffer scope) that was found
127 */ 147 */
128 148
129static u32 gfs2_bitfit(unsigned char *buffer, unsigned int buflen, u32 goal, 149static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
130 unsigned char old_state) 150 u8 old_state)
131{ 151{
132 unsigned char *byte; 152 const u8 *byte, *start, *end;
133 u32 blk = goal; 153 int bit, startbit;
134 unsigned int bit, bitlong; 154 u32 g1, g2, misaligned;
135 unsigned long *plong, plong55; 155 unsigned long *plong;
136 156 unsigned long lskipval;
137 byte = buffer + (goal / GFS2_NBBY); 157
138 plong = (unsigned long *)(buffer + (goal / GFS2_NBBY)); 158 lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
139 bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; 159 g1 = (goal / GFS2_NBBY);
140 bitlong = bit; 160 start = buffer + g1;
141#if BITS_PER_LONG == 32 161 byte = start;
142 plong55 = 0x55555555; 162 end = buffer + buflen;
143#else 163 g2 = ALIGN(g1, sizeof(unsigned long));
144 plong55 = 0x5555555555555555; 164 plong = (unsigned long *)(buffer + g2);
145#endif 165 startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
146 while (byte < buffer + buflen) { 166 misaligned = g2 - g1;
147 167 if (!misaligned)
148 if (bitlong == 0 && old_state == 0 && *plong == plong55) { 168 goto ulong_aligned;
149 plong++; 169/* parse the bitmap a byte at a time */
150 byte += sizeof(unsigned long); 170misaligned:
151 blk += sizeof(unsigned long) * GFS2_NBBY; 171 while (byte < end) {
152 continue; 172 if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
173 return goal +
174 (((byte - start) * GFS2_NBBY) +
175 ((bit - startbit) >> 1));
153 } 176 }
154 if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
155 return blk;
156 bit += GFS2_BIT_SIZE; 177 bit += GFS2_BIT_SIZE;
157 if (bit >= 8) { 178 if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
158 bit = 0; 179 bit = 0;
159 byte++; 180 byte++;
181 misaligned--;
182 if (!misaligned) {
183 plong = (unsigned long *)byte;
184 goto ulong_aligned;
185 }
160 } 186 }
161 bitlong += GFS2_BIT_SIZE;
162 if (bitlong >= sizeof(unsigned long) * 8) {
163 bitlong = 0;
164 plong++;
165 }
166
167 blk++;
168 } 187 }
188 return BFITNOENT;
169 189
190/* parse the bitmap a unsigned long at a time */
191ulong_aligned:
192 /* Stop at "end - 1" or else prefetch can go past the end and segfault.
193 We could "if" it but we'd lose some of the performance gained.
194 This way will only slow down searching the very last 4/8 bytes
195 depending on architecture. I've experimented with several ways
196 of writing this section such as using an else before the goto
197 but this one seems to be the fastest. */
198 while ((unsigned char *)plong < end - 1) {
199 prefetch(plong + 1);
200 if (((*plong) & LBITMASK) != lskipval)
201 break;
202 plong++;
203 }
204 if ((unsigned char *)plong < end) {
205 byte = (const u8 *)plong;
206 misaligned += sizeof(unsigned long) - 1;
207 goto misaligned;
208 }
170 return BFITNOENT; 209 return BFITNOENT;
171} 210}
172 211
@@ -179,14 +218,14 @@ static u32 gfs2_bitfit(unsigned char *buffer, unsigned int buflen, u32 goal,
179 * Returns: The number of bits 218 * Returns: The number of bits
180 */ 219 */
181 220
182static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, unsigned char *buffer, 221static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
183 unsigned int buflen, unsigned char state) 222 unsigned int buflen, u8 state)
184{ 223{
185 unsigned char *byte = buffer; 224 const u8 *byte = buffer;
186 unsigned char *end = buffer + buflen; 225 const u8 *end = buffer + buflen;
187 unsigned char state1 = state << 2; 226 const u8 state1 = state << 2;
188 unsigned char state2 = state << 4; 227 const u8 state2 = state << 4;
189 unsigned char state3 = state << 6; 228 const u8 state3 = state << 6;
190 u32 count = 0; 229 u32 count = 0;
191 230
192 for (; byte < end; byte++) { 231 for (; byte < end; byte++) {
@@ -353,7 +392,7 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp)
353 } 392 }
354 393
355 kfree(rgd->rd_bits); 394 kfree(rgd->rd_bits);
356 kfree(rgd); 395 kmem_cache_free(gfs2_rgrpd_cachep, rgd);
357 } 396 }
358} 397}
359 398
@@ -516,7 +555,7 @@ static int read_rindex_entry(struct gfs2_inode *ip,
516 return error; 555 return error;
517 } 556 }
518 557
519 rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS); 558 rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
520 error = -ENOMEM; 559 error = -ENOMEM;
521 if (!rgd) 560 if (!rgd)
522 return error; 561 return error;
@@ -539,7 +578,7 @@ static int read_rindex_entry(struct gfs2_inode *ip,
539 return error; 578 return error;
540 579
541 rgd->rd_gl->gl_object = rgd; 580 rgd->rd_gl->gl_object = rgd;
542 rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1; 581 rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
543 rgd->rd_flags |= GFS2_RDF_CHECK; 582 rgd->rd_flags |= GFS2_RDF_CHECK;
544 return error; 583 return error;
545} 584}
@@ -575,7 +614,7 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
575 } 614 }
576 } 615 }
577 616
578 sdp->sd_rindex_vn = ip->i_gl->gl_vn; 617 sdp->sd_rindex_uptodate = 1;
579 return 0; 618 return 0;
580} 619}
581 620
@@ -609,7 +648,7 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip)
609 } 648 }
610 } 649 }
611 650
612 sdp->sd_rindex_vn = ip->i_gl->gl_vn; 651 sdp->sd_rindex_uptodate = 1;
613 return 0; 652 return 0;
614} 653}
615 654
@@ -642,9 +681,9 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
642 return error; 681 return error;
643 682
644 /* Read new copy from disk if we don't have the latest */ 683 /* Read new copy from disk if we don't have the latest */
645 if (sdp->sd_rindex_vn != gl->gl_vn) { 684 if (!sdp->sd_rindex_uptodate) {
646 mutex_lock(&sdp->sd_rindex_mutex); 685 mutex_lock(&sdp->sd_rindex_mutex);
647 if (sdp->sd_rindex_vn != gl->gl_vn) { 686 if (!sdp->sd_rindex_uptodate) {
648 error = gfs2_ri_update(ip); 687 error = gfs2_ri_update(ip);
649 if (error) 688 if (error)
650 gfs2_glock_dq_uninit(ri_gh); 689 gfs2_glock_dq_uninit(ri_gh);
@@ -655,21 +694,31 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
655 return error; 694 return error;
656} 695}
657 696
658static void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf) 697static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
659{ 698{
660 const struct gfs2_rgrp *str = buf; 699 const struct gfs2_rgrp *str = buf;
700 struct gfs2_rgrp_host *rg = &rgd->rd_rg;
701 u32 rg_flags;
661 702
662 rg->rg_flags = be32_to_cpu(str->rg_flags); 703 rg_flags = be32_to_cpu(str->rg_flags);
704 if (rg_flags & GFS2_RGF_NOALLOC)
705 rgd->rd_flags |= GFS2_RDF_NOALLOC;
706 else
707 rgd->rd_flags &= ~GFS2_RDF_NOALLOC;
663 rg->rg_free = be32_to_cpu(str->rg_free); 708 rg->rg_free = be32_to_cpu(str->rg_free);
664 rg->rg_dinodes = be32_to_cpu(str->rg_dinodes); 709 rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
665 rg->rg_igeneration = be64_to_cpu(str->rg_igeneration); 710 rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
666} 711}
667 712
668static void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf) 713static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
669{ 714{
670 struct gfs2_rgrp *str = buf; 715 struct gfs2_rgrp *str = buf;
716 struct gfs2_rgrp_host *rg = &rgd->rd_rg;
717 u32 rg_flags = 0;
671 718
672 str->rg_flags = cpu_to_be32(rg->rg_flags); 719 if (rgd->rd_flags & GFS2_RDF_NOALLOC)
720 rg_flags |= GFS2_RGF_NOALLOC;
721 str->rg_flags = cpu_to_be32(rg_flags);
673 str->rg_free = cpu_to_be32(rg->rg_free); 722 str->rg_free = cpu_to_be32(rg->rg_free);
674 str->rg_dinodes = cpu_to_be32(rg->rg_dinodes); 723 str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
675 str->__pad = cpu_to_be32(0); 724 str->__pad = cpu_to_be32(0);
@@ -726,9 +775,9 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
726 } 775 }
727 } 776 }
728 777
729 if (rgd->rd_rg_vn != gl->gl_vn) { 778 if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
730 gfs2_rgrp_in(&rgd->rd_rg, (rgd->rd_bits[0].bi_bh)->b_data); 779 gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
731 rgd->rd_rg_vn = gl->gl_vn; 780 rgd->rd_flags |= GFS2_RDF_UPTODATE;
732 } 781 }
733 782
734 spin_lock(&sdp->sd_rindex_spin); 783 spin_lock(&sdp->sd_rindex_spin);
@@ -840,7 +889,7 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
840 struct gfs2_sbd *sdp = rgd->rd_sbd; 889 struct gfs2_sbd *sdp = rgd->rd_sbd;
841 int ret = 0; 890 int ret = 0;
842 891
843 if (rgd->rd_rg.rg_flags & GFS2_RGF_NOALLOC) 892 if (rgd->rd_flags & GFS2_RDF_NOALLOC)
844 return 0; 893 return 0;
845 894
846 spin_lock(&sdp->sd_rindex_spin); 895 spin_lock(&sdp->sd_rindex_spin);
@@ -866,13 +915,15 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
866 u32 goal = 0, block; 915 u32 goal = 0, block;
867 u64 no_addr; 916 u64 no_addr;
868 struct gfs2_sbd *sdp = rgd->rd_sbd; 917 struct gfs2_sbd *sdp = rgd->rd_sbd;
918 unsigned int n;
869 919
870 for(;;) { 920 for(;;) {
871 if (goal >= rgd->rd_data) 921 if (goal >= rgd->rd_data)
872 break; 922 break;
873 down_write(&sdp->sd_log_flush_lock); 923 down_write(&sdp->sd_log_flush_lock);
924 n = 1;
874 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, 925 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
875 GFS2_BLKST_UNLINKED); 926 GFS2_BLKST_UNLINKED, &n);
876 up_write(&sdp->sd_log_flush_lock); 927 up_write(&sdp->sd_log_flush_lock);
877 if (block == BFITNOENT) 928 if (block == BFITNOENT)
878 break; 929 break;
@@ -904,24 +955,20 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
904static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp, 955static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
905 u64 rglast) 956 u64 rglast)
906{ 957{
907 struct gfs2_rgrpd *rgd = NULL; 958 struct gfs2_rgrpd *rgd;
908 959
909 spin_lock(&sdp->sd_rindex_spin); 960 spin_lock(&sdp->sd_rindex_spin);
910 961
911 if (list_empty(&sdp->sd_rindex_recent_list)) 962 if (rglast) {
912 goto out; 963 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
913 964 if (rgrp_contains_block(rgd, rglast))
914 if (!rglast) 965 goto out;
915 goto first; 966 }
916
917 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
918 if (rgd->rd_addr == rglast)
919 goto out;
920 } 967 }
921 968 rgd = NULL;
922first: 969 if (!list_empty(&sdp->sd_rindex_recent_list))
923 rgd = list_entry(sdp->sd_rindex_recent_list.next, struct gfs2_rgrpd, 970 rgd = list_entry(sdp->sd_rindex_recent_list.next,
924 rd_recent); 971 struct gfs2_rgrpd, rd_recent);
925out: 972out:
926 spin_unlock(&sdp->sd_rindex_spin); 973 spin_unlock(&sdp->sd_rindex_spin);
927 return rgd; 974 return rgd;
@@ -1067,7 +1114,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1067 1114
1068 /* Try recently successful rgrps */ 1115 /* Try recently successful rgrps */
1069 1116
1070 rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc); 1117 rgd = recent_rgrp_first(sdp, ip->i_goal);
1071 1118
1072 while (rgd) { 1119 while (rgd) {
1073 rg_locked = 0; 1120 rg_locked = 0;
@@ -1151,8 +1198,6 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1151 } 1198 }
1152 1199
1153out: 1200out:
1154 ip->i_last_rg_alloc = rgd->rd_addr;
1155
1156 if (begin) { 1201 if (begin) {
1157 recent_rgrp_add(rgd); 1202 recent_rgrp_add(rgd);
1158 rgd = gfs2_rgrpd_get_next(rgd); 1203 rgd = gfs2_rgrpd_get_next(rgd);
@@ -1275,6 +1320,7 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1275 * @goal: the goal block within the RG (start here to search for avail block) 1320 * @goal: the goal block within the RG (start here to search for avail block)
1276 * @old_state: GFS2_BLKST_XXX the before-allocation state to find 1321 * @old_state: GFS2_BLKST_XXX the before-allocation state to find
1277 * @new_state: GFS2_BLKST_XXX the after-allocation block state 1322 * @new_state: GFS2_BLKST_XXX the after-allocation block state
1323 * @n: The extent length
1278 * 1324 *
1279 * Walk rgrp's bitmap to find bits that represent a block in @old_state. 1325 * Walk rgrp's bitmap to find bits that represent a block in @old_state.
1280 * Add the found bitmap buffer to the transaction. 1326 * Add the found bitmap buffer to the transaction.
@@ -1290,13 +1336,17 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1290 */ 1336 */
1291 1337
1292static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 1338static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1293 unsigned char old_state, unsigned char new_state) 1339 unsigned char old_state, unsigned char new_state,
1340 unsigned int *n)
1294{ 1341{
1295 struct gfs2_bitmap *bi = NULL; 1342 struct gfs2_bitmap *bi = NULL;
1296 u32 length = rgd->rd_length; 1343 const u32 length = rgd->rd_length;
1297 u32 blk = 0; 1344 u32 blk = 0;
1298 unsigned int buf, x; 1345 unsigned int buf, x;
1346 const unsigned int elen = *n;
1347 const u8 *buffer;
1299 1348
1349 *n = 0;
1300 /* Find bitmap block that contains bits for goal block */ 1350 /* Find bitmap block that contains bits for goal block */
1301 for (buf = 0; buf < length; buf++) { 1351 for (buf = 0; buf < length; buf++) {
1302 bi = rgd->rd_bits + buf; 1352 bi = rgd->rd_bits + buf;
@@ -1317,12 +1367,11 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1317 for (x = 0; x <= length; x++) { 1367 for (x = 0; x <= length; x++) {
1318 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone 1368 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
1319 bitmaps, so we must search the originals for that. */ 1369 bitmaps, so we must search the originals for that. */
1370 buffer = bi->bi_bh->b_data + bi->bi_offset;
1320 if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) 1371 if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1321 blk = gfs2_bitfit(bi->bi_clone + bi->bi_offset, 1372 buffer = bi->bi_clone + bi->bi_offset;
1322 bi->bi_len, goal, old_state); 1373
1323 else 1374 blk = gfs2_bitfit(buffer, bi->bi_len, goal, old_state);
1324 blk = gfs2_bitfit(bi->bi_bh->b_data + bi->bi_offset,
1325 bi->bi_len, goal, old_state);
1326 if (blk != BFITNOENT) 1375 if (blk != BFITNOENT)
1327 break; 1376 break;
1328 1377
@@ -1333,12 +1382,23 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1333 } 1382 }
1334 1383
1335 if (blk != BFITNOENT && old_state != new_state) { 1384 if (blk != BFITNOENT && old_state != new_state) {
1385 *n = 1;
1336 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1386 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1337 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, 1387 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1338 bi->bi_len, blk, new_state); 1388 bi->bi_len, blk, new_state);
1339 if (bi->bi_clone) 1389 goal = blk;
1340 gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset, 1390 while (*n < elen) {
1341 bi->bi_len, blk, new_state); 1391 goal++;
1392 if (goal >= (bi->bi_len * GFS2_NBBY))
1393 break;
1394 if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
1395 GFS2_BLKST_FREE)
1396 break;
1397 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone,
1398 bi->bi_offset, bi->bi_len, goal,
1399 new_state);
1400 (*n)++;
1401 }
1342 } 1402 }
1343 1403
1344 return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk; 1404 return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk;
@@ -1393,7 +1453,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1393 bi->bi_len); 1453 bi->bi_len);
1394 } 1454 }
1395 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1455 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1396 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, 1456 gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
1397 bi->bi_len, buf_blk, new_state); 1457 bi->bi_len, buf_blk, new_state);
1398 } 1458 }
1399 1459
@@ -1401,13 +1461,13 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1401} 1461}
1402 1462
1403/** 1463/**
1404 * gfs2_alloc_data - Allocate a data block 1464 * gfs2_alloc_block - Allocate a block
1405 * @ip: the inode to allocate the data block for 1465 * @ip: the inode to allocate the block for
1406 * 1466 *
1407 * Returns: the allocated block 1467 * Returns: the allocated block
1408 */ 1468 */
1409 1469
1410u64 gfs2_alloc_data(struct gfs2_inode *ip) 1470u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
1411{ 1471{
1412 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1472 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1413 struct gfs2_alloc *al = ip->i_alloc; 1473 struct gfs2_alloc *al = ip->i_alloc;
@@ -1415,77 +1475,31 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip)
1415 u32 goal, blk; 1475 u32 goal, blk;
1416 u64 block; 1476 u64 block;
1417 1477
1418 if (rgrp_contains_block(rgd, ip->i_di.di_goal_data)) 1478 if (rgrp_contains_block(rgd, ip->i_goal))
1419 goal = ip->i_di.di_goal_data - rgd->rd_data0; 1479 goal = ip->i_goal - rgd->rd_data0;
1420 else 1480 else
1421 goal = rgd->rd_last_alloc_data; 1481 goal = rgd->rd_last_alloc;
1422 1482
1423 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED); 1483 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n);
1424 BUG_ON(blk == BFITNOENT); 1484 BUG_ON(blk == BFITNOENT);
1425 rgd->rd_last_alloc_data = blk;
1426 1485
1486 rgd->rd_last_alloc = blk;
1427 block = rgd->rd_data0 + blk; 1487 block = rgd->rd_data0 + blk;
1428 ip->i_di.di_goal_data = block; 1488 ip->i_goal = block;
1429 1489
1430 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); 1490 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free >= *n);
1431 rgd->rd_rg.rg_free--; 1491 rgd->rd_rg.rg_free -= *n;
1432 1492
1433 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1493 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1434 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); 1494 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1435 1495
1436 al->al_alloced++; 1496 al->al_alloced += *n;
1437 1497
1438 gfs2_statfs_change(sdp, 0, -1, 0); 1498 gfs2_statfs_change(sdp, 0, -*n, 0);
1439 gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid); 1499 gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid);
1440 1500
1441 spin_lock(&sdp->sd_rindex_spin); 1501 spin_lock(&sdp->sd_rindex_spin);
1442 rgd->rd_free_clone--; 1502 rgd->rd_free_clone -= *n;
1443 spin_unlock(&sdp->sd_rindex_spin);
1444
1445 return block;
1446}
1447
1448/**
1449 * gfs2_alloc_meta - Allocate a metadata block
1450 * @ip: the inode to allocate the metadata block for
1451 *
1452 * Returns: the allocated block
1453 */
1454
1455u64 gfs2_alloc_meta(struct gfs2_inode *ip)
1456{
1457 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1458 struct gfs2_alloc *al = ip->i_alloc;
1459 struct gfs2_rgrpd *rgd = al->al_rgd;
1460 u32 goal, blk;
1461 u64 block;
1462
1463 if (rgrp_contains_block(rgd, ip->i_di.di_goal_meta))
1464 goal = ip->i_di.di_goal_meta - rgd->rd_data0;
1465 else
1466 goal = rgd->rd_last_alloc_meta;
1467
1468 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
1469 BUG_ON(blk == BFITNOENT);
1470 rgd->rd_last_alloc_meta = blk;
1471
1472 block = rgd->rd_data0 + blk;
1473 ip->i_di.di_goal_meta = block;
1474
1475 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1476 rgd->rd_rg.rg_free--;
1477
1478 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1479 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1480
1481 al->al_alloced++;
1482
1483 gfs2_statfs_change(sdp, 0, -1, 0);
1484 gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid);
1485 gfs2_trans_add_unrevoke(sdp, block);
1486
1487 spin_lock(&sdp->sd_rindex_spin);
1488 rgd->rd_free_clone--;
1489 spin_unlock(&sdp->sd_rindex_spin); 1503 spin_unlock(&sdp->sd_rindex_spin);
1490 1504
1491 return block; 1505 return block;
@@ -1505,12 +1519,13 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
1505 struct gfs2_rgrpd *rgd = al->al_rgd; 1519 struct gfs2_rgrpd *rgd = al->al_rgd;
1506 u32 blk; 1520 u32 blk;
1507 u64 block; 1521 u64 block;
1522 unsigned int n = 1;
1508 1523
1509 blk = rgblk_search(rgd, rgd->rd_last_alloc_meta, 1524 blk = rgblk_search(rgd, rgd->rd_last_alloc,
1510 GFS2_BLKST_FREE, GFS2_BLKST_DINODE); 1525 GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n);
1511 BUG_ON(blk == BFITNOENT); 1526 BUG_ON(blk == BFITNOENT);
1512 1527
1513 rgd->rd_last_alloc_meta = blk; 1528 rgd->rd_last_alloc = blk;
1514 1529
1515 block = rgd->rd_data0 + blk; 1530 block = rgd->rd_data0 + blk;
1516 1531
@@ -1519,12 +1534,12 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
1519 rgd->rd_rg.rg_dinodes++; 1534 rgd->rd_rg.rg_dinodes++;
1520 *generation = rgd->rd_rg.rg_igeneration++; 1535 *generation = rgd->rd_rg.rg_igeneration++;
1521 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1536 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1522 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); 1537 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1523 1538
1524 al->al_alloced++; 1539 al->al_alloced++;
1525 1540
1526 gfs2_statfs_change(sdp, 0, -1, +1); 1541 gfs2_statfs_change(sdp, 0, -1, +1);
1527 gfs2_trans_add_unrevoke(sdp, block); 1542 gfs2_trans_add_unrevoke(sdp, block, 1);
1528 1543
1529 spin_lock(&sdp->sd_rindex_spin); 1544 spin_lock(&sdp->sd_rindex_spin);
1530 rgd->rd_free_clone--; 1545 rgd->rd_free_clone--;
@@ -1553,7 +1568,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1553 rgd->rd_rg.rg_free += blen; 1568 rgd->rd_rg.rg_free += blen;
1554 1569
1555 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1570 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1556 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); 1571 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1557 1572
1558 gfs2_trans_add_rg(rgd); 1573 gfs2_trans_add_rg(rgd);
1559 1574
@@ -1581,7 +1596,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1581 rgd->rd_rg.rg_free += blen; 1596 rgd->rd_rg.rg_free += blen;
1582 1597
1583 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1598 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1584 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); 1599 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1585 1600
1586 gfs2_trans_add_rg(rgd); 1601 gfs2_trans_add_rg(rgd);
1587 1602
@@ -1601,7 +1616,7 @@ void gfs2_unlink_di(struct inode *inode)
1601 if (!rgd) 1616 if (!rgd)
1602 return; 1617 return;
1603 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1618 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1604 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); 1619 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1605 gfs2_trans_add_rg(rgd); 1620 gfs2_trans_add_rg(rgd);
1606} 1621}
1607 1622
@@ -1621,7 +1636,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
1621 rgd->rd_rg.rg_free++; 1636 rgd->rd_rg.rg_free++;
1622 1637
1623 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1638 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1624 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); 1639 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1625 1640
1626 gfs2_statfs_change(sdp, 0, +1, -1); 1641 gfs2_statfs_change(sdp, 0, +1, -1);
1627 gfs2_trans_add_rg(rgd); 1642 gfs2_trans_add_rg(rgd);
@@ -1699,8 +1714,7 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
1699 * 1714 *
1700 */ 1715 */
1701 1716
1702void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, 1717void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
1703 int flags)
1704{ 1718{
1705 unsigned int x; 1719 unsigned int x;
1706 1720
@@ -1708,7 +1722,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
1708 GFP_NOFS | __GFP_NOFAIL); 1722 GFP_NOFS | __GFP_NOFAIL);
1709 for (x = 0; x < rlist->rl_rgrps; x++) 1723 for (x = 0; x < rlist->rl_rgrps; x++)
1710 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, 1724 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
1711 state, flags, 1725 state, 0,
1712 &rlist->rl_ghs[x]); 1726 &rlist->rl_ghs[x]);
1713} 1727}
1714 1728
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 149bb161f4b6..3181c7e624bf 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -46,8 +46,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip);
46 46
47unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); 47unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block);
48 48
49u64 gfs2_alloc_data(struct gfs2_inode *ip); 49u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n);
50u64 gfs2_alloc_meta(struct gfs2_inode *ip);
51u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); 50u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation);
52 51
53void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); 52void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
@@ -64,8 +63,7 @@ struct gfs2_rgrp_list {
64 63
65void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, 64void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
66 u64 block); 65 u64 block);
67void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, 66void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
68 int flags);
69void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); 67void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
70u64 gfs2_ri_total(struct gfs2_sbd *sdp); 68u64 gfs2_ri_total(struct gfs2_sbd *sdp);
71 69
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ef0562c3bc71..7aeacbc65f35 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -210,7 +210,7 @@ int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
210 struct page *page; 210 struct page *page;
211 struct bio *bio; 211 struct bio *bio;
212 212
213 page = alloc_page(GFP_KERNEL); 213 page = alloc_page(GFP_NOFS);
214 if (unlikely(!page)) 214 if (unlikely(!page))
215 return -ENOBUFS; 215 return -ENOBUFS;
216 216
@@ -218,7 +218,7 @@ int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
218 ClearPageDirty(page); 218 ClearPageDirty(page);
219 lock_page(page); 219 lock_page(page);
220 220
221 bio = bio_alloc(GFP_KERNEL, 1); 221 bio = bio_alloc(GFP_NOFS, 1);
222 if (unlikely(!bio)) { 222 if (unlikely(!bio)) {
223 __free_page(page); 223 __free_page(page);
224 return -ENOBUFS; 224 return -ENOBUFS;
@@ -316,6 +316,7 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
316 sdp->sd_heightsize[x] = space; 316 sdp->sd_heightsize[x] = space;
317 } 317 }
318 sdp->sd_max_height = x; 318 sdp->sd_max_height = x;
319 sdp->sd_heightsize[x] = ~0;
319 gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); 320 gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
320 321
321 sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - 322 sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
@@ -334,6 +335,7 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
334 sdp->sd_jheightsize[x] = space; 335 sdp->sd_jheightsize[x] = space;
335 } 336 }
336 sdp->sd_max_jheight = x; 337 sdp->sd_max_jheight = x;
338 sdp->sd_jheightsize[x] = ~0;
337 gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); 339 gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
338 340
339 return 0; 341 return 0;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 60a870e430be..44361ecc44f7 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -17,6 +17,7 @@ void gfs2_tune_init(struct gfs2_tune *gt);
17int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); 17int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
18int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); 18int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
19int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector); 19int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector);
20void gfs2_lm_unmount(struct gfs2_sbd *sdp);
20 21
21static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) 22static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
22{ 23{
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index eaa3b7b2f99e..9ab9fc85ecd0 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -20,7 +20,6 @@
20 20
21#include "gfs2.h" 21#include "gfs2.h"
22#include "incore.h" 22#include "incore.h"
23#include "lm.h"
24#include "sys.h" 23#include "sys.h"
25#include "super.h" 24#include "super.h"
26#include "glock.h" 25#include "glock.h"
@@ -328,15 +327,9 @@ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
328} \ 327} \
329static struct counters_attr counters_attr_##name = __ATTR_RO(name) 328static struct counters_attr counters_attr_##name = __ATTR_RO(name)
330 329
331COUNTERS_ATTR(glock_count, "%u\n");
332COUNTERS_ATTR(glock_held_count, "%u\n");
333COUNTERS_ATTR(inode_count, "%u\n");
334COUNTERS_ATTR(reclaimed, "%u\n"); 330COUNTERS_ATTR(reclaimed, "%u\n");
335 331
336static struct attribute *counters_attrs[] = { 332static struct attribute *counters_attrs[] = {
337 &counters_attr_glock_count.attr,
338 &counters_attr_glock_held_count.attr,
339 &counters_attr_inode_count.attr,
340 &counters_attr_reclaimed.attr, 333 &counters_attr_reclaimed.attr,
341 NULL, 334 NULL,
342}; 335};
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 73e5d92a657c..f677b8a83f0c 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -146,30 +146,25 @@ void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
146 lops_add(sdp, &bd->bd_le); 146 lops_add(sdp, &bd->bd_le);
147} 147}
148 148
149void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno) 149void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
150{ 150{
151 struct gfs2_bufdata *bd; 151 struct gfs2_bufdata *bd, *tmp;
152 int found = 0; 152 struct gfs2_trans *tr = current->journal_info;
153 unsigned int n = len;
153 154
154 gfs2_log_lock(sdp); 155 gfs2_log_lock(sdp);
155 156 list_for_each_entry_safe(bd, tmp, &sdp->sd_log_le_revoke, bd_le.le_list) {
156 list_for_each_entry(bd, &sdp->sd_log_le_revoke, bd_le.le_list) { 157 if ((bd->bd_blkno >= blkno) && (bd->bd_blkno < (blkno + len))) {
157 if (bd->bd_blkno == blkno) {
158 list_del_init(&bd->bd_le.le_list); 158 list_del_init(&bd->bd_le.le_list);
159 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); 159 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
160 sdp->sd_log_num_revoke--; 160 sdp->sd_log_num_revoke--;
161 found = 1; 161 kmem_cache_free(gfs2_bufdata_cachep, bd);
162 break; 162 tr->tr_num_revoke_rm++;
163 if (--n == 0)
164 break;
163 } 165 }
164 } 166 }
165
166 gfs2_log_unlock(sdp); 167 gfs2_log_unlock(sdp);
167
168 if (found) {
169 struct gfs2_trans *tr = current->journal_info;
170 kmem_cache_free(gfs2_bufdata_cachep, bd);
171 tr->tr_num_revoke_rm++;
172 }
173} 168}
174 169
175void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd) 170void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd)
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index e826f0dab80a..edf9d4bd908e 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -32,7 +32,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp);
32 32
33void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); 33void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
34void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); 34void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
35void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); 35void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
36void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); 36void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
37 37
38#endif /* __TRANS_DOT_H__ */ 38#endif /* __TRANS_DOT_H__ */
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 424a0774eda8..d31e355c61fb 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -19,12 +19,12 @@
19#include "gfs2.h" 19#include "gfs2.h"
20#include "incore.h" 20#include "incore.h"
21#include "glock.h" 21#include "glock.h"
22#include "lm.h"
23#include "util.h" 22#include "util.h"
24 23
25struct kmem_cache *gfs2_glock_cachep __read_mostly; 24struct kmem_cache *gfs2_glock_cachep __read_mostly;
26struct kmem_cache *gfs2_inode_cachep __read_mostly; 25struct kmem_cache *gfs2_inode_cachep __read_mostly;
27struct kmem_cache *gfs2_bufdata_cachep __read_mostly; 26struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
27struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
28 28
29void gfs2_assert_i(struct gfs2_sbd *sdp) 29void gfs2_assert_i(struct gfs2_sbd *sdp)
30{ 30{
@@ -32,6 +32,28 @@ void gfs2_assert_i(struct gfs2_sbd *sdp)
32 sdp->sd_fsname); 32 sdp->sd_fsname);
33} 33}
34 34
35int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
36{
37 va_list args;
38
39 if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
40 return 0;
41
42 va_start(args, fmt);
43 vprintk(fmt, args);
44 va_end(args);
45
46 fs_err(sdp, "about to withdraw this file system\n");
47 BUG_ON(sdp->sd_args.ar_debug);
48
49 fs_err(sdp, "telling LM to withdraw\n");
50 gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
51 fs_err(sdp, "withdrawn\n");
52 dump_stack();
53
54 return -1;
55}
56
35/** 57/**
36 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false 58 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
37 * Returns: -1 if this call withdrew the machine, 59 * Returns: -1 if this call withdrew the machine,
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 28938a46cf47..509c5d60bd80 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -147,6 +147,7 @@ gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__);
147extern struct kmem_cache *gfs2_glock_cachep; 147extern struct kmem_cache *gfs2_glock_cachep;
148extern struct kmem_cache *gfs2_inode_cachep; 148extern struct kmem_cache *gfs2_inode_cachep;
149extern struct kmem_cache *gfs2_bufdata_cachep; 149extern struct kmem_cache *gfs2_bufdata_cachep;
150extern struct kmem_cache *gfs2_rgrpd_cachep;
150 151
151static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, 152static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
152 unsigned int *p) 153 unsigned int *p)
@@ -163,6 +164,7 @@ gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
163 164
164void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, 165void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
165 unsigned int bit, int new_value); 166 unsigned int bit, int new_value);
167int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...);
166 168
167#endif /* __UTIL_DOT_H__ */ 169#endif /* __UTIL_DOT_H__ */
168 170
diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c
index 878bf25dbc6a..92fb358ce824 100644
--- a/fs/hfs/brec.c
+++ b/fs/hfs/brec.c
@@ -229,7 +229,7 @@ skip:
229static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) 229static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd)
230{ 230{
231 struct hfs_btree *tree; 231 struct hfs_btree *tree;
232 struct hfs_bnode *node, *new_node; 232 struct hfs_bnode *node, *new_node, *next_node;
233 struct hfs_bnode_desc node_desc; 233 struct hfs_bnode_desc node_desc;
234 int num_recs, new_rec_off, new_off, old_rec_off; 234 int num_recs, new_rec_off, new_off, old_rec_off;
235 int data_start, data_end, size; 235 int data_start, data_end, size;
@@ -248,6 +248,17 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd)
248 new_node->type = node->type; 248 new_node->type = node->type;
249 new_node->height = node->height; 249 new_node->height = node->height;
250 250
251 if (node->next)
252 next_node = hfs_bnode_find(tree, node->next);
253 else
254 next_node = NULL;
255
256 if (IS_ERR(next_node)) {
257 hfs_bnode_put(node);
258 hfs_bnode_put(new_node);
259 return next_node;
260 }
261
251 size = tree->node_size / 2 - node->num_recs * 2 - 14; 262 size = tree->node_size / 2 - node->num_recs * 2 - 14;
252 old_rec_off = tree->node_size - 4; 263 old_rec_off = tree->node_size - 4;
253 num_recs = 1; 264 num_recs = 1;
@@ -261,6 +272,8 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd)
261 /* panic? */ 272 /* panic? */
262 hfs_bnode_put(node); 273 hfs_bnode_put(node);
263 hfs_bnode_put(new_node); 274 hfs_bnode_put(new_node);
275 if (next_node)
276 hfs_bnode_put(next_node);
264 return ERR_PTR(-ENOSPC); 277 return ERR_PTR(-ENOSPC);
265 } 278 }
266 279
@@ -315,8 +328,7 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd)
315 hfs_bnode_write(node, &node_desc, 0, sizeof(node_desc)); 328 hfs_bnode_write(node, &node_desc, 0, sizeof(node_desc));
316 329
317 /* update next bnode header */ 330 /* update next bnode header */
318 if (new_node->next) { 331 if (next_node) {
319 struct hfs_bnode *next_node = hfs_bnode_find(tree, new_node->next);
320 next_node->prev = new_node->this; 332 next_node->prev = new_node->this;
321 hfs_bnode_read(next_node, &node_desc, 0, sizeof(node_desc)); 333 hfs_bnode_read(next_node, &node_desc, 0, sizeof(node_desc));
322 node_desc.prev = cpu_to_be32(next_node->prev); 334 node_desc.prev = cpu_to_be32(next_node->prev);
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 29683645fa0a..5f4023678251 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -340,16 +340,23 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
340 340
341 if (inode->i_nlink > 0) 341 if (inode->i_nlink > 0)
342 drop_nlink(inode); 342 drop_nlink(inode);
343 hfsplus_delete_inode(inode); 343 if (inode->i_ino == cnid)
344 if (inode->i_ino != cnid && !inode->i_nlink) { 344 clear_nlink(inode);
345 if (!atomic_read(&HFSPLUS_I(inode).opencnt)) { 345 if (!inode->i_nlink) {
346 res = hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL); 346 if (inode->i_ino != cnid) {
347 if (!res) 347 HFSPLUS_SB(sb).file_count--;
348 hfsplus_delete_inode(inode); 348 if (!atomic_read(&HFSPLUS_I(inode).opencnt)) {
349 res = hfsplus_delete_cat(inode->i_ino,
350 HFSPLUS_SB(sb).hidden_dir,
351 NULL);
352 if (!res)
353 hfsplus_delete_inode(inode);
354 } else
355 inode->i_flags |= S_DEAD;
349 } else 356 } else
350 inode->i_flags |= S_DEAD; 357 hfsplus_delete_inode(inode);
351 } else 358 } else
352 clear_nlink(inode); 359 HFSPLUS_SB(sb).file_count--;
353 inode->i_ctime = CURRENT_TIME_SEC; 360 inode->i_ctime = CURRENT_TIME_SEC;
354 mark_inode_dirty(inode); 361 mark_inode_dirty(inode);
355 362
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index b60c0affbec5..f457d2ca51ab 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -14,6 +14,7 @@
14 14
15#include <linux/capability.h> 15#include <linux/capability.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/mount.h>
17#include <linux/sched.h> 18#include <linux/sched.h>
18#include <linux/xattr.h> 19#include <linux/xattr.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
@@ -35,25 +36,32 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
35 flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ 36 flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */
36 return put_user(flags, (int __user *)arg); 37 return put_user(flags, (int __user *)arg);
37 case HFSPLUS_IOC_EXT2_SETFLAGS: { 38 case HFSPLUS_IOC_EXT2_SETFLAGS: {
38 if (IS_RDONLY(inode)) 39 int err = 0;
39 return -EROFS; 40 err = mnt_want_write(filp->f_path.mnt);
40 41 if (err)
41 if (!is_owner_or_cap(inode)) 42 return err;
42 return -EACCES; 43
43 44 if (!is_owner_or_cap(inode)) {
44 if (get_user(flags, (int __user *)arg)) 45 err = -EACCES;
45 return -EFAULT; 46 goto setflags_out;
46 47 }
48 if (get_user(flags, (int __user *)arg)) {
49 err = -EFAULT;
50 goto setflags_out;
51 }
47 if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || 52 if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) ||
48 HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { 53 HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) {
49 if (!capable(CAP_LINUX_IMMUTABLE)) 54 if (!capable(CAP_LINUX_IMMUTABLE)) {
50 return -EPERM; 55 err = -EPERM;
56 goto setflags_out;
57 }
51 } 58 }
52 59
53 /* don't silently ignore unsupported ext2 flags */ 60 /* don't silently ignore unsupported ext2 flags */
54 if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) 61 if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) {
55 return -EOPNOTSUPP; 62 err = -EOPNOTSUPP;
56 63 goto setflags_out;
64 }
57 if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ 65 if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */
58 inode->i_flags |= S_IMMUTABLE; 66 inode->i_flags |= S_IMMUTABLE;
59 HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; 67 HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE;
@@ -75,7 +83,9 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
75 83
76 inode->i_ctime = CURRENT_TIME_SEC; 84 inode->i_ctime = CURRENT_TIME_SEC;
77 mark_inode_dirty(inode); 85 mark_inode_dirty(inode);
78 return 0; 86setflags_out:
87 mnt_drop_write(filp->f_path.mnt);
88 return err;
79 } 89 }
80 default: 90 default:
81 return -ENOTTY; 91 return -ENOTTY;
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index a1e1f0f61aa5..8601d8ef3b55 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -1,23 +1,24 @@
1/* 1/*
2 * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) 2 * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL 3 * Licensed under the GPL
4 */ 4 */
5 5
6#include <linux/fs.h> 6#include <linux/ctype.h>
7#include <linux/dcache.h>
7#include <linux/file.h> 8#include <linux/file.h>
8#include <linux/module.h> 9#include <linux/fs.h>
9#include <linux/init.h> 10#include <linux/init.h>
10#include <linux/slab.h>
11#include <linux/list.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/ctype.h> 12#include <linux/list.h>
14#include <linux/dcache.h> 13#include <linux/module.h>
14#include <linux/mount.h>
15#include <linux/slab.h>
15#include <linux/statfs.h> 16#include <linux/statfs.h>
17#include <linux/types.h>
16#include <asm/uaccess.h> 18#include <asm/uaccess.h>
17#include <asm/fcntl.h>
18#include "os.h" 19#include "os.h"
19 20
20static int init_inode(struct inode *inode, struct dentry *dentry); 21static struct inode *get_inode(struct super_block *, struct dentry *);
21 22
22struct hppfs_data { 23struct hppfs_data {
23 struct list_head list; 24 struct list_head list;
@@ -51,14 +52,14 @@ static int is_pid(struct dentry *dentry)
51 int i; 52 int i;
52 53
53 sb = dentry->d_sb; 54 sb = dentry->d_sb;
54 if((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root)) 55 if ((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root))
55 return(0); 56 return 0;
56 57
57 for(i = 0; i < dentry->d_name.len; i++){ 58 for (i = 0; i < dentry->d_name.len; i++) {
58 if(!isdigit(dentry->d_name.name[i])) 59 if (!isdigit(dentry->d_name.name[i]))
59 return(0); 60 return 0;
60 } 61 }
61 return(1); 62 return 1;
62} 63}
63 64
64static char *dentry_name(struct dentry *dentry, int extra) 65static char *dentry_name(struct dentry *dentry, int extra)
@@ -70,8 +71,8 @@ static char *dentry_name(struct dentry *dentry, int extra)
70 71
71 len = 0; 72 len = 0;
72 parent = dentry; 73 parent = dentry;
73 while(parent->d_parent != parent){ 74 while (parent->d_parent != parent) {
74 if(is_pid(parent)) 75 if (is_pid(parent))
75 len += strlen("pid") + 1; 76 len += strlen("pid") + 1;
76 else len += parent->d_name.len + 1; 77 else len += parent->d_name.len + 1;
77 parent = parent->d_parent; 78 parent = parent->d_parent;
@@ -80,12 +81,13 @@ static char *dentry_name(struct dentry *dentry, int extra)
80 root = "proc"; 81 root = "proc";
81 len += strlen(root); 82 len += strlen(root);
82 name = kmalloc(len + extra + 1, GFP_KERNEL); 83 name = kmalloc(len + extra + 1, GFP_KERNEL);
83 if(name == NULL) return(NULL); 84 if (name == NULL)
85 return NULL;
84 86
85 name[len] = '\0'; 87 name[len] = '\0';
86 parent = dentry; 88 parent = dentry;
87 while(parent->d_parent != parent){ 89 while (parent->d_parent != parent) {
88 if(is_pid(parent)){ 90 if (is_pid(parent)) {
89 seg_name = "pid"; 91 seg_name = "pid";
90 seg_len = strlen("pid"); 92 seg_len = strlen("pid");
91 } 93 }
@@ -100,27 +102,25 @@ static char *dentry_name(struct dentry *dentry, int extra)
100 parent = parent->d_parent; 102 parent = parent->d_parent;
101 } 103 }
102 strncpy(name, root, strlen(root)); 104 strncpy(name, root, strlen(root));
103 return(name); 105 return name;
104} 106}
105 107
106struct dentry_operations hppfs_dentry_ops = {
107};
108
109static int file_removed(struct dentry *dentry, const char *file) 108static int file_removed(struct dentry *dentry, const char *file)
110{ 109{
111 char *host_file; 110 char *host_file;
112 int extra, fd; 111 int extra, fd;
113 112
114 extra = 0; 113 extra = 0;
115 if(file != NULL) extra += strlen(file) + 1; 114 if (file != NULL)
115 extra += strlen(file) + 1;
116 116
117 host_file = dentry_name(dentry, extra + strlen("/remove")); 117 host_file = dentry_name(dentry, extra + strlen("/remove"));
118 if(host_file == NULL){ 118 if (host_file == NULL) {
119 printk("file_removed : allocation failed\n"); 119 printk(KERN_ERR "file_removed : allocation failed\n");
120 return(-ENOMEM); 120 return -ENOMEM;
121 } 121 }
122 122
123 if(file != NULL){ 123 if (file != NULL) {
124 strcat(host_file, "/"); 124 strcat(host_file, "/");
125 strcat(host_file, file); 125 strcat(host_file, file);
126 } 126 }
@@ -128,45 +128,11 @@ static int file_removed(struct dentry *dentry, const char *file)
128 128
129 fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); 129 fd = os_open_file(host_file, of_read(OPENFLAGS()), 0);
130 kfree(host_file); 130 kfree(host_file);
131 if(fd > 0){ 131 if (fd > 0) {
132 os_close_file(fd); 132 os_close_file(fd);
133 return(1); 133 return 1;
134 }
135 return(0);
136}
137
138static void hppfs_read_inode(struct inode *ino)
139{
140 struct inode *proc_ino;
141
142 if(HPPFS_I(ino)->proc_dentry == NULL)
143 return;
144
145 proc_ino = HPPFS_I(ino)->proc_dentry->d_inode;
146 ino->i_uid = proc_ino->i_uid;
147 ino->i_gid = proc_ino->i_gid;
148 ino->i_atime = proc_ino->i_atime;
149 ino->i_mtime = proc_ino->i_mtime;
150 ino->i_ctime = proc_ino->i_ctime;
151 ino->i_ino = proc_ino->i_ino;
152 ino->i_mode = proc_ino->i_mode;
153 ino->i_nlink = proc_ino->i_nlink;
154 ino->i_size = proc_ino->i_size;
155 ino->i_blocks = proc_ino->i_blocks;
156}
157
158static struct inode *hppfs_iget(struct super_block *sb)
159{
160 struct inode *inode;
161
162 inode = iget_locked(sb, 0);
163 if (!inode)
164 return ERR_PTR(-ENOMEM);
165 if (inode->i_state & I_NEW) {
166 hppfs_read_inode(inode);
167 unlock_new_inode(inode);
168 } 134 }
169 return inode; 135 return 0;
170} 136}
171 137
172static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, 138static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
@@ -177,55 +143,45 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
177 int err, deleted; 143 int err, deleted;
178 144
179 deleted = file_removed(dentry, NULL); 145 deleted = file_removed(dentry, NULL);
180 if(deleted < 0) 146 if (deleted < 0)
181 return(ERR_PTR(deleted)); 147 return ERR_PTR(deleted);
182 else if(deleted) 148 else if (deleted)
183 return(ERR_PTR(-ENOENT)); 149 return ERR_PTR(-ENOENT);
184 150
185 err = -ENOMEM; 151 err = -ENOMEM;
186 parent = HPPFS_I(ino)->proc_dentry; 152 parent = HPPFS_I(ino)->proc_dentry;
187 mutex_lock(&parent->d_inode->i_mutex); 153 mutex_lock(&parent->d_inode->i_mutex);
188 proc_dentry = d_lookup(parent, &dentry->d_name); 154 proc_dentry = d_lookup(parent, &dentry->d_name);
189 if(proc_dentry == NULL){ 155 if (proc_dentry == NULL) {
190 proc_dentry = d_alloc(parent, &dentry->d_name); 156 proc_dentry = d_alloc(parent, &dentry->d_name);
191 if(proc_dentry == NULL){ 157 if (proc_dentry == NULL) {
192 mutex_unlock(&parent->d_inode->i_mutex); 158 mutex_unlock(&parent->d_inode->i_mutex);
193 goto out; 159 goto out;
194 } 160 }
195 new = (*parent->d_inode->i_op->lookup)(parent->d_inode, 161 new = (*parent->d_inode->i_op->lookup)(parent->d_inode,
196 proc_dentry, NULL); 162 proc_dentry, NULL);
197 if(new){ 163 if (new) {
198 dput(proc_dentry); 164 dput(proc_dentry);
199 proc_dentry = new; 165 proc_dentry = new;
200 } 166 }
201 } 167 }
202 mutex_unlock(&parent->d_inode->i_mutex); 168 mutex_unlock(&parent->d_inode->i_mutex);
203 169
204 if(IS_ERR(proc_dentry)) 170 if (IS_ERR(proc_dentry))
205 return(proc_dentry); 171 return proc_dentry;
206 172
207 inode = hppfs_iget(ino->i_sb); 173 err = -ENOMEM;
208 if (IS_ERR(inode)) { 174 inode = get_inode(ino->i_sb, proc_dentry);
209 err = PTR_ERR(inode); 175 if (!inode)
210 goto out_dput; 176 goto out_dput;
211 }
212
213 err = init_inode(inode, proc_dentry);
214 if(err)
215 goto out_put;
216
217 hppfs_read_inode(inode);
218 177
219 d_add(dentry, inode); 178 d_add(dentry, inode);
220 dentry->d_op = &hppfs_dentry_ops; 179 return NULL;
221 return(NULL);
222 180
223 out_put:
224 iput(inode);
225 out_dput: 181 out_dput:
226 dput(proc_dentry); 182 dput(proc_dentry);
227 out: 183 out:
228 return(ERR_PTR(err)); 184 return ERR_PTR(err);
229} 185}
230 186
231static const struct inode_operations hppfs_file_iops = { 187static const struct inode_operations hppfs_file_iops = {
@@ -239,15 +195,16 @@ static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count,
239 195
240 read = file->f_path.dentry->d_inode->i_fop->read; 196 read = file->f_path.dentry->d_inode->i_fop->read;
241 197
242 if(!is_user) 198 if (!is_user)
243 set_fs(KERNEL_DS); 199 set_fs(KERNEL_DS);
244 200
245 n = (*read)(file, buf, count, &file->f_pos); 201 n = (*read)(file, buf, count, &file->f_pos);
246 202
247 if(!is_user) 203 if (!is_user)
248 set_fs(USER_DS); 204 set_fs(USER_DS);
249 205
250 if(ppos) *ppos = file->f_pos; 206 if (ppos)
207 *ppos = file->f_pos;
251 return n; 208 return n;
252} 209}
253 210
@@ -259,24 +216,23 @@ static ssize_t hppfs_read_file(int fd, char __user *buf, ssize_t count)
259 216
260 n = -ENOMEM; 217 n = -ENOMEM;
261 new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 218 new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
262 if(new_buf == NULL){ 219 if (new_buf == NULL) {
263 printk("hppfs_read_file : kmalloc failed\n"); 220 printk(KERN_ERR "hppfs_read_file : kmalloc failed\n");
264 goto out; 221 goto out;
265 } 222 }
266 n = 0; 223 n = 0;
267 while(count > 0){ 224 while (count > 0) {
268 cur = min_t(ssize_t, count, PAGE_SIZE); 225 cur = min_t(ssize_t, count, PAGE_SIZE);
269 err = os_read_file(fd, new_buf, cur); 226 err = os_read_file(fd, new_buf, cur);
270 if(err < 0){ 227 if (err < 0) {
271 printk("hppfs_read : read failed, errno = %d\n", 228 printk(KERN_ERR "hppfs_read : read failed, "
272 err); 229 "errno = %d\n", err);
273 n = err; 230 n = err;
274 goto out_free; 231 goto out_free;
275 } 232 } else if (err == 0)
276 else if(err == 0)
277 break; 233 break;
278 234
279 if(copy_to_user(buf, new_buf, err)){ 235 if (copy_to_user(buf, new_buf, err)) {
280 n = -EFAULT; 236 n = -EFAULT;
281 goto out_free; 237 goto out_free;
282 } 238 }
@@ -297,35 +253,36 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count,
297 loff_t off; 253 loff_t off;
298 int err; 254 int err;
299 255
300 if(hppfs->contents != NULL){ 256 if (hppfs->contents != NULL) {
301 if(*ppos >= hppfs->len) return(0); 257 if (*ppos >= hppfs->len)
258 return 0;
302 259
303 data = hppfs->contents; 260 data = hppfs->contents;
304 off = *ppos; 261 off = *ppos;
305 while(off >= sizeof(data->contents)){ 262 while (off >= sizeof(data->contents)) {
306 data = list_entry(data->list.next, struct hppfs_data, 263 data = list_entry(data->list.next, struct hppfs_data,
307 list); 264 list);
308 off -= sizeof(data->contents); 265 off -= sizeof(data->contents);
309 } 266 }
310 267
311 if(off + count > hppfs->len) 268 if (off + count > hppfs->len)
312 count = hppfs->len - off; 269 count = hppfs->len - off;
313 copy_to_user(buf, &data->contents[off], count); 270 copy_to_user(buf, &data->contents[off], count);
314 *ppos += count; 271 *ppos += count;
315 } 272 } else if (hppfs->host_fd != -1) {
316 else if(hppfs->host_fd != -1){
317 err = os_seek_file(hppfs->host_fd, *ppos); 273 err = os_seek_file(hppfs->host_fd, *ppos);
318 if(err){ 274 if (err) {
319 printk("hppfs_read : seek failed, errno = %d\n", err); 275 printk(KERN_ERR "hppfs_read : seek failed, "
320 return(err); 276 "errno = %d\n", err);
277 return err;
321 } 278 }
322 count = hppfs_read_file(hppfs->host_fd, buf, count); 279 count = hppfs_read_file(hppfs->host_fd, buf, count);
323 if(count > 0) 280 if (count > 0)
324 *ppos += count; 281 *ppos += count;
325 } 282 }
326 else count = read_proc(hppfs->proc_file, buf, count, ppos, 1); 283 else count = read_proc(hppfs->proc_file, buf, count, ppos, 1);
327 284
328 return(count); 285 return count;
329} 286}
330 287
331static ssize_t hppfs_write(struct file *file, const char __user *buf, size_t len, 288static ssize_t hppfs_write(struct file *file, const char __user *buf, size_t len,
@@ -342,7 +299,7 @@ static ssize_t hppfs_write(struct file *file, const char __user *buf, size_t len
342 err = (*write)(proc_file, buf, len, &proc_file->f_pos); 299 err = (*write)(proc_file, buf, len, &proc_file->f_pos);
343 file->f_pos = proc_file->f_pos; 300 file->f_pos = proc_file->f_pos;
344 301
345 return(err); 302 return err;
346} 303}
347 304
348static int open_host_sock(char *host_file, int *filter_out) 305static int open_host_sock(char *host_file, int *filter_out)
@@ -354,13 +311,13 @@ static int open_host_sock(char *host_file, int *filter_out)
354 strcpy(end, "/rw"); 311 strcpy(end, "/rw");
355 *filter_out = 1; 312 *filter_out = 1;
356 fd = os_connect_socket(host_file); 313 fd = os_connect_socket(host_file);
357 if(fd > 0) 314 if (fd > 0)
358 return(fd); 315 return fd;
359 316
360 strcpy(end, "/r"); 317 strcpy(end, "/r");
361 *filter_out = 0; 318 *filter_out = 0;
362 fd = os_connect_socket(host_file); 319 fd = os_connect_socket(host_file);
363 return(fd); 320 return fd;
364} 321}
365 322
366static void free_contents(struct hppfs_data *head) 323static void free_contents(struct hppfs_data *head)
@@ -368,9 +325,10 @@ static void free_contents(struct hppfs_data *head)
368 struct hppfs_data *data; 325 struct hppfs_data *data;
369 struct list_head *ele, *next; 326 struct list_head *ele, *next;
370 327
371 if(head == NULL) return; 328 if (head == NULL)
329 return;
372 330
373 list_for_each_safe(ele, next, &head->list){ 331 list_for_each_safe(ele, next, &head->list) {
374 data = list_entry(ele, struct hppfs_data, list); 332 data = list_entry(ele, struct hppfs_data, list);
375 kfree(data); 333 kfree(data);
376 } 334 }
@@ -387,8 +345,8 @@ static struct hppfs_data *hppfs_get_data(int fd, int filter,
387 345
388 err = -ENOMEM; 346 err = -ENOMEM;
389 data = kmalloc(sizeof(*data), GFP_KERNEL); 347 data = kmalloc(sizeof(*data), GFP_KERNEL);
390 if(data == NULL){ 348 if (data == NULL) {
391 printk("hppfs_get_data : head allocation failed\n"); 349 printk(KERN_ERR "hppfs_get_data : head allocation failed\n");
392 goto failed; 350 goto failed;
393 } 351 }
394 352
@@ -397,36 +355,36 @@ static struct hppfs_data *hppfs_get_data(int fd, int filter,
397 head = data; 355 head = data;
398 *size_out = 0; 356 *size_out = 0;
399 357
400 if(filter){ 358 if (filter) {
401 while((n = read_proc(proc_file, data->contents, 359 while ((n = read_proc(proc_file, data->contents,
402 sizeof(data->contents), NULL, 0)) > 0) 360 sizeof(data->contents), NULL, 0)) > 0)
403 os_write_file(fd, data->contents, n); 361 os_write_file(fd, data->contents, n);
404 err = os_shutdown_socket(fd, 0, 1); 362 err = os_shutdown_socket(fd, 0, 1);
405 if(err){ 363 if (err) {
406 printk("hppfs_get_data : failed to shut down " 364 printk(KERN_ERR "hppfs_get_data : failed to shut down "
407 "socket\n"); 365 "socket\n");
408 goto failed_free; 366 goto failed_free;
409 } 367 }
410 } 368 }
411 while(1){ 369 while (1) {
412 n = os_read_file(fd, data->contents, sizeof(data->contents)); 370 n = os_read_file(fd, data->contents, sizeof(data->contents));
413 if(n < 0){ 371 if (n < 0) {
414 err = n; 372 err = n;
415 printk("hppfs_get_data : read failed, errno = %d\n", 373 printk(KERN_ERR "hppfs_get_data : read failed, "
416 err); 374 "errno = %d\n", err);
417 goto failed_free; 375 goto failed_free;
418 } 376 } else if (n == 0)
419 else if(n == 0)
420 break; 377 break;
421 378
422 *size_out += n; 379 *size_out += n;
423 380
424 if(n < sizeof(data->contents)) 381 if (n < sizeof(data->contents))
425 break; 382 break;
426 383
427 new = kmalloc(sizeof(*data), GFP_KERNEL); 384 new = kmalloc(sizeof(*data), GFP_KERNEL);
428 if(new == 0){ 385 if (new == 0) {
429 printk("hppfs_get_data : data allocation failed\n"); 386 printk(KERN_ERR "hppfs_get_data : data allocation "
387 "failed\n");
430 err = -ENOMEM; 388 err = -ENOMEM;
431 goto failed_free; 389 goto failed_free;
432 } 390 }
@@ -435,12 +393,12 @@ static struct hppfs_data *hppfs_get_data(int fd, int filter,
435 list_add(&new->list, &data->list); 393 list_add(&new->list, &data->list);
436 data = new; 394 data = new;
437 } 395 }
438 return(head); 396 return head;
439 397
440 failed_free: 398 failed_free:
441 free_contents(head); 399 free_contents(head);
442 failed: 400 failed:
443 return(ERR_PTR(err)); 401 return ERR_PTR(err);
444} 402}
445 403
446static struct hppfs_private *hppfs_data(void) 404static struct hppfs_private *hppfs_data(void)
@@ -448,77 +406,79 @@ static struct hppfs_private *hppfs_data(void)
448 struct hppfs_private *data; 406 struct hppfs_private *data;
449 407
450 data = kmalloc(sizeof(*data), GFP_KERNEL); 408 data = kmalloc(sizeof(*data), GFP_KERNEL);
451 if(data == NULL) 409 if (data == NULL)
452 return(data); 410 return data;
453 411
454 *data = ((struct hppfs_private ) { .host_fd = -1, 412 *data = ((struct hppfs_private ) { .host_fd = -1,
455 .len = -1, 413 .len = -1,
456 .contents = NULL } ); 414 .contents = NULL } );
457 return(data); 415 return data;
458} 416}
459 417
460static int file_mode(int fmode) 418static int file_mode(int fmode)
461{ 419{
462 if(fmode == (FMODE_READ | FMODE_WRITE)) 420 if (fmode == (FMODE_READ | FMODE_WRITE))
463 return(O_RDWR); 421 return O_RDWR;
464 if(fmode == FMODE_READ) 422 if (fmode == FMODE_READ)
465 return(O_RDONLY); 423 return O_RDONLY;
466 if(fmode == FMODE_WRITE) 424 if (fmode == FMODE_WRITE)
467 return(O_WRONLY); 425 return O_WRONLY;
468 return(0); 426 return 0;
469} 427}
470 428
471static int hppfs_open(struct inode *inode, struct file *file) 429static int hppfs_open(struct inode *inode, struct file *file)
472{ 430{
473 struct hppfs_private *data; 431 struct hppfs_private *data;
474 struct dentry *proc_dentry; 432 struct dentry *proc_dentry;
433 struct vfsmount *proc_mnt;
475 char *host_file; 434 char *host_file;
476 int err, fd, type, filter; 435 int err, fd, type, filter;
477 436
478 err = -ENOMEM; 437 err = -ENOMEM;
479 data = hppfs_data(); 438 data = hppfs_data();
480 if(data == NULL) 439 if (data == NULL)
481 goto out; 440 goto out;
482 441
483 host_file = dentry_name(file->f_path.dentry, strlen("/rw")); 442 host_file = dentry_name(file->f_path.dentry, strlen("/rw"));
484 if(host_file == NULL) 443 if (host_file == NULL)
485 goto out_free2; 444 goto out_free2;
486 445
487 proc_dentry = HPPFS_I(inode)->proc_dentry; 446 proc_dentry = HPPFS_I(inode)->proc_dentry;
447 proc_mnt = inode->i_sb->s_fs_info;
488 448
489 /* XXX This isn't closed anywhere */ 449 /* XXX This isn't closed anywhere */
490 data->proc_file = dentry_open(dget(proc_dentry), NULL, 450 data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt),
491 file_mode(file->f_mode)); 451 file_mode(file->f_mode));
492 err = PTR_ERR(data->proc_file); 452 err = PTR_ERR(data->proc_file);
493 if(IS_ERR(data->proc_file)) 453 if (IS_ERR(data->proc_file))
494 goto out_free1; 454 goto out_free1;
495 455
496 type = os_file_type(host_file); 456 type = os_file_type(host_file);
497 if(type == OS_TYPE_FILE){ 457 if (type == OS_TYPE_FILE) {
498 fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); 458 fd = os_open_file(host_file, of_read(OPENFLAGS()), 0);
499 if(fd >= 0) 459 if (fd >= 0)
500 data->host_fd = fd; 460 data->host_fd = fd;
501 else printk("hppfs_open : failed to open '%s', errno = %d\n", 461 else
502 host_file, -fd); 462 printk(KERN_ERR "hppfs_open : failed to open '%s', "
463 "errno = %d\n", host_file, -fd);
503 464
504 data->contents = NULL; 465 data->contents = NULL;
505 } 466 } else if (type == OS_TYPE_DIR) {
506 else if(type == OS_TYPE_DIR){
507 fd = open_host_sock(host_file, &filter); 467 fd = open_host_sock(host_file, &filter);
508 if(fd > 0){ 468 if (fd > 0) {
509 data->contents = hppfs_get_data(fd, filter, 469 data->contents = hppfs_get_data(fd, filter,
510 data->proc_file, 470 data->proc_file,
511 file, &data->len); 471 file, &data->len);
512 if(!IS_ERR(data->contents)) 472 if (!IS_ERR(data->contents))
513 data->host_fd = fd; 473 data->host_fd = fd;
514 } 474 } else
515 else printk("hppfs_open : failed to open a socket in " 475 printk(KERN_ERR "hppfs_open : failed to open a socket "
516 "'%s', errno = %d\n", host_file, -fd); 476 "in '%s', errno = %d\n", host_file, -fd);
517 } 477 }
518 kfree(host_file); 478 kfree(host_file);
519 479
520 file->private_data = data; 480 file->private_data = data;
521 return(0); 481 return 0;
522 482
523 out_free1: 483 out_free1:
524 kfree(host_file); 484 kfree(host_file);
@@ -526,34 +486,36 @@ static int hppfs_open(struct inode *inode, struct file *file)
526 free_contents(data->contents); 486 free_contents(data->contents);
527 kfree(data); 487 kfree(data);
528 out: 488 out:
529 return(err); 489 return err;
530} 490}
531 491
532static int hppfs_dir_open(struct inode *inode, struct file *file) 492static int hppfs_dir_open(struct inode *inode, struct file *file)
533{ 493{
534 struct hppfs_private *data; 494 struct hppfs_private *data;
535 struct dentry *proc_dentry; 495 struct dentry *proc_dentry;
496 struct vfsmount *proc_mnt;
536 int err; 497 int err;
537 498
538 err = -ENOMEM; 499 err = -ENOMEM;
539 data = hppfs_data(); 500 data = hppfs_data();
540 if(data == NULL) 501 if (data == NULL)
541 goto out; 502 goto out;
542 503
543 proc_dentry = HPPFS_I(inode)->proc_dentry; 504 proc_dentry = HPPFS_I(inode)->proc_dentry;
544 data->proc_file = dentry_open(dget(proc_dentry), NULL, 505 proc_mnt = inode->i_sb->s_fs_info;
506 data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt),
545 file_mode(file->f_mode)); 507 file_mode(file->f_mode));
546 err = PTR_ERR(data->proc_file); 508 err = PTR_ERR(data->proc_file);
547 if(IS_ERR(data->proc_file)) 509 if (IS_ERR(data->proc_file))
548 goto out_free; 510 goto out_free;
549 511
550 file->private_data = data; 512 file->private_data = data;
551 return(0); 513 return 0;
552 514
553 out_free: 515 out_free:
554 kfree(data); 516 kfree(data);
555 out: 517 out:
556 return(err); 518 return err;
557} 519}
558 520
559static loff_t hppfs_llseek(struct file *file, loff_t off, int where) 521static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
@@ -564,13 +526,13 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
564 loff_t ret; 526 loff_t ret;
565 527
566 llseek = proc_file->f_path.dentry->d_inode->i_fop->llseek; 528 llseek = proc_file->f_path.dentry->d_inode->i_fop->llseek;
567 if(llseek != NULL){ 529 if (llseek != NULL) {
568 ret = (*llseek)(proc_file, off, where); 530 ret = (*llseek)(proc_file, off, where);
569 if(ret < 0) 531 if (ret < 0)
570 return(ret); 532 return ret;
571 } 533 }
572 534
573 return(default_llseek(file, off, where)); 535 return default_llseek(file, off, where);
574} 536}
575 537
576static const struct file_operations hppfs_file_fops = { 538static const struct file_operations hppfs_file_fops = {
@@ -592,11 +554,11 @@ static int hppfs_filldir(void *d, const char *name, int size,
592{ 554{
593 struct hppfs_dirent *dirent = d; 555 struct hppfs_dirent *dirent = d;
594 556
595 if(file_removed(dirent->dentry, name)) 557 if (file_removed(dirent->dentry, name))
596 return(0); 558 return 0;
597 559
598 return((*dirent->filldir)(dirent->vfs_dirent, name, size, offset, 560 return (*dirent->filldir)(dirent->vfs_dirent, name, size, offset,
599 inode, type)); 561 inode, type);
600} 562}
601 563
602static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) 564static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
@@ -607,7 +569,8 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
607 struct hppfs_dirent dirent = ((struct hppfs_dirent) 569 struct hppfs_dirent dirent = ((struct hppfs_dirent)
608 { .vfs_dirent = ent, 570 { .vfs_dirent = ent,
609 .filldir = filldir, 571 .filldir = filldir,
610 .dentry = file->f_path.dentry } ); 572 .dentry = file->f_path.dentry
573 });
611 int err; 574 int err;
612 575
613 readdir = proc_file->f_path.dentry->d_inode->i_fop->readdir; 576 readdir = proc_file->f_path.dentry->d_inode->i_fop->readdir;
@@ -616,12 +579,12 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
616 err = (*readdir)(proc_file, &dirent, hppfs_filldir); 579 err = (*readdir)(proc_file, &dirent, hppfs_filldir);
617 file->f_pos = proc_file->f_pos; 580 file->f_pos = proc_file->f_pos;
618 581
619 return(err); 582 return err;
620} 583}
621 584
622static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) 585static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync)
623{ 586{
624 return(0); 587 return 0;
625} 588}
626 589
627static const struct file_operations hppfs_dir_fops = { 590static const struct file_operations hppfs_dir_fops = {
@@ -639,7 +602,7 @@ static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf)
639 sf->f_files = 0; 602 sf->f_files = 0;
640 sf->f_ffree = 0; 603 sf->f_ffree = 0;
641 sf->f_type = HPPFS_SUPER_MAGIC; 604 sf->f_type = HPPFS_SUPER_MAGIC;
642 return(0); 605 return 0;
643} 606}
644 607
645static struct inode *hppfs_alloc_inode(struct super_block *sb) 608static struct inode *hppfs_alloc_inode(struct super_block *sb)
@@ -647,12 +610,12 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb)
647 struct hppfs_inode_info *hi; 610 struct hppfs_inode_info *hi;
648 611
649 hi = kmalloc(sizeof(*hi), GFP_KERNEL); 612 hi = kmalloc(sizeof(*hi), GFP_KERNEL);
650 if(hi == NULL) 613 if (!hi)
651 return(NULL); 614 return NULL;
652 615
653 *hi = ((struct hppfs_inode_info) { .proc_dentry = NULL }); 616 hi->proc_dentry = NULL;
654 inode_init_once(&hi->vfs_inode); 617 inode_init_once(&hi->vfs_inode);
655 return(&hi->vfs_inode); 618 return &hi->vfs_inode;
656} 619}
657 620
658void hppfs_delete_inode(struct inode *ino) 621void hppfs_delete_inode(struct inode *ino)
@@ -665,21 +628,31 @@ static void hppfs_destroy_inode(struct inode *inode)
665 kfree(HPPFS_I(inode)); 628 kfree(HPPFS_I(inode));
666} 629}
667 630
631static void hppfs_put_super(struct super_block *sb)
632{
633 mntput(sb->s_fs_info);
634}
635
668static const struct super_operations hppfs_sbops = { 636static const struct super_operations hppfs_sbops = {
669 .alloc_inode = hppfs_alloc_inode, 637 .alloc_inode = hppfs_alloc_inode,
670 .destroy_inode = hppfs_destroy_inode, 638 .destroy_inode = hppfs_destroy_inode,
671 .delete_inode = hppfs_delete_inode, 639 .delete_inode = hppfs_delete_inode,
672 .statfs = hppfs_statfs, 640 .statfs = hppfs_statfs,
641 .put_super = hppfs_put_super,
673}; 642};
674 643
675static int hppfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) 644static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
645 int buflen)
676{ 646{
677 struct file *proc_file; 647 struct file *proc_file;
678 struct dentry *proc_dentry; 648 struct dentry *proc_dentry;
649 struct vfsmount *proc_mnt;
679 int ret; 650 int ret;
680 651
681 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; 652 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
682 proc_file = dentry_open(dget(proc_dentry), NULL, O_RDONLY); 653 proc_mnt = dentry->d_sb->s_fs_info;
654
655 proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), O_RDONLY);
683 if (IS_ERR(proc_file)) 656 if (IS_ERR(proc_file))
684 return PTR_ERR(proc_file); 657 return PTR_ERR(proc_file);
685 658
@@ -694,10 +667,13 @@ static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
694{ 667{
695 struct file *proc_file; 668 struct file *proc_file;
696 struct dentry *proc_dentry; 669 struct dentry *proc_dentry;
670 struct vfsmount *proc_mnt;
697 void *ret; 671 void *ret;
698 672
699 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; 673 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
700 proc_file = dentry_open(dget(proc_dentry), NULL, O_RDONLY); 674 proc_mnt = dentry->d_sb->s_fs_info;
675
676 proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), O_RDONLY);
701 if (IS_ERR(proc_file)) 677 if (IS_ERR(proc_file))
702 return proc_file; 678 return proc_file;
703 679
@@ -717,70 +693,72 @@ static const struct inode_operations hppfs_link_iops = {
717 .follow_link = hppfs_follow_link, 693 .follow_link = hppfs_follow_link,
718}; 694};
719 695
720static int init_inode(struct inode *inode, struct dentry *dentry) 696static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
721{ 697{
722 if(S_ISDIR(dentry->d_inode->i_mode)){ 698 struct inode *proc_ino = dentry->d_inode;
699 struct inode *inode = new_inode(sb);
700
701 if (!inode)
702 return ERR_PTR(-ENOMEM);
703
704 if (S_ISDIR(dentry->d_inode->i_mode)) {
723 inode->i_op = &hppfs_dir_iops; 705 inode->i_op = &hppfs_dir_iops;
724 inode->i_fop = &hppfs_dir_fops; 706 inode->i_fop = &hppfs_dir_fops;
725 } 707 } else if (S_ISLNK(dentry->d_inode->i_mode)) {
726 else if(S_ISLNK(dentry->d_inode->i_mode)){
727 inode->i_op = &hppfs_link_iops; 708 inode->i_op = &hppfs_link_iops;
728 inode->i_fop = &hppfs_file_fops; 709 inode->i_fop = &hppfs_file_fops;
729 } 710 } else {
730 else {
731 inode->i_op = &hppfs_file_iops; 711 inode->i_op = &hppfs_file_iops;
732 inode->i_fop = &hppfs_file_fops; 712 inode->i_fop = &hppfs_file_fops;
733 } 713 }
734 714
735 HPPFS_I(inode)->proc_dentry = dentry; 715 HPPFS_I(inode)->proc_dentry = dentry;
736 716
737 return(0); 717 inode->i_uid = proc_ino->i_uid;
718 inode->i_gid = proc_ino->i_gid;
719 inode->i_atime = proc_ino->i_atime;
720 inode->i_mtime = proc_ino->i_mtime;
721 inode->i_ctime = proc_ino->i_ctime;
722 inode->i_ino = proc_ino->i_ino;
723 inode->i_mode = proc_ino->i_mode;
724 inode->i_nlink = proc_ino->i_nlink;
725 inode->i_size = proc_ino->i_size;
726 inode->i_blocks = proc_ino->i_blocks;
727
728 return 0;
738} 729}
739 730
740static int hppfs_fill_super(struct super_block *sb, void *d, int silent) 731static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
741{ 732{
742 struct inode *root_inode; 733 struct inode *root_inode;
743 struct file_system_type *procfs; 734 struct vfsmount *proc_mnt;
744 struct super_block *proc_sb; 735 int err = -ENOENT;
745 int err;
746 736
747 err = -ENOENT; 737 proc_mnt = do_kern_mount("proc", 0, "proc", NULL);
748 procfs = get_fs_type("proc"); 738 if (IS_ERR(proc_mnt))
749 if(procfs == NULL)
750 goto out; 739 goto out;
751 740
752 if(list_empty(&procfs->fs_supers))
753 goto out;
754
755 proc_sb = list_entry(procfs->fs_supers.next, struct super_block,
756 s_instances);
757
758 sb->s_blocksize = 1024; 741 sb->s_blocksize = 1024;
759 sb->s_blocksize_bits = 10; 742 sb->s_blocksize_bits = 10;
760 sb->s_magic = HPPFS_SUPER_MAGIC; 743 sb->s_magic = HPPFS_SUPER_MAGIC;
761 sb->s_op = &hppfs_sbops; 744 sb->s_op = &hppfs_sbops;
762 745 sb->s_fs_info = proc_mnt;
763 root_inode = hppfs_iget(sb);
764 if (IS_ERR(root_inode)) {
765 err = PTR_ERR(root_inode);
766 goto out;
767 }
768
769 err = init_inode(root_inode, proc_sb->s_root);
770 if(err)
771 goto out_put;
772 746
773 err = -ENOMEM; 747 err = -ENOMEM;
774 sb->s_root = d_alloc_root(root_inode); 748 root_inode = get_inode(sb, proc_mnt->mnt_sb->s_root);
775 if(sb->s_root == NULL) 749 if (!root_inode)
776 goto out_put; 750 goto out_mntput;
777 751
778 hppfs_read_inode(root_inode); 752 sb->s_root = d_alloc_root(root_inode);
753 if (!sb->s_root)
754 goto out_iput;
779 755
780 return(0); 756 return 0;
781 757
782 out_put: 758 out_iput:
783 iput(root_inode); 759 iput(root_inode);
760 out_mntput:
761 mntput(proc_mnt);
784 out: 762 out:
785 return(err); 763 return(err);
786} 764}
@@ -802,7 +780,7 @@ static struct file_system_type hppfs_type = {
802 780
803static int __init init_hppfs(void) 781static int __init init_hppfs(void)
804{ 782{
805 return(register_filesystem(&hppfs_type)); 783 return register_filesystem(&hppfs_type);
806} 784}
807 785
808static void __exit exit_hppfs(void) 786static void __exit exit_hppfs(void)
@@ -813,14 +791,3 @@ static void __exit exit_hppfs(void)
813module_init(init_hppfs) 791module_init(init_hppfs)
814module_exit(exit_hppfs) 792module_exit(exit_hppfs)
815MODULE_LICENSE("GPL"); 793MODULE_LICENSE("GPL");
816
817/*
818 * Overrides for Emacs so that we follow Linus's tabbing style.
819 * Emacs will notice this stuff at the end of the file and automatically
820 * adjust the settings for this buffer only. This must remain at the end
821 * of the file.
822 * ---------------------------------------------------------------------------
823 * Local variables:
824 * c-file-style: "linux"
825 * End:
826 */
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index eee9487ae47f..6846785fe904 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -954,7 +954,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
954 FMODE_WRITE | FMODE_READ, 954 FMODE_WRITE | FMODE_READ,
955 &hugetlbfs_file_operations); 955 &hugetlbfs_file_operations);
956 if (!file) 956 if (!file)
957 goto out_inode; 957 goto out_dentry; /* inode is already attached */
958 958
959 return file; 959 return file;
960 960
diff --git a/fs/inode.c b/fs/inode.c
index 53245ffcf93d..27ee1af50d02 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1199,42 +1199,37 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
1199 struct inode *inode = dentry->d_inode; 1199 struct inode *inode = dentry->d_inode;
1200 struct timespec now; 1200 struct timespec now;
1201 1201
1202 if (inode->i_flags & S_NOATIME) 1202 if (mnt_want_write(mnt))
1203 return; 1203 return;
1204 if (inode->i_flags & S_NOATIME)
1205 goto out;
1204 if (IS_NOATIME(inode)) 1206 if (IS_NOATIME(inode))
1205 return; 1207 goto out;
1206 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) 1208 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1207 return; 1209 goto out;
1208 1210
1209 /* 1211 if (mnt->mnt_flags & MNT_NOATIME)
1210 * We may have a NULL vfsmount when coming from NFSD 1212 goto out;
1211 */ 1213 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1212 if (mnt) { 1214 goto out;
1213 if (mnt->mnt_flags & MNT_NOATIME) 1215 if (mnt->mnt_flags & MNT_RELATIME) {
1214 return; 1216 /*
1215 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) 1217 * With relative atime, only update atime if the previous
1216 return; 1218 * atime is earlier than either the ctime or mtime.
1217 1219 */
1218 if (mnt->mnt_flags & MNT_RELATIME) { 1220 if (timespec_compare(&inode->i_mtime, &inode->i_atime) < 0 &&
1219 /* 1221 timespec_compare(&inode->i_ctime, &inode->i_atime) < 0)
1220 * With relative atime, only update atime if the 1222 goto out;
1221 * previous atime is earlier than either the ctime or
1222 * mtime.
1223 */
1224 if (timespec_compare(&inode->i_mtime,
1225 &inode->i_atime) < 0 &&
1226 timespec_compare(&inode->i_ctime,
1227 &inode->i_atime) < 0)
1228 return;
1229 }
1230 } 1223 }
1231 1224
1232 now = current_fs_time(inode->i_sb); 1225 now = current_fs_time(inode->i_sb);
1233 if (timespec_equal(&inode->i_atime, &now)) 1226 if (timespec_equal(&inode->i_atime, &now))
1234 return; 1227 goto out;
1235 1228
1236 inode->i_atime = now; 1229 inode->i_atime = now;
1237 mark_inode_dirty_sync(inode); 1230 mark_inode_dirty_sync(inode);
1231out:
1232 mnt_drop_write(mnt);
1238} 1233}
1239EXPORT_SYMBOL(touch_atime); 1234EXPORT_SYMBOL(touch_atime);
1240 1235
@@ -1255,10 +1250,13 @@ void file_update_time(struct file *file)
1255 struct inode *inode = file->f_path.dentry->d_inode; 1250 struct inode *inode = file->f_path.dentry->d_inode;
1256 struct timespec now; 1251 struct timespec now;
1257 int sync_it = 0; 1252 int sync_it = 0;
1253 int err;
1258 1254
1259 if (IS_NOCMTIME(inode)) 1255 if (IS_NOCMTIME(inode))
1260 return; 1256 return;
1261 if (IS_RDONLY(inode)) 1257
1258 err = mnt_want_write(file->f_path.mnt);
1259 if (err)
1262 return; 1260 return;
1263 1261
1264 now = current_fs_time(inode->i_sb); 1262 now = current_fs_time(inode->i_sb);
@@ -1279,6 +1277,7 @@ void file_update_time(struct file *file)
1279 1277
1280 if (sync_it) 1278 if (sync_it)
1281 mark_inode_dirty_sync(inode); 1279 mark_inode_dirty_sync(inode);
1280 mnt_drop_write(file->f_path.mnt);
1282} 1281}
1283 1282
1284EXPORT_SYMBOL(file_update_time); 1283EXPORT_SYMBOL(file_update_time);
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 37dbd6404787..defb932eee9a 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -72,6 +72,17 @@ static int zisofs_readpage(struct file *file, struct page *page)
72 offset = index & ~zisofs_block_page_mask; 72 offset = index & ~zisofs_block_page_mask;
73 blockindex = offset >> zisofs_block_page_shift; 73 blockindex = offset >> zisofs_block_page_shift;
74 maxpage = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 74 maxpage = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
75
76 /*
77 * If this page is wholly outside i_size we just return zero;
78 * do_generic_file_read() will handle this for us
79 */
80 if (page->index >= maxpage) {
81 SetPageUptodate(page);
82 unlock_page(page);
83 return 0;
84 }
85
75 maxpage = min(zisofs_block_pages, maxpage-offset); 86 maxpage = min(zisofs_block_pages, maxpage-offset);
76 87
77 for ( i = 0 ; i < maxpage ; i++, offset++ ) { 88 for ( i = 0 ; i < maxpage ; i++, offset++ ) {
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 3943a8905eb2..0e081d5f32e8 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -697,13 +697,14 @@ fail:
697 */ 697 */
698 698
699/** 699/**
700 * journal_t * journal_init_dev() - creates an initialises a journal structure 700 * journal_t * journal_init_dev() - creates and initialises a journal structure
701 * @bdev: Block device on which to create the journal 701 * @bdev: Block device on which to create the journal
702 * @fs_dev: Device which hold journalled filesystem for this journal. 702 * @fs_dev: Device which hold journalled filesystem for this journal.
703 * @start: Block nr Start of journal. 703 * @start: Block nr Start of journal.
704 * @len: Length of the journal in blocks. 704 * @len: Length of the journal in blocks.
705 * @blocksize: blocksize of journalling device 705 * @blocksize: blocksize of journalling device
706 * @returns: a newly created journal_t * 706 *
707 * Returns: a newly created journal_t *
707 * 708 *
708 * journal_init_dev creates a journal which maps a fixed contiguous 709 * journal_init_dev creates a journal which maps a fixed contiguous
709 * range of blocks on an arbitrary block device. 710 * range of blocks on an arbitrary block device.
@@ -1619,14 +1620,14 @@ static int journal_init_journal_head_cache(void)
1619{ 1620{
1620 int retval; 1621 int retval;
1621 1622
1622 J_ASSERT(journal_head_cache == 0); 1623 J_ASSERT(journal_head_cache == NULL);
1623 journal_head_cache = kmem_cache_create("journal_head", 1624 journal_head_cache = kmem_cache_create("journal_head",
1624 sizeof(struct journal_head), 1625 sizeof(struct journal_head),
1625 0, /* offset */ 1626 0, /* offset */
1626 SLAB_TEMPORARY, /* flags */ 1627 SLAB_TEMPORARY, /* flags */
1627 NULL); /* ctor */ 1628 NULL); /* ctor */
1628 retval = 0; 1629 retval = 0;
1629 if (journal_head_cache == 0) { 1630 if (!journal_head_cache) {
1630 retval = -ENOMEM; 1631 retval = -ENOMEM;
1631 printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); 1632 printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
1632 } 1633 }
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 2b8edf4d6eaa..43bc5e5ed064 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -478,7 +478,7 @@ static int do_one_pass(journal_t *journal,
478 memcpy(nbh->b_data, obh->b_data, 478 memcpy(nbh->b_data, obh->b_data,
479 journal->j_blocksize); 479 journal->j_blocksize);
480 if (flags & JFS_FLAG_ESCAPE) { 480 if (flags & JFS_FLAG_ESCAPE) {
481 *((__be32 *)bh->b_data) = 481 *((__be32 *)nbh->b_data) =
482 cpu_to_be32(JFS_MAGIC_NUMBER); 482 cpu_to_be32(JFS_MAGIC_NUMBER);
483 } 483 }
484 484
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index ad2eacf570c6..d5f8eee7c88c 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -173,13 +173,13 @@ int __init journal_init_revoke_caches(void)
173 0, 173 0,
174 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 174 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
175 NULL); 175 NULL);
176 if (revoke_record_cache == 0) 176 if (!revoke_record_cache)
177 return -ENOMEM; 177 return -ENOMEM;
178 178
179 revoke_table_cache = kmem_cache_create("revoke_table", 179 revoke_table_cache = kmem_cache_create("revoke_table",
180 sizeof(struct jbd_revoke_table_s), 180 sizeof(struct jbd_revoke_table_s),
181 0, SLAB_TEMPORARY, NULL); 181 0, SLAB_TEMPORARY, NULL);
182 if (revoke_table_cache == 0) { 182 if (!revoke_table_cache) {
183 kmem_cache_destroy(revoke_record_cache); 183 kmem_cache_destroy(revoke_record_cache);
184 revoke_record_cache = NULL; 184 revoke_record_cache = NULL;
185 return -ENOMEM; 185 return -ENOMEM;
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 038ed7436199..2c9e8f5d13aa 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -369,7 +369,7 @@ out:
369 369
370 370
371/** 371/**
372 * int journal_restart() - restart a handle . 372 * int journal_restart() - restart a handle.
373 * @handle: handle to restart 373 * @handle: handle to restart
374 * @nblocks: nr credits requested 374 * @nblocks: nr credits requested
375 * 375 *
@@ -844,8 +844,7 @@ out:
844} 844}
845 845
846/** 846/**
847 * int journal_get_undo_access() - Notify intent to modify metadata with 847 * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences
848 * non-rewindable consequences
849 * @handle: transaction 848 * @handle: transaction
850 * @bh: buffer to undo 849 * @bh: buffer to undo
851 * @credits: store the number of taken credits here (if not NULL) 850 * @credits: store the number of taken credits here (if not NULL)
@@ -921,12 +920,14 @@ out:
921} 920}
922 921
923/** 922/**
924 * int journal_dirty_data() - mark a buffer as containing dirty data which 923 * int journal_dirty_data() - mark a buffer as containing dirty data to be flushed
925 * needs to be flushed before we can commit the
926 * current transaction.
927 * @handle: transaction 924 * @handle: transaction
928 * @bh: bufferhead to mark 925 * @bh: bufferhead to mark
929 * 926 *
927 * Description:
928 * Mark a buffer as containing dirty data which needs to be flushed before
929 * we can commit the current transaction.
930 *
930 * The buffer is placed on the transaction's data list and is marked as 931 * The buffer is placed on the transaction's data list and is marked as
931 * belonging to the transaction. 932 * belonging to the transaction.
932 * 933 *
@@ -1098,11 +1099,11 @@ no_journal:
1098} 1099}
1099 1100
1100/** 1101/**
1101 * int journal_dirty_metadata() - mark a buffer as containing dirty metadata 1102 * int journal_dirty_metadata() - mark a buffer as containing dirty metadata
1102 * @handle: transaction to add buffer to. 1103 * @handle: transaction to add buffer to.
1103 * @bh: buffer to mark 1104 * @bh: buffer to mark
1104 * 1105 *
1105 * mark dirty metadata which needs to be journaled as part of the current 1106 * Mark dirty metadata which needs to be journaled as part of the current
1106 * transaction. 1107 * transaction.
1107 * 1108 *
1108 * The buffer is placed on the transaction's metadata list and is marked 1109 * The buffer is placed on the transaction's metadata list and is marked
@@ -1425,7 +1426,8 @@ int journal_stop(handle_t *handle)
1425 return err; 1426 return err;
1426} 1427}
1427 1428
1428/**int journal_force_commit() - force any uncommitted transactions 1429/**
1430 * int journal_force_commit() - force any uncommitted transactions
1429 * @journal: journal to force 1431 * @journal: journal to force
1430 * 1432 *
1431 * For synchronous operations: force any uncommitted transactions 1433 * For synchronous operations: force any uncommitted transactions
@@ -1902,13 +1904,12 @@ zap_buffer_unlocked:
1902} 1904}
1903 1905
1904/** 1906/**
1905 * void journal_invalidatepage() 1907 * void journal_invalidatepage() - invalidate a journal page
1906 * @journal: journal to use for flush... 1908 * @journal: journal to use for flush
1907 * @page: page to flush 1909 * @page: page to flush
1908 * @offset: length of page to invalidate. 1910 * @offset: length of page to invalidate.
1909 * 1911 *
1910 * Reap page buffers containing data after offset in page. 1912 * Reap page buffers containing data after offset in page.
1911 *
1912 */ 1913 */
1913void journal_invalidatepage(journal_t *journal, 1914void journal_invalidatepage(journal_t *journal,
1914 struct page *page, 1915 struct page *page,
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 96ba846992e9..954cff001df6 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -219,7 +219,7 @@ static int jbd2_journal_start_thread(journal_t *journal)
219 if (IS_ERR(t)) 219 if (IS_ERR(t))
220 return PTR_ERR(t); 220 return PTR_ERR(t);
221 221
222 wait_event(journal->j_wait_done_commit, journal->j_task != 0); 222 wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
223 return 0; 223 return 0;
224} 224}
225 225
@@ -231,7 +231,7 @@ static void journal_kill_thread(journal_t *journal)
231 while (journal->j_task) { 231 while (journal->j_task) {
232 wake_up(&journal->j_wait_commit); 232 wake_up(&journal->j_wait_commit);
233 spin_unlock(&journal->j_state_lock); 233 spin_unlock(&journal->j_state_lock);
234 wait_event(journal->j_wait_done_commit, journal->j_task == 0); 234 wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
235 spin_lock(&journal->j_state_lock); 235 spin_lock(&journal->j_state_lock);
236 } 236 }
237 spin_unlock(&journal->j_state_lock); 237 spin_unlock(&journal->j_state_lock);
@@ -1969,14 +1969,14 @@ static int journal_init_jbd2_journal_head_cache(void)
1969{ 1969{
1970 int retval; 1970 int retval;
1971 1971
1972 J_ASSERT(jbd2_journal_head_cache == 0); 1972 J_ASSERT(jbd2_journal_head_cache == NULL);
1973 jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", 1973 jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
1974 sizeof(struct journal_head), 1974 sizeof(struct journal_head),
1975 0, /* offset */ 1975 0, /* offset */
1976 SLAB_TEMPORARY, /* flags */ 1976 SLAB_TEMPORARY, /* flags */
1977 NULL); /* ctor */ 1977 NULL); /* ctor */
1978 retval = 0; 1978 retval = 0;
1979 if (jbd2_journal_head_cache == 0) { 1979 if (!jbd2_journal_head_cache) {
1980 retval = -ENOMEM; 1980 retval = -ENOMEM;
1981 printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); 1981 printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
1982 } 1982 }
@@ -2002,14 +2002,14 @@ static struct journal_head *journal_alloc_journal_head(void)
2002 atomic_inc(&nr_journal_heads); 2002 atomic_inc(&nr_journal_heads);
2003#endif 2003#endif
2004 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); 2004 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
2005 if (ret == 0) { 2005 if (!ret) {
2006 jbd_debug(1, "out of memory for journal_head\n"); 2006 jbd_debug(1, "out of memory for journal_head\n");
2007 if (time_after(jiffies, last_warning + 5*HZ)) { 2007 if (time_after(jiffies, last_warning + 5*HZ)) {
2008 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 2008 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
2009 __FUNCTION__); 2009 __FUNCTION__);
2010 last_warning = jiffies; 2010 last_warning = jiffies;
2011 } 2011 }
2012 while (ret == 0) { 2012 while (!ret) {
2013 yield(); 2013 yield();
2014 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); 2014 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
2015 } 2015 }
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 146411387ada..5d0405a9e7ca 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -535,7 +535,7 @@ static int do_one_pass(journal_t *journal,
535 memcpy(nbh->b_data, obh->b_data, 535 memcpy(nbh->b_data, obh->b_data,
536 journal->j_blocksize); 536 journal->j_blocksize);
537 if (flags & JBD2_FLAG_ESCAPE) { 537 if (flags & JBD2_FLAG_ESCAPE) {
538 *((__be32 *)bh->b_data) = 538 *((__be32 *)nbh->b_data) =
539 cpu_to_be32(JBD2_MAGIC_NUMBER); 539 cpu_to_be32(JBD2_MAGIC_NUMBER);
540 } 540 }
541 541
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index df36f42e19e1..2e1453a5e998 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -174,13 +174,13 @@ int __init jbd2_journal_init_revoke_caches(void)
174 0, 174 0,
175 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 175 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
176 NULL); 176 NULL);
177 if (jbd2_revoke_record_cache == 0) 177 if (!jbd2_revoke_record_cache)
178 return -ENOMEM; 178 return -ENOMEM;
179 179
180 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", 180 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
181 sizeof(struct jbd2_revoke_table_s), 181 sizeof(struct jbd2_revoke_table_s),
182 0, SLAB_TEMPORARY, NULL); 182 0, SLAB_TEMPORARY, NULL);
183 if (jbd2_revoke_table_cache == 0) { 183 if (!jbd2_revoke_table_cache) {
184 kmem_cache_destroy(jbd2_revoke_record_cache); 184 kmem_cache_destroy(jbd2_revoke_record_cache);
185 jbd2_revoke_record_cache = NULL; 185 jbd2_revoke_record_cache = NULL;
186 return -ENOMEM; 186 return -ENOMEM;
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index f9c5dd6f4b64..dcc2734e0b5d 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -129,7 +129,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
129 struct inode *inode = mapping->host; 129 struct inode *inode = mapping->host;
130 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 130 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
131 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 131 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
132 uint32_t pageofs = pos & (PAGE_CACHE_SIZE - 1); 132 uint32_t pageofs = index << PAGE_CACHE_SHIFT;
133 int ret = 0; 133 int ret = 0;
134 134
135 pg = __grab_cache_page(mapping, index); 135 pg = __grab_cache_page(mapping, index);
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 0b78fdc9773b..a841f4973a74 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -15,7 +15,7 @@
15#include <linux/version.h> 15#include <linux/version.h>
16#include <linux/rbtree.h> 16#include <linux/rbtree.h>
17#include <linux/posix_acl.h> 17#include <linux/posix_acl.h>
18#include <asm/semaphore.h> 18#include <linux/semaphore.h>
19 19
20struct jffs2_inode_info { 20struct jffs2_inode_info {
21 /* We need an internal mutex similar to inode->i_mutex. 21 /* We need an internal mutex similar to inode->i_mutex.
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 3a2197f3c812..18fca2b9e531 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -16,7 +16,7 @@
16#include <linux/spinlock.h> 16#include <linux/spinlock.h>
17#include <linux/workqueue.h> 17#include <linux/workqueue.h>
18#include <linux/completion.h> 18#include <linux/completion.h>
19#include <asm/semaphore.h> 19#include <linux/semaphore.h>
20#include <linux/timer.h> 20#include <linux/timer.h>
21#include <linux/wait.h> 21#include <linux/wait.h>
22#include <linux/list.h> 22#include <linux/list.h>
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index a1f8e375ad21..afe222bf300f 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -8,6 +8,7 @@
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/ctype.h> 9#include <linux/ctype.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/mount.h>
11#include <linux/time.h> 12#include <linux/time.h>
12#include <linux/sched.h> 13#include <linux/sched.h>
13#include <asm/current.h> 14#include <asm/current.h>
@@ -65,23 +66,30 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
65 return put_user(flags, (int __user *) arg); 66 return put_user(flags, (int __user *) arg);
66 case JFS_IOC_SETFLAGS: { 67 case JFS_IOC_SETFLAGS: {
67 unsigned int oldflags; 68 unsigned int oldflags;
69 int err;
68 70
69 if (IS_RDONLY(inode)) 71 err = mnt_want_write(filp->f_path.mnt);
70 return -EROFS; 72 if (err)
73 return err;
71 74
72 if (!is_owner_or_cap(inode)) 75 if (!is_owner_or_cap(inode)) {
73 return -EACCES; 76 err = -EACCES;
74 77 goto setflags_out;
75 if (get_user(flags, (int __user *) arg)) 78 }
76 return -EFAULT; 79 if (get_user(flags, (int __user *) arg)) {
80 err = -EFAULT;
81 goto setflags_out;
82 }
77 83
78 flags = jfs_map_ext2(flags, 1); 84 flags = jfs_map_ext2(flags, 1);
79 if (!S_ISDIR(inode->i_mode)) 85 if (!S_ISDIR(inode->i_mode))
80 flags &= ~JFS_DIRSYNC_FL; 86 flags &= ~JFS_DIRSYNC_FL;
81 87
82 /* Is it quota file? Do not allow user to mess with it */ 88 /* Is it quota file? Do not allow user to mess with it */
83 if (IS_NOQUOTA(inode)) 89 if (IS_NOQUOTA(inode)) {
84 return -EPERM; 90 err = -EPERM;
91 goto setflags_out;
92 }
85 93
86 /* Lock against other parallel changes of flags */ 94 /* Lock against other parallel changes of flags */
87 mutex_lock(&inode->i_mutex); 95 mutex_lock(&inode->i_mutex);
@@ -98,7 +106,8 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
98 (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { 106 (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
99 if (!capable(CAP_LINUX_IMMUTABLE)) { 107 if (!capable(CAP_LINUX_IMMUTABLE)) {
100 mutex_unlock(&inode->i_mutex); 108 mutex_unlock(&inode->i_mutex);
101 return -EPERM; 109 err = -EPERM;
110 goto setflags_out;
102 } 111 }
103 } 112 }
104 113
@@ -110,7 +119,9 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
110 mutex_unlock(&inode->i_mutex); 119 mutex_unlock(&inode->i_mutex);
111 inode->i_ctime = CURRENT_TIME_SEC; 120 inode->i_ctime = CURRENT_TIME_SEC;
112 mark_inode_dirty(inode); 121 mark_inode_dirty(inode);
113 return 0; 122setflags_out:
123 mnt_drop_write(filp->f_path.mnt);
124 return err;
114 } 125 }
115 default: 126 default:
116 return -ENOTTY; 127 return -ENOTTY;
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index e1985066b1c6..2bc7d8aa5740 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -2172,7 +2172,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2172 } 2172 }
2173 2173
2174 /* update the free count for this dmap */ 2174 /* update the free count for this dmap */
2175 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks); 2175 le32_add_cpu(&dp->nfree, -nblocks);
2176 2176
2177 BMAP_LOCK(bmp); 2177 BMAP_LOCK(bmp);
2178 2178
@@ -2316,7 +2316,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2316 2316
2317 /* update the free count for this dmap. 2317 /* update the free count for this dmap.
2318 */ 2318 */
2319 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks); 2319 le32_add_cpu(&dp->nfree, nblocks);
2320 2320
2321 BMAP_LOCK(bmp); 2321 BMAP_LOCK(bmp);
2322 2322
@@ -3226,7 +3226,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
3226 } 3226 }
3227 3227
3228 /* update the free count for this dmap */ 3228 /* update the free count for this dmap */
3229 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks); 3229 le32_add_cpu(&dp->nfree, -nblocks);
3230 3230
3231 /* reconstruct summary tree */ 3231 /* reconstruct summary tree */
3232 dbInitDmapTree(dp); 3232 dbInitDmapTree(dp);
@@ -3660,9 +3660,8 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks)
3660 goto initTree; 3660 goto initTree;
3661 } 3661 }
3662 } else { 3662 } else {
3663 dp->nblocks = 3663 le32_add_cpu(&dp->nblocks, nblocks);
3664 cpu_to_le32(le32_to_cpu(dp->nblocks) + nblocks); 3664 le32_add_cpu(&dp->nfree, nblocks);
3665 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks);
3666 } 3665 }
3667 3666
3668 /* word number containing start block number */ 3667 /* word number containing start block number */
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 11e6d471b364..1a6eb41569bc 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -61,7 +61,7 @@
61 * determine the maximum free string for four (lower level) nodes 61 * determine the maximum free string for four (lower level) nodes
62 * of the tree. 62 * of the tree.
63 */ 63 */
64static __inline signed char TREEMAX(signed char *cp) 64static inline signed char TREEMAX(signed char *cp)
65{ 65{
66 signed char tmp1, tmp2; 66 signed char tmp1, tmp2;
67 67
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 9bf29f771737..734ec916beaf 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -1019,8 +1019,7 @@ int diFree(struct inode *ip)
1019 /* update the free inode counts at the iag, ag and 1019 /* update the free inode counts at the iag, ag and
1020 * map level. 1020 * map level.
1021 */ 1021 */
1022 iagp->nfreeinos = 1022 le32_add_cpu(&iagp->nfreeinos, 1);
1023 cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 1);
1024 imap->im_agctl[agno].numfree += 1; 1023 imap->im_agctl[agno].numfree += 1;
1025 atomic_inc(&imap->im_numfree); 1024 atomic_inc(&imap->im_numfree);
1026 1025
@@ -1219,9 +1218,8 @@ int diFree(struct inode *ip)
1219 /* update the number of free inodes and number of free extents 1218 /* update the number of free inodes and number of free extents
1220 * for the iag. 1219 * for the iag.
1221 */ 1220 */
1222 iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1221 le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1));
1223 (INOSPEREXT - 1)); 1222 le32_add_cpu(&iagp->nfreeexts, 1);
1224 iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) + 1);
1225 1223
1226 /* update the number of free inodes and backed inodes 1224 /* update the number of free inodes and backed inodes
1227 * at the ag and inode map level. 1225 * at the ag and inode map level.
@@ -2124,7 +2122,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2124 /* update the free inode count at the iag, ag, inode 2122 /* update the free inode count at the iag, ag, inode
2125 * map levels. 2123 * map levels.
2126 */ 2124 */
2127 iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1); 2125 le32_add_cpu(&iagp->nfreeinos, -1);
2128 imap->im_agctl[agno].numfree -= 1; 2126 imap->im_agctl[agno].numfree -= 1;
2129 atomic_dec(&imap->im_numfree); 2127 atomic_dec(&imap->im_numfree);
2130 2128
@@ -2378,9 +2376,8 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2378 /* update the free inode and free extent counts for the 2376 /* update the free inode and free extent counts for the
2379 * iag. 2377 * iag.
2380 */ 2378 */
2381 iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 2379 le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1));
2382 (INOSPEREXT - 1)); 2380 le32_add_cpu(&iagp->nfreeexts, -1);
2383 iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) - 1);
2384 2381
2385 /* update the free and backed inode counts for the ag. 2382 /* update the free and backed inode counts for the ag.
2386 */ 2383 */
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index a000aaa75136..5a61ebf2cbcc 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -905,8 +905,7 @@ int xtInsert(tid_t tid, /* transaction id */
905 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 905 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
906 906
907 /* advance next available entry index */ 907 /* advance next available entry index */
908 p->header.nextindex = 908 le16_add_cpu(&p->header.nextindex, 1);
909 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
910 909
911 /* Don't log it if there are no links to the file */ 910 /* Don't log it if there are no links to the file */
912 if (!test_cflag(COMMIT_Nolink, ip)) { 911 if (!test_cflag(COMMIT_Nolink, ip)) {
@@ -997,8 +996,7 @@ xtSplitUp(tid_t tid,
997 split->addr); 996 split->addr);
998 997
999 /* advance next available entry index */ 998 /* advance next available entry index */
1000 sp->header.nextindex = 999 le16_add_cpu(&sp->header.nextindex, 1);
1001 cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1);
1002 1000
1003 /* Don't log it if there are no links to the file */ 1001 /* Don't log it if there are no links to the file */
1004 if (!test_cflag(COMMIT_Nolink, ip)) { 1002 if (!test_cflag(COMMIT_Nolink, ip)) {
@@ -1167,9 +1165,7 @@ xtSplitUp(tid_t tid,
1167 JFS_SBI(ip->i_sb)->nbperpage, rcbn); 1165 JFS_SBI(ip->i_sb)->nbperpage, rcbn);
1168 1166
1169 /* advance next available entry index. */ 1167 /* advance next available entry index. */
1170 sp->header.nextindex = 1168 le16_add_cpu(&sp->header.nextindex, 1);
1171 cpu_to_le16(le16_to_cpu(sp->header.nextindex) +
1172 1);
1173 1169
1174 /* Don't log it if there are no links to the file */ 1170 /* Don't log it if there are no links to the file */
1175 if (!test_cflag(COMMIT_Nolink, ip)) { 1171 if (!test_cflag(COMMIT_Nolink, ip)) {
@@ -1738,8 +1734,7 @@ int xtExtend(tid_t tid, /* transaction id */
1738 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); 1734 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
1739 1735
1740 /* advance next available entry index */ 1736 /* advance next available entry index */
1741 p->header.nextindex = 1737 le16_add_cpu(&p->header.nextindex, 1);
1742 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1743 } 1738 }
1744 1739
1745 /* get back old entry */ 1740 /* get back old entry */
@@ -1905,8 +1900,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1905 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1900 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
1906 1901
1907 /* advance next available entry index */ 1902 /* advance next available entry index */
1908 p->header.nextindex = 1903 le16_add_cpu(&p->header.nextindex, 1);
1909 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1910 } 1904 }
1911 1905
1912 /* get back old XAD */ 1906 /* get back old XAD */
@@ -2567,8 +2561,7 @@ int xtAppend(tid_t tid, /* transaction id */
2567 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2561 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2568 2562
2569 /* advance next available entry index */ 2563 /* advance next available entry index */
2570 p->header.nextindex = 2564 le16_add_cpu(&p->header.nextindex, 1);
2571 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2572 2565
2573 xtlck->lwm.offset = 2566 xtlck->lwm.offset =
2574 (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; 2567 (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
@@ -2631,8 +2624,7 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
2631 * delete the entry from the leaf page 2624 * delete the entry from the leaf page
2632 */ 2625 */
2633 nextindex = le16_to_cpu(p->header.nextindex); 2626 nextindex = le16_to_cpu(p->header.nextindex);
2634 p->header.nextindex = 2627 le16_add_cpu(&p->header.nextindex, -1);
2635 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1);
2636 2628
2637 /* 2629 /*
2638 * if the leaf page bocome empty, free the page 2630 * if the leaf page bocome empty, free the page
@@ -2795,9 +2787,7 @@ xtDeleteUp(tid_t tid, struct inode *ip,
2795 (nextindex - index - 2787 (nextindex - index -
2796 1) << L2XTSLOTSIZE); 2788 1) << L2XTSLOTSIZE);
2797 2789
2798 p->header.nextindex = 2790 le16_add_cpu(&p->header.nextindex, -1);
2799 cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2800 1);
2801 jfs_info("xtDeleteUp(entry): 0x%lx[%d]", 2791 jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
2802 (ulong) parent->bn, index); 2792 (ulong) parent->bn, index);
2803 } 2793 }
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 08226464e563..1ed8bd4de941 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -153,7 +153,7 @@ lockd(struct svc_rqst *rqstp)
153 */ 153 */
154 while ((nlmsvc_users || !signalled()) && nlmsvc_pid == current->pid) { 154 while ((nlmsvc_users || !signalled()) && nlmsvc_pid == current->pid) {
155 long timeout = MAX_SCHEDULE_TIMEOUT; 155 long timeout = MAX_SCHEDULE_TIMEOUT;
156 char buf[RPC_MAX_ADDRBUFLEN]; 156 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
157 157
158 if (signalled()) { 158 if (signalled()) {
159 flush_signals(current); 159 flush_signals(current);
diff --git a/fs/locks.c b/fs/locks.c
index f36f0e61558d..592faadbcec1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -127,7 +127,6 @@
127#include <linux/rcupdate.h> 127#include <linux/rcupdate.h>
128#include <linux/pid_namespace.h> 128#include <linux/pid_namespace.h>
129 129
130#include <asm/semaphore.h>
131#include <asm/uaccess.h> 130#include <asm/uaccess.h>
132 131
133#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) 132#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
@@ -1275,13 +1274,13 @@ out:
1275EXPORT_SYMBOL(__break_lease); 1274EXPORT_SYMBOL(__break_lease);
1276 1275
1277/** 1276/**
1278 * lease_get_mtime 1277 * lease_get_mtime - get the last modified time of an inode
1279 * @inode: the inode 1278 * @inode: the inode
1280 * @time: pointer to a timespec which will contain the last modified time 1279 * @time: pointer to a timespec which will contain the last modified time
1281 * 1280 *
1282 * This is to force NFS clients to flush their caches for files with 1281 * This is to force NFS clients to flush their caches for files with
1283 * exclusive leases. The justification is that if someone has an 1282 * exclusive leases. The justification is that if someone has an
1284 * exclusive lease, then they could be modifiying it. 1283 * exclusive lease, then they could be modifying it.
1285 */ 1284 */
1286void lease_get_mtime(struct inode *inode, struct timespec *time) 1285void lease_get_mtime(struct inode *inode, struct timespec *time)
1287{ 1286{
@@ -1801,17 +1800,21 @@ again:
1801 if (error) 1800 if (error)
1802 goto out; 1801 goto out;
1803 1802
1804 for (;;) { 1803 if (filp->f_op && filp->f_op->lock != NULL)
1805 error = vfs_lock_file(filp, cmd, file_lock, NULL); 1804 error = filp->f_op->lock(filp, cmd, file_lock);
1806 if (error != -EAGAIN || cmd == F_SETLK) 1805 else {
1807 break; 1806 for (;;) {
1808 error = wait_event_interruptible(file_lock->fl_wait, 1807 error = posix_lock_file(filp, file_lock, NULL);
1809 !file_lock->fl_next); 1808 if (error != -EAGAIN || cmd == F_SETLK)
1810 if (!error) 1809 break;
1811 continue; 1810 error = wait_event_interruptible(file_lock->fl_wait,
1811 !file_lock->fl_next);
1812 if (!error)
1813 continue;
1812 1814
1813 locks_delete_block(file_lock); 1815 locks_delete_block(file_lock);
1814 break; 1816 break;
1817 }
1815 } 1818 }
1816 1819
1817 /* 1820 /*
@@ -1925,17 +1928,21 @@ again:
1925 if (error) 1928 if (error)
1926 goto out; 1929 goto out;
1927 1930
1928 for (;;) { 1931 if (filp->f_op && filp->f_op->lock != NULL)
1929 error = vfs_lock_file(filp, cmd, file_lock, NULL); 1932 error = filp->f_op->lock(filp, cmd, file_lock);
1930 if (error != -EAGAIN || cmd == F_SETLK64) 1933 else {
1931 break; 1934 for (;;) {
1932 error = wait_event_interruptible(file_lock->fl_wait, 1935 error = posix_lock_file(filp, file_lock, NULL);
1933 !file_lock->fl_next); 1936 if (error != -EAGAIN || cmd == F_SETLK64)
1934 if (!error) 1937 break;
1935 continue; 1938 error = wait_event_interruptible(file_lock->fl_wait,
1939 !file_lock->fl_next);
1940 if (!error)
1941 continue;
1936 1942
1937 locks_delete_block(file_lock); 1943 locks_delete_block(file_lock);
1938 break; 1944 break;
1945 }
1939 } 1946 }
1940 1947
1941 /* 1948 /*
diff --git a/fs/mbcache.c b/fs/mbcache.c
index eb31b73e7d69..ec88ff3d04a9 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -399,11 +399,11 @@ mb_cache_destroy(struct mb_cache *cache)
399 * if no more memory was available. 399 * if no more memory was available.
400 */ 400 */
401struct mb_cache_entry * 401struct mb_cache_entry *
402mb_cache_entry_alloc(struct mb_cache *cache) 402mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
403{ 403{
404 struct mb_cache_entry *ce; 404 struct mb_cache_entry *ce;
405 405
406 ce = kmem_cache_alloc(cache->c_entry_cache, GFP_KERNEL); 406 ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
407 if (ce) { 407 if (ce) {
408 atomic_inc(&cache->c_entry_count); 408 atomic_inc(&cache->c_entry_count);
409 INIT_LIST_HEAD(&ce->e_lru_list); 409 INIT_LIST_HEAD(&ce->e_lru_list);
diff --git a/fs/mpage.c b/fs/mpage.c
index 5df564366f36..235e4d3873a8 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -325,16 +325,12 @@ confused:
325} 325}
326 326
327/** 327/**
328 * mpage_readpages - populate an address space with some pages, and 328 * mpage_readpages - populate an address space with some pages & start reads against them
329 * start reads against them.
330 *
331 * @mapping: the address_space 329 * @mapping: the address_space
332 * @pages: The address of a list_head which contains the target pages. These 330 * @pages: The address of a list_head which contains the target pages. These
333 * pages have their ->index populated and are otherwise uninitialised. 331 * pages have their ->index populated and are otherwise uninitialised.
334 *
335 * The page at @pages->prev has the lowest file offset, and reads should be 332 * The page at @pages->prev has the lowest file offset, and reads should be
336 * issued in @pages->prev to @pages->next order. 333 * issued in @pages->prev to @pages->next order.
337 *
338 * @nr_pages: The number of pages at *@pages 334 * @nr_pages: The number of pages at *@pages
339 * @get_block: The filesystem's block mapper function. 335 * @get_block: The filesystem's block mapper function.
340 * 336 *
@@ -360,6 +356,7 @@ confused:
360 * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be 356 * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
361 * submitted in the following order: 357 * submitted in the following order:
362 * 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16 358 * 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
359 *
363 * because the indirect block has to be read to get the mappings of blocks 360 * because the indirect block has to be read to get the mappings of blocks
364 * 13,14,15,16. Obviously, this impacts performance. 361 * 13,14,15,16. Obviously, this impacts performance.
365 * 362 *
@@ -656,9 +653,7 @@ out:
656} 653}
657 654
658/** 655/**
659 * mpage_writepages - walk the list of dirty pages of the given 656 * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
660 * address space and writepage() all of them.
661 *
662 * @mapping: address space structure to write 657 * @mapping: address space structure to write
663 * @wbc: subtract the number of written pages from *@wbc->nr_to_write 658 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
664 * @get_block: the filesystem's block mapper function. 659 * @get_block: the filesystem's block mapper function.
diff --git a/fs/namei.c b/fs/namei.c
index 941c8e8228c0..e179f71bfcb0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -106,7 +106,7 @@
106 * any extra contention... 106 * any extra contention...
107 */ 107 */
108 108
109static int link_path_walk(const char *name, struct nameidata *nd); 109static int __link_path_walk(const char *name, struct nameidata *nd);
110 110
111/* In order to reduce some races, while at the same time doing additional 111/* In order to reduce some races, while at the same time doing additional
112 * checking and hopefully speeding things up, we copy filenames to the 112 * checking and hopefully speeding things up, we copy filenames to the
@@ -563,6 +563,37 @@ walk_init_root(const char *name, struct nameidata *nd)
563 return 1; 563 return 1;
564} 564}
565 565
566/*
567 * Wrapper to retry pathname resolution whenever the underlying
568 * file system returns an ESTALE.
569 *
570 * Retry the whole path once, forcing real lookup requests
571 * instead of relying on the dcache.
572 */
573static __always_inline int link_path_walk(const char *name, struct nameidata *nd)
574{
575 struct path save = nd->path;
576 int result;
577
578 /* make sure the stuff we saved doesn't go away */
579 dget(save.dentry);
580 mntget(save.mnt);
581
582 result = __link_path_walk(name, nd);
583 if (result == -ESTALE) {
584 /* nd->path had been dropped */
585 nd->path = save;
586 dget(nd->path.dentry);
587 mntget(nd->path.mnt);
588 nd->flags |= LOOKUP_REVAL;
589 result = __link_path_walk(name, nd);
590 }
591
592 path_put(&save);
593
594 return result;
595}
596
566static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 597static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
567{ 598{
568 int res = 0; 599 int res = 0;
@@ -1020,36 +1051,6 @@ return_err:
1020 return err; 1051 return err;
1021} 1052}
1022 1053
1023/*
1024 * Wrapper to retry pathname resolution whenever the underlying
1025 * file system returns an ESTALE.
1026 *
1027 * Retry the whole path once, forcing real lookup requests
1028 * instead of relying on the dcache.
1029 */
1030static int link_path_walk(const char *name, struct nameidata *nd)
1031{
1032 struct nameidata save = *nd;
1033 int result;
1034
1035 /* make sure the stuff we saved doesn't go away */
1036 dget(save.path.dentry);
1037 mntget(save.path.mnt);
1038
1039 result = __link_path_walk(name, nd);
1040 if (result == -ESTALE) {
1041 *nd = save;
1042 dget(nd->path.dentry);
1043 mntget(nd->path.mnt);
1044 nd->flags |= LOOKUP_REVAL;
1045 result = __link_path_walk(name, nd);
1046 }
1047
1048 path_put(&save.path);
1049
1050 return result;
1051}
1052
1053static int path_walk(const char *name, struct nameidata *nd) 1054static int path_walk(const char *name, struct nameidata *nd)
1054{ 1055{
1055 current->total_link_count = 0; 1056 current->total_link_count = 0;
@@ -1364,13 +1365,13 @@ static int __lookup_one_len(const char *name, struct qstr *this,
1364} 1365}
1365 1366
1366/** 1367/**
1367 * lookup_one_len: filesystem helper to lookup single pathname component 1368 * lookup_one_len - filesystem helper to lookup single pathname component
1368 * @name: pathname component to lookup 1369 * @name: pathname component to lookup
1369 * @base: base directory to lookup from 1370 * @base: base directory to lookup from
1370 * @len: maximum length @len should be interpreted to 1371 * @len: maximum length @len should be interpreted to
1371 * 1372 *
1372 * Note that this routine is purely a helper for filesystem useage and should 1373 * Note that this routine is purely a helper for filesystem usage and should
1373 * not be called by generic code. Also note that by using this function to 1374 * not be called by generic code. Also note that by using this function the
1374 * nameidata argument is passed to the filesystem methods and a filesystem 1375 * nameidata argument is passed to the filesystem methods and a filesystem
1375 * using this helper needs to be prepared for that. 1376 * using this helper needs to be prepared for that.
1376 */ 1377 */
@@ -1622,8 +1623,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1622 return -EACCES; 1623 return -EACCES;
1623 1624
1624 flag &= ~O_TRUNC; 1625 flag &= ~O_TRUNC;
1625 } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE)) 1626 }
1626 return -EROFS;
1627 1627
1628 error = vfs_permission(nd, acc_mode); 1628 error = vfs_permission(nd, acc_mode);
1629 if (error) 1629 if (error)
@@ -1676,7 +1676,12 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1676 return 0; 1676 return 0;
1677} 1677}
1678 1678
1679static int open_namei_create(struct nameidata *nd, struct path *path, 1679/*
1680 * Be careful about ever adding any more callers of this
1681 * function. Its flags must be in the namei format, not
1682 * what get passed to sys_open().
1683 */
1684static int __open_namei_create(struct nameidata *nd, struct path *path,
1680 int flag, int mode) 1685 int flag, int mode)
1681{ 1686{
1682 int error; 1687 int error;
@@ -1695,26 +1700,56 @@ static int open_namei_create(struct nameidata *nd, struct path *path,
1695} 1700}
1696 1701
1697/* 1702/*
1698 * open_namei() 1703 * Note that while the flag value (low two bits) for sys_open means:
1704 * 00 - read-only
1705 * 01 - write-only
1706 * 10 - read-write
1707 * 11 - special
1708 * it is changed into
1709 * 00 - no permissions needed
1710 * 01 - read-permission
1711 * 10 - write-permission
1712 * 11 - read-write
1713 * for the internal routines (ie open_namei()/follow_link() etc)
1714 * This is more logical, and also allows the 00 "no perm needed"
1715 * to be used for symlinks (where the permissions are checked
1716 * later).
1699 * 1717 *
1700 * namei for open - this is in fact almost the whole open-routine. 1718*/
1701 * 1719static inline int open_to_namei_flags(int flag)
1702 * Note that the low bits of "flag" aren't the same as in the open 1720{
1703 * system call - they are 00 - no permissions needed 1721 if ((flag+1) & O_ACCMODE)
1704 * 01 - read permission needed 1722 flag++;
1705 * 10 - write permission needed 1723 return flag;
1706 * 11 - read/write permissions needed 1724}
1707 * which is a lot more logical, and also allows the "no perm" needed 1725
1708 * for symlinks (where the permissions are checked later). 1726static int open_will_write_to_fs(int flag, struct inode *inode)
1709 * SMP-safe 1727{
1728 /*
1729 * We'll never write to the fs underlying
1730 * a device file.
1731 */
1732 if (special_file(inode->i_mode))
1733 return 0;
1734 return (flag & O_TRUNC);
1735}
1736
1737/*
1738 * Note that the low bits of the passed in "open_flag"
1739 * are not the same as in the local variable "flag". See
1740 * open_to_namei_flags() for more details.
1710 */ 1741 */
1711int open_namei(int dfd, const char *pathname, int flag, 1742struct file *do_filp_open(int dfd, const char *pathname,
1712 int mode, struct nameidata *nd) 1743 int open_flag, int mode)
1713{ 1744{
1745 struct file *filp;
1746 struct nameidata nd;
1714 int acc_mode, error; 1747 int acc_mode, error;
1715 struct path path; 1748 struct path path;
1716 struct dentry *dir; 1749 struct dentry *dir;
1717 int count = 0; 1750 int count = 0;
1751 int will_write;
1752 int flag = open_to_namei_flags(open_flag);
1718 1753
1719 acc_mode = ACC_MODE(flag); 1754 acc_mode = ACC_MODE(flag);
1720 1755
@@ -1732,18 +1767,19 @@ int open_namei(int dfd, const char *pathname, int flag,
1732 */ 1767 */
1733 if (!(flag & O_CREAT)) { 1768 if (!(flag & O_CREAT)) {
1734 error = path_lookup_open(dfd, pathname, lookup_flags(flag), 1769 error = path_lookup_open(dfd, pathname, lookup_flags(flag),
1735 nd, flag); 1770 &nd, flag);
1736 if (error) 1771 if (error)
1737 return error; 1772 return ERR_PTR(error);
1738 goto ok; 1773 goto ok;
1739 } 1774 }
1740 1775
1741 /* 1776 /*
1742 * Create - we need to know the parent. 1777 * Create - we need to know the parent.
1743 */ 1778 */
1744 error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode); 1779 error = path_lookup_create(dfd, pathname, LOOKUP_PARENT,
1780 &nd, flag, mode);
1745 if (error) 1781 if (error)
1746 return error; 1782 return ERR_PTR(error);
1747 1783
1748 /* 1784 /*
1749 * We have the parent and last component. First of all, check 1785 * We have the parent and last component. First of all, check
@@ -1751,14 +1787,14 @@ int open_namei(int dfd, const char *pathname, int flag,
1751 * will not do. 1787 * will not do.
1752 */ 1788 */
1753 error = -EISDIR; 1789 error = -EISDIR;
1754 if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) 1790 if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
1755 goto exit; 1791 goto exit;
1756 1792
1757 dir = nd->path.dentry; 1793 dir = nd.path.dentry;
1758 nd->flags &= ~LOOKUP_PARENT; 1794 nd.flags &= ~LOOKUP_PARENT;
1759 mutex_lock(&dir->d_inode->i_mutex); 1795 mutex_lock(&dir->d_inode->i_mutex);
1760 path.dentry = lookup_hash(nd); 1796 path.dentry = lookup_hash(&nd);
1761 path.mnt = nd->path.mnt; 1797 path.mnt = nd.path.mnt;
1762 1798
1763do_last: 1799do_last:
1764 error = PTR_ERR(path.dentry); 1800 error = PTR_ERR(path.dentry);
@@ -1767,18 +1803,31 @@ do_last:
1767 goto exit; 1803 goto exit;
1768 } 1804 }
1769 1805
1770 if (IS_ERR(nd->intent.open.file)) { 1806 if (IS_ERR(nd.intent.open.file)) {
1771 mutex_unlock(&dir->d_inode->i_mutex); 1807 error = PTR_ERR(nd.intent.open.file);
1772 error = PTR_ERR(nd->intent.open.file); 1808 goto exit_mutex_unlock;
1773 goto exit_dput;
1774 } 1809 }
1775 1810
1776 /* Negative dentry, just create the file */ 1811 /* Negative dentry, just create the file */
1777 if (!path.dentry->d_inode) { 1812 if (!path.dentry->d_inode) {
1778 error = open_namei_create(nd, &path, flag, mode); 1813 /*
1814 * This write is needed to ensure that a
1815 * ro->rw transition does not occur between
1816 * the time when the file is created and when
1817 * a permanent write count is taken through
1818 * the 'struct file' in nameidata_to_filp().
1819 */
1820 error = mnt_want_write(nd.path.mnt);
1779 if (error) 1821 if (error)
1822 goto exit_mutex_unlock;
1823 error = __open_namei_create(&nd, &path, flag, mode);
1824 if (error) {
1825 mnt_drop_write(nd.path.mnt);
1780 goto exit; 1826 goto exit;
1781 return 0; 1827 }
1828 filp = nameidata_to_filp(&nd, open_flag);
1829 mnt_drop_write(nd.path.mnt);
1830 return filp;
1782 } 1831 }
1783 1832
1784 /* 1833 /*
@@ -1803,23 +1852,52 @@ do_last:
1803 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) 1852 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
1804 goto do_link; 1853 goto do_link;
1805 1854
1806 path_to_nameidata(&path, nd); 1855 path_to_nameidata(&path, &nd);
1807 error = -EISDIR; 1856 error = -EISDIR;
1808 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1857 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
1809 goto exit; 1858 goto exit;
1810ok: 1859ok:
1811 error = may_open(nd, acc_mode, flag); 1860 /*
1812 if (error) 1861 * Consider:
1862 * 1. may_open() truncates a file
1863 * 2. a rw->ro mount transition occurs
1864 * 3. nameidata_to_filp() fails due to
1865 * the ro mount.
1866 * That would be inconsistent, and should
1867 * be avoided. Taking this mnt write here
1868 * ensures that (2) can not occur.
1869 */
1870 will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
1871 if (will_write) {
1872 error = mnt_want_write(nd.path.mnt);
1873 if (error)
1874 goto exit;
1875 }
1876 error = may_open(&nd, acc_mode, flag);
1877 if (error) {
1878 if (will_write)
1879 mnt_drop_write(nd.path.mnt);
1813 goto exit; 1880 goto exit;
1814 return 0; 1881 }
1882 filp = nameidata_to_filp(&nd, open_flag);
1883 /*
1884 * It is now safe to drop the mnt write
1885 * because the filp has had a write taken
1886 * on its behalf.
1887 */
1888 if (will_write)
1889 mnt_drop_write(nd.path.mnt);
1890 return filp;
1815 1891
1892exit_mutex_unlock:
1893 mutex_unlock(&dir->d_inode->i_mutex);
1816exit_dput: 1894exit_dput:
1817 path_put_conditional(&path, nd); 1895 path_put_conditional(&path, &nd);
1818exit: 1896exit:
1819 if (!IS_ERR(nd->intent.open.file)) 1897 if (!IS_ERR(nd.intent.open.file))
1820 release_open_intent(nd); 1898 release_open_intent(&nd);
1821 path_put(&nd->path); 1899 path_put(&nd.path);
1822 return error; 1900 return ERR_PTR(error);
1823 1901
1824do_link: 1902do_link:
1825 error = -ELOOP; 1903 error = -ELOOP;
@@ -1835,43 +1913,60 @@ do_link:
1835 * stored in nd->last.name and we will have to putname() it when we 1913 * stored in nd->last.name and we will have to putname() it when we
1836 * are done. Procfs-like symlinks just set LAST_BIND. 1914 * are done. Procfs-like symlinks just set LAST_BIND.
1837 */ 1915 */
1838 nd->flags |= LOOKUP_PARENT; 1916 nd.flags |= LOOKUP_PARENT;
1839 error = security_inode_follow_link(path.dentry, nd); 1917 error = security_inode_follow_link(path.dentry, &nd);
1840 if (error) 1918 if (error)
1841 goto exit_dput; 1919 goto exit_dput;
1842 error = __do_follow_link(&path, nd); 1920 error = __do_follow_link(&path, &nd);
1843 if (error) { 1921 if (error) {
1844 /* Does someone understand code flow here? Or it is only 1922 /* Does someone understand code flow here? Or it is only
1845 * me so stupid? Anathema to whoever designed this non-sense 1923 * me so stupid? Anathema to whoever designed this non-sense
1846 * with "intent.open". 1924 * with "intent.open".
1847 */ 1925 */
1848 release_open_intent(nd); 1926 release_open_intent(&nd);
1849 return error; 1927 return ERR_PTR(error);
1850 } 1928 }
1851 nd->flags &= ~LOOKUP_PARENT; 1929 nd.flags &= ~LOOKUP_PARENT;
1852 if (nd->last_type == LAST_BIND) 1930 if (nd.last_type == LAST_BIND)
1853 goto ok; 1931 goto ok;
1854 error = -EISDIR; 1932 error = -EISDIR;
1855 if (nd->last_type != LAST_NORM) 1933 if (nd.last_type != LAST_NORM)
1856 goto exit; 1934 goto exit;
1857 if (nd->last.name[nd->last.len]) { 1935 if (nd.last.name[nd.last.len]) {
1858 __putname(nd->last.name); 1936 __putname(nd.last.name);
1859 goto exit; 1937 goto exit;
1860 } 1938 }
1861 error = -ELOOP; 1939 error = -ELOOP;
1862 if (count++==32) { 1940 if (count++==32) {
1863 __putname(nd->last.name); 1941 __putname(nd.last.name);
1864 goto exit; 1942 goto exit;
1865 } 1943 }
1866 dir = nd->path.dentry; 1944 dir = nd.path.dentry;
1867 mutex_lock(&dir->d_inode->i_mutex); 1945 mutex_lock(&dir->d_inode->i_mutex);
1868 path.dentry = lookup_hash(nd); 1946 path.dentry = lookup_hash(&nd);
1869 path.mnt = nd->path.mnt; 1947 path.mnt = nd.path.mnt;
1870 __putname(nd->last.name); 1948 __putname(nd.last.name);
1871 goto do_last; 1949 goto do_last;
1872} 1950}
1873 1951
1874/** 1952/**
1953 * filp_open - open file and return file pointer
1954 *
1955 * @filename: path to open
1956 * @flags: open flags as per the open(2) second argument
1957 * @mode: mode for the new file if O_CREAT is set, else ignored
1958 *
1959 * This is the helper to open a file from kernelspace if you really
 1960 * have to. But in general you should not do this, so please move
1961 * along, nothing to see here..
1962 */
1963struct file *filp_open(const char *filename, int flags, int mode)
1964{
1965 return do_filp_open(AT_FDCWD, filename, flags, mode);
1966}
1967EXPORT_SYMBOL(filp_open);
1968
1969/**
1875 * lookup_create - lookup a dentry, creating it if it doesn't exist 1970 * lookup_create - lookup a dentry, creating it if it doesn't exist
1876 * @nd: nameidata info 1971 * @nd: nameidata info
1877 * @is_dir: directory flag 1972 * @is_dir: directory flag
@@ -1944,6 +2039,23 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1944 return error; 2039 return error;
1945} 2040}
1946 2041
2042static int may_mknod(mode_t mode)
2043{
2044 switch (mode & S_IFMT) {
2045 case S_IFREG:
2046 case S_IFCHR:
2047 case S_IFBLK:
2048 case S_IFIFO:
2049 case S_IFSOCK:
2050 case 0: /* zero mode translates to S_IFREG */
2051 return 0;
2052 case S_IFDIR:
2053 return -EPERM;
2054 default:
2055 return -EINVAL;
2056 }
2057}
2058
1947asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, 2059asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
1948 unsigned dev) 2060 unsigned dev)
1949{ 2061{
@@ -1962,12 +2074,19 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
1962 if (error) 2074 if (error)
1963 goto out; 2075 goto out;
1964 dentry = lookup_create(&nd, 0); 2076 dentry = lookup_create(&nd, 0);
1965 error = PTR_ERR(dentry); 2077 if (IS_ERR(dentry)) {
1966 2078 error = PTR_ERR(dentry);
2079 goto out_unlock;
2080 }
1967 if (!IS_POSIXACL(nd.path.dentry->d_inode)) 2081 if (!IS_POSIXACL(nd.path.dentry->d_inode))
1968 mode &= ~current->fs->umask; 2082 mode &= ~current->fs->umask;
1969 if (!IS_ERR(dentry)) { 2083 error = may_mknod(mode);
1970 switch (mode & S_IFMT) { 2084 if (error)
2085 goto out_dput;
2086 error = mnt_want_write(nd.path.mnt);
2087 if (error)
2088 goto out_dput;
2089 switch (mode & S_IFMT) {
1971 case 0: case S_IFREG: 2090 case 0: case S_IFREG:
1972 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); 2091 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
1973 break; 2092 break;
@@ -1978,14 +2097,11 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
1978 case S_IFIFO: case S_IFSOCK: 2097 case S_IFIFO: case S_IFSOCK:
1979 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); 2098 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
1980 break; 2099 break;
1981 case S_IFDIR:
1982 error = -EPERM;
1983 break;
1984 default:
1985 error = -EINVAL;
1986 }
1987 dput(dentry);
1988 } 2100 }
2101 mnt_drop_write(nd.path.mnt);
2102out_dput:
2103 dput(dentry);
2104out_unlock:
1989 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2105 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
1990 path_put(&nd.path); 2106 path_put(&nd.path);
1991out: 2107out:
@@ -2043,7 +2159,12 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
2043 2159
2044 if (!IS_POSIXACL(nd.path.dentry->d_inode)) 2160 if (!IS_POSIXACL(nd.path.dentry->d_inode))
2045 mode &= ~current->fs->umask; 2161 mode &= ~current->fs->umask;
2162 error = mnt_want_write(nd.path.mnt);
2163 if (error)
2164 goto out_dput;
2046 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); 2165 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
2166 mnt_drop_write(nd.path.mnt);
2167out_dput:
2047 dput(dentry); 2168 dput(dentry);
2048out_unlock: 2169out_unlock:
2049 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2170 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2150,7 +2271,12 @@ static long do_rmdir(int dfd, const char __user *pathname)
2150 error = PTR_ERR(dentry); 2271 error = PTR_ERR(dentry);
2151 if (IS_ERR(dentry)) 2272 if (IS_ERR(dentry))
2152 goto exit2; 2273 goto exit2;
2274 error = mnt_want_write(nd.path.mnt);
2275 if (error)
2276 goto exit3;
2153 error = vfs_rmdir(nd.path.dentry->d_inode, dentry); 2277 error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
2278 mnt_drop_write(nd.path.mnt);
2279exit3:
2154 dput(dentry); 2280 dput(dentry);
2155exit2: 2281exit2:
2156 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2282 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2231,7 +2357,11 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2231 inode = dentry->d_inode; 2357 inode = dentry->d_inode;
2232 if (inode) 2358 if (inode)
2233 atomic_inc(&inode->i_count); 2359 atomic_inc(&inode->i_count);
2360 error = mnt_want_write(nd.path.mnt);
2361 if (error)
2362 goto exit2;
2234 error = vfs_unlink(nd.path.dentry->d_inode, dentry); 2363 error = vfs_unlink(nd.path.dentry->d_inode, dentry);
2364 mnt_drop_write(nd.path.mnt);
2235 exit2: 2365 exit2:
2236 dput(dentry); 2366 dput(dentry);
2237 } 2367 }
@@ -2312,7 +2442,12 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
2312 if (IS_ERR(dentry)) 2442 if (IS_ERR(dentry))
2313 goto out_unlock; 2443 goto out_unlock;
2314 2444
2445 error = mnt_want_write(nd.path.mnt);
2446 if (error)
2447 goto out_dput;
2315 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO); 2448 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO);
2449 mnt_drop_write(nd.path.mnt);
2450out_dput:
2316 dput(dentry); 2451 dput(dentry);
2317out_unlock: 2452out_unlock:
2318 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2453 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2407,7 +2542,12 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
2407 error = PTR_ERR(new_dentry); 2542 error = PTR_ERR(new_dentry);
2408 if (IS_ERR(new_dentry)) 2543 if (IS_ERR(new_dentry))
2409 goto out_unlock; 2544 goto out_unlock;
2545 error = mnt_want_write(nd.path.mnt);
2546 if (error)
2547 goto out_dput;
2410 error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry); 2548 error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry);
2549 mnt_drop_write(nd.path.mnt);
2550out_dput:
2411 dput(new_dentry); 2551 dput(new_dentry);
2412out_unlock: 2552out_unlock:
2413 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2553 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2633,8 +2773,12 @@ static int do_rename(int olddfd, const char *oldname,
2633 if (new_dentry == trap) 2773 if (new_dentry == trap)
2634 goto exit5; 2774 goto exit5;
2635 2775
2776 error = mnt_want_write(oldnd.path.mnt);
2777 if (error)
2778 goto exit5;
2636 error = vfs_rename(old_dir->d_inode, old_dentry, 2779 error = vfs_rename(old_dir->d_inode, old_dentry,
2637 new_dir->d_inode, new_dentry); 2780 new_dir->d_inode, new_dentry);
2781 mnt_drop_write(oldnd.path.mnt);
2638exit5: 2782exit5:
2639 dput(new_dentry); 2783 dput(new_dentry);
2640exit4: 2784exit4:
diff --git a/fs/namespace.c b/fs/namespace.c
index 7953c96a2071..678f7ce060f2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -17,6 +17,7 @@
17#include <linux/quotaops.h> 17#include <linux/quotaops.h>
18#include <linux/acct.h> 18#include <linux/acct.h>
19#include <linux/capability.h> 19#include <linux/capability.h>
20#include <linux/cpumask.h>
20#include <linux/module.h> 21#include <linux/module.h>
21#include <linux/sysfs.h> 22#include <linux/sysfs.h>
22#include <linux/seq_file.h> 23#include <linux/seq_file.h>
@@ -55,6 +56,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
55 return tmp & (HASH_SIZE - 1); 56 return tmp & (HASH_SIZE - 1);
56} 57}
57 58
59#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
60
58struct vfsmount *alloc_vfsmnt(const char *name) 61struct vfsmount *alloc_vfsmnt(const char *name)
59{ 62{
60 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 63 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -68,6 +71,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
68 INIT_LIST_HEAD(&mnt->mnt_share); 71 INIT_LIST_HEAD(&mnt->mnt_share);
69 INIT_LIST_HEAD(&mnt->mnt_slave_list); 72 INIT_LIST_HEAD(&mnt->mnt_slave_list);
70 INIT_LIST_HEAD(&mnt->mnt_slave); 73 INIT_LIST_HEAD(&mnt->mnt_slave);
74 atomic_set(&mnt->__mnt_writers, 0);
71 if (name) { 75 if (name) {
72 int size = strlen(name) + 1; 76 int size = strlen(name) + 1;
73 char *newname = kmalloc(size, GFP_KERNEL); 77 char *newname = kmalloc(size, GFP_KERNEL);
@@ -80,6 +84,263 @@ struct vfsmount *alloc_vfsmnt(const char *name)
80 return mnt; 84 return mnt;
81} 85}
82 86
87/*
88 * Most r/o checks on a fs are for operations that take
89 * discrete amounts of time, like a write() or unlink().
90 * We must keep track of when those operations start
91 * (for permission checks) and when they end, so that
92 * we can determine when writes are able to occur to
93 * a filesystem.
94 */
95/*
96 * __mnt_is_readonly: check whether a mount is read-only
97 * @mnt: the mount to check for its write status
98 *
99 * This shouldn't be used directly ouside of the VFS.
100 * It does not guarantee that the filesystem will stay
101 * r/w, just that it is right *now*. This can not and
102 * should not be used in place of IS_RDONLY(inode).
103 * mnt_want/drop_write() will _keep_ the filesystem
104 * r/w.
105 */
106int __mnt_is_readonly(struct vfsmount *mnt)
107{
108 if (mnt->mnt_flags & MNT_READONLY)
109 return 1;
110 if (mnt->mnt_sb->s_flags & MS_RDONLY)
111 return 1;
112 return 0;
113}
114EXPORT_SYMBOL_GPL(__mnt_is_readonly);
115
116struct mnt_writer {
117 /*
118 * If holding multiple instances of this lock, they
119 * must be ordered by cpu number.
120 */
121 spinlock_t lock;
122 struct lock_class_key lock_class; /* compiles out with !lockdep */
123 unsigned long count;
124 struct vfsmount *mnt;
125} ____cacheline_aligned_in_smp;
126static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
127
128static int __init init_mnt_writers(void)
129{
130 int cpu;
131 for_each_possible_cpu(cpu) {
132 struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
133 spin_lock_init(&writer->lock);
134 lockdep_set_class(&writer->lock, &writer->lock_class);
135 writer->count = 0;
136 }
137 return 0;
138}
139fs_initcall(init_mnt_writers);
140
141static void unlock_mnt_writers(void)
142{
143 int cpu;
144 struct mnt_writer *cpu_writer;
145
146 for_each_possible_cpu(cpu) {
147 cpu_writer = &per_cpu(mnt_writers, cpu);
148 spin_unlock(&cpu_writer->lock);
149 }
150}
151
152static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
153{
154 if (!cpu_writer->mnt)
155 return;
156 /*
157 * This is in case anyone ever leaves an invalid,
158 * old ->mnt and a count of 0.
159 */
160 if (!cpu_writer->count)
161 return;
162 atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
163 cpu_writer->count = 0;
164}
165 /*
166 * must hold cpu_writer->lock
167 */
168static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
169 struct vfsmount *mnt)
170{
171 if (cpu_writer->mnt == mnt)
172 return;
173 __clear_mnt_count(cpu_writer);
174 cpu_writer->mnt = mnt;
175}
176
177/*
178 * Most r/o checks on a fs are for operations that take
179 * discrete amounts of time, like a write() or unlink().
180 * We must keep track of when those operations start
181 * (for permission checks) and when they end, so that
182 * we can determine when writes are able to occur to
183 * a filesystem.
184 */
185/**
186 * mnt_want_write - get write access to a mount
187 * @mnt: the mount on which to take a write
188 *
189 * This tells the low-level filesystem that a write is
190 * about to be performed to it, and makes sure that
191 * writes are allowed before returning success. When
192 * the write operation is finished, mnt_drop_write()
193 * must be called. This is effectively a refcount.
194 */
195int mnt_want_write(struct vfsmount *mnt)
196{
197 int ret = 0;
198 struct mnt_writer *cpu_writer;
199
200 cpu_writer = &get_cpu_var(mnt_writers);
201 spin_lock(&cpu_writer->lock);
202 if (__mnt_is_readonly(mnt)) {
203 ret = -EROFS;
204 goto out;
205 }
206 use_cpu_writer_for_mount(cpu_writer, mnt);
207 cpu_writer->count++;
208out:
209 spin_unlock(&cpu_writer->lock);
210 put_cpu_var(mnt_writers);
211 return ret;
212}
213EXPORT_SYMBOL_GPL(mnt_want_write);
214
215static void lock_mnt_writers(void)
216{
217 int cpu;
218 struct mnt_writer *cpu_writer;
219
220 for_each_possible_cpu(cpu) {
221 cpu_writer = &per_cpu(mnt_writers, cpu);
222 spin_lock(&cpu_writer->lock);
223 __clear_mnt_count(cpu_writer);
224 cpu_writer->mnt = NULL;
225 }
226}
227
228/*
229 * These per-cpu write counts are not guaranteed to have
230 * matched increments and decrements on any given cpu.
231 * A file open()ed for write on one cpu and close()d on
232 * another cpu will imbalance this count. Make sure it
233 * does not get too far out of whack.
234 */
235static void handle_write_count_underflow(struct vfsmount *mnt)
236{
237 if (atomic_read(&mnt->__mnt_writers) >=
238 MNT_WRITER_UNDERFLOW_LIMIT)
239 return;
240 /*
241 * It isn't necessary to hold all of the locks
242 * at the same time, but doing it this way makes
243 * us share a lot more code.
244 */
245 lock_mnt_writers();
246 /*
247 * vfsmount_lock is for mnt_flags.
248 */
249 spin_lock(&vfsmount_lock);
250 /*
251 * If coalescing the per-cpu writer counts did not
252 * get us back to a positive writer count, we have
253 * a bug.
254 */
255 if ((atomic_read(&mnt->__mnt_writers) < 0) &&
256 !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
257 printk(KERN_DEBUG "leak detected on mount(%p) writers "
258 "count: %d\n",
259 mnt, atomic_read(&mnt->__mnt_writers));
260 WARN_ON(1);
261 /* use the flag to keep the dmesg spam down */
262 mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
263 }
264 spin_unlock(&vfsmount_lock);
265 unlock_mnt_writers();
266}
267
268/**
269 * mnt_drop_write - give up write access to a mount
270 * @mnt: the mount on which to give up write access
271 *
272 * Tells the low-level filesystem that we are done
273 * performing writes to it. Must be matched with
274 * mnt_want_write() call above.
275 */
276void mnt_drop_write(struct vfsmount *mnt)
277{
278 int must_check_underflow = 0;
279 struct mnt_writer *cpu_writer;
280
281 cpu_writer = &get_cpu_var(mnt_writers);
282 spin_lock(&cpu_writer->lock);
283
284 use_cpu_writer_for_mount(cpu_writer, mnt);
285 if (cpu_writer->count > 0) {
286 cpu_writer->count--;
287 } else {
288 must_check_underflow = 1;
289 atomic_dec(&mnt->__mnt_writers);
290 }
291
292 spin_unlock(&cpu_writer->lock);
293 /*
294 * Logically, we could call this each time,
295 * but the __mnt_writers cacheline tends to
296 * be cold, and makes this expensive.
297 */
298 if (must_check_underflow)
299 handle_write_count_underflow(mnt);
300 /*
301 * This could be done right after the spinlock
302 * is taken because the spinlock keeps us on
303 * the cpu, and disables preemption. However,
304 * putting it here bounds the amount that
305 * __mnt_writers can underflow. Without it,
306 * we could theoretically wrap __mnt_writers.
307 */
308 put_cpu_var(mnt_writers);
309}
310EXPORT_SYMBOL_GPL(mnt_drop_write);
311
312static int mnt_make_readonly(struct vfsmount *mnt)
313{
314 int ret = 0;
315
316 lock_mnt_writers();
317 /*
318 * With all the locks held, this value is stable
319 */
320 if (atomic_read(&mnt->__mnt_writers) > 0) {
321 ret = -EBUSY;
322 goto out;
323 }
324 /*
325 * nobody can do a successful mnt_want_write() with all
326 * of the counts in MNT_DENIED_WRITE and the locks held.
327 */
328 spin_lock(&vfsmount_lock);
329 if (!ret)
330 mnt->mnt_flags |= MNT_READONLY;
331 spin_unlock(&vfsmount_lock);
332out:
333 unlock_mnt_writers();
334 return ret;
335}
336
337static void __mnt_unmake_readonly(struct vfsmount *mnt)
338{
339 spin_lock(&vfsmount_lock);
340 mnt->mnt_flags &= ~MNT_READONLY;
341 spin_unlock(&vfsmount_lock);
342}
343
83int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) 344int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
84{ 345{
85 mnt->mnt_sb = sb; 346 mnt->mnt_sb = sb;
@@ -155,15 +416,15 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
155 } 416 }
156} 417}
157 418
158static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) 419static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
159{ 420{
160 old_nd->path.dentry = mnt->mnt_mountpoint; 421 old_path->dentry = mnt->mnt_mountpoint;
161 old_nd->path.mnt = mnt->mnt_parent; 422 old_path->mnt = mnt->mnt_parent;
162 mnt->mnt_parent = mnt; 423 mnt->mnt_parent = mnt;
163 mnt->mnt_mountpoint = mnt->mnt_root; 424 mnt->mnt_mountpoint = mnt->mnt_root;
164 list_del_init(&mnt->mnt_child); 425 list_del_init(&mnt->mnt_child);
165 list_del_init(&mnt->mnt_hash); 426 list_del_init(&mnt->mnt_hash);
166 old_nd->path.dentry->d_mounted--; 427 old_path->dentry->d_mounted--;
167} 428}
168 429
169void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, 430void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
@@ -174,12 +435,12 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
174 dentry->d_mounted++; 435 dentry->d_mounted++;
175} 436}
176 437
177static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd) 438static void attach_mnt(struct vfsmount *mnt, struct path *path)
178{ 439{
179 mnt_set_mountpoint(nd->path.mnt, nd->path.dentry, mnt); 440 mnt_set_mountpoint(path->mnt, path->dentry, mnt);
180 list_add_tail(&mnt->mnt_hash, mount_hashtable + 441 list_add_tail(&mnt->mnt_hash, mount_hashtable +
181 hash(nd->path.mnt, nd->path.dentry)); 442 hash(path->mnt, path->dentry));
182 list_add_tail(&mnt->mnt_child, &nd->path.mnt->mnt_mounts); 443 list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
183} 444}
184 445
185/* 446/*
@@ -262,10 +523,8 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
262 /* stick the duplicate mount on the same expiry list 523 /* stick the duplicate mount on the same expiry list
263 * as the original if that was on one */ 524 * as the original if that was on one */
264 if (flag & CL_EXPIRE) { 525 if (flag & CL_EXPIRE) {
265 spin_lock(&vfsmount_lock);
266 if (!list_empty(&old->mnt_expire)) 526 if (!list_empty(&old->mnt_expire))
267 list_add(&mnt->mnt_expire, &old->mnt_expire); 527 list_add(&mnt->mnt_expire, &old->mnt_expire);
268 spin_unlock(&vfsmount_lock);
269 } 528 }
270 } 529 }
271 return mnt; 530 return mnt;
@@ -273,7 +532,36 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
273 532
274static inline void __mntput(struct vfsmount *mnt) 533static inline void __mntput(struct vfsmount *mnt)
275{ 534{
535 int cpu;
276 struct super_block *sb = mnt->mnt_sb; 536 struct super_block *sb = mnt->mnt_sb;
537 /*
538 * We don't have to hold all of the locks at the
539 * same time here because we know that we're the
540 * last reference to mnt and that no new writers
541 * can come in.
542 */
543 for_each_possible_cpu(cpu) {
544 struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
545 if (cpu_writer->mnt != mnt)
546 continue;
547 spin_lock(&cpu_writer->lock);
548 atomic_add(cpu_writer->count, &mnt->__mnt_writers);
549 cpu_writer->count = 0;
550 /*
551 * Might as well do this so that no one
552 * ever sees the pointer and expects
553 * it to be valid.
554 */
555 cpu_writer->mnt = NULL;
556 spin_unlock(&cpu_writer->lock);
557 }
558 /*
559 * This probably indicates that somebody messed
560 * up a mnt_want/drop_write() pair. If this
561 * happens, the filesystem was probably unable
562 * to make r/w->r/o transitions.
563 */
564 WARN_ON(atomic_read(&mnt->__mnt_writers));
277 dput(mnt->mnt_root); 565 dput(mnt->mnt_root);
278 free_vfsmnt(mnt); 566 free_vfsmnt(mnt);
279 deactivate_super(sb); 567 deactivate_super(sb);
@@ -419,7 +707,7 @@ static int show_vfsmnt(struct seq_file *m, void *v)
419 seq_putc(m, '.'); 707 seq_putc(m, '.');
420 mangle(m, mnt->mnt_sb->s_subtype); 708 mangle(m, mnt->mnt_sb->s_subtype);
421 } 709 }
422 seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); 710 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
423 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { 711 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
424 if (mnt->mnt_sb->s_flags & fs_infop->flag) 712 if (mnt->mnt_sb->s_flags & fs_infop->flag)
425 seq_puts(m, fs_infop->str); 713 seq_puts(m, fs_infop->str);
@@ -548,6 +836,7 @@ void release_mounts(struct list_head *head)
548 m = mnt->mnt_parent; 836 m = mnt->mnt_parent;
549 mnt->mnt_mountpoint = mnt->mnt_root; 837 mnt->mnt_mountpoint = mnt->mnt_root;
550 mnt->mnt_parent = mnt; 838 mnt->mnt_parent = mnt;
839 m->mnt_ghosts--;
551 spin_unlock(&vfsmount_lock); 840 spin_unlock(&vfsmount_lock);
552 dput(dentry); 841 dput(dentry);
553 mntput(m); 842 mntput(m);
@@ -572,12 +861,16 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
572 __touch_mnt_namespace(p->mnt_ns); 861 __touch_mnt_namespace(p->mnt_ns);
573 p->mnt_ns = NULL; 862 p->mnt_ns = NULL;
574 list_del_init(&p->mnt_child); 863 list_del_init(&p->mnt_child);
575 if (p->mnt_parent != p) 864 if (p->mnt_parent != p) {
865 p->mnt_parent->mnt_ghosts++;
576 p->mnt_mountpoint->d_mounted--; 866 p->mnt_mountpoint->d_mounted--;
867 }
577 change_mnt_propagation(p, MS_PRIVATE); 868 change_mnt_propagation(p, MS_PRIVATE);
578 } 869 }
579} 870}
580 871
872static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts);
873
581static int do_umount(struct vfsmount *mnt, int flags) 874static int do_umount(struct vfsmount *mnt, int flags)
582{ 875{
583 struct super_block *sb = mnt->mnt_sb; 876 struct super_block *sb = mnt->mnt_sb;
@@ -650,6 +943,9 @@ static int do_umount(struct vfsmount *mnt, int flags)
650 spin_lock(&vfsmount_lock); 943 spin_lock(&vfsmount_lock);
651 event++; 944 event++;
652 945
946 if (!(flags & MNT_DETACH))
947 shrink_submounts(mnt, &umount_list);
948
653 retval = -EBUSY; 949 retval = -EBUSY;
654 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { 950 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
655 if (!list_empty(&mnt->mnt_list)) 951 if (!list_empty(&mnt->mnt_list))
@@ -744,7 +1040,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
744 int flag) 1040 int flag)
745{ 1041{
746 struct vfsmount *res, *p, *q, *r, *s; 1042 struct vfsmount *res, *p, *q, *r, *s;
747 struct nameidata nd; 1043 struct path path;
748 1044
749 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) 1045 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
750 return NULL; 1046 return NULL;
@@ -769,14 +1065,14 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
769 q = q->mnt_parent; 1065 q = q->mnt_parent;
770 } 1066 }
771 p = s; 1067 p = s;
772 nd.path.mnt = q; 1068 path.mnt = q;
773 nd.path.dentry = p->mnt_mountpoint; 1069 path.dentry = p->mnt_mountpoint;
774 q = clone_mnt(p, p->mnt_root, flag); 1070 q = clone_mnt(p, p->mnt_root, flag);
775 if (!q) 1071 if (!q)
776 goto Enomem; 1072 goto Enomem;
777 spin_lock(&vfsmount_lock); 1073 spin_lock(&vfsmount_lock);
778 list_add_tail(&q->mnt_list, &res->mnt_list); 1074 list_add_tail(&q->mnt_list, &res->mnt_list);
779 attach_mnt(q, &nd); 1075 attach_mnt(q, &path);
780 spin_unlock(&vfsmount_lock); 1076 spin_unlock(&vfsmount_lock);
781 } 1077 }
782 } 1078 }
@@ -876,11 +1172,11 @@ void drop_collected_mounts(struct vfsmount *mnt)
876 * in allocations. 1172 * in allocations.
877 */ 1173 */
878static int attach_recursive_mnt(struct vfsmount *source_mnt, 1174static int attach_recursive_mnt(struct vfsmount *source_mnt,
879 struct nameidata *nd, struct nameidata *parent_nd) 1175 struct path *path, struct path *parent_path)
880{ 1176{
881 LIST_HEAD(tree_list); 1177 LIST_HEAD(tree_list);
882 struct vfsmount *dest_mnt = nd->path.mnt; 1178 struct vfsmount *dest_mnt = path->mnt;
883 struct dentry *dest_dentry = nd->path.dentry; 1179 struct dentry *dest_dentry = path->dentry;
884 struct vfsmount *child, *p; 1180 struct vfsmount *child, *p;
885 1181
886 if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list)) 1182 if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
@@ -892,9 +1188,9 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
892 } 1188 }
893 1189
894 spin_lock(&vfsmount_lock); 1190 spin_lock(&vfsmount_lock);
895 if (parent_nd) { 1191 if (parent_path) {
896 detach_mnt(source_mnt, parent_nd); 1192 detach_mnt(source_mnt, parent_path);
897 attach_mnt(source_mnt, nd); 1193 attach_mnt(source_mnt, path);
898 touch_mnt_namespace(current->nsproxy->mnt_ns); 1194 touch_mnt_namespace(current->nsproxy->mnt_ns);
899 } else { 1195 } else {
900 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); 1196 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
@@ -930,7 +1226,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
930 1226
931 err = -ENOENT; 1227 err = -ENOENT;
932 if (IS_ROOT(nd->path.dentry) || !d_unhashed(nd->path.dentry)) 1228 if (IS_ROOT(nd->path.dentry) || !d_unhashed(nd->path.dentry))
933 err = attach_recursive_mnt(mnt, nd, NULL); 1229 err = attach_recursive_mnt(mnt, &nd->path, NULL);
934out_unlock: 1230out_unlock:
935 mutex_unlock(&nd->path.dentry->d_inode->i_mutex); 1231 mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
936 if (!err) 1232 if (!err)
@@ -1013,6 +1309,23 @@ out:
1013 return err; 1309 return err;
1014} 1310}
1015 1311
1312static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1313{
1314 int error = 0;
1315 int readonly_request = 0;
1316
1317 if (ms_flags & MS_RDONLY)
1318 readonly_request = 1;
1319 if (readonly_request == __mnt_is_readonly(mnt))
1320 return 0;
1321
1322 if (readonly_request)
1323 error = mnt_make_readonly(mnt);
1324 else
1325 __mnt_unmake_readonly(mnt);
1326 return error;
1327}
1328
1016/* 1329/*
1017 * change filesystem flags. dir should be a physical root of filesystem. 1330 * change filesystem flags. dir should be a physical root of filesystem.
1018 * If you've mounted a non-root directory somewhere and want to do remount 1331 * If you've mounted a non-root directory somewhere and want to do remount
@@ -1035,7 +1348,10 @@ static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags,
1035 return -EINVAL; 1348 return -EINVAL;
1036 1349
1037 down_write(&sb->s_umount); 1350 down_write(&sb->s_umount);
1038 err = do_remount_sb(sb, flags, data, 0); 1351 if (flags & MS_BIND)
1352 err = change_mount_flags(nd->path.mnt, flags);
1353 else
1354 err = do_remount_sb(sb, flags, data, 0);
1039 if (!err) 1355 if (!err)
1040 nd->path.mnt->mnt_flags = mnt_flags; 1356 nd->path.mnt->mnt_flags = mnt_flags;
1041 up_write(&sb->s_umount); 1357 up_write(&sb->s_umount);
@@ -1059,7 +1375,8 @@ static inline int tree_contains_unbindable(struct vfsmount *mnt)
1059 */ 1375 */
1060static noinline int do_move_mount(struct nameidata *nd, char *old_name) 1376static noinline int do_move_mount(struct nameidata *nd, char *old_name)
1061{ 1377{
1062 struct nameidata old_nd, parent_nd; 1378 struct nameidata old_nd;
1379 struct path parent_path;
1063 struct vfsmount *p; 1380 struct vfsmount *p;
1064 int err = 0; 1381 int err = 0;
1065 if (!capable(CAP_SYS_ADMIN)) 1382 if (!capable(CAP_SYS_ADMIN))
@@ -1114,21 +1431,19 @@ static noinline int do_move_mount(struct nameidata *nd, char *old_name)
1114 if (p == old_nd.path.mnt) 1431 if (p == old_nd.path.mnt)
1115 goto out1; 1432 goto out1;
1116 1433
1117 err = attach_recursive_mnt(old_nd.path.mnt, nd, &parent_nd); 1434 err = attach_recursive_mnt(old_nd.path.mnt, &nd->path, &parent_path);
1118 if (err) 1435 if (err)
1119 goto out1; 1436 goto out1;
1120 1437
1121 spin_lock(&vfsmount_lock);
1122 /* if the mount is moved, it should no longer be expire 1438 /* if the mount is moved, it should no longer be expire
1123 * automatically */ 1439 * automatically */
1124 list_del_init(&old_nd.path.mnt->mnt_expire); 1440 list_del_init(&old_nd.path.mnt->mnt_expire);
1125 spin_unlock(&vfsmount_lock);
1126out1: 1441out1:
1127 mutex_unlock(&nd->path.dentry->d_inode->i_mutex); 1442 mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
1128out: 1443out:
1129 up_write(&namespace_sem); 1444 up_write(&namespace_sem);
1130 if (!err) 1445 if (!err)
1131 path_put(&parent_nd.path); 1446 path_put(&parent_path);
1132 path_put(&old_nd.path); 1447 path_put(&old_nd.path);
1133 return err; 1448 return err;
1134} 1449}
@@ -1189,12 +1504,9 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
1189 if ((err = graft_tree(newmnt, nd))) 1504 if ((err = graft_tree(newmnt, nd)))
1190 goto unlock; 1505 goto unlock;
1191 1506
1192 if (fslist) { 1507 if (fslist) /* add to the specified expiration list */
1193 /* add to the specified expiration list */
1194 spin_lock(&vfsmount_lock);
1195 list_add_tail(&newmnt->mnt_expire, fslist); 1508 list_add_tail(&newmnt->mnt_expire, fslist);
1196 spin_unlock(&vfsmount_lock); 1509
1197 }
1198 up_write(&namespace_sem); 1510 up_write(&namespace_sem);
1199 return 0; 1511 return 0;
1200 1512
@@ -1206,75 +1518,6 @@ unlock:
1206 1518
1207EXPORT_SYMBOL_GPL(do_add_mount); 1519EXPORT_SYMBOL_GPL(do_add_mount);
1208 1520
1209static void expire_mount(struct vfsmount *mnt, struct list_head *mounts,
1210 struct list_head *umounts)
1211{
1212 spin_lock(&vfsmount_lock);
1213
1214 /*
1215 * Check if mount is still attached, if not, let whoever holds it deal
1216 * with the sucker
1217 */
1218 if (mnt->mnt_parent == mnt) {
1219 spin_unlock(&vfsmount_lock);
1220 return;
1221 }
1222
1223 /*
1224 * Check that it is still dead: the count should now be 2 - as
1225 * contributed by the vfsmount parent and the mntget above
1226 */
1227 if (!propagate_mount_busy(mnt, 2)) {
1228 /* delete from the namespace */
1229 touch_mnt_namespace(mnt->mnt_ns);
1230 list_del_init(&mnt->mnt_list);
1231 mnt->mnt_ns = NULL;
1232 umount_tree(mnt, 1, umounts);
1233 spin_unlock(&vfsmount_lock);
1234 } else {
1235 /*
1236 * Someone brought it back to life whilst we didn't have any
1237 * locks held so return it to the expiration list
1238 */
1239 list_add_tail(&mnt->mnt_expire, mounts);
1240 spin_unlock(&vfsmount_lock);
1241 }
1242}
1243
1244/*
1245 * go through the vfsmounts we've just consigned to the graveyard to
1246 * - check that they're still dead
1247 * - delete the vfsmount from the appropriate namespace under lock
1248 * - dispose of the corpse
1249 */
1250static void expire_mount_list(struct list_head *graveyard, struct list_head *mounts)
1251{
1252 struct mnt_namespace *ns;
1253 struct vfsmount *mnt;
1254
1255 while (!list_empty(graveyard)) {
1256 LIST_HEAD(umounts);
1257 mnt = list_first_entry(graveyard, struct vfsmount, mnt_expire);
1258 list_del_init(&mnt->mnt_expire);
1259
1260 /* don't do anything if the namespace is dead - all the
1261 * vfsmounts from it are going away anyway */
1262 ns = mnt->mnt_ns;
1263 if (!ns || !ns->root)
1264 continue;
1265 get_mnt_ns(ns);
1266
1267 spin_unlock(&vfsmount_lock);
1268 down_write(&namespace_sem);
1269 expire_mount(mnt, mounts, &umounts);
1270 up_write(&namespace_sem);
1271 release_mounts(&umounts);
1272 mntput(mnt);
1273 put_mnt_ns(ns);
1274 spin_lock(&vfsmount_lock);
1275 }
1276}
1277
1278/* 1521/*
1279 * process a list of expirable mountpoints with the intent of discarding any 1522 * process a list of expirable mountpoints with the intent of discarding any
1280 * mountpoints that aren't in use and haven't been touched since last we came 1523 * mountpoints that aren't in use and haven't been touched since last we came
@@ -1284,10 +1527,12 @@ void mark_mounts_for_expiry(struct list_head *mounts)
1284{ 1527{
1285 struct vfsmount *mnt, *next; 1528 struct vfsmount *mnt, *next;
1286 LIST_HEAD(graveyard); 1529 LIST_HEAD(graveyard);
1530 LIST_HEAD(umounts);
1287 1531
1288 if (list_empty(mounts)) 1532 if (list_empty(mounts))
1289 return; 1533 return;
1290 1534
1535 down_write(&namespace_sem);
1291 spin_lock(&vfsmount_lock); 1536 spin_lock(&vfsmount_lock);
1292 1537
1293 /* extract from the expiration list every vfsmount that matches the 1538 /* extract from the expiration list every vfsmount that matches the
@@ -1298,16 +1543,19 @@ void mark_mounts_for_expiry(struct list_head *mounts)
1298 */ 1543 */
1299 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { 1544 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
1300 if (!xchg(&mnt->mnt_expiry_mark, 1) || 1545 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
1301 atomic_read(&mnt->mnt_count) != 1) 1546 propagate_mount_busy(mnt, 1))
1302 continue; 1547 continue;
1303
1304 mntget(mnt);
1305 list_move(&mnt->mnt_expire, &graveyard); 1548 list_move(&mnt->mnt_expire, &graveyard);
1306 } 1549 }
1307 1550 while (!list_empty(&graveyard)) {
1308 expire_mount_list(&graveyard, mounts); 1551 mnt = list_first_entry(&graveyard, struct vfsmount, mnt_expire);
1309 1552 touch_mnt_namespace(mnt->mnt_ns);
1553 umount_tree(mnt, 1, &umounts);
1554 }
1310 spin_unlock(&vfsmount_lock); 1555 spin_unlock(&vfsmount_lock);
1556 up_write(&namespace_sem);
1557
1558 release_mounts(&umounts);
1311} 1559}
1312 1560
1313EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); 1561EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
@@ -1343,7 +1591,6 @@ resume:
1343 } 1591 }
1344 1592
1345 if (!propagate_mount_busy(mnt, 1)) { 1593 if (!propagate_mount_busy(mnt, 1)) {
1346 mntget(mnt);
1347 list_move_tail(&mnt->mnt_expire, graveyard); 1594 list_move_tail(&mnt->mnt_expire, graveyard);
1348 found++; 1595 found++;
1349 } 1596 }
@@ -1363,22 +1610,22 @@ resume:
1363 * process a list of expirable mountpoints with the intent of discarding any 1610 * process a list of expirable mountpoints with the intent of discarding any
1364 * submounts of a specific parent mountpoint 1611 * submounts of a specific parent mountpoint
1365 */ 1612 */
1366void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts) 1613static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
1367{ 1614{
1368 LIST_HEAD(graveyard); 1615 LIST_HEAD(graveyard);
1369 int found; 1616 struct vfsmount *m;
1370
1371 spin_lock(&vfsmount_lock);
1372 1617
1373 /* extract submounts of 'mountpoint' from the expiration list */ 1618 /* extract submounts of 'mountpoint' from the expiration list */
1374 while ((found = select_submounts(mountpoint, &graveyard)) != 0) 1619 while (select_submounts(mnt, &graveyard)) {
1375 expire_mount_list(&graveyard, mounts); 1620 while (!list_empty(&graveyard)) {
1376 1621 m = list_first_entry(&graveyard, struct vfsmount,
1377 spin_unlock(&vfsmount_lock); 1622 mnt_expire);
1623 touch_mnt_namespace(mnt->mnt_ns);
1624 umount_tree(mnt, 1, umounts);
1625 }
1626 }
1378} 1627}
1379 1628
1380EXPORT_SYMBOL_GPL(shrink_submounts);
1381
1382/* 1629/*
1383 * Some copy_from_user() implementations do not return the exact number of 1630 * Some copy_from_user() implementations do not return the exact number of
1384 * bytes remaining to copy on a fault. But copy_mount_options() requires that. 1631 * bytes remaining to copy on a fault. But copy_mount_options() requires that.
@@ -1488,6 +1735,8 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1488 mnt_flags |= MNT_NODIRATIME; 1735 mnt_flags |= MNT_NODIRATIME;
1489 if (flags & MS_RELATIME) 1736 if (flags & MS_RELATIME)
1490 mnt_flags |= MNT_RELATIME; 1737 mnt_flags |= MNT_RELATIME;
1738 if (flags & MS_RDONLY)
1739 mnt_flags |= MNT_READONLY;
1491 1740
1492 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | 1741 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
1493 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); 1742 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
@@ -1683,7 +1932,7 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
1683 path_put(&old_pwd); 1932 path_put(&old_pwd);
1684} 1933}
1685 1934
1686static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) 1935static void chroot_fs_refs(struct path *old_root, struct path *new_root)
1687{ 1936{
1688 struct task_struct *g, *p; 1937 struct task_struct *g, *p;
1689 struct fs_struct *fs; 1938 struct fs_struct *fs;
@@ -1695,12 +1944,12 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
1695 if (fs) { 1944 if (fs) {
1696 atomic_inc(&fs->count); 1945 atomic_inc(&fs->count);
1697 task_unlock(p); 1946 task_unlock(p);
1698 if (fs->root.dentry == old_nd->path.dentry 1947 if (fs->root.dentry == old_root->dentry
1699 && fs->root.mnt == old_nd->path.mnt) 1948 && fs->root.mnt == old_root->mnt)
1700 set_fs_root(fs, &new_nd->path); 1949 set_fs_root(fs, new_root);
1701 if (fs->pwd.dentry == old_nd->path.dentry 1950 if (fs->pwd.dentry == old_root->dentry
1702 && fs->pwd.mnt == old_nd->path.mnt) 1951 && fs->pwd.mnt == old_root->mnt)
1703 set_fs_pwd(fs, &new_nd->path); 1952 set_fs_pwd(fs, new_root);
1704 put_fs_struct(fs); 1953 put_fs_struct(fs);
1705 } else 1954 } else
1706 task_unlock(p); 1955 task_unlock(p);
@@ -1737,7 +1986,8 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
1737 const char __user * put_old) 1986 const char __user * put_old)
1738{ 1987{
1739 struct vfsmount *tmp; 1988 struct vfsmount *tmp;
1740 struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; 1989 struct nameidata new_nd, old_nd, user_nd;
1990 struct path parent_path, root_parent;
1741 int error; 1991 int error;
1742 1992
1743 if (!capable(CAP_SYS_ADMIN)) 1993 if (!capable(CAP_SYS_ADMIN))
@@ -1811,19 +2061,19 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
1811 goto out3; 2061 goto out3;
1812 } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) 2062 } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry))
1813 goto out3; 2063 goto out3;
1814 detach_mnt(new_nd.path.mnt, &parent_nd); 2064 detach_mnt(new_nd.path.mnt, &parent_path);
1815 detach_mnt(user_nd.path.mnt, &root_parent); 2065 detach_mnt(user_nd.path.mnt, &root_parent);
1816 /* mount old root on put_old */ 2066 /* mount old root on put_old */
1817 attach_mnt(user_nd.path.mnt, &old_nd); 2067 attach_mnt(user_nd.path.mnt, &old_nd.path);
1818 /* mount new_root on / */ 2068 /* mount new_root on / */
1819 attach_mnt(new_nd.path.mnt, &root_parent); 2069 attach_mnt(new_nd.path.mnt, &root_parent);
1820 touch_mnt_namespace(current->nsproxy->mnt_ns); 2070 touch_mnt_namespace(current->nsproxy->mnt_ns);
1821 spin_unlock(&vfsmount_lock); 2071 spin_unlock(&vfsmount_lock);
1822 chroot_fs_refs(&user_nd, &new_nd); 2072 chroot_fs_refs(&user_nd.path, &new_nd.path);
1823 security_sb_post_pivotroot(&user_nd, &new_nd); 2073 security_sb_post_pivotroot(&user_nd, &new_nd);
1824 error = 0; 2074 error = 0;
1825 path_put(&root_parent.path); 2075 path_put(&root_parent);
1826 path_put(&parent_nd.path); 2076 path_put(&parent_path);
1827out2: 2077out2:
1828 mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); 2078 mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex);
1829 up_write(&namespace_sem); 2079 up_write(&namespace_sem);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index c67b4bdcf719..ad8f167e54bc 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -14,6 +14,7 @@
14#include <linux/ioctl.h> 14#include <linux/ioctl.h>
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/mount.h>
17#include <linux/highuid.h> 18#include <linux/highuid.h>
18#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
19#include <linux/vmalloc.h> 20#include <linux/vmalloc.h>
@@ -261,7 +262,7 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
261} 262}
262#endif /* CONFIG_NCPFS_NLS */ 263#endif /* CONFIG_NCPFS_NLS */
263 264
264int ncp_ioctl(struct inode *inode, struct file *filp, 265static int __ncp_ioctl(struct inode *inode, struct file *filp,
265 unsigned int cmd, unsigned long arg) 266 unsigned int cmd, unsigned long arg)
266{ 267{
267 struct ncp_server *server = NCP_SERVER(inode); 268 struct ncp_server *server = NCP_SERVER(inode);
@@ -822,6 +823,57 @@ outrel:
822 return -EINVAL; 823 return -EINVAL;
823} 824}
824 825
826static int ncp_ioctl_need_write(unsigned int cmd)
827{
828 switch (cmd) {
829 case NCP_IOC_GET_FS_INFO:
830 case NCP_IOC_GET_FS_INFO_V2:
831 case NCP_IOC_NCPREQUEST:
832 case NCP_IOC_SETDENTRYTTL:
833 case NCP_IOC_SIGN_INIT:
834 case NCP_IOC_LOCKUNLOCK:
835 case NCP_IOC_SET_SIGN_WANTED:
836 return 1;
837 case NCP_IOC_GETOBJECTNAME:
838 case NCP_IOC_SETOBJECTNAME:
839 case NCP_IOC_GETPRIVATEDATA:
840 case NCP_IOC_SETPRIVATEDATA:
841 case NCP_IOC_SETCHARSETS:
842 case NCP_IOC_GETCHARSETS:
843 case NCP_IOC_CONN_LOGGED_IN:
844 case NCP_IOC_GETDENTRYTTL:
845 case NCP_IOC_GETMOUNTUID2:
846 case NCP_IOC_SIGN_WANTED:
847 case NCP_IOC_GETROOT:
848 case NCP_IOC_SETROOT:
849 return 0;
850 default:
851 /* unkown IOCTL command, assume write */
852 return 1;
853 }
854}
855
856int ncp_ioctl(struct inode *inode, struct file *filp,
857 unsigned int cmd, unsigned long arg)
858{
859 int ret;
860
861 if (ncp_ioctl_need_write(cmd)) {
862 /*
863 * inside the ioctl(), any failures which
864 * are because of file_permission() are
865 * -EACCESS, so it seems consistent to keep
866 * that here.
867 */
868 if (mnt_want_write(filp->f_path.mnt))
869 return -EACCES;
870 }
871 ret = __ncp_ioctl(inode, filp, cmd, arg);
872 if (ncp_ioctl_need_write(cmd))
873 mnt_drop_write(filp->f_path.mnt);
874 return ret;
875}
876
825#ifdef CONFIG_COMPAT 877#ifdef CONFIG_COMPAT
826long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 878long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
827{ 879{
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index ecc06c619494..66648dd92d97 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -93,6 +93,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
93 svc_process(rqstp); 93 svc_process(rqstp);
94 } 94 }
95 95
96 flush_signals(current);
96 svc_exit_thread(rqstp); 97 svc_exit_thread(rqstp);
97 nfs_callback_info.pid = 0; 98 nfs_callback_info.pid = 0;
98 complete(&nfs_callback_info.stopped); 99 complete(&nfs_callback_info.stopped);
@@ -171,7 +172,7 @@ void nfs_callback_down(void)
171static int nfs_callback_authenticate(struct svc_rqst *rqstp) 172static int nfs_callback_authenticate(struct svc_rqst *rqstp)
172{ 173{
173 struct nfs_client *clp; 174 struct nfs_client *clp;
174 char buf[RPC_MAX_ADDRBUFLEN]; 175 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
175 176
176 /* Don't talk to strangers */ 177 /* Don't talk to strangers */
177 clp = nfs_find_client(svc_addr(rqstp), 4); 178 clp = nfs_find_client(svc_addr(rqstp), 4);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c63eb720b68b..13619d24f023 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -254,7 +254,7 @@ static __be32 encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap,
254 if (!(bitmap[0] & FATTR4_WORD0_CHANGE)) 254 if (!(bitmap[0] & FATTR4_WORD0_CHANGE))
255 return 0; 255 return 0;
256 p = xdr_reserve_space(xdr, 8); 256 p = xdr_reserve_space(xdr, 8);
257 if (unlikely(p == 0)) 257 if (unlikely(!p))
258 return htonl(NFS4ERR_RESOURCE); 258 return htonl(NFS4ERR_RESOURCE);
259 p = xdr_encode_hyper(p, change); 259 p = xdr_encode_hyper(p, change);
260 return 0; 260 return 0;
@@ -267,7 +267,7 @@ static __be32 encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, u
267 if (!(bitmap[0] & FATTR4_WORD0_SIZE)) 267 if (!(bitmap[0] & FATTR4_WORD0_SIZE))
268 return 0; 268 return 0;
269 p = xdr_reserve_space(xdr, 8); 269 p = xdr_reserve_space(xdr, 8);
270 if (unlikely(p == 0)) 270 if (unlikely(!p))
271 return htonl(NFS4ERR_RESOURCE); 271 return htonl(NFS4ERR_RESOURCE);
272 p = xdr_encode_hyper(p, size); 272 p = xdr_encode_hyper(p, size);
273 return 0; 273 return 0;
@@ -278,7 +278,7 @@ static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec *ti
278 __be32 *p; 278 __be32 *p;
279 279
280 p = xdr_reserve_space(xdr, 12); 280 p = xdr_reserve_space(xdr, 12);
281 if (unlikely(p == 0)) 281 if (unlikely(!p))
282 return htonl(NFS4ERR_RESOURCE); 282 return htonl(NFS4ERR_RESOURCE);
283 p = xdr_encode_hyper(p, time->tv_sec); 283 p = xdr_encode_hyper(p, time->tv_sec);
284 *p = htonl(time->tv_nsec); 284 *p = htonl(time->tv_nsec);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index b9eadd18ba70..00a5e4405e16 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -49,7 +49,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
49 struct file_lock *fl; 49 struct file_lock *fl;
50 int status; 50 int status;
51 51
52 for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { 52 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
53 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) 53 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
54 continue; 54 continue;
55 if (nfs_file_open_context(fl->fl_file) != ctx) 55 if (nfs_file_open_context(fl->fl_file) != ctx)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ae04892a5e5d..d9e30ac2798d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -710,6 +710,8 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
710{ 710{
711 struct nfs_server *server = NFS_SERVER(inode); 711 struct nfs_server *server = NFS_SERVER(inode);
712 712
713 if (test_bit(NFS_INO_MOUNTPOINT, &NFS_I(inode)->flags))
714 return 0;
713 if (nd != NULL) { 715 if (nd != NULL) {
714 /* VFS wants an on-the-wire revalidation */ 716 /* VFS wants an on-the-wire revalidation */
715 if (nd->flags & LOOKUP_REVAL) 717 if (nd->flags & LOOKUP_REVAL)
@@ -965,7 +967,8 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd)
965 if (nd->flags & LOOKUP_DIRECTORY) 967 if (nd->flags & LOOKUP_DIRECTORY)
966 return 0; 968 return 0;
967 /* Are we trying to write to a read only partition? */ 969 /* Are we trying to write to a read only partition? */
968 if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) 970 if (__mnt_is_readonly(nd->path.mnt) &&
971 (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
969 return 0; 972 return 0;
970 return 1; 973 return 1;
971} 974}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ef57a5ae5904..5d2e9d9a4e28 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -64,7 +64,11 @@ const struct file_operations nfs_file_operations = {
64 .write = do_sync_write, 64 .write = do_sync_write,
65 .aio_read = nfs_file_read, 65 .aio_read = nfs_file_read,
66 .aio_write = nfs_file_write, 66 .aio_write = nfs_file_write,
67#ifdef CONFIG_MMU
67 .mmap = nfs_file_mmap, 68 .mmap = nfs_file_mmap,
69#else
70 .mmap = generic_file_mmap,
71#endif
68 .open = nfs_file_open, 72 .open = nfs_file_open,
69 .flush = nfs_file_flush, 73 .flush = nfs_file_flush,
70 .release = nfs_file_release, 74 .release = nfs_file_release,
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 8ae5dba2d4e5..86147b0ab2cf 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -309,7 +309,7 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
309 mutex_lock(&idmap->idmap_im_lock); 309 mutex_lock(&idmap->idmap_im_lock);
310 310
311 he = idmap_lookup_id(h, id); 311 he = idmap_lookup_id(h, id);
312 if (he != 0) { 312 if (he) {
313 memcpy(name, he->ih_name, he->ih_namelen); 313 memcpy(name, he->ih_name, he->ih_namelen);
314 ret = he->ih_namelen; 314 ret = he->ih_namelen;
315 goto out; 315 goto out;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 966a8850aa30..6f88d7c77ac9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -299,6 +299,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
299 else 299 else
300 inode->i_op = &nfs_mountpoint_inode_operations; 300 inode->i_op = &nfs_mountpoint_inode_operations;
301 inode->i_fop = NULL; 301 inode->i_fop = NULL;
302 set_bit(NFS_INO_MOUNTPOINT, &nfsi->flags);
302 } 303 }
303 } else if (S_ISLNK(inode->i_mode)) 304 } else if (S_ISLNK(inode->i_mode))
304 inode->i_op = &nfs_symlink_inode_operations; 305 inode->i_op = &nfs_symlink_inode_operations;
@@ -505,6 +506,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
505 ctx->cred = get_rpccred(cred); 506 ctx->cred = get_rpccred(cred);
506 ctx->state = NULL; 507 ctx->state = NULL;
507 ctx->lockowner = current->files; 508 ctx->lockowner = current->files;
509 ctx->flags = 0;
508 ctx->error = 0; 510 ctx->error = 0;
509 ctx->dir_cookie = 0; 511 ctx->dir_cookie = 0;
510 atomic_set(&ctx->count, 1); 512 atomic_set(&ctx->count, 1);
@@ -1003,8 +1005,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1003 1005
1004 server = NFS_SERVER(inode); 1006 server = NFS_SERVER(inode);
1005 /* Update the fsid? */ 1007 /* Update the fsid? */
1006 if (S_ISDIR(inode->i_mode) 1008 if (S_ISDIR(inode->i_mode) &&
1007 && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) 1009 !nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
1010 !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags))
1008 server->fsid = fattr->fsid; 1011 server->fsid = fattr->fsid;
1009 1012
1010 /* 1013 /*
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 0f5619611b8d..931992763e68 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -3,6 +3,7 @@
3 */ 3 */
4 4
5#include <linux/mount.h> 5#include <linux/mount.h>
6#include <linux/security.h>
6 7
7struct nfs_string; 8struct nfs_string;
8 9
@@ -57,6 +58,8 @@ struct nfs_parsed_mount_data {
57 char *export_path; 58 char *export_path;
58 int protocol; 59 int protocol;
59 } nfs_server; 60 } nfs_server;
61
62 struct security_mnt_opts lsm_opts;
60}; 63};
61 64
62/* client.c */ 65/* client.c */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6233eb5e98c1..b962397004c1 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -785,7 +785,7 @@ static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_s
785 struct file_lock *fl; 785 struct file_lock *fl;
786 int status = 0; 786 int status = 0;
787 787
788 for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { 788 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
789 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) 789 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
790 continue; 790 continue;
791 if (nfs_file_open_context(fl->fl_file)->state != state) 791 if (nfs_file_open_context(fl->fl_file)->state != state)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 3d7d9631e125..5a70be589bbe 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -533,7 +533,10 @@ readpage_async_filler(void *data, struct page *page)
533 533
534 if (len < PAGE_CACHE_SIZE) 534 if (len < PAGE_CACHE_SIZE)
535 zero_user_segment(page, len, PAGE_CACHE_SIZE); 535 zero_user_segment(page, len, PAGE_CACHE_SIZE);
536 nfs_pageio_add_request(desc->pgio, new); 536 if (!nfs_pageio_add_request(desc->pgio, new)) {
537 error = desc->pgio->pg_error;
538 goto out_unlock;
539 }
537 return 0; 540 return 0;
538out_error: 541out_error:
539 error = PTR_ERR(new); 542 error = PTR_ERR(new);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 1fb381843650..f9219024f31a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -589,8 +589,6 @@ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
589 struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb); 589 struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb);
590 struct rpc_clnt *rpc; 590 struct rpc_clnt *rpc;
591 591
592 shrink_submounts(vfsmnt, &nfs_automount_list);
593
594 if (!(flags & MNT_FORCE)) 592 if (!(flags & MNT_FORCE))
595 return; 593 return;
596 /* -EIO all pending I/O */ 594 /* -EIO all pending I/O */
@@ -632,7 +630,7 @@ static int nfs_verify_server_address(struct sockaddr *addr)
632 switch (addr->sa_family) { 630 switch (addr->sa_family) {
633 case AF_INET: { 631 case AF_INET: {
634 struct sockaddr_in *sa = (struct sockaddr_in *)addr; 632 struct sockaddr_in *sa = (struct sockaddr_in *)addr;
635 return sa->sin_addr.s_addr != INADDR_ANY; 633 return sa->sin_addr.s_addr != htonl(INADDR_ANY);
636 } 634 }
637 case AF_INET6: { 635 case AF_INET6: {
638 struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; 636 struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr;
@@ -684,8 +682,9 @@ static void nfs_parse_server_address(char *value,
684static int nfs_parse_mount_options(char *raw, 682static int nfs_parse_mount_options(char *raw,
685 struct nfs_parsed_mount_data *mnt) 683 struct nfs_parsed_mount_data *mnt)
686{ 684{
687 char *p, *string; 685 char *p, *string, *secdata;
688 unsigned short port = 0; 686 unsigned short port = 0;
687 int rc;
689 688
690 if (!raw) { 689 if (!raw) {
691 dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); 690 dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
@@ -693,6 +692,20 @@ static int nfs_parse_mount_options(char *raw,
693 } 692 }
694 dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); 693 dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
695 694
695 secdata = alloc_secdata();
696 if (!secdata)
697 goto out_nomem;
698
699 rc = security_sb_copy_data(raw, secdata);
700 if (rc)
701 goto out_security_failure;
702
703 rc = security_sb_parse_opts_str(secdata, &mnt->lsm_opts);
704 if (rc)
705 goto out_security_failure;
706
707 free_secdata(secdata);
708
696 while ((p = strsep(&raw, ",")) != NULL) { 709 while ((p = strsep(&raw, ",")) != NULL) {
697 substring_t args[MAX_OPT_ARGS]; 710 substring_t args[MAX_OPT_ARGS];
698 int option, token; 711 int option, token;
@@ -1042,7 +1055,10 @@ static int nfs_parse_mount_options(char *raw,
1042out_nomem: 1055out_nomem:
1043 printk(KERN_INFO "NFS: not enough memory to parse option\n"); 1056 printk(KERN_INFO "NFS: not enough memory to parse option\n");
1044 return 0; 1057 return 0;
1045 1058out_security_failure:
1059 free_secdata(secdata);
1060 printk(KERN_INFO "NFS: security options invalid: %d\n", rc);
1061 return 0;
1046out_unrec_vers: 1062out_unrec_vers:
1047 printk(KERN_INFO "NFS: unrecognized NFS version number\n"); 1063 printk(KERN_INFO "NFS: unrecognized NFS version number\n");
1048 return 0; 1064 return 0;
@@ -1214,6 +1230,33 @@ static int nfs_validate_mount_data(void *options,
1214 args->namlen = data->namlen; 1230 args->namlen = data->namlen;
1215 args->bsize = data->bsize; 1231 args->bsize = data->bsize;
1216 args->auth_flavors[0] = data->pseudoflavor; 1232 args->auth_flavors[0] = data->pseudoflavor;
1233
1234 /*
1235 * The legacy version 6 binary mount data from userspace has a
1236 * field used only to transport selinux information into the
1237 * the kernel. To continue to support that functionality we
1238 * have a touch of selinux knowledge here in the NFS code. The
1239 * userspace code converted context=blah to just blah so we are
1240 * converting back to the full string selinux understands.
1241 */
1242 if (data->context[0]){
1243#ifdef CONFIG_SECURITY_SELINUX
1244 int rc;
1245 char *opts_str = kmalloc(sizeof(data->context) + 8, GFP_KERNEL);
1246 if (!opts_str)
1247 return -ENOMEM;
1248 strcpy(opts_str, "context=");
1249 data->context[NFS_MAX_CONTEXT_LEN] = '\0';
1250 strcat(opts_str, &data->context[0]);
1251 rc = security_sb_parse_opts_str(opts_str, &args->lsm_opts);
1252 kfree(opts_str);
1253 if (rc)
1254 return rc;
1255#else
1256 return -EINVAL;
1257#endif
1258 }
1259
1217 break; 1260 break;
1218 default: { 1261 default: {
1219 unsigned int len; 1262 unsigned int len;
@@ -1476,6 +1519,8 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1476 }; 1519 };
1477 int error; 1520 int error;
1478 1521
1522 security_init_mnt_opts(&data.lsm_opts);
1523
1479 /* Validate the mount data */ 1524 /* Validate the mount data */
1480 error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name); 1525 error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name);
1481 if (error < 0) 1526 if (error < 0)
@@ -1515,6 +1560,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1515 goto error_splat_super; 1560 goto error_splat_super;
1516 } 1561 }
1517 1562
1563 error = security_sb_set_mnt_opts(s, &data.lsm_opts);
1564 if (error)
1565 goto error_splat_root;
1566
1518 s->s_flags |= MS_ACTIVE; 1567 s->s_flags |= MS_ACTIVE;
1519 mnt->mnt_sb = s; 1568 mnt->mnt_sb = s;
1520 mnt->mnt_root = mntroot; 1569 mnt->mnt_root = mntroot;
@@ -1523,12 +1572,15 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1523out: 1572out:
1524 kfree(data.nfs_server.hostname); 1573 kfree(data.nfs_server.hostname);
1525 kfree(data.mount_server.hostname); 1574 kfree(data.mount_server.hostname);
1575 security_free_mnt_opts(&data.lsm_opts);
1526 return error; 1576 return error;
1527 1577
1528out_err_nosb: 1578out_err_nosb:
1529 nfs_free_server(server); 1579 nfs_free_server(server);
1530 goto out; 1580 goto out;
1531 1581
1582error_splat_root:
1583 dput(mntroot);
1532error_splat_super: 1584error_splat_super:
1533 up_write(&s->s_umount); 1585 up_write(&s->s_umount);
1534 deactivate_super(s); 1586 deactivate_super(s);
@@ -1608,6 +1660,9 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
1608 mnt->mnt_sb = s; 1660 mnt->mnt_sb = s;
1609 mnt->mnt_root = mntroot; 1661 mnt->mnt_root = mntroot;
1610 1662
1663 /* clone any lsm security options from the parent to the new sb */
1664 security_sb_clone_mnt_opts(data->sb, s);
1665
1611 dprintk("<-- nfs_xdev_get_sb() = 0\n"); 1666 dprintk("<-- nfs_xdev_get_sb() = 0\n");
1612 return 0; 1667 return 0;
1613 1668
@@ -1850,6 +1905,8 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
1850 }; 1905 };
1851 int error; 1906 int error;
1852 1907
1908 security_init_mnt_opts(&data.lsm_opts);
1909
1853 /* Validate the mount data */ 1910 /* Validate the mount data */
1854 error = nfs4_validate_mount_data(raw_data, &data, dev_name); 1911 error = nfs4_validate_mount_data(raw_data, &data, dev_name);
1855 if (error < 0) 1912 if (error < 0)
@@ -1898,6 +1955,7 @@ out:
1898 kfree(data.client_address); 1955 kfree(data.client_address);
1899 kfree(data.nfs_server.export_path); 1956 kfree(data.nfs_server.export_path);
1900 kfree(data.nfs_server.hostname); 1957 kfree(data.nfs_server.hostname);
1958 security_free_mnt_opts(&data.lsm_opts);
1901 return error; 1959 return error;
1902 1960
1903out_free: 1961out_free:
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f55c437124a2..bed63416a55b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -39,6 +39,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
39 unsigned int, unsigned int); 39 unsigned int, unsigned int);
40static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, 40static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
41 struct inode *inode, int ioflags); 41 struct inode *inode, int ioflags);
42static void nfs_redirty_request(struct nfs_page *req);
42static const struct rpc_call_ops nfs_write_partial_ops; 43static const struct rpc_call_ops nfs_write_partial_ops;
43static const struct rpc_call_ops nfs_write_full_ops; 44static const struct rpc_call_ops nfs_write_full_ops;
44static const struct rpc_call_ops nfs_commit_ops; 45static const struct rpc_call_ops nfs_commit_ops;
@@ -288,7 +289,12 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
288 BUG(); 289 BUG();
289 } 290 }
290 spin_unlock(&inode->i_lock); 291 spin_unlock(&inode->i_lock);
291 nfs_pageio_add_request(pgio, req); 292 if (!nfs_pageio_add_request(pgio, req)) {
293 nfs_redirty_request(req);
294 nfs_end_page_writeback(page);
295 nfs_clear_page_tag_locked(req);
296 return pgio->pg_error;
297 }
292 return 0; 298 return 0;
293} 299}
294 300
@@ -734,7 +740,7 @@ int nfs_updatepage(struct file *file, struct page *page,
734 */ 740 */
735 if (nfs_write_pageuptodate(page, inode) && 741 if (nfs_write_pageuptodate(page, inode) &&
736 inode->i_flock == NULL && 742 inode->i_flock == NULL &&
737 !(file->f_mode & O_SYNC)) { 743 !(file->f_flags & O_SYNC)) {
738 count = max(count + offset, nfs_page_length(page)); 744 count = max(count + offset, nfs_page_length(page));
739 offset = 0; 745 offset = 0;
740 } 746 }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c593db047d8b..c309c881bd4e 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -658,14 +658,19 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
658 return status; 658 return status;
659 } 659 }
660 } 660 }
661 status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt);
662 if (status)
663 return status;
661 status = nfs_ok; 664 status = nfs_ok;
662 if (setattr->sa_acl != NULL) 665 if (setattr->sa_acl != NULL)
663 status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, 666 status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh,
664 setattr->sa_acl); 667 setattr->sa_acl);
665 if (status) 668 if (status)
666 return status; 669 goto out;
667 status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 670 status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
668 0, (time_t)0); 671 0, (time_t)0);
672out:
673 mnt_drop_write(cstate->current_fh.fh_export->ex_path.mnt);
669 return status; 674 return status;
670} 675}
671 676
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 1ff90625860f..145b3c877a27 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -46,6 +46,7 @@
46#include <linux/scatterlist.h> 46#include <linux/scatterlist.h>
47#include <linux/crypto.h> 47#include <linux/crypto.h>
48#include <linux/sched.h> 48#include <linux/sched.h>
49#include <linux/mount.h>
49 50
50#define NFSDDBG_FACILITY NFSDDBG_PROC 51#define NFSDDBG_FACILITY NFSDDBG_PROC
51 52
@@ -154,7 +155,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
154 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); 155 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
155 goto out_put; 156 goto out_put;
156 } 157 }
158 status = mnt_want_write(rec_dir.path.mnt);
159 if (status)
160 goto out_put;
157 status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); 161 status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU);
162 mnt_drop_write(rec_dir.path.mnt);
158out_put: 163out_put:
159 dput(dentry); 164 dput(dentry);
160out_unlock: 165out_unlock:
@@ -313,12 +318,17 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
313 if (!rec_dir_init || !clp->cl_firststate) 318 if (!rec_dir_init || !clp->cl_firststate)
314 return; 319 return;
315 320
321 status = mnt_want_write(rec_dir.path.mnt);
322 if (status)
323 goto out;
316 clp->cl_firststate = 0; 324 clp->cl_firststate = 0;
317 nfs4_save_user(&uid, &gid); 325 nfs4_save_user(&uid, &gid);
318 status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); 326 status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
319 nfs4_reset_user(uid, gid); 327 nfs4_reset_user(uid, gid);
320 if (status == 0) 328 if (status == 0)
321 nfsd4_sync_rec_dir(); 329 nfsd4_sync_rec_dir();
330 mnt_drop_write(rec_dir.path.mnt);
331out:
322 if (status) 332 if (status)
323 printk("NFSD: Failed to remove expired client state directory" 333 printk("NFSD: Failed to remove expired client state directory"
324 " %.*s\n", HEXDIR_LEN, clp->cl_recdir); 334 " %.*s\n", HEXDIR_LEN, clp->cl_recdir);
@@ -347,13 +357,17 @@ nfsd4_recdir_purge_old(void) {
347 357
348 if (!rec_dir_init) 358 if (!rec_dir_init)
349 return; 359 return;
360 status = mnt_want_write(rec_dir.path.mnt);
361 if (status)
362 goto out;
350 status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old); 363 status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old);
351 if (status == 0) 364 if (status == 0)
352 nfsd4_sync_rec_dir(); 365 nfsd4_sync_rec_dir();
366 mnt_drop_write(rec_dir.path.mnt);
367out:
353 if (status) 368 if (status)
354 printk("nfsd4: failed to purge old clients from recovery" 369 printk("nfsd4: failed to purge old clients from recovery"
355 " directory %s\n", rec_dir.path.dentry->d_name.name); 370 " directory %s\n", rec_dir.path.dentry->d_name.name);
356 return;
357} 371}
358 372
359static int 373static int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bcb97d8e8b8b..81a75f3081f4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -41,6 +41,7 @@
41#include <linux/sunrpc/svc.h> 41#include <linux/sunrpc/svc.h>
42#include <linux/nfsd/nfsd.h> 42#include <linux/nfsd/nfsd.h>
43#include <linux/nfsd/cache.h> 43#include <linux/nfsd/cache.h>
44#include <linux/file.h>
44#include <linux/mount.h> 45#include <linux/mount.h>
45#include <linux/workqueue.h> 46#include <linux/workqueue.h>
46#include <linux/smp_lock.h> 47#include <linux/smp_lock.h>
@@ -1239,7 +1240,7 @@ static inline void
1239nfs4_file_downgrade(struct file *filp, unsigned int share_access) 1240nfs4_file_downgrade(struct file *filp, unsigned int share_access)
1240{ 1241{
1241 if (share_access & NFS4_SHARE_ACCESS_WRITE) { 1242 if (share_access & NFS4_SHARE_ACCESS_WRITE) {
1242 put_write_access(filp->f_path.dentry->d_inode); 1243 drop_file_write_access(filp);
1243 filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE; 1244 filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE;
1244 } 1245 }
1245} 1246}
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 0130b345234d..3e6b3f41ee1f 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -101,7 +101,7 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
101{ 101{
102 /* Check if the request originated from a secure port. */ 102 /* Check if the request originated from a secure port. */
103 if (!rqstp->rq_secure && EX_SECURE(exp)) { 103 if (!rqstp->rq_secure && EX_SECURE(exp)) {
104 char buf[RPC_MAX_ADDRBUFLEN]; 104 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
105 dprintk(KERN_WARNING 105 dprintk(KERN_WARNING
106 "nfsd: request from insecure port %s!\n", 106 "nfsd: request from insecure port %s!\n",
107 svc_print_addr(rqstp, buf, sizeof(buf))); 107 svc_print_addr(rqstp, buf, sizeof(buf)));
@@ -232,6 +232,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
232 fhp->fh_dentry = dentry; 232 fhp->fh_dentry = dentry;
233 fhp->fh_export = exp; 233 fhp->fh_export = exp;
234 nfsd_nr_verified++; 234 nfsd_nr_verified++;
235 cache_get(&exp->h);
235 } else { 236 } else {
236 /* 237 /*
237 * just rechecking permissions 238 * just rechecking permissions
@@ -241,6 +242,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
241 dprintk("nfsd: fh_verify - just checking\n"); 242 dprintk("nfsd: fh_verify - just checking\n");
242 dentry = fhp->fh_dentry; 243 dentry = fhp->fh_dentry;
243 exp = fhp->fh_export; 244 exp = fhp->fh_export;
245 cache_get(&exp->h);
244 /* 246 /*
245 * Set user creds for this exportpoint; necessary even 247 * Set user creds for this exportpoint; necessary even
246 * in the "just checking" case because this may be a 248 * in the "just checking" case because this may be a
@@ -252,8 +254,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
252 if (error) 254 if (error)
253 goto out; 255 goto out;
254 } 256 }
255 cache_get(&exp->h);
256
257 257
258 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); 258 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
259 if (error) 259 if (error)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 46f59d5365a0..304bf5f643c9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1255,23 +1255,35 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1255 err = 0; 1255 err = 0;
1256 switch (type) { 1256 switch (type) {
1257 case S_IFREG: 1257 case S_IFREG:
1258 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1259 if (host_err)
1260 goto out_nfserr;
1258 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1261 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1259 break; 1262 break;
1260 case S_IFDIR: 1263 case S_IFDIR:
1264 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1265 if (host_err)
1266 goto out_nfserr;
1261 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); 1267 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
1262 break; 1268 break;
1263 case S_IFCHR: 1269 case S_IFCHR:
1264 case S_IFBLK: 1270 case S_IFBLK:
1265 case S_IFIFO: 1271 case S_IFIFO:
1266 case S_IFSOCK: 1272 case S_IFSOCK:
1273 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1274 if (host_err)
1275 goto out_nfserr;
1267 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); 1276 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
1268 break; 1277 break;
1269 default: 1278 default:
1270 printk("nfsd: bad file type %o in nfsd_create\n", type); 1279 printk("nfsd: bad file type %o in nfsd_create\n", type);
1271 host_err = -EINVAL; 1280 host_err = -EINVAL;
1281 goto out_nfserr;
1272 } 1282 }
1273 if (host_err < 0) 1283 if (host_err < 0) {
1284 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1274 goto out_nfserr; 1285 goto out_nfserr;
1286 }
1275 1287
1276 if (EX_ISSYNC(fhp->fh_export)) { 1288 if (EX_ISSYNC(fhp->fh_export)) {
1277 err = nfserrno(nfsd_sync_dir(dentry)); 1289 err = nfserrno(nfsd_sync_dir(dentry));
@@ -1282,6 +1294,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1282 err2 = nfsd_create_setattr(rqstp, resfhp, iap); 1294 err2 = nfsd_create_setattr(rqstp, resfhp, iap);
1283 if (err2) 1295 if (err2)
1284 err = err2; 1296 err = err2;
1297 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1285 /* 1298 /*
1286 * Update the file handle to get the new inode info. 1299 * Update the file handle to get the new inode info.
1287 */ 1300 */
@@ -1359,6 +1372,9 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1359 v_atime = verifier[1]&0x7fffffff; 1372 v_atime = verifier[1]&0x7fffffff;
1360 } 1373 }
1361 1374
1375 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1376 if (host_err)
1377 goto out_nfserr;
1362 if (dchild->d_inode) { 1378 if (dchild->d_inode) {
1363 err = 0; 1379 err = 0;
1364 1380
@@ -1390,12 +1406,15 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1390 case NFS3_CREATE_GUARDED: 1406 case NFS3_CREATE_GUARDED:
1391 err = nfserr_exist; 1407 err = nfserr_exist;
1392 } 1408 }
1409 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1393 goto out; 1410 goto out;
1394 } 1411 }
1395 1412
1396 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1413 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1397 if (host_err < 0) 1414 if (host_err < 0) {
1415 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1398 goto out_nfserr; 1416 goto out_nfserr;
1417 }
1399 if (created) 1418 if (created)
1400 *created = 1; 1419 *created = 1;
1401 1420
@@ -1420,6 +1439,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1420 if (err2) 1439 if (err2)
1421 err = err2; 1440 err = err2;
1422 1441
1442 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1423 /* 1443 /*
1424 * Update the filehandle to get the new inode info. 1444 * Update the filehandle to get the new inode info.
1425 */ 1445 */
@@ -1522,6 +1542,10 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1522 if (iap && (iap->ia_valid & ATTR_MODE)) 1542 if (iap && (iap->ia_valid & ATTR_MODE))
1523 mode = iap->ia_mode & S_IALLUGO; 1543 mode = iap->ia_mode & S_IALLUGO;
1524 1544
1545 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1546 if (host_err)
1547 goto out_nfserr;
1548
1525 if (unlikely(path[plen] != 0)) { 1549 if (unlikely(path[plen] != 0)) {
1526 char *path_alloced = kmalloc(plen+1, GFP_KERNEL); 1550 char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
1527 if (path_alloced == NULL) 1551 if (path_alloced == NULL)
@@ -1542,6 +1566,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1542 err = nfserrno(host_err); 1566 err = nfserrno(host_err);
1543 fh_unlock(fhp); 1567 fh_unlock(fhp);
1544 1568
1569 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1570
1545 cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); 1571 cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
1546 dput(dnew); 1572 dput(dnew);
1547 if (err==0) err = cerr; 1573 if (err==0) err = cerr;
@@ -1592,6 +1618,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1592 dold = tfhp->fh_dentry; 1618 dold = tfhp->fh_dentry;
1593 dest = dold->d_inode; 1619 dest = dold->d_inode;
1594 1620
1621 host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt);
1622 if (host_err) {
1623 err = nfserrno(host_err);
1624 goto out_dput;
1625 }
1595 host_err = vfs_link(dold, dirp, dnew); 1626 host_err = vfs_link(dold, dirp, dnew);
1596 if (!host_err) { 1627 if (!host_err) {
1597 if (EX_ISSYNC(ffhp->fh_export)) { 1628 if (EX_ISSYNC(ffhp->fh_export)) {
@@ -1605,7 +1636,8 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1605 else 1636 else
1606 err = nfserrno(host_err); 1637 err = nfserrno(host_err);
1607 } 1638 }
1608 1639 mnt_drop_write(tfhp->fh_export->ex_path.mnt);
1640out_dput:
1609 dput(dnew); 1641 dput(dnew);
1610out_unlock: 1642out_unlock:
1611 fh_unlock(ffhp); 1643 fh_unlock(ffhp);
@@ -1678,13 +1710,20 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1678 if (ndentry == trap) 1710 if (ndentry == trap)
1679 goto out_dput_new; 1711 goto out_dput_new;
1680 1712
1681#ifdef MSNFS 1713 if (svc_msnfs(ffhp) &&
1682 if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
1683 ((atomic_read(&odentry->d_count) > 1) 1714 ((atomic_read(&odentry->d_count) > 1)
1684 || (atomic_read(&ndentry->d_count) > 1))) { 1715 || (atomic_read(&ndentry->d_count) > 1))) {
1685 host_err = -EPERM; 1716 host_err = -EPERM;
1686 } else 1717 goto out_dput_new;
1687#endif 1718 }
1719
1720 host_err = -EXDEV;
1721 if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
1722 goto out_dput_new;
1723 host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt);
1724 if (host_err)
1725 goto out_dput_new;
1726
1688 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1727 host_err = vfs_rename(fdir, odentry, tdir, ndentry);
1689 if (!host_err && EX_ISSYNC(tfhp->fh_export)) { 1728 if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
1690 host_err = nfsd_sync_dir(tdentry); 1729 host_err = nfsd_sync_dir(tdentry);
@@ -1692,6 +1731,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1692 host_err = nfsd_sync_dir(fdentry); 1731 host_err = nfsd_sync_dir(fdentry);
1693 } 1732 }
1694 1733
1734 mnt_drop_write(ffhp->fh_export->ex_path.mnt);
1735
1695 out_dput_new: 1736 out_dput_new:
1696 dput(ndentry); 1737 dput(ndentry);
1697 out_dput_old: 1738 out_dput_old:
@@ -1750,6 +1791,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1750 if (!type) 1791 if (!type)
1751 type = rdentry->d_inode->i_mode & S_IFMT; 1792 type = rdentry->d_inode->i_mode & S_IFMT;
1752 1793
1794 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1795 if (host_err)
1796 goto out_nfserr;
1797
1753 if (type != S_IFDIR) { /* It's UNLINK */ 1798 if (type != S_IFDIR) { /* It's UNLINK */
1754#ifdef MSNFS 1799#ifdef MSNFS
1755 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 1800 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
@@ -1765,10 +1810,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1765 dput(rdentry); 1810 dput(rdentry);
1766 1811
1767 if (host_err) 1812 if (host_err)
1768 goto out_nfserr; 1813 goto out_drop;
1769 if (EX_ISSYNC(fhp->fh_export)) 1814 if (EX_ISSYNC(fhp->fh_export))
1770 host_err = nfsd_sync_dir(dentry); 1815 host_err = nfsd_sync_dir(dentry);
1771 1816
1817out_drop:
1818 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1772out_nfserr: 1819out_nfserr:
1773 err = nfserrno(host_err); 1820 err = nfserrno(host_err);
1774out: 1821out:
@@ -1865,7 +1912,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1865 inode->i_mode, 1912 inode->i_mode,
1866 IS_IMMUTABLE(inode)? " immut" : "", 1913 IS_IMMUTABLE(inode)? " immut" : "",
1867 IS_APPEND(inode)? " append" : "", 1914 IS_APPEND(inode)? " append" : "",
1868 IS_RDONLY(inode)? " ro" : ""); 1915 __mnt_is_readonly(exp->ex_path.mnt)? " ro" : "");
1869 dprintk(" owner %d/%d user %d/%d\n", 1916 dprintk(" owner %d/%d user %d/%d\n",
1870 inode->i_uid, inode->i_gid, current->fsuid, current->fsgid); 1917 inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
1871#endif 1918#endif
@@ -1876,7 +1923,8 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1876 */ 1923 */
1877 if (!(acc & MAY_LOCAL_ACCESS)) 1924 if (!(acc & MAY_LOCAL_ACCESS))
1878 if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { 1925 if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
1879 if (exp_rdonly(rqstp, exp) || IS_RDONLY(inode)) 1926 if (exp_rdonly(rqstp, exp) ||
1927 __mnt_is_readonly(exp->ex_path.mnt))
1880 return nfserr_rofs; 1928 return nfserr_rofs;
1881 if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) 1929 if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
1882 return nfserr_perm; 1930 return nfserr_perm;
@@ -2039,6 +2087,9 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
2039 } else 2087 } else
2040 size = 0; 2088 size = 0;
2041 2089
2090 error = mnt_want_write(fhp->fh_export->ex_path.mnt);
2091 if (error)
2092 goto getout;
2042 if (size) 2093 if (size)
2043 error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); 2094 error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
2044 else { 2095 else {
@@ -2050,6 +2101,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
2050 error = 0; 2101 error = 0;
2051 } 2102 }
2052 } 2103 }
2104 mnt_drop_write(fhp->fh_export->ex_path.mnt);
2053 2105
2054getout: 2106getout:
2055 kfree(value); 2107 kfree(value);
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 4d4ce48bb42c..f6956de56fdb 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -2,7 +2,12 @@ EXTRA_CFLAGS += -Ifs/ocfs2
2 2
3EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES 3EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES
4 4
5obj-$(CONFIG_OCFS2_FS) += ocfs2.o 5obj-$(CONFIG_OCFS2_FS) += \
6 ocfs2.o \
7 ocfs2_stackglue.o
8
9obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_stack_o2cb.o
10obj-$(CONFIG_OCFS2_FS_USERSPACE_CLUSTER) += ocfs2_stack_user.o
6 11
7ocfs2-objs := \ 12ocfs2-objs := \
8 alloc.o \ 13 alloc.o \
@@ -31,5 +36,10 @@ ocfs2-objs := \
31 uptodate.o \ 36 uptodate.o \
32 ver.o 37 ver.o
33 38
39ocfs2_stackglue-objs := stackglue.o
40ocfs2_stack_o2cb-objs := stack_o2cb.o
41ocfs2_stack_user-objs := stack_user.o
42
43# cluster/ is always needed when OCFS2_FS for masklog support
34obj-$(CONFIG_OCFS2_FS) += cluster/ 44obj-$(CONFIG_OCFS2_FS) += cluster/
35obj-$(CONFIG_OCFS2_FS) += dlm/ 45obj-$(CONFIG_OCFS2_FS_O2CB) += dlm/
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 447206eb5c2e..41f84c92094f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1029,8 +1029,7 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
1029 BUG_ON(!next_free); 1029 BUG_ON(!next_free);
1030 1030
1031 /* The tree code before us didn't allow enough room in the leaf. */ 1031 /* The tree code before us didn't allow enough room in the leaf. */
1032 if (el->l_next_free_rec == el->l_count && !has_empty) 1032 BUG_ON(el->l_next_free_rec == el->l_count && !has_empty);
1033 BUG();
1034 1033
1035 /* 1034 /*
1036 * The easiest way to approach this is to just remove the 1035 * The easiest way to approach this is to just remove the
@@ -1450,6 +1449,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
1450 * - When our insert into the right path leaf is at the leftmost edge 1449 * - When our insert into the right path leaf is at the leftmost edge
1451 * and requires an update of the path immediately to it's left. This 1450 * and requires an update of the path immediately to it's left. This
1452 * can occur at the end of some types of rotation and appending inserts. 1451 * can occur at the end of some types of rotation and appending inserts.
1452 * - When we've adjusted the last extent record in the left path leaf and the
1453 * 1st extent record in the right path leaf during cross extent block merge.
1453 */ 1454 */
1454static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, 1455static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
1455 struct ocfs2_path *left_path, 1456 struct ocfs2_path *left_path,
@@ -2712,24 +2713,147 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
2712 } 2713 }
2713} 2714}
2714 2715
2716static int ocfs2_get_right_path(struct inode *inode,
2717 struct ocfs2_path *left_path,
2718 struct ocfs2_path **ret_right_path)
2719{
2720 int ret;
2721 u32 right_cpos;
2722 struct ocfs2_path *right_path = NULL;
2723 struct ocfs2_extent_list *left_el;
2724
2725 *ret_right_path = NULL;
2726
2727 /* This function shouldn't be called for non-trees. */
2728 BUG_ON(left_path->p_tree_depth == 0);
2729
2730 left_el = path_leaf_el(left_path);
2731 BUG_ON(left_el->l_next_free_rec != left_el->l_count);
2732
2733 ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
2734 &right_cpos);
2735 if (ret) {
2736 mlog_errno(ret);
2737 goto out;
2738 }
2739
2740 /* This function shouldn't be called for the rightmost leaf. */
2741 BUG_ON(right_cpos == 0);
2742
2743 right_path = ocfs2_new_path(path_root_bh(left_path),
2744 path_root_el(left_path));
2745 if (!right_path) {
2746 ret = -ENOMEM;
2747 mlog_errno(ret);
2748 goto out;
2749 }
2750
2751 ret = ocfs2_find_path(inode, right_path, right_cpos);
2752 if (ret) {
2753 mlog_errno(ret);
2754 goto out;
2755 }
2756
2757 *ret_right_path = right_path;
2758out:
2759 if (ret)
2760 ocfs2_free_path(right_path);
2761 return ret;
2762}
2763
2715/* 2764/*
2716 * Remove split_rec clusters from the record at index and merge them 2765 * Remove split_rec clusters from the record at index and merge them
2717 * onto the beginning of the record at index + 1. 2766 * onto the beginning of the record "next" to it.
2767 * For index < l_count - 1, the next means the extent rec at index + 1.
2768 * For index == l_count - 1, the "next" means the 1st extent rec of the
2769 * next extent block.
2718 */ 2770 */
2719static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, 2771static int ocfs2_merge_rec_right(struct inode *inode,
2720 handle_t *handle, 2772 struct ocfs2_path *left_path,
2721 struct ocfs2_extent_rec *split_rec, 2773 handle_t *handle,
2722 struct ocfs2_extent_list *el, int index) 2774 struct ocfs2_extent_rec *split_rec,
2775 int index)
2723{ 2776{
2724 int ret; 2777 int ret, next_free, i;
2725 unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); 2778 unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
2726 struct ocfs2_extent_rec *left_rec; 2779 struct ocfs2_extent_rec *left_rec;
2727 struct ocfs2_extent_rec *right_rec; 2780 struct ocfs2_extent_rec *right_rec;
2781 struct ocfs2_extent_list *right_el;
2782 struct ocfs2_path *right_path = NULL;
2783 int subtree_index = 0;
2784 struct ocfs2_extent_list *el = path_leaf_el(left_path);
2785 struct buffer_head *bh = path_leaf_bh(left_path);
2786 struct buffer_head *root_bh = NULL;
2728 2787
2729 BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); 2788 BUG_ON(index >= le16_to_cpu(el->l_next_free_rec));
2730
2731 left_rec = &el->l_recs[index]; 2789 left_rec = &el->l_recs[index];
2732 right_rec = &el->l_recs[index + 1]; 2790
2791 if (index == le16_to_cpu(el->l_next_free_rec - 1) &&
2792 le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) {
2793 /* we meet with a cross extent block merge. */
2794 ret = ocfs2_get_right_path(inode, left_path, &right_path);
2795 if (ret) {
2796 mlog_errno(ret);
2797 goto out;
2798 }
2799
2800 right_el = path_leaf_el(right_path);
2801 next_free = le16_to_cpu(right_el->l_next_free_rec);
2802 BUG_ON(next_free <= 0);
2803 right_rec = &right_el->l_recs[0];
2804 if (ocfs2_is_empty_extent(right_rec)) {
2805 BUG_ON(le16_to_cpu(next_free) <= 1);
2806 right_rec = &right_el->l_recs[1];
2807 }
2808
2809 BUG_ON(le32_to_cpu(left_rec->e_cpos) +
2810 le16_to_cpu(left_rec->e_leaf_clusters) !=
2811 le32_to_cpu(right_rec->e_cpos));
2812
2813 subtree_index = ocfs2_find_subtree_root(inode,
2814 left_path, right_path);
2815
2816 ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
2817 handle->h_buffer_credits,
2818 right_path);
2819 if (ret) {
2820 mlog_errno(ret);
2821 goto out;
2822 }
2823
2824 root_bh = left_path->p_node[subtree_index].bh;
2825 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
2826
2827 ret = ocfs2_journal_access(handle, inode, root_bh,
2828 OCFS2_JOURNAL_ACCESS_WRITE);
2829 if (ret) {
2830 mlog_errno(ret);
2831 goto out;
2832 }
2833
2834 for (i = subtree_index + 1;
2835 i < path_num_items(right_path); i++) {
2836 ret = ocfs2_journal_access(handle, inode,
2837 right_path->p_node[i].bh,
2838 OCFS2_JOURNAL_ACCESS_WRITE);
2839 if (ret) {
2840 mlog_errno(ret);
2841 goto out;
2842 }
2843
2844 ret = ocfs2_journal_access(handle, inode,
2845 left_path->p_node[i].bh,
2846 OCFS2_JOURNAL_ACCESS_WRITE);
2847 if (ret) {
2848 mlog_errno(ret);
2849 goto out;
2850 }
2851 }
2852
2853 } else {
2854 BUG_ON(index == le16_to_cpu(el->l_next_free_rec) - 1);
2855 right_rec = &el->l_recs[index + 1];
2856 }
2733 2857
2734 ret = ocfs2_journal_access(handle, inode, bh, 2858 ret = ocfs2_journal_access(handle, inode, bh,
2735 OCFS2_JOURNAL_ACCESS_WRITE); 2859 OCFS2_JOURNAL_ACCESS_WRITE);
@@ -2751,30 +2875,156 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
2751 if (ret) 2875 if (ret)
2752 mlog_errno(ret); 2876 mlog_errno(ret);
2753 2877
2878 if (right_path) {
2879 ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
2880 if (ret)
2881 mlog_errno(ret);
2882
2883 ocfs2_complete_edge_insert(inode, handle, left_path,
2884 right_path, subtree_index);
2885 }
2886out:
2887 if (right_path)
2888 ocfs2_free_path(right_path);
2889 return ret;
2890}
2891
2892static int ocfs2_get_left_path(struct inode *inode,
2893 struct ocfs2_path *right_path,
2894 struct ocfs2_path **ret_left_path)
2895{
2896 int ret;
2897 u32 left_cpos;
2898 struct ocfs2_path *left_path = NULL;
2899
2900 *ret_left_path = NULL;
2901
2902 /* This function shouldn't be called for non-trees. */
2903 BUG_ON(right_path->p_tree_depth == 0);
2904
2905 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
2906 right_path, &left_cpos);
2907 if (ret) {
2908 mlog_errno(ret);
2909 goto out;
2910 }
2911
2912 /* This function shouldn't be called for the leftmost leaf. */
2913 BUG_ON(left_cpos == 0);
2914
2915 left_path = ocfs2_new_path(path_root_bh(right_path),
2916 path_root_el(right_path));
2917 if (!left_path) {
2918 ret = -ENOMEM;
2919 mlog_errno(ret);
2920 goto out;
2921 }
2922
2923 ret = ocfs2_find_path(inode, left_path, left_cpos);
2924 if (ret) {
2925 mlog_errno(ret);
2926 goto out;
2927 }
2928
2929 *ret_left_path = left_path;
2754out: 2930out:
2931 if (ret)
2932 ocfs2_free_path(left_path);
2755 return ret; 2933 return ret;
2756} 2934}
2757 2935
2758/* 2936/*
2759 * Remove split_rec clusters from the record at index and merge them 2937 * Remove split_rec clusters from the record at index and merge them
2760 * onto the tail of the record at index - 1. 2938 * onto the tail of the record "before" it.
2939 * For index > 0, the "before" means the extent rec at index - 1.
2940 *
2941 * For index == 0, the "before" means the last record of the previous
2942 * extent block. And there is also a situation that we may need to
2943 * remove the rightmost leaf extent block in the right_path and change
2944 * the right path to indicate the new rightmost path.
2761 */ 2945 */
2762static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, 2946static int ocfs2_merge_rec_left(struct inode *inode,
2947 struct ocfs2_path *right_path,
2763 handle_t *handle, 2948 handle_t *handle,
2764 struct ocfs2_extent_rec *split_rec, 2949 struct ocfs2_extent_rec *split_rec,
2765 struct ocfs2_extent_list *el, int index) 2950 struct ocfs2_cached_dealloc_ctxt *dealloc,
2951 int index)
2766{ 2952{
2767 int ret, has_empty_extent = 0; 2953 int ret, i, subtree_index = 0, has_empty_extent = 0;
2768 unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); 2954 unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
2769 struct ocfs2_extent_rec *left_rec; 2955 struct ocfs2_extent_rec *left_rec;
2770 struct ocfs2_extent_rec *right_rec; 2956 struct ocfs2_extent_rec *right_rec;
2957 struct ocfs2_extent_list *el = path_leaf_el(right_path);
2958 struct buffer_head *bh = path_leaf_bh(right_path);
2959 struct buffer_head *root_bh = NULL;
2960 struct ocfs2_path *left_path = NULL;
2961 struct ocfs2_extent_list *left_el;
2771 2962
2772 BUG_ON(index <= 0); 2963 BUG_ON(index < 0);
2773 2964
2774 left_rec = &el->l_recs[index - 1];
2775 right_rec = &el->l_recs[index]; 2965 right_rec = &el->l_recs[index];
2776 if (ocfs2_is_empty_extent(&el->l_recs[0])) 2966 if (index == 0) {
2777 has_empty_extent = 1; 2967 /* we meet with a cross extent block merge. */
2968 ret = ocfs2_get_left_path(inode, right_path, &left_path);
2969 if (ret) {
2970 mlog_errno(ret);
2971 goto out;
2972 }
2973
2974 left_el = path_leaf_el(left_path);
2975 BUG_ON(le16_to_cpu(left_el->l_next_free_rec) !=
2976 le16_to_cpu(left_el->l_count));
2977
2978 left_rec = &left_el->l_recs[
2979 le16_to_cpu(left_el->l_next_free_rec) - 1];
2980 BUG_ON(le32_to_cpu(left_rec->e_cpos) +
2981 le16_to_cpu(left_rec->e_leaf_clusters) !=
2982 le32_to_cpu(split_rec->e_cpos));
2983
2984 subtree_index = ocfs2_find_subtree_root(inode,
2985 left_path, right_path);
2986
2987 ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
2988 handle->h_buffer_credits,
2989 left_path);
2990 if (ret) {
2991 mlog_errno(ret);
2992 goto out;
2993 }
2994
2995 root_bh = left_path->p_node[subtree_index].bh;
2996 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
2997
2998 ret = ocfs2_journal_access(handle, inode, root_bh,
2999 OCFS2_JOURNAL_ACCESS_WRITE);
3000 if (ret) {
3001 mlog_errno(ret);
3002 goto out;
3003 }
3004
3005 for (i = subtree_index + 1;
3006 i < path_num_items(right_path); i++) {
3007 ret = ocfs2_journal_access(handle, inode,
3008 right_path->p_node[i].bh,
3009 OCFS2_JOURNAL_ACCESS_WRITE);
3010 if (ret) {
3011 mlog_errno(ret);
3012 goto out;
3013 }
3014
3015 ret = ocfs2_journal_access(handle, inode,
3016 left_path->p_node[i].bh,
3017 OCFS2_JOURNAL_ACCESS_WRITE);
3018 if (ret) {
3019 mlog_errno(ret);
3020 goto out;
3021 }
3022 }
3023 } else {
3024 left_rec = &el->l_recs[index - 1];
3025 if (ocfs2_is_empty_extent(&el->l_recs[0]))
3026 has_empty_extent = 1;
3027 }
2778 3028
2779 ret = ocfs2_journal_access(handle, inode, bh, 3029 ret = ocfs2_journal_access(handle, inode, bh,
2780 OCFS2_JOURNAL_ACCESS_WRITE); 3030 OCFS2_JOURNAL_ACCESS_WRITE);
@@ -2790,9 +3040,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
2790 *left_rec = *split_rec; 3040 *left_rec = *split_rec;
2791 3041
2792 has_empty_extent = 0; 3042 has_empty_extent = 0;
2793 } else { 3043 } else
2794 le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); 3044 le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters);
2795 }
2796 3045
2797 le32_add_cpu(&right_rec->e_cpos, split_clusters); 3046 le32_add_cpu(&right_rec->e_cpos, split_clusters);
2798 le64_add_cpu(&right_rec->e_blkno, 3047 le64_add_cpu(&right_rec->e_blkno,
@@ -2805,13 +3054,44 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
2805 if (ret) 3054 if (ret)
2806 mlog_errno(ret); 3055 mlog_errno(ret);
2807 3056
3057 if (left_path) {
3058 ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
3059 if (ret)
3060 mlog_errno(ret);
3061
3062 /*
3063 * In the situation that the right_rec is empty and the extent
3064 * block is empty also, ocfs2_complete_edge_insert can't handle
3065 * it and we need to delete the right extent block.
3066 */
3067 if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
3068 le16_to_cpu(el->l_next_free_rec) == 1) {
3069
3070 ret = ocfs2_remove_rightmost_path(inode, handle,
3071 right_path, dealloc);
3072 if (ret) {
3073 mlog_errno(ret);
3074 goto out;
3075 }
3076
3077 /* Now the rightmost extent block has been deleted.
3078 * So we use the new rightmost path.
3079 */
3080 ocfs2_mv_path(right_path, left_path);
3081 left_path = NULL;
3082 } else
3083 ocfs2_complete_edge_insert(inode, handle, left_path,
3084 right_path, subtree_index);
3085 }
2808out: 3086out:
3087 if (left_path)
3088 ocfs2_free_path(left_path);
2809 return ret; 3089 return ret;
2810} 3090}
2811 3091
2812static int ocfs2_try_to_merge_extent(struct inode *inode, 3092static int ocfs2_try_to_merge_extent(struct inode *inode,
2813 handle_t *handle, 3093 handle_t *handle,
2814 struct ocfs2_path *left_path, 3094 struct ocfs2_path *path,
2815 int split_index, 3095 int split_index,
2816 struct ocfs2_extent_rec *split_rec, 3096 struct ocfs2_extent_rec *split_rec,
2817 struct ocfs2_cached_dealloc_ctxt *dealloc, 3097 struct ocfs2_cached_dealloc_ctxt *dealloc,
@@ -2819,7 +3099,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
2819 3099
2820{ 3100{
2821 int ret = 0; 3101 int ret = 0;
2822 struct ocfs2_extent_list *el = path_leaf_el(left_path); 3102 struct ocfs2_extent_list *el = path_leaf_el(path);
2823 struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; 3103 struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
2824 3104
2825 BUG_ON(ctxt->c_contig_type == CONTIG_NONE); 3105 BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
@@ -2832,7 +3112,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
2832 * extents - having more than one in a leaf is 3112 * extents - having more than one in a leaf is
2833 * illegal. 3113 * illegal.
2834 */ 3114 */
2835 ret = ocfs2_rotate_tree_left(inode, handle, left_path, 3115 ret = ocfs2_rotate_tree_left(inode, handle, path,
2836 dealloc); 3116 dealloc);
2837 if (ret) { 3117 if (ret) {
2838 mlog_errno(ret); 3118 mlog_errno(ret);
@@ -2847,7 +3127,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
2847 * Left-right contig implies this. 3127 * Left-right contig implies this.
2848 */ 3128 */
2849 BUG_ON(!ctxt->c_split_covers_rec); 3129 BUG_ON(!ctxt->c_split_covers_rec);
2850 BUG_ON(split_index == 0);
2851 3130
2852 /* 3131 /*
2853 * Since the leftright insert always covers the entire 3132 * Since the leftright insert always covers the entire
@@ -2858,9 +3137,14 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
2858 * Since the adding of an empty extent shifts 3137 * Since the adding of an empty extent shifts
2859 * everything back to the right, there's no need to 3138 * everything back to the right, there's no need to
2860 * update split_index here. 3139 * update split_index here.
3140 *
3141 * When the split_index is zero, we need to merge it to the
3142 * prevoius extent block. It is more efficient and easier
3143 * if we do merge_right first and merge_left later.
2861 */ 3144 */
2862 ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), 3145 ret = ocfs2_merge_rec_right(inode, path,
2863 handle, split_rec, el, split_index); 3146 handle, split_rec,
3147 split_index);
2864 if (ret) { 3148 if (ret) {
2865 mlog_errno(ret); 3149 mlog_errno(ret);
2866 goto out; 3150 goto out;
@@ -2871,32 +3155,30 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
2871 */ 3155 */
2872 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); 3156 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
2873 3157
2874 /* 3158 /* The merge left us with an empty extent, remove it. */
2875 * The left merge left us with an empty extent, remove 3159 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
2876 * it.
2877 */
2878 ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc);
2879 if (ret) { 3160 if (ret) {
2880 mlog_errno(ret); 3161 mlog_errno(ret);
2881 goto out; 3162 goto out;
2882 } 3163 }
2883 split_index--; 3164
2884 rec = &el->l_recs[split_index]; 3165 rec = &el->l_recs[split_index];
2885 3166
2886 /* 3167 /*
2887 * Note that we don't pass split_rec here on purpose - 3168 * Note that we don't pass split_rec here on purpose -
2888 * we've merged it into the left side. 3169 * we've merged it into the rec already.
2889 */ 3170 */
2890 ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path), 3171 ret = ocfs2_merge_rec_left(inode, path,
2891 handle, rec, el, split_index); 3172 handle, rec,
3173 dealloc,
3174 split_index);
3175
2892 if (ret) { 3176 if (ret) {
2893 mlog_errno(ret); 3177 mlog_errno(ret);
2894 goto out; 3178 goto out;
2895 } 3179 }
2896 3180
2897 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); 3181 ret = ocfs2_rotate_tree_left(inode, handle, path,
2898
2899 ret = ocfs2_rotate_tree_left(inode, handle, left_path,
2900 dealloc); 3182 dealloc);
2901 /* 3183 /*
2902 * Error from this last rotate is not critical, so 3184 * Error from this last rotate is not critical, so
@@ -2915,8 +3197,9 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
2915 */ 3197 */
2916 if (ctxt->c_contig_type == CONTIG_RIGHT) { 3198 if (ctxt->c_contig_type == CONTIG_RIGHT) {
2917 ret = ocfs2_merge_rec_left(inode, 3199 ret = ocfs2_merge_rec_left(inode,
2918 path_leaf_bh(left_path), 3200 path,
2919 handle, split_rec, el, 3201 handle, split_rec,
3202 dealloc,
2920 split_index); 3203 split_index);
2921 if (ret) { 3204 if (ret) {
2922 mlog_errno(ret); 3205 mlog_errno(ret);
@@ -2924,8 +3207,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
2924 } 3207 }
2925 } else { 3208 } else {
2926 ret = ocfs2_merge_rec_right(inode, 3209 ret = ocfs2_merge_rec_right(inode,
2927 path_leaf_bh(left_path), 3210 path,
2928 handle, split_rec, el, 3211 handle, split_rec,
2929 split_index); 3212 split_index);
2930 if (ret) { 3213 if (ret) {
2931 mlog_errno(ret); 3214 mlog_errno(ret);
@@ -2938,7 +3221,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
2938 * The merge may have left an empty extent in 3221 * The merge may have left an empty extent in
2939 * our leaf. Try to rotate it away. 3222 * our leaf. Try to rotate it away.
2940 */ 3223 */
2941 ret = ocfs2_rotate_tree_left(inode, handle, left_path, 3224 ret = ocfs2_rotate_tree_left(inode, handle, path,
2942 dealloc); 3225 dealloc);
2943 if (ret) 3226 if (ret)
2944 mlog_errno(ret); 3227 mlog_errno(ret);
@@ -3498,20 +3781,57 @@ out:
3498} 3781}
3499 3782
3500static enum ocfs2_contig_type 3783static enum ocfs2_contig_type
3501ocfs2_figure_merge_contig_type(struct inode *inode, 3784ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
3502 struct ocfs2_extent_list *el, int index, 3785 struct ocfs2_extent_list *el, int index,
3503 struct ocfs2_extent_rec *split_rec) 3786 struct ocfs2_extent_rec *split_rec)
3504{ 3787{
3505 struct ocfs2_extent_rec *rec; 3788 int status;
3506 enum ocfs2_contig_type ret = CONTIG_NONE; 3789 enum ocfs2_contig_type ret = CONTIG_NONE;
3790 u32 left_cpos, right_cpos;
3791 struct ocfs2_extent_rec *rec = NULL;
3792 struct ocfs2_extent_list *new_el;
3793 struct ocfs2_path *left_path = NULL, *right_path = NULL;
3794 struct buffer_head *bh;
3795 struct ocfs2_extent_block *eb;
3796
3797 if (index > 0) {
3798 rec = &el->l_recs[index - 1];
3799 } else if (path->p_tree_depth > 0) {
3800 status = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
3801 path, &left_cpos);
3802 if (status)
3803 goto out;
3804
3805 if (left_cpos != 0) {
3806 left_path = ocfs2_new_path(path_root_bh(path),
3807 path_root_el(path));
3808 if (!left_path)
3809 goto out;
3810
3811 status = ocfs2_find_path(inode, left_path, left_cpos);
3812 if (status)
3813 goto out;
3814
3815 new_el = path_leaf_el(left_path);
3816
3817 if (le16_to_cpu(new_el->l_next_free_rec) !=
3818 le16_to_cpu(new_el->l_count)) {
3819 bh = path_leaf_bh(left_path);
3820 eb = (struct ocfs2_extent_block *)bh->b_data;
3821 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
3822 eb);
3823 goto out;
3824 }
3825 rec = &new_el->l_recs[
3826 le16_to_cpu(new_el->l_next_free_rec) - 1];
3827 }
3828 }
3507 3829
3508 /* 3830 /*
3509 * We're careful to check for an empty extent record here - 3831 * We're careful to check for an empty extent record here -
3510 * the merge code will know what to do if it sees one. 3832 * the merge code will know what to do if it sees one.
3511 */ 3833 */
3512 3834 if (rec) {
3513 if (index > 0) {
3514 rec = &el->l_recs[index - 1];
3515 if (index == 1 && ocfs2_is_empty_extent(rec)) { 3835 if (index == 1 && ocfs2_is_empty_extent(rec)) {
3516 if (split_rec->e_cpos == el->l_recs[index].e_cpos) 3836 if (split_rec->e_cpos == el->l_recs[index].e_cpos)
3517 ret = CONTIG_RIGHT; 3837 ret = CONTIG_RIGHT;
@@ -3520,10 +3840,45 @@ ocfs2_figure_merge_contig_type(struct inode *inode,
3520 } 3840 }
3521 } 3841 }
3522 3842
3523 if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) { 3843 rec = NULL;
3844 if (index < (le16_to_cpu(el->l_next_free_rec) - 1))
3845 rec = &el->l_recs[index + 1];
3846 else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) &&
3847 path->p_tree_depth > 0) {
3848 status = ocfs2_find_cpos_for_right_leaf(inode->i_sb,
3849 path, &right_cpos);
3850 if (status)
3851 goto out;
3852
3853 if (right_cpos == 0)
3854 goto out;
3855
3856 right_path = ocfs2_new_path(path_root_bh(path),
3857 path_root_el(path));
3858 if (!right_path)
3859 goto out;
3860
3861 status = ocfs2_find_path(inode, right_path, right_cpos);
3862 if (status)
3863 goto out;
3864
3865 new_el = path_leaf_el(right_path);
3866 rec = &new_el->l_recs[0];
3867 if (ocfs2_is_empty_extent(rec)) {
3868 if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
3869 bh = path_leaf_bh(right_path);
3870 eb = (struct ocfs2_extent_block *)bh->b_data;
3871 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
3872 eb);
3873 goto out;
3874 }
3875 rec = &new_el->l_recs[1];
3876 }
3877 }
3878
3879 if (rec) {
3524 enum ocfs2_contig_type contig_type; 3880 enum ocfs2_contig_type contig_type;
3525 3881
3526 rec = &el->l_recs[index + 1];
3527 contig_type = ocfs2_extent_contig(inode, rec, split_rec); 3882 contig_type = ocfs2_extent_contig(inode, rec, split_rec);
3528 3883
3529 if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) 3884 if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
@@ -3532,6 +3887,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode,
3532 ret = contig_type; 3887 ret = contig_type;
3533 } 3888 }
3534 3889
3890out:
3891 if (left_path)
3892 ocfs2_free_path(left_path);
3893 if (right_path)
3894 ocfs2_free_path(right_path);
3895
3535 return ret; 3896 return ret;
3536} 3897}
3537 3898
@@ -3994,7 +4355,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
3994 goto out; 4355 goto out;
3995 } 4356 }
3996 4357
3997 ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el, 4358 ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el,
3998 split_index, 4359 split_index,
3999 split_rec); 4360 split_rec);
4000 4361
@@ -4788,6 +5149,8 @@ static void ocfs2_truncate_log_worker(struct work_struct *work)
4788 status = ocfs2_flush_truncate_log(osb); 5149 status = ocfs2_flush_truncate_log(osb);
4789 if (status < 0) 5150 if (status < 0)
4790 mlog_errno(status); 5151 mlog_errno(status);
5152 else
5153 ocfs2_init_inode_steal_slot(osb);
4791 5154
4792 mlog_exit(status); 5155 mlog_exit(status);
4793} 5156}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 82243127eebf..17964c0505a9 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -257,7 +257,7 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
257 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 257 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
258 258
259 BUG_ON(!PageLocked(page)); 259 BUG_ON(!PageLocked(page));
260 BUG_ON(!OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL); 260 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
261 261
262 ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh, 262 ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh,
263 OCFS2_BH_CACHED, inode); 263 OCFS2_BH_CACHED, inode);
@@ -467,11 +467,11 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
467 unsigned to) 467 unsigned to)
468{ 468{
469 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 469 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
470 handle_t *handle = NULL; 470 handle_t *handle;
471 int ret = 0; 471 int ret = 0;
472 472
473 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 473 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
474 if (!handle) { 474 if (IS_ERR(handle)) {
475 ret = -ENOMEM; 475 ret = -ENOMEM;
476 mlog_errno(ret); 476 mlog_errno(ret);
477 goto out; 477 goto out;
@@ -487,7 +487,7 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
487 } 487 }
488out: 488out:
489 if (ret) { 489 if (ret) {
490 if (handle) 490 if (!IS_ERR(handle))
491 ocfs2_commit_trans(osb, handle); 491 ocfs2_commit_trans(osb, handle);
492 handle = ERR_PTR(ret); 492 handle = ERR_PTR(ret);
493 } 493 }
diff --git a/fs/ocfs2/cluster/Makefile b/fs/ocfs2/cluster/Makefile
index cdd162f13650..bc8c5e7d8608 100644
--- a/fs/ocfs2/cluster/Makefile
+++ b/fs/ocfs2/cluster/Makefile
@@ -1,4 +1,4 @@
1obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o 1obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o
2 2
3ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \ 3ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \
4 quorum.o tcp.o ver.o 4 quorum.o tcp.o netdebug.o ver.o
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
new file mode 100644
index 000000000000..7bf3c0ea7bd9
--- /dev/null
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -0,0 +1,441 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * netdebug.c
5 *
6 * debug functionality for o2net
7 *
8 * Copyright (C) 2005, 2008 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA.
24 *
25 */
26
27#ifdef CONFIG_DEBUG_FS
28
29#include <linux/module.h>
30#include <linux/types.h>
31#include <linux/slab.h>
32#include <linux/idr.h>
33#include <linux/kref.h>
34#include <linux/seq_file.h>
35#include <linux/debugfs.h>
36
37#include <linux/uaccess.h>
38
39#include "tcp.h"
40#include "nodemanager.h"
41#define MLOG_MASK_PREFIX ML_TCP
42#include "masklog.h"
43
44#include "tcp_internal.h"
45
46#define O2NET_DEBUG_DIR "o2net"
47#define SC_DEBUG_NAME "sock_containers"
48#define NST_DEBUG_NAME "send_tracking"
49
50static struct dentry *o2net_dentry;
51static struct dentry *sc_dentry;
52static struct dentry *nst_dentry;
53
54static DEFINE_SPINLOCK(o2net_debug_lock);
55
56static LIST_HEAD(sock_containers);
57static LIST_HEAD(send_tracking);
58
59void o2net_debug_add_nst(struct o2net_send_tracking *nst)
60{
61 spin_lock(&o2net_debug_lock);
62 list_add(&nst->st_net_debug_item, &send_tracking);
63 spin_unlock(&o2net_debug_lock);
64}
65
66void o2net_debug_del_nst(struct o2net_send_tracking *nst)
67{
68 spin_lock(&o2net_debug_lock);
69 if (!list_empty(&nst->st_net_debug_item))
70 list_del_init(&nst->st_net_debug_item);
71 spin_unlock(&o2net_debug_lock);
72}
73
74static struct o2net_send_tracking
75 *next_nst(struct o2net_send_tracking *nst_start)
76{
77 struct o2net_send_tracking *nst, *ret = NULL;
78
79 assert_spin_locked(&o2net_debug_lock);
80
81 list_for_each_entry(nst, &nst_start->st_net_debug_item,
82 st_net_debug_item) {
83 /* discover the head of the list */
84 if (&nst->st_net_debug_item == &send_tracking)
85 break;
86
87 /* use st_task to detect real nsts in the list */
88 if (nst->st_task != NULL) {
89 ret = nst;
90 break;
91 }
92 }
93
94 return ret;
95}
96
97static void *nst_seq_start(struct seq_file *seq, loff_t *pos)
98{
99 struct o2net_send_tracking *nst, *dummy_nst = seq->private;
100
101 spin_lock(&o2net_debug_lock);
102 nst = next_nst(dummy_nst);
103 spin_unlock(&o2net_debug_lock);
104
105 return nst;
106}
107
108static void *nst_seq_next(struct seq_file *seq, void *v, loff_t *pos)
109{
110 struct o2net_send_tracking *nst, *dummy_nst = seq->private;
111
112 spin_lock(&o2net_debug_lock);
113 nst = next_nst(dummy_nst);
114 list_del_init(&dummy_nst->st_net_debug_item);
115 if (nst)
116 list_add(&dummy_nst->st_net_debug_item,
117 &nst->st_net_debug_item);
118 spin_unlock(&o2net_debug_lock);
119
120 return nst; /* unused, just needs to be null when done */
121}
122
123static int nst_seq_show(struct seq_file *seq, void *v)
124{
125 struct o2net_send_tracking *nst, *dummy_nst = seq->private;
126
127 spin_lock(&o2net_debug_lock);
128 nst = next_nst(dummy_nst);
129
130 if (nst != NULL) {
131 /* get_task_comm isn't exported. oh well. */
132 seq_printf(seq, "%p:\n"
133 " pid: %lu\n"
134 " tgid: %lu\n"
135 " process name: %s\n"
136 " node: %u\n"
137 " sc: %p\n"
138 " message id: %d\n"
139 " message type: %u\n"
140 " message key: 0x%08x\n"
141 " sock acquiry: %lu.%lu\n"
142 " send start: %lu.%lu\n"
143 " wait start: %lu.%lu\n",
144 nst, (unsigned long)nst->st_task->pid,
145 (unsigned long)nst->st_task->tgid,
146 nst->st_task->comm, nst->st_node,
147 nst->st_sc, nst->st_id, nst->st_msg_type,
148 nst->st_msg_key,
149 nst->st_sock_time.tv_sec, nst->st_sock_time.tv_usec,
150 nst->st_send_time.tv_sec, nst->st_send_time.tv_usec,
151 nst->st_status_time.tv_sec,
152 nst->st_status_time.tv_usec);
153 }
154
155 spin_unlock(&o2net_debug_lock);
156
157 return 0;
158}
159
160static void nst_seq_stop(struct seq_file *seq, void *v)
161{
162}
163
164static struct seq_operations nst_seq_ops = {
165 .start = nst_seq_start,
166 .next = nst_seq_next,
167 .stop = nst_seq_stop,
168 .show = nst_seq_show,
169};
170
171static int nst_fop_open(struct inode *inode, struct file *file)
172{
173 struct o2net_send_tracking *dummy_nst;
174 struct seq_file *seq;
175 int ret;
176
177 dummy_nst = kmalloc(sizeof(struct o2net_send_tracking), GFP_KERNEL);
178 if (dummy_nst == NULL) {
179 ret = -ENOMEM;
180 goto out;
181 }
182 dummy_nst->st_task = NULL;
183
184 ret = seq_open(file, &nst_seq_ops);
185 if (ret)
186 goto out;
187
188 seq = file->private_data;
189 seq->private = dummy_nst;
190 o2net_debug_add_nst(dummy_nst);
191
192 dummy_nst = NULL;
193
194out:
195 kfree(dummy_nst);
196 return ret;
197}
198
199static int nst_fop_release(struct inode *inode, struct file *file)
200{
201 struct seq_file *seq = file->private_data;
202 struct o2net_send_tracking *dummy_nst = seq->private;
203
204 o2net_debug_del_nst(dummy_nst);
205 return seq_release_private(inode, file);
206}
207
208static struct file_operations nst_seq_fops = {
209 .open = nst_fop_open,
210 .read = seq_read,
211 .llseek = seq_lseek,
212 .release = nst_fop_release,
213};
214
215void o2net_debug_add_sc(struct o2net_sock_container *sc)
216{
217 spin_lock(&o2net_debug_lock);
218 list_add(&sc->sc_net_debug_item, &sock_containers);
219 spin_unlock(&o2net_debug_lock);
220}
221
222void o2net_debug_del_sc(struct o2net_sock_container *sc)
223{
224 spin_lock(&o2net_debug_lock);
225 list_del_init(&sc->sc_net_debug_item);
226 spin_unlock(&o2net_debug_lock);
227}
228
229static struct o2net_sock_container
230 *next_sc(struct o2net_sock_container *sc_start)
231{
232 struct o2net_sock_container *sc, *ret = NULL;
233
234 assert_spin_locked(&o2net_debug_lock);
235
236 list_for_each_entry(sc, &sc_start->sc_net_debug_item,
237 sc_net_debug_item) {
238 /* discover the head of the list miscast as a sc */
239 if (&sc->sc_net_debug_item == &sock_containers)
240 break;
241
242 /* use sc_page to detect real scs in the list */
243 if (sc->sc_page != NULL) {
244 ret = sc;
245 break;
246 }
247 }
248
249 return ret;
250}
251
252static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
253{
254 struct o2net_sock_container *sc, *dummy_sc = seq->private;
255
256 spin_lock(&o2net_debug_lock);
257 sc = next_sc(dummy_sc);
258 spin_unlock(&o2net_debug_lock);
259
260 return sc;
261}
262
263static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
264{
265 struct o2net_sock_container *sc, *dummy_sc = seq->private;
266
267 spin_lock(&o2net_debug_lock);
268 sc = next_sc(dummy_sc);
269 list_del_init(&dummy_sc->sc_net_debug_item);
270 if (sc)
271 list_add(&dummy_sc->sc_net_debug_item, &sc->sc_net_debug_item);
272 spin_unlock(&o2net_debug_lock);
273
274 return sc; /* unused, just needs to be null when done */
275}
276
277#define TV_SEC_USEC(TV) TV.tv_sec, TV.tv_usec
278
279static int sc_seq_show(struct seq_file *seq, void *v)
280{
281 struct o2net_sock_container *sc, *dummy_sc = seq->private;
282
283 spin_lock(&o2net_debug_lock);
284 sc = next_sc(dummy_sc);
285
286 if (sc != NULL) {
287 struct inet_sock *inet = NULL;
288
289 __be32 saddr = 0, daddr = 0;
290 __be16 sport = 0, dport = 0;
291
292 if (sc->sc_sock) {
293 inet = inet_sk(sc->sc_sock->sk);
294 /* the stack's structs aren't sparse endian clean */
295 saddr = (__force __be32)inet->saddr;
296 daddr = (__force __be32)inet->daddr;
297 sport = (__force __be16)inet->sport;
298 dport = (__force __be16)inet->dport;
299 }
300
301 /* XXX sigh, inet-> doesn't have sparse annotation so any
302 * use of it here generates a warning with -Wbitwise */
303 seq_printf(seq, "%p:\n"
304 " krefs: %d\n"
305 " sock: %u.%u.%u.%u:%u -> "
306 "%u.%u.%u.%u:%u\n"
307 " remote node: %s\n"
308 " page off: %zu\n"
309 " handshake ok: %u\n"
310 " timer: %lu.%lu\n"
311 " data ready: %lu.%lu\n"
312 " advance start: %lu.%lu\n"
313 " advance stop: %lu.%lu\n"
314 " func start: %lu.%lu\n"
315 " func stop: %lu.%lu\n"
316 " func key: %u\n"
317 " func type: %u\n",
318 sc,
319 atomic_read(&sc->sc_kref.refcount),
320 NIPQUAD(saddr), inet ? ntohs(sport) : 0,
321 NIPQUAD(daddr), inet ? ntohs(dport) : 0,
322 sc->sc_node->nd_name,
323 sc->sc_page_off,
324 sc->sc_handshake_ok,
325 TV_SEC_USEC(sc->sc_tv_timer),
326 TV_SEC_USEC(sc->sc_tv_data_ready),
327 TV_SEC_USEC(sc->sc_tv_advance_start),
328 TV_SEC_USEC(sc->sc_tv_advance_stop),
329 TV_SEC_USEC(sc->sc_tv_func_start),
330 TV_SEC_USEC(sc->sc_tv_func_stop),
331 sc->sc_msg_key,
332 sc->sc_msg_type);
333 }
334
335
336 spin_unlock(&o2net_debug_lock);
337
338 return 0;
339}
340
341static void sc_seq_stop(struct seq_file *seq, void *v)
342{
343}
344
345static struct seq_operations sc_seq_ops = {
346 .start = sc_seq_start,
347 .next = sc_seq_next,
348 .stop = sc_seq_stop,
349 .show = sc_seq_show,
350};
351
352static int sc_fop_open(struct inode *inode, struct file *file)
353{
354 struct o2net_sock_container *dummy_sc;
355 struct seq_file *seq;
356 int ret;
357
358 dummy_sc = kmalloc(sizeof(struct o2net_sock_container), GFP_KERNEL);
359 if (dummy_sc == NULL) {
360 ret = -ENOMEM;
361 goto out;
362 }
363 dummy_sc->sc_page = NULL;
364
365 ret = seq_open(file, &sc_seq_ops);
366 if (ret)
367 goto out;
368
369 seq = file->private_data;
370 seq->private = dummy_sc;
371 o2net_debug_add_sc(dummy_sc);
372
373 dummy_sc = NULL;
374
375out:
376 kfree(dummy_sc);
377 return ret;
378}
379
380static int sc_fop_release(struct inode *inode, struct file *file)
381{
382 struct seq_file *seq = file->private_data;
383 struct o2net_sock_container *dummy_sc = seq->private;
384
385 o2net_debug_del_sc(dummy_sc);
386 return seq_release_private(inode, file);
387}
388
389static struct file_operations sc_seq_fops = {
390 .open = sc_fop_open,
391 .read = seq_read,
392 .llseek = seq_lseek,
393 .release = sc_fop_release,
394};
395
396int o2net_debugfs_init(void)
397{
398 o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL);
399 if (!o2net_dentry) {
400 mlog_errno(-ENOMEM);
401 goto bail;
402 }
403
404 nst_dentry = debugfs_create_file(NST_DEBUG_NAME, S_IFREG|S_IRUSR,
405 o2net_dentry, NULL,
406 &nst_seq_fops);
407 if (!nst_dentry) {
408 mlog_errno(-ENOMEM);
409 goto bail;
410 }
411
412 sc_dentry = debugfs_create_file(SC_DEBUG_NAME, S_IFREG|S_IRUSR,
413 o2net_dentry, NULL,
414 &sc_seq_fops);
415 if (!sc_dentry) {
416 mlog_errno(-ENOMEM);
417 goto bail;
418 }
419
420 return 0;
421bail:
422 if (sc_dentry)
423 debugfs_remove(sc_dentry);
424 if (nst_dentry)
425 debugfs_remove(nst_dentry);
426 if (o2net_dentry)
427 debugfs_remove(o2net_dentry);
428 return -ENOMEM;
429}
430
431void o2net_debugfs_exit(void)
432{
433 if (sc_dentry)
434 debugfs_remove(sc_dentry);
435 if (nst_dentry)
436 debugfs_remove(nst_dentry);
437 if (o2net_dentry)
438 debugfs_remove(o2net_dentry);
439}
440
441#endif /* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 709fba25bf7e..cf9401e8cd0b 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -959,7 +959,10 @@ static int __init init_o2nm(void)
959 cluster_print_version(); 959 cluster_print_version();
960 960
961 o2hb_init(); 961 o2hb_init();
962 o2net_init(); 962
963 ret = o2net_init();
964 if (ret)
965 goto out;
963 966
964 ocfs2_table_header = register_sysctl_table(ocfs2_root_table); 967 ocfs2_table_header = register_sysctl_table(ocfs2_root_table);
965 if (!ocfs2_table_header) { 968 if (!ocfs2_table_header) {
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index 0c095ce7723d..98429fd68499 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -57,6 +57,7 @@ static struct kset *o2cb_kset;
57void o2cb_sys_shutdown(void) 57void o2cb_sys_shutdown(void)
58{ 58{
59 mlog_sys_shutdown(); 59 mlog_sys_shutdown();
60 sysfs_remove_link(NULL, "o2cb");
60 kset_unregister(o2cb_kset); 61 kset_unregister(o2cb_kset);
61} 62}
62 63
@@ -68,6 +69,14 @@ int o2cb_sys_init(void)
68 if (!o2cb_kset) 69 if (!o2cb_kset)
69 return -ENOMEM; 70 return -ENOMEM;
70 71
72 /*
73 * Create this symlink for backwards compatibility with old
74 * versions of ocfs2-tools which look for things in /sys/o2cb.
75 */
76 ret = sysfs_create_link(NULL, &o2cb_kset->kobj, "o2cb");
77 if (ret)
78 goto error;
79
71 ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); 80 ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group);
72 if (ret) 81 if (ret)
73 goto error; 82 goto error;
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index ee50c9610e7f..1e44ad14881a 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -142,23 +142,65 @@ static void o2net_idle_timer(unsigned long data);
142static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); 142static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
143static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); 143static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
144 144
145/* 145static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
146 * FIXME: These should use to_o2nm_cluster_from_node(), but we end up 146 u32 msgkey, struct task_struct *task, u8 node)
147 * losing our parent link to the cluster during shutdown. This can be 147{
148 * solved by adding a pre-removal callback to configfs, or passing 148#ifdef CONFIG_DEBUG_FS
149 * around the cluster with the node. -jeffm 149 INIT_LIST_HEAD(&nst->st_net_debug_item);
150 */ 150 nst->st_task = task;
151static inline int o2net_reconnect_delay(struct o2nm_node *node) 151 nst->st_msg_type = msgtype;
152 nst->st_msg_key = msgkey;
153 nst->st_node = node;
154#endif
155}
156
157static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
158{
159#ifdef CONFIG_DEBUG_FS
160 do_gettimeofday(&nst->st_sock_time);
161#endif
162}
163
164static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
165{
166#ifdef CONFIG_DEBUG_FS
167 do_gettimeofday(&nst->st_send_time);
168#endif
169}
170
171static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
172{
173#ifdef CONFIG_DEBUG_FS
174 do_gettimeofday(&nst->st_status_time);
175#endif
176}
177
178static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
179 struct o2net_sock_container *sc)
180{
181#ifdef CONFIG_DEBUG_FS
182 nst->st_sc = sc;
183#endif
184}
185
186static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
187{
188#ifdef CONFIG_DEBUG_FS
189 nst->st_id = msg_id;
190#endif
191}
192
193static inline int o2net_reconnect_delay(void)
152{ 194{
153 return o2nm_single_cluster->cl_reconnect_delay_ms; 195 return o2nm_single_cluster->cl_reconnect_delay_ms;
154} 196}
155 197
156static inline int o2net_keepalive_delay(struct o2nm_node *node) 198static inline int o2net_keepalive_delay(void)
157{ 199{
158 return o2nm_single_cluster->cl_keepalive_delay_ms; 200 return o2nm_single_cluster->cl_keepalive_delay_ms;
159} 201}
160 202
161static inline int o2net_idle_timeout(struct o2nm_node *node) 203static inline int o2net_idle_timeout(void)
162{ 204{
163 return o2nm_single_cluster->cl_idle_timeout_ms; 205 return o2nm_single_cluster->cl_idle_timeout_ms;
164} 206}
@@ -296,6 +338,7 @@ static void sc_kref_release(struct kref *kref)
296 o2nm_node_put(sc->sc_node); 338 o2nm_node_put(sc->sc_node);
297 sc->sc_node = NULL; 339 sc->sc_node = NULL;
298 340
341 o2net_debug_del_sc(sc);
299 kfree(sc); 342 kfree(sc);
300} 343}
301 344
@@ -336,6 +379,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
336 379
337 ret = sc; 380 ret = sc;
338 sc->sc_page = page; 381 sc->sc_page = page;
382 o2net_debug_add_sc(sc);
339 sc = NULL; 383 sc = NULL;
340 page = NULL; 384 page = NULL;
341 385
@@ -399,8 +443,6 @@ static void o2net_set_nn_state(struct o2net_node *nn,
399 mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); 443 mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid);
400 mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); 444 mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc);
401 445
402 /* we won't reconnect after our valid conn goes away for
403 * this hb iteration.. here so it shows up in the logs */
404 if (was_valid && !valid && err == 0) 446 if (was_valid && !valid && err == 0)
405 err = -ENOTCONN; 447 err = -ENOTCONN;
406 448
@@ -430,11 +472,6 @@ static void o2net_set_nn_state(struct o2net_node *nn,
430 472
431 if (!was_valid && valid) { 473 if (!was_valid && valid) {
432 o2quo_conn_up(o2net_num_from_nn(nn)); 474 o2quo_conn_up(o2net_num_from_nn(nn));
433 /* this is a bit of a hack. we only try reconnecting
434 * when heartbeating starts until we get a connection.
435 * if that connection then dies we don't try reconnecting.
436 * the only way to start connecting again is to down
437 * heartbeat and bring it back up. */
438 cancel_delayed_work(&nn->nn_connect_expired); 475 cancel_delayed_work(&nn->nn_connect_expired);
439 printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", 476 printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n",
440 o2nm_this_node() > sc->sc_node->nd_num ? 477 o2nm_this_node() > sc->sc_node->nd_num ?
@@ -451,12 +488,24 @@ static void o2net_set_nn_state(struct o2net_node *nn,
451 /* delay if we're withing a RECONNECT_DELAY of the 488 /* delay if we're withing a RECONNECT_DELAY of the
452 * last attempt */ 489 * last attempt */
453 delay = (nn->nn_last_connect_attempt + 490 delay = (nn->nn_last_connect_attempt +
454 msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) 491 msecs_to_jiffies(o2net_reconnect_delay()))
455 - jiffies; 492 - jiffies;
456 if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) 493 if (delay > msecs_to_jiffies(o2net_reconnect_delay()))
457 delay = 0; 494 delay = 0;
458 mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); 495 mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay);
459 queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); 496 queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay);
497
498 /*
499 * Delay the expired work after idle timeout.
500 *
501 * We might have lots of failed connection attempts that run
502 * through here but we only cancel the connect_expired work when
503 * a connection attempt succeeds. So only the first enqueue of
504 * the connect_expired work will do anything. The rest will see
505 * that it's already queued and do nothing.
506 */
507 delay += msecs_to_jiffies(o2net_idle_timeout());
508 queue_delayed_work(o2net_wq, &nn->nn_connect_expired, delay);
460 } 509 }
461 510
462 /* keep track of the nn's sc ref for the caller */ 511 /* keep track of the nn's sc ref for the caller */
@@ -914,6 +963,9 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
914 struct o2net_status_wait nsw = { 963 struct o2net_status_wait nsw = {
915 .ns_node_item = LIST_HEAD_INIT(nsw.ns_node_item), 964 .ns_node_item = LIST_HEAD_INIT(nsw.ns_node_item),
916 }; 965 };
966 struct o2net_send_tracking nst;
967
968 o2net_init_nst(&nst, msg_type, key, current, target_node);
917 969
918 if (o2net_wq == NULL) { 970 if (o2net_wq == NULL) {
919 mlog(0, "attempt to tx without o2netd running\n"); 971 mlog(0, "attempt to tx without o2netd running\n");
@@ -939,6 +991,10 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
939 goto out; 991 goto out;
940 } 992 }
941 993
994 o2net_debug_add_nst(&nst);
995
996 o2net_set_nst_sock_time(&nst);
997
942 ret = wait_event_interruptible(nn->nn_sc_wq, 998 ret = wait_event_interruptible(nn->nn_sc_wq,
943 o2net_tx_can_proceed(nn, &sc, &error)); 999 o2net_tx_can_proceed(nn, &sc, &error));
944 if (!ret && error) 1000 if (!ret && error)
@@ -946,6 +1002,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
946 if (ret) 1002 if (ret)
947 goto out; 1003 goto out;
948 1004
1005 o2net_set_nst_sock_container(&nst, sc);
1006
949 veclen = caller_veclen + 1; 1007 veclen = caller_veclen + 1;
950 vec = kmalloc(sizeof(struct kvec) * veclen, GFP_ATOMIC); 1008 vec = kmalloc(sizeof(struct kvec) * veclen, GFP_ATOMIC);
951 if (vec == NULL) { 1009 if (vec == NULL) {
@@ -972,6 +1030,9 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
972 goto out; 1030 goto out;
973 1031
974 msg->msg_num = cpu_to_be32(nsw.ns_id); 1032 msg->msg_num = cpu_to_be32(nsw.ns_id);
1033 o2net_set_nst_msg_id(&nst, nsw.ns_id);
1034
1035 o2net_set_nst_send_time(&nst);
975 1036
976 /* finally, convert the message header to network byte-order 1037 /* finally, convert the message header to network byte-order
977 * and send */ 1038 * and send */
@@ -986,6 +1047,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
986 } 1047 }
987 1048
988 /* wait on other node's handler */ 1049 /* wait on other node's handler */
1050 o2net_set_nst_status_time(&nst);
989 wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); 1051 wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw));
990 1052
991 /* Note that we avoid overwriting the callers status return 1053 /* Note that we avoid overwriting the callers status return
@@ -998,6 +1060,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
998 mlog(0, "woken, returning system status %d, user status %d\n", 1060 mlog(0, "woken, returning system status %d, user status %d\n",
999 ret, nsw.ns_status); 1061 ret, nsw.ns_status);
1000out: 1062out:
1063 o2net_debug_del_nst(&nst); /* must be before dropping sc and node */
1001 if (sc) 1064 if (sc)
1002 sc_put(sc); 1065 sc_put(sc);
1003 if (vec) 1066 if (vec)
@@ -1154,23 +1217,23 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
1154 * but isn't. This can ultimately cause corruption. 1217 * but isn't. This can ultimately cause corruption.
1155 */ 1218 */
1156 if (be32_to_cpu(hand->o2net_idle_timeout_ms) != 1219 if (be32_to_cpu(hand->o2net_idle_timeout_ms) !=
1157 o2net_idle_timeout(sc->sc_node)) { 1220 o2net_idle_timeout()) {
1158 mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " 1221 mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of "
1159 "%u ms, but we use %u ms locally. disconnecting\n", 1222 "%u ms, but we use %u ms locally. disconnecting\n",
1160 SC_NODEF_ARGS(sc), 1223 SC_NODEF_ARGS(sc),
1161 be32_to_cpu(hand->o2net_idle_timeout_ms), 1224 be32_to_cpu(hand->o2net_idle_timeout_ms),
1162 o2net_idle_timeout(sc->sc_node)); 1225 o2net_idle_timeout());
1163 o2net_ensure_shutdown(nn, sc, -ENOTCONN); 1226 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
1164 return -1; 1227 return -1;
1165 } 1228 }
1166 1229
1167 if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != 1230 if (be32_to_cpu(hand->o2net_keepalive_delay_ms) !=
1168 o2net_keepalive_delay(sc->sc_node)) { 1231 o2net_keepalive_delay()) {
1169 mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " 1232 mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of "
1170 "%u ms, but we use %u ms locally. disconnecting\n", 1233 "%u ms, but we use %u ms locally. disconnecting\n",
1171 SC_NODEF_ARGS(sc), 1234 SC_NODEF_ARGS(sc),
1172 be32_to_cpu(hand->o2net_keepalive_delay_ms), 1235 be32_to_cpu(hand->o2net_keepalive_delay_ms),
1173 o2net_keepalive_delay(sc->sc_node)); 1236 o2net_keepalive_delay());
1174 o2net_ensure_shutdown(nn, sc, -ENOTCONN); 1237 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
1175 return -1; 1238 return -1;
1176 } 1239 }
@@ -1193,6 +1256,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
1193 * shut down already */ 1256 * shut down already */
1194 if (nn->nn_sc == sc) { 1257 if (nn->nn_sc == sc) {
1195 o2net_sc_reset_idle_timer(sc); 1258 o2net_sc_reset_idle_timer(sc);
1259 atomic_set(&nn->nn_timeout, 0);
1196 o2net_set_nn_state(nn, sc, 1, 0); 1260 o2net_set_nn_state(nn, sc, 1, 0);
1197 } 1261 }
1198 spin_unlock(&nn->nn_lock); 1262 spin_unlock(&nn->nn_lock);
@@ -1347,12 +1411,11 @@ static void o2net_initialize_handshake(void)
1347{ 1411{
1348 o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( 1412 o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
1349 O2HB_MAX_WRITE_TIMEOUT_MS); 1413 O2HB_MAX_WRITE_TIMEOUT_MS);
1350 o2net_hand->o2net_idle_timeout_ms = cpu_to_be32( 1414 o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(o2net_idle_timeout());
1351 o2net_idle_timeout(NULL));
1352 o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( 1415 o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32(
1353 o2net_keepalive_delay(NULL)); 1416 o2net_keepalive_delay());
1354 o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( 1417 o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32(
1355 o2net_reconnect_delay(NULL)); 1418 o2net_reconnect_delay());
1356} 1419}
1357 1420
1358/* ------------------------------------------------------------ */ 1421/* ------------------------------------------------------------ */
@@ -1391,14 +1454,15 @@ static void o2net_sc_send_keep_req(struct work_struct *work)
1391static void o2net_idle_timer(unsigned long data) 1454static void o2net_idle_timer(unsigned long data)
1392{ 1455{
1393 struct o2net_sock_container *sc = (struct o2net_sock_container *)data; 1456 struct o2net_sock_container *sc = (struct o2net_sock_container *)data;
1457 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
1394 struct timeval now; 1458 struct timeval now;
1395 1459
1396 do_gettimeofday(&now); 1460 do_gettimeofday(&now);
1397 1461
1398 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " 1462 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
1399 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), 1463 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
1400 o2net_idle_timeout(sc->sc_node) / 1000, 1464 o2net_idle_timeout() / 1000,
1401 o2net_idle_timeout(sc->sc_node) % 1000); 1465 o2net_idle_timeout() % 1000);
1402 mlog(ML_NOTICE, "here are some times that might help debug the " 1466 mlog(ML_NOTICE, "here are some times that might help debug the "
1403 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1467 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
1404 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", 1468 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
@@ -1413,6 +1477,12 @@ static void o2net_idle_timer(unsigned long data)
1413 sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, 1477 sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec,
1414 sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); 1478 sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec);
1415 1479
1480 /*
1481 * Initialize the nn_timeout so that the next connection attempt
1482 * will continue in o2net_start_connect.
1483 */
1484 atomic_set(&nn->nn_timeout, 1);
1485
1416 o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 1486 o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
1417} 1487}
1418 1488
@@ -1420,10 +1490,10 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
1420{ 1490{
1421 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); 1491 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
1422 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, 1492 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
1423 msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node))); 1493 msecs_to_jiffies(o2net_keepalive_delay()));
1424 do_gettimeofday(&sc->sc_tv_timer); 1494 do_gettimeofday(&sc->sc_tv_timer);
1425 mod_timer(&sc->sc_idle_timeout, 1495 mod_timer(&sc->sc_idle_timeout,
1426 jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node))); 1496 jiffies + msecs_to_jiffies(o2net_idle_timeout()));
1427} 1497}
1428 1498
1429static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) 1499static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
@@ -1447,6 +1517,7 @@ static void o2net_start_connect(struct work_struct *work)
1447 struct socket *sock = NULL; 1517 struct socket *sock = NULL;
1448 struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; 1518 struct sockaddr_in myaddr = {0, }, remoteaddr = {0, };
1449 int ret = 0, stop; 1519 int ret = 0, stop;
1520 unsigned int timeout;
1450 1521
1451 /* if we're greater we initiate tx, otherwise we accept */ 1522 /* if we're greater we initiate tx, otherwise we accept */
1452 if (o2nm_this_node() <= o2net_num_from_nn(nn)) 1523 if (o2nm_this_node() <= o2net_num_from_nn(nn))
@@ -1466,8 +1537,17 @@ static void o2net_start_connect(struct work_struct *work)
1466 } 1537 }
1467 1538
1468 spin_lock(&nn->nn_lock); 1539 spin_lock(&nn->nn_lock);
1469 /* see if we already have one pending or have given up */ 1540 /*
1470 stop = (nn->nn_sc || nn->nn_persistent_error); 1541 * see if we already have one pending or have given up.
1542 * For nn_timeout, it is set when we close the connection
1543 * because of the idle time out. So it means that we have
1544 * at least connected to that node successfully once,
1545 * now try to connect to it again.
1546 */
1547 timeout = atomic_read(&nn->nn_timeout);
1548 stop = (nn->nn_sc ||
1549 (nn->nn_persistent_error &&
1550 (nn->nn_persistent_error != -ENOTCONN || timeout == 0)));
1471 spin_unlock(&nn->nn_lock); 1551 spin_unlock(&nn->nn_lock);
1472 if (stop) 1552 if (stop)
1473 goto out; 1553 goto out;
@@ -1552,12 +1632,11 @@ static void o2net_connect_expired(struct work_struct *work)
1552 1632
1553 spin_lock(&nn->nn_lock); 1633 spin_lock(&nn->nn_lock);
1554 if (!nn->nn_sc_valid) { 1634 if (!nn->nn_sc_valid) {
1555 struct o2nm_node *node = nn->nn_sc->sc_node;
1556 mlog(ML_ERROR, "no connection established with node %u after " 1635 mlog(ML_ERROR, "no connection established with node %u after "
1557 "%u.%u seconds, giving up and returning errors.\n", 1636 "%u.%u seconds, giving up and returning errors.\n",
1558 o2net_num_from_nn(nn), 1637 o2net_num_from_nn(nn),
1559 o2net_idle_timeout(node) / 1000, 1638 o2net_idle_timeout() / 1000,
1560 o2net_idle_timeout(node) % 1000); 1639 o2net_idle_timeout() % 1000);
1561 1640
1562 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); 1641 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
1563 } 1642 }
@@ -1580,6 +1659,7 @@ void o2net_disconnect_node(struct o2nm_node *node)
1580 1659
1581 /* don't reconnect until it's heartbeating again */ 1660 /* don't reconnect until it's heartbeating again */
1582 spin_lock(&nn->nn_lock); 1661 spin_lock(&nn->nn_lock);
1662 atomic_set(&nn->nn_timeout, 0);
1583 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); 1663 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
1584 spin_unlock(&nn->nn_lock); 1664 spin_unlock(&nn->nn_lock);
1585 1665
@@ -1611,20 +1691,15 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
1611 1691
1612 /* ensure an immediate connect attempt */ 1692 /* ensure an immediate connect attempt */
1613 nn->nn_last_connect_attempt = jiffies - 1693 nn->nn_last_connect_attempt = jiffies -
1614 (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); 1694 (msecs_to_jiffies(o2net_reconnect_delay()) + 1);
1615 1695
1616 if (node_num != o2nm_this_node()) { 1696 if (node_num != o2nm_this_node()) {
1617 /* heartbeat doesn't work unless a local node number is
1618 * configured and doing so brings up the o2net_wq, so we can
1619 * use it.. */
1620 queue_delayed_work(o2net_wq, &nn->nn_connect_expired,
1621 msecs_to_jiffies(o2net_idle_timeout(node)));
1622
1623 /* believe it or not, accept and node hearbeating testing 1697 /* believe it or not, accept and node hearbeating testing
1624 * can succeed for this node before we got here.. so 1698 * can succeed for this node before we got here.. so
1625 * only use set_nn_state to clear the persistent error 1699 * only use set_nn_state to clear the persistent error
1626 * if that hasn't already happened */ 1700 * if that hasn't already happened */
1627 spin_lock(&nn->nn_lock); 1701 spin_lock(&nn->nn_lock);
1702 atomic_set(&nn->nn_timeout, 0);
1628 if (nn->nn_persistent_error) 1703 if (nn->nn_persistent_error)
1629 o2net_set_nn_state(nn, NULL, 0, 0); 1704 o2net_set_nn_state(nn, NULL, 0, 0);
1630 spin_unlock(&nn->nn_lock); 1705 spin_unlock(&nn->nn_lock);
@@ -1748,6 +1823,7 @@ static int o2net_accept_one(struct socket *sock)
1748 new_sock = NULL; 1823 new_sock = NULL;
1749 1824
1750 spin_lock(&nn->nn_lock); 1825 spin_lock(&nn->nn_lock);
1826 atomic_set(&nn->nn_timeout, 0);
1751 o2net_set_nn_state(nn, sc, 0, 0); 1827 o2net_set_nn_state(nn, sc, 0, 0);
1752 spin_unlock(&nn->nn_lock); 1828 spin_unlock(&nn->nn_lock);
1753 1829
@@ -1923,6 +1999,9 @@ int o2net_init(void)
1923 1999
1924 o2quo_init(); 2000 o2quo_init();
1925 2001
2002 if (o2net_debugfs_init())
2003 return -ENOMEM;
2004
1926 o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); 2005 o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL);
1927 o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); 2006 o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
1928 o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); 2007 o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
@@ -1942,6 +2021,7 @@ int o2net_init(void)
1942 for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { 2021 for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) {
1943 struct o2net_node *nn = o2net_nn_from_num(i); 2022 struct o2net_node *nn = o2net_nn_from_num(i);
1944 2023
2024 atomic_set(&nn->nn_timeout, 0);
1945 spin_lock_init(&nn->nn_lock); 2025 spin_lock_init(&nn->nn_lock);
1946 INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); 2026 INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect);
1947 INIT_DELAYED_WORK(&nn->nn_connect_expired, 2027 INIT_DELAYED_WORK(&nn->nn_connect_expired,
@@ -1963,4 +2043,5 @@ void o2net_exit(void)
1963 kfree(o2net_hand); 2043 kfree(o2net_hand);
1964 kfree(o2net_keep_req); 2044 kfree(o2net_keep_req);
1965 kfree(o2net_keep_resp); 2045 kfree(o2net_keep_resp);
2046 o2net_debugfs_exit();
1966} 2047}
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index f36f66aab3dd..a705d5d19036 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -117,4 +117,36 @@ int o2net_num_connected_peers(void);
117int o2net_init(void); 117int o2net_init(void);
118void o2net_exit(void); 118void o2net_exit(void);
119 119
120struct o2net_send_tracking;
121struct o2net_sock_container;
122
123#ifdef CONFIG_DEBUG_FS
124int o2net_debugfs_init(void);
125void o2net_debugfs_exit(void);
126void o2net_debug_add_nst(struct o2net_send_tracking *nst);
127void o2net_debug_del_nst(struct o2net_send_tracking *nst);
128void o2net_debug_add_sc(struct o2net_sock_container *sc);
129void o2net_debug_del_sc(struct o2net_sock_container *sc);
130#else
131static int o2net_debugfs_init(void)
132{
133 return 0;
134}
135static void o2net_debugfs_exit(void)
136{
137}
138static void o2net_debug_add_nst(struct o2net_send_tracking *nst)
139{
140}
141static void o2net_debug_del_nst(struct o2net_send_tracking *nst)
142{
143}
144static void o2net_debug_add_sc(struct o2net_sock_container *sc)
145{
146}
147static void o2net_debug_del_sc(struct o2net_sock_container *sc)
148{
149}
150#endif /* CONFIG_DEBUG_FS */
151
120#endif /* O2CLUSTER_TCP_H */ 152#endif /* O2CLUSTER_TCP_H */
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index d25b9af28500..8d58cfe410b1 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -95,6 +95,8 @@ struct o2net_node {
95 unsigned nn_sc_valid:1; 95 unsigned nn_sc_valid:1;
96 /* if this is set tx just returns it */ 96 /* if this is set tx just returns it */
97 int nn_persistent_error; 97 int nn_persistent_error;
98 /* It is only set to 1 after the idle time out. */
99 atomic_t nn_timeout;
98 100
99 /* threads waiting for an sc to arrive wait on the wq for generation 101 /* threads waiting for an sc to arrive wait on the wq for generation
100 * to increase. it is increased when a connecting socket succeeds 102 * to increase. it is increased when a connecting socket succeeds
@@ -164,7 +166,9 @@ struct o2net_sock_container {
164 /* original handlers for the sockets */ 166 /* original handlers for the sockets */
165 void (*sc_state_change)(struct sock *sk); 167 void (*sc_state_change)(struct sock *sk);
166 void (*sc_data_ready)(struct sock *sk, int bytes); 168 void (*sc_data_ready)(struct sock *sk, int bytes);
167 169#ifdef CONFIG_DEBUG_FS
170 struct list_head sc_net_debug_item;
171#endif
168 struct timeval sc_tv_timer; 172 struct timeval sc_tv_timer;
169 struct timeval sc_tv_data_ready; 173 struct timeval sc_tv_data_ready;
170 struct timeval sc_tv_advance_start; 174 struct timeval sc_tv_advance_start;
@@ -206,4 +210,24 @@ struct o2net_status_wait {
206 struct list_head ns_node_item; 210 struct list_head ns_node_item;
207}; 211};
208 212
213#ifdef CONFIG_DEBUG_FS
214/* just for state dumps */
215struct o2net_send_tracking {
216 struct list_head st_net_debug_item;
217 struct task_struct *st_task;
218 struct o2net_sock_container *st_sc;
219 u32 st_id;
220 u32 st_msg_type;
221 u32 st_msg_key;
222 u8 st_node;
223 struct timeval st_sock_time;
224 struct timeval st_send_time;
225 struct timeval st_status_time;
226};
227#else
228struct o2net_send_tracking {
229 u32 dummy;
230};
231#endif /* CONFIG_DEBUG_FS */
232
209#endif /* O2CLUSTER_TCP_INTERNAL_H */ 233#endif /* O2CLUSTER_TCP_INTERNAL_H */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index e280833ceb9a..8a1875848080 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -390,9 +390,8 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
390 goto bail; 390 goto bail;
391 } 391 }
392 if (pde) 392 if (pde)
393 pde->rec_len = 393 le16_add_cpu(&pde->rec_len,
394 cpu_to_le16(le16_to_cpu(pde->rec_len) + 394 le16_to_cpu(de->rec_len));
395 le16_to_cpu(de->rec_len));
396 else 395 else
397 de->inode = 0; 396 de->inode = 0;
398 dir->i_version++; 397 dir->i_version++;
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile
index ce3f7c29d270..190361375700 100644
--- a/fs/ocfs2/dlm/Makefile
+++ b/fs/ocfs2/dlm/Makefile
@@ -1,6 +1,6 @@
1EXTRA_CFLAGS += -Ifs/ocfs2 1EXTRA_CFLAGS += -Ifs/ocfs2
2 2
3obj-$(CONFIG_OCFS2_FS) += ocfs2_dlm.o ocfs2_dlmfs.o 3obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o ocfs2_dlmfs.o
4 4
5ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ 5ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \
6 dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o 6 dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 9843ee17ea27..d5a86fb81a49 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -49,6 +49,41 @@
49/* Intended to make it easier for us to switch out hash functions */ 49/* Intended to make it easier for us to switch out hash functions */
50#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l) 50#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l)
51 51
52enum dlm_mle_type {
53 DLM_MLE_BLOCK,
54 DLM_MLE_MASTER,
55 DLM_MLE_MIGRATION
56};
57
58struct dlm_lock_name {
59 u8 len;
60 u8 name[DLM_LOCKID_NAME_MAX];
61};
62
63struct dlm_master_list_entry {
64 struct list_head list;
65 struct list_head hb_events;
66 struct dlm_ctxt *dlm;
67 spinlock_t spinlock;
68 wait_queue_head_t wq;
69 atomic_t woken;
70 struct kref mle_refs;
71 int inuse;
72 unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
73 unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
74 unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
75 unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
76 u8 master;
77 u8 new_master;
78 enum dlm_mle_type type;
79 struct o2hb_callback_func mle_hb_up;
80 struct o2hb_callback_func mle_hb_down;
81 union {
82 struct dlm_lock_resource *res;
83 struct dlm_lock_name name;
84 } u;
85};
86
52enum dlm_ast_type { 87enum dlm_ast_type {
53 DLM_AST = 0, 88 DLM_AST = 0,
54 DLM_BAST, 89 DLM_BAST,
@@ -101,6 +136,7 @@ struct dlm_ctxt
101 struct list_head purge_list; 136 struct list_head purge_list;
102 struct list_head pending_asts; 137 struct list_head pending_asts;
103 struct list_head pending_basts; 138 struct list_head pending_basts;
139 struct list_head tracking_list;
104 unsigned int purge_count; 140 unsigned int purge_count;
105 spinlock_t spinlock; 141 spinlock_t spinlock;
106 spinlock_t ast_lock; 142 spinlock_t ast_lock;
@@ -122,6 +158,9 @@ struct dlm_ctxt
122 atomic_t remote_resources; 158 atomic_t remote_resources;
123 atomic_t unknown_resources; 159 atomic_t unknown_resources;
124 160
161 struct dlm_debug_ctxt *dlm_debug_ctxt;
162 struct dentry *dlm_debugfs_subroot;
163
125 /* NOTE: Next three are protected by dlm_domain_lock */ 164 /* NOTE: Next three are protected by dlm_domain_lock */
126 struct kref dlm_refs; 165 struct kref dlm_refs;
127 enum dlm_ctxt_state dlm_state; 166 enum dlm_ctxt_state dlm_state;
@@ -176,6 +215,7 @@ struct dlm_mig_lockres_priv
176{ 215{
177 struct dlm_lock_resource *lockres; 216 struct dlm_lock_resource *lockres;
178 u8 real_master; 217 u8 real_master;
218 u8 extra_ref;
179}; 219};
180 220
181struct dlm_assert_master_priv 221struct dlm_assert_master_priv
@@ -269,6 +309,9 @@ struct dlm_lock_resource
269 struct list_head dirty; 309 struct list_head dirty;
270 struct list_head recovering; // dlm_recovery_ctxt.resources list 310 struct list_head recovering; // dlm_recovery_ctxt.resources list
271 311
312 /* Added during init and removed during release */
313 struct list_head tracking; /* dlm->tracking_list */
314
272 /* unused lock resources have their last_used stamped and are 315 /* unused lock resources have their last_used stamped and are
273 * put on a list for the dlm thread to run. */ 316 * put on a list for the dlm thread to run. */
274 unsigned long last_used; 317 unsigned long last_used;
@@ -602,17 +645,19 @@ enum dlm_query_join_response_code {
602 JOIN_PROTOCOL_MISMATCH, 645 JOIN_PROTOCOL_MISMATCH,
603}; 646};
604 647
648struct dlm_query_join_packet {
649 u8 code; /* Response code. dlm_minor and fs_minor
650 are only valid if this is JOIN_OK */
651 u8 dlm_minor; /* The minor version of the protocol the
652 dlm is speaking. */
653 u8 fs_minor; /* The minor version of the protocol the
654 filesystem is speaking. */
655 u8 reserved;
656};
657
605union dlm_query_join_response { 658union dlm_query_join_response {
606 u32 intval; 659 u32 intval;
607 struct { 660 struct dlm_query_join_packet packet;
608 u8 code; /* Response code. dlm_minor and fs_minor
609 are only valid if this is JOIN_OK */
610 u8 dlm_minor; /* The minor version of the protocol the
611 dlm is speaking. */
612 u8 fs_minor; /* The minor version of the protocol the
613 filesystem is speaking. */
614 u8 reserved;
615 } packet;
616}; 661};
617 662
618struct dlm_lock_request 663struct dlm_lock_request
@@ -960,9 +1005,16 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res)
960 DLM_LOCK_RES_MIGRATING)); 1005 DLM_LOCK_RES_MIGRATING));
961} 1006}
962 1007
1008/* create/destroy slab caches */
1009int dlm_init_master_caches(void);
1010void dlm_destroy_master_caches(void);
1011
1012int dlm_init_lock_cache(void);
1013void dlm_destroy_lock_cache(void);
963 1014
964int dlm_init_mle_cache(void); 1015int dlm_init_mle_cache(void);
965void dlm_destroy_mle_cache(void); 1016void dlm_destroy_mle_cache(void);
1017
966void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); 1018void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up);
967int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, 1019int dlm_drop_lockres_ref(struct dlm_ctxt *dlm,
968 struct dlm_lock_resource *res); 1020 struct dlm_lock_resource *res);
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index ecb4d997221e..75997b4deaf3 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -487,7 +487,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data,
487 "cookie=%u:%llu\n", 487 "cookie=%u:%llu\n",
488 dlm_get_lock_cookie_node(be64_to_cpu(cnv->cookie)), 488 dlm_get_lock_cookie_node(be64_to_cpu(cnv->cookie)),
489 dlm_get_lock_cookie_seq(be64_to_cpu(cnv->cookie))); 489 dlm_get_lock_cookie_seq(be64_to_cpu(cnv->cookie)));
490 __dlm_print_one_lock_resource(res); 490 dlm_print_one_lock_resource(res);
491 goto leave; 491 goto leave;
492 } 492 }
493 493
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 64239b37e5d4..5f6d858770a2 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * debug functionality for the dlm 6 * debug functionality for the dlm
7 * 7 *
8 * Copyright (C) 2004 Oracle. All rights reserved. 8 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
9 * 9 *
10 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public 11 * modify it under the terms of the GNU General Public
@@ -30,6 +30,7 @@
30#include <linux/utsname.h> 30#include <linux/utsname.h>
31#include <linux/sysctl.h> 31#include <linux/sysctl.h>
32#include <linux/spinlock.h> 32#include <linux/spinlock.h>
33#include <linux/debugfs.h>
33 34
34#include "cluster/heartbeat.h" 35#include "cluster/heartbeat.h"
35#include "cluster/nodemanager.h" 36#include "cluster/nodemanager.h"
@@ -37,17 +38,16 @@
37 38
38#include "dlmapi.h" 39#include "dlmapi.h"
39#include "dlmcommon.h" 40#include "dlmcommon.h"
40
41#include "dlmdomain.h" 41#include "dlmdomain.h"
42#include "dlmdebug.h"
42 43
43#define MLOG_MASK_PREFIX ML_DLM 44#define MLOG_MASK_PREFIX ML_DLM
44#include "cluster/masklog.h" 45#include "cluster/masklog.h"
45 46
47int stringify_lockname(const char *lockname, int locklen, char *buf, int len);
48
46void dlm_print_one_lock_resource(struct dlm_lock_resource *res) 49void dlm_print_one_lock_resource(struct dlm_lock_resource *res)
47{ 50{
48 mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n",
49 res->lockname.len, res->lockname.name,
50 res->owner, res->state);
51 spin_lock(&res->spinlock); 51 spin_lock(&res->spinlock);
52 __dlm_print_one_lock_resource(res); 52 __dlm_print_one_lock_resource(res);
53 spin_unlock(&res->spinlock); 53 spin_unlock(&res->spinlock);
@@ -58,7 +58,7 @@ static void dlm_print_lockres_refmap(struct dlm_lock_resource *res)
58 int bit; 58 int bit;
59 assert_spin_locked(&res->spinlock); 59 assert_spin_locked(&res->spinlock);
60 60
61 mlog(ML_NOTICE, " refmap nodes: [ "); 61 printk(" refmap nodes: [ ");
62 bit = 0; 62 bit = 0;
63 while (1) { 63 while (1) {
64 bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); 64 bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit);
@@ -70,63 +70,66 @@ static void dlm_print_lockres_refmap(struct dlm_lock_resource *res)
70 printk("], inflight=%u\n", res->inflight_locks); 70 printk("], inflight=%u\n", res->inflight_locks);
71} 71}
72 72
73static void __dlm_print_lock(struct dlm_lock *lock)
74{
75 spin_lock(&lock->spinlock);
76
77 printk(" type=%d, conv=%d, node=%u, cookie=%u:%llu, "
78 "ref=%u, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c), "
79 "pending=(conv=%c,lock=%c,cancel=%c,unlock=%c)\n",
80 lock->ml.type, lock->ml.convert_type, lock->ml.node,
81 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
82 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
83 atomic_read(&lock->lock_refs.refcount),
84 (list_empty(&lock->ast_list) ? 'y' : 'n'),
85 (lock->ast_pending ? 'y' : 'n'),
86 (list_empty(&lock->bast_list) ? 'y' : 'n'),
87 (lock->bast_pending ? 'y' : 'n'),
88 (lock->convert_pending ? 'y' : 'n'),
89 (lock->lock_pending ? 'y' : 'n'),
90 (lock->cancel_pending ? 'y' : 'n'),
91 (lock->unlock_pending ? 'y' : 'n'));
92
93 spin_unlock(&lock->spinlock);
94}
95
73void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) 96void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
74{ 97{
75 struct list_head *iter2; 98 struct list_head *iter2;
76 struct dlm_lock *lock; 99 struct dlm_lock *lock;
100 char buf[DLM_LOCKID_NAME_MAX];
77 101
78 assert_spin_locked(&res->spinlock); 102 assert_spin_locked(&res->spinlock);
79 103
80 mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n", 104 stringify_lockname(res->lockname.name, res->lockname.len,
81 res->lockname.len, res->lockname.name, 105 buf, sizeof(buf) - 1);
82 res->owner, res->state); 106 printk("lockres: %s, owner=%u, state=%u\n",
83 mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n", 107 buf, res->owner, res->state);
84 res->last_used, list_empty(&res->purge) ? "no" : "yes"); 108 printk(" last used: %lu, refcnt: %u, on purge list: %s\n",
109 res->last_used, atomic_read(&res->refs.refcount),
110 list_empty(&res->purge) ? "no" : "yes");
111 printk(" on dirty list: %s, on reco list: %s, "
112 "migrating pending: %s\n",
113 list_empty(&res->dirty) ? "no" : "yes",
114 list_empty(&res->recovering) ? "no" : "yes",
115 res->migration_pending ? "yes" : "no");
116 printk(" inflight locks: %d, asts reserved: %d\n",
117 res->inflight_locks, atomic_read(&res->asts_reserved));
85 dlm_print_lockres_refmap(res); 118 dlm_print_lockres_refmap(res);
86 mlog(ML_NOTICE, " granted queue: \n"); 119 printk(" granted queue:\n");
87 list_for_each(iter2, &res->granted) { 120 list_for_each(iter2, &res->granted) {
88 lock = list_entry(iter2, struct dlm_lock, list); 121 lock = list_entry(iter2, struct dlm_lock, list);
89 spin_lock(&lock->spinlock); 122 __dlm_print_lock(lock);
90 mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
91 "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
92 lock->ml.type, lock->ml.convert_type, lock->ml.node,
93 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
94 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
95 list_empty(&lock->ast_list) ? 'y' : 'n',
96 lock->ast_pending ? 'y' : 'n',
97 list_empty(&lock->bast_list) ? 'y' : 'n',
98 lock->bast_pending ? 'y' : 'n');
99 spin_unlock(&lock->spinlock);
100 } 123 }
101 mlog(ML_NOTICE, " converting queue: \n"); 124 printk(" converting queue:\n");
102 list_for_each(iter2, &res->converting) { 125 list_for_each(iter2, &res->converting) {
103 lock = list_entry(iter2, struct dlm_lock, list); 126 lock = list_entry(iter2, struct dlm_lock, list);
104 spin_lock(&lock->spinlock); 127 __dlm_print_lock(lock);
105 mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
106 "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
107 lock->ml.type, lock->ml.convert_type, lock->ml.node,
108 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
109 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
110 list_empty(&lock->ast_list) ? 'y' : 'n',
111 lock->ast_pending ? 'y' : 'n',
112 list_empty(&lock->bast_list) ? 'y' : 'n',
113 lock->bast_pending ? 'y' : 'n');
114 spin_unlock(&lock->spinlock);
115 } 128 }
116 mlog(ML_NOTICE, " blocked queue: \n"); 129 printk(" blocked queue:\n");
117 list_for_each(iter2, &res->blocked) { 130 list_for_each(iter2, &res->blocked) {
118 lock = list_entry(iter2, struct dlm_lock, list); 131 lock = list_entry(iter2, struct dlm_lock, list);
119 spin_lock(&lock->spinlock); 132 __dlm_print_lock(lock);
120 mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
121 "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
122 lock->ml.type, lock->ml.convert_type, lock->ml.node,
123 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
124 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
125 list_empty(&lock->ast_list) ? 'y' : 'n',
126 lock->ast_pending ? 'y' : 'n',
127 list_empty(&lock->bast_list) ? 'y' : 'n',
128 lock->bast_pending ? 'y' : 'n');
129 spin_unlock(&lock->spinlock);
130 } 133 }
131} 134}
132 135
@@ -136,31 +139,6 @@ void dlm_print_one_lock(struct dlm_lock *lockid)
136} 139}
137EXPORT_SYMBOL_GPL(dlm_print_one_lock); 140EXPORT_SYMBOL_GPL(dlm_print_one_lock);
138 141
139#if 0
140void dlm_dump_lock_resources(struct dlm_ctxt *dlm)
141{
142 struct dlm_lock_resource *res;
143 struct hlist_node *iter;
144 struct hlist_head *bucket;
145 int i;
146
147 mlog(ML_NOTICE, "struct dlm_ctxt: %s, node=%u, key=%u\n",
148 dlm->name, dlm->node_num, dlm->key);
149 if (!dlm || !dlm->name) {
150 mlog(ML_ERROR, "dlm=%p\n", dlm);
151 return;
152 }
153
154 spin_lock(&dlm->spinlock);
155 for (i=0; i<DLM_HASH_BUCKETS; i++) {
156 bucket = dlm_lockres_hash(dlm, i);
157 hlist_for_each_entry(res, iter, bucket, hash_node)
158 dlm_print_one_lock_resource(res);
159 }
160 spin_unlock(&dlm->spinlock);
161}
162#endif /* 0 */
163
164static const char *dlm_errnames[] = { 142static const char *dlm_errnames[] = {
165 [DLM_NORMAL] = "DLM_NORMAL", 143 [DLM_NORMAL] = "DLM_NORMAL",
166 [DLM_GRANTED] = "DLM_GRANTED", 144 [DLM_GRANTED] = "DLM_GRANTED",
@@ -266,3 +244,792 @@ const char *dlm_errname(enum dlm_status err)
266 return dlm_errnames[err]; 244 return dlm_errnames[err];
267} 245}
268EXPORT_SYMBOL_GPL(dlm_errname); 246EXPORT_SYMBOL_GPL(dlm_errname);
247
248/* NOTE: This function converts a lockname into a string. It uses knowledge
249 * of the format of the lockname that should be outside the purview of the dlm.
250 * We are adding only to make dlm debugging slightly easier.
251 *
252 * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h.
253 */
254int stringify_lockname(const char *lockname, int locklen, char *buf, int len)
255{
256 int out = 0;
257 __be64 inode_blkno_be;
258
259#define OCFS2_DENTRY_LOCK_INO_START 18
260 if (*lockname == 'N') {
261 memcpy((__be64 *)&inode_blkno_be,
262 (char *)&lockname[OCFS2_DENTRY_LOCK_INO_START],
263 sizeof(__be64));
264 out += snprintf(buf + out, len - out, "%.*s%08x",
265 OCFS2_DENTRY_LOCK_INO_START - 1, lockname,
266 (unsigned int)be64_to_cpu(inode_blkno_be));
267 } else
268 out += snprintf(buf + out, len - out, "%.*s",
269 locklen, lockname);
270 return out;
271}
272
273static int stringify_nodemap(unsigned long *nodemap, int maxnodes,
274 char *buf, int len)
275{
276 int out = 0;
277 int i = -1;
278
279 while ((i = find_next_bit(nodemap, maxnodes, i + 1)) < maxnodes)
280 out += snprintf(buf + out, len - out, "%d ", i);
281
282 return out;
283}
284
285static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len)
286{
287 int out = 0;
288 unsigned int namelen;
289 const char *name;
290 char *mle_type;
291
292 if (mle->type != DLM_MLE_MASTER) {
293 namelen = mle->u.name.len;
294 name = mle->u.name.name;
295 } else {
296 namelen = mle->u.res->lockname.len;
297 name = mle->u.res->lockname.name;
298 }
299
300 if (mle->type == DLM_MLE_BLOCK)
301 mle_type = "BLK";
302 else if (mle->type == DLM_MLE_MASTER)
303 mle_type = "MAS";
304 else
305 mle_type = "MIG";
306
307 out += stringify_lockname(name, namelen, buf + out, len - out);
308 out += snprintf(buf + out, len - out,
309 "\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n",
310 mle_type, mle->master, mle->new_master,
311 !list_empty(&mle->hb_events),
312 !!mle->inuse,
313 atomic_read(&mle->mle_refs.refcount));
314
315 out += snprintf(buf + out, len - out, "Maybe=");
316 out += stringify_nodemap(mle->maybe_map, O2NM_MAX_NODES,
317 buf + out, len - out);
318 out += snprintf(buf + out, len - out, "\n");
319
320 out += snprintf(buf + out, len - out, "Vote=");
321 out += stringify_nodemap(mle->vote_map, O2NM_MAX_NODES,
322 buf + out, len - out);
323 out += snprintf(buf + out, len - out, "\n");
324
325 out += snprintf(buf + out, len - out, "Response=");
326 out += stringify_nodemap(mle->response_map, O2NM_MAX_NODES,
327 buf + out, len - out);
328 out += snprintf(buf + out, len - out, "\n");
329
330 out += snprintf(buf + out, len - out, "Node=");
331 out += stringify_nodemap(mle->node_map, O2NM_MAX_NODES,
332 buf + out, len - out);
333 out += snprintf(buf + out, len - out, "\n");
334
335 out += snprintf(buf + out, len - out, "\n");
336
337 return out;
338}
339
340void dlm_print_one_mle(struct dlm_master_list_entry *mle)
341{
342 char *buf;
343
344 buf = (char *) get_zeroed_page(GFP_NOFS);
345 if (buf) {
346 dump_mle(mle, buf, PAGE_SIZE - 1);
347 free_page((unsigned long)buf);
348 }
349}
350
351#ifdef CONFIG_DEBUG_FS
352
353static struct dentry *dlm_debugfs_root = NULL;
354
355#define DLM_DEBUGFS_DIR "o2dlm"
356#define DLM_DEBUGFS_DLM_STATE "dlm_state"
357#define DLM_DEBUGFS_LOCKING_STATE "locking_state"
358#define DLM_DEBUGFS_MLE_STATE "mle_state"
359#define DLM_DEBUGFS_PURGE_LIST "purge_list"
360
361/* begin - utils funcs */
362static void dlm_debug_free(struct kref *kref)
363{
364 struct dlm_debug_ctxt *dc;
365
366 dc = container_of(kref, struct dlm_debug_ctxt, debug_refcnt);
367
368 kfree(dc);
369}
370
371void dlm_debug_put(struct dlm_debug_ctxt *dc)
372{
373 if (dc)
374 kref_put(&dc->debug_refcnt, dlm_debug_free);
375}
376
377static void dlm_debug_get(struct dlm_debug_ctxt *dc)
378{
379 kref_get(&dc->debug_refcnt);
380}
381
382static struct debug_buffer *debug_buffer_allocate(void)
383{
384 struct debug_buffer *db = NULL;
385
386 db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL);
387 if (!db)
388 goto bail;
389
390 db->len = PAGE_SIZE;
391 db->buf = kmalloc(db->len, GFP_KERNEL);
392 if (!db->buf)
393 goto bail;
394
395 return db;
396bail:
397 kfree(db);
398 return NULL;
399}
400
401static ssize_t debug_buffer_read(struct file *file, char __user *buf,
402 size_t nbytes, loff_t *ppos)
403{
404 struct debug_buffer *db = file->private_data;
405
406 return simple_read_from_buffer(buf, nbytes, ppos, db->buf, db->len);
407}
408
409static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence)
410{
411 struct debug_buffer *db = file->private_data;
412 loff_t new = -1;
413
414 switch (whence) {
415 case 0:
416 new = off;
417 break;
418 case 1:
419 new = file->f_pos + off;
420 break;
421 }
422
423 if (new < 0 || new > db->len)
424 return -EINVAL;
425
426 return (file->f_pos = new);
427}
428
429static int debug_buffer_release(struct inode *inode, struct file *file)
430{
431 struct debug_buffer *db = (struct debug_buffer *)file->private_data;
432
433 if (db)
434 kfree(db->buf);
435 kfree(db);
436
437 return 0;
438}
439/* end - util funcs */
440
441/* begin - purge list funcs */
442static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
443{
444 struct dlm_lock_resource *res;
445 int out = 0;
446 unsigned long total = 0;
447
448 out += snprintf(db->buf + out, db->len - out,
449 "Dumping Purgelist for Domain: %s\n", dlm->name);
450
451 spin_lock(&dlm->spinlock);
452 list_for_each_entry(res, &dlm->purge_list, purge) {
453 ++total;
454 if (db->len - out < 100)
455 continue;
456 spin_lock(&res->spinlock);
457 out += stringify_lockname(res->lockname.name,
458 res->lockname.len,
459 db->buf + out, db->len - out);
460 out += snprintf(db->buf + out, db->len - out, "\t%ld\n",
461 (jiffies - res->last_used)/HZ);
462 spin_unlock(&res->spinlock);
463 }
464 spin_unlock(&dlm->spinlock);
465
466 out += snprintf(db->buf + out, db->len - out,
467 "Total on list: %ld\n", total);
468
469 return out;
470}
471
472static int debug_purgelist_open(struct inode *inode, struct file *file)
473{
474 struct dlm_ctxt *dlm = inode->i_private;
475 struct debug_buffer *db;
476
477 db = debug_buffer_allocate();
478 if (!db)
479 goto bail;
480
481 db->len = debug_purgelist_print(dlm, db);
482
483 file->private_data = db;
484
485 return 0;
486bail:
487 return -ENOMEM;
488}
489
490static struct file_operations debug_purgelist_fops = {
491 .open = debug_purgelist_open,
492 .release = debug_buffer_release,
493 .read = debug_buffer_read,
494 .llseek = debug_buffer_llseek,
495};
496/* end - purge list funcs */
497
498/* begin - debug mle funcs */
499static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
500{
501 struct dlm_master_list_entry *mle;
502 int out = 0;
503 unsigned long total = 0;
504
505 out += snprintf(db->buf + out, db->len - out,
506 "Dumping MLEs for Domain: %s\n", dlm->name);
507
508 spin_lock(&dlm->master_lock);
509 list_for_each_entry(mle, &dlm->master_list, list) {
510 ++total;
511 if (db->len - out < 200)
512 continue;
513 out += dump_mle(mle, db->buf + out, db->len - out);
514 }
515 spin_unlock(&dlm->master_lock);
516
517 out += snprintf(db->buf + out, db->len - out,
518 "Total on list: %ld\n", total);
519 return out;
520}
521
522static int debug_mle_open(struct inode *inode, struct file *file)
523{
524 struct dlm_ctxt *dlm = inode->i_private;
525 struct debug_buffer *db;
526
527 db = debug_buffer_allocate();
528 if (!db)
529 goto bail;
530
531 db->len = debug_mle_print(dlm, db);
532
533 file->private_data = db;
534
535 return 0;
536bail:
537 return -ENOMEM;
538}
539
540static struct file_operations debug_mle_fops = {
541 .open = debug_mle_open,
542 .release = debug_buffer_release,
543 .read = debug_buffer_read,
544 .llseek = debug_buffer_llseek,
545};
546
547/* end - debug mle funcs */
548
549/* begin - debug lockres funcs */
550static int dump_lock(struct dlm_lock *lock, int list_type, char *buf, int len)
551{
552 int out;
553
554#define DEBUG_LOCK_VERSION 1
555 spin_lock(&lock->spinlock);
556 out = snprintf(buf, len, "LOCK:%d,%d,%d,%d,%d,%d:%lld,%d,%d,%d,%d,%d,"
557 "%d,%d,%d,%d\n",
558 DEBUG_LOCK_VERSION,
559 list_type, lock->ml.type, lock->ml.convert_type,
560 lock->ml.node,
561 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
562 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
563 !list_empty(&lock->ast_list),
564 !list_empty(&lock->bast_list),
565 lock->ast_pending, lock->bast_pending,
566 lock->convert_pending, lock->lock_pending,
567 lock->cancel_pending, lock->unlock_pending,
568 atomic_read(&lock->lock_refs.refcount));
569 spin_unlock(&lock->spinlock);
570
571 return out;
572}
573
574static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len)
575{
576 struct dlm_lock *lock;
577 int i;
578 int out = 0;
579
580 out += snprintf(buf + out, len - out, "NAME:");
581 out += stringify_lockname(res->lockname.name, res->lockname.len,
582 buf + out, len - out);
583 out += snprintf(buf + out, len - out, "\n");
584
585#define DEBUG_LRES_VERSION 1
586 out += snprintf(buf + out, len - out,
587 "LRES:%d,%d,%d,%ld,%d,%d,%d,%d,%d,%d,%d\n",
588 DEBUG_LRES_VERSION,
589 res->owner, res->state, res->last_used,
590 !list_empty(&res->purge),
591 !list_empty(&res->dirty),
592 !list_empty(&res->recovering),
593 res->inflight_locks, res->migration_pending,
594 atomic_read(&res->asts_reserved),
595 atomic_read(&res->refs.refcount));
596
597 /* refmap */
598 out += snprintf(buf + out, len - out, "RMAP:");
599 out += stringify_nodemap(res->refmap, O2NM_MAX_NODES,
600 buf + out, len - out);
601 out += snprintf(buf + out, len - out, "\n");
602
603 /* lvb */
604 out += snprintf(buf + out, len - out, "LVBX:");
605 for (i = 0; i < DLM_LVB_LEN; i++)
606 out += snprintf(buf + out, len - out,
607 "%02x", (unsigned char)res->lvb[i]);
608 out += snprintf(buf + out, len - out, "\n");
609
610 /* granted */
611 list_for_each_entry(lock, &res->granted, list)
612 out += dump_lock(lock, 0, buf + out, len - out);
613
614 /* converting */
615 list_for_each_entry(lock, &res->converting, list)
616 out += dump_lock(lock, 1, buf + out, len - out);
617
618 /* blocked */
619 list_for_each_entry(lock, &res->blocked, list)
620 out += dump_lock(lock, 2, buf + out, len - out);
621
622 out += snprintf(buf + out, len - out, "\n");
623
624 return out;
625}
626
/*
 * seq_file ->start(): pick the lock resource to dump for this read pass.
 *
 * Iteration state is carried across read(2) calls in dl->dl_res (with a
 * reference held) rather than in *pos: on the first pass the head of
 * dlm->tracking_list is taken; on later passes the entry following the
 * previously shown one.  The chosen lockres is formatted into dl->dl_buf
 * here, under both dlm->spinlock and the res spinlock, so that
 * lockres_seq_show() only has to copy the buffer.
 *
 * Returns the iterator (dl) or NULL once the end of the list is reached.
 */
static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
{
	struct debug_lockres *dl = m->private;
	struct dlm_ctxt *dlm = dl->dl_ctxt;
	struct dlm_lock_resource *res = NULL;

	spin_lock(&dlm->spinlock);

	if (dl->dl_res) {
		/* Walk starts at the entry after dl->dl_res; every path in
		 * the body breaks, so at most one iteration executes. */
		list_for_each_entry(res, &dl->dl_res->tracking, tracking) {
			/* drop the reference taken on the previous pass */
			if (dl->dl_res) {
				dlm_lockres_put(dl->dl_res);
				dl->dl_res = NULL;
			}
			/* NOTE(review): this detects wrap-around to the
			 * list head (&dlm->tracking_list) to recognize the
			 * end of the list -- relies on all tracked lockres'
			 * being linked through ->tracking; confirm. */
			if (&res->tracking == &dlm->tracking_list) {
				mlog(0, "End of list found, %p\n", res);
				dl = NULL;
				break;
			}
			dlm_lockres_get(res);
			dl->dl_res = res;
			break;
		}
	} else {
		if (!list_empty(&dlm->tracking_list)) {
			/* grab the first entry of the tracking list; the
			 * loop breaks immediately after binding res */
			list_for_each_entry(res, &dlm->tracking_list, tracking)
				break;
			dlm_lockres_get(res);
			dl->dl_res = res;
		} else
			dl = NULL;
	}

	if (dl) {
		/* format now, under the locks; leave room for the NUL */
		spin_lock(&dl->dl_res->spinlock);
		dump_lockres(dl->dl_res, dl->dl_buf, dl->dl_len - 1);
		spin_unlock(&dl->dl_res->spinlock);
	}

	spin_unlock(&dlm->spinlock);

	return dl;
}
670
/* seq_file ->stop(): nothing to undo -- all locking and refcounting is
 * confined to lockres_seq_start()/debug_lockres_release(). */
static void lockres_seq_stop(struct seq_file *m, void *v)
{
}
674
/* seq_file ->next(): always terminate after a single record; the next
 * read(2) re-enters lockres_seq_start(), which advances dl->dl_res. */
static void *lockres_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return NULL;
}
679
680static int lockres_seq_show(struct seq_file *s, void *v)
681{
682 struct debug_lockres *dl = (struct debug_lockres *)v;
683
684 seq_printf(s, "%s", dl->dl_buf);
685
686 return 0;
687}
688
689static struct seq_operations debug_lockres_ops = {
690 .start = lockres_seq_start,
691 .stop = lockres_seq_stop,
692 .next = lockres_seq_next,
693 .show = lockres_seq_show,
694};
695
696static int debug_lockres_open(struct inode *inode, struct file *file)
697{
698 struct dlm_ctxt *dlm = inode->i_private;
699 int ret = -ENOMEM;
700 struct seq_file *seq;
701 struct debug_lockres *dl = NULL;
702
703 dl = kzalloc(sizeof(struct debug_lockres), GFP_KERNEL);
704 if (!dl) {
705 mlog_errno(ret);
706 goto bail;
707 }
708
709 dl->dl_len = PAGE_SIZE;
710 dl->dl_buf = kmalloc(dl->dl_len, GFP_KERNEL);
711 if (!dl->dl_buf) {
712 mlog_errno(ret);
713 goto bail;
714 }
715
716 ret = seq_open(file, &debug_lockres_ops);
717 if (ret) {
718 mlog_errno(ret);
719 goto bail;
720 }
721
722 seq = (struct seq_file *) file->private_data;
723 seq->private = dl;
724
725 dlm_grab(dlm);
726 dl->dl_ctxt = dlm;
727
728 return 0;
729bail:
730 if (dl)
731 kfree(dl->dl_buf);
732 kfree(dl);
733 return ret;
734}
735
/*
 * Release .../locking_state: drop the reference on whatever lockres the
 * iterator was parked on, drop the dlm context reference taken at open
 * time and free the formatting buffer.  Note that dl itself is freed by
 * seq_release_private(), which kfrees seq->private along with the
 * seq_file -- do not free it here.
 */
static int debug_lockres_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = (struct seq_file *)file->private_data;
	struct debug_lockres *dl = (struct debug_lockres *)seq->private;

	if (dl->dl_res)
		dlm_lockres_put(dl->dl_res);
	dlm_put(dl->dl_ctxt);
	kfree(dl->dl_buf);
	return seq_release_private(inode, file);
}
747
748static struct file_operations debug_lockres_fops = {
749 .open = debug_lockres_open,
750 .release = debug_lockres_release,
751 .read = seq_read,
752 .llseek = seq_lseek,
753};
754/* end - debug lockres funcs */
755
/* begin - debug state funcs */

/*
 * Render a human-readable summary of the whole dlm domain -- state,
 * node maps, list usage, resource mastery counters and recovery
 * progress -- into db->buf.  Most fields are read under dlm->spinlock.
 *
 * Returns the number of characters written.
 *
 * NOTE(review): like the lockres dumpers, this trusts that db->len is
 * large enough for the whole report; if "out" ever exceeded db->len,
 * "db->len - out" would go negative (huge size_t) in the following
 * snprintf() -- confirm the buffer sizing in debug_buffer_allocate().
 */
static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
{
	int out = 0;
	struct dlm_reco_node_data *node;
	char *state;
	int lres, rres, ures, tres;

	/* resource-mastery counters are atomics, so they can be sampled
	 * before taking the spinlock */
	lres = atomic_read(&dlm->local_resources);
	rres = atomic_read(&dlm->remote_resources);
	ures = atomic_read(&dlm->unknown_resources);
	tres = lres + rres + ures;

	spin_lock(&dlm->spinlock);

	switch (dlm->dlm_state) {
	case DLM_CTXT_NEW:
		state = "NEW"; break;
	case DLM_CTXT_JOINED:
		state = "JOINED"; break;
	case DLM_CTXT_IN_SHUTDOWN:
		state = "SHUTDOWN"; break;
	case DLM_CTXT_LEAVING:
		state = "LEAVING"; break;
	default:
		state = "UNKNOWN"; break;
	}

	/* Domain: xxxxxxxxxx Key: 0xdfbac769 */
	out += snprintf(db->buf + out, db->len - out,
			"Domain: %s Key: 0x%08x\n", dlm->name, dlm->key);

	/* Thread Pid: xxx Node: xxx State: xxxxx */
	out += snprintf(db->buf + out, db->len - out,
			"Thread Pid: %d Node: %d State: %s\n",
			dlm->dlm_thread_task->pid, dlm->node_num, state);

	/* Number of Joins: xxx Joining Node: xxx */
	out += snprintf(db->buf + out, db->len - out,
			"Number of Joins: %d Joining Node: %d\n",
			dlm->num_joins, dlm->joining_node);

	/* Domain Map: xx xx xx */
	out += snprintf(db->buf + out, db->len - out, "Domain Map: ");
	out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES,
				 db->buf + out, db->len - out);
	out += snprintf(db->buf + out, db->len - out, "\n");

	/* Live Map: xx xx xx */
	out += snprintf(db->buf + out, db->len - out, "Live Map: ");
	out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES,
				 db->buf + out, db->len - out);
	out += snprintf(db->buf + out, db->len - out, "\n");

	/* Mastered Resources Total: xxx Locally: xxx Remotely: ... */
	out += snprintf(db->buf + out, db->len - out,
			"Mastered Resources Total: %d Locally: %d "
			"Remotely: %d Unknown: %d\n",
			tres, lres, rres, ures);

	/* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */
	out += snprintf(db->buf + out, db->len - out,
			"Lists: Dirty=%s Purge=%s PendingASTs=%s "
			"PendingBASTs=%s Master=%s\n",
			(list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
			(list_empty(&dlm->purge_list) ? "Empty" : "InUse"),
			(list_empty(&dlm->pending_asts) ? "Empty" : "InUse"),
			(list_empty(&dlm->pending_basts) ? "Empty" : "InUse"),
			(list_empty(&dlm->master_list) ? "Empty" : "InUse"));

	/* Purge Count: xxx Refs: xxx */
	out += snprintf(db->buf + out, db->len - out,
			"Purge Count: %d Refs: %d\n", dlm->purge_count,
			atomic_read(&dlm->dlm_refs.refcount));

	/* Dead Node: xxx */
	out += snprintf(db->buf + out, db->len - out,
			"Dead Node: %d\n", dlm->reco.dead_node);

	/* What about DLM_RECO_STATE_FINALIZE? */
	if (dlm->reco.state == DLM_RECO_STATE_ACTIVE)
		state = "ACTIVE";
	else
		state = "INACTIVE";

	/* Recovery Pid: xxxx Master: xxx State: xxxx */
	out += snprintf(db->buf + out, db->len - out,
			"Recovery Pid: %d Master: %d State: %s\n",
			dlm->dlm_reco_thread_task->pid,
			dlm->reco.new_master, state);

	/* Recovery Map: xx xx */
	out += snprintf(db->buf + out, db->len - out, "Recovery Map: ");
	out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES,
				 db->buf + out, db->len - out);
	out += snprintf(db->buf + out, db->len - out, "\n");

	/* Recovery Node State: one line per node in the recovery set */
	out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n");
	list_for_each_entry(node, &dlm->reco.node_data, list) {
		switch (node->state) {
		case DLM_RECO_NODE_DATA_INIT:
			state = "INIT";
			break;
		case DLM_RECO_NODE_DATA_REQUESTING:
			state = "REQUESTING";
			break;
		case DLM_RECO_NODE_DATA_DEAD:
			state = "DEAD";
			break;
		case DLM_RECO_NODE_DATA_RECEIVING:
			state = "RECEIVING";
			break;
		case DLM_RECO_NODE_DATA_REQUESTED:
			state = "REQUESTED";
			break;
		case DLM_RECO_NODE_DATA_DONE:
			state = "DONE";
			break;
		case DLM_RECO_NODE_DATA_FINALIZE_SENT:
			state = "FINALIZE-SENT";
			break;
		default:
			state = "BAD";
			break;
		}
		out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n",
				node->node_num, state);
	}

	spin_unlock(&dlm->spinlock);

	return out;
}
890
891static int debug_state_open(struct inode *inode, struct file *file)
892{
893 struct dlm_ctxt *dlm = inode->i_private;
894 struct debug_buffer *db = NULL;
895
896 db = debug_buffer_allocate();
897 if (!db)
898 goto bail;
899
900 db->len = debug_state_print(dlm, db);
901
902 file->private_data = db;
903
904 return 0;
905bail:
906 return -ENOMEM;
907}
908
909static struct file_operations debug_state_fops = {
910 .open = debug_state_open,
911 .release = debug_buffer_release,
912 .read = debug_buffer_read,
913 .llseek = debug_buffer_llseek,
914};
915/* end - debug state funcs */
916
/* files in subroot */

/*
 * Create the per-domain debugfs files (dlm_state, locking_state,
 * mle_state, purge_list) under dlm->dlm_debugfs_subroot, and take a
 * reference on the debug context for them.  On any failure, everything
 * created so far is torn down via dlm_debug_shutdown().
 *
 * Returns 0 on success, -ENOMEM on failure.
 */
int dlm_debug_init(struct dlm_ctxt *dlm)
{
	struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;

	/* for dumping dlm_ctxt */
	dc->debug_state_dentry = debugfs_create_file(DLM_DEBUGFS_DLM_STATE,
						     S_IFREG|S_IRUSR,
						     dlm->dlm_debugfs_subroot,
						     dlm, &debug_state_fops);
	if (!dc->debug_state_dentry) {
		mlog_errno(-ENOMEM);
		goto bail;
	}

	/* for dumping lockres */
	dc->debug_lockres_dentry =
			debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE,
					    S_IFREG|S_IRUSR,
					    dlm->dlm_debugfs_subroot,
					    dlm, &debug_lockres_fops);
	if (!dc->debug_lockres_dentry) {
		mlog_errno(-ENOMEM);
		goto bail;
	}

	/* for dumping mles */
	dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE,
						   S_IFREG|S_IRUSR,
						   dlm->dlm_debugfs_subroot,
						   dlm, &debug_mle_fops);
	if (!dc->debug_mle_dentry) {
		mlog_errno(-ENOMEM);
		goto bail;
	}

	/* for dumping lockres on the purge list */
	dc->debug_purgelist_dentry =
			debugfs_create_file(DLM_DEBUGFS_PURGE_LIST,
					    S_IFREG|S_IRUSR,
					    dlm->dlm_debugfs_subroot,
					    dlm, &debug_purgelist_fops);
	if (!dc->debug_purgelist_dentry) {
		mlog_errno(-ENOMEM);
		goto bail;
	}

	dlm_debug_get(dc);
	return 0;

bail:
	/* removes whichever dentries were created before the failure */
	dlm_debug_shutdown(dlm);
	return -ENOMEM;
}
971
972void dlm_debug_shutdown(struct dlm_ctxt *dlm)
973{
974 struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
975
976 if (dc) {
977 if (dc->debug_purgelist_dentry)
978 debugfs_remove(dc->debug_purgelist_dentry);
979 if (dc->debug_mle_dentry)
980 debugfs_remove(dc->debug_mle_dentry);
981 if (dc->debug_lockres_dentry)
982 debugfs_remove(dc->debug_lockres_dentry);
983 if (dc->debug_state_dentry)
984 debugfs_remove(dc->debug_state_dentry);
985 dlm_debug_put(dc);
986 }
987}
988
989/* subroot - domain dir */
990int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
991{
992 dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name,
993 dlm_debugfs_root);
994 if (!dlm->dlm_debugfs_subroot) {
995 mlog_errno(-ENOMEM);
996 goto bail;
997 }
998
999 dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt),
1000 GFP_KERNEL);
1001 if (!dlm->dlm_debug_ctxt) {
1002 mlog_errno(-ENOMEM);
1003 goto bail;
1004 }
1005 kref_init(&dlm->dlm_debug_ctxt->debug_refcnt);
1006
1007 return 0;
1008bail:
1009 dlm_destroy_debugfs_subroot(dlm);
1010 return -ENOMEM;
1011}
1012
1013void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
1014{
1015 if (dlm->dlm_debugfs_subroot)
1016 debugfs_remove(dlm->dlm_debugfs_subroot);
1017}
1018
1019/* debugfs root */
1020int dlm_create_debugfs_root(void)
1021{
1022 dlm_debugfs_root = debugfs_create_dir(DLM_DEBUGFS_DIR, NULL);
1023 if (!dlm_debugfs_root) {
1024 mlog_errno(-ENOMEM);
1025 return -ENOMEM;
1026 }
1027 return 0;
1028}
1029
1030void dlm_destroy_debugfs_root(void)
1031{
1032 if (dlm_debugfs_root)
1033 debugfs_remove(dlm_debugfs_root);
1034}
1035#endif /* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
new file mode 100644
index 000000000000..d34a62a3a625
--- /dev/null
+++ b/fs/ocfs2/dlm/dlmdebug.h
@@ -0,0 +1,86 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * dlmdebug.h
5 *
6 * Copyright (C) 2008 Oracle. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public
19 * License along with this program; if not, write to the
20 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 * Boston, MA 021110-1307, USA.
22 *
23 */
24
25#ifndef DLMDEBUG_H
26#define DLMDEBUG_H
27
28void dlm_print_one_mle(struct dlm_master_list_entry *mle);
29
30#ifdef CONFIG_DEBUG_FS
31
32struct dlm_debug_ctxt {
33 struct kref debug_refcnt;
34 struct dentry *debug_state_dentry;
35 struct dentry *debug_lockres_dentry;
36 struct dentry *debug_mle_dentry;
37 struct dentry *debug_purgelist_dentry;
38};
39
40struct debug_buffer {
41 int len;
42 char *buf;
43};
44
45struct debug_lockres {
46 int dl_len;
47 char *dl_buf;
48 struct dlm_ctxt *dl_ctxt;
49 struct dlm_lock_resource *dl_res;
50};
51
52int dlm_debug_init(struct dlm_ctxt *dlm);
53void dlm_debug_shutdown(struct dlm_ctxt *dlm);
54
55int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm);
56void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm);
57
58int dlm_create_debugfs_root(void);
59void dlm_destroy_debugfs_root(void);
60
61#else
62
/*
 * !CONFIG_DEBUG_FS stubs.  These live in a header, so they must be
 * "static inline": a plain "static" definition would be instantiated in
 * every translation unit that includes dlmdebug.h and trigger
 * defined-but-not-used warnings wherever a stub goes uncalled.
 */
static inline int dlm_debug_init(struct dlm_ctxt *dlm)
{
	return 0;
}
static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm)
{
}
static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
{
	return 0;
}
static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
{
}
static inline int dlm_create_debugfs_root(void)
{
	return 0;
}
static inline void dlm_destroy_debugfs_root(void)
{
}
84
85#endif /* CONFIG_DEBUG_FS */
86#endif /* DLMDEBUG_H */
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 638d2ebb892b..63f8125824e8 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -33,6 +33,7 @@
33#include <linux/spinlock.h> 33#include <linux/spinlock.h>
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/err.h> 35#include <linux/err.h>
36#include <linux/debugfs.h>
36 37
37#include "cluster/heartbeat.h" 38#include "cluster/heartbeat.h"
38#include "cluster/nodemanager.h" 39#include "cluster/nodemanager.h"
@@ -40,8 +41,8 @@
40 41
41#include "dlmapi.h" 42#include "dlmapi.h"
42#include "dlmcommon.h" 43#include "dlmcommon.h"
43
44#include "dlmdomain.h" 44#include "dlmdomain.h"
45#include "dlmdebug.h"
45 46
46#include "dlmver.h" 47#include "dlmver.h"
47 48
@@ -298,6 +299,8 @@ static int dlm_wait_on_domain_helper(const char *domain)
298 299
299static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) 300static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
300{ 301{
302 dlm_destroy_debugfs_subroot(dlm);
303
301 if (dlm->lockres_hash) 304 if (dlm->lockres_hash)
302 dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); 305 dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
303 306
@@ -395,6 +398,7 @@ static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm)
395static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) 398static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
396{ 399{
397 dlm_unregister_domain_handlers(dlm); 400 dlm_unregister_domain_handlers(dlm);
401 dlm_debug_shutdown(dlm);
398 dlm_complete_thread(dlm); 402 dlm_complete_thread(dlm);
399 dlm_complete_recovery_thread(dlm); 403 dlm_complete_recovery_thread(dlm);
400 dlm_destroy_dlm_worker(dlm); 404 dlm_destroy_dlm_worker(dlm);
@@ -644,6 +648,7 @@ int dlm_shutting_down(struct dlm_ctxt *dlm)
644void dlm_unregister_domain(struct dlm_ctxt *dlm) 648void dlm_unregister_domain(struct dlm_ctxt *dlm)
645{ 649{
646 int leave = 0; 650 int leave = 0;
651 struct dlm_lock_resource *res;
647 652
648 spin_lock(&dlm_domain_lock); 653 spin_lock(&dlm_domain_lock);
649 BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED); 654 BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED);
@@ -673,6 +678,15 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
673 msleep(500); 678 msleep(500);
674 mlog(0, "%s: more migration to do\n", dlm->name); 679 mlog(0, "%s: more migration to do\n", dlm->name);
675 } 680 }
681
682 /* This list should be empty. If not, print remaining lockres */
683 if (!list_empty(&dlm->tracking_list)) {
684 mlog(ML_ERROR, "Following lockres' are still on the "
685 "tracking list:\n");
686 list_for_each_entry(res, &dlm->tracking_list, tracking)
687 dlm_print_one_lock_resource(res);
688 }
689
676 dlm_mark_domain_leaving(dlm); 690 dlm_mark_domain_leaving(dlm);
677 dlm_leave_domain(dlm); 691 dlm_leave_domain(dlm);
678 dlm_complete_dlm_shutdown(dlm); 692 dlm_complete_dlm_shutdown(dlm);
@@ -713,14 +727,46 @@ static int dlm_query_join_proto_check(char *proto_type, int node,
713 return rc; 727 return rc;
714} 728}
715 729
730/*
731 * struct dlm_query_join_packet is made up of four one-byte fields. They
732 * are effectively in big-endian order already. However, little-endian
733 * machines swap them before putting the packet on the wire (because
734 * query_join's response is a status, and that status is treated as a u32
735 * on the wire). Thus, a big-endian and little-endian machines will treat
736 * this structure differently.
737 *
738 * The solution is to have little-endian machines swap the structure when
739 * converting from the structure to the u32 representation. This will
740 * result in the structure having the correct format on the wire no matter
741 * the host endian format.
742 */
743static void dlm_query_join_packet_to_wire(struct dlm_query_join_packet *packet,
744 u32 *wire)
745{
746 union dlm_query_join_response response;
747
748 response.packet = *packet;
749 *wire = cpu_to_be32(response.intval);
750}
751
752static void dlm_query_join_wire_to_packet(u32 wire,
753 struct dlm_query_join_packet *packet)
754{
755 union dlm_query_join_response response;
756
757 response.intval = cpu_to_be32(wire);
758 *packet = response.packet;
759}
760
716static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, 761static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
717 void **ret_data) 762 void **ret_data)
718{ 763{
719 struct dlm_query_join_request *query; 764 struct dlm_query_join_request *query;
720 union dlm_query_join_response response = { 765 struct dlm_query_join_packet packet = {
721 .packet.code = JOIN_DISALLOW, 766 .code = JOIN_DISALLOW,
722 }; 767 };
723 struct dlm_ctxt *dlm = NULL; 768 struct dlm_ctxt *dlm = NULL;
769 u32 response;
724 u8 nodenum; 770 u8 nodenum;
725 771
726 query = (struct dlm_query_join_request *) msg->buf; 772 query = (struct dlm_query_join_request *) msg->buf;
@@ -737,11 +783,11 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
737 mlog(0, "node %u is not in our live map yet\n", 783 mlog(0, "node %u is not in our live map yet\n",
738 query->node_idx); 784 query->node_idx);
739 785
740 response.packet.code = JOIN_DISALLOW; 786 packet.code = JOIN_DISALLOW;
741 goto respond; 787 goto respond;
742 } 788 }
743 789
744 response.packet.code = JOIN_OK_NO_MAP; 790 packet.code = JOIN_OK_NO_MAP;
745 791
746 spin_lock(&dlm_domain_lock); 792 spin_lock(&dlm_domain_lock);
747 dlm = __dlm_lookup_domain_full(query->domain, query->name_len); 793 dlm = __dlm_lookup_domain_full(query->domain, query->name_len);
@@ -760,7 +806,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
760 mlog(0, "disallow join as node %u does not " 806 mlog(0, "disallow join as node %u does not "
761 "have node %u in its nodemap\n", 807 "have node %u in its nodemap\n",
762 query->node_idx, nodenum); 808 query->node_idx, nodenum);
763 response.packet.code = JOIN_DISALLOW; 809 packet.code = JOIN_DISALLOW;
764 goto unlock_respond; 810 goto unlock_respond;
765 } 811 }
766 } 812 }
@@ -780,23 +826,23 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
780 /*If this is a brand new context and we 826 /*If this is a brand new context and we
781 * haven't started our join process yet, then 827 * haven't started our join process yet, then
782 * the other node won the race. */ 828 * the other node won the race. */
783 response.packet.code = JOIN_OK_NO_MAP; 829 packet.code = JOIN_OK_NO_MAP;
784 } else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { 830 } else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) {
785 /* Disallow parallel joins. */ 831 /* Disallow parallel joins. */
786 response.packet.code = JOIN_DISALLOW; 832 packet.code = JOIN_DISALLOW;
787 } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { 833 } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
788 mlog(0, "node %u trying to join, but recovery " 834 mlog(0, "node %u trying to join, but recovery "
789 "is ongoing.\n", bit); 835 "is ongoing.\n", bit);
790 response.packet.code = JOIN_DISALLOW; 836 packet.code = JOIN_DISALLOW;
791 } else if (test_bit(bit, dlm->recovery_map)) { 837 } else if (test_bit(bit, dlm->recovery_map)) {
792 mlog(0, "node %u trying to join, but it " 838 mlog(0, "node %u trying to join, but it "
793 "still needs recovery.\n", bit); 839 "still needs recovery.\n", bit);
794 response.packet.code = JOIN_DISALLOW; 840 packet.code = JOIN_DISALLOW;
795 } else if (test_bit(bit, dlm->domain_map)) { 841 } else if (test_bit(bit, dlm->domain_map)) {
796 mlog(0, "node %u trying to join, but it " 842 mlog(0, "node %u trying to join, but it "
797 "is still in the domain! needs recovery?\n", 843 "is still in the domain! needs recovery?\n",
798 bit); 844 bit);
799 response.packet.code = JOIN_DISALLOW; 845 packet.code = JOIN_DISALLOW;
800 } else { 846 } else {
801 /* Alright we're fully a part of this domain 847 /* Alright we're fully a part of this domain
802 * so we keep some state as to who's joining 848 * so we keep some state as to who's joining
@@ -807,19 +853,15 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
807 if (dlm_query_join_proto_check("DLM", bit, 853 if (dlm_query_join_proto_check("DLM", bit,
808 &dlm->dlm_locking_proto, 854 &dlm->dlm_locking_proto,
809 &query->dlm_proto)) { 855 &query->dlm_proto)) {
810 response.packet.code = 856 packet.code = JOIN_PROTOCOL_MISMATCH;
811 JOIN_PROTOCOL_MISMATCH;
812 } else if (dlm_query_join_proto_check("fs", bit, 857 } else if (dlm_query_join_proto_check("fs", bit,
813 &dlm->fs_locking_proto, 858 &dlm->fs_locking_proto,
814 &query->fs_proto)) { 859 &query->fs_proto)) {
815 response.packet.code = 860 packet.code = JOIN_PROTOCOL_MISMATCH;
816 JOIN_PROTOCOL_MISMATCH;
817 } else { 861 } else {
818 response.packet.dlm_minor = 862 packet.dlm_minor = query->dlm_proto.pv_minor;
819 query->dlm_proto.pv_minor; 863 packet.fs_minor = query->fs_proto.pv_minor;
820 response.packet.fs_minor = 864 packet.code = JOIN_OK;
821 query->fs_proto.pv_minor;
822 response.packet.code = JOIN_OK;
823 __dlm_set_joining_node(dlm, query->node_idx); 865 __dlm_set_joining_node(dlm, query->node_idx);
824 } 866 }
825 } 867 }
@@ -830,9 +872,10 @@ unlock_respond:
830 spin_unlock(&dlm_domain_lock); 872 spin_unlock(&dlm_domain_lock);
831 873
832respond: 874respond:
833 mlog(0, "We respond with %u\n", response.packet.code); 875 mlog(0, "We respond with %u\n", packet.code);
834 876
835 return response.intval; 877 dlm_query_join_packet_to_wire(&packet, &response);
878 return response;
836} 879}
837 880
838static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, 881static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
@@ -937,7 +980,7 @@ static int dlm_send_join_cancels(struct dlm_ctxt *dlm,
937 sizeof(unsigned long))) { 980 sizeof(unsigned long))) {
938 mlog(ML_ERROR, 981 mlog(ML_ERROR,
939 "map_size %u != BITS_TO_LONGS(O2NM_MAX_NODES) %u\n", 982 "map_size %u != BITS_TO_LONGS(O2NM_MAX_NODES) %u\n",
940 map_size, BITS_TO_LONGS(O2NM_MAX_NODES)); 983 map_size, (unsigned)BITS_TO_LONGS(O2NM_MAX_NODES));
941 return -EINVAL; 984 return -EINVAL;
942 } 985 }
943 986
@@ -968,7 +1011,8 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
968{ 1011{
969 int status; 1012 int status;
970 struct dlm_query_join_request join_msg; 1013 struct dlm_query_join_request join_msg;
971 union dlm_query_join_response join_resp; 1014 struct dlm_query_join_packet packet;
1015 u32 join_resp;
972 1016
973 mlog(0, "querying node %d\n", node); 1017 mlog(0, "querying node %d\n", node);
974 1018
@@ -984,11 +1028,12 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
984 1028
985 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, 1029 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
986 sizeof(join_msg), node, 1030 sizeof(join_msg), node,
987 &join_resp.intval); 1031 &join_resp);
988 if (status < 0 && status != -ENOPROTOOPT) { 1032 if (status < 0 && status != -ENOPROTOOPT) {
989 mlog_errno(status); 1033 mlog_errno(status);
990 goto bail; 1034 goto bail;
991 } 1035 }
1036 dlm_query_join_wire_to_packet(join_resp, &packet);
992 1037
993 /* -ENOPROTOOPT from the net code means the other side isn't 1038 /* -ENOPROTOOPT from the net code means the other side isn't
994 listening for our message type -- that's fine, it means 1039 listening for our message type -- that's fine, it means
@@ -997,10 +1042,10 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
997 if (status == -ENOPROTOOPT) { 1042 if (status == -ENOPROTOOPT) {
998 status = 0; 1043 status = 0;
999 *response = JOIN_OK_NO_MAP; 1044 *response = JOIN_OK_NO_MAP;
1000 } else if (join_resp.packet.code == JOIN_DISALLOW || 1045 } else if (packet.code == JOIN_DISALLOW ||
1001 join_resp.packet.code == JOIN_OK_NO_MAP) { 1046 packet.code == JOIN_OK_NO_MAP) {
1002 *response = join_resp.packet.code; 1047 *response = packet.code;
1003 } else if (join_resp.packet.code == JOIN_PROTOCOL_MISMATCH) { 1048 } else if (packet.code == JOIN_PROTOCOL_MISMATCH) {
1004 mlog(ML_NOTICE, 1049 mlog(ML_NOTICE,
1005 "This node requested DLM locking protocol %u.%u and " 1050 "This node requested DLM locking protocol %u.%u and "
1006 "filesystem locking protocol %u.%u. At least one of " 1051 "filesystem locking protocol %u.%u. At least one of "
@@ -1012,14 +1057,12 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
1012 dlm->fs_locking_proto.pv_minor, 1057 dlm->fs_locking_proto.pv_minor,
1013 node); 1058 node);
1014 status = -EPROTO; 1059 status = -EPROTO;
1015 *response = join_resp.packet.code; 1060 *response = packet.code;
1016 } else if (join_resp.packet.code == JOIN_OK) { 1061 } else if (packet.code == JOIN_OK) {
1017 *response = join_resp.packet.code; 1062 *response = packet.code;
1018 /* Use the same locking protocol as the remote node */ 1063 /* Use the same locking protocol as the remote node */
1019 dlm->dlm_locking_proto.pv_minor = 1064 dlm->dlm_locking_proto.pv_minor = packet.dlm_minor;
1020 join_resp.packet.dlm_minor; 1065 dlm->fs_locking_proto.pv_minor = packet.fs_minor;
1021 dlm->fs_locking_proto.pv_minor =
1022 join_resp.packet.fs_minor;
1023 mlog(0, 1066 mlog(0,
1024 "Node %d responds JOIN_OK with DLM locking protocol " 1067 "Node %d responds JOIN_OK with DLM locking protocol "
1025 "%u.%u and fs locking protocol %u.%u\n", 1068 "%u.%u and fs locking protocol %u.%u\n",
@@ -1031,11 +1074,11 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
1031 } else { 1074 } else {
1032 status = -EINVAL; 1075 status = -EINVAL;
1033 mlog(ML_ERROR, "invalid response %d from node %u\n", 1076 mlog(ML_ERROR, "invalid response %d from node %u\n",
1034 join_resp.packet.code, node); 1077 packet.code, node);
1035 } 1078 }
1036 1079
1037 mlog(0, "status %d, node %d response is %d\n", status, node, 1080 mlog(0, "status %d, node %d response is %d\n", status, node,
1038 *response); 1081 *response);
1039 1082
1040bail: 1083bail:
1041 return status; 1084 return status;
@@ -1376,6 +1419,12 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
1376 goto bail; 1419 goto bail;
1377 } 1420 }
1378 1421
1422 status = dlm_debug_init(dlm);
1423 if (status < 0) {
1424 mlog_errno(status);
1425 goto bail;
1426 }
1427
1379 status = dlm_launch_thread(dlm); 1428 status = dlm_launch_thread(dlm);
1380 if (status < 0) { 1429 if (status < 0) {
1381 mlog_errno(status); 1430 mlog_errno(status);
@@ -1443,6 +1492,7 @@ bail:
1443 1492
1444 if (status) { 1493 if (status) {
1445 dlm_unregister_domain_handlers(dlm); 1494 dlm_unregister_domain_handlers(dlm);
1495 dlm_debug_shutdown(dlm);
1446 dlm_complete_thread(dlm); 1496 dlm_complete_thread(dlm);
1447 dlm_complete_recovery_thread(dlm); 1497 dlm_complete_recovery_thread(dlm);
1448 dlm_destroy_dlm_worker(dlm); 1498 dlm_destroy_dlm_worker(dlm);
@@ -1455,6 +1505,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1455 u32 key) 1505 u32 key)
1456{ 1506{
1457 int i; 1507 int i;
1508 int ret;
1458 struct dlm_ctxt *dlm = NULL; 1509 struct dlm_ctxt *dlm = NULL;
1459 1510
1460 dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); 1511 dlm = kzalloc(sizeof(*dlm), GFP_KERNEL);
@@ -1487,6 +1538,15 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1487 dlm->key = key; 1538 dlm->key = key;
1488 dlm->node_num = o2nm_this_node(); 1539 dlm->node_num = o2nm_this_node();
1489 1540
1541 ret = dlm_create_debugfs_subroot(dlm);
1542 if (ret < 0) {
1543 dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
1544 kfree(dlm->name);
1545 kfree(dlm);
1546 dlm = NULL;
1547 goto leave;
1548 }
1549
1490 spin_lock_init(&dlm->spinlock); 1550 spin_lock_init(&dlm->spinlock);
1491 spin_lock_init(&dlm->master_lock); 1551 spin_lock_init(&dlm->master_lock);
1492 spin_lock_init(&dlm->ast_lock); 1552 spin_lock_init(&dlm->ast_lock);
@@ -1497,6 +1557,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1497 INIT_LIST_HEAD(&dlm->reco.node_data); 1557 INIT_LIST_HEAD(&dlm->reco.node_data);
1498 INIT_LIST_HEAD(&dlm->purge_list); 1558 INIT_LIST_HEAD(&dlm->purge_list);
1499 INIT_LIST_HEAD(&dlm->dlm_domain_handlers); 1559 INIT_LIST_HEAD(&dlm->dlm_domain_handlers);
1560 INIT_LIST_HEAD(&dlm->tracking_list);
1500 dlm->reco.state = 0; 1561 dlm->reco.state = 0;
1501 1562
1502 INIT_LIST_HEAD(&dlm->pending_asts); 1563 INIT_LIST_HEAD(&dlm->pending_asts);
@@ -1787,21 +1848,49 @@ static int __init dlm_init(void)
1787 dlm_print_version(); 1848 dlm_print_version();
1788 1849
1789 status = dlm_init_mle_cache(); 1850 status = dlm_init_mle_cache();
1790 if (status) 1851 if (status) {
1791 return -1; 1852 mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n");
1853 goto error;
1854 }
1855
1856 status = dlm_init_master_caches();
1857 if (status) {
1858 mlog(ML_ERROR, "Could not create o2dlm_lockres and "
1859 "o2dlm_lockname slabcaches\n");
1860 goto error;
1861 }
1862
1863 status = dlm_init_lock_cache();
1864 if (status) {
1865 mlog(ML_ERROR, "Count not create o2dlm_lock slabcache\n");
1866 goto error;
1867 }
1792 1868
1793 status = dlm_register_net_handlers(); 1869 status = dlm_register_net_handlers();
1794 if (status) { 1870 if (status) {
1795 dlm_destroy_mle_cache(); 1871 mlog(ML_ERROR, "Unable to register network handlers\n");
1796 return -1; 1872 goto error;
1797 } 1873 }
1798 1874
1875 status = dlm_create_debugfs_root();
1876 if (status)
1877 goto error;
1878
1799 return 0; 1879 return 0;
1880error:
1881 dlm_unregister_net_handlers();
1882 dlm_destroy_lock_cache();
1883 dlm_destroy_master_caches();
1884 dlm_destroy_mle_cache();
1885 return -1;
1800} 1886}
1801 1887
1802static void __exit dlm_exit (void) 1888static void __exit dlm_exit (void)
1803{ 1889{
1890 dlm_destroy_debugfs_root();
1804 dlm_unregister_net_handlers(); 1891 dlm_unregister_net_handlers();
1892 dlm_destroy_lock_cache();
1893 dlm_destroy_master_caches();
1805 dlm_destroy_mle_cache(); 1894 dlm_destroy_mle_cache();
1806} 1895}
1807 1896
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 52578d907d9a..83a9f2972ac8 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -53,6 +53,8 @@
53#define MLOG_MASK_PREFIX ML_DLM 53#define MLOG_MASK_PREFIX ML_DLM
54#include "cluster/masklog.h" 54#include "cluster/masklog.h"
55 55
56static struct kmem_cache *dlm_lock_cache = NULL;
57
56static DEFINE_SPINLOCK(dlm_cookie_lock); 58static DEFINE_SPINLOCK(dlm_cookie_lock);
57static u64 dlm_next_cookie = 1; 59static u64 dlm_next_cookie = 1;
58 60
@@ -64,6 +66,22 @@ static void dlm_init_lock(struct dlm_lock *newlock, int type,
64static void dlm_lock_release(struct kref *kref); 66static void dlm_lock_release(struct kref *kref);
65static void dlm_lock_detach_lockres(struct dlm_lock *lock); 67static void dlm_lock_detach_lockres(struct dlm_lock *lock);
66 68
69int dlm_init_lock_cache(void)
70{
71 dlm_lock_cache = kmem_cache_create("o2dlm_lock",
72 sizeof(struct dlm_lock),
73 0, SLAB_HWCACHE_ALIGN, NULL);
74 if (dlm_lock_cache == NULL)
75 return -ENOMEM;
76 return 0;
77}
78
79void dlm_destroy_lock_cache(void)
80{
81 if (dlm_lock_cache)
82 kmem_cache_destroy(dlm_lock_cache);
83}
84
67/* Tell us whether we can grant a new lock request. 85/* Tell us whether we can grant a new lock request.
68 * locking: 86 * locking:
69 * caller needs: res->spinlock 87 * caller needs: res->spinlock
@@ -353,7 +371,7 @@ static void dlm_lock_release(struct kref *kref)
353 mlog(0, "freeing kernel-allocated lksb\n"); 371 mlog(0, "freeing kernel-allocated lksb\n");
354 kfree(lock->lksb); 372 kfree(lock->lksb);
355 } 373 }
356 kfree(lock); 374 kmem_cache_free(dlm_lock_cache, lock);
357} 375}
358 376
359/* associate a lock with it's lockres, getting a ref on the lockres */ 377/* associate a lock with it's lockres, getting a ref on the lockres */
@@ -412,7 +430,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
412 struct dlm_lock *lock; 430 struct dlm_lock *lock;
413 int kernel_allocated = 0; 431 int kernel_allocated = 0;
414 432
415 lock = kzalloc(sizeof(*lock), GFP_NOFS); 433 lock = (struct dlm_lock *) kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS);
416 if (!lock) 434 if (!lock)
417 return NULL; 435 return NULL;
418 436
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a54d33d95ada..efc015c6128a 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -48,47 +48,11 @@
48#include "dlmapi.h" 48#include "dlmapi.h"
49#include "dlmcommon.h" 49#include "dlmcommon.h"
50#include "dlmdomain.h" 50#include "dlmdomain.h"
51#include "dlmdebug.h"
51 52
52#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER) 53#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER)
53#include "cluster/masklog.h" 54#include "cluster/masklog.h"
54 55
55enum dlm_mle_type {
56 DLM_MLE_BLOCK,
57 DLM_MLE_MASTER,
58 DLM_MLE_MIGRATION
59};
60
61struct dlm_lock_name
62{
63 u8 len;
64 u8 name[DLM_LOCKID_NAME_MAX];
65};
66
67struct dlm_master_list_entry
68{
69 struct list_head list;
70 struct list_head hb_events;
71 struct dlm_ctxt *dlm;
72 spinlock_t spinlock;
73 wait_queue_head_t wq;
74 atomic_t woken;
75 struct kref mle_refs;
76 int inuse;
77 unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
78 unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
79 unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
80 unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
81 u8 master;
82 u8 new_master;
83 enum dlm_mle_type type;
84 struct o2hb_callback_func mle_hb_up;
85 struct o2hb_callback_func mle_hb_down;
86 union {
87 struct dlm_lock_resource *res;
88 struct dlm_lock_name name;
89 } u;
90};
91
92static void dlm_mle_node_down(struct dlm_ctxt *dlm, 56static void dlm_mle_node_down(struct dlm_ctxt *dlm,
93 struct dlm_master_list_entry *mle, 57 struct dlm_master_list_entry *mle,
94 struct o2nm_node *node, 58 struct o2nm_node *node,
@@ -128,98 +92,10 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
128 return 1; 92 return 1;
129} 93}
130 94
131#define dlm_print_nodemap(m) _dlm_print_nodemap(m,#m) 95static struct kmem_cache *dlm_lockres_cache = NULL;
132static void _dlm_print_nodemap(unsigned long *map, const char *mapname) 96static struct kmem_cache *dlm_lockname_cache = NULL;
133{
134 int i;
135 printk("%s=[ ", mapname);
136 for (i=0; i<O2NM_MAX_NODES; i++)
137 if (test_bit(i, map))
138 printk("%d ", i);
139 printk("]");
140}
141
142static void dlm_print_one_mle(struct dlm_master_list_entry *mle)
143{
144 int refs;
145 char *type;
146 char attached;
147 u8 master;
148 unsigned int namelen;
149 const char *name;
150 struct kref *k;
151 unsigned long *maybe = mle->maybe_map,
152 *vote = mle->vote_map,
153 *resp = mle->response_map,
154 *node = mle->node_map;
155
156 k = &mle->mle_refs;
157 if (mle->type == DLM_MLE_BLOCK)
158 type = "BLK";
159 else if (mle->type == DLM_MLE_MASTER)
160 type = "MAS";
161 else
162 type = "MIG";
163 refs = atomic_read(&k->refcount);
164 master = mle->master;
165 attached = (list_empty(&mle->hb_events) ? 'N' : 'Y');
166
167 if (mle->type != DLM_MLE_MASTER) {
168 namelen = mle->u.name.len;
169 name = mle->u.name.name;
170 } else {
171 namelen = mle->u.res->lockname.len;
172 name = mle->u.res->lockname.name;
173 }
174
175 mlog(ML_NOTICE, "%.*s: %3s refs=%3d mas=%3u new=%3u evt=%c inuse=%d ",
176 namelen, name, type, refs, master, mle->new_master, attached,
177 mle->inuse);
178 dlm_print_nodemap(maybe);
179 printk(", ");
180 dlm_print_nodemap(vote);
181 printk(", ");
182 dlm_print_nodemap(resp);
183 printk(", ");
184 dlm_print_nodemap(node);
185 printk(", ");
186 printk("\n");
187}
188
189#if 0
190/* Code here is included but defined out as it aids debugging */
191
192static void dlm_dump_mles(struct dlm_ctxt *dlm)
193{
194 struct dlm_master_list_entry *mle;
195
196 mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name);
197 spin_lock(&dlm->master_lock);
198 list_for_each_entry(mle, &dlm->master_list, list)
199 dlm_print_one_mle(mle);
200 spin_unlock(&dlm->master_lock);
201}
202
203int dlm_dump_all_mles(const char __user *data, unsigned int len)
204{
205 struct dlm_ctxt *dlm;
206
207 spin_lock(&dlm_domain_lock);
208 list_for_each_entry(dlm, &dlm_domains, list) {
209 mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name);
210 dlm_dump_mles(dlm);
211 }
212 spin_unlock(&dlm_domain_lock);
213 return len;
214}
215EXPORT_SYMBOL_GPL(dlm_dump_all_mles);
216
217#endif /* 0 */
218
219
220static struct kmem_cache *dlm_mle_cache = NULL; 97static struct kmem_cache *dlm_mle_cache = NULL;
221 98
222
223static void dlm_mle_release(struct kref *kref); 99static void dlm_mle_release(struct kref *kref);
224static void dlm_init_mle(struct dlm_master_list_entry *mle, 100static void dlm_init_mle(struct dlm_master_list_entry *mle,
225 enum dlm_mle_type type, 101 enum dlm_mle_type type,
@@ -507,7 +383,7 @@ static void dlm_mle_node_up(struct dlm_ctxt *dlm,
507 383
508int dlm_init_mle_cache(void) 384int dlm_init_mle_cache(void)
509{ 385{
510 dlm_mle_cache = kmem_cache_create("dlm_mle_cache", 386 dlm_mle_cache = kmem_cache_create("o2dlm_mle",
511 sizeof(struct dlm_master_list_entry), 387 sizeof(struct dlm_master_list_entry),
512 0, SLAB_HWCACHE_ALIGN, 388 0, SLAB_HWCACHE_ALIGN,
513 NULL); 389 NULL);
@@ -560,6 +436,35 @@ static void dlm_mle_release(struct kref *kref)
560 * LOCK RESOURCE FUNCTIONS 436 * LOCK RESOURCE FUNCTIONS
561 */ 437 */
562 438
439int dlm_init_master_caches(void)
440{
441 dlm_lockres_cache = kmem_cache_create("o2dlm_lockres",
442 sizeof(struct dlm_lock_resource),
443 0, SLAB_HWCACHE_ALIGN, NULL);
444 if (!dlm_lockres_cache)
445 goto bail;
446
447 dlm_lockname_cache = kmem_cache_create("o2dlm_lockname",
448 DLM_LOCKID_NAME_MAX, 0,
449 SLAB_HWCACHE_ALIGN, NULL);
450 if (!dlm_lockname_cache)
451 goto bail;
452
453 return 0;
454bail:
455 dlm_destroy_master_caches();
456 return -ENOMEM;
457}
458
459void dlm_destroy_master_caches(void)
460{
461 if (dlm_lockname_cache)
462 kmem_cache_destroy(dlm_lockname_cache);
463
464 if (dlm_lockres_cache)
465 kmem_cache_destroy(dlm_lockres_cache);
466}
467
563static void dlm_set_lockres_owner(struct dlm_ctxt *dlm, 468static void dlm_set_lockres_owner(struct dlm_ctxt *dlm,
564 struct dlm_lock_resource *res, 469 struct dlm_lock_resource *res,
565 u8 owner) 470 u8 owner)
@@ -610,6 +515,14 @@ static void dlm_lockres_release(struct kref *kref)
610 mlog(0, "destroying lockres %.*s\n", res->lockname.len, 515 mlog(0, "destroying lockres %.*s\n", res->lockname.len,
611 res->lockname.name); 516 res->lockname.name);
612 517
518 if (!list_empty(&res->tracking))
519 list_del_init(&res->tracking);
520 else {
521 mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n",
522 res->lockname.len, res->lockname.name);
523 dlm_print_one_lock_resource(res);
524 }
525
613 if (!hlist_unhashed(&res->hash_node) || 526 if (!hlist_unhashed(&res->hash_node) ||
614 !list_empty(&res->granted) || 527 !list_empty(&res->granted) ||
615 !list_empty(&res->converting) || 528 !list_empty(&res->converting) ||
@@ -642,9 +555,9 @@ static void dlm_lockres_release(struct kref *kref)
642 BUG_ON(!list_empty(&res->recovering)); 555 BUG_ON(!list_empty(&res->recovering));
643 BUG_ON(!list_empty(&res->purge)); 556 BUG_ON(!list_empty(&res->purge));
644 557
645 kfree(res->lockname.name); 558 kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name);
646 559
647 kfree(res); 560 kmem_cache_free(dlm_lockres_cache, res);
648} 561}
649 562
650void dlm_lockres_put(struct dlm_lock_resource *res) 563void dlm_lockres_put(struct dlm_lock_resource *res)
@@ -677,6 +590,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
677 INIT_LIST_HEAD(&res->dirty); 590 INIT_LIST_HEAD(&res->dirty);
678 INIT_LIST_HEAD(&res->recovering); 591 INIT_LIST_HEAD(&res->recovering);
679 INIT_LIST_HEAD(&res->purge); 592 INIT_LIST_HEAD(&res->purge);
593 INIT_LIST_HEAD(&res->tracking);
680 atomic_set(&res->asts_reserved, 0); 594 atomic_set(&res->asts_reserved, 0);
681 res->migration_pending = 0; 595 res->migration_pending = 0;
682 res->inflight_locks = 0; 596 res->inflight_locks = 0;
@@ -692,6 +606,8 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
692 606
693 res->last_used = 0; 607 res->last_used = 0;
694 608
609 list_add_tail(&res->tracking, &dlm->tracking_list);
610
695 memset(res->lvb, 0, DLM_LVB_LEN); 611 memset(res->lvb, 0, DLM_LVB_LEN);
696 memset(res->refmap, 0, sizeof(res->refmap)); 612 memset(res->refmap, 0, sizeof(res->refmap));
697} 613}
@@ -700,20 +616,28 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
700 const char *name, 616 const char *name,
701 unsigned int namelen) 617 unsigned int namelen)
702{ 618{
703 struct dlm_lock_resource *res; 619 struct dlm_lock_resource *res = NULL;
704 620
705 res = kmalloc(sizeof(struct dlm_lock_resource), GFP_NOFS); 621 res = (struct dlm_lock_resource *)
622 kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
706 if (!res) 623 if (!res)
707 return NULL; 624 goto error;
708 625
709 res->lockname.name = kmalloc(namelen, GFP_NOFS); 626 res->lockname.name = (char *)
710 if (!res->lockname.name) { 627 kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
711 kfree(res); 628 if (!res->lockname.name)
712 return NULL; 629 goto error;
713 }
714 630
715 dlm_init_lockres(dlm, res, name, namelen); 631 dlm_init_lockres(dlm, res, name, namelen);
716 return res; 632 return res;
633
634error:
635 if (res && res->lockname.name)
636 kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name);
637
638 if (res)
639 kmem_cache_free(dlm_lockres_cache, res);
640 return NULL;
717} 641}
718 642
719void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, 643void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
@@ -1663,7 +1587,12 @@ way_up_top:
1663 dlm_put_mle(tmpmle); 1587 dlm_put_mle(tmpmle);
1664 } 1588 }
1665send_response: 1589send_response:
1666 1590 /*
1591 * __dlm_lookup_lockres() grabbed a reference to this lockres.
1592 * The reference is released by dlm_assert_master_worker() under
1593 * the call to dlm_dispatch_assert_master(). If
1594 * dlm_assert_master_worker() isn't called, we drop it here.
1595 */
1667 if (dispatch_assert) { 1596 if (dispatch_assert) {
1668 if (response != DLM_MASTER_RESP_YES) 1597 if (response != DLM_MASTER_RESP_YES)
1669 mlog(ML_ERROR, "invalid response %d\n", response); 1598 mlog(ML_ERROR, "invalid response %d\n", response);
@@ -1678,7 +1607,11 @@ send_response:
1678 if (ret < 0) { 1607 if (ret < 0) {
1679 mlog(ML_ERROR, "failed to dispatch assert master work\n"); 1608 mlog(ML_ERROR, "failed to dispatch assert master work\n");
1680 response = DLM_MASTER_RESP_ERROR; 1609 response = DLM_MASTER_RESP_ERROR;
1610 dlm_lockres_put(res);
1681 } 1611 }
1612 } else {
1613 if (res)
1614 dlm_lockres_put(res);
1682 } 1615 }
1683 1616
1684 dlm_put(dlm); 1617 dlm_put(dlm);
@@ -1695,9 +1628,9 @@ send_response:
1695 * can periodically run all locks owned by this node 1628 * can periodically run all locks owned by this node
1696 * and re-assert across the cluster... 1629 * and re-assert across the cluster...
1697 */ 1630 */
1698int dlm_do_assert_master(struct dlm_ctxt *dlm, 1631static int dlm_do_assert_master(struct dlm_ctxt *dlm,
1699 struct dlm_lock_resource *res, 1632 struct dlm_lock_resource *res,
1700 void *nodemap, u32 flags) 1633 void *nodemap, u32 flags)
1701{ 1634{
1702 struct dlm_assert_master assert; 1635 struct dlm_assert_master assert;
1703 int to, tmpret; 1636 int to, tmpret;
@@ -2348,7 +2281,7 @@ int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
2348 mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref " 2281 mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
2349 "but it is already dropped!\n", dlm->name, 2282 "but it is already dropped!\n", dlm->name,
2350 res->lockname.len, res->lockname.name, node); 2283 res->lockname.len, res->lockname.name, node);
2351 __dlm_print_one_lock_resource(res); 2284 dlm_print_one_lock_resource(res);
2352 } 2285 }
2353 ret = 0; 2286 ret = 0;
2354 goto done; 2287 goto done;
@@ -2408,7 +2341,7 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
2408 mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref " 2341 mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
2409 "but it is already dropped!\n", dlm->name, 2342 "but it is already dropped!\n", dlm->name,
2410 res->lockname.len, res->lockname.name, node); 2343 res->lockname.len, res->lockname.name, node);
2411 __dlm_print_one_lock_resource(res); 2344 dlm_print_one_lock_resource(res);
2412 } 2345 }
2413 2346
2414 dlm_lockres_put(res); 2347 dlm_lockres_put(res);
@@ -2933,6 +2866,9 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
2933 dlm_lockres_clear_refmap_bit(lock->ml.node, res); 2866 dlm_lockres_clear_refmap_bit(lock->ml.node, res);
2934 list_del_init(&lock->list); 2867 list_del_init(&lock->list);
2935 dlm_lock_put(lock); 2868 dlm_lock_put(lock);
2869 /* In a normal unlock, we would have added a
2870 * DLM_UNLOCK_FREE_LOCK action. Force it. */
2871 dlm_lock_put(lock);
2936 } 2872 }
2937 } 2873 }
2938 queue++; 2874 queue++;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 91f747b8a538..bcb9260c3735 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -519,9 +519,9 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
519 return 0; 519 return 0;
520 520
521master_here: 521master_here:
522 mlog(0, "(%d) mastering recovery of %s:%u here(this=%u)!\n", 522 mlog(ML_NOTICE, "(%d) Node %u is the Recovery Master for the Dead Node "
523 task_pid_nr(dlm->dlm_reco_thread_task), 523 "%u for Domain %s\n", task_pid_nr(dlm->dlm_reco_thread_task),
524 dlm->name, dlm->reco.dead_node, dlm->node_num); 524 dlm->node_num, dlm->reco.dead_node, dlm->name);
525 525
526 status = dlm_remaster_locks(dlm, dlm->reco.dead_node); 526 status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
527 if (status < 0) { 527 if (status < 0) {
@@ -1191,7 +1191,7 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
1191 (ml->type == LKM_EXMODE || 1191 (ml->type == LKM_EXMODE ||
1192 memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) { 1192 memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) {
1193 mlog(ML_ERROR, "mismatched lvbs!\n"); 1193 mlog(ML_ERROR, "mismatched lvbs!\n");
1194 __dlm_print_one_lock_resource(lock->lockres); 1194 dlm_print_one_lock_resource(lock->lockres);
1195 BUG(); 1195 BUG();
1196 } 1196 }
1197 memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN); 1197 memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
@@ -1327,6 +1327,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
1327 (struct dlm_migratable_lockres *)msg->buf; 1327 (struct dlm_migratable_lockres *)msg->buf;
1328 int ret = 0; 1328 int ret = 0;
1329 u8 real_master; 1329 u8 real_master;
1330 u8 extra_refs = 0;
1330 char *buf = NULL; 1331 char *buf = NULL;
1331 struct dlm_work_item *item = NULL; 1332 struct dlm_work_item *item = NULL;
1332 struct dlm_lock_resource *res = NULL; 1333 struct dlm_lock_resource *res = NULL;
@@ -1404,16 +1405,28 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
1404 __dlm_insert_lockres(dlm, res); 1405 __dlm_insert_lockres(dlm, res);
1405 spin_unlock(&dlm->spinlock); 1406 spin_unlock(&dlm->spinlock);
1406 1407
1408 /* Add an extra ref for this lock-less lockres lest the
1409 * dlm_thread purges it before we get the chance to add
1410 * locks to it */
1411 dlm_lockres_get(res);
1412
1413 /* There are three refs that need to be put.
1414 * 1. Taken above.
1415 * 2. kref_init in dlm_new_lockres()->dlm_init_lockres().
1416 * 3. dlm_lookup_lockres()
1417 * The first one is handled at the end of this function. The
1418 * other two are handled in the worker thread after locks have
1419 * been attached. Yes, we don't wait for purge time to match
1420 * kref_init. The lockres will still have atleast one ref
1421 * added because it is in the hash __dlm_insert_lockres() */
1422 extra_refs++;
1423
1407 /* now that the new lockres is inserted, 1424 /* now that the new lockres is inserted,
1408 * make it usable by other processes */ 1425 * make it usable by other processes */
1409 spin_lock(&res->spinlock); 1426 spin_lock(&res->spinlock);
1410 res->state &= ~DLM_LOCK_RES_IN_PROGRESS; 1427 res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
1411 spin_unlock(&res->spinlock); 1428 spin_unlock(&res->spinlock);
1412 wake_up(&res->wq); 1429 wake_up(&res->wq);
1413
1414 /* add an extra ref for just-allocated lockres
1415 * otherwise the lockres will be purged immediately */
1416 dlm_lockres_get(res);
1417 } 1430 }
1418 1431
1419 /* at this point we have allocated everything we need, 1432 /* at this point we have allocated everything we need,
@@ -1443,12 +1456,17 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
1443 dlm_init_work_item(dlm, item, dlm_mig_lockres_worker, buf); 1456 dlm_init_work_item(dlm, item, dlm_mig_lockres_worker, buf);
1444 item->u.ml.lockres = res; /* already have a ref */ 1457 item->u.ml.lockres = res; /* already have a ref */
1445 item->u.ml.real_master = real_master; 1458 item->u.ml.real_master = real_master;
1459 item->u.ml.extra_ref = extra_refs;
1446 spin_lock(&dlm->work_lock); 1460 spin_lock(&dlm->work_lock);
1447 list_add_tail(&item->list, &dlm->work_list); 1461 list_add_tail(&item->list, &dlm->work_list);
1448 spin_unlock(&dlm->work_lock); 1462 spin_unlock(&dlm->work_lock);
1449 queue_work(dlm->dlm_worker, &dlm->dispatched_work); 1463 queue_work(dlm->dlm_worker, &dlm->dispatched_work);
1450 1464
1451leave: 1465leave:
1466 /* One extra ref taken needs to be put here */
1467 if (extra_refs)
1468 dlm_lockres_put(res);
1469
1452 dlm_put(dlm); 1470 dlm_put(dlm);
1453 if (ret < 0) { 1471 if (ret < 0) {
1454 if (buf) 1472 if (buf)
@@ -1464,17 +1482,19 @@ leave:
1464 1482
1465static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data) 1483static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data)
1466{ 1484{
1467 struct dlm_ctxt *dlm = data; 1485 struct dlm_ctxt *dlm;
1468 struct dlm_migratable_lockres *mres; 1486 struct dlm_migratable_lockres *mres;
1469 int ret = 0; 1487 int ret = 0;
1470 struct dlm_lock_resource *res; 1488 struct dlm_lock_resource *res;
1471 u8 real_master; 1489 u8 real_master;
1490 u8 extra_ref;
1472 1491
1473 dlm = item->dlm; 1492 dlm = item->dlm;
1474 mres = (struct dlm_migratable_lockres *)data; 1493 mres = (struct dlm_migratable_lockres *)data;
1475 1494
1476 res = item->u.ml.lockres; 1495 res = item->u.ml.lockres;
1477 real_master = item->u.ml.real_master; 1496 real_master = item->u.ml.real_master;
1497 extra_ref = item->u.ml.extra_ref;
1478 1498
1479 if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) { 1499 if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) {
1480 /* this case is super-rare. only occurs if 1500 /* this case is super-rare. only occurs if
@@ -1517,6 +1537,12 @@ again:
1517 } 1537 }
1518 1538
1519leave: 1539leave:
1540 /* See comment in dlm_mig_lockres_handler() */
1541 if (res) {
1542 if (extra_ref)
1543 dlm_lockres_put(res);
1544 dlm_lockres_put(res);
1545 }
1520 kfree(data); 1546 kfree(data);
1521 mlog_exit(ret); 1547 mlog_exit(ret);
1522} 1548}
@@ -1644,7 +1670,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
1644 /* retry!? */ 1670 /* retry!? */
1645 BUG(); 1671 BUG();
1646 } 1672 }
1647 } 1673 } else /* put.. incase we are not the master */
1674 dlm_lockres_put(res);
1648 spin_unlock(&res->spinlock); 1675 spin_unlock(&res->spinlock);
1649 } 1676 }
1650 spin_unlock(&dlm->spinlock); 1677 spin_unlock(&dlm->spinlock);
@@ -1921,6 +1948,7 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
1921 "Recovering res %s:%.*s, is already on recovery list!\n", 1948 "Recovering res %s:%.*s, is already on recovery list!\n",
1922 dlm->name, res->lockname.len, res->lockname.name); 1949 dlm->name, res->lockname.len, res->lockname.name);
1923 list_del_init(&res->recovering); 1950 list_del_init(&res->recovering);
1951 dlm_lockres_put(res);
1924 } 1952 }
1925 /* We need to hold a reference while on the recovery list */ 1953 /* We need to hold a reference while on the recovery list */
1926 dlm_lockres_get(res); 1954 dlm_lockres_get(res);
@@ -2130,11 +2158,16 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2130 assert_spin_locked(&dlm->spinlock); 2158 assert_spin_locked(&dlm->spinlock);
2131 assert_spin_locked(&res->spinlock); 2159 assert_spin_locked(&res->spinlock);
2132 2160
2161 /* We do two dlm_lock_put(). One for removing from list and the other is
2162 * to force the DLM_UNLOCK_FREE_LOCK action so as to free the locks */
2163
2133 /* TODO: check pending_asts, pending_basts here */ 2164 /* TODO: check pending_asts, pending_basts here */
2134 list_for_each_entry_safe(lock, next, &res->granted, list) { 2165 list_for_each_entry_safe(lock, next, &res->granted, list) {
2135 if (lock->ml.node == dead_node) { 2166 if (lock->ml.node == dead_node) {
2136 list_del_init(&lock->list); 2167 list_del_init(&lock->list);
2137 dlm_lock_put(lock); 2168 dlm_lock_put(lock);
2169 /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
2170 dlm_lock_put(lock);
2138 freed++; 2171 freed++;
2139 } 2172 }
2140 } 2173 }
@@ -2142,6 +2175,8 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2142 if (lock->ml.node == dead_node) { 2175 if (lock->ml.node == dead_node) {
2143 list_del_init(&lock->list); 2176 list_del_init(&lock->list);
2144 dlm_lock_put(lock); 2177 dlm_lock_put(lock);
2178 /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
2179 dlm_lock_put(lock);
2145 freed++; 2180 freed++;
2146 } 2181 }
2147 } 2182 }
@@ -2149,6 +2184,8 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2149 if (lock->ml.node == dead_node) { 2184 if (lock->ml.node == dead_node) {
2150 list_del_init(&lock->list); 2185 list_del_init(&lock->list);
2151 dlm_lock_put(lock); 2186 dlm_lock_put(lock);
2187 /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
2188 dlm_lock_put(lock);
2152 freed++; 2189 freed++;
2153 } 2190 }
2154 } 2191 }
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index cebd089f8955..4060bb328bc8 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -176,12 +176,14 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
176 res->lockname.name, master); 176 res->lockname.name, master);
177 177
178 if (!master) { 178 if (!master) {
179 /* drop spinlock... retake below */
180 spin_unlock(&dlm->spinlock);
181
179 spin_lock(&res->spinlock); 182 spin_lock(&res->spinlock);
180 /* This ensures that clear refmap is sent after the set */ 183 /* This ensures that clear refmap is sent after the set */
181 __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); 184 __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
182 spin_unlock(&res->spinlock); 185 spin_unlock(&res->spinlock);
183 /* drop spinlock to do messaging, retake below */ 186
184 spin_unlock(&dlm->spinlock);
185 /* clear our bit from the master's refmap, ignore errors */ 187 /* clear our bit from the master's refmap, ignore errors */
186 ret = dlm_drop_lockres_ref(dlm, res); 188 ret = dlm_drop_lockres_ref(dlm, res);
187 if (ret < 0) { 189 if (ret < 0) {
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 351130c9b734..394d25a131a5 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -27,18 +27,11 @@
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/crc32.h>
31#include <linux/kthread.h> 30#include <linux/kthread.h>
32#include <linux/pagemap.h> 31#include <linux/pagemap.h>
33#include <linux/debugfs.h> 32#include <linux/debugfs.h>
34#include <linux/seq_file.h> 33#include <linux/seq_file.h>
35 34
36#include <cluster/heartbeat.h>
37#include <cluster/nodemanager.h>
38#include <cluster/tcp.h>
39
40#include <dlm/dlmapi.h>
41
42#define MLOG_MASK_PREFIX ML_DLM_GLUE 35#define MLOG_MASK_PREFIX ML_DLM_GLUE
43#include <cluster/masklog.h> 36#include <cluster/masklog.h>
44 37
@@ -53,6 +46,7 @@
53#include "heartbeat.h" 46#include "heartbeat.h"
54#include "inode.h" 47#include "inode.h"
55#include "journal.h" 48#include "journal.h"
49#include "stackglue.h"
56#include "slot_map.h" 50#include "slot_map.h"
57#include "super.h" 51#include "super.h"
58#include "uptodate.h" 52#include "uptodate.h"
@@ -113,7 +107,8 @@ static void ocfs2_dump_meta_lvb_info(u64 level,
113 unsigned int line, 107 unsigned int line,
114 struct ocfs2_lock_res *lockres) 108 struct ocfs2_lock_res *lockres)
115{ 109{
116 struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 110 struct ocfs2_meta_lvb *lvb =
111 (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
117 112
118 mlog(level, "LVB information for %s (called from %s:%u):\n", 113 mlog(level, "LVB information for %s (called from %s:%u):\n",
119 lockres->l_name, function, line); 114 lockres->l_name, function, line);
@@ -259,31 +254,6 @@ static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
259 .flags = 0, 254 .flags = 0,
260}; 255};
261 256
262/*
263 * This is the filesystem locking protocol version.
264 *
265 * Whenever the filesystem does new things with locks (adds or removes a
266 * lock, orders them differently, does different things underneath a lock),
267 * the version must be changed. The protocol is negotiated when joining
268 * the dlm domain. A node may join the domain if its major version is
269 * identical to all other nodes and its minor version is greater than
270 * or equal to all other nodes. When its minor version is greater than
271 * the other nodes, it will run at the minor version specified by the
272 * other nodes.
273 *
274 * If a locking change is made that will not be compatible with older
275 * versions, the major number must be increased and the minor version set
276 * to zero. If a change merely adds a behavior that can be disabled when
277 * speaking to older versions, the minor version must be increased. If a
278 * change adds a fully backwards compatible change (eg, LVB changes that
279 * are just ignored by older versions), the version does not need to be
280 * updated.
281 */
282const struct dlm_protocol_version ocfs2_locking_protocol = {
283 .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
284 .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
285};
286
287static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 257static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
288{ 258{
289 return lockres->l_type == OCFS2_LOCK_TYPE_META || 259 return lockres->l_type == OCFS2_LOCK_TYPE_META ||
@@ -316,7 +286,7 @@ static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *l
316static int ocfs2_lock_create(struct ocfs2_super *osb, 286static int ocfs2_lock_create(struct ocfs2_super *osb,
317 struct ocfs2_lock_res *lockres, 287 struct ocfs2_lock_res *lockres,
318 int level, 288 int level,
319 int dlm_flags); 289 u32 dlm_flags);
320static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 290static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
321 int wanted); 291 int wanted);
322static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 292static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
@@ -330,10 +300,9 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
330 struct ocfs2_lock_res *lockres); 300 struct ocfs2_lock_res *lockres);
331static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 301static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
332 int convert); 302 int convert);
333#define ocfs2_log_dlm_error(_func, _stat, _lockres) do { \ 303#define ocfs2_log_dlm_error(_func, _err, _lockres) do { \
334 mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ 304 mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \
335 "resource %s: %s\n", dlm_errname(_stat), _func, \ 305 _err, _func, _lockres->l_name); \
336 _lockres->l_name, dlm_errmsg(_stat)); \
337} while (0) 306} while (0)
338static int ocfs2_downconvert_thread(void *arg); 307static int ocfs2_downconvert_thread(void *arg);
339static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 308static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
@@ -342,12 +311,13 @@ static int ocfs2_inode_lock_update(struct inode *inode,
342 struct buffer_head **bh); 311 struct buffer_head **bh);
343static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 312static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
344static inline int ocfs2_highest_compat_lock_level(int level); 313static inline int ocfs2_highest_compat_lock_level(int level);
345static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 314static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
346 int new_level); 315 int new_level);
347static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 316static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
348 struct ocfs2_lock_res *lockres, 317 struct ocfs2_lock_res *lockres,
349 int new_level, 318 int new_level,
350 int lvb); 319 int lvb,
320 unsigned int generation);
351static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 321static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
352 struct ocfs2_lock_res *lockres); 322 struct ocfs2_lock_res *lockres);
353static int ocfs2_cancel_convert(struct ocfs2_super *osb, 323static int ocfs2_cancel_convert(struct ocfs2_super *osb,
@@ -406,9 +376,9 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
406 res->l_ops = ops; 376 res->l_ops = ops;
407 res->l_priv = priv; 377 res->l_priv = priv;
408 378
409 res->l_level = LKM_IVMODE; 379 res->l_level = DLM_LOCK_IV;
410 res->l_requested = LKM_IVMODE; 380 res->l_requested = DLM_LOCK_IV;
411 res->l_blocking = LKM_IVMODE; 381 res->l_blocking = DLM_LOCK_IV;
412 res->l_action = OCFS2_AST_INVALID; 382 res->l_action = OCFS2_AST_INVALID;
413 res->l_unlock_action = OCFS2_UNLOCK_INVALID; 383 res->l_unlock_action = OCFS2_UNLOCK_INVALID;
414 384
@@ -604,10 +574,10 @@ static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
604 BUG_ON(!lockres); 574 BUG_ON(!lockres);
605 575
606 switch(level) { 576 switch(level) {
607 case LKM_EXMODE: 577 case DLM_LOCK_EX:
608 lockres->l_ex_holders++; 578 lockres->l_ex_holders++;
609 break; 579 break;
610 case LKM_PRMODE: 580 case DLM_LOCK_PR:
611 lockres->l_ro_holders++; 581 lockres->l_ro_holders++;
612 break; 582 break;
613 default: 583 default:
@@ -625,11 +595,11 @@ static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
625 BUG_ON(!lockres); 595 BUG_ON(!lockres);
626 596
627 switch(level) { 597 switch(level) {
628 case LKM_EXMODE: 598 case DLM_LOCK_EX:
629 BUG_ON(!lockres->l_ex_holders); 599 BUG_ON(!lockres->l_ex_holders);
630 lockres->l_ex_holders--; 600 lockres->l_ex_holders--;
631 break; 601 break;
632 case LKM_PRMODE: 602 case DLM_LOCK_PR:
633 BUG_ON(!lockres->l_ro_holders); 603 BUG_ON(!lockres->l_ro_holders);
634 lockres->l_ro_holders--; 604 lockres->l_ro_holders--;
635 break; 605 break;
@@ -644,12 +614,12 @@ static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
644 * lock types are added. */ 614 * lock types are added. */
645static inline int ocfs2_highest_compat_lock_level(int level) 615static inline int ocfs2_highest_compat_lock_level(int level)
646{ 616{
647 int new_level = LKM_EXMODE; 617 int new_level = DLM_LOCK_EX;
648 618
649 if (level == LKM_EXMODE) 619 if (level == DLM_LOCK_EX)
650 new_level = LKM_NLMODE; 620 new_level = DLM_LOCK_NL;
651 else if (level == LKM_PRMODE) 621 else if (level == DLM_LOCK_PR)
652 new_level = LKM_PRMODE; 622 new_level = DLM_LOCK_PR;
653 return new_level; 623 return new_level;
654} 624}
655 625
@@ -688,12 +658,12 @@ static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res
688 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 658 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
689 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 659 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
690 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 660 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
691 BUG_ON(lockres->l_blocking <= LKM_NLMODE); 661 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
692 662
693 lockres->l_level = lockres->l_requested; 663 lockres->l_level = lockres->l_requested;
694 if (lockres->l_level <= 664 if (lockres->l_level <=
695 ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 665 ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
696 lockres->l_blocking = LKM_NLMODE; 666 lockres->l_blocking = DLM_LOCK_NL;
697 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 667 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
698 } 668 }
699 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 669 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
@@ -712,7 +682,7 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
712 * information is already up to data. Convert from NL to 682 * information is already up to data. Convert from NL to
713 * *anything* however should mark ourselves as needing an 683 * *anything* however should mark ourselves as needing an
714 * update */ 684 * update */
715 if (lockres->l_level == LKM_NLMODE && 685 if (lockres->l_level == DLM_LOCK_NL &&
716 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 686 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
717 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 687 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
718 688
@@ -729,7 +699,7 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc
729 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 699 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
730 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 700 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
731 701
732 if (lockres->l_requested > LKM_NLMODE && 702 if (lockres->l_requested > DLM_LOCK_NL &&
733 !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 703 !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
734 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 704 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
735 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 705 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
@@ -767,6 +737,113 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
767 return needs_downconvert; 737 return needs_downconvert;
768} 738}
769 739
740/*
741 * OCFS2_LOCK_PENDING and l_pending_gen.
742 *
743 * Why does OCFS2_LOCK_PENDING exist? To close a race between setting
744 * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock()
745 * for more details on the race.
746 *
747 * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces
748 * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock()
749 * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear
750 * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns,
751 * the caller is going to try to clear PENDING again. If nothing else is
752 * happening, __lockres_clear_pending() sees PENDING is unset and does
753 * nothing.
754 *
755 * But what if another path (eg downconvert thread) has just started a
756 * new locking action? The other path has re-set PENDING. Our path
757 * cannot clear PENDING, because that will re-open the original race
758 * window.
759 *
760 * [Example]
761 *
762 * ocfs2_meta_lock()
763 * ocfs2_cluster_lock()
764 * set BUSY
765 * set PENDING
766 * drop l_lock
767 * ocfs2_dlm_lock()
768 * ocfs2_locking_ast() ocfs2_downconvert_thread()
769 * clear PENDING ocfs2_unblock_lock()
770 * take_l_lock
771 * !BUSY
772 * ocfs2_prepare_downconvert()
773 * set BUSY
774 * set PENDING
775 * drop l_lock
776 * take l_lock
777 * clear PENDING
778 * drop l_lock
779 * <window>
780 * ocfs2_dlm_lock()
781 *
782 * So as you can see, we now have a window where l_lock is not held,
783 * PENDING is not set, and ocfs2_dlm_lock() has not been called.
784 *
785 * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
786 * set by ocfs2_prepare_downconvert(). That wasn't nice.
787 *
788 * To solve this we introduce l_pending_gen. A call to
789 * lockres_clear_pending() will only do so when it is passed a generation
790 * number that matches the lockres. lockres_set_pending() will return the
791 * current generation number. When ocfs2_cluster_lock() goes to clear
792 * PENDING, it passes the generation it got from set_pending(). In our
793 * example above, the generation numbers will *not* match. Thus,
794 * ocfs2_cluster_lock() will not clear the PENDING set by
795 * ocfs2_prepare_downconvert().
796 */
797
798/* Unlocked version for ocfs2_locking_ast() */
799static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
800 unsigned int generation,
801 struct ocfs2_super *osb)
802{
803 assert_spin_locked(&lockres->l_lock);
804
805 /*
806 * The ast and locking functions can race us here. The winner
807 * will clear pending, the loser will not.
808 */
809 if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
810 (lockres->l_pending_gen != generation))
811 return;
812
813 lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
814 lockres->l_pending_gen++;
815
816 /*
817 * The downconvert thread may have skipped us because we
818 * were PENDING. Wake it up.
819 */
820 if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
821 ocfs2_wake_downconvert_thread(osb);
822}
823
824/* Locked version for callers of ocfs2_dlm_lock() */
825static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
826 unsigned int generation,
827 struct ocfs2_super *osb)
828{
829 unsigned long flags;
830
831 spin_lock_irqsave(&lockres->l_lock, flags);
832 __lockres_clear_pending(lockres, generation, osb);
833 spin_unlock_irqrestore(&lockres->l_lock, flags);
834}
835
836static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
837{
838 assert_spin_locked(&lockres->l_lock);
839 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
840
841 lockres_or_flags(lockres, OCFS2_LOCK_PENDING);
842
843 return lockres->l_pending_gen;
844}
845
846
770static void ocfs2_blocking_ast(void *opaque, int level) 847static void ocfs2_blocking_ast(void *opaque, int level)
771{ 848{
772 struct ocfs2_lock_res *lockres = opaque; 849 struct ocfs2_lock_res *lockres = opaque;
@@ -774,7 +851,7 @@ static void ocfs2_blocking_ast(void *opaque, int level)
774 int needs_downconvert; 851 int needs_downconvert;
775 unsigned long flags; 852 unsigned long flags;
776 853
777 BUG_ON(level <= LKM_NLMODE); 854 BUG_ON(level <= DLM_LOCK_NL);
778 855
779 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", 856 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
780 lockres->l_name, level, lockres->l_level, 857 lockres->l_name, level, lockres->l_level,
@@ -801,14 +878,22 @@ static void ocfs2_blocking_ast(void *opaque, int level)
801static void ocfs2_locking_ast(void *opaque) 878static void ocfs2_locking_ast(void *opaque)
802{ 879{
803 struct ocfs2_lock_res *lockres = opaque; 880 struct ocfs2_lock_res *lockres = opaque;
804 struct dlm_lockstatus *lksb = &lockres->l_lksb; 881 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
805 unsigned long flags; 882 unsigned long flags;
883 int status;
806 884
807 spin_lock_irqsave(&lockres->l_lock, flags); 885 spin_lock_irqsave(&lockres->l_lock, flags);
808 886
809 if (lksb->status != DLM_NORMAL) { 887 status = ocfs2_dlm_lock_status(&lockres->l_lksb);
810 mlog(ML_ERROR, "lockres %s: lksb status value of %u!\n", 888
811 lockres->l_name, lksb->status); 889 if (status == -EAGAIN) {
890 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
891 goto out;
892 }
893
894 if (status) {
895 mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
896 lockres->l_name, status);
812 spin_unlock_irqrestore(&lockres->l_lock, flags); 897 spin_unlock_irqrestore(&lockres->l_lock, flags);
813 return; 898 return;
814 } 899 }
@@ -831,11 +916,23 @@ static void ocfs2_locking_ast(void *opaque)
831 lockres->l_unlock_action); 916 lockres->l_unlock_action);
832 BUG(); 917 BUG();
833 } 918 }
834 919out:
835 /* set it to something invalid so if we get called again we 920 /* set it to something invalid so if we get called again we
836 * can catch it. */ 921 * can catch it. */
837 lockres->l_action = OCFS2_AST_INVALID; 922 lockres->l_action = OCFS2_AST_INVALID;
838 923
924 /* Did we try to cancel this lock? Clear that state */
925 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
926 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
927
928 /*
929 * We may have beaten the locking functions here. We certainly
930 * know that dlm_lock() has been called :-)
931 * Because we can't have two lock calls in flight at once, we
932 * can use lockres->l_pending_gen.
933 */
934 __lockres_clear_pending(lockres, lockres->l_pending_gen, osb);
935
839 wake_up(&lockres->l_event); 936 wake_up(&lockres->l_event);
840 spin_unlock_irqrestore(&lockres->l_lock, flags); 937 spin_unlock_irqrestore(&lockres->l_lock, flags);
841} 938}
@@ -865,15 +962,15 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
865static int ocfs2_lock_create(struct ocfs2_super *osb, 962static int ocfs2_lock_create(struct ocfs2_super *osb,
866 struct ocfs2_lock_res *lockres, 963 struct ocfs2_lock_res *lockres,
867 int level, 964 int level,
868 int dlm_flags) 965 u32 dlm_flags)
869{ 966{
870 int ret = 0; 967 int ret = 0;
871 enum dlm_status status = DLM_NORMAL;
872 unsigned long flags; 968 unsigned long flags;
969 unsigned int gen;
873 970
874 mlog_entry_void(); 971 mlog_entry_void();
875 972
876 mlog(0, "lock %s, level = %d, flags = %d\n", lockres->l_name, level, 973 mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
877 dlm_flags); 974 dlm_flags);
878 975
879 spin_lock_irqsave(&lockres->l_lock, flags); 976 spin_lock_irqsave(&lockres->l_lock, flags);
@@ -886,24 +983,23 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
886 lockres->l_action = OCFS2_AST_ATTACH; 983 lockres->l_action = OCFS2_AST_ATTACH;
887 lockres->l_requested = level; 984 lockres->l_requested = level;
888 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 985 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
986 gen = lockres_set_pending(lockres);
889 spin_unlock_irqrestore(&lockres->l_lock, flags); 987 spin_unlock_irqrestore(&lockres->l_lock, flags);
890 988
891 status = dlmlock(osb->dlm, 989 ret = ocfs2_dlm_lock(osb->cconn,
892 level, 990 level,
893 &lockres->l_lksb, 991 &lockres->l_lksb,
894 dlm_flags, 992 dlm_flags,
895 lockres->l_name, 993 lockres->l_name,
896 OCFS2_LOCK_ID_MAX_LEN - 1, 994 OCFS2_LOCK_ID_MAX_LEN - 1,
897 ocfs2_locking_ast, 995 lockres);
898 lockres, 996 lockres_clear_pending(lockres, gen, osb);
899 ocfs2_blocking_ast); 997 if (ret) {
900 if (status != DLM_NORMAL) { 998 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
901 ocfs2_log_dlm_error("dlmlock", status, lockres);
902 ret = -EINVAL;
903 ocfs2_recover_from_dlm_error(lockres, 1); 999 ocfs2_recover_from_dlm_error(lockres, 1);
904 } 1000 }
905 1001
906 mlog(0, "lock %s, successfull return from dlmlock\n", lockres->l_name); 1002 mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);
907 1003
908bail: 1004bail:
909 mlog_exit(ret); 1005 mlog_exit(ret);
@@ -1016,21 +1112,22 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1016static int ocfs2_cluster_lock(struct ocfs2_super *osb, 1112static int ocfs2_cluster_lock(struct ocfs2_super *osb,
1017 struct ocfs2_lock_res *lockres, 1113 struct ocfs2_lock_res *lockres,
1018 int level, 1114 int level,
1019 int lkm_flags, 1115 u32 lkm_flags,
1020 int arg_flags) 1116 int arg_flags)
1021{ 1117{
1022 struct ocfs2_mask_waiter mw; 1118 struct ocfs2_mask_waiter mw;
1023 enum dlm_status status;
1024 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1119 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
1025 int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ 1120 int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
1026 unsigned long flags; 1121 unsigned long flags;
1122 unsigned int gen;
1123 int noqueue_attempted = 0;
1027 1124
1028 mlog_entry_void(); 1125 mlog_entry_void();
1029 1126
1030 ocfs2_init_mask_waiter(&mw); 1127 ocfs2_init_mask_waiter(&mw);
1031 1128
1032 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1129 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
1033 lkm_flags |= LKM_VALBLK; 1130 lkm_flags |= DLM_LKF_VALBLK;
1034 1131
1035again: 1132again:
1036 wait = 0; 1133 wait = 0;
@@ -1068,52 +1165,56 @@ again:
1068 } 1165 }
1069 1166
1070 if (level > lockres->l_level) { 1167 if (level > lockres->l_level) {
1168 if (noqueue_attempted > 0) {
1169 ret = -EAGAIN;
1170 goto unlock;
1171 }
1172 if (lkm_flags & DLM_LKF_NOQUEUE)
1173 noqueue_attempted = 1;
1174
1071 if (lockres->l_action != OCFS2_AST_INVALID) 1175 if (lockres->l_action != OCFS2_AST_INVALID)
1072 mlog(ML_ERROR, "lockres %s has action %u pending\n", 1176 mlog(ML_ERROR, "lockres %s has action %u pending\n",
1073 lockres->l_name, lockres->l_action); 1177 lockres->l_name, lockres->l_action);
1074 1178
1075 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1179 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1076 lockres->l_action = OCFS2_AST_ATTACH; 1180 lockres->l_action = OCFS2_AST_ATTACH;
1077 lkm_flags &= ~LKM_CONVERT; 1181 lkm_flags &= ~DLM_LKF_CONVERT;
1078 } else { 1182 } else {
1079 lockres->l_action = OCFS2_AST_CONVERT; 1183 lockres->l_action = OCFS2_AST_CONVERT;
1080 lkm_flags |= LKM_CONVERT; 1184 lkm_flags |= DLM_LKF_CONVERT;
1081 } 1185 }
1082 1186
1083 lockres->l_requested = level; 1187 lockres->l_requested = level;
1084 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1188 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1189 gen = lockres_set_pending(lockres);
1085 spin_unlock_irqrestore(&lockres->l_lock, flags); 1190 spin_unlock_irqrestore(&lockres->l_lock, flags);
1086 1191
1087 BUG_ON(level == LKM_IVMODE); 1192 BUG_ON(level == DLM_LOCK_IV);
1088 BUG_ON(level == LKM_NLMODE); 1193 BUG_ON(level == DLM_LOCK_NL);
1089 1194
1090 mlog(0, "lock %s, convert from %d to level = %d\n", 1195 mlog(0, "lock %s, convert from %d to level = %d\n",
1091 lockres->l_name, lockres->l_level, level); 1196 lockres->l_name, lockres->l_level, level);
1092 1197
1093 /* call dlm_lock to upgrade lock now */ 1198 /* call dlm_lock to upgrade lock now */
1094 status = dlmlock(osb->dlm, 1199 ret = ocfs2_dlm_lock(osb->cconn,
1095 level, 1200 level,
1096 &lockres->l_lksb, 1201 &lockres->l_lksb,
1097 lkm_flags, 1202 lkm_flags,
1098 lockres->l_name, 1203 lockres->l_name,
1099 OCFS2_LOCK_ID_MAX_LEN - 1, 1204 OCFS2_LOCK_ID_MAX_LEN - 1,
1100 ocfs2_locking_ast, 1205 lockres);
1101 lockres, 1206 lockres_clear_pending(lockres, gen, osb);
1102 ocfs2_blocking_ast); 1207 if (ret) {
1103 if (status != DLM_NORMAL) { 1208 if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
1104 if ((lkm_flags & LKM_NOQUEUE) && 1209 (ret != -EAGAIN)) {
1105 (status == DLM_NOTQUEUED)) 1210 ocfs2_log_dlm_error("ocfs2_dlm_lock",
1106 ret = -EAGAIN; 1211 ret, lockres);
1107 else {
1108 ocfs2_log_dlm_error("dlmlock", status,
1109 lockres);
1110 ret = -EINVAL;
1111 } 1212 }
1112 ocfs2_recover_from_dlm_error(lockres, 1); 1213 ocfs2_recover_from_dlm_error(lockres, 1);
1113 goto out; 1214 goto out;
1114 } 1215 }
1115 1216
1116 mlog(0, "lock %s, successfull return from dlmlock\n", 1217 mlog(0, "lock %s, successfull return from ocfs2_dlm_lock\n",
1117 lockres->l_name); 1218 lockres->l_name);
1118 1219
1119 /* At this point we've gone inside the dlm and need to 1220 /* At this point we've gone inside the dlm and need to
@@ -1177,9 +1278,9 @@ static int ocfs2_create_new_lock(struct ocfs2_super *osb,
1177 int ex, 1278 int ex,
1178 int local) 1279 int local)
1179{ 1280{
1180 int level = ex ? LKM_EXMODE : LKM_PRMODE; 1281 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1181 unsigned long flags; 1282 unsigned long flags;
1182 int lkm_flags = local ? LKM_LOCAL : 0; 1283 u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
1183 1284
1184 spin_lock_irqsave(&lockres->l_lock, flags); 1285 spin_lock_irqsave(&lockres->l_lock, flags);
1185 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1286 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
@@ -1222,7 +1323,7 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
1222 } 1323 }
1223 1324
1224 /* 1325 /*
1225 * We don't want to use LKM_LOCAL on a meta data lock as they 1326 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they
1226 * don't use a generation in their lock names. 1327 * don't use a generation in their lock names.
1227 */ 1328 */
1228 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); 1329 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
@@ -1261,7 +1362,7 @@ int ocfs2_rw_lock(struct inode *inode, int write)
1261 1362
1262 lockres = &OCFS2_I(inode)->ip_rw_lockres; 1363 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1263 1364
1264 level = write ? LKM_EXMODE : LKM_PRMODE; 1365 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1265 1366
1266 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1367 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
1267 0); 1368 0);
@@ -1274,7 +1375,7 @@ int ocfs2_rw_lock(struct inode *inode, int write)
1274 1375
1275void ocfs2_rw_unlock(struct inode *inode, int write) 1376void ocfs2_rw_unlock(struct inode *inode, int write)
1276{ 1377{
1277 int level = write ? LKM_EXMODE : LKM_PRMODE; 1378 int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1278 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1379 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1279 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1380 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1280 1381
@@ -1312,7 +1413,7 @@ int ocfs2_open_lock(struct inode *inode)
1312 lockres = &OCFS2_I(inode)->ip_open_lockres; 1413 lockres = &OCFS2_I(inode)->ip_open_lockres;
1313 1414
1314 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1415 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1315 LKM_PRMODE, 0, 0); 1416 DLM_LOCK_PR, 0, 0);
1316 if (status < 0) 1417 if (status < 0)
1317 mlog_errno(status); 1418 mlog_errno(status);
1318 1419
@@ -1340,16 +1441,16 @@ int ocfs2_try_open_lock(struct inode *inode, int write)
1340 1441
1341 lockres = &OCFS2_I(inode)->ip_open_lockres; 1442 lockres = &OCFS2_I(inode)->ip_open_lockres;
1342 1443
1343 level = write ? LKM_EXMODE : LKM_PRMODE; 1444 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1344 1445
1345 /* 1446 /*
1346 * The file system may already holding a PRMODE/EXMODE open lock. 1447 * The file system may already holding a PRMODE/EXMODE open lock.
1347 * Since we pass LKM_NOQUEUE, the request won't block waiting on 1448 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
1348 * other nodes and the -EAGAIN will indicate to the caller that 1449 * other nodes and the -EAGAIN will indicate to the caller that
1349 * this inode is still in use. 1450 * this inode is still in use.
1350 */ 1451 */
1351 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1452 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1352 level, LKM_NOQUEUE, 0); 1453 level, DLM_LKF_NOQUEUE, 0);
1353 1454
1354out: 1455out:
1355 mlog_exit(status); 1456 mlog_exit(status);
@@ -1374,10 +1475,10 @@ void ocfs2_open_unlock(struct inode *inode)
1374 1475
1375 if(lockres->l_ro_holders) 1476 if(lockres->l_ro_holders)
1376 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1477 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1377 LKM_PRMODE); 1478 DLM_LOCK_PR);
1378 if(lockres->l_ex_holders) 1479 if(lockres->l_ex_holders)
1379 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1480 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1380 LKM_EXMODE); 1481 DLM_LOCK_EX);
1381 1482
1382out: 1483out:
1383 mlog_exit_void(); 1484 mlog_exit_void();
@@ -1464,7 +1565,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock)
1464 ocfs2_init_mask_waiter(&mw); 1565 ocfs2_init_mask_waiter(&mw);
1465 1566
1466 if ((lockres->l_flags & OCFS2_LOCK_BUSY) || 1567 if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
1467 (lockres->l_level > LKM_NLMODE)) { 1568 (lockres->l_level > DLM_LOCK_NL)) {
1468 mlog(ML_ERROR, 1569 mlog(ML_ERROR,
1469 "File lock \"%s\" has busy or locked state: flags: 0x%lx, " 1570 "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
1470 "level: %u\n", lockres->l_name, lockres->l_flags, 1571 "level: %u\n", lockres->l_name, lockres->l_flags,
@@ -1503,14 +1604,12 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock)
1503 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1604 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1504 spin_unlock_irqrestore(&lockres->l_lock, flags); 1605 spin_unlock_irqrestore(&lockres->l_lock, flags);
1505 1606
1506 ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags, 1607 ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
1507 lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, 1608 lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1,
1508 ocfs2_locking_ast, lockres, ocfs2_blocking_ast); 1609 lockres);
1509 if (ret != DLM_NORMAL) { 1610 if (ret) {
1510 if (trylock && ret == DLM_NOTQUEUED) 1611 if (!trylock || (ret != -EAGAIN)) {
1511 ret = -EAGAIN; 1612 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1512 else {
1513 ocfs2_log_dlm_error("dlmlock", ret, lockres);
1514 ret = -EINVAL; 1613 ret = -EINVAL;
1515 } 1614 }
1516 1615
@@ -1537,6 +1636,10 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock)
1537 * to just bubble sucess back up to the user. 1636 * to just bubble sucess back up to the user.
1538 */ 1637 */
1539 ret = ocfs2_flock_handle_signal(lockres, level); 1638 ret = ocfs2_flock_handle_signal(lockres, level);
1639 } else if (!ret && (level > lockres->l_level)) {
1640 /* Trylock failed asynchronously */
1641 BUG_ON(!trylock);
1642 ret = -EAGAIN;
1540 } 1643 }
1541 1644
1542out: 1645out:
@@ -1549,6 +1652,7 @@ out:
1549void ocfs2_file_unlock(struct file *file) 1652void ocfs2_file_unlock(struct file *file)
1550{ 1653{
1551 int ret; 1654 int ret;
1655 unsigned int gen;
1552 unsigned long flags; 1656 unsigned long flags;
1553 struct ocfs2_file_private *fp = file->private_data; 1657 struct ocfs2_file_private *fp = file->private_data;
1554 struct ocfs2_lock_res *lockres = &fp->fp_flock; 1658 struct ocfs2_lock_res *lockres = &fp->fp_flock;
@@ -1572,13 +1676,13 @@ void ocfs2_file_unlock(struct file *file)
1572 * Fake a blocking ast for the downconvert code. 1676 * Fake a blocking ast for the downconvert code.
1573 */ 1677 */
1574 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 1678 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
1575 lockres->l_blocking = LKM_EXMODE; 1679 lockres->l_blocking = DLM_LOCK_EX;
1576 1680
1577 ocfs2_prepare_downconvert(lockres, LKM_NLMODE); 1681 gen = ocfs2_prepare_downconvert(lockres, LKM_NLMODE);
1578 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1682 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1579 spin_unlock_irqrestore(&lockres->l_lock, flags); 1683 spin_unlock_irqrestore(&lockres->l_lock, flags);
1580 1684
1581 ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0); 1685 ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0, gen);
1582 if (ret) { 1686 if (ret) {
1583 mlog_errno(ret); 1687 mlog_errno(ret);
1584 return; 1688 return;
@@ -1601,11 +1705,11 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
1601 * condition. */ 1705 * condition. */
1602 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 1706 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
1603 switch(lockres->l_blocking) { 1707 switch(lockres->l_blocking) {
1604 case LKM_EXMODE: 1708 case DLM_LOCK_EX:
1605 if (!lockres->l_ex_holders && !lockres->l_ro_holders) 1709 if (!lockres->l_ex_holders && !lockres->l_ro_holders)
1606 kick = 1; 1710 kick = 1;
1607 break; 1711 break;
1608 case LKM_PRMODE: 1712 case DLM_LOCK_PR:
1609 if (!lockres->l_ex_holders) 1713 if (!lockres->l_ex_holders)
1610 kick = 1; 1714 kick = 1;
1611 break; 1715 break;
@@ -1648,7 +1752,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1648 1752
1649 mlog_entry_void(); 1753 mlog_entry_void();
1650 1754
1651 lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1755 lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
1652 1756
1653 /* 1757 /*
1654 * Invalidate the LVB of a deleted inode - this way other 1758 * Invalidate the LVB of a deleted inode - this way other
@@ -1700,7 +1804,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1700 1804
1701 mlog_meta_lvb(0, lockres); 1805 mlog_meta_lvb(0, lockres);
1702 1806
1703 lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1807 lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
1704 1808
1705 /* We're safe here without the lockres lock... */ 1809 /* We're safe here without the lockres lock... */
1706 spin_lock(&oi->ip_lock); 1810 spin_lock(&oi->ip_lock);
@@ -1735,7 +1839,8 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1735static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 1839static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
1736 struct ocfs2_lock_res *lockres) 1840 struct ocfs2_lock_res *lockres)
1737{ 1841{
1738 struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1842 struct ocfs2_meta_lvb *lvb =
1843 (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
1739 1844
1740 if (lvb->lvb_version == OCFS2_LVB_VERSION 1845 if (lvb->lvb_version == OCFS2_LVB_VERSION
1741 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 1846 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
@@ -1923,7 +2028,8 @@ int ocfs2_inode_lock_full(struct inode *inode,
1923 int ex, 2028 int ex,
1924 int arg_flags) 2029 int arg_flags)
1925{ 2030{
1926 int status, level, dlm_flags, acquired; 2031 int status, level, acquired;
2032 u32 dlm_flags;
1927 struct ocfs2_lock_res *lockres = NULL; 2033 struct ocfs2_lock_res *lockres = NULL;
1928 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2034 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1929 struct buffer_head *local_bh = NULL; 2035 struct buffer_head *local_bh = NULL;
@@ -1950,14 +2056,13 @@ int ocfs2_inode_lock_full(struct inode *inode,
1950 goto local; 2056 goto local;
1951 2057
1952 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2058 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
1953 wait_event(osb->recovery_event, 2059 ocfs2_wait_for_recovery(osb);
1954 ocfs2_node_map_is_empty(osb, &osb->recovery_map));
1955 2060
1956 lockres = &OCFS2_I(inode)->ip_inode_lockres; 2061 lockres = &OCFS2_I(inode)->ip_inode_lockres;
1957 level = ex ? LKM_EXMODE : LKM_PRMODE; 2062 level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1958 dlm_flags = 0; 2063 dlm_flags = 0;
1959 if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2064 if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
1960 dlm_flags |= LKM_NOQUEUE; 2065 dlm_flags |= DLM_LKF_NOQUEUE;
1961 2066
1962 status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); 2067 status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags);
1963 if (status < 0) { 2068 if (status < 0) {
@@ -1974,8 +2079,7 @@ int ocfs2_inode_lock_full(struct inode *inode,
1974 * committed to owning this lock so we don't allow signals to 2079 * committed to owning this lock so we don't allow signals to
1975 * abort the operation. */ 2080 * abort the operation. */
1976 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2081 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
1977 wait_event(osb->recovery_event, 2082 ocfs2_wait_for_recovery(osb);
1978 ocfs2_node_map_is_empty(osb, &osb->recovery_map));
1979 2083
1980local: 2084local:
1981 /* 2085 /*
@@ -2109,7 +2213,7 @@ int ocfs2_inode_lock_atime(struct inode *inode,
2109void ocfs2_inode_unlock(struct inode *inode, 2213void ocfs2_inode_unlock(struct inode *inode,
2110 int ex) 2214 int ex)
2111{ 2215{
2112 int level = ex ? LKM_EXMODE : LKM_PRMODE; 2216 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2113 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2217 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
2114 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2218 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2115 2219
@@ -2130,10 +2234,8 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2130 int ex) 2234 int ex)
2131{ 2235{
2132 int status = 0; 2236 int status = 0;
2133 int level = ex ? LKM_EXMODE : LKM_PRMODE; 2237 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2134 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2238 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2135 struct buffer_head *bh;
2136 struct ocfs2_slot_info *si = osb->slot_info;
2137 2239
2138 mlog_entry_void(); 2240 mlog_entry_void();
2139 2241
@@ -2159,11 +2261,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2159 goto bail; 2261 goto bail;
2160 } 2262 }
2161 if (status) { 2263 if (status) {
2162 bh = si->si_bh; 2264 status = ocfs2_refresh_slot_info(osb);
2163 status = ocfs2_read_block(osb, bh->b_blocknr, &bh, 0,
2164 si->si_inode);
2165 if (status == 0)
2166 ocfs2_update_slot_info(si);
2167 2265
2168 ocfs2_complete_lock_res_refresh(lockres, status); 2266 ocfs2_complete_lock_res_refresh(lockres, status);
2169 2267
@@ -2178,7 +2276,7 @@ bail:
2178void ocfs2_super_unlock(struct ocfs2_super *osb, 2276void ocfs2_super_unlock(struct ocfs2_super *osb,
2179 int ex) 2277 int ex)
2180{ 2278{
2181 int level = ex ? LKM_EXMODE : LKM_PRMODE; 2279 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2182 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2280 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2183 2281
2184 if (!ocfs2_mount_local(osb)) 2282 if (!ocfs2_mount_local(osb))
@@ -2196,7 +2294,7 @@ int ocfs2_rename_lock(struct ocfs2_super *osb)
2196 if (ocfs2_mount_local(osb)) 2294 if (ocfs2_mount_local(osb))
2197 return 0; 2295 return 0;
2198 2296
2199 status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0); 2297 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2200 if (status < 0) 2298 if (status < 0)
2201 mlog_errno(status); 2299 mlog_errno(status);
2202 2300
@@ -2208,13 +2306,13 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb)
2208 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2306 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2209 2307
2210 if (!ocfs2_mount_local(osb)) 2308 if (!ocfs2_mount_local(osb))
2211 ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); 2309 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2212} 2310}
2213 2311
2214int ocfs2_dentry_lock(struct dentry *dentry, int ex) 2312int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2215{ 2313{
2216 int ret; 2314 int ret;
2217 int level = ex ? LKM_EXMODE : LKM_PRMODE; 2315 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2218 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2316 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2219 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2317 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2220 2318
@@ -2235,7 +2333,7 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2235 2333
2236void ocfs2_dentry_unlock(struct dentry *dentry, int ex) 2334void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
2237{ 2335{
2238 int level = ex ? LKM_EXMODE : LKM_PRMODE; 2336 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2239 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2337 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2240 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2338 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2241 2339
@@ -2400,7 +2498,7 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2400 lockres->l_blocking); 2498 lockres->l_blocking);
2401 2499
2402 /* Dump the raw LVB */ 2500 /* Dump the raw LVB */
2403 lvb = lockres->l_lksb.lvb; 2501 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2404 for(i = 0; i < DLM_LVB_LEN; i++) 2502 for(i = 0; i < DLM_LVB_LEN; i++)
2405 seq_printf(m, "0x%x\t", lvb[i]); 2503 seq_printf(m, "0x%x\t", lvb[i]);
2406 2504
@@ -2409,7 +2507,7 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2409 return 0; 2507 return 0;
2410} 2508}
2411 2509
2412static struct seq_operations ocfs2_dlm_seq_ops = { 2510static const struct seq_operations ocfs2_dlm_seq_ops = {
2413 .start = ocfs2_dlm_seq_start, 2511 .start = ocfs2_dlm_seq_start,
2414 .stop = ocfs2_dlm_seq_stop, 2512 .stop = ocfs2_dlm_seq_stop,
2415 .next = ocfs2_dlm_seq_next, 2513 .next = ocfs2_dlm_seq_next,
@@ -2504,13 +2602,14 @@ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
2504int ocfs2_dlm_init(struct ocfs2_super *osb) 2602int ocfs2_dlm_init(struct ocfs2_super *osb)
2505{ 2603{
2506 int status = 0; 2604 int status = 0;
2507 u32 dlm_key; 2605 struct ocfs2_cluster_connection *conn = NULL;
2508 struct dlm_ctxt *dlm = NULL;
2509 2606
2510 mlog_entry_void(); 2607 mlog_entry_void();
2511 2608
2512 if (ocfs2_mount_local(osb)) 2609 if (ocfs2_mount_local(osb)) {
2610 osb->node_num = 0;
2513 goto local; 2611 goto local;
2612 }
2514 2613
2515 status = ocfs2_dlm_init_debug(osb); 2614 status = ocfs2_dlm_init_debug(osb);
2516 if (status < 0) { 2615 if (status < 0) {
@@ -2527,26 +2626,31 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
2527 goto bail; 2626 goto bail;
2528 } 2627 }
2529 2628
2530 /* used by the dlm code to make message headers unique, each
2531 * node in this domain must agree on this. */
2532 dlm_key = crc32_le(0, osb->uuid_str, strlen(osb->uuid_str));
2533
2534 /* for now, uuid == domain */ 2629 /* for now, uuid == domain */
2535 dlm = dlm_register_domain(osb->uuid_str, dlm_key, 2630 status = ocfs2_cluster_connect(osb->osb_cluster_stack,
2536 &osb->osb_locking_proto); 2631 osb->uuid_str,
2537 if (IS_ERR(dlm)) { 2632 strlen(osb->uuid_str),
2538 status = PTR_ERR(dlm); 2633 ocfs2_do_node_down, osb,
2634 &conn);
2635 if (status) {
2539 mlog_errno(status); 2636 mlog_errno(status);
2540 goto bail; 2637 goto bail;
2541 } 2638 }
2542 2639
2543 dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb); 2640 status = ocfs2_cluster_this_node(&osb->node_num);
2641 if (status < 0) {
2642 mlog_errno(status);
2643 mlog(ML_ERROR,
2644 "could not find this host's node number\n");
2645 ocfs2_cluster_disconnect(conn, 0);
2646 goto bail;
2647 }
2544 2648
2545local: 2649local:
2546 ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 2650 ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
2547 ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 2651 ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
2548 2652
2549 osb->dlm = dlm; 2653 osb->cconn = conn;
2550 2654
2551 status = 0; 2655 status = 0;
2552bail: 2656bail:
@@ -2560,14 +2664,19 @@ bail:
2560 return status; 2664 return status;
2561} 2665}
2562 2666
2563void ocfs2_dlm_shutdown(struct ocfs2_super *osb) 2667void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
2668 int hangup_pending)
2564{ 2669{
2565 mlog_entry_void(); 2670 mlog_entry_void();
2566 2671
2567 dlm_unregister_eviction_cb(&osb->osb_eviction_cb);
2568
2569 ocfs2_drop_osb_locks(osb); 2672 ocfs2_drop_osb_locks(osb);
2570 2673
2674 /*
2675 * Now that we have dropped all locks and ocfs2_dismount_volume()
2676 * has disabled recovery, the DLM won't be talking to us. It's
2677 * safe to tear things down before disconnecting the cluster.
2678 */
2679
2571 if (osb->dc_task) { 2680 if (osb->dc_task) {
2572 kthread_stop(osb->dc_task); 2681 kthread_stop(osb->dc_task);
2573 osb->dc_task = NULL; 2682 osb->dc_task = NULL;
@@ -2576,15 +2685,15 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb)
2576 ocfs2_lock_res_free(&osb->osb_super_lockres); 2685 ocfs2_lock_res_free(&osb->osb_super_lockres);
2577 ocfs2_lock_res_free(&osb->osb_rename_lockres); 2686 ocfs2_lock_res_free(&osb->osb_rename_lockres);
2578 2687
2579 dlm_unregister_domain(osb->dlm); 2688 ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
2580 osb->dlm = NULL; 2689 osb->cconn = NULL;
2581 2690
2582 ocfs2_dlm_shutdown_debug(osb); 2691 ocfs2_dlm_shutdown_debug(osb);
2583 2692
2584 mlog_exit_void(); 2693 mlog_exit_void();
2585} 2694}
2586 2695
2587static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) 2696static void ocfs2_unlock_ast(void *opaque, int error)
2588{ 2697{
2589 struct ocfs2_lock_res *lockres = opaque; 2698 struct ocfs2_lock_res *lockres = opaque;
2590 unsigned long flags; 2699 unsigned long flags;
@@ -2595,24 +2704,9 @@ static void ocfs2_unlock_ast(void *opaque, enum dlm_status status)
2595 lockres->l_unlock_action); 2704 lockres->l_unlock_action);
2596 2705
2597 spin_lock_irqsave(&lockres->l_lock, flags); 2706 spin_lock_irqsave(&lockres->l_lock, flags);
2598 /* We tried to cancel a convert request, but it was already 2707 if (error) {
2599 * granted. All we want to do here is clear our unlock 2708 mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
2600 * state. The wake_up call done at the bottom is redundant 2709 "unlock_action %d\n", error, lockres->l_name,
2601 * (ocfs2_prepare_cancel_convert doesn't sleep on this) but doesn't
2602 * hurt anything anyway */
2603 if (status == DLM_CANCELGRANT &&
2604 lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
2605 mlog(0, "Got cancelgrant for %s\n", lockres->l_name);
2606
2607 /* We don't clear the busy flag in this case as it
2608 * should have been cleared by the ast which the dlm
2609 * has called. */
2610 goto complete_unlock;
2611 }
2612
2613 if (status != DLM_NORMAL) {
2614 mlog(ML_ERROR, "Dlm passes status %d for lock %s, "
2615 "unlock_action %d\n", status, lockres->l_name,
2616 lockres->l_unlock_action); 2710 lockres->l_unlock_action);
2617 spin_unlock_irqrestore(&lockres->l_lock, flags); 2711 spin_unlock_irqrestore(&lockres->l_lock, flags);
2618 return; 2712 return;
@@ -2624,14 +2718,13 @@ static void ocfs2_unlock_ast(void *opaque, enum dlm_status status)
2624 lockres->l_action = OCFS2_AST_INVALID; 2718 lockres->l_action = OCFS2_AST_INVALID;
2625 break; 2719 break;
2626 case OCFS2_UNLOCK_DROP_LOCK: 2720 case OCFS2_UNLOCK_DROP_LOCK:
2627 lockres->l_level = LKM_IVMODE; 2721 lockres->l_level = DLM_LOCK_IV;
2628 break; 2722 break;
2629 default: 2723 default:
2630 BUG(); 2724 BUG();
2631 } 2725 }
2632 2726
2633 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 2727 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
2634complete_unlock:
2635 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 2728 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
2636 spin_unlock_irqrestore(&lockres->l_lock, flags); 2729 spin_unlock_irqrestore(&lockres->l_lock, flags);
2637 2730
@@ -2643,16 +2736,16 @@ complete_unlock:
2643static int ocfs2_drop_lock(struct ocfs2_super *osb, 2736static int ocfs2_drop_lock(struct ocfs2_super *osb,
2644 struct ocfs2_lock_res *lockres) 2737 struct ocfs2_lock_res *lockres)
2645{ 2738{
2646 enum dlm_status status; 2739 int ret;
2647 unsigned long flags; 2740 unsigned long flags;
2648 int lkm_flags = 0; 2741 u32 lkm_flags = 0;
2649 2742
2650 /* We didn't get anywhere near actually using this lockres. */ 2743 /* We didn't get anywhere near actually using this lockres. */
2651 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 2744 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
2652 goto out; 2745 goto out;
2653 2746
2654 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 2747 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
2655 lkm_flags |= LKM_VALBLK; 2748 lkm_flags |= DLM_LKF_VALBLK;
2656 2749
2657 spin_lock_irqsave(&lockres->l_lock, flags); 2750 spin_lock_irqsave(&lockres->l_lock, flags);
2658 2751
@@ -2678,7 +2771,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
2678 2771
2679 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 2772 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
2680 if (lockres->l_flags & OCFS2_LOCK_ATTACHED && 2773 if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
2681 lockres->l_level == LKM_EXMODE && 2774 lockres->l_level == DLM_LOCK_EX &&
2682 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 2775 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
2683 lockres->l_ops->set_lvb(lockres); 2776 lockres->l_ops->set_lvb(lockres);
2684 } 2777 }
@@ -2707,15 +2800,15 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
2707 2800
2708 mlog(0, "lock %s\n", lockres->l_name); 2801 mlog(0, "lock %s\n", lockres->l_name);
2709 2802
2710 status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags, 2803 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags,
2711 ocfs2_unlock_ast, lockres); 2804 lockres);
2712 if (status != DLM_NORMAL) { 2805 if (ret) {
2713 ocfs2_log_dlm_error("dlmunlock", status, lockres); 2806 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
2714 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 2807 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
2715 dlm_print_one_lock(lockres->l_lksb.lockid); 2808 ocfs2_dlm_dump_lksb(&lockres->l_lksb);
2716 BUG(); 2809 BUG();
2717 } 2810 }
2718 mlog(0, "lock %s, successfull return from dlmunlock\n", 2811 mlog(0, "lock %s, successfull return from ocfs2_dlm_unlock\n",
2719 lockres->l_name); 2812 lockres->l_name);
2720 2813
2721 ocfs2_wait_on_busy_lock(lockres); 2814 ocfs2_wait_on_busy_lock(lockres);
@@ -2806,15 +2899,15 @@ int ocfs2_drop_inode_locks(struct inode *inode)
2806 return status; 2899 return status;
2807} 2900}
2808 2901
2809static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 2902static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
2810 int new_level) 2903 int new_level)
2811{ 2904{
2812 assert_spin_locked(&lockres->l_lock); 2905 assert_spin_locked(&lockres->l_lock);
2813 2906
2814 BUG_ON(lockres->l_blocking <= LKM_NLMODE); 2907 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
2815 2908
2816 if (lockres->l_level <= new_level) { 2909 if (lockres->l_level <= new_level) {
2817 mlog(ML_ERROR, "lockres->l_level (%u) <= new_level (%u)\n", 2910 mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n",
2818 lockres->l_level, new_level); 2911 lockres->l_level, new_level);
2819 BUG(); 2912 BUG();
2820 } 2913 }
@@ -2825,33 +2918,33 @@ static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
2825 lockres->l_action = OCFS2_AST_DOWNCONVERT; 2918 lockres->l_action = OCFS2_AST_DOWNCONVERT;
2826 lockres->l_requested = new_level; 2919 lockres->l_requested = new_level;
2827 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 2920 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
2921 return lockres_set_pending(lockres);
2828} 2922}
2829 2923
2830static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 2924static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
2831 struct ocfs2_lock_res *lockres, 2925 struct ocfs2_lock_res *lockres,
2832 int new_level, 2926 int new_level,
2833 int lvb) 2927 int lvb,
2928 unsigned int generation)
2834{ 2929{
2835 int ret, dlm_flags = LKM_CONVERT; 2930 int ret;
2836 enum dlm_status status; 2931 u32 dlm_flags = DLM_LKF_CONVERT;
2837 2932
2838 mlog_entry_void(); 2933 mlog_entry_void();
2839 2934
2840 if (lvb) 2935 if (lvb)
2841 dlm_flags |= LKM_VALBLK; 2936 dlm_flags |= DLM_LKF_VALBLK;
2842 2937
2843 status = dlmlock(osb->dlm, 2938 ret = ocfs2_dlm_lock(osb->cconn,
2844 new_level, 2939 new_level,
2845 &lockres->l_lksb, 2940 &lockres->l_lksb,
2846 dlm_flags, 2941 dlm_flags,
2847 lockres->l_name, 2942 lockres->l_name,
2848 OCFS2_LOCK_ID_MAX_LEN - 1, 2943 OCFS2_LOCK_ID_MAX_LEN - 1,
2849 ocfs2_locking_ast, 2944 lockres);
2850 lockres, 2945 lockres_clear_pending(lockres, generation, osb);
2851 ocfs2_blocking_ast); 2946 if (ret) {
2852 if (status != DLM_NORMAL) { 2947 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
2853 ocfs2_log_dlm_error("dlmlock", status, lockres);
2854 ret = -EINVAL;
2855 ocfs2_recover_from_dlm_error(lockres, 1); 2948 ocfs2_recover_from_dlm_error(lockres, 1);
2856 goto bail; 2949 goto bail;
2857 } 2950 }
@@ -2862,7 +2955,7 @@ bail:
2862 return ret; 2955 return ret;
2863} 2956}
2864 2957
2865/* returns 1 when the caller should unlock and call dlmunlock */ 2958/* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
2866static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 2959static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
2867 struct ocfs2_lock_res *lockres) 2960 struct ocfs2_lock_res *lockres)
2868{ 2961{
@@ -2898,24 +2991,18 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
2898 struct ocfs2_lock_res *lockres) 2991 struct ocfs2_lock_res *lockres)
2899{ 2992{
2900 int ret; 2993 int ret;
2901 enum dlm_status status;
2902 2994
2903 mlog_entry_void(); 2995 mlog_entry_void();
2904 mlog(0, "lock %s\n", lockres->l_name); 2996 mlog(0, "lock %s\n", lockres->l_name);
2905 2997
2906 ret = 0; 2998 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
2907 status = dlmunlock(osb->dlm, 2999 DLM_LKF_CANCEL, lockres);
2908 &lockres->l_lksb, 3000 if (ret) {
2909 LKM_CANCEL, 3001 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
2910 ocfs2_unlock_ast,
2911 lockres);
2912 if (status != DLM_NORMAL) {
2913 ocfs2_log_dlm_error("dlmunlock", status, lockres);
2914 ret = -EINVAL;
2915 ocfs2_recover_from_dlm_error(lockres, 0); 3002 ocfs2_recover_from_dlm_error(lockres, 0);
2916 } 3003 }
2917 3004
2918 mlog(0, "lock %s return from dlmunlock\n", lockres->l_name); 3005 mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name);
2919 3006
2920 mlog_exit(ret); 3007 mlog_exit(ret);
2921 return ret; 3008 return ret;
@@ -2930,6 +3017,7 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
2930 int new_level; 3017 int new_level;
2931 int ret = 0; 3018 int ret = 0;
2932 int set_lvb = 0; 3019 int set_lvb = 0;
3020 unsigned int gen;
2933 3021
2934 mlog_entry_void(); 3022 mlog_entry_void();
2935 3023
@@ -2939,6 +3027,32 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
2939 3027
2940recheck: 3028recheck:
2941 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3029 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3030 /* XXX
3031 * This is a *big* race. The OCFS2_LOCK_PENDING flag
3032 * exists entirely for one reason - another thread has set
3033 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
3034 *
3035 * If we do ocfs2_cancel_convert() before the other thread
3036 * calls dlm_lock(), our cancel will do nothing. We will
3037 * get no ast, and we will have no way of knowing the
3038 * cancel failed. Meanwhile, the other thread will call
3039 * into dlm_lock() and wait...forever.
3040 *
3041 * Why forever? Because another node has asked for the
3042 * lock first; that's why we're here in unblock_lock().
3043 *
3044 * The solution is OCFS2_LOCK_PENDING. When PENDING is
3045 * set, we just requeue the unblock. Only when the other
3046 * thread has called dlm_lock() and cleared PENDING will
3047 * we then cancel their request.
3048 *
3049 * All callers of dlm_lock() must set OCFS2_DLM_PENDING
3050 * at the same time they set OCFS2_DLM_BUSY. They must
3051 * clear OCFS2_DLM_PENDING after dlm_lock() returns.
3052 */
3053 if (lockres->l_flags & OCFS2_LOCK_PENDING)
3054 goto leave_requeue;
3055
2942 ctl->requeue = 1; 3056 ctl->requeue = 1;
2943 ret = ocfs2_prepare_cancel_convert(osb, lockres); 3057 ret = ocfs2_prepare_cancel_convert(osb, lockres);
2944 spin_unlock_irqrestore(&lockres->l_lock, flags); 3058 spin_unlock_irqrestore(&lockres->l_lock, flags);
@@ -2952,13 +3066,13 @@ recheck:
2952 3066
2953 /* if we're blocking an exclusive and we have *any* holders, 3067 /* if we're blocking an exclusive and we have *any* holders,
2954 * then requeue. */ 3068 * then requeue. */
2955 if ((lockres->l_blocking == LKM_EXMODE) 3069 if ((lockres->l_blocking == DLM_LOCK_EX)
2956 && (lockres->l_ex_holders || lockres->l_ro_holders)) 3070 && (lockres->l_ex_holders || lockres->l_ro_holders))
2957 goto leave_requeue; 3071 goto leave_requeue;
2958 3072
2959 /* If it's a PR we're blocking, then only 3073 /* If it's a PR we're blocking, then only
2960 * requeue if we've got any EX holders */ 3074 * requeue if we've got any EX holders */
2961 if (lockres->l_blocking == LKM_PRMODE && 3075 if (lockres->l_blocking == DLM_LOCK_PR &&
2962 lockres->l_ex_holders) 3076 lockres->l_ex_holders)
2963 goto leave_requeue; 3077 goto leave_requeue;
2964 3078
@@ -3005,7 +3119,7 @@ downconvert:
3005 ctl->requeue = 0; 3119 ctl->requeue = 0;
3006 3120
3007 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3121 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3008 if (lockres->l_level == LKM_EXMODE) 3122 if (lockres->l_level == DLM_LOCK_EX)
3009 set_lvb = 1; 3123 set_lvb = 1;
3010 3124
3011 /* 3125 /*
@@ -3018,9 +3132,11 @@ downconvert:
3018 lockres->l_ops->set_lvb(lockres); 3132 lockres->l_ops->set_lvb(lockres);
3019 } 3133 }
3020 3134
3021 ocfs2_prepare_downconvert(lockres, new_level); 3135 gen = ocfs2_prepare_downconvert(lockres, new_level);
3022 spin_unlock_irqrestore(&lockres->l_lock, flags); 3136 spin_unlock_irqrestore(&lockres->l_lock, flags);
3023 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb); 3137 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
3138 gen);
3139
3024leave: 3140leave:
3025 mlog_exit(ret); 3141 mlog_exit(ret);
3026 return ret; 3142 return ret;
@@ -3042,7 +3158,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
3042 inode = ocfs2_lock_res_inode(lockres); 3158 inode = ocfs2_lock_res_inode(lockres);
3043 mapping = inode->i_mapping; 3159 mapping = inode->i_mapping;
3044 3160
3045 if (S_ISREG(inode->i_mode)) 3161 if (!S_ISREG(inode->i_mode))
3046 goto out; 3162 goto out;
3047 3163
3048 /* 3164 /*
@@ -3059,7 +3175,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
3059 (unsigned long long)OCFS2_I(inode)->ip_blkno); 3175 (unsigned long long)OCFS2_I(inode)->ip_blkno);
3060 } 3176 }
3061 sync_mapping_buffers(mapping); 3177 sync_mapping_buffers(mapping);
3062 if (blocking == LKM_EXMODE) { 3178 if (blocking == DLM_LOCK_EX) {
3063 truncate_inode_pages(mapping, 0); 3179 truncate_inode_pages(mapping, 0);
3064 } else { 3180 } else {
3065 /* We only need to wait on the I/O if we're not also 3181 /* We only need to wait on the I/O if we're not also
@@ -3080,8 +3196,8 @@ static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
3080 struct inode *inode = ocfs2_lock_res_inode(lockres); 3196 struct inode *inode = ocfs2_lock_res_inode(lockres);
3081 int checkpointed = ocfs2_inode_fully_checkpointed(inode); 3197 int checkpointed = ocfs2_inode_fully_checkpointed(inode);
3082 3198
3083 BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE); 3199 BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
3084 BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed); 3200 BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
3085 3201
3086 if (checkpointed) 3202 if (checkpointed)
3087 return 1; 3203 return 1;
@@ -3145,7 +3261,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3145 * valid. The downconvert code will retain a PR for this node, 3261 * valid. The downconvert code will retain a PR for this node,
3146 * so there's no further work to do. 3262 * so there's no further work to do.
3147 */ 3263 */
3148 if (blocking == LKM_PRMODE) 3264 if (blocking == DLM_LOCK_PR)
3149 return UNBLOCK_CONTINUE; 3265 return UNBLOCK_CONTINUE;
3150 3266
3151 /* 3267 /*
@@ -3219,8 +3335,47 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3219 return UNBLOCK_CONTINUE_POST; 3335 return UNBLOCK_CONTINUE_POST;
3220} 3336}
3221 3337
3222void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 3338/*
3223 struct ocfs2_lock_res *lockres) 3339 * This is the filesystem locking protocol. It provides the lock handling
3340 * hooks for the underlying DLM. It has a maximum version number.
3341 * The version number allows interoperability with systems running at
3342 * the same major number and an equal or smaller minor number.
3343 *
3344 * Whenever the filesystem does new things with locks (adds or removes a
3345 * lock, orders them differently, does different things underneath a lock),
3346 * the version must be changed. The protocol is negotiated when joining
3347 * the dlm domain. A node may join the domain if its major version is
3348 * identical to all other nodes and its minor version is greater than
3349 * or equal to all other nodes. When its minor version is greater than
3350 * the other nodes, it will run at the minor version specified by the
3351 * other nodes.
3352 *
3353 * If a locking change is made that will not be compatible with older
3354 * versions, the major number must be increased and the minor version set
3355 * to zero. If a change merely adds a behavior that can be disabled when
3356 * speaking to older versions, the minor version must be increased. If a
3357 * change adds a fully backwards compatible change (eg, LVB changes that
3358 * are just ignored by older versions), the version does not need to be
3359 * updated.
3360 */
3361static struct ocfs2_locking_protocol lproto = {
3362 .lp_max_version = {
3363 .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
3364 .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
3365 },
3366 .lp_lock_ast = ocfs2_locking_ast,
3367 .lp_blocking_ast = ocfs2_blocking_ast,
3368 .lp_unlock_ast = ocfs2_unlock_ast,
3369};
3370
3371void ocfs2_set_locking_protocol(void)
3372{
3373 ocfs2_stack_glue_set_locking_protocol(&lproto);
3374}
3375
3376
3377static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
3378 struct ocfs2_lock_res *lockres)
3224{ 3379{
3225 int status; 3380 int status;
3226 struct ocfs2_unblock_ctl ctl = {0, 0,}; 3381 struct ocfs2_unblock_ctl ctl = {0, 0,};
@@ -3356,7 +3511,7 @@ static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
3356 return should_wake; 3511 return should_wake;
3357} 3512}
3358 3513
3359int ocfs2_downconvert_thread(void *arg) 3514static int ocfs2_downconvert_thread(void *arg)
3360{ 3515{
3361 int status = 0; 3516 int status = 0;
3362 struct ocfs2_super *osb = arg; 3517 struct ocfs2_super *osb = arg;
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 1d5b0699d0a9..2bb01f09c1b1 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -58,7 +58,7 @@ struct ocfs2_meta_lvb {
58#define OCFS2_LOCK_NONBLOCK (0x04) 58#define OCFS2_LOCK_NONBLOCK (0x04)
59 59
60int ocfs2_dlm_init(struct ocfs2_super *osb); 60int ocfs2_dlm_init(struct ocfs2_super *osb);
61void ocfs2_dlm_shutdown(struct ocfs2_super *osb); 61void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending);
62void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); 62void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
63void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 63void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
64 enum ocfs2_lock_type type, 64 enum ocfs2_lock_type type,
@@ -109,12 +109,11 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
109 struct ocfs2_lock_res *lockres); 109 struct ocfs2_lock_res *lockres);
110 110
111/* for the downconvert thread */ 111/* for the downconvert thread */
112void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
113 struct ocfs2_lock_res *lockres);
114void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb); 112void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb);
115 113
116struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); 114struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void);
117void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); 115void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug);
118 116
119extern const struct dlm_protocol_version ocfs2_locking_protocol; 117/* To set the locking protocol on module initialization */
118void ocfs2_set_locking_protocol(void);
120#endif /* DLMGLUE_H */ 119#endif /* DLMGLUE_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ed5d5232e85d..9154c82d3258 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2242,7 +2242,7 @@ const struct file_operations ocfs2_fops = {
2242 .open = ocfs2_file_open, 2242 .open = ocfs2_file_open,
2243 .aio_read = ocfs2_file_aio_read, 2243 .aio_read = ocfs2_file_aio_read,
2244 .aio_write = ocfs2_file_aio_write, 2244 .aio_write = ocfs2_file_aio_write,
2245 .ioctl = ocfs2_ioctl, 2245 .unlocked_ioctl = ocfs2_ioctl,
2246#ifdef CONFIG_COMPAT 2246#ifdef CONFIG_COMPAT
2247 .compat_ioctl = ocfs2_compat_ioctl, 2247 .compat_ioctl = ocfs2_compat_ioctl,
2248#endif 2248#endif
@@ -2258,7 +2258,7 @@ const struct file_operations ocfs2_dops = {
2258 .fsync = ocfs2_sync_file, 2258 .fsync = ocfs2_sync_file,
2259 .release = ocfs2_dir_release, 2259 .release = ocfs2_dir_release,
2260 .open = ocfs2_dir_open, 2260 .open = ocfs2_dir_open,
2261 .ioctl = ocfs2_ioctl, 2261 .unlocked_ioctl = ocfs2_ioctl,
2262#ifdef CONFIG_COMPAT 2262#ifdef CONFIG_COMPAT
2263 .compat_ioctl = ocfs2_compat_ioctl, 2263 .compat_ioctl = ocfs2_compat_ioctl,
2264#endif 2264#endif
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c
index c0efd9489fe8..c6e7213db868 100644
--- a/fs/ocfs2/heartbeat.c
+++ b/fs/ocfs2/heartbeat.c
@@ -28,9 +28,6 @@
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/highmem.h> 30#include <linux/highmem.h>
31#include <linux/kmod.h>
32
33#include <dlm/dlmapi.h>
34 31
35#define MLOG_MASK_PREFIX ML_SUPER 32#define MLOG_MASK_PREFIX ML_SUPER
36#include <cluster/masklog.h> 33#include <cluster/masklog.h>
@@ -48,32 +45,36 @@ static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map,
48 int bit); 45 int bit);
49static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, 46static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map,
50 int bit); 47 int bit);
51static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map); 48
52static void __ocfs2_node_map_dup(struct ocfs2_node_map *target, 49/* special case -1 for now
53 struct ocfs2_node_map *from); 50 * TODO: should *really* make sure the calling func never passes -1!! */
54static void __ocfs2_node_map_set(struct ocfs2_node_map *target, 51static void ocfs2_node_map_init(struct ocfs2_node_map *map)
55 struct ocfs2_node_map *from); 52{
53 map->num_nodes = OCFS2_NODE_MAP_MAX_NODES;
54 memset(map->map, 0, BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES) *
55 sizeof(unsigned long));
56}
56 57
57void ocfs2_init_node_maps(struct ocfs2_super *osb) 58void ocfs2_init_node_maps(struct ocfs2_super *osb)
58{ 59{
59 spin_lock_init(&osb->node_map_lock); 60 spin_lock_init(&osb->node_map_lock);
60 ocfs2_node_map_init(&osb->recovery_map);
61 ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); 61 ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs);
62} 62}
63 63
64static void ocfs2_do_node_down(int node_num, 64void ocfs2_do_node_down(int node_num, void *data)
65 struct ocfs2_super *osb)
66{ 65{
66 struct ocfs2_super *osb = data;
67
67 BUG_ON(osb->node_num == node_num); 68 BUG_ON(osb->node_num == node_num);
68 69
69 mlog(0, "ocfs2: node down event for %d\n", node_num); 70 mlog(0, "ocfs2: node down event for %d\n", node_num);
70 71
71 if (!osb->dlm) { 72 if (!osb->cconn) {
72 /* 73 /*
73 * No DLM means we're not even ready to participate yet. 74 * No cluster connection means we're not even ready to
74 * We check the slots after the DLM comes up, so we will 75 * participate yet. We check the slots after the cluster
75 * notice the node death then. We can safely ignore it 76 * comes up, so we will notice the node death then. We
76 * here. 77 * can safely ignore it here.
77 */ 78 */
78 return; 79 return;
79 } 80 }
@@ -81,70 +82,6 @@ static void ocfs2_do_node_down(int node_num,
81 ocfs2_recovery_thread(osb, node_num); 82 ocfs2_recovery_thread(osb, node_num);
82} 83}
83 84
84/* Called from the dlm when it's about to evict a node. We may also
85 * get a heartbeat callback later. */
86static void ocfs2_dlm_eviction_cb(int node_num,
87 void *data)
88{
89 struct ocfs2_super *osb = (struct ocfs2_super *) data;
90 struct super_block *sb = osb->sb;
91
92 mlog(ML_NOTICE, "device (%u,%u): dlm has evicted node %d\n",
93 MAJOR(sb->s_dev), MINOR(sb->s_dev), node_num);
94
95 ocfs2_do_node_down(node_num, osb);
96}
97
98void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb)
99{
100 /* Not exactly a heartbeat callback, but leads to essentially
101 * the same path so we set it up here. */
102 dlm_setup_eviction_cb(&osb->osb_eviction_cb,
103 ocfs2_dlm_eviction_cb,
104 osb);
105}
106
107void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
108{
109 int ret;
110 char *argv[5], *envp[3];
111
112 if (ocfs2_mount_local(osb))
113 return;
114
115 if (!osb->uuid_str) {
116 /* This can happen if we don't get far enough in mount... */
117 mlog(0, "No UUID with which to stop heartbeat!\n\n");
118 return;
119 }
120
121 argv[0] = (char *)o2nm_get_hb_ctl_path();
122 argv[1] = "-K";
123 argv[2] = "-u";
124 argv[3] = osb->uuid_str;
125 argv[4] = NULL;
126
127 mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]);
128
129 /* minimal command environment taken from cpu_run_sbin_hotplug */
130 envp[0] = "HOME=/";
131 envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
132 envp[2] = NULL;
133
134 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
135 if (ret < 0)
136 mlog_errno(ret);
137}
138
139/* special case -1 for now
140 * TODO: should *really* make sure the calling func never passes -1!! */
141void ocfs2_node_map_init(struct ocfs2_node_map *map)
142{
143 map->num_nodes = OCFS2_NODE_MAP_MAX_NODES;
144 memset(map->map, 0, BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES) *
145 sizeof(unsigned long));
146}
147
148static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, 85static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map,
149 int bit) 86 int bit)
150{ 87{
@@ -196,108 +133,3 @@ int ocfs2_node_map_test_bit(struct ocfs2_super *osb,
196 return ret; 133 return ret;
197} 134}
198 135
199static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map)
200{
201 int bit;
202 bit = find_next_bit(map->map, map->num_nodes, 0);
203 if (bit < map->num_nodes)
204 return 0;
205 return 1;
206}
207
208int ocfs2_node_map_is_empty(struct ocfs2_super *osb,
209 struct ocfs2_node_map *map)
210{
211 int ret;
212 BUG_ON(map->num_nodes == 0);
213 spin_lock(&osb->node_map_lock);
214 ret = __ocfs2_node_map_is_empty(map);
215 spin_unlock(&osb->node_map_lock);
216 return ret;
217}
218
/* Copy 'from' into 'target'.  Runs under osb->node_map_lock (see
 * ocfs2_node_map_is_only()); size mismatches are caught by the
 * BUG_ONs in __ocfs2_node_map_set(). */
static void __ocfs2_node_map_dup(struct ocfs2_node_map *target,
				 struct ocfs2_node_map *from)
{
	BUG_ON(from->num_nodes == 0);
	ocfs2_node_map_init(target);
	__ocfs2_node_map_set(target, from);
}
226
/* returns 1 if bit is the only bit set in target, 0 otherwise */
int ocfs2_node_map_is_only(struct ocfs2_super *osb,
			   struct ocfs2_node_map *target,
			   int bit)
{
	struct ocfs2_node_map temp;
	int ret;

	spin_lock(&osb->node_map_lock);
	/* Copy target, clear the bit in question, then check whether
	 * anything remains - all under the lock so target can't
	 * change underneath us. */
	__ocfs2_node_map_dup(&temp, target);
	__ocfs2_node_map_clear_bit(&temp, bit);
	ret = __ocfs2_node_map_is_empty(&temp);
	spin_unlock(&osb->node_map_lock);

	return ret;
}
243
244static void __ocfs2_node_map_set(struct ocfs2_node_map *target,
245 struct ocfs2_node_map *from)
246{
247 int num_longs, i;
248
249 BUG_ON(target->num_nodes != from->num_nodes);
250 BUG_ON(target->num_nodes == 0);
251
252 num_longs = BITS_TO_LONGS(target->num_nodes);
253 for (i = 0; i < num_longs; i++)
254 target->map[i] = from->map[i];
255}
256
/* Returns whether the recovery bit was actually set - it may not be
 * if a node is still marked as needing recovery */
int ocfs2_recovery_map_set(struct ocfs2_super *osb,
			   int num)
{
	int set = 0;

	spin_lock(&osb->node_map_lock);

	/* test-and-set under the lock: only the caller that flips the
	 * bit from 0 to 1 sees set == 1 */
	if (!test_bit(num, osb->recovery_map.map)) {
		__ocfs2_node_map_set_bit(&osb->recovery_map, num);
		set = 1;
	}

	spin_unlock(&osb->node_map_lock);

	return set;
}
275
/* Drop node 'num' from the recovery map; thin wrapper that supplies
 * the osb-embedded recovery map to the generic clear helper. */
void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
			      int num)
{
	ocfs2_node_map_clear_bit(osb, &osb->recovery_map, num);
}
281
282int ocfs2_node_map_iterate(struct ocfs2_super *osb,
283 struct ocfs2_node_map *map,
284 int idx)
285{
286 int i = idx;
287
288 idx = O2NM_INVALID_NODE_NUM;
289 spin_lock(&osb->node_map_lock);
290 if ((i != O2NM_INVALID_NODE_NUM) &&
291 (i >= 0) &&
292 (i < map->num_nodes)) {
293 while(i < map->num_nodes) {
294 if (test_bit(i, map->map)) {
295 idx = i;
296 break;
297 }
298 i++;
299 }
300 }
301 spin_unlock(&osb->node_map_lock);
302 return idx;
303}
diff --git a/fs/ocfs2/heartbeat.h b/fs/ocfs2/heartbeat.h
index 56859211888a..74b9c5dda28d 100644
--- a/fs/ocfs2/heartbeat.h
+++ b/fs/ocfs2/heartbeat.h
@@ -28,14 +28,10 @@
28 28
29void ocfs2_init_node_maps(struct ocfs2_super *osb); 29void ocfs2_init_node_maps(struct ocfs2_super *osb);
30 30
31void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb); 31void ocfs2_do_node_down(int node_num, void *data);
32void ocfs2_stop_heartbeat(struct ocfs2_super *osb);
33 32
34/* node map functions - used to keep track of mounted and in-recovery 33/* node map functions - used to keep track of mounted and in-recovery
35 * nodes. */ 34 * nodes. */
36void ocfs2_node_map_init(struct ocfs2_node_map *map);
37int ocfs2_node_map_is_empty(struct ocfs2_super *osb,
38 struct ocfs2_node_map *map);
39void ocfs2_node_map_set_bit(struct ocfs2_super *osb, 35void ocfs2_node_map_set_bit(struct ocfs2_super *osb,
40 struct ocfs2_node_map *map, 36 struct ocfs2_node_map *map,
41 int bit); 37 int bit);
@@ -45,21 +41,5 @@ void ocfs2_node_map_clear_bit(struct ocfs2_super *osb,
45int ocfs2_node_map_test_bit(struct ocfs2_super *osb, 41int ocfs2_node_map_test_bit(struct ocfs2_super *osb,
46 struct ocfs2_node_map *map, 42 struct ocfs2_node_map *map,
47 int bit); 43 int bit);
48int ocfs2_node_map_iterate(struct ocfs2_super *osb,
49 struct ocfs2_node_map *map,
50 int idx);
51static inline int ocfs2_node_map_first_set_bit(struct ocfs2_super *osb,
52 struct ocfs2_node_map *map)
53{
54 return ocfs2_node_map_iterate(osb, map, 0);
55}
56int ocfs2_recovery_map_set(struct ocfs2_super *osb,
57 int num);
58void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
59 int num);
60/* returns 1 if bit is the only bit set in target, 0 otherwise */
61int ocfs2_node_map_is_only(struct ocfs2_super *osb,
62 struct ocfs2_node_map *target,
63 int bit);
64 44
65#endif /* OCFS2_HEARTBEAT_H */ 45#endif /* OCFS2_HEARTBEAT_H */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 5177fba5162b..7b142f0ce995 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -7,6 +7,7 @@
7 7
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/mount.h> 9#include <linux/mount.h>
10#include <linux/smp_lock.h>
10 11
11#define MLOG_MASK_PREFIX ML_INODE 12#define MLOG_MASK_PREFIX ML_INODE
12#include <cluster/masklog.h> 13#include <cluster/masklog.h>
@@ -59,10 +60,6 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
59 goto bail; 60 goto bail;
60 } 61 }
61 62
62 status = -EROFS;
63 if (IS_RDONLY(inode))
64 goto bail_unlock;
65
66 status = -EACCES; 63 status = -EACCES;
67 if (!is_owner_or_cap(inode)) 64 if (!is_owner_or_cap(inode))
68 goto bail_unlock; 65 goto bail_unlock;
@@ -112,9 +109,9 @@ bail:
112 return status; 109 return status;
113} 110}
114 111
115int ocfs2_ioctl(struct inode * inode, struct file * filp, 112long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
116 unsigned int cmd, unsigned long arg)
117{ 113{
114 struct inode *inode = filp->f_path.dentry->d_inode;
118 unsigned int flags; 115 unsigned int flags;
119 int new_clusters; 116 int new_clusters;
120 int status; 117 int status;
@@ -133,8 +130,13 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
133 if (get_user(flags, (int __user *) arg)) 130 if (get_user(flags, (int __user *) arg))
134 return -EFAULT; 131 return -EFAULT;
135 132
136 return ocfs2_set_inode_attr(inode, flags, 133 status = mnt_want_write(filp->f_path.mnt);
134 if (status)
135 return status;
136 status = ocfs2_set_inode_attr(inode, flags,
137 OCFS2_FL_MODIFIABLE); 137 OCFS2_FL_MODIFIABLE);
138 mnt_drop_write(filp->f_path.mnt);
139 return status;
138 case OCFS2_IOC_RESVSP: 140 case OCFS2_IOC_RESVSP:
139 case OCFS2_IOC_RESVSP64: 141 case OCFS2_IOC_RESVSP64:
140 case OCFS2_IOC_UNRESVSP: 142 case OCFS2_IOC_UNRESVSP:
@@ -168,9 +170,6 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
168#ifdef CONFIG_COMPAT 170#ifdef CONFIG_COMPAT
169long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) 171long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
170{ 172{
171 struct inode *inode = file->f_path.dentry->d_inode;
172 int ret;
173
174 switch (cmd) { 173 switch (cmd) {
175 case OCFS2_IOC32_GETFLAGS: 174 case OCFS2_IOC32_GETFLAGS:
176 cmd = OCFS2_IOC_GETFLAGS; 175 cmd = OCFS2_IOC_GETFLAGS;
@@ -190,9 +189,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
190 return -ENOIOCTLCMD; 189 return -ENOIOCTLCMD;
191 } 190 }
192 191
193 lock_kernel(); 192 return ocfs2_ioctl(file, cmd, arg);
194 ret = ocfs2_ioctl(inode, file, cmd, arg);
195 unlock_kernel();
196 return ret;
197} 193}
198#endif 194#endif
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h
index 4d6c4f430d0d..cf9a5ee30fef 100644
--- a/fs/ocfs2/ioctl.h
+++ b/fs/ocfs2/ioctl.h
@@ -10,8 +10,7 @@
10#ifndef OCFS2_IOCTL_H 10#ifndef OCFS2_IOCTL_H
11#define OCFS2_IOCTL_H 11#define OCFS2_IOCTL_H
12 12
13int ocfs2_ioctl(struct inode * inode, struct file * filp, 13long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
14 unsigned int cmd, unsigned long arg);
15long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg); 14long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg);
16 15
17#endif /* OCFS2_IOCTL_H */ 16#endif /* OCFS2_IOCTL_H */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f31c7e8c19c3..9698338adc39 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -64,6 +64,137 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
64 int slot); 64 int slot);
65static int ocfs2_commit_thread(void *arg); 65static int ocfs2_commit_thread(void *arg);
66 66
67
/*
 * The recovery map tracks the node numbers still needing journal
 * recovery.  Despite the old "linked list" wording, it is a
 * fixed-size array of max_slots entries (allocated in
 * ocfs2_recovery_init()); rm_used counts the live entries packed at
 * the front.  Accesses run under osb->osb_lock (see
 * __ocfs2_recovery_map_test()), not recovery_lock.
 */

struct ocfs2_recovery_map {
	unsigned int rm_used;		/* number of valid entries */
	unsigned int *rm_entries;	/* node numbers awaiting recovery */
};
77
/* Set up per-mount recovery state: the recovery mutex, the recovery
 * event waitqueue, and the recovery map (one entry per slot).
 * Returns 0 on success or -ENOMEM.
 *
 * The map struct and its rm_entries array come from a single
 * kzalloc() so ocfs2_recovery_exit() can free both with one kfree().
 */
int ocfs2_recovery_init(struct ocfs2_super *osb)
{
	struct ocfs2_recovery_map *rm;

	mutex_init(&osb->recovery_lock);
	osb->disable_recovery = 0;
	osb->recovery_thread_task = NULL;
	init_waitqueue_head(&osb->recovery_event);

	/* one allocation: struct immediately followed by the array */
	rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
		     osb->max_slots * sizeof(unsigned int),
		     GFP_KERNEL);
	if (!rm) {
		mlog_errno(-ENOMEM);
		return -ENOMEM;
	}

	/* point rm_entries at the trailing array */
	rm->rm_entries = (unsigned int *)((char *)rm +
					  sizeof(struct ocfs2_recovery_map));
	osb->recovery_map = rm;

	return 0;
}
101
/* we can't grab the goofy sem lock from inside wait_event, so we use
 * memory barriers to make sure that we'll see the null task before
 * being woken up.  Used as the wait_event() condition in
 * ocfs2_recovery_exit(). */
static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
{
	mb();
	return osb->recovery_thread_task != NULL;
}
110
/* Tear down recovery state at unmount time: forbid new recovery
 * threads, wait out any running one, flush queued completion work,
 * then free the recovery map.  The ordering below matters. */
void ocfs2_recovery_exit(struct ocfs2_super *osb)
{
	struct ocfs2_recovery_map *rm;

	/* disable any new recovery threads and wait for any currently
	 * running ones to exit. Do this before setting the vol_state. */
	mutex_lock(&osb->recovery_lock);
	osb->disable_recovery = 1;
	mutex_unlock(&osb->recovery_lock);
	wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));

	/* At this point, we know that no more recovery threads can be
	 * launched, so wait for any recovery completion work to
	 * complete. */
	flush_workqueue(ocfs2_wq);

	/*
	 * Now that recovery is shut down, and the osb is about to be
	 * freed, the osb_lock is not taken here.
	 */
	rm = osb->recovery_map;
	/* XXX: Should we bug if there are dirty entries? */

	kfree(rm);
}
136
137static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
138 unsigned int node_num)
139{
140 int i;
141 struct ocfs2_recovery_map *rm = osb->recovery_map;
142
143 assert_spin_locked(&osb->osb_lock);
144
145 for (i = 0; i < rm->rm_used; i++) {
146 if (rm->rm_entries[i] == node_num)
147 return 1;
148 }
149
150 return 0;
151}
152
/* Behaves like test-and-set.  Returns the previous value */
static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
				  unsigned int node_num)
{
	struct ocfs2_recovery_map *rm = osb->recovery_map;

	spin_lock(&osb->osb_lock);
	if (__ocfs2_recovery_map_test(osb, node_num)) {
		spin_unlock(&osb->osb_lock);
		return 1;
	}

	/* XXX: Can this be exploited? Not from o2dlm... */
	BUG_ON(rm->rm_used >= osb->max_slots);

	/* append at the tail so live entries stay contiguous */
	rm->rm_entries[rm->rm_used] = node_num;
	rm->rm_used++;
	spin_unlock(&osb->osb_lock);

	return 0;
}
174
175static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
176 unsigned int node_num)
177{
178 int i;
179 struct ocfs2_recovery_map *rm = osb->recovery_map;
180
181 spin_lock(&osb->osb_lock);
182
183 for (i = 0; i < rm->rm_used; i++) {
184 if (rm->rm_entries[i] == node_num)
185 break;
186 }
187
188 if (i < rm->rm_used) {
189 /* XXX: be careful with the pointer math */
190 memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]),
191 (rm->rm_used - i - 1) * sizeof(unsigned int));
192 rm->rm_used--;
193 }
194
195 spin_unlock(&osb->osb_lock);
196}
197
67static int ocfs2_commit_cache(struct ocfs2_super *osb) 198static int ocfs2_commit_cache(struct ocfs2_super *osb)
68{ 199{
69 int status = 0; 200 int status = 0;
@@ -586,8 +717,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
586 717
587 mlog_entry_void(); 718 mlog_entry_void();
588 719
589 if (!journal) 720 BUG_ON(!journal);
590 BUG();
591 721
592 osb = journal->j_osb; 722 osb = journal->j_osb;
593 723
@@ -650,6 +780,23 @@ bail:
650 return status; 780 return status;
651} 781}
652 782
783static int ocfs2_recovery_completed(struct ocfs2_super *osb)
784{
785 int empty;
786 struct ocfs2_recovery_map *rm = osb->recovery_map;
787
788 spin_lock(&osb->osb_lock);
789 empty = (rm->rm_used == 0);
790 spin_unlock(&osb->osb_lock);
791
792 return empty;
793}
794
/* Block until the recovery map is empty, i.e. all pending node
 * recovery has completed (or none was pending to begin with). */
void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
{
	wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
}
799
653/* 800/*
654 * JBD Might read a cached version of another nodes journal file. We 801 * JBD Might read a cached version of another nodes journal file. We
655 * don't want this as this file changes often and we get no 802 * don't want this as this file changes often and we get no
@@ -848,6 +995,7 @@ static int __ocfs2_recovery_thread(void *arg)
848{ 995{
849 int status, node_num; 996 int status, node_num;
850 struct ocfs2_super *osb = arg; 997 struct ocfs2_super *osb = arg;
998 struct ocfs2_recovery_map *rm = osb->recovery_map;
851 999
852 mlog_entry_void(); 1000 mlog_entry_void();
853 1001
@@ -863,26 +1011,29 @@ restart:
863 goto bail; 1011 goto bail;
864 } 1012 }
865 1013
866 while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { 1014 spin_lock(&osb->osb_lock);
867 node_num = ocfs2_node_map_first_set_bit(osb, 1015 while (rm->rm_used) {
868 &osb->recovery_map); 1016 /* It's always safe to remove entry zero, as we won't
869 if (node_num == O2NM_INVALID_NODE_NUM) { 1017 * clear it until ocfs2_recover_node() has succeeded. */
870 mlog(0, "Out of nodes to recover.\n"); 1018 node_num = rm->rm_entries[0];
871 break; 1019 spin_unlock(&osb->osb_lock);
872 }
873 1020
874 status = ocfs2_recover_node(osb, node_num); 1021 status = ocfs2_recover_node(osb, node_num);
875 if (status < 0) { 1022 if (!status) {
1023 ocfs2_recovery_map_clear(osb, node_num);
1024 } else {
876 mlog(ML_ERROR, 1025 mlog(ML_ERROR,
877 "Error %d recovering node %d on device (%u,%u)!\n", 1026 "Error %d recovering node %d on device (%u,%u)!\n",
878 status, node_num, 1027 status, node_num,
879 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 1028 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
880 mlog(ML_ERROR, "Volume requires unmount.\n"); 1029 mlog(ML_ERROR, "Volume requires unmount.\n");
881 continue;
882 } 1030 }
883 1031
884 ocfs2_recovery_map_clear(osb, node_num); 1032 spin_lock(&osb->osb_lock);
885 } 1033 }
1034 spin_unlock(&osb->osb_lock);
1035 mlog(0, "All nodes recovered\n");
1036
886 ocfs2_super_unlock(osb, 1); 1037 ocfs2_super_unlock(osb, 1);
887 1038
888 /* We always run recovery on our own orphan dir - the dead 1039 /* We always run recovery on our own orphan dir - the dead
@@ -893,8 +1044,7 @@ restart:
893 1044
894bail: 1045bail:
895 mutex_lock(&osb->recovery_lock); 1046 mutex_lock(&osb->recovery_lock);
896 if (!status && 1047 if (!status && !ocfs2_recovery_completed(osb)) {
897 !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {
898 mutex_unlock(&osb->recovery_lock); 1048 mutex_unlock(&osb->recovery_lock);
899 goto restart; 1049 goto restart;
900 } 1050 }
@@ -924,8 +1074,8 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
924 1074
925 /* People waiting on recovery will wait on 1075 /* People waiting on recovery will wait on
926 * the recovery map to empty. */ 1076 * the recovery map to empty. */
927 if (!ocfs2_recovery_map_set(osb, node_num)) 1077 if (ocfs2_recovery_map_set(osb, node_num))
928 mlog(0, "node %d already be in recovery.\n", node_num); 1078 mlog(0, "node %d already in recovery map.\n", node_num);
929 1079
930 mlog(0, "starting recovery thread...\n"); 1080 mlog(0, "starting recovery thread...\n");
931 1081
@@ -1079,7 +1229,6 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1079{ 1229{
1080 int status = 0; 1230 int status = 0;
1081 int slot_num; 1231 int slot_num;
1082 struct ocfs2_slot_info *si = osb->slot_info;
1083 struct ocfs2_dinode *la_copy = NULL; 1232 struct ocfs2_dinode *la_copy = NULL;
1084 struct ocfs2_dinode *tl_copy = NULL; 1233 struct ocfs2_dinode *tl_copy = NULL;
1085 1234
@@ -1092,8 +1241,8 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1092 * case we should've called ocfs2_journal_load instead. */ 1241 * case we should've called ocfs2_journal_load instead. */
1093 BUG_ON(osb->node_num == node_num); 1242 BUG_ON(osb->node_num == node_num);
1094 1243
1095 slot_num = ocfs2_node_num_to_slot(si, node_num); 1244 slot_num = ocfs2_node_num_to_slot(osb, node_num);
1096 if (slot_num == OCFS2_INVALID_SLOT) { 1245 if (slot_num == -ENOENT) {
1097 status = 0; 1246 status = 0;
1098 mlog(0, "no slot for this node, so no recovery required.\n"); 1247 mlog(0, "no slot for this node, so no recovery required.\n");
1099 goto done; 1248 goto done;
@@ -1123,8 +1272,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1123 1272
1124 /* Likewise, this would be a strange but ultimately not so 1273 /* Likewise, this would be a strange but ultimately not so
1125 * harmful place to get an error... */ 1274 * harmful place to get an error... */
1126 ocfs2_clear_slot(si, slot_num); 1275 status = ocfs2_clear_slot(osb, slot_num);
1127 status = ocfs2_update_disk_slots(osb, si);
1128 if (status < 0) 1276 if (status < 0)
1129 mlog_errno(status); 1277 mlog_errno(status);
1130 1278
@@ -1184,23 +1332,24 @@ bail:
1184 * slot info struct has been updated from disk. */ 1332 * slot info struct has been updated from disk. */
1185int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) 1333int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1186{ 1334{
1187 int status, i, node_num; 1335 unsigned int node_num;
1188 struct ocfs2_slot_info *si = osb->slot_info; 1336 int status, i;
1189 1337
1190 /* This is called with the super block cluster lock, so we 1338 /* This is called with the super block cluster lock, so we
1191 * know that the slot map can't change underneath us. */ 1339 * know that the slot map can't change underneath us. */
1192 1340
1193 spin_lock(&si->si_lock); 1341 spin_lock(&osb->osb_lock);
1194 for(i = 0; i < si->si_num_slots; i++) { 1342 for (i = 0; i < osb->max_slots; i++) {
1195 if (i == osb->slot_num) 1343 if (i == osb->slot_num)
1196 continue; 1344 continue;
1197 if (ocfs2_is_empty_slot(si, i)) 1345
1346 status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
1347 if (status == -ENOENT)
1198 continue; 1348 continue;
1199 1349
1200 node_num = si->si_global_node_nums[i]; 1350 if (__ocfs2_recovery_map_test(osb, node_num))
1201 if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num))
1202 continue; 1351 continue;
1203 spin_unlock(&si->si_lock); 1352 spin_unlock(&osb->osb_lock);
1204 1353
1205 /* Ok, we have a slot occupied by another node which 1354 /* Ok, we have a slot occupied by another node which
1206 * is not in the recovery map. We trylock his journal 1355 * is not in the recovery map. We trylock his journal
@@ -1216,9 +1365,9 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1216 goto bail; 1365 goto bail;
1217 } 1366 }
1218 1367
1219 spin_lock(&si->si_lock); 1368 spin_lock(&osb->osb_lock);
1220 } 1369 }
1221 spin_unlock(&si->si_lock); 1370 spin_unlock(&osb->osb_lock);
1222 1371
1223 status = 0; 1372 status = 0;
1224bail: 1373bail:
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 220f3e818e78..db82be2532ed 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -134,6 +134,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
134 134
135/* Exported only for the journal struct init code in super.c. Do not call. */ 135/* Exported only for the journal struct init code in super.c. Do not call. */
136void ocfs2_complete_recovery(struct work_struct *work); 136void ocfs2_complete_recovery(struct work_struct *work);
137void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
138
139int ocfs2_recovery_init(struct ocfs2_super *osb);
140void ocfs2_recovery_exit(struct ocfs2_super *osb);
137 141
138/* 142/*
139 * Journal Control: 143 * Journal Control:
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index add1ffdc5c6c..ce0dc147602a 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -120,9 +120,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
120 120
121 mlog_entry_void(); 121 mlog_entry_void();
122 122
123 if (ocfs2_mount_local(osb))
124 goto bail;
125
126 if (osb->local_alloc_size == 0) 123 if (osb->local_alloc_size == 0)
127 goto bail; 124 goto bail;
128 125
@@ -450,6 +447,8 @@ out_mutex:
450 iput(main_bm_inode); 447 iput(main_bm_inode);
451 448
452out: 449out:
450 if (!status)
451 ocfs2_init_inode_steal_slot(osb);
453 mlog_exit(status); 452 mlog_exit(status);
454 return status; 453 return status;
455} 454}
@@ -526,6 +525,8 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
526 } 525 }
527 526
528 ac->ac_inode = local_alloc_inode; 527 ac->ac_inode = local_alloc_inode;
528 /* We should never use localalloc from another slot */
529 ac->ac_alloc_slot = osb->slot_num;
529 ac->ac_which = OCFS2_AC_USE_LOCAL; 530 ac->ac_which = OCFS2_AC_USE_LOCAL;
530 get_bh(osb->local_alloc_bh); 531 get_bh(osb->local_alloc_bh);
531 ac->ac_bh = osb->local_alloc_bh; 532 ac->ac_bh = osb->local_alloc_bh;
@@ -588,8 +589,7 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
588 while(bits_wanted--) 589 while(bits_wanted--)
589 ocfs2_set_bit(start++, bitmap); 590 ocfs2_set_bit(start++, bitmap);
590 591
591 alloc->id1.bitmap1.i_used = cpu_to_le32(*num_bits + 592 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
592 le32_to_cpu(alloc->id1.bitmap1.i_used));
593 593
594 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh); 594 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
595 if (status < 0) { 595 if (status < 0) {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ae9ad9587516..d5d808fe0140 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -424,7 +424,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
424 fe->i_fs_generation = cpu_to_le32(osb->fs_generation); 424 fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
425 fe->i_blkno = cpu_to_le64(fe_blkno); 425 fe->i_blkno = cpu_to_le64(fe_blkno);
426 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); 426 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
427 fe->i_suballoc_slot = cpu_to_le16(osb->slot_num); 427 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
428 fe->i_uid = cpu_to_le32(current->fsuid); 428 fe->i_uid = cpu_to_le32(current->fsuid);
429 if (dir->i_mode & S_ISGID) { 429 if (dir->i_mode & S_ISGID) {
430 fe->i_gid = cpu_to_le32(dir->i_gid); 430 fe->i_gid = cpu_to_le32(dir->i_gid);
@@ -997,7 +997,7 @@ static int ocfs2_rename(struct inode *old_dir,
997 * 997 *
998 * And that's why, just like the VFS, we need a file system 998 * And that's why, just like the VFS, we need a file system
999 * rename lock. */ 999 * rename lock. */
1000 if (old_dentry != new_dentry) { 1000 if (old_dir != new_dir && S_ISDIR(old_inode->i_mode)) {
1001 status = ocfs2_rename_lock(osb); 1001 status = ocfs2_rename_lock(osb);
1002 if (status < 0) { 1002 if (status < 0) {
1003 mlog_errno(status); 1003 mlog_errno(status);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 6546cef212e3..31692379c170 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -36,11 +36,8 @@
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37#include <linux/jbd.h> 37#include <linux/jbd.h>
38 38
39#include "cluster/nodemanager.h" 39/* For union ocfs2_dlm_lksb */
40#include "cluster/heartbeat.h" 40#include "stackglue.h"
41#include "cluster/tcp.h"
42
43#include "dlm/dlmapi.h"
44 41
45#include "ocfs2_fs.h" 42#include "ocfs2_fs.h"
46#include "ocfs2_lockid.h" 43#include "ocfs2_lockid.h"
@@ -101,6 +98,9 @@ enum ocfs2_unlock_action {
101 * dropped. */ 98 * dropped. */
102#define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ 99#define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */
103#define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ 100#define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */
101#define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a
102 call to dlm_lock. Only
103 exists with BUSY set. */
104 104
105struct ocfs2_lock_res_ops; 105struct ocfs2_lock_res_ops;
106 106
@@ -120,13 +120,14 @@ struct ocfs2_lock_res {
120 int l_level; 120 int l_level;
121 unsigned int l_ro_holders; 121 unsigned int l_ro_holders;
122 unsigned int l_ex_holders; 122 unsigned int l_ex_holders;
123 struct dlm_lockstatus l_lksb; 123 union ocfs2_dlm_lksb l_lksb;
124 124
125 /* used from AST/BAST funcs. */ 125 /* used from AST/BAST funcs. */
126 enum ocfs2_ast_action l_action; 126 enum ocfs2_ast_action l_action;
127 enum ocfs2_unlock_action l_unlock_action; 127 enum ocfs2_unlock_action l_unlock_action;
128 int l_requested; 128 int l_requested;
129 int l_blocking; 129 int l_blocking;
130 unsigned int l_pending_gen;
130 131
131 wait_queue_head_t l_event; 132 wait_queue_head_t l_event;
132 133
@@ -179,6 +180,8 @@ enum ocfs2_mount_options
179#define OCFS2_DEFAULT_ATIME_QUANTUM 60 180#define OCFS2_DEFAULT_ATIME_QUANTUM 60
180 181
181struct ocfs2_journal; 182struct ocfs2_journal;
183struct ocfs2_slot_info;
184struct ocfs2_recovery_map;
182struct ocfs2_super 185struct ocfs2_super
183{ 186{
184 struct task_struct *commit_task; 187 struct task_struct *commit_task;
@@ -190,7 +193,6 @@ struct ocfs2_super
190 struct ocfs2_slot_info *slot_info; 193 struct ocfs2_slot_info *slot_info;
191 194
192 spinlock_t node_map_lock; 195 spinlock_t node_map_lock;
193 struct ocfs2_node_map recovery_map;
194 196
195 u64 root_blkno; 197 u64 root_blkno;
196 u64 system_dir_blkno; 198 u64 system_dir_blkno;
@@ -206,25 +208,29 @@ struct ocfs2_super
206 u32 s_feature_incompat; 208 u32 s_feature_incompat;
207 u32 s_feature_ro_compat; 209 u32 s_feature_ro_compat;
208 210
209 /* Protects s_next_generaion, osb_flags. Could protect more on 211 /* Protects s_next_generation, osb_flags and s_inode_steal_slot.
210 * osb as it's very short lived. */ 212 * Could protect more on osb as it's very short lived.
213 */
211 spinlock_t osb_lock; 214 spinlock_t osb_lock;
212 u32 s_next_generation; 215 u32 s_next_generation;
213 unsigned long osb_flags; 216 unsigned long osb_flags;
217 s16 s_inode_steal_slot;
218 atomic_t s_num_inodes_stolen;
214 219
215 unsigned long s_mount_opt; 220 unsigned long s_mount_opt;
216 unsigned int s_atime_quantum; 221 unsigned int s_atime_quantum;
217 222
218 u16 max_slots; 223 unsigned int max_slots;
219 s16 node_num; 224 unsigned int node_num;
220 s16 slot_num; 225 int slot_num;
221 s16 preferred_slot; 226 int preferred_slot;
222 int s_sectsize_bits; 227 int s_sectsize_bits;
223 int s_clustersize; 228 int s_clustersize;
224 int s_clustersize_bits; 229 int s_clustersize_bits;
225 230
226 atomic_t vol_state; 231 atomic_t vol_state;
227 struct mutex recovery_lock; 232 struct mutex recovery_lock;
233 struct ocfs2_recovery_map *recovery_map;
228 struct task_struct *recovery_thread_task; 234 struct task_struct *recovery_thread_task;
229 int disable_recovery; 235 int disable_recovery;
230 wait_queue_head_t checkpoint_event; 236 wait_queue_head_t checkpoint_event;
@@ -245,12 +251,11 @@ struct ocfs2_super
245 struct ocfs2_alloc_stats alloc_stats; 251 struct ocfs2_alloc_stats alloc_stats;
246 char dev_str[20]; /* "major,minor" of the device */ 252 char dev_str[20]; /* "major,minor" of the device */
247 253
248 struct dlm_ctxt *dlm; 254 char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
255 struct ocfs2_cluster_connection *cconn;
249 struct ocfs2_lock_res osb_super_lockres; 256 struct ocfs2_lock_res osb_super_lockres;
250 struct ocfs2_lock_res osb_rename_lockres; 257 struct ocfs2_lock_res osb_rename_lockres;
251 struct dlm_eviction_cb osb_eviction_cb;
252 struct ocfs2_dlm_debug *osb_dlm_debug; 258 struct ocfs2_dlm_debug *osb_dlm_debug;
253 struct dlm_protocol_version osb_locking_proto;
254 259
255 struct dentry *osb_debug_root; 260 struct dentry *osb_debug_root;
256 261
@@ -367,11 +372,24 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
367 return ret; 372 return ret;
368} 373}
369 374
375static inline int ocfs2_userspace_stack(struct ocfs2_super *osb)
376{
377 return (osb->s_feature_incompat &
378 OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK);
379}
380
370static inline int ocfs2_mount_local(struct ocfs2_super *osb) 381static inline int ocfs2_mount_local(struct ocfs2_super *osb)
371{ 382{
372 return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); 383 return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
373} 384}
374 385
386static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
387{
388 return (osb->s_feature_incompat &
389 OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP);
390}
391
392
375#define OCFS2_IS_VALID_DINODE(ptr) \ 393#define OCFS2_IS_VALID_DINODE(ptr) \
376 (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) 394 (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))
377 395
@@ -522,6 +540,33 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
522 return pages_per_cluster; 540 return pages_per_cluster;
523} 541}
524 542
543static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
544{
545 spin_lock(&osb->osb_lock);
546 osb->s_inode_steal_slot = OCFS2_INVALID_SLOT;
547 spin_unlock(&osb->osb_lock);
548 atomic_set(&osb->s_num_inodes_stolen, 0);
549}
550
551static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb,
552 s16 slot)
553{
554 spin_lock(&osb->osb_lock);
555 osb->s_inode_steal_slot = slot;
556 spin_unlock(&osb->osb_lock);
557}
558
559static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
560{
561 s16 slot;
562
563 spin_lock(&osb->osb_lock);
564 slot = osb->s_inode_steal_slot;
565 spin_unlock(&osb->osb_lock);
566
567 return slot;
568}
569
525#define ocfs2_set_bit ext2_set_bit 570#define ocfs2_set_bit ext2_set_bit
526#define ocfs2_clear_bit ext2_clear_bit 571#define ocfs2_clear_bit ext2_clear_bit
527#define ocfs2_test_bit ext2_test_bit 572#define ocfs2_test_bit ext2_test_bit
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3633edd3982f..52c426665154 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -88,7 +88,9 @@
88#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB 88#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB
89#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ 89#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
90 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ 90 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
91 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA) 91 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \
92 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
93 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK)
92#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 94#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
93 95
94/* 96/*
@@ -125,6 +127,21 @@
125/* Support for data packed into inode blocks */ 127/* Support for data packed into inode blocks */
126#define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 128#define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040
127 129
130/* Support for the extended slot map */
131#define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100
132
133
134/*
135 * Support for alternate, userspace cluster stacks. If set, the superblock
136 * field s_cluster_info contains a tag for the alternate stack in use as
137 * well as the name of the cluster being joined.
138 * mount.ocfs2 must pass in a matching stack name.
139 *
 140 * If not set, the classic stack will be used. This is compatible with
141 * all older versions.
142 */
143#define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080
144
128/* 145/*
129 * backup superblock flag is used to indicate that this volume 146 * backup superblock flag is used to indicate that this volume
130 * has backup superblocks. 147 * has backup superblocks.
@@ -267,6 +284,10 @@ struct ocfs2_new_group_input {
267#define OCFS2_VOL_UUID_LEN 16 284#define OCFS2_VOL_UUID_LEN 16
268#define OCFS2_MAX_VOL_LABEL_LEN 64 285#define OCFS2_MAX_VOL_LABEL_LEN 64
269 286
287/* The alternate, userspace stack fields */
288#define OCFS2_STACK_LABEL_LEN 4
289#define OCFS2_CLUSTER_NAME_LEN 16
290
270/* Journal limits (in bytes) */ 291/* Journal limits (in bytes) */
271#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) 292#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024)
272 293
@@ -475,6 +496,47 @@ struct ocfs2_extent_block
475}; 496};
476 497
477/* 498/*
499 * On disk slot map for OCFS2. This defines the contents of the "slot_map"
500 * system file. A slot is valid if it contains a node number >= 0. The
501 * value -1 (0xFFFF) is OCFS2_INVALID_SLOT. This marks a slot empty.
502 */
503struct ocfs2_slot_map {
504/*00*/ __le16 sm_slots[0];
505/*
506 * Actual on-disk size is one block. OCFS2_MAX_SLOTS is 255,
 507 * 255 * sizeof(__le16) == 510B, which fits within the 512B minimum blocksize.
508 */
509};
510
511struct ocfs2_extended_slot {
512/*00*/ __u8 es_valid;
513 __u8 es_reserved1[3];
514 __le32 es_node_num;
515/*10*/
516};
517
518/*
519 * The extended slot map, used when OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP
520 * is set. It separates out the valid marker from the node number, and
521 * has room to grow. Unlike the old slot map, this format is defined by
522 * i_size.
523 */
524struct ocfs2_slot_map_extended {
525/*00*/ struct ocfs2_extended_slot se_slots[0];
526/*
527 * Actual size is i_size of the slot_map system file. It should
528 * match s_max_slots * sizeof(struct ocfs2_extended_slot)
529 */
530};
531
532struct ocfs2_cluster_info {
533/*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN];
534 __le32 ci_reserved;
535/*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN];
536/*18*/
537};
538
539/*
478 * On disk superblock for OCFS2 540 * On disk superblock for OCFS2
479 * Note that it is contained inside an ocfs2_dinode, so all offsets 541 * Note that it is contained inside an ocfs2_dinode, so all offsets
480 * are relative to the start of ocfs2_dinode.id2. 542 * are relative to the start of ocfs2_dinode.id2.
@@ -506,7 +568,20 @@ struct ocfs2_super_block {
506 * group header */ 568 * group header */
507/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ 569/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
508/*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ 570/*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */
509/*A0*/ 571/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace
572 stack. Only valid
573 with INCOMPAT flag. */
574/*B8*/ __le64 s_reserved2[17]; /* Fill out superblock */
575/*140*/
576
577 /*
578 * NOTE: As stated above, all offsets are relative to
579 * ocfs2_dinode.id2, which is at 0xC0 in the inode.
580 * 0xC0 + 0x140 = 0x200 or 512 bytes. A superblock must fit within
581 * our smallest blocksize, which is 512 bytes. To ensure this,
582 * we reserve the space in s_reserved2. Anything past s_reserved2
583 * will not be available on the smallest blocksize.
584 */
510}; 585};
511 586
512/* 587/*
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 86f3e3799c2b..82c200f7a8f1 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -100,7 +100,7 @@ static char *ocfs2_lock_type_strings[] = {
100static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) 100static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
101{ 101{
102#ifdef __KERNEL__ 102#ifdef __KERNEL__
103 mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type); 103 BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
104#endif 104#endif
105 return ocfs2_lock_type_strings[type]; 105 return ocfs2_lock_type_strings[type];
106} 106}
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 37835ffcb039..8166968e9015 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -597,7 +597,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
597 memset(cr, 0, sizeof(struct ocfs2_chain_rec)); 597 memset(cr, 0, sizeof(struct ocfs2_chain_rec));
598 } 598 }
599 599
600 cr->c_blkno = le64_to_cpu(input->group); 600 cr->c_blkno = cpu_to_le64(input->group);
601 le32_add_cpu(&cr->c_total, input->clusters * cl_bpc); 601 le32_add_cpu(&cr->c_total, input->clusters * cl_bpc);
602 le32_add_cpu(&cr->c_free, input->frees * cl_bpc); 602 le32_add_cpu(&cr->c_free, input->frees * cl_bpc);
603 603
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 3a50ce555e64..bb5ff8939bf1 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -42,81 +42,244 @@
42 42
43#include "buffer_head_io.h" 43#include "buffer_head_io.h"
44 44
45static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, 45
46 s16 global); 46struct ocfs2_slot {
47static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, 47 int sl_valid;
48 s16 slot_num, 48 unsigned int sl_node_num;
49 s16 node_num); 49};
50 50
51/* post the slot information on disk into our slot_info struct. */ 51struct ocfs2_slot_info {
52void ocfs2_update_slot_info(struct ocfs2_slot_info *si) 52 int si_extended;
53 int si_slots_per_block;
54 struct inode *si_inode;
55 unsigned int si_blocks;
56 struct buffer_head **si_bh;
57 unsigned int si_num_slots;
58 struct ocfs2_slot *si_slots;
59};
60
61
62static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
63 unsigned int node_num);
64
65static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
66 int slot_num)
67{
68 BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
69 si->si_slots[slot_num].sl_valid = 0;
70}
71
72static void ocfs2_set_slot(struct ocfs2_slot_info *si,
73 int slot_num, unsigned int node_num)
74{
75 BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
76
77 si->si_slots[slot_num].sl_valid = 1;
78 si->si_slots[slot_num].sl_node_num = node_num;
79}
80
81/* This version is for the extended slot map */
82static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
83{
84 int b, i, slotno;
85 struct ocfs2_slot_map_extended *se;
86
87 slotno = 0;
88 for (b = 0; b < si->si_blocks; b++) {
89 se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data;
90 for (i = 0;
91 (i < si->si_slots_per_block) &&
92 (slotno < si->si_num_slots);
93 i++, slotno++) {
94 if (se->se_slots[i].es_valid)
95 ocfs2_set_slot(si, slotno,
96 le32_to_cpu(se->se_slots[i].es_node_num));
97 else
98 ocfs2_invalidate_slot(si, slotno);
99 }
100 }
101}
102
103/*
104 * Post the slot information on disk into our slot_info struct.
105 * Must be protected by osb_lock.
106 */
107static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
53{ 108{
54 int i; 109 int i;
55 __le16 *disk_info; 110 struct ocfs2_slot_map *sm;
56 111
57 /* we don't read the slot block here as ocfs2_super_lock 112 sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
58 * should've made sure we have the most recent copy. */
59 spin_lock(&si->si_lock);
60 disk_info = (__le16 *) si->si_bh->b_data;
61 113
62 for (i = 0; i < si->si_size; i++) 114 for (i = 0; i < si->si_num_slots; i++) {
63 si->si_global_node_nums[i] = le16_to_cpu(disk_info[i]); 115 if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
116 ocfs2_invalidate_slot(si, i);
117 else
118 ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
119 }
120}
64 121
65 spin_unlock(&si->si_lock); 122static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
123{
124 /*
125 * The slot data will have been refreshed when ocfs2_super_lock
126 * was taken.
127 */
128 if (si->si_extended)
129 ocfs2_update_slot_info_extended(si);
130 else
131 ocfs2_update_slot_info_old(si);
132}
133
134int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
135{
136 int ret;
137 struct ocfs2_slot_info *si = osb->slot_info;
138
139 if (si == NULL)
140 return 0;
141
142 BUG_ON(si->si_blocks == 0);
143 BUG_ON(si->si_bh == NULL);
144
145 mlog(0, "Refreshing slot map, reading %u block(s)\n",
146 si->si_blocks);
147
148 /*
149 * We pass -1 as blocknr because we expect all of si->si_bh to
150 * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If
151 * this is not true, the read of -1 (UINT64_MAX) will fail.
152 */
153 ret = ocfs2_read_blocks(osb, -1, si->si_blocks, si->si_bh, 0,
154 si->si_inode);
155 if (ret == 0) {
156 spin_lock(&osb->osb_lock);
157 ocfs2_update_slot_info(si);
158 spin_unlock(&osb->osb_lock);
159 }
160
161 return ret;
66} 162}
67 163
 68/* post our slot info into its destination bh and write it 164/* post our slot info into its destination bh and write it
 69 * out. */ 165 * out. */
70int ocfs2_update_disk_slots(struct ocfs2_super *osb, 166static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si,
71 struct ocfs2_slot_info *si) 167 int slot_num,
168 struct buffer_head **bh)
72{ 169{
73 int status, i; 170 int blkind = slot_num / si->si_slots_per_block;
74 __le16 *disk_info = (__le16 *) si->si_bh->b_data; 171 int slotno = slot_num % si->si_slots_per_block;
172 struct ocfs2_slot_map_extended *se;
173
174 BUG_ON(blkind >= si->si_blocks);
175
176 se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data;
177 se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid;
178 if (si->si_slots[slot_num].sl_valid)
179 se->se_slots[slotno].es_node_num =
180 cpu_to_le32(si->si_slots[slot_num].sl_node_num);
181 *bh = si->si_bh[blkind];
182}
75 183
76 spin_lock(&si->si_lock); 184static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si,
77 for (i = 0; i < si->si_size; i++) 185 int slot_num,
78 disk_info[i] = cpu_to_le16(si->si_global_node_nums[i]); 186 struct buffer_head **bh)
79 spin_unlock(&si->si_lock); 187{
188 int i;
189 struct ocfs2_slot_map *sm;
190
191 sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
192 for (i = 0; i < si->si_num_slots; i++) {
193 if (si->si_slots[i].sl_valid)
194 sm->sm_slots[i] =
195 cpu_to_le16(si->si_slots[i].sl_node_num);
196 else
197 sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
198 }
199 *bh = si->si_bh[0];
200}
201
202static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
203 struct ocfs2_slot_info *si,
204 int slot_num)
205{
206 int status;
207 struct buffer_head *bh;
208
209 spin_lock(&osb->osb_lock);
210 if (si->si_extended)
211 ocfs2_update_disk_slot_extended(si, slot_num, &bh);
212 else
213 ocfs2_update_disk_slot_old(si, slot_num, &bh);
214 spin_unlock(&osb->osb_lock);
80 215
81 status = ocfs2_write_block(osb, si->si_bh, si->si_inode); 216 status = ocfs2_write_block(osb, bh, si->si_inode);
82 if (status < 0) 217 if (status < 0)
83 mlog_errno(status); 218 mlog_errno(status);
84 219
85 return status; 220 return status;
86} 221}
87 222
88/* try to find global node in the slot info. Returns 223/*
89 * OCFS2_INVALID_SLOT if nothing is found. */ 224 * Calculate how many bytes are needed by the slot map. Returns
90static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, 225 * an error if the slot map file is too small.
91 s16 global) 226 */
227static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
228 struct inode *inode,
229 unsigned long long *bytes)
92{ 230{
93 int i; 231 unsigned long long bytes_needed;
94 s16 ret = OCFS2_INVALID_SLOT; 232
233 if (ocfs2_uses_extended_slot_map(osb)) {
234 bytes_needed = osb->max_slots *
235 sizeof(struct ocfs2_extended_slot);
236 } else {
237 bytes_needed = osb->max_slots * sizeof(__le16);
238 }
239 if (bytes_needed > i_size_read(inode)) {
240 mlog(ML_ERROR,
241 "Slot map file is too small! (size %llu, needed %llu)\n",
242 i_size_read(inode), bytes_needed);
243 return -ENOSPC;
244 }
245
246 *bytes = bytes_needed;
247 return 0;
248}
249
250/* try to find global node in the slot info. Returns -ENOENT
251 * if nothing is found. */
252static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
253 unsigned int node_num)
254{
255 int i, ret = -ENOENT;
95 256
96 for(i = 0; i < si->si_num_slots; i++) { 257 for(i = 0; i < si->si_num_slots; i++) {
97 if (global == si->si_global_node_nums[i]) { 258 if (si->si_slots[i].sl_valid &&
98 ret = (s16) i; 259 (node_num == si->si_slots[i].sl_node_num)) {
260 ret = i;
99 break; 261 break;
100 } 262 }
101 } 263 }
264
102 return ret; 265 return ret;
103} 266}
104 267
105static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred) 268static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
269 int preferred)
106{ 270{
107 int i; 271 int i, ret = -ENOSPC;
108 s16 ret = OCFS2_INVALID_SLOT;
109 272
110 if (preferred >= 0 && preferred < si->si_num_slots) { 273 if ((preferred >= 0) && (preferred < si->si_num_slots)) {
111 if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) { 274 if (!si->si_slots[preferred].sl_valid) {
112 ret = preferred; 275 ret = preferred;
113 goto out; 276 goto out;
114 } 277 }
115 } 278 }
116 279
117 for(i = 0; i < si->si_num_slots; i++) { 280 for(i = 0; i < si->si_num_slots; i++) {
118 if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) { 281 if (!si->si_slots[i].sl_valid) {
119 ret = (s16) i; 282 ret = i;
120 break; 283 break;
121 } 284 }
122 } 285 }
@@ -124,58 +287,155 @@ out:
124 return ret; 287 return ret;
125} 288}
126 289
127s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, 290int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
128 s16 global)
129{ 291{
130 s16 ret; 292 int slot;
293 struct ocfs2_slot_info *si = osb->slot_info;
131 294
132 spin_lock(&si->si_lock); 295 spin_lock(&osb->osb_lock);
133 ret = __ocfs2_node_num_to_slot(si, global); 296 slot = __ocfs2_node_num_to_slot(si, node_num);
134 spin_unlock(&si->si_lock); 297 spin_unlock(&osb->osb_lock);
135 return ret; 298
299 return slot;
300}
301
302int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
303 unsigned int *node_num)
304{
305 struct ocfs2_slot_info *si = osb->slot_info;
306
307 assert_spin_locked(&osb->osb_lock);
308
309 BUG_ON(slot_num < 0);
310 BUG_ON(slot_num > osb->max_slots);
311
312 if (!si->si_slots[slot_num].sl_valid)
313 return -ENOENT;
314
315 *node_num = si->si_slots[slot_num].sl_node_num;
316 return 0;
136} 317}
137 318
138static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, 319static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
139 s16 slot_num,
140 s16 node_num)
141{ 320{
142 BUG_ON(slot_num == OCFS2_INVALID_SLOT); 321 unsigned int i;
143 BUG_ON(slot_num >= si->si_num_slots); 322
144 BUG_ON((node_num != O2NM_INVALID_NODE_NUM) && 323 if (si == NULL)
145 (node_num >= O2NM_MAX_NODES)); 324 return;
325
326 if (si->si_inode)
327 iput(si->si_inode);
328 if (si->si_bh) {
329 for (i = 0; i < si->si_blocks; i++) {
330 if (si->si_bh[i]) {
331 brelse(si->si_bh[i]);
332 si->si_bh[i] = NULL;
333 }
334 }
335 kfree(si->si_bh);
336 }
146 337
147 si->si_global_node_nums[slot_num] = node_num; 338 kfree(si);
148} 339}
149 340
150void ocfs2_clear_slot(struct ocfs2_slot_info *si, 341int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
151 s16 slot_num)
152{ 342{
153 spin_lock(&si->si_lock); 343 struct ocfs2_slot_info *si = osb->slot_info;
154 __ocfs2_fill_slot(si, slot_num, OCFS2_INVALID_SLOT); 344
155 spin_unlock(&si->si_lock); 345 if (si == NULL)
346 return 0;
347
348 spin_lock(&osb->osb_lock);
349 ocfs2_invalidate_slot(si, slot_num);
350 spin_unlock(&osb->osb_lock);
351
352 return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num);
156} 353}
157 354
158int ocfs2_init_slot_info(struct ocfs2_super *osb) 355static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
356 struct ocfs2_slot_info *si)
159{ 357{
160 int status, i; 358 int status = 0;
161 u64 blkno; 359 u64 blkno;
360 unsigned long long blocks, bytes;
361 unsigned int i;
362 struct buffer_head *bh;
363
364 status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes);
365 if (status)
366 goto bail;
367
368 blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes);
369 BUG_ON(blocks > UINT_MAX);
370 si->si_blocks = blocks;
371 if (!si->si_blocks)
372 goto bail;
373
374 if (si->si_extended)
375 si->si_slots_per_block =
376 (osb->sb->s_blocksize /
377 sizeof(struct ocfs2_extended_slot));
378 else
379 si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16);
380
381 /* The size checks above should ensure this */
382 BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
383
384 mlog(0, "Slot map needs %u buffers for %llu bytes\n",
385 si->si_blocks, bytes);
386
387 si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks,
388 GFP_KERNEL);
389 if (!si->si_bh) {
390 status = -ENOMEM;
391 mlog_errno(status);
392 goto bail;
393 }
394
395 for (i = 0; i < si->si_blocks; i++) {
396 status = ocfs2_extent_map_get_blocks(si->si_inode, i,
397 &blkno, NULL, NULL);
398 if (status < 0) {
399 mlog_errno(status);
400 goto bail;
401 }
402
403 mlog(0, "Reading slot map block %u at %llu\n", i,
404 (unsigned long long)blkno);
405
406 bh = NULL; /* Acquire a fresh bh */
407 status = ocfs2_read_block(osb, blkno, &bh, 0, si->si_inode);
408 if (status < 0) {
409 mlog_errno(status);
410 goto bail;
411 }
412
413 si->si_bh[i] = bh;
414 }
415
416bail:
417 return status;
418}
419
420int ocfs2_init_slot_info(struct ocfs2_super *osb)
421{
422 int status;
162 struct inode *inode = NULL; 423 struct inode *inode = NULL;
163 struct buffer_head *bh = NULL;
164 struct ocfs2_slot_info *si; 424 struct ocfs2_slot_info *si;
165 425
166 si = kzalloc(sizeof(struct ocfs2_slot_info), GFP_KERNEL); 426 si = kzalloc(sizeof(struct ocfs2_slot_info) +
427 (sizeof(struct ocfs2_slot) * osb->max_slots),
428 GFP_KERNEL);
167 if (!si) { 429 if (!si) {
168 status = -ENOMEM; 430 status = -ENOMEM;
169 mlog_errno(status); 431 mlog_errno(status);
170 goto bail; 432 goto bail;
171 } 433 }
172 434
173 spin_lock_init(&si->si_lock); 435 si->si_extended = ocfs2_uses_extended_slot_map(osb);
174 si->si_num_slots = osb->max_slots; 436 si->si_num_slots = osb->max_slots;
175 si->si_size = OCFS2_MAX_SLOTS; 437 si->si_slots = (struct ocfs2_slot *)((char *)si +
176 438 sizeof(struct ocfs2_slot_info));
177 for(i = 0; i < si->si_num_slots; i++)
178 si->si_global_node_nums[i] = OCFS2_INVALID_SLOT;
179 439
180 inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE, 440 inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
181 OCFS2_INVALID_SLOT); 441 OCFS2_INVALID_SLOT);
@@ -185,61 +445,53 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
185 goto bail; 445 goto bail;
186 } 446 }
187 447
188 status = ocfs2_extent_map_get_blocks(inode, 0ULL, &blkno, NULL, NULL); 448 si->si_inode = inode;
189 if (status < 0) { 449 status = ocfs2_map_slot_buffers(osb, si);
190 mlog_errno(status);
191 goto bail;
192 }
193
194 status = ocfs2_read_block(osb, blkno, &bh, 0, inode);
195 if (status < 0) { 450 if (status < 0) {
196 mlog_errno(status); 451 mlog_errno(status);
197 goto bail; 452 goto bail;
198 } 453 }
199 454
200 si->si_inode = inode; 455 osb->slot_info = (struct ocfs2_slot_info *)si;
201 si->si_bh = bh;
202 osb->slot_info = si;
203bail: 456bail:
204 if (status < 0 && si) 457 if (status < 0 && si)
205 ocfs2_free_slot_info(si); 458 __ocfs2_free_slot_info(si);
206 459
207 return status; 460 return status;
208} 461}
209 462
210void ocfs2_free_slot_info(struct ocfs2_slot_info *si) 463void ocfs2_free_slot_info(struct ocfs2_super *osb)
211{ 464{
212 if (si->si_inode) 465 struct ocfs2_slot_info *si = osb->slot_info;
213 iput(si->si_inode); 466
214 if (si->si_bh) 467 osb->slot_info = NULL;
215 brelse(si->si_bh); 468 __ocfs2_free_slot_info(si);
216 kfree(si);
217} 469}
218 470
219int ocfs2_find_slot(struct ocfs2_super *osb) 471int ocfs2_find_slot(struct ocfs2_super *osb)
220{ 472{
221 int status; 473 int status;
222 s16 slot; 474 int slot;
223 struct ocfs2_slot_info *si; 475 struct ocfs2_slot_info *si;
224 476
225 mlog_entry_void(); 477 mlog_entry_void();
226 478
227 si = osb->slot_info; 479 si = osb->slot_info;
228 480
481 spin_lock(&osb->osb_lock);
229 ocfs2_update_slot_info(si); 482 ocfs2_update_slot_info(si);
230 483
231 spin_lock(&si->si_lock);
232 /* search for ourselves first and take the slot if it already 484 /* search for ourselves first and take the slot if it already
233 * exists. Perhaps we need to mark this in a variable for our 485 * exists. Perhaps we need to mark this in a variable for our
234 * own journal recovery? Possibly not, though we certainly 486 * own journal recovery? Possibly not, though we certainly
235 * need to warn to the user */ 487 * need to warn to the user */
236 slot = __ocfs2_node_num_to_slot(si, osb->node_num); 488 slot = __ocfs2_node_num_to_slot(si, osb->node_num);
237 if (slot == OCFS2_INVALID_SLOT) { 489 if (slot < 0) {
238 /* if no slot yet, then just take 1st available 490 /* if no slot yet, then just take 1st available
239 * one. */ 491 * one. */
240 slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); 492 slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
241 if (slot == OCFS2_INVALID_SLOT) { 493 if (slot < 0) {
242 spin_unlock(&si->si_lock); 494 spin_unlock(&osb->osb_lock);
243 mlog(ML_ERROR, "no free slots available!\n"); 495 mlog(ML_ERROR, "no free slots available!\n");
244 status = -EINVAL; 496 status = -EINVAL;
245 goto bail; 497 goto bail;
@@ -248,13 +500,13 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
248 mlog(ML_NOTICE, "slot %d is already allocated to this node!\n", 500 mlog(ML_NOTICE, "slot %d is already allocated to this node!\n",
249 slot); 501 slot);
250 502
251 __ocfs2_fill_slot(si, slot, osb->node_num); 503 ocfs2_set_slot(si, slot, osb->node_num);
252 osb->slot_num = slot; 504 osb->slot_num = slot;
253 spin_unlock(&si->si_lock); 505 spin_unlock(&osb->osb_lock);
254 506
255 mlog(0, "taking node slot %d\n", osb->slot_num); 507 mlog(0, "taking node slot %d\n", osb->slot_num);
256 508
257 status = ocfs2_update_disk_slots(osb, si); 509 status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
258 if (status < 0) 510 if (status < 0)
259 mlog_errno(status); 511 mlog_errno(status);
260 512
@@ -265,27 +517,27 @@ bail:
265 517
266void ocfs2_put_slot(struct ocfs2_super *osb) 518void ocfs2_put_slot(struct ocfs2_super *osb)
267{ 519{
268 int status; 520 int status, slot_num;
269 struct ocfs2_slot_info *si = osb->slot_info; 521 struct ocfs2_slot_info *si = osb->slot_info;
270 522
271 if (!si) 523 if (!si)
272 return; 524 return;
273 525
526 spin_lock(&osb->osb_lock);
274 ocfs2_update_slot_info(si); 527 ocfs2_update_slot_info(si);
275 528
276 spin_lock(&si->si_lock); 529 slot_num = osb->slot_num;
277 __ocfs2_fill_slot(si, osb->slot_num, OCFS2_INVALID_SLOT); 530 ocfs2_invalidate_slot(si, osb->slot_num);
278 osb->slot_num = OCFS2_INVALID_SLOT; 531 osb->slot_num = OCFS2_INVALID_SLOT;
279 spin_unlock(&si->si_lock); 532 spin_unlock(&osb->osb_lock);
280 533
281 status = ocfs2_update_disk_slots(osb, si); 534 status = ocfs2_update_disk_slot(osb, si, slot_num);
282 if (status < 0) { 535 if (status < 0) {
283 mlog_errno(status); 536 mlog_errno(status);
284 goto bail; 537 goto bail;
285 } 538 }
286 539
287bail: 540bail:
288 osb->slot_info = NULL; 541 ocfs2_free_slot_info(osb);
289 ocfs2_free_slot_info(si);
290} 542}
291 543
diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h
index 1025872aaade..601c95fd7003 100644
--- a/fs/ocfs2/slot_map.h
+++ b/fs/ocfs2/slot_map.h
@@ -27,38 +27,18 @@
27#ifndef SLOTMAP_H 27#ifndef SLOTMAP_H
28#define SLOTMAP_H 28#define SLOTMAP_H
29 29
30struct ocfs2_slot_info {
31 spinlock_t si_lock;
32
33 struct inode *si_inode;
34 struct buffer_head *si_bh;
35 unsigned int si_num_slots;
36 unsigned int si_size;
37 s16 si_global_node_nums[OCFS2_MAX_SLOTS];
38};
39
40int ocfs2_init_slot_info(struct ocfs2_super *osb); 30int ocfs2_init_slot_info(struct ocfs2_super *osb);
41void ocfs2_free_slot_info(struct ocfs2_slot_info *si); 31void ocfs2_free_slot_info(struct ocfs2_super *osb);
42 32
43int ocfs2_find_slot(struct ocfs2_super *osb); 33int ocfs2_find_slot(struct ocfs2_super *osb);
44void ocfs2_put_slot(struct ocfs2_super *osb); 34void ocfs2_put_slot(struct ocfs2_super *osb);
45 35
46void ocfs2_update_slot_info(struct ocfs2_slot_info *si); 36int ocfs2_refresh_slot_info(struct ocfs2_super *osb);
47int ocfs2_update_disk_slots(struct ocfs2_super *osb,
48 struct ocfs2_slot_info *si);
49
50s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
51 s16 global);
52void ocfs2_clear_slot(struct ocfs2_slot_info *si,
53 s16 slot_num);
54 37
55static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si, 38int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num);
56 int slot_num) 39int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
57{ 40 unsigned int *node_num);
58 BUG_ON(slot_num == OCFS2_INVALID_SLOT);
59 assert_spin_locked(&si->si_lock);
60 41
61 return si->si_global_node_nums[slot_num] == OCFS2_INVALID_SLOT; 42int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num);
62}
63 43
64#endif 44#endif
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
new file mode 100644
index 000000000000..ac1d74c63bf5
--- /dev/null
+++ b/fs/ocfs2/stack_o2cb.c
@@ -0,0 +1,420 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * stack_o2cb.c
5 *
6 * Code which interfaces ocfs2 with the o2cb stack.
7 *
8 * Copyright (C) 2007 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation, version 2.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#include <linux/crc32.h>
21#include <linux/module.h>
22
23/* Needed for AOP_TRUNCATED_PAGE in mlog_errno() */
24#include <linux/fs.h>
25
26#include "cluster/masklog.h"
27#include "cluster/nodemanager.h"
28#include "cluster/heartbeat.h"
29
30#include "stackglue.h"
31
32struct o2dlm_private {
33 struct dlm_eviction_cb op_eviction_cb;
34};
35
36static struct ocfs2_stack_plugin o2cb_stack;
37
38/* These should be identical */
39#if (DLM_LOCK_IV != LKM_IVMODE)
40# error Lock modes do not match
41#endif
42#if (DLM_LOCK_NL != LKM_NLMODE)
43# error Lock modes do not match
44#endif
45#if (DLM_LOCK_CR != LKM_CRMODE)
46# error Lock modes do not match
47#endif
48#if (DLM_LOCK_CW != LKM_CWMODE)
49# error Lock modes do not match
50#endif
51#if (DLM_LOCK_PR != LKM_PRMODE)
52# error Lock modes do not match
53#endif
54#if (DLM_LOCK_PW != LKM_PWMODE)
55# error Lock modes do not match
56#endif
57#if (DLM_LOCK_EX != LKM_EXMODE)
58# error Lock modes do not match
59#endif
60static inline int mode_to_o2dlm(int mode)
61{
62 BUG_ON(mode > LKM_MAXMODE);
63
64 return mode;
65}
66
67#define map_flag(_generic, _o2dlm) \
68 if (flags & (_generic)) { \
69 flags &= ~(_generic); \
70 o2dlm_flags |= (_o2dlm); \
71 }
72static int flags_to_o2dlm(u32 flags)
73{
74 int o2dlm_flags = 0;
75
76 map_flag(DLM_LKF_NOQUEUE, LKM_NOQUEUE);
77 map_flag(DLM_LKF_CANCEL, LKM_CANCEL);
78 map_flag(DLM_LKF_CONVERT, LKM_CONVERT);
79 map_flag(DLM_LKF_VALBLK, LKM_VALBLK);
80 map_flag(DLM_LKF_IVVALBLK, LKM_INVVALBLK);
81 map_flag(DLM_LKF_ORPHAN, LKM_ORPHAN);
82 map_flag(DLM_LKF_FORCEUNLOCK, LKM_FORCE);
83 map_flag(DLM_LKF_TIMEOUT, LKM_TIMEOUT);
84 map_flag(DLM_LKF_LOCAL, LKM_LOCAL);
85
86 /* map_flag() should have cleared every flag passed in */
87 BUG_ON(flags != 0);
88
89 return o2dlm_flags;
90}
91#undef map_flag
92
93/*
94 * Map an o2dlm status to standard errno values.
95 *
96 * o2dlm only uses a handful of these, and returns even fewer to the
97 * caller. Still, we try to assign sane values to each error.
98 *
99 * The following value pairs have special meanings to dlmglue, thus
100 * the right hand side needs to stay unique - never duplicate the
101 * mapping elsewhere in the table!
102 *
103 * DLM_NORMAL: 0
104 * DLM_NOTQUEUED: -EAGAIN
105 * DLM_CANCELGRANT: -EBUSY
106 * DLM_CANCEL: -DLM_ECANCEL
107 */
108/* Keep in sync with dlmapi.h */
109static int status_map[] = {
110 [DLM_NORMAL] = 0, /* Success */
111 [DLM_GRANTED] = -EINVAL,
112 [DLM_DENIED] = -EACCES,
113 [DLM_DENIED_NOLOCKS] = -EACCES,
114 [DLM_WORKING] = -EACCES,
115 [DLM_BLOCKED] = -EINVAL,
116 [DLM_BLOCKED_ORPHAN] = -EINVAL,
117 [DLM_DENIED_GRACE_PERIOD] = -EACCES,
118 [DLM_SYSERR] = -ENOMEM, /* It is what it is */
119 [DLM_NOSUPPORT] = -EPROTO,
120 [DLM_CANCELGRANT] = -EBUSY, /* Cancel after grant */
121 [DLM_IVLOCKID] = -EINVAL,
122 [DLM_SYNC] = -EINVAL,
123 [DLM_BADTYPE] = -EINVAL,
124 [DLM_BADRESOURCE] = -EINVAL,
125 [DLM_MAXHANDLES] = -ENOMEM,
126 [DLM_NOCLINFO] = -EINVAL,
127 [DLM_NOLOCKMGR] = -EINVAL,
128 [DLM_NOPURGED] = -EINVAL,
129 [DLM_BADARGS] = -EINVAL,
130 [DLM_VOID] = -EINVAL,
131 [DLM_NOTQUEUED] = -EAGAIN, /* Trylock failed */
132 [DLM_IVBUFLEN] = -EINVAL,
133 [DLM_CVTUNGRANT] = -EPERM,
134 [DLM_BADPARAM] = -EINVAL,
135 [DLM_VALNOTVALID] = -EINVAL,
136 [DLM_REJECTED] = -EPERM,
137 [DLM_ABORT] = -EINVAL,
138 [DLM_CANCEL] = -DLM_ECANCEL, /* Successful cancel */
139 [DLM_IVRESHANDLE] = -EINVAL,
140 [DLM_DEADLOCK] = -EDEADLK,
141 [DLM_DENIED_NOASTS] = -EINVAL,
142 [DLM_FORWARD] = -EINVAL,
143 [DLM_TIMEOUT] = -ETIMEDOUT,
144 [DLM_IVGROUPID] = -EINVAL,
145 [DLM_VERS_CONFLICT] = -EOPNOTSUPP,
146 [DLM_BAD_DEVICE_PATH] = -ENOENT,
147 [DLM_NO_DEVICE_PERMISSION] = -EPERM,
148 [DLM_NO_CONTROL_DEVICE] = -ENOENT,
149 [DLM_RECOVERING] = -ENOTCONN,
150 [DLM_MIGRATING] = -ERESTART,
151 [DLM_MAXSTATS] = -EINVAL,
152};
153
154static int dlm_status_to_errno(enum dlm_status status)
155{
156 BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0])));
157
158 return status_map[status];
159}
160
161static void o2dlm_lock_ast_wrapper(void *astarg)
162{
163 BUG_ON(o2cb_stack.sp_proto == NULL);
164
165 o2cb_stack.sp_proto->lp_lock_ast(astarg);
166}
167
168static void o2dlm_blocking_ast_wrapper(void *astarg, int level)
169{
170 BUG_ON(o2cb_stack.sp_proto == NULL);
171
172 o2cb_stack.sp_proto->lp_blocking_ast(astarg, level);
173}
174
175static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status)
176{
177 int error = dlm_status_to_errno(status);
178
179 BUG_ON(o2cb_stack.sp_proto == NULL);
180
181 /*
182 * In o2dlm, you can get both the lock_ast() for the lock being
183 * granted and the unlock_ast() for the CANCEL failing. A
184 * successful cancel sends DLM_NORMAL here. If the
185 * lock grant happened before the cancel arrived, you get
186 * DLM_CANCELGRANT.
187 *
188 * There's no need for the double-ast. If we see DLM_CANCELGRANT,
189 * we just ignore it. We expect the lock_ast() to handle the
190 * granted lock.
191 */
192 if (status == DLM_CANCELGRANT)
193 return;
194
195 o2cb_stack.sp_proto->lp_unlock_ast(astarg, error);
196}
197
198static int o2cb_dlm_lock(struct ocfs2_cluster_connection *conn,
199 int mode,
200 union ocfs2_dlm_lksb *lksb,
201 u32 flags,
202 void *name,
203 unsigned int namelen,
204 void *astarg)
205{
206 enum dlm_status status;
207 int o2dlm_mode = mode_to_o2dlm(mode);
208 int o2dlm_flags = flags_to_o2dlm(flags);
209 int ret;
210
211 status = dlmlock(conn->cc_lockspace, o2dlm_mode, &lksb->lksb_o2dlm,
212 o2dlm_flags, name, namelen,
213 o2dlm_lock_ast_wrapper, astarg,
214 o2dlm_blocking_ast_wrapper);
215 ret = dlm_status_to_errno(status);
216 return ret;
217}
218
219static int o2cb_dlm_unlock(struct ocfs2_cluster_connection *conn,
220 union ocfs2_dlm_lksb *lksb,
221 u32 flags,
222 void *astarg)
223{
224 enum dlm_status status;
225 int o2dlm_flags = flags_to_o2dlm(flags);
226 int ret;
227
228 status = dlmunlock(conn->cc_lockspace, &lksb->lksb_o2dlm,
229 o2dlm_flags, o2dlm_unlock_ast_wrapper, astarg);
230 ret = dlm_status_to_errno(status);
231 return ret;
232}
233
234static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
235{
236 return dlm_status_to_errno(lksb->lksb_o2dlm.status);
237}
238
239static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb)
240{
241 return (void *)(lksb->lksb_o2dlm.lvb);
242}
243
244static void o2cb_dump_lksb(union ocfs2_dlm_lksb *lksb)
245{
246 dlm_print_one_lock(lksb->lksb_o2dlm.lockid);
247}
248
249/*
250 * Called from the dlm when it's about to evict a node. This is how the
251 * classic stack signals node death.
252 */
253static void o2dlm_eviction_cb(int node_num, void *data)
254{
255 struct ocfs2_cluster_connection *conn = data;
256
257 mlog(ML_NOTICE, "o2dlm has evicted node %d from group %.*s\n",
258 node_num, conn->cc_namelen, conn->cc_name);
259
260 conn->cc_recovery_handler(node_num, conn->cc_recovery_data);
261}
262
263static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
264{
265 int rc = 0;
266 u32 dlm_key;
267 struct dlm_ctxt *dlm;
268 struct o2dlm_private *priv;
269 struct dlm_protocol_version dlm_version;
270
271 BUG_ON(conn == NULL);
272 BUG_ON(o2cb_stack.sp_proto == NULL);
273
274 /* for now we only have one cluster/node, make sure we see it
275 * in the heartbeat universe */
276 if (!o2hb_check_local_node_heartbeating()) {
277 rc = -EINVAL;
278 goto out;
279 }
280
281 priv = kzalloc(sizeof(struct o2dlm_private), GFP_KERNEL);
282 if (!priv) {
283 rc = -ENOMEM;
284 goto out_free;
285 }
286
287 /* This just fills the structure in. It is safe to pass conn. */
288 dlm_setup_eviction_cb(&priv->op_eviction_cb, o2dlm_eviction_cb,
289 conn);
290
291 conn->cc_private = priv;
292
293 /* used by the dlm code to make message headers unique, each
294 * node in this domain must agree on this. */
295 dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen);
296 dlm_version.pv_major = conn->cc_version.pv_major;
297 dlm_version.pv_minor = conn->cc_version.pv_minor;
298
299 dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version);
300 if (IS_ERR(dlm)) {
301 rc = PTR_ERR(dlm);
302 mlog_errno(rc);
303 goto out_free;
304 }
305
306 conn->cc_version.pv_major = dlm_version.pv_major;
307 conn->cc_version.pv_minor = dlm_version.pv_minor;
308 conn->cc_lockspace = dlm;
309
310 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb);
311
312out_free:
313 if (rc && conn->cc_private)
314 kfree(conn->cc_private);
315
316out:
317 return rc;
318}
319
320static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn,
321 int hangup_pending)
322{
323 struct dlm_ctxt *dlm = conn->cc_lockspace;
324 struct o2dlm_private *priv = conn->cc_private;
325
326 dlm_unregister_eviction_cb(&priv->op_eviction_cb);
327 conn->cc_private = NULL;
328 kfree(priv);
329
330 dlm_unregister_domain(dlm);
331 conn->cc_lockspace = NULL;
332
333 return 0;
334}
335
/*
 * Stop o2hb heartbeating on @group by invoking the userspace
 * "ocfs2_hb_ctl -K -u <group>" helper and waiting for it to finish.
 * Failures are only logged; there is nothing the caller can do.
 */
static void o2hb_stop(const char *group)
{
	int ret;
	char *argv[5], *envp[3];

	/* Path to ocfs2_hb_ctl is configurable via the o2nm layer. */
	argv[0] = (char *)o2nm_get_hb_ctl_path();
	argv[1] = "-K";
	argv[2] = "-u";
	argv[3] = (char *)group;
	argv[4] = NULL;

	mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]);

	/* minimal command environment taken from cpu_run_sbin_hotplug */
	envp[0] = "HOME=/";
	envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
	envp[2] = NULL;

	/* UMH_WAIT_PROC: block until the helper process exits. */
	ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
	if (ret < 0)
		mlog_errno(ret);
}
358
/*
 * Hangup is a hack for tools compatibility.  Older ocfs2-tools software
 * expects the filesystem to call "ocfs2_hb_ctl" during unmount.  This
 * happens regardless of whether the DLM got started, so we can't do it
 * in ocfs2_cluster_disconnect().  We bring the o2hb_stop() function into
 * the glue and provide a "hangup" API for super.c to call.
 *
 * Other stacks will eventually provide a NULL ->hangup() pointer.
 */
static void o2cb_cluster_hangup(const char *group, int grouplen)
{
	/* grouplen is unused; o2hb_stop() takes a NUL-terminated name. */
	o2hb_stop(group);
}
372
373static int o2cb_cluster_this_node(unsigned int *node)
374{
375 int node_num;
376
377 node_num = o2nm_this_node();
378 if (node_num == O2NM_INVALID_NODE_NUM)
379 return -ENOENT;
380
381 if (node_num >= O2NM_MAX_NODES)
382 return -EOVERFLOW;
383
384 *node = node_num;
385 return 0;
386}
387
/* Operations vector wiring the classic o2cb stack into the stack glue. */
struct ocfs2_stack_operations o2cb_stack_ops = {
	.connect	= o2cb_cluster_connect,
	.disconnect	= o2cb_cluster_disconnect,
	.hangup		= o2cb_cluster_hangup,
	.this_node	= o2cb_cluster_this_node,
	.dlm_lock	= o2cb_dlm_lock,
	.dlm_unlock	= o2cb_dlm_unlock,
	.lock_status	= o2cb_dlm_lock_status,
	.lock_lvb	= o2cb_dlm_lvb,
	.dump_lksb	= o2cb_dump_lksb,
};

/* Plugin descriptor registered with the stack glue under the name "o2cb". */
static struct ocfs2_stack_plugin o2cb_stack = {
	.sp_name	= "o2cb",
	.sp_ops		= &o2cb_stack_ops,
	.sp_owner	= THIS_MODULE,
};
405
/* Module init: hand the o2cb plugin to the stack glue. */
static int __init o2cb_stack_init(void)
{
	return ocfs2_stack_glue_register(&o2cb_stack);
}
410
/* Module exit: withdraw the o2cb plugin from the stack glue. */
static void __exit o2cb_stack_exit(void)
{
	ocfs2_stack_glue_unregister(&o2cb_stack);
}
415
/* Module boilerplate for the o2cb stack plugin. */
MODULE_AUTHOR("Oracle");
MODULE_DESCRIPTION("ocfs2 driver for the classic o2cb stack");
MODULE_LICENSE("GPL");
module_init(o2cb_stack_init);
module_exit(o2cb_stack_exit);
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
new file mode 100644
index 000000000000..7428663f9cbb
--- /dev/null
+++ b/fs/ocfs2/stack_user.c
@@ -0,0 +1,883 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * stack_user.c
5 *
6 * Code which interfaces ocfs2 with fs/dlm and a userspace stack.
7 *
8 * Copyright (C) 2007 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation, version 2.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#include <linux/module.h>
21#include <linux/fs.h>
22#include <linux/miscdevice.h>
23#include <linux/mutex.h>
24#include <linux/reboot.h>
25#include <asm/uaccess.h>
26
27#include "ocfs2.h" /* For struct ocfs2_lock_res */
28#include "stackglue.h"
29
30
31/*
32 * The control protocol starts with a handshake. Until the handshake
33 * is complete, the control device will fail all write(2)s.
34 *
35 * The handshake is simple. First, the client reads until EOF. Each line
36 * of output is a supported protocol tag. All protocol tags are a single
37 * character followed by a two hex digit version number. Currently the
38 * only things supported is T01, for "Text-base version 0x01". Next, the
39 * client writes the version they would like to use, including the newline.
40 * Thus, the protocol tag is 'T01\n'. If the version tag written is
41 * unknown, -EINVAL is returned. Once the negotiation is complete, the
42 * client can start sending messages.
43 *
44 * The T01 protocol has three messages. First is the "SETN" message.
45 * It has the following syntax:
46 *
47 * SETN<space><8-char-hex-nodenum><newline>
48 *
49 * This is 14 characters.
50 *
51 * The "SETN" message must be the first message following the protocol.
52 * It tells ocfs2_control the local node number.
53 *
54 * Next comes the "SETV" message. It has the following syntax:
55 *
56 * SETV<space><2-char-hex-major><space><2-char-hex-minor><newline>
57 *
58 * This is 11 characters.
59 *
60 * The "SETV" message sets the filesystem locking protocol version as
61 * negotiated by the client. The client negotiates based on the maximum
62 * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major
63 * number from the "SETV" message must match
64 * user_stack.sp_proto->lp_max_version.pv_major, and the minor number
65 * must be less than or equal to ...->lp_max_version.pv_minor.
66 *
67 * Once this information has been set, mounts will be allowed. From this
68 * point on, the "DOWN" message can be sent for node down notification.
69 * It has the following syntax:
70 *
71 * DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline>
72 *
73 * eg:
74 *
75 * DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n
76 *
77 * This is 47 characters.
78 */
79
80/*
81 * Whether or not the client has done the handshake.
82 * For now, we have just one protocol version.
83 */
84#define OCFS2_CONTROL_PROTO "T01\n"
85#define OCFS2_CONTROL_PROTO_LEN 4
86
87/* Handshake states */
88#define OCFS2_CONTROL_HANDSHAKE_INVALID (0)
89#define OCFS2_CONTROL_HANDSHAKE_READ (1)
90#define OCFS2_CONTROL_HANDSHAKE_PROTOCOL (2)
91#define OCFS2_CONTROL_HANDSHAKE_VALID (3)
92
93/* Messages */
94#define OCFS2_CONTROL_MESSAGE_OP_LEN 4
95#define OCFS2_CONTROL_MESSAGE_SETNODE_OP "SETN"
96#define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14
97#define OCFS2_CONTROL_MESSAGE_SETVERSION_OP "SETV"
98#define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN 11
99#define OCFS2_CONTROL_MESSAGE_DOWN_OP "DOWN"
100#define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN 47
101#define OCFS2_TEXT_UUID_LEN 32
102#define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2
103#define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8
104
105/*
106 * ocfs2_live_connection is refcounted because the filesystem and
107 * miscdevice sides can detach in different order. Let's just be safe.
108 */
109struct ocfs2_live_connection {
110 struct list_head oc_list;
111 struct ocfs2_cluster_connection *oc_conn;
112};
113
114struct ocfs2_control_private {
115 struct list_head op_list;
116 int op_state;
117 int op_this_node;
118 struct ocfs2_protocol_version op_proto;
119};
120
121/* SETN<space><8-char-hex-nodenum><newline> */
122struct ocfs2_control_message_setn {
123 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
124 char space;
125 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
126 char newline;
127};
128
129/* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */
130struct ocfs2_control_message_setv {
131 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
132 char space1;
133 char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
134 char space2;
135 char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
136 char newline;
137};
138
139/* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */
140struct ocfs2_control_message_down {
141 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
142 char space1;
143 char uuid[OCFS2_TEXT_UUID_LEN];
144 char space2;
145 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
146 char newline;
147};
148
149union ocfs2_control_message {
150 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
151 struct ocfs2_control_message_setn u_setn;
152 struct ocfs2_control_message_setv u_setv;
153 struct ocfs2_control_message_down u_down;
154};
155
156static struct ocfs2_stack_plugin user_stack;
157
158static atomic_t ocfs2_control_opened;
159static int ocfs2_control_this_node = -1;
160static struct ocfs2_protocol_version running_proto;
161
162static LIST_HEAD(ocfs2_live_connection_list);
163static LIST_HEAD(ocfs2_control_private_list);
164static DEFINE_MUTEX(ocfs2_control_lock);
165
166static inline void ocfs2_control_set_handshake_state(struct file *file,
167 int state)
168{
169 struct ocfs2_control_private *p = file->private_data;
170 p->op_state = state;
171}
172
173static inline int ocfs2_control_get_handshake_state(struct file *file)
174{
175 struct ocfs2_control_private *p = file->private_data;
176 return p->op_state;
177}
178
179static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
180{
181 size_t len = strlen(name);
182 struct ocfs2_live_connection *c;
183
184 BUG_ON(!mutex_is_locked(&ocfs2_control_lock));
185
186 list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) {
187 if ((c->oc_conn->cc_namelen == len) &&
188 !strncmp(c->oc_conn->cc_name, name, len))
189 return c;
190 }
191
192 return c;
193}
194
/*
 * ocfs2_live_connection structures are created underneath the ocfs2
 * mount path.  Since the VFS prevents multiple calls to
 * fill_super(), we can't get dupes here.
 *
 * Returns 0 and fills *c_ret with a connection linked into
 * ocfs2_live_connection_list, -ENOMEM on allocation failure, or -ESRCH
 * when the userspace control daemon is not running.
 */
static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
				     struct ocfs2_live_connection **c_ret)
{
	int rc = 0;
	struct ocfs2_live_connection *c;

	c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
	if (!c)
		return -ENOMEM;

	mutex_lock(&ocfs2_control_lock);
	c->oc_conn = conn;

	/*
	 * Only link the connection in while the control daemon has
	 * /dev/ocfs2_control open; without it, nobody can deliver
	 * node-down events, so the mount must be refused.
	 */
	if (atomic_read(&ocfs2_control_opened))
		list_add(&c->oc_list, &ocfs2_live_connection_list);
	else {
		printk(KERN_ERR
		       "ocfs2: Userspace control daemon is not present\n");
		rc = -ESRCH;
	}

	mutex_unlock(&ocfs2_control_lock);

	if (!rc)
		*c_ret = c;
	else
		kfree(c);

	return rc;
}
230
/*
 * This function disconnects the cluster connection from ocfs2_control.
 * Afterwards, userspace can't affect the cluster connection.
 */
static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c)
{
	/* Unlink under the lock so ocfs2_connection_find() can't race us. */
	mutex_lock(&ocfs2_control_lock);
	list_del_init(&c->oc_list);
	c->oc_conn = NULL;
	mutex_unlock(&ocfs2_control_lock);

	kfree(c);
}
244
245static int ocfs2_control_cfu(void *target, size_t target_len,
246 const char __user *buf, size_t count)
247{
248 /* The T01 expects write(2) calls to have exactly one command */
249 if ((count != target_len) ||
250 (count > sizeof(union ocfs2_control_message)))
251 return -EINVAL;
252
253 if (copy_from_user(target, buf, target_len))
254 return -EFAULT;
255
256 return 0;
257}
258
259static ssize_t ocfs2_control_validate_protocol(struct file *file,
260 const char __user *buf,
261 size_t count)
262{
263 ssize_t ret;
264 char kbuf[OCFS2_CONTROL_PROTO_LEN];
265
266 ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN,
267 buf, count);
268 if (ret)
269 return ret;
270
271 if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN))
272 return -EINVAL;
273
274 ocfs2_control_set_handshake_state(file,
275 OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
276
277 return count;
278}
279
/*
 * Deliver a node-down notification for @nodenum to the filesystem
 * mounted with uuid @uuid, if such a mount is live.  Unknown uuids are
 * silently ignored.
 */
static void ocfs2_control_send_down(const char *uuid,
				    int nodenum)
{
	struct ocfs2_live_connection *c;

	mutex_lock(&ocfs2_control_lock);

	c = ocfs2_connection_find(uuid);
	if (c) {
		BUG_ON(c->oc_conn == NULL);
		c->oc_conn->cc_recovery_handler(nodenum,
						c->oc_conn->cc_recovery_data);
	}

	mutex_unlock(&ocfs2_control_lock);
}
296
/*
 * Called whenever configuration elements are sent to /dev/ocfs2_control.
 * If all configuration elements are present, try to set the global
 * values.  If there is a problem, return an error.  Skip any missing
 * elements, and only bump ocfs2_control_opened when we have all elements
 * and are successful.
 */
static int ocfs2_control_install_private(struct file *file)
{
	int rc = 0;
	int set_p = 1;
	struct ocfs2_control_private *p = file->private_data;

	BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL);

	mutex_lock(&ocfs2_control_lock);

	/* Node number: not yet supplied, or must agree with the global. */
	if (p->op_this_node < 0) {
		set_p = 0;
	} else if ((ocfs2_control_this_node >= 0) &&
		   (ocfs2_control_this_node != p->op_this_node)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Protocol version: pv_major == 0 means SETV hasn't arrived yet.
	 * Once mounts exist, a new daemon must match the running version.
	 */
	if (!p->op_proto.pv_major) {
		set_p = 0;
	} else if (!list_empty(&ocfs2_live_connection_list) &&
		   ((running_proto.pv_major != p->op_proto.pv_major) ||
		    (running_proto.pv_minor != p->op_proto.pv_minor))) {
		rc = -EINVAL;
		goto out_unlock;
	}

	/* Both elements present and consistent: publish the globals. */
	if (set_p) {
		ocfs2_control_this_node = p->op_this_node;
		running_proto.pv_major = p->op_proto.pv_major;
		running_proto.pv_minor = p->op_proto.pv_minor;
	}

out_unlock:
	mutex_unlock(&ocfs2_control_lock);

	if (!rc && set_p) {
		/* We set the global values successfully */
		atomic_inc(&ocfs2_control_opened);
		ocfs2_control_set_handshake_state(file,
					OCFS2_CONTROL_HANDSHAKE_VALID);
	}

	return rc;
}
349
350static int ocfs2_control_get_this_node(void)
351{
352 int rc;
353
354 mutex_lock(&ocfs2_control_lock);
355 if (ocfs2_control_this_node < 0)
356 rc = -EINVAL;
357 else
358 rc = ocfs2_control_this_node;
359 mutex_unlock(&ocfs2_control_lock);
360
361 return rc;
362}
363
/*
 * Handle a "SETN <8-hex-nodenum>\n" message: parse and stash the local
 * node number, then try to publish the configuration.  Only legal in
 * the PROTOCOL handshake state.  Returns 0 or a negative errno.
 */
static int ocfs2_control_do_setnode_msg(struct file *file,
					struct ocfs2_control_message_setn *msg)
{
	long nodenum;
	char *ptr = NULL;
	struct ocfs2_control_private *p = file->private_data;

	if (ocfs2_control_get_handshake_state(file) !=
	    OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
		return -EINVAL;

	if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
		    OCFS2_CONTROL_MESSAGE_OP_LEN))
		return -EINVAL;

	/* NUL-terminate nodestr in place so simple_strtol() can parse it. */
	if ((msg->space != ' ') || (msg->newline != '\n'))
		return -EINVAL;
	msg->space = msg->newline = '\0';

	nodenum = simple_strtol(msg->nodestr, &ptr, 16);
	if (!ptr || *ptr)
		return -EINVAL;

	/* LONG_MIN/LONG_MAX signal strtol overflow; also bound to [0, INT_MAX]. */
	if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
	    (nodenum > INT_MAX) || (nodenum < 0))
		return -ERANGE;
	p->op_this_node = nodenum;

	return ocfs2_control_install_private(file);
}
394
/*
 * Handle a "SETV <2-hex-major> <2-hex-minor>\n" message: parse the
 * negotiated locking protocol version, validate it against the maximum
 * the filesystem supports, stash it, and try to publish the
 * configuration.  Only legal in the PROTOCOL handshake state.
 * Returns 0 or a negative errno.
 */
static int ocfs2_control_do_setversion_msg(struct file *file,
					   struct ocfs2_control_message_setv *msg)
 {
	long major, minor;
	char *ptr = NULL;
	struct ocfs2_control_private *p = file->private_data;
	struct ocfs2_protocol_version *max =
		&user_stack.sp_proto->lp_max_version;

	if (ocfs2_control_get_handshake_state(file) !=
	    OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
		return -EINVAL;

	if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
		    OCFS2_CONTROL_MESSAGE_OP_LEN))
		return -EINVAL;

	/* NUL-terminate the hex fields in place for simple_strtol(). */
	if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
	    (msg->newline != '\n'))
		return -EINVAL;
	msg->space1 = msg->space2 = msg->newline = '\0';

	major = simple_strtol(msg->major, &ptr, 16);
	if (!ptr || *ptr)
		return -EINVAL;
	minor = simple_strtol(msg->minor, &ptr, 16);
	if (!ptr || *ptr)
		return -EINVAL;

	/*
	 * The major must be between 1 and 255, inclusive.  The minor
	 * must be between 0 and 255, inclusive.  The version passed in
	 * must be within the maximum version supported by the filesystem.
	 */
	if ((major == LONG_MIN) || (major == LONG_MAX) ||
	    (major > (u8)-1) || (major < 1))
		return -ERANGE;
	if ((minor == LONG_MIN) || (minor == LONG_MAX) ||
	    (minor > (u8)-1) || (minor < 0))
		return -ERANGE;
	if ((major != max->pv_major) ||
	    (minor > max->pv_minor))
		return -EINVAL;

	p->op_proto.pv_major = major;
	p->op_proto.pv_minor = minor;

	return ocfs2_control_install_private(file);
}
444
/*
 * Handle a "DOWN <32-hex-uuid> <8-hex-nodenum>\n" message: parse the
 * node number and forward a node-down event to the filesystem mounted
 * with that uuid.  Only legal once the handshake is VALID.
 * Returns 0 or a negative errno.
 */
static int ocfs2_control_do_down_msg(struct file *file,
				     struct ocfs2_control_message_down *msg)
{
	long nodenum;
	char *p = NULL;

	if (ocfs2_control_get_handshake_state(file) !=
	    OCFS2_CONTROL_HANDSHAKE_VALID)
		return -EINVAL;

	if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
		    OCFS2_CONTROL_MESSAGE_OP_LEN))
		return -EINVAL;

	/*
	 * NUL-terminate the fields in place; this also terminates
	 * msg->uuid, since space2 immediately follows it.
	 */
	if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
	    (msg->newline != '\n'))
		return -EINVAL;
	msg->space1 = msg->space2 = msg->newline = '\0';

	nodenum = simple_strtol(msg->nodestr, &p, 16);
	if (!p || *p)
		return -EINVAL;

	/* LONG_MIN/LONG_MAX signal strtol overflow; bound to [0, INT_MAX]. */
	if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
	    (nodenum > INT_MAX) || (nodenum < 0))
		return -ERANGE;

	ocfs2_control_send_down(msg->uuid, nodenum);

	return 0;
}
476
/*
 * Dispatch one T01 control message.  The message is identified by both
 * its total length and its 4-byte tag; anything else is -EINVAL.
 * Returns @count on success, a negative errno otherwise.
 */
static ssize_t ocfs2_control_message(struct file *file,
				     const char __user *buf,
				     size_t count)
{
	ssize_t ret;
	union ocfs2_control_message msg;

	/* Try to catch padding issues */
	WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) !=
		(sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1)));

	/*
	 * ocfs2_control_cfu() verifies count matches the expected total
	 * length of whichever message is selected below.
	 */
	memset(&msg, 0, sizeof(union ocfs2_control_message));
	ret = ocfs2_control_cfu(&msg, count, buf, count);
	if (ret)
		goto out;

	if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) &&
	    !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
		     OCFS2_CONTROL_MESSAGE_OP_LEN))
		ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn);
	else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) &&
		 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
			  OCFS2_CONTROL_MESSAGE_OP_LEN))
		ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv);
	else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) &&
		 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
			  OCFS2_CONTROL_MESSAGE_OP_LEN))
		ret = ocfs2_control_do_down_msg(file, &msg.u_down);
	else
		ret = -EINVAL;

out:
	return ret ? ret : count;
}
511
512static ssize_t ocfs2_control_write(struct file *file,
513 const char __user *buf,
514 size_t count,
515 loff_t *ppos)
516{
517 ssize_t ret;
518
519 switch (ocfs2_control_get_handshake_state(file)) {
520 case OCFS2_CONTROL_HANDSHAKE_INVALID:
521 ret = -EINVAL;
522 break;
523
524 case OCFS2_CONTROL_HANDSHAKE_READ:
525 ret = ocfs2_control_validate_protocol(file, buf,
526 count);
527 break;
528
529 case OCFS2_CONTROL_HANDSHAKE_PROTOCOL:
530 case OCFS2_CONTROL_HANDSHAKE_VALID:
531 ret = ocfs2_control_message(file, buf, count);
532 break;
533
534 default:
535 BUG();
536 ret = -EIO;
537 break;
538 }
539
540 return ret;
541}
542
543/*
544 * This is a naive version. If we ever have a new protocol, we'll expand
545 * it. Probably using seq_file.
546 */
547static ssize_t ocfs2_control_read(struct file *file,
548 char __user *buf,
549 size_t count,
550 loff_t *ppos)
551{
552 char *proto_string = OCFS2_CONTROL_PROTO;
553 size_t to_write = 0;
554
555 if (*ppos >= OCFS2_CONTROL_PROTO_LEN)
556 return 0;
557
558 to_write = OCFS2_CONTROL_PROTO_LEN - *ppos;
559 if (to_write > count)
560 to_write = count;
561 if (copy_to_user(buf, proto_string + *ppos, to_write))
562 return -EFAULT;
563
564 *ppos += to_write;
565
566 /* Have we read the whole protocol list? */
567 if (*ppos >= OCFS2_CONTROL_PROTO_LEN)
568 ocfs2_control_set_handshake_state(file,
569 OCFS2_CONTROL_HANDSHAKE_READ);
570
571 return to_write;
572}
573
574static int ocfs2_control_release(struct inode *inode, struct file *file)
575{
576 struct ocfs2_control_private *p = file->private_data;
577
578 mutex_lock(&ocfs2_control_lock);
579
580 if (ocfs2_control_get_handshake_state(file) !=
581 OCFS2_CONTROL_HANDSHAKE_VALID)
582 goto out;
583
584 if (atomic_dec_and_test(&ocfs2_control_opened)) {
585 if (!list_empty(&ocfs2_live_connection_list)) {
586 /* XXX: Do bad things! */
587 printk(KERN_ERR
588 "ocfs2: Unexpected release of ocfs2_control!\n"
589 " Loss of cluster connection requires "
590 "an emergency restart!\n");
591 emergency_restart();
592 }
593 /*
594 * Last valid close clears the node number and resets
595 * the locking protocol version
596 */
597 ocfs2_control_this_node = -1;
598 running_proto.pv_major = 0;
599 running_proto.pv_major = 0;
600 }
601
602out:
603 list_del_init(&p->op_list);
604 file->private_data = NULL;
605
606 mutex_unlock(&ocfs2_control_lock);
607
608 kfree(p);
609
610 return 0;
611}
612
613static int ocfs2_control_open(struct inode *inode, struct file *file)
614{
615 struct ocfs2_control_private *p;
616
617 p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL);
618 if (!p)
619 return -ENOMEM;
620 p->op_this_node = -1;
621
622 mutex_lock(&ocfs2_control_lock);
623 file->private_data = p;
624 list_add(&p->op_list, &ocfs2_control_private_list);
625 mutex_unlock(&ocfs2_control_lock);
626
627 return 0;
628}
629
/* File operations for the /dev/ocfs2_control character device. */
static const struct file_operations ocfs2_control_fops = {
	.open    = ocfs2_control_open,
	.release = ocfs2_control_release,
	.read    = ocfs2_control_read,
	.write   = ocfs2_control_write,
	.owner   = THIS_MODULE,
};

/* Misc device "/dev/ocfs2_control" with a dynamically assigned minor. */
struct miscdevice ocfs2_control_device = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "ocfs2_control",
	.fops		= &ocfs2_control_fops,
};
643
644static int ocfs2_control_init(void)
645{
646 int rc;
647
648 atomic_set(&ocfs2_control_opened, 0);
649
650 rc = misc_register(&ocfs2_control_device);
651 if (rc)
652 printk(KERN_ERR
653 "ocfs2: Unable to register ocfs2_control device "
654 "(errno %d)\n",
655 -rc);
656
657 return rc;
658}
659
660static void ocfs2_control_exit(void)
661{
662 int rc;
663
664 rc = misc_deregister(&ocfs2_control_device);
665 if (rc)
666 printk(KERN_ERR
667 "ocfs2: Unable to deregister ocfs2_control device "
668 "(errno %d)\n",
669 -rc);
670}
671
672static struct dlm_lksb *fsdlm_astarg_to_lksb(void *astarg)
673{
674 struct ocfs2_lock_res *res = astarg;
675 return &res->l_lksb.lksb_fsdlm;
676}
677
/*
 * fs/dlm delivers a single AST for both grants and unlock/cancel
 * completion; tell them apart by sb_status and route to the matching
 * stack-glue callback.
 */
static void fsdlm_lock_ast_wrapper(void *astarg)
{
	struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg);
	int status = lksb->sb_status;

	BUG_ON(user_stack.sp_proto == NULL);

	/*
	 * For now we're punting on the issue of other non-standard errors
	 * where we can't tell if the unlock_ast or lock_ast should be called.
	 * The main "other error" that's possible is EINVAL which means the
	 * function was called with invalid args, which shouldn't be possible
	 * since the caller here is under our control.  Other non-standard
	 * errors probably fall into the same category, or otherwise are fatal
	 * which means we can't carry on anyway.
	 */

	if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL)
		user_stack.sp_proto->lp_unlock_ast(astarg, 0);
	else
		user_stack.sp_proto->lp_lock_ast(astarg);
}
700
701static void fsdlm_blocking_ast_wrapper(void *astarg, int level)
702{
703 BUG_ON(user_stack.sp_proto == NULL);
704
705 user_stack.sp_proto->lp_blocking_ast(astarg, level);
706}
707
/*
 * Take a lock through fs/dlm.  DLM_LKF_NODLCKWT is always added so the
 * dlm does not wait on deadlock timeouts.  Returns dlm_lock()'s result
 * (0 or negative errno).
 */
static int user_dlm_lock(struct ocfs2_cluster_connection *conn,
			 int mode,
			 union ocfs2_dlm_lksb *lksb,
			 u32 flags,
			 void *name,
			 unsigned int namelen,
			 void *astarg)
{
	int ret;

	/*
	 * Point the LVB at the space following the dlm_lksb inside the
	 * ocfs2_dlm_lksb union, but only on first use — don't clobber a
	 * pointer set by an earlier call.
	 */
	if (!lksb->lksb_fsdlm.sb_lvbptr)
		lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
					     sizeof(struct dlm_lksb);

	ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm,
		       flags|DLM_LKF_NODLCKWT, name, namelen, 0,
		       fsdlm_lock_ast_wrapper, astarg,
		       fsdlm_blocking_ast_wrapper);
	return ret;
}
728
729static int user_dlm_unlock(struct ocfs2_cluster_connection *conn,
730 union ocfs2_dlm_lksb *lksb,
731 u32 flags,
732 void *astarg)
733{
734 int ret;
735
736 ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid,
737 flags, &lksb->lksb_fsdlm, astarg);
738 return ret;
739}
740
741static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
742{
743 return lksb->lksb_fsdlm.sb_status;
744}
745
746static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
747{
748 return (void *)(lksb->lksb_fsdlm.sb_lvbptr);
749}
750
/* fs/dlm offers no single-lock dump facility; intentionally a no-op. */
static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
{
}
754
755/*
756 * Compare a requested locking protocol version against the current one.
757 *
758 * If the major numbers are different, they are incompatible.
759 * If the current minor is greater than the request, they are incompatible.
760 * If the current minor is less than or equal to the request, they are
761 * compatible, and the requester should run at the current minor version.
762 */
763static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
764 struct ocfs2_protocol_version *request)
765{
766 if (existing->pv_major != request->pv_major)
767 return 1;
768
769 if (existing->pv_minor > request->pv_minor)
770 return 1;
771
772 if (existing->pv_minor < request->pv_minor)
773 request->pv_minor = existing->pv_minor;
774
775 return 0;
776}
777
/*
 * Connect to the userspace ("user") cluster stack: register the mount
 * with ocfs2_control, validate the locking protocol version negotiated
 * by the control daemon, and create a fs/dlm lockspace named after the
 * connection.  Returns 0 or a negative errno (-EPROTO on a version
 * mismatch).
 */
static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
{
	dlm_lockspace_t *fsdlm;
	struct ocfs2_live_connection *control;
	int rc = 0;

	BUG_ON(conn == NULL);

	/* Fails with -ESRCH if the control daemon isn't running. */
	rc = ocfs2_live_connection_new(conn, &control);
	if (rc)
		goto out;

	/*
	 * running_proto must have been set before we allowed any mounts
	 * to proceed.
	 */
	if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
		printk(KERN_ERR
		       "Unable to mount with fs locking protocol version "
		       "%u.%u because the userspace control daemon has "
		       "negotiated %u.%u\n",
		       conn->cc_version.pv_major, conn->cc_version.pv_minor,
		       running_proto.pv_major, running_proto.pv_minor);
		rc = -EPROTO;
		ocfs2_live_connection_drop(control);
		goto out;
	}

	rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name),
			       &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN);
	if (rc) {
		/* Undo the control registration on lockspace failure. */
		ocfs2_live_connection_drop(control);
		goto out;
	}

	conn->cc_private = control;
	conn->cc_lockspace = fsdlm;
out:
	return rc;
}
818
819static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn,
820 int hangup_pending)
821{
822 dlm_release_lockspace(conn->cc_lockspace, 2);
823 conn->cc_lockspace = NULL;
824 ocfs2_live_connection_drop(conn->cc_private);
825 conn->cc_private = NULL;
826 return 0;
827}
828
/*
 * Report the local node number announced by the control daemon.
 * Returns 0 and fills *this_node, or a negative errno.
 */
static int user_cluster_this_node(unsigned int *this_node)
{
	int node = ocfs2_control_get_this_node();

	if (node < 0)
		return node;

	*this_node = node;
	return 0;
}
840
/*
 * Operations vector wiring the userspace stack into the stack glue.
 * Note: no ->hangup; that hack exists only for the classic o2cb stack.
 */
static struct ocfs2_stack_operations user_stack_ops = {
	.connect	= user_cluster_connect,
	.disconnect	= user_cluster_disconnect,
	.this_node	= user_cluster_this_node,
	.dlm_lock	= user_dlm_lock,
	.dlm_unlock	= user_dlm_unlock,
	.lock_status	= user_dlm_lock_status,
	.lock_lvb	= user_dlm_lvb,
	.dump_lksb	= user_dlm_dump_lksb,
};

/* Plugin descriptor registered with the stack glue under the name "user". */
static struct ocfs2_stack_plugin user_stack = {
	.sp_name	= "user",
	.sp_ops		= &user_stack_ops,
	.sp_owner	= THIS_MODULE,
};
857
858
859static int __init user_stack_init(void)
860{
861 int rc;
862
863 rc = ocfs2_control_init();
864 if (!rc) {
865 rc = ocfs2_stack_glue_register(&user_stack);
866 if (rc)
867 ocfs2_control_exit();
868 }
869
870 return rc;
871}
872
873static void __exit user_stack_exit(void)
874{
875 ocfs2_stack_glue_unregister(&user_stack);
876 ocfs2_control_exit();
877}
878
/* Module boilerplate for the userspace stack plugin. */
MODULE_AUTHOR("Oracle");
MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks");
MODULE_LICENSE("GPL");
module_init(user_stack_init);
module_exit(user_stack_exit);
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
new file mode 100644
index 000000000000..119f60cea9cc
--- /dev/null
+++ b/fs/ocfs2/stackglue.c
@@ -0,0 +1,568 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * stackglue.c
5 *
6 * Code which implements an OCFS2 specific interface to underlying
7 * cluster stacks.
8 *
9 * Copyright (C) 2007 Oracle. All rights reserved.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public
13 * License as published by the Free Software Foundation, version 2.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 */
20
21#include <linux/list.h>
22#include <linux/spinlock.h>
23#include <linux/module.h>
24#include <linux/slab.h>
25#include <linux/kmod.h>
26#include <linux/fs.h>
27#include <linux/kobject.h>
28#include <linux/sysfs.h>
29
30#include "ocfs2_fs.h"
31
32#include "stackglue.h"
33
34#define OCFS2_STACK_PLUGIN_O2CB "o2cb"
35#define OCFS2_STACK_PLUGIN_USER "user"
36
/* Protocol published by ocfs2 via ocfs2_stack_glue_set_locking_protocol(). */
37static struct ocfs2_locking_protocol *lproto;
/* Guards the plugin list, active_stack, lproto, and cluster_stack_name. */
38static DEFINE_SPINLOCK(ocfs2_stack_lock);
39static LIST_HEAD(ocfs2_stack_list);
/* Administratively selected stack label (settable via sysfs until active). */
40static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1];
41
42/*
43 * The stack currently in use. If not null, active_stack->sp_count > 0,
44 * the module is pinned, and the locking protocol cannot be changed.
45 */
46static struct ocfs2_stack_plugin *active_stack;
47
/*
 * Find a registered plugin by name.  Returns NULL if no plugin with
 * that sp_name is on ocfs2_stack_list.  Caller must hold
 * ocfs2_stack_lock (enforced by the assertion below).
 */
48static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
49{
50 struct ocfs2_stack_plugin *p;
51
52 assert_spin_locked(&ocfs2_stack_lock);
53
54 list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
55 if (!strcmp(p->sp_name, name))
56 return p;
57 }
58
59 return NULL;
60}
61
/*
 * Try to pin the plugin named plugin_name as the active stack.
 *
 * Returns -EBUSY if stack_name does not match the administratively
 * selected cluster_stack_name, or if a different plugin is already
 * active; -ENOENT if the plugin is not registered (the caller may load
 * its module and retry).  On success the plugin's module reference is
 * taken and sp_count is bumped.  Requesting the already-active plugin
 * again succeeds without bumping sp_count.
 */
62static int ocfs2_stack_driver_request(const char *stack_name,
63 const char *plugin_name)
64{
65 int rc;
66 struct ocfs2_stack_plugin *p;
67
68 spin_lock(&ocfs2_stack_lock);
69
70 /*
71 * If the stack passed by the filesystem isn't the selected one,
72 * we can't continue.
73 */
74 if (strcmp(stack_name, cluster_stack_name)) {
75 rc = -EBUSY;
76 goto out;
77 }
78
79 if (active_stack) {
80 /*
81 * If the active stack isn't the one we want, it cannot
82 * be selected right now.
83 */
84 if (!strcmp(active_stack->sp_name, plugin_name))
85 rc = 0;
86 else
87 rc = -EBUSY;
88 goto out;
89 }
90
91 p = ocfs2_stack_lookup(plugin_name);
92 if (!p || !try_module_get(p->sp_owner)) {
93 rc = -ENOENT;
94 goto out;
95 }
96
97 /* Ok, the stack is pinned */
98 p->sp_count++;
99 active_stack = p;
100
101 rc = 0;
102
103out:
104 spin_unlock(&ocfs2_stack_lock);
105 return rc;
106}
107
108/*
109 * This function looks up the appropriate stack and makes it active. If
110 * there is no stack, it tries to load it. It will fail if the stack still
111 * cannot be found. It will also fail if a different stack is in use.
112 */
113static int ocfs2_stack_driver_get(const char *stack_name)
114{
115 int rc;
116 char *plugin_name = OCFS2_STACK_PLUGIN_O2CB;
117
118 /*
119 * Classic stack does not pass in a stack name. This is
120 * compatible with older tools as well.
121 */
122 if (!stack_name || !*stack_name)
123 stack_name = OCFS2_STACK_PLUGIN_O2CB;
124
125 if (strlen(stack_name) != OCFS2_STACK_LABEL_LEN) {
126 printk(KERN_ERR
127 "ocfs2 passed an invalid cluster stack label: \"%s\"\n",
128 stack_name);
129 return -EINVAL;
130 }
131
132 /* Anything that isn't the classic stack is a user stack */
133 if (strcmp(stack_name, OCFS2_STACK_PLUGIN_O2CB))
134 plugin_name = OCFS2_STACK_PLUGIN_USER;
135
136 rc = ocfs2_stack_driver_request(stack_name, plugin_name);
137 if (rc == -ENOENT) {
 /* Not registered yet: try to autoload the plugin module, retry. */
138 request_module("ocfs2_stack_%s", plugin_name);
139 rc = ocfs2_stack_driver_request(stack_name, plugin_name);
140 }
141
142 if (rc == -ENOENT) {
143 printk(KERN_ERR
144 "ocfs2: Cluster stack driver \"%s\" cannot be found\n",
145 plugin_name);
146 } else if (rc == -EBUSY) {
147 printk(KERN_ERR
148 "ocfs2: A different cluster stack is in use\n");
149 }
150
151 return rc;
152}
153
/*
 * Drop a reference taken by ocfs2_stack_driver_request().  When the
 * last user goes away the plugin's module reference is released and
 * the stack is deselected.
 */
154static void ocfs2_stack_driver_put(void)
155{
156 spin_lock(&ocfs2_stack_lock);
157 BUG_ON(active_stack == NULL);
158 BUG_ON(active_stack->sp_count == 0);
159
160 active_stack->sp_count--;
161 if (!active_stack->sp_count) {
162 module_put(active_stack->sp_owner);
163 active_stack = NULL;
164 }
165 spin_unlock(&ocfs2_stack_lock);
166}
167
/*
 * Called by stack plugins at module load.  Adds the plugin to the
 * global list (sp_name must be unique, else -EEXIST) and hands it the
 * current locking protocol pointer, which may still be NULL if ocfs2
 * itself has not loaded yet.
 */
168int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin)
169{
170 int rc;
171
172 spin_lock(&ocfs2_stack_lock);
173 if (!ocfs2_stack_lookup(plugin->sp_name)) {
174 plugin->sp_count = 0;
175 plugin->sp_proto = lproto;
176 list_add(&plugin->sp_list, &ocfs2_stack_list);
177 printk(KERN_INFO "ocfs2: Registered cluster interface %s\n",
178 plugin->sp_name);
179 rc = 0;
180 } else {
181 printk(KERN_ERR "ocfs2: Stack \"%s\" already registered\n",
182 plugin->sp_name);
183 rc = -EEXIST;
184 }
185 spin_unlock(&ocfs2_stack_lock);
186
187 return rc;
188}
189EXPORT_SYMBOL_GPL(ocfs2_stack_glue_register);
190
191void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin)
192{
193 struct ocfs2_stack_plugin *p;
194
195 spin_lock(&ocfs2_stack_lock);
196 p = ocfs2_stack_lookup(plugin->sp_name);
197 if (p) {
198 BUG_ON(p != plugin);
199 BUG_ON(plugin == active_stack);
200 BUG_ON(plugin->sp_count != 0);
201 list_del_init(&plugin->sp_list);
202 printk(KERN_INFO "ocfs2: Unregistered cluster interface %s\n",
203 plugin->sp_name);
204 } else {
205 printk(KERN_ERR "Stack \"%s\" is not registered\n",
206 plugin->sp_name);
207 }
208 spin_unlock(&ocfs2_stack_lock);
209}
210EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister);
211
/*
 * Called by the ocfs2 module to publish its locking protocol.  Must
 * happen before any stack becomes active (BUG_ON below); the protocol
 * pointer is fanned out to every already-registered plugin.
 */
212void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto)
213{
214 struct ocfs2_stack_plugin *p;
215
216 BUG_ON(proto == NULL);
217
218 spin_lock(&ocfs2_stack_lock);
219 BUG_ON(active_stack != NULL);
220
221 lproto = proto;
222 list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
223 p->sp_proto = lproto;
224 }
225
226 spin_unlock(&ocfs2_stack_lock);
227}
228EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol);
229
230
231/*
232 * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take
233 * "struct ocfs2_lock_res *astarg" instead of "void *astarg" because the
234 * underlying stack plugins need to pilfer the lksb off of the lock_res.
235 * If some other structure needs to be passed as an astarg, the plugins
236 * will need to be given a different avenue to the lksb.
237 */
/*
 * Forward a lock request to the active stack.  Requires that a stack
 * is active (active_stack is dereferenced unchecked) and that the
 * locking protocol has been set.
 */
238int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn,
239 int mode,
240 union ocfs2_dlm_lksb *lksb,
241 u32 flags,
242 void *name,
243 unsigned int namelen,
244 struct ocfs2_lock_res *astarg)
245{
246 BUG_ON(lproto == NULL);
247
248 return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags,
249 name, namelen, astarg);
250}
251EXPORT_SYMBOL_GPL(ocfs2_dlm_lock);
252
/* Forward an unlock request to the active stack; same preconditions. */
253int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
254 union ocfs2_dlm_lksb *lksb,
255 u32 flags,
256 struct ocfs2_lock_res *astarg)
257{
258 BUG_ON(lproto == NULL);
259
260 return active_stack->sp_ops->dlm_unlock(conn, lksb, flags, astarg);
261}
262EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock);
263
/*
 * Translate the stack-specific lksb status into an errno via the
 * active stack's ->lock_status() op.
 */
264int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
265{
266 return active_stack->sp_ops->lock_status(lksb);
267}
268EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status);
269
270/*
271 * Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we
272 * don't cast at the glue level. The real answer is that the header
273 * ordering is nigh impossible.
274 */
275void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb)
276{
277 return active_stack->sp_ops->lock_lvb(lksb);
278}
279EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb);
280
/* Debugging aid: ask the active stack to dump this lksb. */
281void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
282{
283 active_stack->sp_ops->dump_lksb(lksb);
284}
285EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb);
286
/*
 * Attach a filesystem to the cluster.  Allocates the connection, pins
 * the appropriate stack driver (autoloading it if necessary), then
 * asks the stack to connect.  On any failure the connection is freed
 * and the driver reference dropped; *conn is written only on success.
 *
 * NOTE(review): cc_name is char[GROUP_NAME_MAX] and grouplen ==
 * GROUP_NAME_MAX is accepted here, in which case cc_name is not
 * NUL-terminated (kzalloc() only guarantees the terminator for shorter
 * names), yet user_cluster_connect() runs strlen() on it.  Verify that
 * callers never pass a full-length group name, or tighten the check.
 */
287int ocfs2_cluster_connect(const char *stack_name,
288 const char *group,
289 int grouplen,
290 void (*recovery_handler)(int node_num,
291 void *recovery_data),
292 void *recovery_data,
293 struct ocfs2_cluster_connection **conn)
294{
295 int rc = 0;
296 struct ocfs2_cluster_connection *new_conn;
297
298 BUG_ON(group == NULL);
299 BUG_ON(conn == NULL);
300 BUG_ON(recovery_handler == NULL);
301
302 if (grouplen > GROUP_NAME_MAX) {
303 rc = -EINVAL;
304 goto out;
305 }
306
307 new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection),
308 GFP_KERNEL);
309 if (!new_conn) {
310 rc = -ENOMEM;
311 goto out;
312 }
313
314 memcpy(new_conn->cc_name, group, grouplen);
315 new_conn->cc_namelen = grouplen;
316 new_conn->cc_recovery_handler = recovery_handler;
317 new_conn->cc_recovery_data = recovery_data;
318
319 /* Start the new connection at our maximum compatibility level */
320 new_conn->cc_version = lproto->lp_max_version;
321
322 /* This will pin the stack driver if successful */
323 rc = ocfs2_stack_driver_get(stack_name);
324 if (rc)
325 goto out_free;
326
327 rc = active_stack->sp_ops->connect(new_conn);
328 if (rc) {
329 ocfs2_stack_driver_put();
330 goto out_free;
331 }
332
333 *conn = new_conn;
334
335out_free:
336 if (rc)
337 kfree(new_conn);
338
339out:
340 return rc;
341}
342EXPORT_SYMBOL_GPL(ocfs2_cluster_connect);
343
344/* If hangup_pending is 0, the stack driver will be dropped */
/*
 * Detach a filesystem from the cluster.  On success the connection is
 * freed; when hangup_pending is set, the driver reference is kept so
 * ocfs2_cluster_hangup() can drop it later.
 */
345int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
346 int hangup_pending)
347{
348 int ret;
349
350 BUG_ON(conn == NULL);
351
352 ret = active_stack->sp_ops->disconnect(conn, hangup_pending);
353
354 /* XXX Should we free it anyway? */
355 if (!ret) {
356 kfree(conn);
357 if (!hangup_pending)
358 ocfs2_stack_driver_put();
359 }
360
361 return ret;
362}
363EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect);
364
/*
 * Compatibility hook for older tools; ->hangup() is optional.  Always
 * drops the driver reference left behind by a disconnect that passed
 * hangup_pending == 1.  group must be NUL-terminated at grouplen.
 */
365void ocfs2_cluster_hangup(const char *group, int grouplen)
366{
367 BUG_ON(group == NULL);
368 BUG_ON(group[grouplen] != '\0');
369
370 if (active_stack->sp_ops->hangup)
371 active_stack->sp_ops->hangup(group, grouplen);
372
373 /* cluster_disconnect() was called with hangup_pending==1 */
374 ocfs2_stack_driver_put();
375}
376EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup);
377
/* Ask the active stack for this node's cluster node number. */
378int ocfs2_cluster_this_node(unsigned int *node)
379{
380 return active_stack->sp_ops->this_node(node);
381}
382EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node);
383
384
385/*
386 * Sysfs bits
387 */
388
/*
 * sysfs: report ocfs2's maximum locking protocol as "major.minor".
 * Shows nothing (returns 0) until the protocol has been set.
 */
389static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj,
390 struct kobj_attribute *attr,
391 char *buf)
392{
393 ssize_t ret = 0;
394
395 spin_lock(&ocfs2_stack_lock);
396 if (lproto)
397 ret = snprintf(buf, PAGE_SIZE, "%u.%u\n",
398 lproto->lp_max_version.pv_major,
399 lproto->lp_max_version.pv_minor);
400 spin_unlock(&ocfs2_stack_lock);
401
402 return ret;
403}
404
405static struct kobj_attribute ocfs2_attr_max_locking_protocol =
406 __ATTR(max_locking_protocol, S_IFREG | S_IRUGO,
407 ocfs2_max_locking_protocol_show, NULL);
408
409static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
410 struct kobj_attribute *attr,
411 char *buf)
412{
413 ssize_t ret = 0, total = 0, remain = PAGE_SIZE;
414 struct ocfs2_stack_plugin *p;
415
416 spin_lock(&ocfs2_stack_lock);
417 list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
418 ret = snprintf(buf, remain, "%s\n",
419 p->sp_name);
420 if (ret < 0) {
421 total = ret;
422 break;
423 }
424 if (ret == remain) {
425 /* snprintf() didn't fit */
426 total = -E2BIG;
427 break;
428 }
429 total += ret;
430 remain -= ret;
431 }
432 spin_unlock(&ocfs2_stack_lock);
433
434 return total;
435}
436
437static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins =
438 __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO,
439 ocfs2_loaded_cluster_plugins_show, NULL);
440
/*
 * sysfs: name of the currently active plugin, or nothing (0 bytes)
 * when no stack is active.
 */
441static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
442 struct kobj_attribute *attr,
443 char *buf)
444{
445 ssize_t ret = 0;
446
447 spin_lock(&ocfs2_stack_lock);
448 if (active_stack) {
449 ret = snprintf(buf, PAGE_SIZE, "%s\n",
450 active_stack->sp_name);
451 if (ret == PAGE_SIZE)
452 ret = -E2BIG;
453 }
454 spin_unlock(&ocfs2_stack_lock);
455
456 return ret;
457}
458
459static struct kobj_attribute ocfs2_attr_active_cluster_plugin =
460 __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO,
461 ocfs2_active_cluster_plugin_show, NULL);
462
/* sysfs: show the administratively selected cluster stack label. */
463static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj,
464 struct kobj_attribute *attr,
465 char *buf)
466{
467 ssize_t ret;
468 spin_lock(&ocfs2_stack_lock);
469 ret = snprintf(buf, PAGE_SIZE, "%s\n", cluster_stack_name);
470 spin_unlock(&ocfs2_stack_lock);
471
472 return ret;
473}
474
/*
 * sysfs: select the cluster stack label.  Input must be exactly
 * OCFS2_STACK_LABEL_LEN characters (a trailing newline is tolerated)
 * with no embedded NULs.  Once a stack is active the name is frozen:
 * rewriting the same label succeeds, anything else is -EBUSY.
 */
475static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj,
476 struct kobj_attribute *attr,
477 const char *buf, size_t count)
478{
479 size_t len = count;
480 ssize_t ret;
481
482 if (len == 0)
483 return len;
484
485 if (buf[len - 1] == '\n')
486 len--;
487
488 if ((len != OCFS2_STACK_LABEL_LEN) ||
489 (strnlen(buf, len) != len))
490 return -EINVAL;
491
492 spin_lock(&ocfs2_stack_lock);
493 if (active_stack) {
494 if (!strncmp(buf, cluster_stack_name, len))
495 ret = count;
496 else
497 ret = -EBUSY;
498 } else {
499 memcpy(cluster_stack_name, buf, len);
500 ret = count;
501 }
502 spin_unlock(&ocfs2_stack_lock);
503
504 return ret;
505}
506
507
508static struct kobj_attribute ocfs2_attr_cluster_stack =
509 __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR,
510 ocfs2_cluster_stack_show,
511 ocfs2_cluster_stack_store);
512
/* All attributes exposed under /sys/fs/ocfs2/. */
513static struct attribute *ocfs2_attrs[] = {
514 &ocfs2_attr_max_locking_protocol.attr,
515 &ocfs2_attr_loaded_cluster_plugins.attr,
516 &ocfs2_attr_active_cluster_plugin.attr,
517 &ocfs2_attr_cluster_stack.attr,
518 NULL,
519};
520
521static struct attribute_group ocfs2_attr_group = {
522 .attrs = ocfs2_attrs,
523};
524
525static struct kset *ocfs2_kset;
526
527static void ocfs2_sysfs_exit(void)
528{
529 kset_unregister(ocfs2_kset);
530}
531
/*
 * Create the /sys/fs/ocfs2 kset and attach the attribute group,
 * unregistering the kset again if group creation fails.
 */
532static int ocfs2_sysfs_init(void)
533{
534 int ret;
535
536 ocfs2_kset = kset_create_and_add("ocfs2", NULL, fs_kobj);
537 if (!ocfs2_kset)
538 return -ENOMEM;
539
540 ret = sysfs_create_group(&ocfs2_kset->kobj, &ocfs2_attr_group);
541 if (ret)
542 goto error;
543
544 return 0;
545
546error:
547 kset_unregister(ocfs2_kset);
548 return ret;
549}
550
/* Default to the classic o2cb stack, then publish the sysfs knobs. */
551static int __init ocfs2_stack_glue_init(void)
552{
553 strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);
554
555 return ocfs2_sysfs_init();
556}
557
558static void __exit ocfs2_stack_glue_exit(void)
559{
560 lproto = NULL;
561 ocfs2_sysfs_exit();
562}
563
564MODULE_AUTHOR("Oracle");
565MODULE_DESCRIPTION("ocfs2 cluter stack glue layer");
566MODULE_LICENSE("GPL");
567module_init(ocfs2_stack_glue_init);
568module_exit(ocfs2_stack_glue_exit);
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
new file mode 100644
index 000000000000..005e4f170e0f
--- /dev/null
+++ b/fs/ocfs2/stackglue.h
@@ -0,0 +1,261 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * stackglue.h
5 *
6 * Glue to the underlying cluster stack.
7 *
8 * Copyright (C) 2007 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation, version 2.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20
21#ifndef STACKGLUE_H
22#define STACKGLUE_H
23
24#include <linux/types.h>
25#include <linux/list.h>
26#include <linux/dlmconstants.h>
27
28#include "dlm/dlmapi.h"
29#include <linux/dlm.h>
30
31/*
32 * dlmconstants.h does not have a LOCAL flag. We hope to remove it
33 * some day, but right now we need it. Let's fake it. This value is larger
34 * than any flag in dlmconstants.h.
35 */
36#define DLM_LKF_LOCAL 0x00100000
37
38/*
39 * This shadows DLM_LOCKSPACE_LEN in fs/dlm/dlm_internal.h. That probably
40 * wants to be in a public header.
41 */
42#define GROUP_NAME_MAX 64
43
44
45/*
46 * ocfs2_protocol_version changes when ocfs2 does something different in
47 * its inter-node behavior. See dlmglue.c for more information.
48 */
49struct ocfs2_protocol_version {
50 u8 pv_major;
51 u8 pv_minor;
52};
53
54/*
55 * The ocfs2_locking_protocol defines the handlers called on ocfs2's behalf.
56 */
57struct ocfs2_locking_protocol {
58 struct ocfs2_protocol_version lp_max_version;
59 void (*lp_lock_ast)(void *astarg);
60 void (*lp_blocking_ast)(void *astarg, int level);
61 void (*lp_unlock_ast)(void *astarg, int error);
62};
63
64
65/*
66 * The dlm_lockstatus struct includes lvb space, but the dlm_lksb struct only
67 * has a pointer to separately allocated lvb space. This struct exists only to
68 * include in the lksb union to make space for a combined dlm_lksb and lvb.
69 */
70struct fsdlm_lksb_plus_lvb {
71 struct dlm_lksb lksb;
72 char lvb[DLM_LVB_LEN];
73};
74
75/*
76 * A union of all lock status structures. We define it here so that the
77 * size of the union is known. Lock status structures are embedded in
78 * ocfs2 inodes.
79 */
80union ocfs2_dlm_lksb {
81 struct dlm_lockstatus lksb_o2dlm;
82 struct dlm_lksb lksb_fsdlm;
83 struct fsdlm_lksb_plus_lvb padding;
84};
85
86/*
87 * A cluster connection. Mostly opaque to ocfs2, the connection holds
88 * state for the underlying stack. ocfs2 does use cc_version to determine
89 * locking compatibility.
90 */
91struct ocfs2_cluster_connection {
 /* NOTE(review): not NUL-terminated when cc_namelen == GROUP_NAME_MAX */
92 char cc_name[GROUP_NAME_MAX];
93 int cc_namelen;
 /* Negotiated locking protocol version for this mount */
94 struct ocfs2_protocol_version cc_version;
 /* Called on node-down notifications; set by ocfs2_cluster_connect() */
95 void (*cc_recovery_handler)(int node_num, void *recovery_data);
96 void *cc_recovery_data;
 /* Owned by the stack plugin's ->connect()/->disconnect() */
97 void *cc_lockspace;
98 void *cc_private;
99};
100
101/*
102 * Each cluster stack implements the stack operations structure. Not used
103 * in the ocfs2 code, the stackglue code translates generic cluster calls
104 * into stack operations.
105 */
106struct ocfs2_stack_operations {
107 /*
108 * The fs code calls ocfs2_cluster_connect() to attach a new
109 * filesystem to the cluster stack. The ->connect() op is passed
110 * an ocfs2_cluster_connection with the name and recovery field
111 * filled in.
112 *
113 * The stack must set up any notification mechanisms and create
114 * the filesystem lockspace in the DLM. The lockspace should be
115 * stored on cc_lockspace. Any other information can be stored on
116 * cc_private.
117 *
118 * ->connect() must not return until it is guaranteed that
119 *
120 * - Node down notifications for the filesystem will be received
121 * and passed to conn->cc_recovery_handler().
122 * - Locking requests for the filesystem will be processed.
123 */
124 int (*connect)(struct ocfs2_cluster_connection *conn);
125
126 /*
127 * The fs code calls ocfs2_cluster_disconnect() when a filesystem
128 * no longer needs cluster services. All DLM locks have been
129 * dropped, and recovery notification is being ignored by the
130 * fs code. The stack must disengage from the DLM and discontinue
131 * recovery notification.
132 *
133 * Once ->disconnect() has returned, the connection structure will
134 * be freed. Thus, a stack must not return from ->disconnect()
135 * until it will no longer reference the conn pointer.
136 *
137 * If hangup_pending is zero, ocfs2_cluster_disconnect() will also
138 * be dropping the reference on the module.
139 */
140 int (*disconnect)(struct ocfs2_cluster_connection *conn,
141 int hangup_pending);
142
143 /*
144 * ocfs2_cluster_hangup() exists for compatibility with older
145 * ocfs2 tools. Only the classic stack really needs it. As such
146 * ->hangup() is not required of all stacks. See the comment by
147 * ocfs2_cluster_hangup() for more details.
148 *
149 * Note that ocfs2_cluster_hangup() can only be called if
150 * hangup_pending was passed to ocfs2_cluster_disconnect().
151 */
152 void (*hangup)(const char *group, int grouplen);
153
154 /*
155 * ->this_node() returns the cluster's unique identifier for the
156 * local node.
157 */
158 int (*this_node)(unsigned int *node);
159
160 /*
161 * Call the underlying dlm lock function. The ->dlm_lock()
162 * callback should convert the flags and mode as appropriate.
163 *
164 * ast and bast functions are not part of the call because the
165 * stack will likely want to wrap ast and bast calls before passing
166 * them to stack->sp_proto.
167 */
168 int (*dlm_lock)(struct ocfs2_cluster_connection *conn,
169 int mode,
170 union ocfs2_dlm_lksb *lksb,
171 u32 flags,
172 void *name,
173 unsigned int namelen,
174 void *astarg);
175
176 /*
177 * Call the underlying dlm unlock function. The ->dlm_unlock()
178 * function should convert the flags as appropriate.
179 *
180 * The unlock ast is not passed, as the stack will want to wrap
181 * it before calling stack->sp_proto->lp_unlock_ast().
182 */
183 int (*dlm_unlock)(struct ocfs2_cluster_connection *conn,
184 union ocfs2_dlm_lksb *lksb,
185 u32 flags,
186 void *astarg);
187
188 /*
189 * Return the status of the current lock status block. The fs
190 * code should never dereference the union. The ->lock_status()
191 * callback pulls out the stack-specific lksb, converts the status
192 * to a proper errno, and returns it.
193 */
194 int (*lock_status)(union ocfs2_dlm_lksb *lksb);
195
196 /*
197 * Pull the lvb pointer off of the stack-specific lksb.
198 */
199 void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb);
200
201 /*
202 * This is an optional debugging hook. If provided, the
203 * stack can dump debugging information about this lock.
204 */
205 void (*dump_lksb)(union ocfs2_dlm_lksb *lksb);
206};
207
208/*
209 * Each stack plugin must describe itself by registering a
210 * ocfs2_stack_plugin structure. This is only seen by stackglue and the
211 * stack driver.
212 */
213struct ocfs2_stack_plugin {
 /* Unique plugin name; matched against the sysfs cluster_stack label */
214 char *sp_name;
215 struct ocfs2_stack_operations *sp_ops;
216 struct module *sp_owner;
217
218 /* These are managed by the stackglue code. */
219 struct list_head sp_list;
 /* Pin count while this plugin is the active stack */
220 unsigned int sp_count;
221 struct ocfs2_locking_protocol *sp_proto;
222};
223
224
225/* Used by the filesystem */
226int ocfs2_cluster_connect(const char *stack_name,
227 const char *group,
228 int grouplen,
229 void (*recovery_handler)(int node_num,
230 void *recovery_data),
231 void *recovery_data,
232 struct ocfs2_cluster_connection **conn);
233int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
234 int hangup_pending);
235void ocfs2_cluster_hangup(const char *group, int grouplen);
236int ocfs2_cluster_this_node(unsigned int *node);
237
238struct ocfs2_lock_res;
239int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn,
240 int mode,
241 union ocfs2_dlm_lksb *lksb,
242 u32 flags,
243 void *name,
244 unsigned int namelen,
245 struct ocfs2_lock_res *astarg);
246int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
247 union ocfs2_dlm_lksb *lksb,
248 u32 flags,
249 struct ocfs2_lock_res *astarg);
250
251int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb);
252void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb);
253void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb);
254
255void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto);
256
257
258/* Used by stack plugins */
259int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
260void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
261#endif /* STACKGLUE_H */
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 72c198a004df..d2d278fb9819 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -46,6 +46,11 @@
46 46
47#include "buffer_head_io.h" 47#include "buffer_head_io.h"
48 48
49#define NOT_ALLOC_NEW_GROUP 0
50#define ALLOC_NEW_GROUP 1
51
52#define OCFS2_MAX_INODES_TO_STEAL 1024
53
49static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); 54static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
50static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); 55static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
51static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); 56static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
@@ -106,7 +111,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
106 u64 *bg_blkno, 111 u64 *bg_blkno,
107 u16 *bg_bit_off); 112 u16 *bg_bit_off);
108 113
109void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 114static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
110{ 115{
111 struct inode *inode = ac->ac_inode; 116 struct inode *inode = ac->ac_inode;
112 117
@@ -117,9 +122,17 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
117 mutex_unlock(&inode->i_mutex); 122 mutex_unlock(&inode->i_mutex);
118 123
119 iput(inode); 124 iput(inode);
125 ac->ac_inode = NULL;
120 } 126 }
121 if (ac->ac_bh) 127 if (ac->ac_bh) {
122 brelse(ac->ac_bh); 128 brelse(ac->ac_bh);
129 ac->ac_bh = NULL;
130 }
131}
132
133void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
134{
135 ocfs2_free_ac_resource(ac);
123 kfree(ac); 136 kfree(ac);
124} 137}
125 138
@@ -391,7 +404,8 @@ bail:
391static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, 404static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
392 struct ocfs2_alloc_context *ac, 405 struct ocfs2_alloc_context *ac,
393 int type, 406 int type,
394 u32 slot) 407 u32 slot,
408 int alloc_new_group)
395{ 409{
396 int status; 410 int status;
397 u32 bits_wanted = ac->ac_bits_wanted; 411 u32 bits_wanted = ac->ac_bits_wanted;
@@ -420,6 +434,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
420 } 434 }
421 435
422 ac->ac_inode = alloc_inode; 436 ac->ac_inode = alloc_inode;
437 ac->ac_alloc_slot = slot;
423 438
424 fe = (struct ocfs2_dinode *) bh->b_data; 439 fe = (struct ocfs2_dinode *) bh->b_data;
425 if (!OCFS2_IS_VALID_DINODE(fe)) { 440 if (!OCFS2_IS_VALID_DINODE(fe)) {
@@ -446,6 +461,14 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
446 goto bail; 461 goto bail;
447 } 462 }
448 463
464 if (alloc_new_group != ALLOC_NEW_GROUP) {
465 mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
466 "and we don't alloc a new group for it.\n",
467 slot, bits_wanted, free_bits);
468 status = -ENOSPC;
469 goto bail;
470 }
471
449 status = ocfs2_block_group_alloc(osb, alloc_inode, bh); 472 status = ocfs2_block_group_alloc(osb, alloc_inode, bh);
450 if (status < 0) { 473 if (status < 0) {
451 if (status != -ENOSPC) 474 if (status != -ENOSPC)
@@ -490,7 +513,8 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
490 (*ac)->ac_group_search = ocfs2_block_group_search; 513 (*ac)->ac_group_search = ocfs2_block_group_search;
491 514
492 status = ocfs2_reserve_suballoc_bits(osb, (*ac), 515 status = ocfs2_reserve_suballoc_bits(osb, (*ac),
493 EXTENT_ALLOC_SYSTEM_INODE, slot); 516 EXTENT_ALLOC_SYSTEM_INODE,
517 slot, ALLOC_NEW_GROUP);
494 if (status < 0) { 518 if (status < 0) {
495 if (status != -ENOSPC) 519 if (status != -ENOSPC)
496 mlog_errno(status); 520 mlog_errno(status);
@@ -508,10 +532,42 @@ bail:
508 return status; 532 return status;
509} 533}
510 534
535static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
536 struct ocfs2_alloc_context *ac)
537{
538 int i, status = -ENOSPC;
539 s16 slot = ocfs2_get_inode_steal_slot(osb);
540
541 /* Start to steal inodes from the first slot after ours. */
542 if (slot == OCFS2_INVALID_SLOT)
543 slot = osb->slot_num + 1;
544
545 for (i = 0; i < osb->max_slots; i++, slot++) {
546 if (slot == osb->max_slots)
547 slot = 0;
548
549 if (slot == osb->slot_num)
550 continue;
551
552 status = ocfs2_reserve_suballoc_bits(osb, ac,
553 INODE_ALLOC_SYSTEM_INODE,
554 slot, NOT_ALLOC_NEW_GROUP);
555 if (status >= 0) {
556 ocfs2_set_inode_steal_slot(osb, slot);
557 break;
558 }
559
560 ocfs2_free_ac_resource(ac);
561 }
562
563 return status;
564}
565
511int ocfs2_reserve_new_inode(struct ocfs2_super *osb, 566int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
512 struct ocfs2_alloc_context **ac) 567 struct ocfs2_alloc_context **ac)
513{ 568{
514 int status; 569 int status;
570 s16 slot = ocfs2_get_inode_steal_slot(osb);
515 571
516 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 572 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
517 if (!(*ac)) { 573 if (!(*ac)) {
@@ -525,9 +581,43 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
525 581
526 (*ac)->ac_group_search = ocfs2_block_group_search; 582 (*ac)->ac_group_search = ocfs2_block_group_search;
527 583
584 /*
585 * slot is set when we successfully steal inode from other nodes.
586 * It is reset in 3 places:
587 * 1. when we flush the truncate log
588 * 2. when we complete local alloc recovery.
589 * 3. when we successfully allocate from our own slot.
590 * After it is set, we will go on stealing inodes until we find the
591 * need to check our slots to see whether there is some space for us.
592 */
593 if (slot != OCFS2_INVALID_SLOT &&
594 atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL)
595 goto inode_steal;
596
597 atomic_set(&osb->s_num_inodes_stolen, 0);
528 status = ocfs2_reserve_suballoc_bits(osb, *ac, 598 status = ocfs2_reserve_suballoc_bits(osb, *ac,
529 INODE_ALLOC_SYSTEM_INODE, 599 INODE_ALLOC_SYSTEM_INODE,
530 osb->slot_num); 600 osb->slot_num, ALLOC_NEW_GROUP);
601 if (status >= 0) {
602 status = 0;
603
604 /*
605 * Some inodes must be freed by us, so try to allocate
606 * from our own next time.
607 */
608 if (slot != OCFS2_INVALID_SLOT)
609 ocfs2_init_inode_steal_slot(osb);
610 goto bail;
611 } else if (status < 0 && status != -ENOSPC) {
612 mlog_errno(status);
613 goto bail;
614 }
615
616 ocfs2_free_ac_resource(*ac);
617
618inode_steal:
619 status = ocfs2_steal_inode_from_other_nodes(osb, *ac);
620 atomic_inc(&osb->s_num_inodes_stolen);
531 if (status < 0) { 621 if (status < 0) {
532 if (status != -ENOSPC) 622 if (status != -ENOSPC)
533 mlog_errno(status); 623 mlog_errno(status);
@@ -557,7 +647,8 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
557 647
558 status = ocfs2_reserve_suballoc_bits(osb, ac, 648 status = ocfs2_reserve_suballoc_bits(osb, ac,
559 GLOBAL_BITMAP_SYSTEM_INODE, 649 GLOBAL_BITMAP_SYSTEM_INODE,
560 OCFS2_INVALID_SLOT); 650 OCFS2_INVALID_SLOT,
651 ALLOC_NEW_GROUP);
561 if (status < 0 && status != -ENOSPC) { 652 if (status < 0 && status != -ENOSPC) {
562 mlog_errno(status); 653 mlog_errno(status);
563 goto bail; 654 goto bail;
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 8799033bb459..544c600662bd 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -36,6 +36,7 @@ typedef int (group_search_t)(struct inode *,
36struct ocfs2_alloc_context { 36struct ocfs2_alloc_context {
37 struct inode *ac_inode; /* which bitmap are we allocating from? */ 37 struct inode *ac_inode; /* which bitmap are we allocating from? */
38 struct buffer_head *ac_bh; /* file entry bh */ 38 struct buffer_head *ac_bh; /* file entry bh */
39 u32 ac_alloc_slot; /* which slot are we allocating from? */
39 u32 ac_bits_wanted; 40 u32 ac_bits_wanted;
40 u32 ac_bits_given; 41 u32 ac_bits_given;
41#define OCFS2_AC_USE_LOCAL 1 42#define OCFS2_AC_USE_LOCAL 1
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index bec75aff3d9f..df63ba20ae90 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -40,8 +40,7 @@
40#include <linux/crc32.h> 40#include <linux/crc32.h>
41#include <linux/debugfs.h> 41#include <linux/debugfs.h>
42#include <linux/mount.h> 42#include <linux/mount.h>
43 43#include <linux/seq_file.h>
44#include <cluster/nodemanager.h>
45 44
46#define MLOG_MASK_PREFIX ML_SUPER 45#define MLOG_MASK_PREFIX ML_SUPER
47#include <cluster/masklog.h> 46#include <cluster/masklog.h>
@@ -88,6 +87,7 @@ struct mount_options
88 unsigned int atime_quantum; 87 unsigned int atime_quantum;
89 signed short slot; 88 signed short slot;
90 unsigned int localalloc_opt; 89 unsigned int localalloc_opt;
90 char cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
91}; 91};
92 92
93static int ocfs2_parse_options(struct super_block *sb, char *options, 93static int ocfs2_parse_options(struct super_block *sb, char *options,
@@ -109,7 +109,6 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait);
109static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); 109static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb);
110static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); 110static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb);
111static void ocfs2_release_system_inodes(struct ocfs2_super *osb); 111static void ocfs2_release_system_inodes(struct ocfs2_super *osb);
112static int ocfs2_fill_local_node_info(struct ocfs2_super *osb);
113static int ocfs2_check_volume(struct ocfs2_super *osb); 112static int ocfs2_check_volume(struct ocfs2_super *osb);
114static int ocfs2_verify_volume(struct ocfs2_dinode *di, 113static int ocfs2_verify_volume(struct ocfs2_dinode *di,
115 struct buffer_head *bh, 114 struct buffer_head *bh,
@@ -154,6 +153,7 @@ enum {
154 Opt_commit, 153 Opt_commit,
155 Opt_localalloc, 154 Opt_localalloc,
156 Opt_localflocks, 155 Opt_localflocks,
156 Opt_stack,
157 Opt_err, 157 Opt_err,
158}; 158};
159 159
@@ -172,6 +172,7 @@ static match_table_t tokens = {
172 {Opt_commit, "commit=%u"}, 172 {Opt_commit, "commit=%u"},
173 {Opt_localalloc, "localalloc=%d"}, 173 {Opt_localalloc, "localalloc=%d"},
174 {Opt_localflocks, "localflocks"}, 174 {Opt_localflocks, "localflocks"},
175 {Opt_stack, "cluster_stack=%s"},
175 {Opt_err, NULL} 176 {Opt_err, NULL}
176}; 177};
177 178
@@ -551,8 +552,17 @@ static int ocfs2_verify_heartbeat(struct ocfs2_super *osb)
551 } 552 }
552 } 553 }
553 554
555 if (ocfs2_userspace_stack(osb)) {
556 if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) {
557 mlog(ML_ERROR, "Userspace stack expected, but "
558 "o2cb heartbeat arguments passed to mount\n");
559 return -EINVAL;
560 }
561 }
562
554 if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { 563 if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
555 if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb)) { 564 if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) &&
565 !ocfs2_userspace_stack(osb)) {
556 mlog(ML_ERROR, "Heartbeat has to be started to mount " 566 mlog(ML_ERROR, "Heartbeat has to be started to mount "
557 "a read-write clustered device.\n"); 567 "a read-write clustered device.\n");
558 return -EINVAL; 568 return -EINVAL;
@@ -562,6 +572,35 @@ static int ocfs2_verify_heartbeat(struct ocfs2_super *osb)
562 return 0; 572 return 0;
563} 573}
564 574
575/*
576 * If we're using a userspace stack, mount should have passed
577 * a name that matches the disk. If not, mount should not
578 * have passed a stack.
579 */
580static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
581 struct mount_options *mopt)
582{
583 if (!ocfs2_userspace_stack(osb) && mopt->cluster_stack[0]) {
584 mlog(ML_ERROR,
585 "cluster stack passed to mount, but this filesystem "
586 "does not support it\n");
587 return -EINVAL;
588 }
589
590 if (ocfs2_userspace_stack(osb) &&
591 strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
592 OCFS2_STACK_LABEL_LEN)) {
593 mlog(ML_ERROR,
594 "cluster stack passed to mount (\"%s\") does not "
595 "match the filesystem (\"%s\")\n",
596 mopt->cluster_stack,
597 osb->osb_cluster_stack);
598 return -EINVAL;
599 }
600
601 return 0;
602}
603
565static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) 604static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
566{ 605{
567 struct dentry *root; 606 struct dentry *root;
@@ -579,15 +618,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
579 goto read_super_error; 618 goto read_super_error;
580 } 619 }
581 620
582 /* for now we only have one cluster/node, make sure we see it
583 * in the heartbeat universe */
584 if (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL) {
585 if (!o2hb_check_local_node_heartbeating()) {
586 status = -EINVAL;
587 goto read_super_error;
588 }
589 }
590
591 /* probe for superblock */ 621 /* probe for superblock */
592 status = ocfs2_sb_probe(sb, &bh, &sector_size); 622 status = ocfs2_sb_probe(sb, &bh, &sector_size);
593 if (status < 0) { 623 if (status < 0) {
@@ -609,6 +639,10 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
609 osb->osb_commit_interval = parsed_options.commit_interval; 639 osb->osb_commit_interval = parsed_options.commit_interval;
610 osb->local_alloc_size = parsed_options.localalloc_opt; 640 osb->local_alloc_size = parsed_options.localalloc_opt;
611 641
642 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
643 if (status)
644 goto read_super_error;
645
612 sb->s_magic = OCFS2_SUPER_MAGIC; 646 sb->s_magic = OCFS2_SUPER_MAGIC;
613 647
614 /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, 648 /* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
@@ -694,7 +728,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
694 if (ocfs2_mount_local(osb)) 728 if (ocfs2_mount_local(osb))
695 snprintf(nodestr, sizeof(nodestr), "local"); 729 snprintf(nodestr, sizeof(nodestr), "local");
696 else 730 else
697 snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); 731 snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num);
698 732
699 printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " 733 printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) "
700 "with %s data mode.\n", 734 "with %s data mode.\n",
@@ -763,6 +797,7 @@ static int ocfs2_parse_options(struct super_block *sb,
763 mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; 797 mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
764 mopt->slot = OCFS2_INVALID_SLOT; 798 mopt->slot = OCFS2_INVALID_SLOT;
765 mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; 799 mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
800 mopt->cluster_stack[0] = '\0';
766 801
767 if (!options) { 802 if (!options) {
768 status = 1; 803 status = 1;
@@ -864,6 +899,25 @@ static int ocfs2_parse_options(struct super_block *sb,
864 if (!is_remount) 899 if (!is_remount)
865 mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; 900 mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS;
866 break; 901 break;
902 case Opt_stack:
903 /* Check both that the option we were passed
904 * is of the right length and that it is a proper
905 * string of the right length.
906 */
907 if (((args[0].to - args[0].from) !=
908 OCFS2_STACK_LABEL_LEN) ||
909 (strnlen(args[0].from,
910 OCFS2_STACK_LABEL_LEN) !=
911 OCFS2_STACK_LABEL_LEN)) {
912 mlog(ML_ERROR,
913 "Invalid cluster_stack option\n");
914 status = 0;
915 goto bail;
916 }
917 memcpy(mopt->cluster_stack, args[0].from,
918 OCFS2_STACK_LABEL_LEN);
919 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
920 break;
867 default: 921 default:
868 mlog(ML_ERROR, 922 mlog(ML_ERROR,
869 "Unrecognized mount option \"%s\" " 923 "Unrecognized mount option \"%s\" "
@@ -922,6 +976,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
922 if (opts & OCFS2_MOUNT_LOCALFLOCKS) 976 if (opts & OCFS2_MOUNT_LOCALFLOCKS)
923 seq_printf(s, ",localflocks,"); 977 seq_printf(s, ",localflocks,");
924 978
979 if (osb->osb_cluster_stack[0])
980 seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN,
981 osb->osb_cluster_stack);
982
925 return 0; 983 return 0;
926} 984}
927 985
@@ -957,6 +1015,8 @@ static int __init ocfs2_init(void)
957 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); 1015 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
958 } 1016 }
959 1017
1018 ocfs2_set_locking_protocol();
1019
960leave: 1020leave:
961 if (status < 0) { 1021 if (status < 0) {
962 ocfs2_free_mem_caches(); 1022 ocfs2_free_mem_caches();
@@ -1132,31 +1192,6 @@ static int ocfs2_get_sector(struct super_block *sb,
1132 return 0; 1192 return 0;
1133} 1193}
1134 1194
1135/* ocfs2 1.0 only allows one cluster and node identity per kernel image. */
1136static int ocfs2_fill_local_node_info(struct ocfs2_super *osb)
1137{
1138 int status;
1139
1140 /* XXX hold a ref on the node while mounte? easy enough, if
1141 * desirable. */
1142 if (ocfs2_mount_local(osb))
1143 osb->node_num = 0;
1144 else
1145 osb->node_num = o2nm_this_node();
1146
1147 if (osb->node_num == O2NM_MAX_NODES) {
1148 mlog(ML_ERROR, "could not find this host's node number\n");
1149 status = -ENOENT;
1150 goto bail;
1151 }
1152
1153 mlog(0, "I am node %d\n", osb->node_num);
1154
1155 status = 0;
1156bail:
1157 return status;
1158}
1159
1160static int ocfs2_mount_volume(struct super_block *sb) 1195static int ocfs2_mount_volume(struct super_block *sb)
1161{ 1196{
1162 int status = 0; 1197 int status = 0;
@@ -1168,12 +1203,6 @@ static int ocfs2_mount_volume(struct super_block *sb)
1168 if (ocfs2_is_hard_readonly(osb)) 1203 if (ocfs2_is_hard_readonly(osb))
1169 goto leave; 1204 goto leave;
1170 1205
1171 status = ocfs2_fill_local_node_info(osb);
1172 if (status < 0) {
1173 mlog_errno(status);
1174 goto leave;
1175 }
1176
1177 status = ocfs2_dlm_init(osb); 1206 status = ocfs2_dlm_init(osb);
1178 if (status < 0) { 1207 if (status < 0) {
1179 mlog_errno(status); 1208 mlog_errno(status);
@@ -1224,18 +1253,9 @@ leave:
1224 return status; 1253 return status;
1225} 1254}
1226 1255
1227/* we can't grab the goofy sem lock from inside wait_event, so we use
1228 * memory barriers to make sure that we'll see the null task before
1229 * being woken up */
1230static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
1231{
1232 mb();
1233 return osb->recovery_thread_task != NULL;
1234}
1235
1236static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) 1256static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1237{ 1257{
1238 int tmp; 1258 int tmp, hangup_needed = 0;
1239 struct ocfs2_super *osb = NULL; 1259 struct ocfs2_super *osb = NULL;
1240 char nodestr[8]; 1260 char nodestr[8];
1241 1261
@@ -1249,25 +1269,16 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1249 1269
1250 ocfs2_truncate_log_shutdown(osb); 1270 ocfs2_truncate_log_shutdown(osb);
1251 1271
1252 /* disable any new recovery threads and wait for any currently 1272 /* This will disable recovery and flush any recovery work. */
1253 * running ones to exit. Do this before setting the vol_state. */ 1273 ocfs2_recovery_exit(osb);
1254 mutex_lock(&osb->recovery_lock);
1255 osb->disable_recovery = 1;
1256 mutex_unlock(&osb->recovery_lock);
1257 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
1258
1259 /* At this point, we know that no more recovery threads can be
1260 * launched, so wait for any recovery completion work to
1261 * complete. */
1262 flush_workqueue(ocfs2_wq);
1263 1274
1264 ocfs2_journal_shutdown(osb); 1275 ocfs2_journal_shutdown(osb);
1265 1276
1266 ocfs2_sync_blockdev(sb); 1277 ocfs2_sync_blockdev(sb);
1267 1278
1268 /* No dlm means we've failed during mount, so skip all the 1279 /* No cluster connection means we've failed during mount, so skip
1269 * steps which depended on that to complete. */ 1280 * all the steps which depended on that to complete. */
1270 if (osb->dlm) { 1281 if (osb->cconn) {
1271 tmp = ocfs2_super_lock(osb, 1); 1282 tmp = ocfs2_super_lock(osb, 1);
1272 if (tmp < 0) { 1283 if (tmp < 0) {
1273 mlog_errno(tmp); 1284 mlog_errno(tmp);
@@ -1278,25 +1289,34 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1278 if (osb->slot_num != OCFS2_INVALID_SLOT) 1289 if (osb->slot_num != OCFS2_INVALID_SLOT)
1279 ocfs2_put_slot(osb); 1290 ocfs2_put_slot(osb);
1280 1291
1281 if (osb->dlm) 1292 if (osb->cconn)
1282 ocfs2_super_unlock(osb, 1); 1293 ocfs2_super_unlock(osb, 1);
1283 1294
1284 ocfs2_release_system_inodes(osb); 1295 ocfs2_release_system_inodes(osb);
1285 1296
1286 if (osb->dlm) 1297 /*
1287 ocfs2_dlm_shutdown(osb); 1298 * If we're dismounting due to mount error, mount.ocfs2 will clean
1299 * up heartbeat. If we're a local mount, there is no heartbeat.
1300 * If we failed before we got a uuid_str yet, we can't stop
1301 * heartbeat. Otherwise, do it.
1302 */
1303 if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str)
1304 hangup_needed = 1;
1305
1306 if (osb->cconn)
1307 ocfs2_dlm_shutdown(osb, hangup_needed);
1288 1308
1289 debugfs_remove(osb->osb_debug_root); 1309 debugfs_remove(osb->osb_debug_root);
1290 1310
1291 if (!mnt_err) 1311 if (hangup_needed)
1292 ocfs2_stop_heartbeat(osb); 1312 ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str));
1293 1313
1294 atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); 1314 atomic_set(&osb->vol_state, VOLUME_DISMOUNTED);
1295 1315
1296 if (ocfs2_mount_local(osb)) 1316 if (ocfs2_mount_local(osb))
1297 snprintf(nodestr, sizeof(nodestr), "local"); 1317 snprintf(nodestr, sizeof(nodestr), "local");
1298 else 1318 else
1299 snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); 1319 snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num);
1300 1320
1301 printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", 1321 printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n",
1302 osb->dev_str, nodestr); 1322 osb->dev_str, nodestr);
@@ -1355,7 +1375,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
1355 sb->s_fs_info = osb; 1375 sb->s_fs_info = osb;
1356 sb->s_op = &ocfs2_sops; 1376 sb->s_op = &ocfs2_sops;
1357 sb->s_export_op = &ocfs2_export_ops; 1377 sb->s_export_op = &ocfs2_export_ops;
1358 osb->osb_locking_proto = ocfs2_locking_protocol;
1359 sb->s_time_gran = 1; 1378 sb->s_time_gran = 1;
1360 sb->s_flags |= MS_NOATIME; 1379 sb->s_flags |= MS_NOATIME;
1361 /* this is needed to support O_LARGEFILE */ 1380 /* this is needed to support O_LARGEFILE */
@@ -1368,7 +1387,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
1368 osb->s_sectsize_bits = blksize_bits(sector_size); 1387 osb->s_sectsize_bits = blksize_bits(sector_size);
1369 BUG_ON(!osb->s_sectsize_bits); 1388 BUG_ON(!osb->s_sectsize_bits);
1370 1389
1371 init_waitqueue_head(&osb->recovery_event);
1372 spin_lock_init(&osb->dc_task_lock); 1390 spin_lock_init(&osb->dc_task_lock);
1373 init_waitqueue_head(&osb->dc_event); 1391 init_waitqueue_head(&osb->dc_event);
1374 osb->dc_work_sequence = 0; 1392 osb->dc_work_sequence = 0;
@@ -1376,6 +1394,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1376 INIT_LIST_HEAD(&osb->blocked_lock_list); 1394 INIT_LIST_HEAD(&osb->blocked_lock_list);
1377 osb->blocked_lock_count = 0; 1395 osb->blocked_lock_count = 0;
1378 spin_lock_init(&osb->osb_lock); 1396 spin_lock_init(&osb->osb_lock);
1397 ocfs2_init_inode_steal_slot(osb);
1379 1398
1380 atomic_set(&osb->alloc_stats.moves, 0); 1399 atomic_set(&osb->alloc_stats.moves, 0);
1381 atomic_set(&osb->alloc_stats.local_data, 0); 1400 atomic_set(&osb->alloc_stats.local_data, 0);
@@ -1388,24 +1407,23 @@ static int ocfs2_initialize_super(struct super_block *sb,
1388 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 1407 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
1389 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 1408 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1390 1409
1391 mutex_init(&osb->recovery_lock); 1410 status = ocfs2_recovery_init(osb);
1392 1411 if (status) {
1393 osb->disable_recovery = 0; 1412 mlog(ML_ERROR, "Unable to initialize recovery state\n");
1394 osb->recovery_thread_task = NULL; 1413 mlog_errno(status);
1414 goto bail;
1415 }
1395 1416
1396 init_waitqueue_head(&osb->checkpoint_event); 1417 init_waitqueue_head(&osb->checkpoint_event);
1397 atomic_set(&osb->needs_checkpoint, 0); 1418 atomic_set(&osb->needs_checkpoint, 0);
1398 1419
1399 osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; 1420 osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
1400 1421
1401 osb->node_num = O2NM_INVALID_NODE_NUM;
1402 osb->slot_num = OCFS2_INVALID_SLOT; 1422 osb->slot_num = OCFS2_INVALID_SLOT;
1403 1423
1404 osb->local_alloc_state = OCFS2_LA_UNUSED; 1424 osb->local_alloc_state = OCFS2_LA_UNUSED;
1405 osb->local_alloc_bh = NULL; 1425 osb->local_alloc_bh = NULL;
1406 1426
1407 ocfs2_setup_hb_callbacks(osb);
1408
1409 init_waitqueue_head(&osb->osb_mount_event); 1427 init_waitqueue_head(&osb->osb_mount_event);
1410 1428
1411 osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); 1429 osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
@@ -1455,6 +1473,25 @@ static int ocfs2_initialize_super(struct super_block *sb,
1455 goto bail; 1473 goto bail;
1456 } 1474 }
1457 1475
1476 if (ocfs2_userspace_stack(osb)) {
1477 memcpy(osb->osb_cluster_stack,
1478 OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
1479 OCFS2_STACK_LABEL_LEN);
1480 osb->osb_cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
1481 if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
1482 mlog(ML_ERROR,
1483 "couldn't mount because of an invalid "
1484 "cluster stack label (%s) \n",
1485 osb->osb_cluster_stack);
1486 status = -EINVAL;
1487 goto bail;
1488 }
1489 } else {
1490 /* The empty string is identical with classic tools that
1491 * don't know about s_cluster_info. */
1492 osb->osb_cluster_stack[0] = '\0';
1493 }
1494
1458 get_random_bytes(&osb->s_next_generation, sizeof(u32)); 1495 get_random_bytes(&osb->s_next_generation, sizeof(u32));
1459 1496
1460 /* FIXME 1497 /* FIXME
@@ -1724,8 +1761,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
1724 1761
1725 /* This function assumes that the caller has the main osb resource */ 1762 /* This function assumes that the caller has the main osb resource */
1726 1763
1727 if (osb->slot_info) 1764 ocfs2_free_slot_info(osb);
1728 ocfs2_free_slot_info(osb->slot_info);
1729 1765
1730 kfree(osb->osb_orphan_wipes); 1766 kfree(osb->osb_orphan_wipes);
1731 /* FIXME 1767 /* FIXME
diff --git a/fs/open.c b/fs/open.c
index 54198538b67e..b70e7666bb2c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -244,21 +244,21 @@ static long do_sys_truncate(const char __user * path, loff_t length)
244 if (!S_ISREG(inode->i_mode)) 244 if (!S_ISREG(inode->i_mode))
245 goto dput_and_out; 245 goto dput_and_out;
246 246
247 error = vfs_permission(&nd, MAY_WRITE); 247 error = mnt_want_write(nd.path.mnt);
248 if (error) 248 if (error)
249 goto dput_and_out; 249 goto dput_and_out;
250 250
251 error = -EROFS; 251 error = vfs_permission(&nd, MAY_WRITE);
252 if (IS_RDONLY(inode)) 252 if (error)
253 goto dput_and_out; 253 goto mnt_drop_write_and_out;
254 254
255 error = -EPERM; 255 error = -EPERM;
256 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 256 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
257 goto dput_and_out; 257 goto mnt_drop_write_and_out;
258 258
259 error = get_write_access(inode); 259 error = get_write_access(inode);
260 if (error) 260 if (error)
261 goto dput_and_out; 261 goto mnt_drop_write_and_out;
262 262
263 /* 263 /*
264 * Make sure that there are no leases. get_write_access() protects 264 * Make sure that there are no leases. get_write_access() protects
@@ -276,6 +276,8 @@ static long do_sys_truncate(const char __user * path, loff_t length)
276 276
277put_write_and_out: 277put_write_and_out:
278 put_write_access(inode); 278 put_write_access(inode);
279mnt_drop_write_and_out:
280 mnt_drop_write(nd.path.mnt);
279dput_and_out: 281dput_and_out:
280 path_put(&nd.path); 282 path_put(&nd.path);
281out: 283out:
@@ -335,7 +337,7 @@ asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length)
335{ 337{
336 long ret = do_sys_ftruncate(fd, length, 1); 338 long ret = do_sys_ftruncate(fd, length, 1);
337 /* avoid REGPARM breakage on x86: */ 339 /* avoid REGPARM breakage on x86: */
338 prevent_tail_call(ret); 340 asmlinkage_protect(2, ret, fd, length);
339 return ret; 341 return ret;
340} 342}
341 343
@@ -350,7 +352,7 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
350{ 352{
351 long ret = do_sys_ftruncate(fd, length, 0); 353 long ret = do_sys_ftruncate(fd, length, 0);
352 /* avoid REGPARM breakage on x86: */ 354 /* avoid REGPARM breakage on x86: */
353 prevent_tail_call(ret); 355 asmlinkage_protect(2, ret, fd, length);
354 return ret; 356 return ret;
355} 357}
356#endif 358#endif
@@ -457,8 +459,17 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
457 if(res || !(mode & S_IWOTH) || 459 if(res || !(mode & S_IWOTH) ||
458 special_file(nd.path.dentry->d_inode->i_mode)) 460 special_file(nd.path.dentry->d_inode->i_mode))
459 goto out_path_release; 461 goto out_path_release;
460 462 /*
461 if(IS_RDONLY(nd.path.dentry->d_inode)) 463 * This is a rare case where using __mnt_is_readonly()
464 * is OK without a mnt_want/drop_write() pair. Since
465 * no actual write to the fs is performed here, we do
466 * not need to telegraph to that to anyone.
467 *
468 * By doing this, we accept that this access is
469 * inherently racy and know that the fs may change
470 * state before we even see this result.
471 */
472 if (__mnt_is_readonly(nd.path.mnt))
462 res = -EROFS; 473 res = -EROFS;
463 474
464out_path_release: 475out_path_release:
@@ -567,12 +578,12 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
567 578
568 audit_inode(NULL, dentry); 579 audit_inode(NULL, dentry);
569 580
570 err = -EROFS; 581 err = mnt_want_write(file->f_path.mnt);
571 if (IS_RDONLY(inode)) 582 if (err)
572 goto out_putf; 583 goto out_putf;
573 err = -EPERM; 584 err = -EPERM;
574 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 585 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
575 goto out_putf; 586 goto out_drop_write;
576 mutex_lock(&inode->i_mutex); 587 mutex_lock(&inode->i_mutex);
577 if (mode == (mode_t) -1) 588 if (mode == (mode_t) -1)
578 mode = inode->i_mode; 589 mode = inode->i_mode;
@@ -581,6 +592,8 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
581 err = notify_change(dentry, &newattrs); 592 err = notify_change(dentry, &newattrs);
582 mutex_unlock(&inode->i_mutex); 593 mutex_unlock(&inode->i_mutex);
583 594
595out_drop_write:
596 mnt_drop_write(file->f_path.mnt);
584out_putf: 597out_putf:
585 fput(file); 598 fput(file);
586out: 599out:
@@ -600,13 +613,13 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
600 goto out; 613 goto out;
601 inode = nd.path.dentry->d_inode; 614 inode = nd.path.dentry->d_inode;
602 615
603 error = -EROFS; 616 error = mnt_want_write(nd.path.mnt);
604 if (IS_RDONLY(inode)) 617 if (error)
605 goto dput_and_out; 618 goto dput_and_out;
606 619
607 error = -EPERM; 620 error = -EPERM;
608 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 621 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
609 goto dput_and_out; 622 goto out_drop_write;
610 623
611 mutex_lock(&inode->i_mutex); 624 mutex_lock(&inode->i_mutex);
612 if (mode == (mode_t) -1) 625 if (mode == (mode_t) -1)
@@ -616,6 +629,8 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
616 error = notify_change(nd.path.dentry, &newattrs); 629 error = notify_change(nd.path.dentry, &newattrs);
617 mutex_unlock(&inode->i_mutex); 630 mutex_unlock(&inode->i_mutex);
618 631
632out_drop_write:
633 mnt_drop_write(nd.path.mnt);
619dput_and_out: 634dput_and_out:
620 path_put(&nd.path); 635 path_put(&nd.path);
621out: 636out:
@@ -638,9 +653,6 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
638 printk(KERN_ERR "chown_common: NULL inode\n"); 653 printk(KERN_ERR "chown_common: NULL inode\n");
639 goto out; 654 goto out;
640 } 655 }
641 error = -EROFS;
642 if (IS_RDONLY(inode))
643 goto out;
644 error = -EPERM; 656 error = -EPERM;
645 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 657 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
646 goto out; 658 goto out;
@@ -671,7 +683,12 @@ asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
671 error = user_path_walk(filename, &nd); 683 error = user_path_walk(filename, &nd);
672 if (error) 684 if (error)
673 goto out; 685 goto out;
686 error = mnt_want_write(nd.path.mnt);
687 if (error)
688 goto out_release;
674 error = chown_common(nd.path.dentry, user, group); 689 error = chown_common(nd.path.dentry, user, group);
690 mnt_drop_write(nd.path.mnt);
691out_release:
675 path_put(&nd.path); 692 path_put(&nd.path);
676out: 693out:
677 return error; 694 return error;
@@ -691,7 +708,12 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
691 error = __user_walk_fd(dfd, filename, follow, &nd); 708 error = __user_walk_fd(dfd, filename, follow, &nd);
692 if (error) 709 if (error)
693 goto out; 710 goto out;
711 error = mnt_want_write(nd.path.mnt);
712 if (error)
713 goto out_release;
694 error = chown_common(nd.path.dentry, user, group); 714 error = chown_common(nd.path.dentry, user, group);
715 mnt_drop_write(nd.path.mnt);
716out_release:
695 path_put(&nd.path); 717 path_put(&nd.path);
696out: 718out:
697 return error; 719 return error;
@@ -705,7 +727,12 @@ asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group
705 error = user_path_walk_link(filename, &nd); 727 error = user_path_walk_link(filename, &nd);
706 if (error) 728 if (error)
707 goto out; 729 goto out;
730 error = mnt_want_write(nd.path.mnt);
731 if (error)
732 goto out_release;
708 error = chown_common(nd.path.dentry, user, group); 733 error = chown_common(nd.path.dentry, user, group);
734 mnt_drop_write(nd.path.mnt);
735out_release:
709 path_put(&nd.path); 736 path_put(&nd.path);
710out: 737out:
711 return error; 738 return error;
@@ -722,14 +749,48 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
722 if (!file) 749 if (!file)
723 goto out; 750 goto out;
724 751
752 error = mnt_want_write(file->f_path.mnt);
753 if (error)
754 goto out_fput;
725 dentry = file->f_path.dentry; 755 dentry = file->f_path.dentry;
726 audit_inode(NULL, dentry); 756 audit_inode(NULL, dentry);
727 error = chown_common(dentry, user, group); 757 error = chown_common(dentry, user, group);
758 mnt_drop_write(file->f_path.mnt);
759out_fput:
728 fput(file); 760 fput(file);
729out: 761out:
730 return error; 762 return error;
731} 763}
732 764
765/*
766 * You have to be very careful that these write
767 * counts get cleaned up in error cases and
768 * upon __fput(). This should probably never
769 * be called outside of __dentry_open().
770 */
771static inline int __get_file_write_access(struct inode *inode,
772 struct vfsmount *mnt)
773{
774 int error;
775 error = get_write_access(inode);
776 if (error)
777 return error;
778 /*
779 * Do not take mount writer counts on
780 * special files since no writes to
781 * the mount itself will occur.
782 */
783 if (!special_file(inode->i_mode)) {
784 /*
785 * Balanced in __fput()
786 */
787 error = mnt_want_write(mnt);
788 if (error)
789 put_write_access(inode);
790 }
791 return error;
792}
793
733static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, 794static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
734 int flags, struct file *f, 795 int flags, struct file *f,
735 int (*open)(struct inode *, struct file *)) 796 int (*open)(struct inode *, struct file *))
@@ -742,9 +803,11 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
742 FMODE_PREAD | FMODE_PWRITE; 803 FMODE_PREAD | FMODE_PWRITE;
743 inode = dentry->d_inode; 804 inode = dentry->d_inode;
744 if (f->f_mode & FMODE_WRITE) { 805 if (f->f_mode & FMODE_WRITE) {
745 error = get_write_access(inode); 806 error = __get_file_write_access(inode, mnt);
746 if (error) 807 if (error)
747 goto cleanup_file; 808 goto cleanup_file;
809 if (!special_file(inode->i_mode))
810 file_take_write(f);
748 } 811 }
749 812
750 f->f_mapping = inode->i_mapping; 813 f->f_mapping = inode->i_mapping;
@@ -784,8 +847,19 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
784 847
785cleanup_all: 848cleanup_all:
786 fops_put(f->f_op); 849 fops_put(f->f_op);
787 if (f->f_mode & FMODE_WRITE) 850 if (f->f_mode & FMODE_WRITE) {
788 put_write_access(inode); 851 put_write_access(inode);
852 if (!special_file(inode->i_mode)) {
853 /*
854 * We don't consider this a real
855 * mnt_want/drop_write() pair
856 * because it all happenend right
857 * here, so just reset the state.
858 */
859 file_reset_write(f);
860 mnt_drop_write(mnt);
861 }
862 }
789 file_kill(f); 863 file_kill(f);
790 f->f_path.dentry = NULL; 864 f->f_path.dentry = NULL;
791 f->f_path.mnt = NULL; 865 f->f_path.mnt = NULL;
@@ -796,43 +870,6 @@ cleanup_file:
796 return ERR_PTR(error); 870 return ERR_PTR(error);
797} 871}
798 872
799/*
800 * Note that while the flag value (low two bits) for sys_open means:
801 * 00 - read-only
802 * 01 - write-only
803 * 10 - read-write
804 * 11 - special
805 * it is changed into
806 * 00 - no permissions needed
807 * 01 - read-permission
808 * 10 - write-permission
809 * 11 - read-write
810 * for the internal routines (ie open_namei()/follow_link() etc). 00 is
811 * used by symlinks.
812 */
813static struct file *do_filp_open(int dfd, const char *filename, int flags,
814 int mode)
815{
816 int namei_flags, error;
817 struct nameidata nd;
818
819 namei_flags = flags;
820 if ((namei_flags+1) & O_ACCMODE)
821 namei_flags++;
822
823 error = open_namei(dfd, filename, namei_flags, mode, &nd);
824 if (!error)
825 return nameidata_to_filp(&nd, flags);
826
827 return ERR_PTR(error);
828}
829
830struct file *filp_open(const char *filename, int flags, int mode)
831{
832 return do_filp_open(AT_FDCWD, filename, flags, mode);
833}
834EXPORT_SYMBOL(filp_open);
835
836/** 873/**
837 * lookup_instantiate_filp - instantiates the open intent filp 874 * lookup_instantiate_filp - instantiates the open intent filp
838 * @nd: pointer to nameidata 875 * @nd: pointer to nameidata
@@ -903,6 +940,18 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
903 int error; 940 int error;
904 struct file *f; 941 struct file *f;
905 942
943 /*
944 * We must always pass in a valid mount pointer. Historically
945 * callers got away with not passing it, but we must enforce this at
946 * the earliest possible point now to avoid strange problems deep in the
947 * filesystem stack.
948 */
949 if (!mnt) {
950 printk(KERN_WARNING "%s called with NULL vfsmount\n", __func__);
951 dump_stack();
952 return ERR_PTR(-EINVAL);
953 }
954
906 error = -ENFILE; 955 error = -ENFILE;
907 f = get_empty_filp(); 956 f = get_empty_filp();
908 if (f == NULL) { 957 if (f == NULL) {
@@ -1055,7 +1104,7 @@ asmlinkage long sys_open(const char __user *filename, int flags, int mode)
1055 1104
1056 ret = do_sys_open(AT_FDCWD, filename, flags, mode); 1105 ret = do_sys_open(AT_FDCWD, filename, flags, mode);
1057 /* avoid REGPARM breakage on x86: */ 1106 /* avoid REGPARM breakage on x86: */
1058 prevent_tail_call(ret); 1107 asmlinkage_protect(3, ret, filename, flags, mode);
1059 return ret; 1108 return ret;
1060} 1109}
1061 1110
@@ -1069,7 +1118,7 @@ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
1069 1118
1070 ret = do_sys_open(dfd, filename, flags, mode); 1119 ret = do_sys_open(dfd, filename, flags, mode);
1071 /* avoid REGPARM breakage on x86: */ 1120 /* avoid REGPARM breakage on x86: */
1072 prevent_tail_call(ret); 1121 asmlinkage_protect(4, ret, dfd, filename, flags, mode);
1073 return ret; 1122 return ret;
1074} 1123}
1075 1124
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 03f808c5b79d..6149e4b58c88 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -473,6 +473,10 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
473 return 0; 473 return 0;
474 if (IS_ERR(state)) /* I/O error reading the partition table */ 474 if (IS_ERR(state)) /* I/O error reading the partition table */
475 return -EIO; 475 return -EIO;
476
477 /* tell userspace that the media / partition table may have changed */
478 kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE);
479
476 for (p = 1; p < state->limit; p++) { 480 for (p = 1; p < state->limit; p++) {
477 sector_t size = state->parts[p].size; 481 sector_t size = state->parts[p].size;
478 sector_t from = state->parts[p].from; 482 sector_t from = state->parts[p].from;
diff --git a/fs/pipe.c b/fs/pipe.c
index 3c185b6527bc..8be381bbcb54 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -957,13 +957,10 @@ struct file *create_write_pipe(void)
957 struct dentry *dentry; 957 struct dentry *dentry;
958 struct qstr name = { .name = "" }; 958 struct qstr name = { .name = "" };
959 959
960 f = get_empty_filp();
961 if (!f)
962 return ERR_PTR(-ENFILE);
963 err = -ENFILE; 960 err = -ENFILE;
964 inode = get_pipe_inode(); 961 inode = get_pipe_inode();
965 if (!inode) 962 if (!inode)
966 goto err_file; 963 goto err;
967 964
968 err = -ENOMEM; 965 err = -ENOMEM;
969 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); 966 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
@@ -978,22 +975,24 @@ struct file *create_write_pipe(void)
978 */ 975 */
979 dentry->d_flags &= ~DCACHE_UNHASHED; 976 dentry->d_flags &= ~DCACHE_UNHASHED;
980 d_instantiate(dentry, inode); 977 d_instantiate(dentry, inode);
981 f->f_path.mnt = mntget(pipe_mnt); 978
982 f->f_path.dentry = dentry; 979 err = -ENFILE;
980 f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipe_fops);
981 if (!f)
982 goto err_dentry;
983 f->f_mapping = inode->i_mapping; 983 f->f_mapping = inode->i_mapping;
984 984
985 f->f_flags = O_WRONLY; 985 f->f_flags = O_WRONLY;
986 f->f_op = &write_pipe_fops;
987 f->f_mode = FMODE_WRITE;
988 f->f_version = 0; 986 f->f_version = 0;
989 987
990 return f; 988 return f;
991 989
990 err_dentry:
991 dput(dentry);
992 err_inode: 992 err_inode:
993 free_pipe_info(inode); 993 free_pipe_info(inode);
994 iput(inode); 994 iput(inode);
995 err_file: 995 err:
996 put_filp(f);
997 return ERR_PTR(err); 996 return ERR_PTR(err);
998} 997}
999 998
diff --git a/fs/pnode.c b/fs/pnode.c
index 05ba692bc540..1d8f5447f3f7 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -225,7 +225,7 @@ out:
225 */ 225 */
226static inline int do_refcount_check(struct vfsmount *mnt, int count) 226static inline int do_refcount_check(struct vfsmount *mnt, int count)
227{ 227{
228 int mycount = atomic_read(&mnt->mnt_count); 228 int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts;
229 return (mycount > count); 229 return (mycount > count);
230} 230}
231 231
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 88f8edf18258..81d7d145292a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -314,9 +314,12 @@ static int proc_pid_schedstat(struct task_struct *task, char *buffer)
314static int lstats_show_proc(struct seq_file *m, void *v) 314static int lstats_show_proc(struct seq_file *m, void *v)
315{ 315{
316 int i; 316 int i;
317 struct task_struct *task = m->private; 317 struct inode *inode = m->private;
318 seq_puts(m, "Latency Top version : v0.1\n"); 318 struct task_struct *task = get_proc_task(inode);
319 319
320 if (!task)
321 return -ESRCH;
322 seq_puts(m, "Latency Top version : v0.1\n");
320 for (i = 0; i < 32; i++) { 323 for (i = 0; i < 32; i++) {
321 if (task->latency_record[i].backtrace[0]) { 324 if (task->latency_record[i].backtrace[0]) {
322 int q; 325 int q;
@@ -341,32 +344,24 @@ static int lstats_show_proc(struct seq_file *m, void *v)
341 } 344 }
342 345
343 } 346 }
347 put_task_struct(task);
344 return 0; 348 return 0;
345} 349}
346 350
347static int lstats_open(struct inode *inode, struct file *file) 351static int lstats_open(struct inode *inode, struct file *file)
348{ 352{
349 int ret; 353 return single_open(file, lstats_show_proc, inode);
350 struct seq_file *m;
351 struct task_struct *task = get_proc_task(inode);
352
353 ret = single_open(file, lstats_show_proc, NULL);
354 if (!ret) {
355 m = file->private_data;
356 m->private = task;
357 }
358 return ret;
359} 354}
360 355
361static ssize_t lstats_write(struct file *file, const char __user *buf, 356static ssize_t lstats_write(struct file *file, const char __user *buf,
362 size_t count, loff_t *offs) 357 size_t count, loff_t *offs)
363{ 358{
364 struct seq_file *m; 359 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
365 struct task_struct *task;
366 360
367 m = file->private_data; 361 if (!task)
368 task = m->private; 362 return -ESRCH;
369 clear_all_latency_tracing(task); 363 clear_all_latency_tracing(task);
364 put_task_struct(task);
370 365
371 return count; 366 return count;
372} 367}
@@ -416,6 +411,7 @@ static const struct limit_names lnames[RLIM_NLIMITS] = {
416 [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, 411 [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
417 [RLIMIT_NICE] = {"Max nice priority", NULL}, 412 [RLIMIT_NICE] = {"Max nice priority", NULL},
418 [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, 413 [RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
414 [RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
419}; 415};
420 416
421/* Display limits for a process */ 417/* Display limits for a process */
@@ -1040,6 +1036,26 @@ static const struct file_operations proc_loginuid_operations = {
1040 .read = proc_loginuid_read, 1036 .read = proc_loginuid_read,
1041 .write = proc_loginuid_write, 1037 .write = proc_loginuid_write,
1042}; 1038};
1039
1040static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
1041 size_t count, loff_t *ppos)
1042{
1043 struct inode * inode = file->f_path.dentry->d_inode;
1044 struct task_struct *task = get_proc_task(inode);
1045 ssize_t length;
1046 char tmpbuf[TMPBUFLEN];
1047
1048 if (!task)
1049 return -ESRCH;
1050 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1051 audit_get_sessionid(task));
1052 put_task_struct(task);
1053 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1054}
1055
1056static const struct file_operations proc_sessionid_operations = {
1057 .read = proc_sessionid_read,
1058};
1043#endif 1059#endif
1044 1060
1045#ifdef CONFIG_FAULT_INJECTION 1061#ifdef CONFIG_FAULT_INJECTION
@@ -2273,6 +2289,9 @@ static const struct pid_entry tgid_base_stuff[] = {
2273 DIR("task", S_IRUGO|S_IXUGO, task), 2289 DIR("task", S_IRUGO|S_IXUGO, task),
2274 DIR("fd", S_IRUSR|S_IXUSR, fd), 2290 DIR("fd", S_IRUSR|S_IXUSR, fd),
2275 DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), 2291 DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo),
2292#ifdef CONFIG_NET
2293 DIR("net", S_IRUGO|S_IXUGO, net),
2294#endif
2276 REG("environ", S_IRUSR, environ), 2295 REG("environ", S_IRUSR, environ),
2277 INF("auxv", S_IRUSR, pid_auxv), 2296 INF("auxv", S_IRUSR, pid_auxv),
2278 ONE("status", S_IRUGO, pid_status), 2297 ONE("status", S_IRUGO, pid_status),
@@ -2320,6 +2339,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2320 REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), 2339 REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust),
2321#ifdef CONFIG_AUDITSYSCALL 2340#ifdef CONFIG_AUDITSYSCALL
2322 REG("loginuid", S_IWUSR|S_IRUGO, loginuid), 2341 REG("loginuid", S_IWUSR|S_IRUGO, loginuid),
2342 REG("sessionid", S_IRUSR, sessionid),
2323#endif 2343#endif
2324#ifdef CONFIG_FAULT_INJECTION 2344#ifdef CONFIG_FAULT_INJECTION
2325 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), 2345 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
@@ -2650,6 +2670,7 @@ static const struct pid_entry tid_base_stuff[] = {
2650 REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), 2670 REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust),
2651#ifdef CONFIG_AUDITSYSCALL 2671#ifdef CONFIG_AUDITSYSCALL
2652 REG("loginuid", S_IWUSR|S_IRUGO, loginuid), 2672 REG("loginuid", S_IWUSR|S_IRUGO, loginuid),
2673 REG("sessionid", S_IRUSR, sessionid),
2653#endif 2674#endif
2654#ifdef CONFIG_FAULT_INJECTION 2675#ifdef CONFIG_FAULT_INJECTION
2655 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), 2676 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 68971e66cd41..a36ad3c75cf4 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -377,15 +377,14 @@ static struct dentry_operations proc_dentry_operations =
377 * Don't create negative dentries here, return -ENOENT by hand 377 * Don't create negative dentries here, return -ENOENT by hand
378 * instead. 378 * instead.
379 */ 379 */
380struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 380struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
381 struct dentry *dentry)
381{ 382{
382 struct inode *inode = NULL; 383 struct inode *inode = NULL;
383 struct proc_dir_entry * de;
384 int error = -ENOENT; 384 int error = -ENOENT;
385 385
386 lock_kernel(); 386 lock_kernel();
387 spin_lock(&proc_subdir_lock); 387 spin_lock(&proc_subdir_lock);
388 de = PDE(dir);
389 if (de) { 388 if (de) {
390 for (de = de->subdir; de ; de = de->next) { 389 for (de = de->subdir; de ; de = de->next) {
391 if (de->namelen != dentry->d_name.len) 390 if (de->namelen != dentry->d_name.len)
@@ -393,8 +392,6 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
393 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 392 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
394 unsigned int ino; 393 unsigned int ino;
395 394
396 if (de->shadow_proc)
397 de = de->shadow_proc(current, de);
398 ino = de->low_ino; 395 ino = de->low_ino;
399 de_get(de); 396 de_get(de);
400 spin_unlock(&proc_subdir_lock); 397 spin_unlock(&proc_subdir_lock);
@@ -417,6 +414,12 @@ out_unlock:
417 return ERR_PTR(error); 414 return ERR_PTR(error);
418} 415}
419 416
417struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
418 struct nameidata *nd)
419{
420 return proc_lookup_de(PDE(dir), dir, dentry);
421}
422
420/* 423/*
421 * This returns non-zero if at EOF, so that the /proc 424 * This returns non-zero if at EOF, so that the /proc
422 * root directory can use this and check if it should 425 * root directory can use this and check if it should
@@ -426,10 +429,9 @@ out_unlock:
426 * value of the readdir() call, as long as it's non-negative 429 * value of the readdir() call, as long as it's non-negative
427 * for success.. 430 * for success..
428 */ 431 */
429int proc_readdir(struct file * filp, 432int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
430 void * dirent, filldir_t filldir) 433 filldir_t filldir)
431{ 434{
432 struct proc_dir_entry * de;
433 unsigned int ino; 435 unsigned int ino;
434 int i; 436 int i;
435 struct inode *inode = filp->f_path.dentry->d_inode; 437 struct inode *inode = filp->f_path.dentry->d_inode;
@@ -438,7 +440,6 @@ int proc_readdir(struct file * filp,
438 lock_kernel(); 440 lock_kernel();
439 441
440 ino = inode->i_ino; 442 ino = inode->i_ino;
441 de = PDE(inode);
442 if (!de) { 443 if (!de) {
443 ret = -EINVAL; 444 ret = -EINVAL;
444 goto out; 445 goto out;
@@ -499,6 +500,13 @@ out: unlock_kernel();
499 return ret; 500 return ret;
500} 501}
501 502
503int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
504{
505 struct inode *inode = filp->f_path.dentry->d_inode;
506
507 return proc_readdir_de(PDE(inode), filp, dirent, filldir);
508}
509
502/* 510/*
503 * These are the generic /proc directory operations. They 511 * These are the generic /proc directory operations. They
504 * use the in-memory "struct proc_dir_entry" tree to parse 512 * use the in-memory "struct proc_dir_entry" tree to parse
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 1c81c8f1aeed..bc72f5c8c47d 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -64,6 +64,8 @@ extern const struct file_operations proc_numa_maps_operations;
64extern const struct file_operations proc_smaps_operations; 64extern const struct file_operations proc_smaps_operations;
65extern const struct file_operations proc_clear_refs_operations; 65extern const struct file_operations proc_clear_refs_operations;
66extern const struct file_operations proc_pagemap_operations; 66extern const struct file_operations proc_pagemap_operations;
67extern const struct file_operations proc_net_operations;
68extern const struct inode_operations proc_net_inode_operations;
67 69
68void free_proc_entry(struct proc_dir_entry *de); 70void free_proc_entry(struct proc_dir_entry *de);
69 71
@@ -83,3 +85,8 @@ static inline int proc_fd(struct inode *inode)
83{ 85{
84 return PROC_I(inode)->fd; 86 return PROC_I(inode)->fd;
85} 87}
88
89struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
90 struct dentry *dentry);
91int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
92 filldir_t filldir);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 468805d40e2b..2d563979cb02 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -32,6 +32,7 @@
32#include <linux/interrupt.h> 32#include <linux/interrupt.h>
33#include <linux/swap.h> 33#include <linux/swap.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/genhd.h>
35#include <linux/smp.h> 36#include <linux/smp.h>
36#include <linux/signal.h> 37#include <linux/signal.h>
37#include <linux/module.h> 38#include <linux/module.h>
@@ -377,7 +378,6 @@ static int stram_read_proc(char *page, char **start, off_t off,
377#endif 378#endif
378 379
379#ifdef CONFIG_BLOCK 380#ifdef CONFIG_BLOCK
380extern const struct seq_operations partitions_op;
381static int partitions_open(struct inode *inode, struct file *file) 381static int partitions_open(struct inode *inode, struct file *file)
382{ 382{
383 return seq_open(file, &partitions_op); 383 return seq_open(file, &partitions_op);
@@ -389,7 +389,6 @@ static const struct file_operations proc_partitions_operations = {
389 .release = seq_release, 389 .release = seq_release,
390}; 390};
391 391
392extern const struct seq_operations diskstats_op;
393static int diskstats_open(struct inode *inode, struct file *file) 392static int diskstats_open(struct inode *inode, struct file *file)
394{ 393{
395 return seq_open(file, &diskstats_op); 394 return seq_open(file, &diskstats_op);
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 14e9b5aaf863..13cd7835d0df 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -44,7 +44,9 @@ int seq_open_net(struct inode *ino, struct file *f,
44 put_net(net); 44 put_net(net);
45 return -ENOMEM; 45 return -ENOMEM;
46 } 46 }
47#ifdef CONFIG_NET_NS
47 p->net = net; 48 p->net = net;
49#endif
48 return 0; 50 return 0;
49} 51}
50EXPORT_SYMBOL_GPL(seq_open_net); 52EXPORT_SYMBOL_GPL(seq_open_net);
@@ -52,17 +54,91 @@ EXPORT_SYMBOL_GPL(seq_open_net);
52int seq_release_net(struct inode *ino, struct file *f) 54int seq_release_net(struct inode *ino, struct file *f)
53{ 55{
54 struct seq_file *seq; 56 struct seq_file *seq;
55 struct seq_net_private *p;
56 57
57 seq = f->private_data; 58 seq = f->private_data;
58 p = seq->private;
59 59
60 put_net(p->net); 60 put_net(seq_file_net(seq));
61 seq_release_private(ino, f); 61 seq_release_private(ino, f);
62 return 0; 62 return 0;
63} 63}
64EXPORT_SYMBOL_GPL(seq_release_net); 64EXPORT_SYMBOL_GPL(seq_release_net);
65 65
66static struct net *get_proc_task_net(struct inode *dir)
67{
68 struct task_struct *task;
69 struct nsproxy *ns;
70 struct net *net = NULL;
71
72 rcu_read_lock();
73 task = pid_task(proc_pid(dir), PIDTYPE_PID);
74 if (task != NULL) {
75 ns = task_nsproxy(task);
76 if (ns != NULL)
77 net = get_net(ns->net_ns);
78 }
79 rcu_read_unlock();
80
81 return net;
82}
83
84static struct dentry *proc_tgid_net_lookup(struct inode *dir,
85 struct dentry *dentry, struct nameidata *nd)
86{
87 struct dentry *de;
88 struct net *net;
89
90 de = ERR_PTR(-ENOENT);
91 net = get_proc_task_net(dir);
92 if (net != NULL) {
93 de = proc_lookup_de(net->proc_net, dir, dentry);
94 put_net(net);
95 }
96 return de;
97}
98
99static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry,
100 struct kstat *stat)
101{
102 struct inode *inode = dentry->d_inode;
103 struct net *net;
104
105 net = get_proc_task_net(inode);
106
107 generic_fillattr(inode, stat);
108
109 if (net != NULL) {
110 stat->nlink = net->proc_net->nlink;
111 put_net(net);
112 }
113
114 return 0;
115}
116
117const struct inode_operations proc_net_inode_operations = {
118 .lookup = proc_tgid_net_lookup,
119 .getattr = proc_tgid_net_getattr,
120};
121
122static int proc_tgid_net_readdir(struct file *filp, void *dirent,
123 filldir_t filldir)
124{
125 int ret;
126 struct net *net;
127
128 ret = -EINVAL;
129 net = get_proc_task_net(filp->f_path.dentry->d_inode);
130 if (net != NULL) {
131 ret = proc_readdir_de(net->proc_net, filp, dirent, filldir);
132 put_net(net);
133 }
134 return ret;
135}
136
137const struct file_operations proc_net_operations = {
138 .read = generic_read_dir,
139 .readdir = proc_tgid_net_readdir,
140};
141
66 142
67struct proc_dir_entry *proc_net_fops_create(struct net *net, 143struct proc_dir_entry *proc_net_fops_create(struct net *net,
68 const char *name, mode_t mode, const struct file_operations *fops) 144 const char *name, mode_t mode, const struct file_operations *fops)
@@ -83,14 +159,6 @@ struct net *get_proc_net(const struct inode *inode)
83} 159}
84EXPORT_SYMBOL_GPL(get_proc_net); 160EXPORT_SYMBOL_GPL(get_proc_net);
85 161
86static struct proc_dir_entry *shadow_pde;
87
88static struct proc_dir_entry *proc_net_shadow(struct task_struct *task,
89 struct proc_dir_entry *de)
90{
91 return task->nsproxy->net_ns->proc_net;
92}
93
94struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, 162struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
95 struct proc_dir_entry *parent) 163 struct proc_dir_entry *parent)
96{ 164{
@@ -104,45 +172,39 @@ EXPORT_SYMBOL_GPL(proc_net_mkdir);
104 172
105static __net_init int proc_net_ns_init(struct net *net) 173static __net_init int proc_net_ns_init(struct net *net)
106{ 174{
107 struct proc_dir_entry *root, *netd, *net_statd; 175 struct proc_dir_entry *netd, *net_statd;
108 int err; 176 int err;
109 177
110 err = -ENOMEM; 178 err = -ENOMEM;
111 root = kzalloc(sizeof(*root), GFP_KERNEL); 179 netd = kzalloc(sizeof(*netd), GFP_KERNEL);
112 if (!root) 180 if (!netd)
113 goto out; 181 goto out;
114 182
115 err = -EEXIST; 183 netd->data = net;
116 netd = proc_net_mkdir(net, "net", root); 184 netd->nlink = 2;
117 if (!netd) 185 netd->name = "net";
118 goto free_root; 186 netd->namelen = 3;
187 netd->parent = &proc_root;
119 188
120 err = -EEXIST; 189 err = -EEXIST;
121 net_statd = proc_net_mkdir(net, "stat", netd); 190 net_statd = proc_net_mkdir(net, "stat", netd);
122 if (!net_statd) 191 if (!net_statd)
123 goto free_net; 192 goto free_net;
124 193
125 root->data = net;
126
127 net->proc_net_root = root;
128 net->proc_net = netd; 194 net->proc_net = netd;
129 net->proc_net_stat = net_statd; 195 net->proc_net_stat = net_statd;
130 err = 0; 196 return 0;
131 197
198free_net:
199 kfree(netd);
132out: 200out:
133 return err; 201 return err;
134free_net:
135 remove_proc_entry("net", root);
136free_root:
137 kfree(root);
138 goto out;
139} 202}
140 203
141static __net_exit void proc_net_ns_exit(struct net *net) 204static __net_exit void proc_net_ns_exit(struct net *net)
142{ 205{
143 remove_proc_entry("stat", net->proc_net); 206 remove_proc_entry("stat", net->proc_net);
144 remove_proc_entry("net", net->proc_net_root); 207 kfree(net->proc_net);
145 kfree(net->proc_net_root);
146} 208}
147 209
148static struct pernet_operations __net_initdata proc_net_ns_ops = { 210static struct pernet_operations __net_initdata proc_net_ns_ops = {
@@ -152,8 +214,7 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = {
152 214
153int __init proc_net_init(void) 215int __init proc_net_init(void)
154{ 216{
155 shadow_pde = proc_mkdir("net", NULL); 217 proc_symlink("net", NULL, "self/net");
156 shadow_pde->shadow_proc = proc_net_shadow;
157 218
158 return register_pernet_subsys(&proc_net_ns_ops); 219 return register_pernet_subsys(&proc_net_ns_ops);
159} 220}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 49958cffbd8d..9dfb5ff24209 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -527,13 +527,21 @@ struct pagemapread {
527 char __user *out, *end; 527 char __user *out, *end;
528}; 528};
529 529
530#define PM_ENTRY_BYTES sizeof(u64) 530#define PM_ENTRY_BYTES sizeof(u64)
531#define PM_RESERVED_BITS 3 531#define PM_STATUS_BITS 3
532#define PM_RESERVED_OFFSET (64 - PM_RESERVED_BITS) 532#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
533#define PM_RESERVED_MASK (((1LL<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET) 533#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
534#define PM_SPECIAL(nr) (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK) 534#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
535#define PM_NOT_PRESENT PM_SPECIAL(1LL) 535#define PM_PSHIFT_BITS 6
536#define PM_SWAP PM_SPECIAL(2LL) 536#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
537#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
538#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
539#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
540#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
541
542#define PM_PRESENT PM_STATUS(4LL)
543#define PM_SWAP PM_STATUS(2LL)
544#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT)
537#define PM_END_OF_BUFFER 1 545#define PM_END_OF_BUFFER 1
538 546
539static int add_to_pagemap(unsigned long addr, u64 pfn, 547static int add_to_pagemap(unsigned long addr, u64 pfn,
@@ -574,7 +582,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
574u64 swap_pte_to_pagemap_entry(pte_t pte) 582u64 swap_pte_to_pagemap_entry(pte_t pte)
575{ 583{
576 swp_entry_t e = pte_to_swp_entry(pte); 584 swp_entry_t e = pte_to_swp_entry(pte);
577 return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); 585 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
578} 586}
579 587
580static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 588static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
@@ -588,9 +596,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
588 u64 pfn = PM_NOT_PRESENT; 596 u64 pfn = PM_NOT_PRESENT;
589 pte = pte_offset_map(pmd, addr); 597 pte = pte_offset_map(pmd, addr);
590 if (is_swap_pte(*pte)) 598 if (is_swap_pte(*pte))
591 pfn = swap_pte_to_pagemap_entry(*pte); 599 pfn = PM_PFRAME(swap_pte_to_pagemap_entry(*pte))
600 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
592 else if (pte_present(*pte)) 601 else if (pte_present(*pte))
593 pfn = pte_pfn(*pte); 602 pfn = PM_PFRAME(pte_pfn(*pte))
603 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
594 /* unmap so we're not in atomic when we copy to userspace */ 604 /* unmap so we're not in atomic when we copy to userspace */
595 pte_unmap(pte); 605 pte_unmap(pte);
596 err = add_to_pagemap(addr, pfn, pm); 606 err = add_to_pagemap(addr, pfn, pm);
@@ -611,12 +621,20 @@ static struct mm_walk pagemap_walk = {
611/* 621/*
612 * /proc/pid/pagemap - an array mapping virtual pages to pfns 622 * /proc/pid/pagemap - an array mapping virtual pages to pfns
613 * 623 *
614 * For each page in the address space, this file contains one 64-bit 624 * For each page in the address space, this file contains one 64-bit entry
615 * entry representing the corresponding physical page frame number 625 * consisting of the following:
616 * (PFN) if the page is present. If there is a swap entry for the 626 *
617 * physical page, then an encoding of the swap file number and the 627 * Bits 0-55 page frame number (PFN) if present
618 * page's offset into the swap file are returned. If no page is 628 * Bits 0-4 swap type if swapped
619 * present at all, PM_NOT_PRESENT is returned. This allows determining 629 * Bits 5-55 swap offset if swapped
630 * Bits 55-60 page shift (page size = 1<<page shift)
631 * Bit 61 reserved for future use
632 * Bit 62 page swapped
633 * Bit 63 page present
634 *
635 * If the page is not present but in swap, then the PFN contains an
636 * encoding of the swap file number and the page's offset into the
637 * swap. Unmapped pages return a null PFN. This allows determining
620 * precisely which pages are mapped (or in swap) and comparing mapped 638 * precisely which pages are mapped (or in swap) and comparing mapped
621 * pages between processes. 639 * pages between processes.
622 * 640 *
@@ -640,17 +658,17 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
640 658
641 ret = -EACCES; 659 ret = -EACCES;
642 if (!ptrace_may_attach(task)) 660 if (!ptrace_may_attach(task))
643 goto out; 661 goto out_task;
644 662
645 ret = -EINVAL; 663 ret = -EINVAL;
646 /* file position must be aligned */ 664 /* file position must be aligned */
647 if (*ppos % PM_ENTRY_BYTES) 665 if (*ppos % PM_ENTRY_BYTES)
648 goto out; 666 goto out_task;
649 667
650 ret = 0; 668 ret = 0;
651 mm = get_task_mm(task); 669 mm = get_task_mm(task);
652 if (!mm) 670 if (!mm)
653 goto out; 671 goto out_task;
654 672
655 ret = -ENOMEM; 673 ret = -ENOMEM;
656 uaddr = (unsigned long)buf & PAGE_MASK; 674 uaddr = (unsigned long)buf & PAGE_MASK;
@@ -658,7 +676,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
658 pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; 676 pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
659 pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); 677 pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
660 if (!pages) 678 if (!pages)
661 goto out_task; 679 goto out_mm;
662 680
663 down_read(&current->mm->mmap_sem); 681 down_read(&current->mm->mmap_sem);
664 ret = get_user_pages(current, current->mm, uaddr, pagecount, 682 ret = get_user_pages(current, current->mm, uaddr, pagecount,
@@ -668,6 +686,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
668 if (ret < 0) 686 if (ret < 0)
669 goto out_free; 687 goto out_free;
670 688
689 if (ret != pagecount) {
690 pagecount = ret;
691 ret = -EFAULT;
692 goto out_pages;
693 }
694
671 pm.out = buf; 695 pm.out = buf;
672 pm.end = buf + count; 696 pm.end = buf + count;
673 697
@@ -699,15 +723,17 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
699 ret = pm.out - buf; 723 ret = pm.out - buf;
700 } 724 }
701 725
726out_pages:
702 for (; pagecount; pagecount--) { 727 for (; pagecount; pagecount--) {
703 page = pages[pagecount-1]; 728 page = pages[pagecount-1];
704 if (!PageReserved(page)) 729 if (!PageReserved(page))
705 SetPageDirty(page); 730 SetPageDirty(page);
706 page_cache_release(page); 731 page_cache_release(page);
707 } 732 }
708 mmput(mm);
709out_free: 733out_free:
710 kfree(pages); 734 kfree(pages);
735out_mm:
736 mmput(mm);
711out_task: 737out_task:
712 put_task_struct(task); 738 put_task_struct(task);
713out: 739out:
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index f85c5cf4934c..7ee4208793b6 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -283,7 +283,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
283 return balance_leaf_when_delete(tb, flag); 283 return balance_leaf_when_delete(tb, flag);
284 284
285 zeros_num = 0; 285 zeros_num = 0;
286 if (flag == M_INSERT && body == 0) 286 if (flag == M_INSERT && !body)
287 zeros_num = ih_item_len(ih); 287 zeros_num = ih_item_len(ih);
288 288
289 pos_in_item = tb->tb_path->pos_in_item; 289 pos_in_item = tb->tb_path->pos_in_item;
@@ -1728,7 +1728,7 @@ struct buffer_head *get_FEB(struct tree_balance *tb)
1728 struct buffer_info bi; 1728 struct buffer_info bi;
1729 1729
1730 for (i = 0; i < MAX_FEB_SIZE; i++) 1730 for (i = 0; i < MAX_FEB_SIZE; i++)
1731 if (tb->FEB[i] != 0) 1731 if (tb->FEB[i] != NULL)
1732 break; 1732 break;
1733 1733
1734 if (i == MAX_FEB_SIZE) 1734 if (i == MAX_FEB_SIZE)
@@ -1827,7 +1827,7 @@ int get_left_neighbor_position(struct tree_balance *tb, int h)
1827{ 1827{
1828 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); 1828 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1829 1829
1830 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FL[h] == 0, 1830 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
1831 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist", 1831 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1832 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h)); 1832 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1833 1833
@@ -1841,7 +1841,7 @@ int get_right_neighbor_position(struct tree_balance *tb, int h)
1841{ 1841{
1842 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); 1842 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1843 1843
1844 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FR[h] == 0, 1844 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
1845 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist", 1845 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1846 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]); 1846 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1847 1847
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 0ee35c6c9b72..07d05e0842b7 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -153,7 +153,7 @@ static void create_virtual_node(struct tree_balance *tb, int h)
153 if (vn->vn_mode == M_INSERT) { 153 if (vn->vn_mode == M_INSERT) {
154 struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num; 154 struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;
155 155
156 RFALSE(vn->vn_ins_ih == 0, 156 RFALSE(vn->vn_ins_ih == NULL,
157 "vs-8040: item header of inserted item is not specified"); 157 "vs-8040: item header of inserted item is not specified");
158 vi->vi_item_len = tb->insert_size[0]; 158 vi->vi_item_len = tb->insert_size[0];
159 vi->vi_ih = vn->vn_ins_ih; 159 vi->vi_ih = vn->vn_ins_ih;
@@ -857,7 +857,8 @@ static int get_lfree(struct tree_balance *tb, int h)
857 struct buffer_head *l, *f; 857 struct buffer_head *l, *f;
858 int order; 858 int order;
859 859
860 if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0) 860 if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
861 (l = tb->FL[h]) == NULL)
861 return 0; 862 return 0;
862 863
863 if (f == l) 864 if (f == l)
@@ -878,7 +879,8 @@ static int get_rfree(struct tree_balance *tb, int h)
878 struct buffer_head *r, *f; 879 struct buffer_head *r, *f;
879 int order; 880 int order;
880 881
881 if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0) 882 if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
883 (r = tb->FR[h]) == NULL)
882 return 0; 884 return 0;
883 885
884 if (f == r) 886 if (f == r)
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index e0f0f098a523..74363a7aacbc 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -4,6 +4,7 @@
4 4
5#include <linux/capability.h> 5#include <linux/capability.h>
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/mount.h>
7#include <linux/reiserfs_fs.h> 8#include <linux/reiserfs_fs.h>
8#include <linux/time.h> 9#include <linux/time.h>
9#include <asm/uaccess.h> 10#include <asm/uaccess.h>
@@ -25,6 +26,7 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
25 unsigned long arg) 26 unsigned long arg)
26{ 27{
27 unsigned int flags; 28 unsigned int flags;
29 int err = 0;
28 30
29 switch (cmd) { 31 switch (cmd) {
30 case REISERFS_IOC_UNPACK: 32 case REISERFS_IOC_UNPACK:
@@ -48,50 +50,67 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
48 if (!reiserfs_attrs(inode->i_sb)) 50 if (!reiserfs_attrs(inode->i_sb))
49 return -ENOTTY; 51 return -ENOTTY;
50 52
51 if (IS_RDONLY(inode)) 53 err = mnt_want_write(filp->f_path.mnt);
52 return -EROFS; 54 if (err)
55 return err;
53 56
54 if (!is_owner_or_cap(inode)) 57 if (!is_owner_or_cap(inode)) {
55 return -EPERM; 58 err = -EPERM;
56 59 goto setflags_out;
57 if (get_user(flags, (int __user *)arg)) 60 }
58 return -EFAULT; 61 if (get_user(flags, (int __user *)arg)) {
59 62 err = -EFAULT;
60 /* Is it quota file? Do not allow user to mess with it. */ 63 goto setflags_out;
61 if (IS_NOQUOTA(inode)) 64 }
62 return -EPERM; 65 /*
66 * Is it quota file? Do not allow user to mess with it
67 */
68 if (IS_NOQUOTA(inode)) {
69 err = -EPERM;
70 goto setflags_out;
71 }
63 if (((flags ^ REISERFS_I(inode)-> 72 if (((flags ^ REISERFS_I(inode)->
64 i_attrs) & (REISERFS_IMMUTABLE_FL | 73 i_attrs) & (REISERFS_IMMUTABLE_FL |
65 REISERFS_APPEND_FL)) 74 REISERFS_APPEND_FL))
66 && !capable(CAP_LINUX_IMMUTABLE)) 75 && !capable(CAP_LINUX_IMMUTABLE)) {
67 return -EPERM; 76 err = -EPERM;
68 77 goto setflags_out;
78 }
69 if ((flags & REISERFS_NOTAIL_FL) && 79 if ((flags & REISERFS_NOTAIL_FL) &&
70 S_ISREG(inode->i_mode)) { 80 S_ISREG(inode->i_mode)) {
71 int result; 81 int result;
72 82
73 result = reiserfs_unpack(inode, filp); 83 result = reiserfs_unpack(inode, filp);
74 if (result) 84 if (result) {
75 return result; 85 err = result;
86 goto setflags_out;
87 }
76 } 88 }
77 sd_attrs_to_i_attrs(flags, inode); 89 sd_attrs_to_i_attrs(flags, inode);
78 REISERFS_I(inode)->i_attrs = flags; 90 REISERFS_I(inode)->i_attrs = flags;
79 inode->i_ctime = CURRENT_TIME_SEC; 91 inode->i_ctime = CURRENT_TIME_SEC;
80 mark_inode_dirty(inode); 92 mark_inode_dirty(inode);
81 return 0; 93setflags_out:
94 mnt_drop_write(filp->f_path.mnt);
95 return err;
82 } 96 }
83 case REISERFS_IOC_GETVERSION: 97 case REISERFS_IOC_GETVERSION:
84 return put_user(inode->i_generation, (int __user *)arg); 98 return put_user(inode->i_generation, (int __user *)arg);
85 case REISERFS_IOC_SETVERSION: 99 case REISERFS_IOC_SETVERSION:
86 if (!is_owner_or_cap(inode)) 100 if (!is_owner_or_cap(inode))
87 return -EPERM; 101 return -EPERM;
88 if (IS_RDONLY(inode)) 102 err = mnt_want_write(filp->f_path.mnt);
89 return -EROFS; 103 if (err)
90 if (get_user(inode->i_generation, (int __user *)arg)) 104 return err;
91 return -EFAULT; 105 if (get_user(inode->i_generation, (int __user *)arg)) {
106 err = -EFAULT;
107 goto setversion_out;
108 }
92 inode->i_ctime = CURRENT_TIME_SEC; 109 inode->i_ctime = CURRENT_TIME_SEC;
93 mark_inode_dirty(inode); 110 mark_inode_dirty(inode);
94 return 0; 111setversion_out:
112 mnt_drop_write(filp->f_path.mnt);
113 return err;
95 default: 114 default:
96 return -ENOTTY; 115 return -ENOTTY;
97 } 116 }
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index bb05a3e51b93..060eb3f598e7 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -38,7 +38,7 @@
38#include <asm/system.h> 38#include <asm/system.h>
39 39
40#include <linux/time.h> 40#include <linux/time.h>
41#include <asm/semaphore.h> 41#include <linux/semaphore.h>
42 42
43#include <linux/vmalloc.h> 43#include <linux/vmalloc.h>
44#include <linux/reiserfs_fs.h> 44#include <linux/reiserfs_fs.h>
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 281f8061ac58..6de060a6aa7f 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -626,7 +626,7 @@ static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb,
626 "vs-10250: leaf_define_dest_src_infos: shift type is unknown (%d)", 626 "vs-10250: leaf_define_dest_src_infos: shift type is unknown (%d)",
627 shift_mode); 627 shift_mode);
628 } 628 }
629 RFALSE(src_bi->bi_bh == 0 || dest_bi->bi_bh == 0, 629 RFALSE(!src_bi->bi_bh || !dest_bi->bi_bh,
630 "vs-10260: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly", 630 "vs-10260: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly",
631 shift_mode, src_bi->bi_bh, dest_bi->bi_bh); 631 shift_mode, src_bi->bi_bh, dest_bi->bi_bh);
632} 632}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index b378eea332ca..8867533cb727 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -452,7 +452,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
452 buflen = DEH_SIZE + ROUND_UP(namelen); 452 buflen = DEH_SIZE + ROUND_UP(namelen);
453 if (buflen > sizeof(small_buf)) { 453 if (buflen > sizeof(small_buf)) {
454 buffer = kmalloc(buflen, GFP_NOFS); 454 buffer = kmalloc(buflen, GFP_NOFS);
455 if (buffer == 0) 455 if (!buffer)
456 return -ENOMEM; 456 return -ENOMEM;
457 } else 457 } else
458 buffer = small_buf; 458 buffer = small_buf;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 6841452e0dea..393cc22c1717 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2031,7 +2031,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2031 return -EXDEV; 2031 return -EXDEV;
2032 } 2032 }
2033 /* We must not pack tails for quota files on reiserfs for quota IO to work */ 2033 /* We must not pack tails for quota files on reiserfs for quota IO to work */
2034 if (!REISERFS_I(nd.path.dentry->d_inode)->i_flags & i_nopack_mask) { 2034 if (!(REISERFS_I(nd.path.dentry->d_inode)->i_flags & i_nopack_mask)) {
2035 reiserfs_warning(sb, 2035 reiserfs_warning(sb,
2036 "reiserfs: Quota file must have tail packing disabled."); 2036 "reiserfs: Quota file must have tail packing disabled.");
2037 path_put(&nd.path); 2037 path_put(&nd.path);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index eba037b3338f..d7c4935c1034 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -44,7 +44,6 @@
44#include <net/checksum.h> 44#include <net/checksum.h>
45#include <linux/smp_lock.h> 45#include <linux/smp_lock.h>
46#include <linux/stat.h> 46#include <linux/stat.h>
47#include <asm/semaphore.h>
48 47
49#define FL_READONLY 128 48#define FL_READONLY 128
50#define FL_DIR_SEM_HELD 256 49#define FL_DIR_SEM_HELD 256
@@ -191,28 +190,11 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode,
191 dput(xadir); 190 dput(xadir);
192 if (err) 191 if (err)
193 xafile = ERR_PTR(err); 192 xafile = ERR_PTR(err);
194 return xafile;
195}
196
197/* Opens a file pointer to the attribute associated with inode */
198static struct file *open_xa_file(const struct inode *inode, const char *name,
199 int flags)
200{
201 struct dentry *xafile;
202 struct file *fp;
203
204 xafile = get_xa_file_dentry(inode, name, flags);
205 if (IS_ERR(xafile))
206 return ERR_PTR(PTR_ERR(xafile));
207 else if (!xafile->d_inode) { 193 else if (!xafile->d_inode) {
208 dput(xafile); 194 dput(xafile);
209 return ERR_PTR(-ENODATA); 195 xafile = ERR_PTR(-ENODATA);
210 } 196 }
211 197 return xafile;
212 fp = dentry_open(xafile, NULL, O_RDWR);
213 /* dentry_open dputs the dentry if it fails */
214
215 return fp;
216} 198}
217 199
218/* 200/*
@@ -228,9 +210,8 @@ static struct file *open_xa_file(const struct inode *inode, const char *name,
228 * we're called with i_mutex held, so there are no worries about the directory 210 * we're called with i_mutex held, so there are no worries about the directory
229 * changing underneath us. 211 * changing underneath us.
230 */ 212 */
231static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir) 213static int __xattr_readdir(struct inode *inode, void *dirent, filldir_t filldir)
232{ 214{
233 struct inode *inode = filp->f_path.dentry->d_inode;
234 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ 215 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */
235 INITIALIZE_PATH(path_to_entry); 216 INITIALIZE_PATH(path_to_entry);
236 struct buffer_head *bh; 217 struct buffer_head *bh;
@@ -374,23 +355,16 @@ static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir)
374 * 355 *
375 */ 356 */
376static 357static
377int xattr_readdir(struct file *file, filldir_t filler, void *buf) 358int xattr_readdir(struct inode *inode, filldir_t filler, void *buf)
378{ 359{
379 struct inode *inode = file->f_path.dentry->d_inode; 360 int res = -ENOENT;
380 int res = -ENOTDIR;
381 if (!file->f_op || !file->f_op->readdir)
382 goto out;
383 mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR); 361 mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR);
384// down(&inode->i_zombie);
385 res = -ENOENT;
386 if (!IS_DEADDIR(inode)) { 362 if (!IS_DEADDIR(inode)) {
387 lock_kernel(); 363 lock_kernel();
388 res = __xattr_readdir(file, buf, filler); 364 res = __xattr_readdir(inode, buf, filler);
389 unlock_kernel(); 365 unlock_kernel();
390 } 366 }
391// up(&inode->i_zombie);
392 mutex_unlock(&inode->i_mutex); 367 mutex_unlock(&inode->i_mutex);
393 out:
394 return res; 368 return res;
395} 369}
396 370
@@ -442,7 +416,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
442 size_t buffer_size, int flags) 416 size_t buffer_size, int flags)
443{ 417{
444 int err = 0; 418 int err = 0;
445 struct file *fp; 419 struct dentry *dentry;
446 struct page *page; 420 struct page *page;
447 char *data; 421 char *data;
448 struct address_space *mapping; 422 struct address_space *mapping;
@@ -460,18 +434,18 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
460 xahash = xattr_hash(buffer, buffer_size); 434 xahash = xattr_hash(buffer, buffer_size);
461 435
462 open_file: 436 open_file:
463 fp = open_xa_file(inode, name, flags); 437 dentry = get_xa_file_dentry(inode, name, flags);
464 if (IS_ERR(fp)) { 438 if (IS_ERR(dentry)) {
465 err = PTR_ERR(fp); 439 err = PTR_ERR(dentry);
466 goto out; 440 goto out;
467 } 441 }
468 442
469 xinode = fp->f_path.dentry->d_inode; 443 xinode = dentry->d_inode;
470 REISERFS_I(inode)->i_flags |= i_has_xattr_dir; 444 REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
471 445
472 /* we need to copy it off.. */ 446 /* we need to copy it off.. */
473 if (xinode->i_nlink > 1) { 447 if (xinode->i_nlink > 1) {
474 fput(fp); 448 dput(dentry);
475 err = reiserfs_xattr_del(inode, name); 449 err = reiserfs_xattr_del(inode, name);
476 if (err < 0) 450 if (err < 0)
477 goto out; 451 goto out;
@@ -485,7 +459,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
485 newattrs.ia_size = buffer_size; 459 newattrs.ia_size = buffer_size;
486 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; 460 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
487 mutex_lock_nested(&xinode->i_mutex, I_MUTEX_XATTR); 461 mutex_lock_nested(&xinode->i_mutex, I_MUTEX_XATTR);
488 err = notify_change(fp->f_path.dentry, &newattrs); 462 err = notify_change(dentry, &newattrs);
489 if (err) 463 if (err)
490 goto out_filp; 464 goto out_filp;
491 465
@@ -518,15 +492,14 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
518 rxh->h_hash = cpu_to_le32(xahash); 492 rxh->h_hash = cpu_to_le32(xahash);
519 } 493 }
520 494
521 err = reiserfs_prepare_write(fp, page, page_offset, 495 err = reiserfs_prepare_write(NULL, page, page_offset,
522 page_offset + chunk + skip); 496 page_offset + chunk + skip);
523 if (!err) { 497 if (!err) {
524 if (buffer) 498 if (buffer)
525 memcpy(data + skip, buffer + buffer_pos, chunk); 499 memcpy(data + skip, buffer + buffer_pos, chunk);
526 err = 500 err = reiserfs_commit_write(NULL, page, page_offset,
527 reiserfs_commit_write(fp, page, page_offset, 501 page_offset + chunk +
528 page_offset + chunk + 502 skip);
529 skip);
530 } 503 }
531 unlock_page(page); 504 unlock_page(page);
532 reiserfs_put_page(page); 505 reiserfs_put_page(page);
@@ -548,7 +521,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
548 521
549 out_filp: 522 out_filp:
550 mutex_unlock(&xinode->i_mutex); 523 mutex_unlock(&xinode->i_mutex);
551 fput(fp); 524 dput(dentry);
552 525
553 out: 526 out:
554 return err; 527 return err;
@@ -562,7 +535,7 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
562 size_t buffer_size) 535 size_t buffer_size)
563{ 536{
564 ssize_t err = 0; 537 ssize_t err = 0;
565 struct file *fp; 538 struct dentry *dentry;
566 size_t isize; 539 size_t isize;
567 size_t file_pos = 0; 540 size_t file_pos = 0;
568 size_t buffer_pos = 0; 541 size_t buffer_pos = 0;
@@ -578,13 +551,13 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
578 if (get_inode_sd_version(inode) == STAT_DATA_V1) 551 if (get_inode_sd_version(inode) == STAT_DATA_V1)
579 return -EOPNOTSUPP; 552 return -EOPNOTSUPP;
580 553
581 fp = open_xa_file(inode, name, FL_READONLY); 554 dentry = get_xa_file_dentry(inode, name, FL_READONLY);
582 if (IS_ERR(fp)) { 555 if (IS_ERR(dentry)) {
583 err = PTR_ERR(fp); 556 err = PTR_ERR(dentry);
584 goto out; 557 goto out;
585 } 558 }
586 559
587 xinode = fp->f_path.dentry->d_inode; 560 xinode = dentry->d_inode;
588 isize = xinode->i_size; 561 isize = xinode->i_size;
589 REISERFS_I(inode)->i_flags |= i_has_xattr_dir; 562 REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
590 563
@@ -652,7 +625,7 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
652 } 625 }
653 626
654 out_dput: 627 out_dput:
655 fput(fp); 628 dput(dentry);
656 629
657 out: 630 out:
658 return err; 631 return err;
@@ -742,7 +715,6 @@ reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen,
742/* This is called w/ inode->i_mutex downed */ 715/* This is called w/ inode->i_mutex downed */
743int reiserfs_delete_xattrs(struct inode *inode) 716int reiserfs_delete_xattrs(struct inode *inode)
744{ 717{
745 struct file *fp;
746 struct dentry *dir, *root; 718 struct dentry *dir, *root;
747 int err = 0; 719 int err = 0;
748 720
@@ -763,15 +735,8 @@ int reiserfs_delete_xattrs(struct inode *inode)
763 return 0; 735 return 0;
764 } 736 }
765 737
766 fp = dentry_open(dir, NULL, O_RDWR);
767 if (IS_ERR(fp)) {
768 err = PTR_ERR(fp);
769 /* dentry_open dputs the dentry if it fails */
770 goto out;
771 }
772
773 lock_kernel(); 738 lock_kernel();
774 err = xattr_readdir(fp, reiserfs_delete_xattrs_filler, dir); 739 err = xattr_readdir(dir->d_inode, reiserfs_delete_xattrs_filler, dir);
775 if (err) { 740 if (err) {
776 unlock_kernel(); 741 unlock_kernel();
777 goto out_dir; 742 goto out_dir;
@@ -791,7 +756,7 @@ int reiserfs_delete_xattrs(struct inode *inode)
791 unlock_kernel(); 756 unlock_kernel();
792 757
793 out_dir: 758 out_dir:
794 fput(fp); 759 dput(dir);
795 760
796 out: 761 out:
797 if (!err) 762 if (!err)
@@ -833,7 +798,6 @@ reiserfs_chown_xattrs_filler(void *buf, const char *name, int namelen,
833 798
834int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs) 799int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
835{ 800{
836 struct file *fp;
837 struct dentry *dir; 801 struct dentry *dir;
838 int err = 0; 802 int err = 0;
839 struct reiserfs_chown_buf buf; 803 struct reiserfs_chown_buf buf;
@@ -857,13 +821,6 @@ int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
857 goto out; 821 goto out;
858 } 822 }
859 823
860 fp = dentry_open(dir, NULL, O_RDWR);
861 if (IS_ERR(fp)) {
862 err = PTR_ERR(fp);
863 /* dentry_open dputs the dentry if it fails */
864 goto out;
865 }
866
867 lock_kernel(); 824 lock_kernel();
868 825
869 attrs->ia_valid &= (ATTR_UID | ATTR_GID | ATTR_CTIME); 826 attrs->ia_valid &= (ATTR_UID | ATTR_GID | ATTR_CTIME);
@@ -871,7 +828,7 @@ int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
871 buf.attrs = attrs; 828 buf.attrs = attrs;
872 buf.inode = inode; 829 buf.inode = inode;
873 830
874 err = xattr_readdir(fp, reiserfs_chown_xattrs_filler, &buf); 831 err = xattr_readdir(dir->d_inode, reiserfs_chown_xattrs_filler, &buf);
875 if (err) { 832 if (err) {
876 unlock_kernel(); 833 unlock_kernel();
877 goto out_dir; 834 goto out_dir;
@@ -881,7 +838,7 @@ int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
881 unlock_kernel(); 838 unlock_kernel();
882 839
883 out_dir: 840 out_dir:
884 fput(fp); 841 dput(dir);
885 842
886 out: 843 out:
887 attrs->ia_valid = ia_valid; 844 attrs->ia_valid = ia_valid;
@@ -1029,7 +986,6 @@ reiserfs_listxattr_filler(void *buf, const char *name, int namelen,
1029 */ 986 */
1030ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) 987ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
1031{ 988{
1032 struct file *fp;
1033 struct dentry *dir; 989 struct dentry *dir;
1034 int err = 0; 990 int err = 0;
1035 struct reiserfs_listxattr_buf buf; 991 struct reiserfs_listxattr_buf buf;
@@ -1052,13 +1008,6 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
1052 goto out; 1008 goto out;
1053 } 1009 }
1054 1010
1055 fp = dentry_open(dir, NULL, O_RDWR);
1056 if (IS_ERR(fp)) {
1057 err = PTR_ERR(fp);
1058 /* dentry_open dputs the dentry if it fails */
1059 goto out;
1060 }
1061
1062 buf.r_buf = buffer; 1011 buf.r_buf = buffer;
1063 buf.r_size = buffer ? size : 0; 1012 buf.r_size = buffer ? size : 0;
1064 buf.r_pos = 0; 1013 buf.r_pos = 0;
@@ -1066,7 +1015,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
1066 1015
1067 REISERFS_I(dentry->d_inode)->i_flags |= i_has_xattr_dir; 1016 REISERFS_I(dentry->d_inode)->i_flags |= i_has_xattr_dir;
1068 1017
1069 err = xattr_readdir(fp, reiserfs_listxattr_filler, &buf); 1018 err = xattr_readdir(dir->d_inode, reiserfs_listxattr_filler, &buf);
1070 if (err) 1019 if (err)
1071 goto out_dir; 1020 goto out_dir;
1072 1021
@@ -1076,7 +1025,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
1076 err = buf.r_pos; 1025 err = buf.r_pos;
1077 1026
1078 out_dir: 1027 out_dir:
1079 fput(fp); 1028 dput(dir);
1080 1029
1081 out: 1030 out:
1082 reiserfs_read_unlock_xattr_i(dentry->d_inode); 1031 reiserfs_read_unlock_xattr_i(dentry->d_inode);
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 00b6f0a518c8..3f13d491c7c7 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -340,8 +340,9 @@ static struct dentry *
340romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 340romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
341{ 341{
342 unsigned long offset, maxoff; 342 unsigned long offset, maxoff;
343 int fslen, res; 343 long res;
344 struct inode *inode; 344 int fslen;
345 struct inode *inode = NULL;
345 char fsname[ROMFS_MAXFN]; /* XXX dynamic? */ 346 char fsname[ROMFS_MAXFN]; /* XXX dynamic? */
346 struct romfs_inode ri; 347 struct romfs_inode ri;
347 const char *name; /* got from dentry */ 348 const char *name; /* got from dentry */
@@ -351,7 +352,7 @@ romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
351 offset = dir->i_ino & ROMFH_MASK; 352 offset = dir->i_ino & ROMFH_MASK;
352 lock_kernel(); 353 lock_kernel();
353 if (romfs_copyfrom(dir, &ri, offset, ROMFH_SIZE) <= 0) 354 if (romfs_copyfrom(dir, &ri, offset, ROMFH_SIZE) <= 0)
354 goto out; 355 goto error;
355 356
356 maxoff = romfs_maxsize(dir->i_sb); 357 maxoff = romfs_maxsize(dir->i_sb);
357 offset = be32_to_cpu(ri.spec) & ROMFH_MASK; 358 offset = be32_to_cpu(ri.spec) & ROMFH_MASK;
@@ -364,9 +365,9 @@ romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
364 365
365 for(;;) { 366 for(;;) {
366 if (!offset || offset >= maxoff) 367 if (!offset || offset >= maxoff)
367 goto out0; 368 goto success; /* negative success */
368 if (romfs_copyfrom(dir, &ri, offset, ROMFH_SIZE) <= 0) 369 if (romfs_copyfrom(dir, &ri, offset, ROMFH_SIZE) <= 0)
369 goto out; 370 goto error;
370 371
371 /* try to match the first 16 bytes of name */ 372 /* try to match the first 16 bytes of name */
372 fslen = romfs_strnlen(dir, offset+ROMFH_SIZE, ROMFH_SIZE); 373 fslen = romfs_strnlen(dir, offset+ROMFH_SIZE, ROMFH_SIZE);
@@ -397,23 +398,14 @@ romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
397 inode = romfs_iget(dir->i_sb, offset); 398 inode = romfs_iget(dir->i_sb, offset);
398 if (IS_ERR(inode)) { 399 if (IS_ERR(inode)) {
399 res = PTR_ERR(inode); 400 res = PTR_ERR(inode);
400 goto out; 401 goto error;
401 } 402 }
402 403
403 /* 404success:
404 * it's a bit funky, _lookup needs to return an error code 405 d_add(dentry, inode);
405 * (negative) or a NULL, both as a dentry. ENOENT should not
406 * be returned, instead we need to create a negative dentry by
407 * d_add(dentry, NULL); and return 0 as no error.
408 * (Although as I see, it only matters on writable file
409 * systems).
410 */
411
412out0: inode = NULL;
413 res = 0; 406 res = 0;
414 d_add (dentry, inode); 407error:
415 408 unlock_kernel();
416out: unlock_kernel();
417 return ERR_PTR(res); 409 return ERR_PTR(res);
418} 410}
419 411
diff --git a/fs/select.c b/fs/select.c
index 5633fe980781..00f58c5c7e05 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -260,7 +260,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
260 wait = NULL; 260 wait = NULL;
261 if (retval || !*timeout || signal_pending(current)) 261 if (retval || !*timeout || signal_pending(current))
262 break; 262 break;
263 if(table.error) { 263 if (table.error) {
264 retval = table.error; 264 retval = table.error;
265 break; 265 break;
266 } 266 }
diff --git a/fs/signalfd.c b/fs/signalfd.c
index cb2b63ae0bf4..8ead0db35933 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -111,9 +111,14 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
111 err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid); 111 err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid);
112 err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); 112 err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr);
113 break; 113 break;
114 default: /* this is just in case for now ... */ 114 default:
115 /*
116 * This case catches also the signals queued by sigqueue().
117 */
115 err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); 118 err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
116 err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid); 119 err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid);
120 err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr);
121 err |= __put_user(kinfo->si_int, &uinfo->ssi_int);
117 break; 122 break;
118 } 123 }
119 124
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
index fae8e85af0ed..6bd9b691a463 100644
--- a/fs/smbfs/smbiod.c
+++ b/fs/smbfs/smbiod.c
@@ -206,7 +206,7 @@ int smbiod_retry(struct smb_sb_info *server)
206 206
207 smb_close_socket(server); 207 smb_close_socket(server);
208 208
209 if (pid == 0) { 209 if (!pid) {
210 /* FIXME: this is fatal, umount? */ 210 /* FIXME: this is fatal, umount? */
211 printk(KERN_ERR "smb_retry: no connection process\n"); 211 printk(KERN_ERR "smb_retry: no connection process\n");
212 server->state = CONN_RETRIED; 212 server->state = CONN_RETRIED;
diff --git a/fs/splice.c b/fs/splice.c
index 9b559ee711a8..eeb1a86a7014 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -320,7 +320,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
320 break; 320 break;
321 321
322 error = add_to_page_cache_lru(page, mapping, index, 322 error = add_to_page_cache_lru(page, mapping, index,
323 GFP_KERNEL); 323 mapping_gfp_mask(mapping));
324 if (unlikely(error)) { 324 if (unlikely(error)) {
325 page_cache_release(page); 325 page_cache_release(page);
326 if (error == -EEXIST) 326 if (error == -EEXIST)
@@ -370,8 +370,10 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
370 * for an in-flight io page 370 * for an in-flight io page
371 */ 371 */
372 if (flags & SPLICE_F_NONBLOCK) { 372 if (flags & SPLICE_F_NONBLOCK) {
373 if (TestSetPageLocked(page)) 373 if (TestSetPageLocked(page)) {
374 error = -EAGAIN;
374 break; 375 break;
376 }
375 } else 377 } else
376 lock_page(page); 378 lock_page(page);
377 379
@@ -479,9 +481,8 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
479 struct pipe_inode_info *pipe, size_t len, 481 struct pipe_inode_info *pipe, size_t len,
480 unsigned int flags) 482 unsigned int flags)
481{ 483{
482 ssize_t spliced;
483 int ret;
484 loff_t isize, left; 484 loff_t isize, left;
485 int ret;
485 486
486 isize = i_size_read(in->f_mapping->host); 487 isize = i_size_read(in->f_mapping->host);
487 if (unlikely(*ppos >= isize)) 488 if (unlikely(*ppos >= isize))
@@ -491,29 +492,9 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
491 if (unlikely(left < len)) 492 if (unlikely(left < len))
492 len = left; 493 len = left;
493 494
494 ret = 0; 495 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
495 spliced = 0; 496 if (ret > 0)
496 while (len && !spliced) {
497 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
498
499 if (ret < 0)
500 break;
501 else if (!ret) {
502 if (spliced)
503 break;
504 if (flags & SPLICE_F_NONBLOCK) {
505 ret = -EAGAIN;
506 break;
507 }
508 }
509
510 *ppos += ret; 497 *ppos += ret;
511 len -= ret;
512 spliced += ret;
513 }
514
515 if (spliced)
516 return spliced;
517 498
518 return ret; 499 return ret;
519} 500}
@@ -1669,6 +1650,13 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1669 i++; 1650 i++;
1670 } while (len); 1651 } while (len);
1671 1652
1653 /*
1654 * return EAGAIN if we have the potential of some data in the
1655 * future, otherwise just return 0
1656 */
1657 if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
1658 ret = -EAGAIN;
1659
1672 inode_double_unlock(ipipe->inode, opipe->inode); 1660 inode_double_unlock(ipipe->inode, opipe->inode);
1673 1661
1674 /* 1662 /*
@@ -1709,11 +1697,8 @@ static long do_tee(struct file *in, struct file *out, size_t len,
1709 ret = link_ipipe_prep(ipipe, flags); 1697 ret = link_ipipe_prep(ipipe, flags);
1710 if (!ret) { 1698 if (!ret) {
1711 ret = link_opipe_prep(opipe, flags); 1699 ret = link_opipe_prep(opipe, flags);
1712 if (!ret) { 1700 if (!ret)
1713 ret = link_pipe(ipipe, opipe, len, flags); 1701 ret = link_pipe(ipipe, opipe, len, flags);
1714 if (!ret && (flags & SPLICE_F_NONBLOCK))
1715 ret = -EAGAIN;
1716 }
1717 } 1702 }
1718 } 1703 }
1719 1704
diff --git a/fs/super.c b/fs/super.c
index 88811f60c8de..1f8f05ede437 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,7 @@
37#include <linux/idr.h> 37#include <linux/idr.h>
38#include <linux/kobject.h> 38#include <linux/kobject.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <linux/file.h>
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41 42
42 43
@@ -556,21 +557,40 @@ out:
556} 557}
557 558
558/** 559/**
559 * mark_files_ro 560 * mark_files_ro - mark all files read-only
560 * @sb: superblock in question 561 * @sb: superblock in question
561 * 562 *
562 * All files are marked read/only. We don't care about pending 563 * All files are marked read-only. We don't care about pending
563 * delete files so this should be used in 'force' mode only 564 * delete files so this should be used in 'force' mode only.
564 */ 565 */
565 566
566static void mark_files_ro(struct super_block *sb) 567static void mark_files_ro(struct super_block *sb)
567{ 568{
568 struct file *f; 569 struct file *f;
569 570
571retry:
570 file_list_lock(); 572 file_list_lock();
571 list_for_each_entry(f, &sb->s_files, f_u.fu_list) { 573 list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
572 if (S_ISREG(f->f_path.dentry->d_inode->i_mode) && file_count(f)) 574 struct vfsmount *mnt;
573 f->f_mode &= ~FMODE_WRITE; 575 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
576 continue;
577 if (!file_count(f))
578 continue;
579 if (!(f->f_mode & FMODE_WRITE))
580 continue;
581 f->f_mode &= ~FMODE_WRITE;
582 if (file_check_writeable(f) != 0)
583 continue;
584 file_release_write(f);
585 mnt = mntget(f->f_path.mnt);
586 file_list_unlock();
587 /*
588 * This can sleep, so we can't hold
589 * the file_list_lock() spinlock.
590 */
591 mnt_drop_write(mnt);
592 mntput(mnt);
593 goto retry;
574 } 594 }
575 file_list_unlock(); 595 file_list_unlock();
576} 596}
@@ -870,12 +890,12 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
870 if (!mnt) 890 if (!mnt)
871 goto out; 891 goto out;
872 892
873 if (data) { 893 if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
874 secdata = alloc_secdata(); 894 secdata = alloc_secdata();
875 if (!secdata) 895 if (!secdata)
876 goto out_mnt; 896 goto out_mnt;
877 897
878 error = security_sb_copy_data(type, data, secdata); 898 error = security_sb_copy_data(data, secdata);
879 if (error) 899 if (error)
880 goto out_free_secdata; 900 goto out_free_secdata;
881 } 901 }
@@ -945,6 +965,7 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
945 put_filesystem(type); 965 put_filesystem(type);
946 return mnt; 966 return mnt;
947} 967}
968EXPORT_SYMBOL_GPL(do_kern_mount);
948 969
949struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) 970struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
950{ 971{
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 4948d9bc405d..a1c3a1fab7f0 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -20,6 +20,7 @@
20#include <linux/idr.h> 20#include <linux/idr.h>
21#include <linux/completion.h> 21#include <linux/completion.h>
22#include <linux/mutex.h> 22#include <linux/mutex.h>
23#include <linux/slab.h>
23#include "sysfs.h" 24#include "sysfs.h"
24 25
25DEFINE_MUTEX(sysfs_mutex); 26DEFINE_MUTEX(sysfs_mutex);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index a271c87c4472..ade9a7e6a757 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -12,6 +12,8 @@
12 12
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/kobject.h> 14#include <linux/kobject.h>
15#include <linux/kallsyms.h>
16#include <linux/slab.h>
15#include <linux/namei.h> 17#include <linux/namei.h>
16#include <linux/poll.h> 18#include <linux/poll.h>
17#include <linux/list.h> 19#include <linux/list.h>
@@ -86,7 +88,12 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
86 * The code works fine with PAGE_SIZE return but it's likely to 88 * The code works fine with PAGE_SIZE return but it's likely to
87 * indicate truncated result or overflow in normal use cases. 89 * indicate truncated result or overflow in normal use cases.
88 */ 90 */
89 BUG_ON(count >= (ssize_t)PAGE_SIZE); 91 if (count >= (ssize_t)PAGE_SIZE) {
92 print_symbol("fill_read_buffer: %s returned bad count\n",
93 (unsigned long)ops->show);
94 /* Try to struggle along */
95 count = PAGE_SIZE - 1;
96 }
90 if (count >= 0) { 97 if (count >= 0) {
91 buffer->needs_read_fill = 0; 98 buffer->needs_read_fill = 0;
92 buffer->count = count; 99 buffer->count = count;
@@ -122,7 +129,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
122 ssize_t retval = 0; 129 ssize_t retval = 0;
123 130
124 mutex_lock(&buffer->mutex); 131 mutex_lock(&buffer->mutex);
125 if (buffer->needs_read_fill) { 132 if (buffer->needs_read_fill || *ppos == 0) {
126 retval = fill_read_buffer(file->f_path.dentry,buffer); 133 retval = fill_read_buffer(file->f_path.dentry,buffer);
127 if (retval) 134 if (retval)
128 goto out; 135 goto out;
@@ -403,8 +410,7 @@ static int sysfs_release(struct inode *inode, struct file *filp)
403 * return POLLERR|POLLPRI, and select will return the fd whether 410 * return POLLERR|POLLPRI, and select will return the fd whether
404 * it is waiting for read, write, or exceptions. 411 * it is waiting for read, write, or exceptions.
405 * Once poll/select indicates that the value has changed, you 412 * Once poll/select indicates that the value has changed, you
406 * need to close and re-open the file, as simply seeking and reading 413 * need to close and re-open the file, or seek to 0 and read again.
407 * again will not get new data, or reset the state of 'poll'.
408 * Reminder: this only works for attributes which actively support 414 * Reminder: this only works for attributes which actively support
409 * it, and it is not possible to test an attribute from userspace 415 * it, and it is not possible to test an attribute from userspace
410 * to see if it supports poll (Neither 'poll' nor 'select' return 416 * to see if it supports poll (Neither 'poll' nor 'select' return
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 5f66c4466151..817f5966edca 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -87,7 +87,14 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
87 87
88void sysfs_remove_link(struct kobject * kobj, const char * name) 88void sysfs_remove_link(struct kobject * kobj, const char * name)
89{ 89{
90 sysfs_hash_and_remove(kobj->sd, name); 90 struct sysfs_dirent *parent_sd = NULL;
91
92 if (!kobj)
93 parent_sd = &sysfs_root;
94 else
95 parent_sd = kobj->sd;
96
97 sysfs_hash_and_remove(parent_sd, name);
91} 98}
92 99
93static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, 100static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 1fca381f0ce2..1e7598fb9787 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -315,8 +315,8 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
315 } 315 }
316 316
317 UFSD(" change from %llu to %llu, pos %u\n", 317 UFSD(" change from %llu to %llu, pos %u\n",
318 (unsigned long long)pos + oldb, 318 (unsigned long long)(pos + oldb),
319 (unsigned long long)pos + newb, pos); 319 (unsigned long long)(pos + newb), pos);
320 320
321 bh->b_blocknr = newb + pos; 321 bh->b_blocknr = newb + pos;
322 unmap_underlying_metadata(bh->b_bdev, 322 unmap_underlying_metadata(bh->b_bdev,
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index b26fc4dec1e7..23ceed8c8fb9 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -58,7 +58,7 @@ ufs_set_fs_state(struct super_block *sb, struct ufs_super_block_first *usb1,
58{ 58{
59 switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) { 59 switch (UFS_SB(sb)->s_flags & UFS_ST_MASK) {
60 case UFS_ST_SUNOS: 60 case UFS_ST_SUNOS:
61 if (fs32_to_cpu(sb, usb3->fs_postblformat == UFS_42POSTBLFMT)) { 61 if (fs32_to_cpu(sb, usb3->fs_postblformat) == UFS_42POSTBLFMT) {
62 usb1->fs_u0.fs_sun.fs_state = cpu_to_fs32(sb, value); 62 usb1->fs_u0.fs_sun.fs_state = cpu_to_fs32(sb, value);
63 break; 63 break;
64 } 64 }
diff --git a/fs/utimes.c b/fs/utimes.c
index b18da9c0b97f..a2bef77dc9c9 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -2,6 +2,7 @@
2#include <linux/file.h> 2#include <linux/file.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/linkage.h> 4#include <linux/linkage.h>
5#include <linux/mount.h>
5#include <linux/namei.h> 6#include <linux/namei.h>
6#include <linux/sched.h> 7#include <linux/sched.h>
7#include <linux/stat.h> 8#include <linux/stat.h>
@@ -59,6 +60,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
59 struct inode *inode; 60 struct inode *inode;
60 struct iattr newattrs; 61 struct iattr newattrs;
61 struct file *f = NULL; 62 struct file *f = NULL;
63 struct vfsmount *mnt;
62 64
63 error = -EINVAL; 65 error = -EINVAL;
64 if (times && (!nsec_valid(times[0].tv_nsec) || 66 if (times && (!nsec_valid(times[0].tv_nsec) ||
@@ -79,18 +81,20 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
79 if (!f) 81 if (!f)
80 goto out; 82 goto out;
81 dentry = f->f_path.dentry; 83 dentry = f->f_path.dentry;
84 mnt = f->f_path.mnt;
82 } else { 85 } else {
83 error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd); 86 error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
84 if (error) 87 if (error)
85 goto out; 88 goto out;
86 89
87 dentry = nd.path.dentry; 90 dentry = nd.path.dentry;
91 mnt = nd.path.mnt;
88 } 92 }
89 93
90 inode = dentry->d_inode; 94 inode = dentry->d_inode;
91 95
92 error = -EROFS; 96 error = mnt_want_write(mnt);
93 if (IS_RDONLY(inode)) 97 if (error)
94 goto dput_and_out; 98 goto dput_and_out;
95 99
96 /* Don't worry, the checks are done in inode_change_ok() */ 100 /* Don't worry, the checks are done in inode_change_ok() */
@@ -98,7 +102,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
98 if (times) { 102 if (times) {
99 error = -EPERM; 103 error = -EPERM;
100 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 104 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
101 goto dput_and_out; 105 goto mnt_drop_write_and_out;
102 106
103 if (times[0].tv_nsec == UTIME_OMIT) 107 if (times[0].tv_nsec == UTIME_OMIT)
104 newattrs.ia_valid &= ~ATTR_ATIME; 108 newattrs.ia_valid &= ~ATTR_ATIME;
@@ -118,22 +122,24 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
118 } else { 122 } else {
119 error = -EACCES; 123 error = -EACCES;
120 if (IS_IMMUTABLE(inode)) 124 if (IS_IMMUTABLE(inode))
121 goto dput_and_out; 125 goto mnt_drop_write_and_out;
122 126
123 if (!is_owner_or_cap(inode)) { 127 if (!is_owner_or_cap(inode)) {
124 if (f) { 128 if (f) {
125 if (!(f->f_mode & FMODE_WRITE)) 129 if (!(f->f_mode & FMODE_WRITE))
126 goto dput_and_out; 130 goto mnt_drop_write_and_out;
127 } else { 131 } else {
128 error = vfs_permission(&nd, MAY_WRITE); 132 error = vfs_permission(&nd, MAY_WRITE);
129 if (error) 133 if (error)
130 goto dput_and_out; 134 goto mnt_drop_write_and_out;
131 } 135 }
132 } 136 }
133 } 137 }
134 mutex_lock(&inode->i_mutex); 138 mutex_lock(&inode->i_mutex);
135 error = notify_change(dentry, &newattrs); 139 error = notify_change(dentry, &newattrs);
136 mutex_unlock(&inode->i_mutex); 140 mutex_unlock(&inode->i_mutex);
141mnt_drop_write_and_out:
142 mnt_drop_write(mnt);
137dput_and_out: 143dput_and_out:
138 if (f) 144 if (f)
139 fput(f); 145 fput(f);
diff --git a/fs/xattr.c b/fs/xattr.c
index 3acab1615460..f7062da505d4 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -11,6 +11,7 @@
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/file.h> 12#include <linux/file.h>
13#include <linux/xattr.h> 13#include <linux/xattr.h>
14#include <linux/mount.h>
14#include <linux/namei.h> 15#include <linux/namei.h>
15#include <linux/security.h> 16#include <linux/security.h>
16#include <linux/syscalls.h> 17#include <linux/syscalls.h>
@@ -32,8 +33,6 @@ xattr_permission(struct inode *inode, const char *name, int mask)
32 * filesystem or on an immutable / append-only inode. 33 * filesystem or on an immutable / append-only inode.
33 */ 34 */
34 if (mask & MAY_WRITE) { 35 if (mask & MAY_WRITE) {
35 if (IS_RDONLY(inode))
36 return -EROFS;
37 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 36 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
38 return -EPERM; 37 return -EPERM;
39 } 38 }
@@ -262,7 +261,11 @@ sys_setxattr(char __user *path, char __user *name, void __user *value,
262 error = user_path_walk(path, &nd); 261 error = user_path_walk(path, &nd);
263 if (error) 262 if (error)
264 return error; 263 return error;
265 error = setxattr(nd.path.dentry, name, value, size, flags); 264 error = mnt_want_write(nd.path.mnt);
265 if (!error) {
266 error = setxattr(nd.path.dentry, name, value, size, flags);
267 mnt_drop_write(nd.path.mnt);
268 }
266 path_put(&nd.path); 269 path_put(&nd.path);
267 return error; 270 return error;
268} 271}
@@ -277,7 +280,11 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value,
277 error = user_path_walk_link(path, &nd); 280 error = user_path_walk_link(path, &nd);
278 if (error) 281 if (error)
279 return error; 282 return error;
280 error = setxattr(nd.path.dentry, name, value, size, flags); 283 error = mnt_want_write(nd.path.mnt);
284 if (!error) {
285 error = setxattr(nd.path.dentry, name, value, size, flags);
286 mnt_drop_write(nd.path.mnt);
287 }
281 path_put(&nd.path); 288 path_put(&nd.path);
282 return error; 289 return error;
283} 290}
@@ -295,7 +302,12 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
295 return error; 302 return error;
296 dentry = f->f_path.dentry; 303 dentry = f->f_path.dentry;
297 audit_inode(NULL, dentry); 304 audit_inode(NULL, dentry);
298 error = setxattr(dentry, name, value, size, flags); 305 error = mnt_want_write(f->f_path.mnt);
306 if (!error) {
307 error = setxattr(dentry, name, value, size, flags);
308 mnt_drop_write(f->f_path.mnt);
309 }
310out_fput:
299 fput(f); 311 fput(f);
300 return error; 312 return error;
301} 313}
@@ -482,7 +494,11 @@ sys_removexattr(char __user *path, char __user *name)
482 error = user_path_walk(path, &nd); 494 error = user_path_walk(path, &nd);
483 if (error) 495 if (error)
484 return error; 496 return error;
485 error = removexattr(nd.path.dentry, name); 497 error = mnt_want_write(nd.path.mnt);
498 if (!error) {
499 error = removexattr(nd.path.dentry, name);
500 mnt_drop_write(nd.path.mnt);
501 }
486 path_put(&nd.path); 502 path_put(&nd.path);
487 return error; 503 return error;
488} 504}
@@ -496,7 +512,11 @@ sys_lremovexattr(char __user *path, char __user *name)
496 error = user_path_walk_link(path, &nd); 512 error = user_path_walk_link(path, &nd);
497 if (error) 513 if (error)
498 return error; 514 return error;
499 error = removexattr(nd.path.dentry, name); 515 error = mnt_want_write(nd.path.mnt);
516 if (!error) {
517 error = removexattr(nd.path.dentry, name);
518 mnt_drop_write(nd.path.mnt);
519 }
500 path_put(&nd.path); 520 path_put(&nd.path);
501 return error; 521 return error;
502} 522}
@@ -513,7 +533,11 @@ sys_fremovexattr(int fd, char __user *name)
513 return error; 533 return error;
514 dentry = f->f_path.dentry; 534 dentry = f->f_path.dentry;
515 audit_inode(NULL, dentry); 535 audit_inode(NULL, dentry);
516 error = removexattr(dentry, name); 536 error = mnt_want_write(f->f_path.mnt);
537 if (!error) {
538 error = removexattr(dentry, name);
539 mnt_drop_write(f->f_path.mnt);
540 }
517 fput(f); 541 fput(f);
518 return error; 542 return error;
519} 543}
diff --git a/fs/xfs/Kbuild b/fs/xfs/Kbuild
deleted file mode 100644
index 2566e96706f1..000000000000
--- a/fs/xfs/Kbuild
+++ /dev/null
@@ -1,6 +0,0 @@
1#
2# The xfs people like to share Makefile with 2.6 and 2.4.
3# Utilise file named Kbuild file which has precedence over Makefile.
4#
5
6include $(srctree)/$(obj)/Makefile-linux-2.6
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 35115bca036e..524021ff5436 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -35,18 +35,6 @@ config XFS_QUOTA
35 with or without the generic quota support enabled (CONFIG_QUOTA) - 35 with or without the generic quota support enabled (CONFIG_QUOTA) -
36 they are completely independent subsystems. 36 they are completely independent subsystems.
37 37
38config XFS_SECURITY
39 bool "XFS Security Label support"
40 depends on XFS_FS
41 help
42 Security labels support alternative access control models
43 implemented by security modules like SELinux. This option
44 enables an extended attribute namespace for inode security
45 labels in the XFS filesystem.
46
47 If you are not using a security module that requires using
48 extended attributes for inode security labels, say N.
49
50config XFS_POSIX_ACL 38config XFS_POSIX_ACL
51 bool "XFS POSIX ACL support" 39 bool "XFS POSIX ACL support"
52 depends on XFS_FS 40 depends on XFS_FS
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 49e3e7e5e3dc..36ec614e699a 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -1 +1,117 @@
1include $(TOPDIR)/fs/xfs/Makefile-linux-$(VERSION).$(PATCHLEVEL) 1#
2# Copyright (c) 2000-2005 Silicon Graphics, Inc.
3# All Rights Reserved.
4#
5# This program is free software; you can redistribute it and/or
6# modify it under the terms of the GNU General Public License as
7# published by the Free Software Foundation.
8#
9# This program is distributed in the hope that it would be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program; if not, write the Free Software Foundation,
16# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17#
18
19EXTRA_CFLAGS += -I$(src) -I$(src)/linux-2.6 -funsigned-char
20
21XFS_LINUX := linux-2.6
22
23ifeq ($(CONFIG_XFS_DEBUG),y)
24 EXTRA_CFLAGS += -g
25endif
26
27obj-$(CONFIG_XFS_FS) += xfs.o
28
29xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \
30 xfs_dquot.o \
31 xfs_dquot_item.o \
32 xfs_trans_dquot.o \
33 xfs_qm_syscalls.o \
34 xfs_qm_bhv.o \
35 xfs_qm.o)
36
37ifeq ($(CONFIG_XFS_QUOTA),y)
38xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
39endif
40
41xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
42xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
43xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o
44xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o
45xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o
46
47
48xfs-y += xfs_alloc.o \
49 xfs_alloc_btree.o \
50 xfs_attr.o \
51 xfs_attr_leaf.o \
52 xfs_bit.o \
53 xfs_bmap.o \
54 xfs_bmap_btree.o \
55 xfs_btree.o \
56 xfs_buf_item.o \
57 xfs_da_btree.o \
58 xfs_dir2.o \
59 xfs_dir2_block.o \
60 xfs_dir2_data.o \
61 xfs_dir2_leaf.o \
62 xfs_dir2_node.o \
63 xfs_dir2_sf.o \
64 xfs_error.o \
65 xfs_extfree_item.o \
66 xfs_filestream.o \
67 xfs_fsops.o \
68 xfs_ialloc.o \
69 xfs_ialloc_btree.o \
70 xfs_iget.o \
71 xfs_inode.o \
72 xfs_inode_item.o \
73 xfs_iomap.o \
74 xfs_itable.o \
75 xfs_dfrag.o \
76 xfs_log.o \
77 xfs_log_recover.o \
78 xfs_mount.o \
79 xfs_mru_cache.o \
80 xfs_rename.o \
81 xfs_trans.o \
82 xfs_trans_ail.o \
83 xfs_trans_buf.o \
84 xfs_trans_extfree.o \
85 xfs_trans_inode.o \
86 xfs_trans_item.o \
87 xfs_utils.o \
88 xfs_vfsops.o \
89 xfs_vnodeops.o \
90 xfs_rw.o \
91 xfs_dmops.o \
92 xfs_qmops.o
93
94xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o
95
96# Objects in linux/
97xfs-y += $(addprefix $(XFS_LINUX)/, \
98 kmem.o \
99 xfs_aops.o \
100 xfs_buf.o \
101 xfs_export.o \
102 xfs_file.o \
103 xfs_fs_subr.o \
104 xfs_globals.o \
105 xfs_ioctl.o \
106 xfs_iops.o \
107 xfs_lrw.o \
108 xfs_super.o \
109 xfs_vnode.o)
110
111# Objects in support/
112xfs-y += $(addprefix support/, \
113 debug.o \
114 uuid.o)
115
116xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o
117
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
deleted file mode 100644
index 97316451fc6d..000000000000
--- a/fs/xfs/Makefile-linux-2.6
+++ /dev/null
@@ -1,117 +0,0 @@
1#
2# Copyright (c) 2000-2005 Silicon Graphics, Inc.
3# All Rights Reserved.
4#
5# This program is free software; you can redistribute it and/or
6# modify it under the terms of the GNU General Public License as
7# published by the Free Software Foundation.
8#
9# This program is distributed in the hope that it would be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program; if not, write the Free Software Foundation,
16# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17#
18
19EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
20
21XFS_LINUX := linux-2.6
22
23ifeq ($(CONFIG_XFS_DEBUG),y)
24 EXTRA_CFLAGS += -g
25endif
26
27obj-$(CONFIG_XFS_FS) += xfs.o
28
29xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \
30 xfs_dquot.o \
31 xfs_dquot_item.o \
32 xfs_trans_dquot.o \
33 xfs_qm_syscalls.o \
34 xfs_qm_bhv.o \
35 xfs_qm.o)
36
37ifeq ($(CONFIG_XFS_QUOTA),y)
38xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
39endif
40
41xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
42xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
43xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o
44xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o
45xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o
46
47
48xfs-y += xfs_alloc.o \
49 xfs_alloc_btree.o \
50 xfs_attr.o \
51 xfs_attr_leaf.o \
52 xfs_bit.o \
53 xfs_bmap.o \
54 xfs_bmap_btree.o \
55 xfs_btree.o \
56 xfs_buf_item.o \
57 xfs_da_btree.o \
58 xfs_dir2.o \
59 xfs_dir2_block.o \
60 xfs_dir2_data.o \
61 xfs_dir2_leaf.o \
62 xfs_dir2_node.o \
63 xfs_dir2_sf.o \
64 xfs_error.o \
65 xfs_extfree_item.o \
66 xfs_filestream.o \
67 xfs_fsops.o \
68 xfs_ialloc.o \
69 xfs_ialloc_btree.o \
70 xfs_iget.o \
71 xfs_inode.o \
72 xfs_inode_item.o \
73 xfs_iomap.o \
74 xfs_itable.o \
75 xfs_dfrag.o \
76 xfs_log.o \
77 xfs_log_recover.o \
78 xfs_mount.o \
79 xfs_mru_cache.o \
80 xfs_rename.o \
81 xfs_trans.o \
82 xfs_trans_ail.o \
83 xfs_trans_buf.o \
84 xfs_trans_extfree.o \
85 xfs_trans_inode.o \
86 xfs_trans_item.o \
87 xfs_utils.o \
88 xfs_vfsops.o \
89 xfs_vnodeops.o \
90 xfs_rw.o \
91 xfs_dmops.o \
92 xfs_qmops.o
93
94xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o
95
96# Objects in linux/
97xfs-y += $(addprefix $(XFS_LINUX)/, \
98 kmem.o \
99 xfs_aops.o \
100 xfs_buf.o \
101 xfs_export.o \
102 xfs_file.o \
103 xfs_fs_subr.o \
104 xfs_globals.o \
105 xfs_ioctl.o \
106 xfs_iops.o \
107 xfs_lrw.o \
108 xfs_super.o \
109 xfs_vnode.o)
110
111# Objects in support/
112xfs-y += $(addprefix support/, \
113 debug.o \
114 uuid.o)
115
116xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o
117
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index e040f1ce1b6a..9b1bb17a0501 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -37,7 +37,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
37#ifdef DEBUG 37#ifdef DEBUG
38 if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) { 38 if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) {
39 printk(KERN_WARNING "Large %s attempt, size=%ld\n", 39 printk(KERN_WARNING "Large %s attempt, size=%ld\n",
40 __FUNCTION__, (long)size); 40 __func__, (long)size);
41 dump_stack(); 41 dump_stack();
42 } 42 }
43#endif 43#endif
@@ -52,7 +52,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
52 if (!(++retries % 100)) 52 if (!(++retries % 100))
53 printk(KERN_ERR "XFS: possible memory allocation " 53 printk(KERN_ERR "XFS: possible memory allocation "
54 "deadlock in %s (mode:0x%x)\n", 54 "deadlock in %s (mode:0x%x)\n",
55 __FUNCTION__, lflags); 55 __func__, lflags);
56 congestion_wait(WRITE, HZ/50); 56 congestion_wait(WRITE, HZ/50);
57 } while (1); 57 } while (1);
58} 58}
@@ -129,7 +129,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
129 if (!(++retries % 100)) 129 if (!(++retries % 100))
130 printk(KERN_ERR "XFS: possible memory allocation " 130 printk(KERN_ERR "XFS: possible memory allocation "
131 "deadlock in %s (mode:0x%x)\n", 131 "deadlock in %s (mode:0x%x)\n",
132 __FUNCTION__, lflags); 132 __func__, lflags);
133 congestion_wait(WRITE, HZ/50); 133 congestion_wait(WRITE, HZ/50);
134 } while (1); 134 } while (1);
135} 135}
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
index 2009e6d922ce..3abe7e9ceb33 100644
--- a/fs/xfs/linux-2.6/sema.h
+++ b/fs/xfs/linux-2.6/sema.h
@@ -20,8 +20,8 @@
20 20
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/wait.h> 22#include <linux/wait.h>
23#include <linux/semaphore.h>
23#include <asm/atomic.h> 24#include <asm/atomic.h>
24#include <asm/semaphore.h>
25 25
26/* 26/*
27 * sema_t structure just maps to struct semaphore in Linux kernel. 27 * sema_t structure just maps to struct semaphore in Linux kernel.
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index e0519529c26c..a55c3b26d840 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -243,8 +243,12 @@ xfs_end_bio_unwritten(
243 size_t size = ioend->io_size; 243 size_t size = ioend->io_size;
244 244
245 if (likely(!ioend->io_error)) { 245 if (likely(!ioend->io_error)) {
246 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) 246 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
247 xfs_iomap_write_unwritten(ip, offset, size); 247 int error;
248 error = xfs_iomap_write_unwritten(ip, offset, size);
249 if (error)
250 ioend->io_error = error;
251 }
248 xfs_setfilesize(ioend); 252 xfs_setfilesize(ioend);
249 } 253 }
250 xfs_destroy_ioend(ioend); 254 xfs_destroy_ioend(ioend);
@@ -1532,9 +1536,9 @@ xfs_vm_bmap(
1532 struct xfs_inode *ip = XFS_I(inode); 1536 struct xfs_inode *ip = XFS_I(inode);
1533 1537
1534 xfs_itrace_entry(XFS_I(inode)); 1538 xfs_itrace_entry(XFS_I(inode));
1535 xfs_rwlock(ip, VRWLOCK_READ); 1539 xfs_ilock(ip, XFS_IOLOCK_SHARED);
1536 xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); 1540 xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
1537 xfs_rwunlock(ip, VRWLOCK_READ); 1541 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
1538 return generic_block_bmap(mapping, block, xfs_get_blocks); 1542 return generic_block_bmap(mapping, block, xfs_get_blocks);
1539} 1543}
1540 1544
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index e347bfd47c91..52f6846101d5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -400,7 +400,7 @@ _xfs_buf_lookup_pages(
400 printk(KERN_ERR 400 printk(KERN_ERR
401 "XFS: possible memory allocation " 401 "XFS: possible memory allocation "
402 "deadlock in %s (mode:0x%x)\n", 402 "deadlock in %s (mode:0x%x)\n",
403 __FUNCTION__, gfp_mask); 403 __func__, gfp_mask);
404 404
405 XFS_STATS_INC(xb_page_retries); 405 XFS_STATS_INC(xb_page_retries);
406 xfsbufd_wakeup(0, gfp_mask); 406 xfsbufd_wakeup(0, gfp_mask);
@@ -598,7 +598,7 @@ xfs_buf_get_flags(
598 error = _xfs_buf_map_pages(bp, flags); 598 error = _xfs_buf_map_pages(bp, flags);
599 if (unlikely(error)) { 599 if (unlikely(error)) {
600 printk(KERN_WARNING "%s: failed to map pages\n", 600 printk(KERN_WARNING "%s: failed to map pages\n",
601 __FUNCTION__); 601 __func__);
602 goto no_buffer; 602 goto no_buffer;
603 } 603 }
604 } 604 }
@@ -778,7 +778,7 @@ xfs_buf_get_noaddr(
778 error = _xfs_buf_map_pages(bp, XBF_MAPPED); 778 error = _xfs_buf_map_pages(bp, XBF_MAPPED);
779 if (unlikely(error)) { 779 if (unlikely(error)) {
780 printk(KERN_WARNING "%s: failed to map pages\n", 780 printk(KERN_WARNING "%s: failed to map pages\n",
781 __FUNCTION__); 781 __func__);
782 goto fail_free_mem; 782 goto fail_free_mem;
783 } 783 }
784 784
@@ -1060,7 +1060,7 @@ xfs_buf_iostart(
1060 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC); 1060 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
1061 bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC); 1061 bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
1062 xfs_buf_delwri_queue(bp, 1); 1062 xfs_buf_delwri_queue(bp, 1);
1063 return status; 1063 return 0;
1064 } 1064 }
1065 1065
1066 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ 1066 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a3d207de48b8..841d7883528d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -387,11 +387,15 @@ static inline int XFS_bwrite(xfs_buf_t *bp)
387 return error; 387 return error;
388} 388}
389 389
390static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp) 390/*
391 * No error can be returned from xfs_buf_iostart for delwri
392 * buffers as they are queued and no I/O is issued.
393 */
394static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp)
391{ 395{
392 bp->b_strat = xfs_bdstrat_cb; 396 bp->b_strat = xfs_bdstrat_cb;
393 bp->b_fspriv3 = mp; 397 bp->b_fspriv3 = mp;
394 return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC); 398 (void)xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);
395} 399}
396 400
397#define XFS_bdstrat(bp) xfs_buf_iorequest(bp) 401#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
index e7f3da61c6c3..652721ce0ea5 100644
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -30,7 +30,7 @@ typedef struct cred {
30extern struct cred *sys_cred; 30extern struct cred *sys_cred;
31 31
32/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */ 32/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */
33static __inline int capable_cred(cred_t *cr, int cid) 33static inline int capable_cred(cred_t *cr, int cid)
34{ 34{
35 return (cr == sys_cred) ? 1 : capable(cid); 35 return (cr == sys_cred) ? 1 : capable(cid);
36} 36}
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index ca4f66c4de16..265f0168ab76 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -22,6 +22,7 @@
22#include "xfs_trans.h" 22#include "xfs_trans.h"
23#include "xfs_sb.h" 23#include "xfs_sb.h"
24#include "xfs_ag.h" 24#include "xfs_ag.h"
25#include "xfs_dir2.h"
25#include "xfs_dmapi.h" 26#include "xfs_dmapi.h"
26#include "xfs_mount.h" 27#include "xfs_mount.h"
27#include "xfs_export.h" 28#include "xfs_export.h"
@@ -30,8 +31,6 @@
30#include "xfs_inode.h" 31#include "xfs_inode.h"
31#include "xfs_vfsops.h" 32#include "xfs_vfsops.h"
32 33
33static struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, };
34
35/* 34/*
36 * Note that we only accept fileids which are long enough rather than allow 35 * Note that we only accept fileids which are long enough rather than allow
37 * the parent generation number to default to zero. XFS considers zero a 36 * the parent generation number to default to zero. XFS considers zero a
@@ -66,7 +65,7 @@ xfs_fs_encode_fh(
66 int len; 65 int len;
67 66
68 /* Directories don't need their parent encoded, they have ".." */ 67 /* Directories don't need their parent encoded, they have ".." */
69 if (S_ISDIR(inode->i_mode)) 68 if (S_ISDIR(inode->i_mode) || !connectable)
70 fileid_type = FILEID_INO32_GEN; 69 fileid_type = FILEID_INO32_GEN;
71 else 70 else
72 fileid_type = FILEID_INO32_GEN_PARENT; 71 fileid_type = FILEID_INO32_GEN_PARENT;
@@ -213,17 +212,16 @@ xfs_fs_get_parent(
213 struct dentry *child) 212 struct dentry *child)
214{ 213{
215 int error; 214 int error;
216 bhv_vnode_t *cvp; 215 struct xfs_inode *cip;
217 struct dentry *parent; 216 struct dentry *parent;
218 217
219 cvp = NULL; 218 error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip);
220 error = xfs_lookup(XFS_I(child->d_inode), &dotdot, &cvp);
221 if (unlikely(error)) 219 if (unlikely(error))
222 return ERR_PTR(-error); 220 return ERR_PTR(-error);
223 221
224 parent = d_alloc_anon(vn_to_inode(cvp)); 222 parent = d_alloc_anon(cip->i_vnode);
225 if (unlikely(!parent)) { 223 if (unlikely(!parent)) {
226 VN_RELE(cvp); 224 iput(cip->i_vnode);
227 return ERR_PTR(-ENOMEM); 225 return ERR_PTR(-ENOMEM);
228 } 226 }
229 return parent; 227 return parent;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index edab1ffbb163..05905246434d 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -469,16 +469,11 @@ xfs_file_open_exec(
469 struct inode *inode) 469 struct inode *inode)
470{ 470{
471 struct xfs_mount *mp = XFS_M(inode->i_sb); 471 struct xfs_mount *mp = XFS_M(inode->i_sb);
472 struct xfs_inode *ip = XFS_I(inode);
472 473
473 if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI)) { 474 if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) &&
474 if (DM_EVENT_ENABLED(XFS_I(inode), DM_EVENT_READ)) { 475 DM_EVENT_ENABLED(ip, DM_EVENT_READ))
475 bhv_vnode_t *vp = vn_from_inode(inode); 476 return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
476
477 return -XFS_SEND_DATA(mp, DM_EVENT_READ,
478 vp, 0, 0, 0, NULL);
479 }
480 }
481
482 return 0; 477 return 0;
483} 478}
484#endif /* HAVE_FOP_OPEN_EXEC */ 479#endif /* HAVE_FOP_OPEN_EXEC */
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index ac6d34cc355d..1eefe61f0e10 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -17,18 +17,7 @@
17 */ 17 */
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_vnodeops.h" 19#include "xfs_vnodeops.h"
20
21/*
22 * The following six includes are needed so that we can include
23 * xfs_inode.h. What a mess..
24 */
25#include "xfs_bmap_btree.h" 20#include "xfs_bmap_btree.h"
26#include "xfs_inum.h"
27#include "xfs_dir2.h"
28#include "xfs_dir2_sf.h"
29#include "xfs_attr_sf.h"
30#include "xfs_dinode.h"
31
32#include "xfs_inode.h" 21#include "xfs_inode.h"
33 22
34int fs_noerr(void) { return 0; } 23int fs_noerr(void) { return 0; }
@@ -42,11 +31,10 @@ xfs_tosspages(
42 xfs_off_t last, 31 xfs_off_t last,
43 int fiopt) 32 int fiopt)
44{ 33{
45 bhv_vnode_t *vp = XFS_ITOV(ip); 34 struct address_space *mapping = ip->i_vnode->i_mapping;
46 struct inode *inode = vn_to_inode(vp);
47 35
48 if (VN_CACHED(vp)) 36 if (mapping->nrpages)
49 truncate_inode_pages(inode->i_mapping, first); 37 truncate_inode_pages(mapping, first);
50} 38}
51 39
52int 40int
@@ -56,15 +44,14 @@ xfs_flushinval_pages(
56 xfs_off_t last, 44 xfs_off_t last,
57 int fiopt) 45 int fiopt)
58{ 46{
59 bhv_vnode_t *vp = XFS_ITOV(ip); 47 struct address_space *mapping = ip->i_vnode->i_mapping;
60 struct inode *inode = vn_to_inode(vp);
61 int ret = 0; 48 int ret = 0;
62 49
63 if (VN_CACHED(vp)) { 50 if (mapping->nrpages) {
64 xfs_iflags_clear(ip, XFS_ITRUNCATED); 51 xfs_iflags_clear(ip, XFS_ITRUNCATED);
65 ret = filemap_write_and_wait(inode->i_mapping); 52 ret = filemap_write_and_wait(mapping);
66 if (!ret) 53 if (!ret)
67 truncate_inode_pages(inode->i_mapping, first); 54 truncate_inode_pages(mapping, first);
68 } 55 }
69 return ret; 56 return ret;
70} 57}
@@ -77,17 +64,16 @@ xfs_flush_pages(
77 uint64_t flags, 64 uint64_t flags,
78 int fiopt) 65 int fiopt)
79{ 66{
80 bhv_vnode_t *vp = XFS_ITOV(ip); 67 struct address_space *mapping = ip->i_vnode->i_mapping;
81 struct inode *inode = vn_to_inode(vp);
82 int ret = 0; 68 int ret = 0;
83 int ret2; 69 int ret2;
84 70
85 if (VN_DIRTY(vp)) { 71 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
86 xfs_iflags_clear(ip, XFS_ITRUNCATED); 72 xfs_iflags_clear(ip, XFS_ITRUNCATED);
87 ret = filemap_fdatawrite(inode->i_mapping); 73 ret = filemap_fdatawrite(mapping);
88 if (flags & XFS_B_ASYNC) 74 if (flags & XFS_B_ASYNC)
89 return ret; 75 return ret;
90 ret2 = filemap_fdatawait(inode->i_mapping); 76 ret2 = filemap_fdatawait(mapping);
91 if (!ret) 77 if (!ret)
92 ret = ret2; 78 ret = ret2;
93 } 79 }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index a9952e490ac9..4ddb86b73c6b 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -535,8 +535,6 @@ xfs_attrmulti_attr_set(
535 char *kbuf; 535 char *kbuf;
536 int error = EFAULT; 536 int error = EFAULT;
537 537
538 if (IS_RDONLY(inode))
539 return -EROFS;
540 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 538 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
541 return EPERM; 539 return EPERM;
542 if (len > XATTR_SIZE_MAX) 540 if (len > XATTR_SIZE_MAX)
@@ -562,8 +560,6 @@ xfs_attrmulti_attr_remove(
562 char *name, 560 char *name,
563 __uint32_t flags) 561 __uint32_t flags)
564{ 562{
565 if (IS_RDONLY(inode))
566 return -EROFS;
567 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 563 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
568 return EPERM; 564 return EPERM;
569 return xfs_attr_remove(XFS_I(inode), name, flags); 565 return xfs_attr_remove(XFS_I(inode), name, flags);
@@ -573,6 +569,7 @@ STATIC int
573xfs_attrmulti_by_handle( 569xfs_attrmulti_by_handle(
574 xfs_mount_t *mp, 570 xfs_mount_t *mp,
575 void __user *arg, 571 void __user *arg,
572 struct file *parfilp,
576 struct inode *parinode) 573 struct inode *parinode)
577{ 574{
578 int error; 575 int error;
@@ -626,13 +623,21 @@ xfs_attrmulti_by_handle(
626 &ops[i].am_length, ops[i].am_flags); 623 &ops[i].am_length, ops[i].am_flags);
627 break; 624 break;
628 case ATTR_OP_SET: 625 case ATTR_OP_SET:
626 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
627 if (ops[i].am_error)
628 break;
629 ops[i].am_error = xfs_attrmulti_attr_set(inode, 629 ops[i].am_error = xfs_attrmulti_attr_set(inode,
630 attr_name, ops[i].am_attrvalue, 630 attr_name, ops[i].am_attrvalue,
631 ops[i].am_length, ops[i].am_flags); 631 ops[i].am_length, ops[i].am_flags);
632 mnt_drop_write(parfilp->f_path.mnt);
632 break; 633 break;
633 case ATTR_OP_REMOVE: 634 case ATTR_OP_REMOVE:
635 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
636 if (ops[i].am_error)
637 break;
634 ops[i].am_error = xfs_attrmulti_attr_remove(inode, 638 ops[i].am_error = xfs_attrmulti_attr_remove(inode,
635 attr_name, ops[i].am_flags); 639 attr_name, ops[i].am_flags);
640 mnt_drop_write(parfilp->f_path.mnt);
636 break; 641 break;
637 default: 642 default:
638 ops[i].am_error = EINVAL; 643 ops[i].am_error = EINVAL;
@@ -651,314 +656,6 @@ xfs_attrmulti_by_handle(
651 return -error; 656 return -error;
652} 657}
653 658
654/* prototypes for a few of the stack-hungry cases that have
655 * their own functions. Functions are defined after their use
656 * so gcc doesn't get fancy and inline them with -03 */
657
658STATIC int
659xfs_ioc_space(
660 struct xfs_inode *ip,
661 struct inode *inode,
662 struct file *filp,
663 int flags,
664 unsigned int cmd,
665 void __user *arg);
666
667STATIC int
668xfs_ioc_bulkstat(
669 xfs_mount_t *mp,
670 unsigned int cmd,
671 void __user *arg);
672
673STATIC int
674xfs_ioc_fsgeometry_v1(
675 xfs_mount_t *mp,
676 void __user *arg);
677
678STATIC int
679xfs_ioc_fsgeometry(
680 xfs_mount_t *mp,
681 void __user *arg);
682
683STATIC int
684xfs_ioc_xattr(
685 xfs_inode_t *ip,
686 struct file *filp,
687 unsigned int cmd,
688 void __user *arg);
689
690STATIC int
691xfs_ioc_fsgetxattr(
692 xfs_inode_t *ip,
693 int attr,
694 void __user *arg);
695
696STATIC int
697xfs_ioc_getbmap(
698 struct xfs_inode *ip,
699 int flags,
700 unsigned int cmd,
701 void __user *arg);
702
703STATIC int
704xfs_ioc_getbmapx(
705 struct xfs_inode *ip,
706 void __user *arg);
707
708int
709xfs_ioctl(
710 xfs_inode_t *ip,
711 struct file *filp,
712 int ioflags,
713 unsigned int cmd,
714 void __user *arg)
715{
716 struct inode *inode = filp->f_path.dentry->d_inode;
717 xfs_mount_t *mp = ip->i_mount;
718 int error;
719
720 xfs_itrace_entry(XFS_I(inode));
721 switch (cmd) {
722
723 case XFS_IOC_ALLOCSP:
724 case XFS_IOC_FREESP:
725 case XFS_IOC_RESVSP:
726 case XFS_IOC_UNRESVSP:
727 case XFS_IOC_ALLOCSP64:
728 case XFS_IOC_FREESP64:
729 case XFS_IOC_RESVSP64:
730 case XFS_IOC_UNRESVSP64:
731 /*
732 * Only allow the sys admin to reserve space unless
733 * unwritten extents are enabled.
734 */
735 if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) &&
736 !capable(CAP_SYS_ADMIN))
737 return -EPERM;
738
739 return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg);
740
741 case XFS_IOC_DIOINFO: {
742 struct dioattr da;
743 xfs_buftarg_t *target =
744 XFS_IS_REALTIME_INODE(ip) ?
745 mp->m_rtdev_targp : mp->m_ddev_targp;
746
747 da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
748 da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
749
750 if (copy_to_user(arg, &da, sizeof(da)))
751 return -XFS_ERROR(EFAULT);
752 return 0;
753 }
754
755 case XFS_IOC_FSBULKSTAT_SINGLE:
756 case XFS_IOC_FSBULKSTAT:
757 case XFS_IOC_FSINUMBERS:
758 return xfs_ioc_bulkstat(mp, cmd, arg);
759
760 case XFS_IOC_FSGEOMETRY_V1:
761 return xfs_ioc_fsgeometry_v1(mp, arg);
762
763 case XFS_IOC_FSGEOMETRY:
764 return xfs_ioc_fsgeometry(mp, arg);
765
766 case XFS_IOC_GETVERSION:
767 return put_user(inode->i_generation, (int __user *)arg);
768
769 case XFS_IOC_FSGETXATTR:
770 return xfs_ioc_fsgetxattr(ip, 0, arg);
771 case XFS_IOC_FSGETXATTRA:
772 return xfs_ioc_fsgetxattr(ip, 1, arg);
773 case XFS_IOC_GETXFLAGS:
774 case XFS_IOC_SETXFLAGS:
775 case XFS_IOC_FSSETXATTR:
776 return xfs_ioc_xattr(ip, filp, cmd, arg);
777
778 case XFS_IOC_FSSETDM: {
779 struct fsdmidata dmi;
780
781 if (copy_from_user(&dmi, arg, sizeof(dmi)))
782 return -XFS_ERROR(EFAULT);
783
784 error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
785 dmi.fsd_dmstate);
786 return -error;
787 }
788
789 case XFS_IOC_GETBMAP:
790 case XFS_IOC_GETBMAPA:
791 return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
792
793 case XFS_IOC_GETBMAPX:
794 return xfs_ioc_getbmapx(ip, arg);
795
796 case XFS_IOC_FD_TO_HANDLE:
797 case XFS_IOC_PATH_TO_HANDLE:
798 case XFS_IOC_PATH_TO_FSHANDLE:
799 return xfs_find_handle(cmd, arg);
800
801 case XFS_IOC_OPEN_BY_HANDLE:
802 return xfs_open_by_handle(mp, arg, filp, inode);
803
804 case XFS_IOC_FSSETDM_BY_HANDLE:
805 return xfs_fssetdm_by_handle(mp, arg, inode);
806
807 case XFS_IOC_READLINK_BY_HANDLE:
808 return xfs_readlink_by_handle(mp, arg, inode);
809
810 case XFS_IOC_ATTRLIST_BY_HANDLE:
811 return xfs_attrlist_by_handle(mp, arg, inode);
812
813 case XFS_IOC_ATTRMULTI_BY_HANDLE:
814 return xfs_attrmulti_by_handle(mp, arg, inode);
815
816 case XFS_IOC_SWAPEXT: {
817 error = xfs_swapext((struct xfs_swapext __user *)arg);
818 return -error;
819 }
820
821 case XFS_IOC_FSCOUNTS: {
822 xfs_fsop_counts_t out;
823
824 error = xfs_fs_counts(mp, &out);
825 if (error)
826 return -error;
827
828 if (copy_to_user(arg, &out, sizeof(out)))
829 return -XFS_ERROR(EFAULT);
830 return 0;
831 }
832
833 case XFS_IOC_SET_RESBLKS: {
834 xfs_fsop_resblks_t inout;
835 __uint64_t in;
836
837 if (!capable(CAP_SYS_ADMIN))
838 return -EPERM;
839
840 if (copy_from_user(&inout, arg, sizeof(inout)))
841 return -XFS_ERROR(EFAULT);
842
843 /* input parameter is passed in resblks field of structure */
844 in = inout.resblks;
845 error = xfs_reserve_blocks(mp, &in, &inout);
846 if (error)
847 return -error;
848
849 if (copy_to_user(arg, &inout, sizeof(inout)))
850 return -XFS_ERROR(EFAULT);
851 return 0;
852 }
853
854 case XFS_IOC_GET_RESBLKS: {
855 xfs_fsop_resblks_t out;
856
857 if (!capable(CAP_SYS_ADMIN))
858 return -EPERM;
859
860 error = xfs_reserve_blocks(mp, NULL, &out);
861 if (error)
862 return -error;
863
864 if (copy_to_user(arg, &out, sizeof(out)))
865 return -XFS_ERROR(EFAULT);
866
867 return 0;
868 }
869
870 case XFS_IOC_FSGROWFSDATA: {
871 xfs_growfs_data_t in;
872
873 if (!capable(CAP_SYS_ADMIN))
874 return -EPERM;
875
876 if (copy_from_user(&in, arg, sizeof(in)))
877 return -XFS_ERROR(EFAULT);
878
879 error = xfs_growfs_data(mp, &in);
880 return -error;
881 }
882
883 case XFS_IOC_FSGROWFSLOG: {
884 xfs_growfs_log_t in;
885
886 if (!capable(CAP_SYS_ADMIN))
887 return -EPERM;
888
889 if (copy_from_user(&in, arg, sizeof(in)))
890 return -XFS_ERROR(EFAULT);
891
892 error = xfs_growfs_log(mp, &in);
893 return -error;
894 }
895
896 case XFS_IOC_FSGROWFSRT: {
897 xfs_growfs_rt_t in;
898
899 if (!capable(CAP_SYS_ADMIN))
900 return -EPERM;
901
902 if (copy_from_user(&in, arg, sizeof(in)))
903 return -XFS_ERROR(EFAULT);
904
905 error = xfs_growfs_rt(mp, &in);
906 return -error;
907 }
908
909 case XFS_IOC_FREEZE:
910 if (!capable(CAP_SYS_ADMIN))
911 return -EPERM;
912
913 if (inode->i_sb->s_frozen == SB_UNFROZEN)
914 freeze_bdev(inode->i_sb->s_bdev);
915 return 0;
916
917 case XFS_IOC_THAW:
918 if (!capable(CAP_SYS_ADMIN))
919 return -EPERM;
920 if (inode->i_sb->s_frozen != SB_UNFROZEN)
921 thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
922 return 0;
923
924 case XFS_IOC_GOINGDOWN: {
925 __uint32_t in;
926
927 if (!capable(CAP_SYS_ADMIN))
928 return -EPERM;
929
930 if (get_user(in, (__uint32_t __user *)arg))
931 return -XFS_ERROR(EFAULT);
932
933 error = xfs_fs_goingdown(mp, in);
934 return -error;
935 }
936
937 case XFS_IOC_ERROR_INJECTION: {
938 xfs_error_injection_t in;
939
940 if (!capable(CAP_SYS_ADMIN))
941 return -EPERM;
942
943 if (copy_from_user(&in, arg, sizeof(in)))
944 return -XFS_ERROR(EFAULT);
945
946 error = xfs_errortag_add(in.errtag, mp);
947 return -error;
948 }
949
950 case XFS_IOC_ERROR_CLEARALL:
951 if (!capable(CAP_SYS_ADMIN))
952 return -EPERM;
953
954 error = xfs_errortag_clearall(mp, 1);
955 return -error;
956
957 default:
958 return -ENOTTY;
959 }
960}
961
962STATIC int 659STATIC int
963xfs_ioc_space( 660xfs_ioc_space(
964 struct xfs_inode *ip, 661 struct xfs_inode *ip,
@@ -1179,85 +876,85 @@ xfs_ioc_fsgetxattr(
1179} 876}
1180 877
1181STATIC int 878STATIC int
1182xfs_ioc_xattr( 879xfs_ioc_fssetxattr(
1183 xfs_inode_t *ip, 880 xfs_inode_t *ip,
1184 struct file *filp, 881 struct file *filp,
1185 unsigned int cmd,
1186 void __user *arg) 882 void __user *arg)
1187{ 883{
1188 struct fsxattr fa; 884 struct fsxattr fa;
1189 struct bhv_vattr *vattr; 885 struct bhv_vattr *vattr;
1190 int error = 0; 886 int error;
1191 int attr_flags; 887 int attr_flags;
1192 unsigned int flags; 888
889 if (copy_from_user(&fa, arg, sizeof(fa)))
890 return -EFAULT;
1193 891
1194 vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); 892 vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
1195 if (unlikely(!vattr)) 893 if (unlikely(!vattr))
1196 return -ENOMEM; 894 return -ENOMEM;
1197 895
1198 switch (cmd) { 896 attr_flags = 0;
1199 case XFS_IOC_FSSETXATTR: { 897 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
1200 if (copy_from_user(&fa, arg, sizeof(fa))) { 898 attr_flags |= ATTR_NONBLOCK;
1201 error = -EFAULT;
1202 break;
1203 }
1204 899
1205 attr_flags = 0; 900 vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
1206 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 901 vattr->va_xflags = fa.fsx_xflags;
1207 attr_flags |= ATTR_NONBLOCK; 902 vattr->va_extsize = fa.fsx_extsize;
903 vattr->va_projid = fa.fsx_projid;
1208 904
1209 vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID; 905 error = -xfs_setattr(ip, vattr, attr_flags, NULL);
1210 vattr->va_xflags = fa.fsx_xflags; 906 if (!error)
1211 vattr->va_extsize = fa.fsx_extsize; 907 vn_revalidate(XFS_ITOV(ip)); /* update flags */
1212 vattr->va_projid = fa.fsx_projid; 908 kfree(vattr);
909 return 0;
910}
1213 911
1214 error = xfs_setattr(ip, vattr, attr_flags, NULL); 912STATIC int
1215 if (likely(!error)) 913xfs_ioc_getxflags(
1216 vn_revalidate(XFS_ITOV(ip)); /* update flags */ 914 xfs_inode_t *ip,
1217 error = -error; 915 void __user *arg)
1218 break; 916{
1219 } 917 unsigned int flags;
1220 918
1221 case XFS_IOC_GETXFLAGS: { 919 flags = xfs_di2lxflags(ip->i_d.di_flags);
1222 flags = xfs_di2lxflags(ip->i_d.di_flags); 920 if (copy_to_user(arg, &flags, sizeof(flags)))
1223 if (copy_to_user(arg, &flags, sizeof(flags))) 921 return -EFAULT;
1224 error = -EFAULT; 922 return 0;
1225 break; 923}
1226 }
1227 924
1228 case XFS_IOC_SETXFLAGS: { 925STATIC int
1229 if (copy_from_user(&flags, arg, sizeof(flags))) { 926xfs_ioc_setxflags(
1230 error = -EFAULT; 927 xfs_inode_t *ip,
1231 break; 928 struct file *filp,
1232 } 929 void __user *arg)
930{
931 struct bhv_vattr *vattr;
932 unsigned int flags;
933 int attr_flags;
934 int error;
1233 935
1234 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ 936 if (copy_from_user(&flags, arg, sizeof(flags)))
1235 FS_NOATIME_FL | FS_NODUMP_FL | \ 937 return -EFAULT;
1236 FS_SYNC_FL)) {
1237 error = -EOPNOTSUPP;
1238 break;
1239 }
1240 938
1241 attr_flags = 0; 939 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
1242 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 940 FS_NOATIME_FL | FS_NODUMP_FL | \
1243 attr_flags |= ATTR_NONBLOCK; 941 FS_SYNC_FL))
942 return -EOPNOTSUPP;
1244 943
1245 vattr->va_mask = XFS_AT_XFLAGS; 944 vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
1246 vattr->va_xflags = xfs_merge_ioc_xflags(flags, 945 if (unlikely(!vattr))
1247 xfs_ip2xflags(ip)); 946 return -ENOMEM;
1248 947
1249 error = xfs_setattr(ip, vattr, attr_flags, NULL); 948 attr_flags = 0;
1250 if (likely(!error)) 949 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
1251 vn_revalidate(XFS_ITOV(ip)); /* update flags */ 950 attr_flags |= ATTR_NONBLOCK;
1252 error = -error;
1253 break;
1254 }
1255 951
1256 default: 952 vattr->va_mask = XFS_AT_XFLAGS;
1257 error = -ENOTTY; 953 vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
1258 break;
1259 }
1260 954
955 error = -xfs_setattr(ip, vattr, attr_flags, NULL);
956 if (likely(!error))
957 vn_revalidate(XFS_ITOV(ip)); /* update flags */
1261 kfree(vattr); 958 kfree(vattr);
1262 return error; 959 return error;
1263} 960}
@@ -1332,3 +1029,259 @@ xfs_ioc_getbmapx(
1332 1029
1333 return 0; 1030 return 0;
1334} 1031}
1032
1033int
1034xfs_ioctl(
1035 xfs_inode_t *ip,
1036 struct file *filp,
1037 int ioflags,
1038 unsigned int cmd,
1039 void __user *arg)
1040{
1041 struct inode *inode = filp->f_path.dentry->d_inode;
1042 xfs_mount_t *mp = ip->i_mount;
1043 int error;
1044
1045 xfs_itrace_entry(XFS_I(inode));
1046 switch (cmd) {
1047
1048 case XFS_IOC_ALLOCSP:
1049 case XFS_IOC_FREESP:
1050 case XFS_IOC_RESVSP:
1051 case XFS_IOC_UNRESVSP:
1052 case XFS_IOC_ALLOCSP64:
1053 case XFS_IOC_FREESP64:
1054 case XFS_IOC_RESVSP64:
1055 case XFS_IOC_UNRESVSP64:
1056 /*
1057 * Only allow the sys admin to reserve space unless
1058 * unwritten extents are enabled.
1059 */
1060 if (!xfs_sb_version_hasextflgbit(&mp->m_sb) &&
1061 !capable(CAP_SYS_ADMIN))
1062 return -EPERM;
1063
1064 return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg);
1065
1066 case XFS_IOC_DIOINFO: {
1067 struct dioattr da;
1068 xfs_buftarg_t *target =
1069 XFS_IS_REALTIME_INODE(ip) ?
1070 mp->m_rtdev_targp : mp->m_ddev_targp;
1071
1072 da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
1073 da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
1074
1075 if (copy_to_user(arg, &da, sizeof(da)))
1076 return -XFS_ERROR(EFAULT);
1077 return 0;
1078 }
1079
1080 case XFS_IOC_FSBULKSTAT_SINGLE:
1081 case XFS_IOC_FSBULKSTAT:
1082 case XFS_IOC_FSINUMBERS:
1083 return xfs_ioc_bulkstat(mp, cmd, arg);
1084
1085 case XFS_IOC_FSGEOMETRY_V1:
1086 return xfs_ioc_fsgeometry_v1(mp, arg);
1087
1088 case XFS_IOC_FSGEOMETRY:
1089 return xfs_ioc_fsgeometry(mp, arg);
1090
1091 case XFS_IOC_GETVERSION:
1092 return put_user(inode->i_generation, (int __user *)arg);
1093
1094 case XFS_IOC_FSGETXATTR:
1095 return xfs_ioc_fsgetxattr(ip, 0, arg);
1096 case XFS_IOC_FSGETXATTRA:
1097 return xfs_ioc_fsgetxattr(ip, 1, arg);
1098 case XFS_IOC_FSSETXATTR:
1099 return xfs_ioc_fssetxattr(ip, filp, arg);
1100 case XFS_IOC_GETXFLAGS:
1101 return xfs_ioc_getxflags(ip, arg);
1102 case XFS_IOC_SETXFLAGS:
1103 return xfs_ioc_setxflags(ip, filp, arg);
1104
1105 case XFS_IOC_FSSETDM: {
1106 struct fsdmidata dmi;
1107
1108 if (copy_from_user(&dmi, arg, sizeof(dmi)))
1109 return -XFS_ERROR(EFAULT);
1110
1111 error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
1112 dmi.fsd_dmstate);
1113 return -error;
1114 }
1115
1116 case XFS_IOC_GETBMAP:
1117 case XFS_IOC_GETBMAPA:
1118 return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
1119
1120 case XFS_IOC_GETBMAPX:
1121 return xfs_ioc_getbmapx(ip, arg);
1122
1123 case XFS_IOC_FD_TO_HANDLE:
1124 case XFS_IOC_PATH_TO_HANDLE:
1125 case XFS_IOC_PATH_TO_FSHANDLE:
1126 return xfs_find_handle(cmd, arg);
1127
1128 case XFS_IOC_OPEN_BY_HANDLE:
1129 return xfs_open_by_handle(mp, arg, filp, inode);
1130
1131 case XFS_IOC_FSSETDM_BY_HANDLE:
1132 return xfs_fssetdm_by_handle(mp, arg, inode);
1133
1134 case XFS_IOC_READLINK_BY_HANDLE:
1135 return xfs_readlink_by_handle(mp, arg, inode);
1136
1137 case XFS_IOC_ATTRLIST_BY_HANDLE:
1138 return xfs_attrlist_by_handle(mp, arg, inode);
1139
1140 case XFS_IOC_ATTRMULTI_BY_HANDLE:
1141 return xfs_attrmulti_by_handle(mp, arg, filp, inode);
1142
1143 case XFS_IOC_SWAPEXT: {
1144 error = xfs_swapext((struct xfs_swapext __user *)arg);
1145 return -error;
1146 }
1147
1148 case XFS_IOC_FSCOUNTS: {
1149 xfs_fsop_counts_t out;
1150
1151 error = xfs_fs_counts(mp, &out);
1152 if (error)
1153 return -error;
1154
1155 if (copy_to_user(arg, &out, sizeof(out)))
1156 return -XFS_ERROR(EFAULT);
1157 return 0;
1158 }
1159
1160 case XFS_IOC_SET_RESBLKS: {
1161 xfs_fsop_resblks_t inout;
1162 __uint64_t in;
1163
1164 if (!capable(CAP_SYS_ADMIN))
1165 return -EPERM;
1166
1167 if (copy_from_user(&inout, arg, sizeof(inout)))
1168 return -XFS_ERROR(EFAULT);
1169
1170 /* input parameter is passed in resblks field of structure */
1171 in = inout.resblks;
1172 error = xfs_reserve_blocks(mp, &in, &inout);
1173 if (error)
1174 return -error;
1175
1176 if (copy_to_user(arg, &inout, sizeof(inout)))
1177 return -XFS_ERROR(EFAULT);
1178 return 0;
1179 }
1180
1181 case XFS_IOC_GET_RESBLKS: {
1182 xfs_fsop_resblks_t out;
1183
1184 if (!capable(CAP_SYS_ADMIN))
1185 return -EPERM;
1186
1187 error = xfs_reserve_blocks(mp, NULL, &out);
1188 if (error)
1189 return -error;
1190
1191 if (copy_to_user(arg, &out, sizeof(out)))
1192 return -XFS_ERROR(EFAULT);
1193
1194 return 0;
1195 }
1196
1197 case XFS_IOC_FSGROWFSDATA: {
1198 xfs_growfs_data_t in;
1199
1200 if (!capable(CAP_SYS_ADMIN))
1201 return -EPERM;
1202
1203 if (copy_from_user(&in, arg, sizeof(in)))
1204 return -XFS_ERROR(EFAULT);
1205
1206 error = xfs_growfs_data(mp, &in);
1207 return -error;
1208 }
1209
1210 case XFS_IOC_FSGROWFSLOG: {
1211 xfs_growfs_log_t in;
1212
1213 if (!capable(CAP_SYS_ADMIN))
1214 return -EPERM;
1215
1216 if (copy_from_user(&in, arg, sizeof(in)))
1217 return -XFS_ERROR(EFAULT);
1218
1219 error = xfs_growfs_log(mp, &in);
1220 return -error;
1221 }
1222
1223 case XFS_IOC_FSGROWFSRT: {
1224 xfs_growfs_rt_t in;
1225
1226 if (!capable(CAP_SYS_ADMIN))
1227 return -EPERM;
1228
1229 if (copy_from_user(&in, arg, sizeof(in)))
1230 return -XFS_ERROR(EFAULT);
1231
1232 error = xfs_growfs_rt(mp, &in);
1233 return -error;
1234 }
1235
1236 case XFS_IOC_FREEZE:
1237 if (!capable(CAP_SYS_ADMIN))
1238 return -EPERM;
1239
1240 if (inode->i_sb->s_frozen == SB_UNFROZEN)
1241 freeze_bdev(inode->i_sb->s_bdev);
1242 return 0;
1243
1244 case XFS_IOC_THAW:
1245 if (!capable(CAP_SYS_ADMIN))
1246 return -EPERM;
1247 if (inode->i_sb->s_frozen != SB_UNFROZEN)
1248 thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
1249 return 0;
1250
1251 case XFS_IOC_GOINGDOWN: {
1252 __uint32_t in;
1253
1254 if (!capable(CAP_SYS_ADMIN))
1255 return -EPERM;
1256
1257 if (get_user(in, (__uint32_t __user *)arg))
1258 return -XFS_ERROR(EFAULT);
1259
1260 error = xfs_fs_goingdown(mp, in);
1261 return -error;
1262 }
1263
1264 case XFS_IOC_ERROR_INJECTION: {
1265 xfs_error_injection_t in;
1266
1267 if (!capable(CAP_SYS_ADMIN))
1268 return -EPERM;
1269
1270 if (copy_from_user(&in, arg, sizeof(in)))
1271 return -XFS_ERROR(EFAULT);
1272
1273 error = xfs_errortag_add(in.errtag, mp);
1274 return -error;
1275 }
1276
1277 case XFS_IOC_ERROR_CLEARALL:
1278 if (!capable(CAP_SYS_ADMIN))
1279 return -EPERM;
1280
1281 error = xfs_errortag_clearall(mp, 1);
1282 return -error;
1283
1284 default:
1285 return -ENOTTY;
1286 }
1287}
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index cc4abd3daa49..a1237dad6430 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -62,12 +62,11 @@ void
62xfs_synchronize_atime( 62xfs_synchronize_atime(
63 xfs_inode_t *ip) 63 xfs_inode_t *ip)
64{ 64{
65 bhv_vnode_t *vp; 65 struct inode *inode = ip->i_vnode;
66 66
67 vp = XFS_ITOV_NULL(ip); 67 if (inode) {
68 if (vp) { 68 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
69 ip->i_d.di_atime.t_sec = (__int32_t)vp->i_atime.tv_sec; 69 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
70 ip->i_d.di_atime.t_nsec = (__int32_t)vp->i_atime.tv_nsec;
71 } 70 }
72} 71}
73 72
@@ -80,11 +79,10 @@ void
80xfs_mark_inode_dirty_sync( 79xfs_mark_inode_dirty_sync(
81 xfs_inode_t *ip) 80 xfs_inode_t *ip)
82{ 81{
83 bhv_vnode_t *vp; 82 struct inode *inode = ip->i_vnode;
84 83
85 vp = XFS_ITOV_NULL(ip); 84 if (inode)
86 if (vp) 85 mark_inode_dirty_sync(inode);
87 mark_inode_dirty_sync(vn_to_inode(vp));
88} 86}
89 87
90/* 88/*
@@ -157,13 +155,6 @@ xfs_ichgtime_fast(
157 */ 155 */
158 ASSERT((flags & XFS_ICHGTIME_ACC) == 0); 156 ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
159 157
160 /*
161 * We're not supposed to change timestamps in readonly-mounted
162 * filesystems. Throw it away if anyone asks us.
163 */
164 if (unlikely(IS_RDONLY(inode)))
165 return;
166
167 if (flags & XFS_ICHGTIME_MOD) { 158 if (flags & XFS_ICHGTIME_MOD) {
168 tvp = &inode->i_mtime; 159 tvp = &inode->i_mtime;
169 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec; 160 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
@@ -215,66 +206,62 @@ xfs_validate_fields(
215 */ 206 */
216STATIC int 207STATIC int
217xfs_init_security( 208xfs_init_security(
218 bhv_vnode_t *vp, 209 struct inode *inode,
219 struct inode *dir) 210 struct inode *dir)
220{ 211{
221 struct inode *ip = vn_to_inode(vp); 212 struct xfs_inode *ip = XFS_I(inode);
222 size_t length; 213 size_t length;
223 void *value; 214 void *value;
224 char *name; 215 char *name;
225 int error; 216 int error;
226 217
227 error = security_inode_init_security(ip, dir, &name, &value, &length); 218 error = security_inode_init_security(inode, dir, &name,
219 &value, &length);
228 if (error) { 220 if (error) {
229 if (error == -EOPNOTSUPP) 221 if (error == -EOPNOTSUPP)
230 return 0; 222 return 0;
231 return -error; 223 return -error;
232 } 224 }
233 225
234 error = xfs_attr_set(XFS_I(ip), name, value, 226 error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
235 length, ATTR_SECURE);
236 if (!error) 227 if (!error)
237 xfs_iflags_set(XFS_I(ip), XFS_IMODIFIED); 228 xfs_iflags_set(ip, XFS_IMODIFIED);
238 229
239 kfree(name); 230 kfree(name);
240 kfree(value); 231 kfree(value);
241 return error; 232 return error;
242} 233}
243 234
244/* 235static void
245 * Determine whether a process has a valid fs_struct (kernel daemons 236xfs_dentry_to_name(
246 * like knfsd don't have an fs_struct). 237 struct xfs_name *namep,
247 * 238 struct dentry *dentry)
248 * XXX(hch): nfsd is broken, better fix it instead.
249 */
250STATIC_INLINE int
251xfs_has_fs_struct(struct task_struct *task)
252{ 239{
253 return (task->fs != init_task.fs); 240 namep->name = dentry->d_name.name;
241 namep->len = dentry->d_name.len;
254} 242}
255 243
256STATIC void 244STATIC void
257xfs_cleanup_inode( 245xfs_cleanup_inode(
258 struct inode *dir, 246 struct inode *dir,
259 bhv_vnode_t *vp, 247 struct inode *inode,
260 struct dentry *dentry, 248 struct dentry *dentry,
261 int mode) 249 int mode)
262{ 250{
263 struct dentry teardown = {}; 251 struct xfs_name teardown;
264 252
265 /* Oh, the horror. 253 /* Oh, the horror.
266 * If we can't add the ACL or we fail in 254 * If we can't add the ACL or we fail in
267 * xfs_init_security we must back out. 255 * xfs_init_security we must back out.
268 * ENOSPC can hit here, among other things. 256 * ENOSPC can hit here, among other things.
269 */ 257 */
270 teardown.d_inode = vn_to_inode(vp); 258 xfs_dentry_to_name(&teardown, dentry);
271 teardown.d_name = dentry->d_name;
272 259
273 if (S_ISDIR(mode)) 260 if (S_ISDIR(mode))
274 xfs_rmdir(XFS_I(dir), &teardown); 261 xfs_rmdir(XFS_I(dir), &teardown, XFS_I(inode));
275 else 262 else
276 xfs_remove(XFS_I(dir), &teardown); 263 xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
277 VN_RELE(vp); 264 iput(inode);
278} 265}
279 266
280STATIC int 267STATIC int
@@ -284,9 +271,10 @@ xfs_vn_mknod(
284 int mode, 271 int mode,
285 dev_t rdev) 272 dev_t rdev)
286{ 273{
287 struct inode *ip; 274 struct inode *inode;
288 bhv_vnode_t *vp = NULL, *dvp = vn_from_inode(dir); 275 struct xfs_inode *ip = NULL;
289 xfs_acl_t *default_acl = NULL; 276 xfs_acl_t *default_acl = NULL;
277 struct xfs_name name;
290 attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; 278 attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS;
291 int error; 279 int error;
292 280
@@ -297,59 +285,67 @@ xfs_vn_mknod(
297 if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) 285 if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
298 return -EINVAL; 286 return -EINVAL;
299 287
300 if (unlikely(test_default_acl && test_default_acl(dvp))) { 288 if (test_default_acl && test_default_acl(dir)) {
301 if (!_ACL_ALLOC(default_acl)) { 289 if (!_ACL_ALLOC(default_acl)) {
302 return -ENOMEM; 290 return -ENOMEM;
303 } 291 }
304 if (!_ACL_GET_DEFAULT(dvp, default_acl)) { 292 if (!_ACL_GET_DEFAULT(dir, default_acl)) {
305 _ACL_FREE(default_acl); 293 _ACL_FREE(default_acl);
306 default_acl = NULL; 294 default_acl = NULL;
307 } 295 }
308 } 296 }
309 297
310 if (IS_POSIXACL(dir) && !default_acl && xfs_has_fs_struct(current)) 298 xfs_dentry_to_name(&name, dentry);
299
300 if (IS_POSIXACL(dir) && !default_acl)
311 mode &= ~current->fs->umask; 301 mode &= ~current->fs->umask;
312 302
313 switch (mode & S_IFMT) { 303 switch (mode & S_IFMT) {
314 case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: 304 case S_IFCHR:
305 case S_IFBLK:
306 case S_IFIFO:
307 case S_IFSOCK:
315 rdev = sysv_encode_dev(rdev); 308 rdev = sysv_encode_dev(rdev);
316 case S_IFREG: 309 case S_IFREG:
317 error = xfs_create(XFS_I(dir), dentry, mode, rdev, &vp, NULL); 310 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
318 break; 311 break;
319 case S_IFDIR: 312 case S_IFDIR:
320 error = xfs_mkdir(XFS_I(dir), dentry, mode, &vp, NULL); 313 error = xfs_mkdir(XFS_I(dir), &name, mode, &ip, NULL);
321 break; 314 break;
322 default: 315 default:
323 error = EINVAL; 316 error = EINVAL;
324 break; 317 break;
325 } 318 }
326 319
327 if (unlikely(!error)) { 320 if (unlikely(error))
328 error = xfs_init_security(vp, dir); 321 goto out_free_acl;
329 if (error)
330 xfs_cleanup_inode(dir, vp, dentry, mode);
331 }
332 322
333 if (unlikely(default_acl)) { 323 inode = ip->i_vnode;
334 if (!error) { 324
335 error = _ACL_INHERIT(vp, mode, default_acl); 325 error = xfs_init_security(inode, dir);
336 if (!error) 326 if (unlikely(error))
337 xfs_iflags_set(XFS_I(vp), XFS_IMODIFIED); 327 goto out_cleanup_inode;
338 else 328
339 xfs_cleanup_inode(dir, vp, dentry, mode); 329 if (default_acl) {
340 } 330 error = _ACL_INHERIT(inode, mode, default_acl);
331 if (unlikely(error))
332 goto out_cleanup_inode;
333 xfs_iflags_set(ip, XFS_IMODIFIED);
341 _ACL_FREE(default_acl); 334 _ACL_FREE(default_acl);
342 } 335 }
343 336
344 if (likely(!error)) {
345 ASSERT(vp);
346 ip = vn_to_inode(vp);
347 337
348 if (S_ISDIR(mode)) 338 if (S_ISDIR(mode))
349 xfs_validate_fields(ip); 339 xfs_validate_fields(inode);
350 d_instantiate(dentry, ip); 340 d_instantiate(dentry, inode);
351 xfs_validate_fields(dir); 341 xfs_validate_fields(dir);
352 } 342 return -error;
343
344 out_cleanup_inode:
345 xfs_cleanup_inode(dir, inode, dentry, mode);
346 out_free_acl:
347 if (default_acl)
348 _ACL_FREE(default_acl);
353 return -error; 349 return -error;
354} 350}
355 351
@@ -378,13 +374,15 @@ xfs_vn_lookup(
378 struct dentry *dentry, 374 struct dentry *dentry,
379 struct nameidata *nd) 375 struct nameidata *nd)
380{ 376{
381 bhv_vnode_t *cvp; 377 struct xfs_inode *cip;
378 struct xfs_name name;
382 int error; 379 int error;
383 380
384 if (dentry->d_name.len >= MAXNAMELEN) 381 if (dentry->d_name.len >= MAXNAMELEN)
385 return ERR_PTR(-ENAMETOOLONG); 382 return ERR_PTR(-ENAMETOOLONG);
386 383
387 error = xfs_lookup(XFS_I(dir), dentry, &cvp); 384 xfs_dentry_to_name(&name, dentry);
385 error = xfs_lookup(XFS_I(dir), &name, &cip);
388 if (unlikely(error)) { 386 if (unlikely(error)) {
389 if (unlikely(error != ENOENT)) 387 if (unlikely(error != ENOENT))
390 return ERR_PTR(-error); 388 return ERR_PTR(-error);
@@ -392,7 +390,7 @@ xfs_vn_lookup(
392 return NULL; 390 return NULL;
393 } 391 }
394 392
395 return d_splice_alias(vn_to_inode(cvp), dentry); 393 return d_splice_alias(cip->i_vnode, dentry);
396} 394}
397 395
398STATIC int 396STATIC int
@@ -401,23 +399,24 @@ xfs_vn_link(
401 struct inode *dir, 399 struct inode *dir,
402 struct dentry *dentry) 400 struct dentry *dentry)
403{ 401{
404 struct inode *ip; /* inode of guy being linked to */ 402 struct inode *inode; /* inode of guy being linked to */
405 bhv_vnode_t *vp; /* vp of name being linked */ 403 struct xfs_name name;
406 int error; 404 int error;
407 405
408 ip = old_dentry->d_inode; /* inode being linked to */ 406 inode = old_dentry->d_inode;
409 vp = vn_from_inode(ip); 407 xfs_dentry_to_name(&name, dentry);
410 408
411 VN_HOLD(vp); 409 igrab(inode);
412 error = xfs_link(XFS_I(dir), vp, dentry); 410 error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
413 if (unlikely(error)) { 411 if (unlikely(error)) {
414 VN_RELE(vp); 412 iput(inode);
415 } else { 413 return -error;
416 xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
417 xfs_validate_fields(ip);
418 d_instantiate(dentry, ip);
419 } 414 }
420 return -error; 415
416 xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
417 xfs_validate_fields(inode);
418 d_instantiate(dentry, inode);
419 return 0;
421} 420}
422 421
423STATIC int 422STATIC int
@@ -426,11 +425,13 @@ xfs_vn_unlink(
426 struct dentry *dentry) 425 struct dentry *dentry)
427{ 426{
428 struct inode *inode; 427 struct inode *inode;
428 struct xfs_name name;
429 int error; 429 int error;
430 430
431 inode = dentry->d_inode; 431 inode = dentry->d_inode;
432 xfs_dentry_to_name(&name, dentry);
432 433
433 error = xfs_remove(XFS_I(dir), dentry); 434 error = xfs_remove(XFS_I(dir), &name, XFS_I(inode));
434 if (likely(!error)) { 435 if (likely(!error)) {
435 xfs_validate_fields(dir); /* size needs update */ 436 xfs_validate_fields(dir); /* size needs update */
436 xfs_validate_fields(inode); 437 xfs_validate_fields(inode);
@@ -444,29 +445,34 @@ xfs_vn_symlink(
444 struct dentry *dentry, 445 struct dentry *dentry,
445 const char *symname) 446 const char *symname)
446{ 447{
447 struct inode *ip; 448 struct inode *inode;
448 bhv_vnode_t *cvp; /* used to lookup symlink to put in dentry */ 449 struct xfs_inode *cip = NULL;
450 struct xfs_name name;
449 int error; 451 int error;
450 mode_t mode; 452 mode_t mode;
451 453
452 cvp = NULL;
453
454 mode = S_IFLNK | 454 mode = S_IFLNK |
455 (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); 455 (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
456 xfs_dentry_to_name(&name, dentry);
456 457
457 error = xfs_symlink(XFS_I(dir), dentry, (char *)symname, mode, 458 error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL);
458 &cvp, NULL); 459 if (unlikely(error))
459 if (likely(!error && cvp)) { 460 goto out;
460 error = xfs_init_security(cvp, dir); 461
461 if (likely(!error)) { 462 inode = cip->i_vnode;
462 ip = vn_to_inode(cvp); 463
463 d_instantiate(dentry, ip); 464 error = xfs_init_security(inode, dir);
464 xfs_validate_fields(dir); 465 if (unlikely(error))
465 xfs_validate_fields(ip); 466 goto out_cleanup_inode;
466 } else { 467
467 xfs_cleanup_inode(dir, cvp, dentry, 0); 468 d_instantiate(dentry, inode);
468 } 469 xfs_validate_fields(dir);
469 } 470 xfs_validate_fields(inode);
471 return 0;
472
473 out_cleanup_inode:
474 xfs_cleanup_inode(dir, inode, dentry, 0);
475 out:
470 return -error; 476 return -error;
471} 477}
472 478
@@ -476,9 +482,12 @@ xfs_vn_rmdir(
476 struct dentry *dentry) 482 struct dentry *dentry)
477{ 483{
478 struct inode *inode = dentry->d_inode; 484 struct inode *inode = dentry->d_inode;
485 struct xfs_name name;
479 int error; 486 int error;
480 487
481 error = xfs_rmdir(XFS_I(dir), dentry); 488 xfs_dentry_to_name(&name, dentry);
489
490 error = xfs_rmdir(XFS_I(dir), &name, XFS_I(inode));
482 if (likely(!error)) { 491 if (likely(!error)) {
483 xfs_validate_fields(inode); 492 xfs_validate_fields(inode);
484 xfs_validate_fields(dir); 493 xfs_validate_fields(dir);
@@ -494,12 +503,15 @@ xfs_vn_rename(
494 struct dentry *ndentry) 503 struct dentry *ndentry)
495{ 504{
496 struct inode *new_inode = ndentry->d_inode; 505 struct inode *new_inode = ndentry->d_inode;
497 bhv_vnode_t *tvp; /* target directory */ 506 struct xfs_name oname;
507 struct xfs_name nname;
498 int error; 508 int error;
499 509
500 tvp = vn_from_inode(ndir); 510 xfs_dentry_to_name(&oname, odentry);
511 xfs_dentry_to_name(&nname, ndentry);
501 512
502 error = xfs_rename(XFS_I(odir), odentry, tvp, ndentry); 513 error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
514 XFS_I(ndir), &nname);
503 if (likely(!error)) { 515 if (likely(!error)) {
504 if (new_inode) 516 if (new_inode)
505 xfs_validate_fields(new_inode); 517 xfs_validate_fields(new_inode);
@@ -700,11 +712,19 @@ xfs_vn_setattr(
700 return -error; 712 return -error;
701} 713}
702 714
715/*
716 * block_truncate_page can return an error, but we can't propagate it
717 * at all here. Leave a complaint + stack trace in the syslog because
718 * this could be bad. If it is bad, we need to propagate the error further.
719 */
703STATIC void 720STATIC void
704xfs_vn_truncate( 721xfs_vn_truncate(
705 struct inode *inode) 722 struct inode *inode)
706{ 723{
707 block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_blocks); 724 int error;
725 error = block_truncate_page(inode->i_mapping, inode->i_size,
726 xfs_get_blocks);
727 WARN_ON(error);
708} 728}
709 729
710STATIC int 730STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 3ca39c4e5d2a..e5143323e71f 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -99,7 +99,6 @@
99/* 99/*
100 * Feature macros (disable/enable) 100 * Feature macros (disable/enable)
101 */ 101 */
102#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */
103#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */ 102#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */
104#ifdef CONFIG_SMP 103#ifdef CONFIG_SMP
105#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ 104#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 166353388490..1ebd8004469c 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -51,6 +51,7 @@
51#include "xfs_vnodeops.h" 51#include "xfs_vnodeops.h"
52 52
53#include <linux/capability.h> 53#include <linux/capability.h>
54#include <linux/mount.h>
54#include <linux/writeback.h> 55#include <linux/writeback.h>
55 56
56 57
@@ -176,7 +177,6 @@ xfs_read(
176{ 177{
177 struct file *file = iocb->ki_filp; 178 struct file *file = iocb->ki_filp;
178 struct inode *inode = file->f_mapping->host; 179 struct inode *inode = file->f_mapping->host;
179 bhv_vnode_t *vp = XFS_ITOV(ip);
180 xfs_mount_t *mp = ip->i_mount; 180 xfs_mount_t *mp = ip->i_mount;
181 size_t size = 0; 181 size_t size = 0;
182 ssize_t ret = 0; 182 ssize_t ret = 0;
@@ -228,11 +228,11 @@ xfs_read(
228 xfs_ilock(ip, XFS_IOLOCK_SHARED); 228 xfs_ilock(ip, XFS_IOLOCK_SHARED);
229 229
230 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { 230 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
231 bhv_vrwlock_t locktype = VRWLOCK_READ;
232 int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); 231 int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
232 int iolock = XFS_IOLOCK_SHARED;
233 233
234 ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *offset, size, 234 ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size,
235 dmflags, &locktype); 235 dmflags, &iolock);
236 if (ret) { 236 if (ret) {
237 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 237 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
238 if (unlikely(ioflags & IO_ISDIRECT)) 238 if (unlikely(ioflags & IO_ISDIRECT))
@@ -242,7 +242,7 @@ xfs_read(
242 } 242 }
243 243
244 if (unlikely(ioflags & IO_ISDIRECT)) { 244 if (unlikely(ioflags & IO_ISDIRECT)) {
245 if (VN_CACHED(vp)) 245 if (inode->i_mapping->nrpages)
246 ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), 246 ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
247 -1, FI_REMAPF_LOCKED); 247 -1, FI_REMAPF_LOCKED);
248 mutex_unlock(&inode->i_mutex); 248 mutex_unlock(&inode->i_mutex);
@@ -276,7 +276,6 @@ xfs_splice_read(
276 int flags, 276 int flags,
277 int ioflags) 277 int ioflags)
278{ 278{
279 bhv_vnode_t *vp = XFS_ITOV(ip);
280 xfs_mount_t *mp = ip->i_mount; 279 xfs_mount_t *mp = ip->i_mount;
281 ssize_t ret; 280 ssize_t ret;
282 281
@@ -287,11 +286,11 @@ xfs_splice_read(
287 xfs_ilock(ip, XFS_IOLOCK_SHARED); 286 xfs_ilock(ip, XFS_IOLOCK_SHARED);
288 287
289 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { 288 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
290 bhv_vrwlock_t locktype = VRWLOCK_READ; 289 int iolock = XFS_IOLOCK_SHARED;
291 int error; 290 int error;
292 291
293 error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *ppos, count, 292 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
294 FILP_DELAY_FLAG(infilp), &locktype); 293 FILP_DELAY_FLAG(infilp), &iolock);
295 if (error) { 294 if (error) {
296 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 295 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
297 return -error; 296 return -error;
@@ -317,7 +316,6 @@ xfs_splice_write(
317 int flags, 316 int flags,
318 int ioflags) 317 int ioflags)
319{ 318{
320 bhv_vnode_t *vp = XFS_ITOV(ip);
321 xfs_mount_t *mp = ip->i_mount; 319 xfs_mount_t *mp = ip->i_mount;
322 ssize_t ret; 320 ssize_t ret;
323 struct inode *inode = outfilp->f_mapping->host; 321 struct inode *inode = outfilp->f_mapping->host;
@@ -330,11 +328,11 @@ xfs_splice_write(
330 xfs_ilock(ip, XFS_IOLOCK_EXCL); 328 xfs_ilock(ip, XFS_IOLOCK_EXCL);
331 329
332 if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { 330 if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
333 bhv_vrwlock_t locktype = VRWLOCK_WRITE; 331 int iolock = XFS_IOLOCK_EXCL;
334 int error; 332 int error;
335 333
336 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, *ppos, count, 334 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
337 FILP_DELAY_FLAG(outfilp), &locktype); 335 FILP_DELAY_FLAG(outfilp), &iolock);
338 if (error) { 336 if (error) {
339 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 337 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
340 return -error; 338 return -error;
@@ -573,14 +571,12 @@ xfs_write(
573 struct file *file = iocb->ki_filp; 571 struct file *file = iocb->ki_filp;
574 struct address_space *mapping = file->f_mapping; 572 struct address_space *mapping = file->f_mapping;
575 struct inode *inode = mapping->host; 573 struct inode *inode = mapping->host;
576 bhv_vnode_t *vp = XFS_ITOV(xip);
577 unsigned long segs = nsegs; 574 unsigned long segs = nsegs;
578 xfs_mount_t *mp; 575 xfs_mount_t *mp;
579 ssize_t ret = 0, error = 0; 576 ssize_t ret = 0, error = 0;
580 xfs_fsize_t isize, new_size; 577 xfs_fsize_t isize, new_size;
581 int iolock; 578 int iolock;
582 int eventsent = 0; 579 int eventsent = 0;
583 bhv_vrwlock_t locktype;
584 size_t ocount = 0, count; 580 size_t ocount = 0, count;
585 loff_t pos; 581 loff_t pos;
586 int need_i_mutex; 582 int need_i_mutex;
@@ -607,11 +603,9 @@ xfs_write(
607relock: 603relock:
608 if (ioflags & IO_ISDIRECT) { 604 if (ioflags & IO_ISDIRECT) {
609 iolock = XFS_IOLOCK_SHARED; 605 iolock = XFS_IOLOCK_SHARED;
610 locktype = VRWLOCK_WRITE_DIRECT;
611 need_i_mutex = 0; 606 need_i_mutex = 0;
612 } else { 607 } else {
613 iolock = XFS_IOLOCK_EXCL; 608 iolock = XFS_IOLOCK_EXCL;
614 locktype = VRWLOCK_WRITE;
615 need_i_mutex = 1; 609 need_i_mutex = 1;
616 mutex_lock(&inode->i_mutex); 610 mutex_lock(&inode->i_mutex);
617 } 611 }
@@ -634,9 +628,8 @@ start:
634 dmflags |= DM_FLAGS_IMUX; 628 dmflags |= DM_FLAGS_IMUX;
635 629
636 xfs_iunlock(xip, XFS_ILOCK_EXCL); 630 xfs_iunlock(xip, XFS_ILOCK_EXCL);
637 error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, 631 error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip,
638 pos, count, 632 pos, count, dmflags, &iolock);
639 dmflags, &locktype);
640 if (error) { 633 if (error) {
641 goto out_unlock_internal; 634 goto out_unlock_internal;
642 } 635 }
@@ -664,10 +657,9 @@ start:
664 return XFS_ERROR(-EINVAL); 657 return XFS_ERROR(-EINVAL);
665 } 658 }
666 659
667 if (!need_i_mutex && (VN_CACHED(vp) || pos > xip->i_size)) { 660 if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) {
668 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); 661 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
669 iolock = XFS_IOLOCK_EXCL; 662 iolock = XFS_IOLOCK_EXCL;
670 locktype = VRWLOCK_WRITE;
671 need_i_mutex = 1; 663 need_i_mutex = 1;
672 mutex_lock(&inode->i_mutex); 664 mutex_lock(&inode->i_mutex);
673 xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); 665 xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
@@ -679,10 +671,16 @@ start:
679 if (new_size > xip->i_size) 671 if (new_size > xip->i_size)
680 xip->i_new_size = new_size; 672 xip->i_new_size = new_size;
681 673
682 if (likely(!(ioflags & IO_INVIS))) { 674 /*
675 * We're not supposed to change timestamps in readonly-mounted
676 * filesystems. Throw it away if anyone asks us.
677 */
678 if (likely(!(ioflags & IO_INVIS) &&
679 !mnt_want_write(file->f_path.mnt))) {
683 file_update_time(file); 680 file_update_time(file);
684 xfs_ichgtime_fast(xip, inode, 681 xfs_ichgtime_fast(xip, inode,
685 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 682 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
683 mnt_drop_write(file->f_path.mnt);
686 } 684 }
687 685
688 /* 686 /*
@@ -727,7 +725,7 @@ retry:
727 current->backing_dev_info = mapping->backing_dev_info; 725 current->backing_dev_info = mapping->backing_dev_info;
728 726
729 if ((ioflags & IO_ISDIRECT)) { 727 if ((ioflags & IO_ISDIRECT)) {
730 if (VN_CACHED(vp)) { 728 if (mapping->nrpages) {
731 WARN_ON(need_i_mutex == 0); 729 WARN_ON(need_i_mutex == 0);
732 xfs_inval_cached_trace(xip, pos, -1, 730 xfs_inval_cached_trace(xip, pos, -1,
733 (pos & PAGE_CACHE_MASK), -1); 731 (pos & PAGE_CACHE_MASK), -1);
@@ -744,7 +742,6 @@ retry:
744 mutex_unlock(&inode->i_mutex); 742 mutex_unlock(&inode->i_mutex);
745 743
746 iolock = XFS_IOLOCK_SHARED; 744 iolock = XFS_IOLOCK_SHARED;
747 locktype = VRWLOCK_WRITE_DIRECT;
748 need_i_mutex = 0; 745 need_i_mutex = 0;
749 } 746 }
750 747
@@ -781,15 +778,15 @@ retry:
781 778
782 if (ret == -ENOSPC && 779 if (ret == -ENOSPC &&
783 DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { 780 DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
784 xfs_rwunlock(xip, locktype); 781 xfs_iunlock(xip, iolock);
785 if (need_i_mutex) 782 if (need_i_mutex)
786 mutex_unlock(&inode->i_mutex); 783 mutex_unlock(&inode->i_mutex);
787 error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, 784 error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip,
788 DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 785 DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL,
789 0, 0, 0); /* Delay flag intentionally unused */ 786 0, 0, 0); /* Delay flag intentionally unused */
790 if (need_i_mutex) 787 if (need_i_mutex)
791 mutex_lock(&inode->i_mutex); 788 mutex_lock(&inode->i_mutex);
792 xfs_rwlock(xip, locktype); 789 xfs_ilock(xip, iolock);
793 if (error) 790 if (error)
794 goto out_unlock_internal; 791 goto out_unlock_internal;
795 pos = xip->i_size; 792 pos = xip->i_size;
@@ -817,7 +814,8 @@ retry:
817 /* Handle various SYNC-type writes */ 814 /* Handle various SYNC-type writes */
818 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 815 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
819 int error2; 816 int error2;
820 xfs_rwunlock(xip, locktype); 817
818 xfs_iunlock(xip, iolock);
821 if (need_i_mutex) 819 if (need_i_mutex)
822 mutex_unlock(&inode->i_mutex); 820 mutex_unlock(&inode->i_mutex);
823 error2 = sync_page_range(inode, mapping, pos, ret); 821 error2 = sync_page_range(inode, mapping, pos, ret);
@@ -825,7 +823,7 @@ retry:
825 error = error2; 823 error = error2;
826 if (need_i_mutex) 824 if (need_i_mutex)
827 mutex_lock(&inode->i_mutex); 825 mutex_lock(&inode->i_mutex);
828 xfs_rwlock(xip, locktype); 826 xfs_ilock(xip, iolock);
829 error2 = xfs_write_sync_logforce(mp, xip); 827 error2 = xfs_write_sync_logforce(mp, xip);
830 if (!error) 828 if (!error)
831 error = error2; 829 error = error2;
@@ -846,7 +844,7 @@ retry:
846 xip->i_d.di_size = xip->i_size; 844 xip->i_d.di_size = xip->i_size;
847 xfs_iunlock(xip, XFS_ILOCK_EXCL); 845 xfs_iunlock(xip, XFS_ILOCK_EXCL);
848 } 846 }
849 xfs_rwunlock(xip, locktype); 847 xfs_iunlock(xip, iolock);
850 out_unlock_mutex: 848 out_unlock_mutex:
851 if (need_i_mutex) 849 if (need_i_mutex)
852 mutex_unlock(&inode->i_mutex); 850 mutex_unlock(&inode->i_mutex);
@@ -884,28 +882,23 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
884} 882}
885 883
886/* 884/*
887 * Wrapper around bdstrat so that we can stop data 885 * Wrapper around bdstrat so that we can stop data from going to disk in case
888 * from going to disk in case we are shutting down the filesystem. 886 * we are shutting down the filesystem. Typically user data goes thru this
889 * Typically user data goes thru this path; one of the exceptions 887 * path; one of the exceptions is the superblock.
890 * is the superblock.
891 */ 888 */
892int 889void
893xfsbdstrat( 890xfsbdstrat(
894 struct xfs_mount *mp, 891 struct xfs_mount *mp,
895 struct xfs_buf *bp) 892 struct xfs_buf *bp)
896{ 893{
897 ASSERT(mp); 894 ASSERT(mp);
898 if (!XFS_FORCED_SHUTDOWN(mp)) { 895 if (!XFS_FORCED_SHUTDOWN(mp)) {
899 /* Grio redirection would go here
900 * if (XFS_BUF_IS_GRIO(bp)) {
901 */
902
903 xfs_buf_iorequest(bp); 896 xfs_buf_iorequest(bp);
904 return 0; 897 return;
905 } 898 }
906 899
907 xfs_buftrace("XFSBDSTRAT IOERROR", bp); 900 xfs_buftrace("XFSBDSTRAT IOERROR", bp);
908 return (xfs_bioerror_relse(bp)); 901 xfs_bioerror_relse(bp);
909} 902}
910 903
911/* 904/*
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index e200253139cf..e1d498b4ba7a 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -68,7 +68,8 @@ extern void xfs_inval_cached_trace(struct xfs_inode *,
68#define xfs_inval_cached_trace(ip, offset, len, first, last) 68#define xfs_inval_cached_trace(ip, offset, len, first, last)
69#endif 69#endif
70 70
71extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); 71/* errors from xfsbdstrat() must be extracted from the buffer */
72extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
72extern int xfs_bdstrat_cb(struct xfs_buf *); 73extern int xfs_bdstrat_cb(struct xfs_buf *);
73extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 74extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
74 75
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index 8ba7a2fa6c1d..afd0b0d5fdb2 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -144,8 +144,8 @@ extern void xfs_cleanup_procfs(void);
144# define XFS_STATS_DEC(count) 144# define XFS_STATS_DEC(count)
145# define XFS_STATS_ADD(count, inc) 145# define XFS_STATS_ADD(count, inc)
146 146
147static __inline void xfs_init_procfs(void) { }; 147static inline void xfs_init_procfs(void) { };
148static __inline void xfs_cleanup_procfs(void) { }; 148static inline void xfs_cleanup_procfs(void) { };
149 149
150#endif /* !CONFIG_PROC_FS */ 150#endif /* !CONFIG_PROC_FS */
151 151
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 21dfc9da235e..865eb708aa95 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -171,7 +171,7 @@ xfs_parseargs(
171 char *this_char, *value, *eov; 171 char *this_char, *value, *eov;
172 int dsunit, dswidth, vol_dsunit, vol_dswidth; 172 int dsunit, dswidth, vol_dsunit, vol_dswidth;
173 int iosize; 173 int iosize;
174 int ikeep = 0; 174 int dmapi_implies_ikeep = 1;
175 175
176 args->flags |= XFSMNT_BARRIER; 176 args->flags |= XFSMNT_BARRIER;
177 args->flags2 |= XFSMNT2_COMPAT_IOSIZE; 177 args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
@@ -302,10 +302,10 @@ xfs_parseargs(
302 } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { 302 } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
303 args->flags &= ~XFSMNT_BARRIER; 303 args->flags &= ~XFSMNT_BARRIER;
304 } else if (!strcmp(this_char, MNTOPT_IKEEP)) { 304 } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
305 ikeep = 1; 305 args->flags |= XFSMNT_IKEEP;
306 args->flags &= ~XFSMNT_IDELETE;
307 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { 306 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
308 args->flags |= XFSMNT_IDELETE; 307 dmapi_implies_ikeep = 0;
308 args->flags &= ~XFSMNT_IKEEP;
309 } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { 309 } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
310 args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE; 310 args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE;
311 } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { 311 } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
@@ -410,8 +410,8 @@ xfs_parseargs(
410 * Note that if "ikeep" or "noikeep" mount options are 410 * Note that if "ikeep" or "noikeep" mount options are
411 * supplied, then they are honored. 411 * supplied, then they are honored.
412 */ 412 */
413 if (!(args->flags & XFSMNT_DMAPI) && !ikeep) 413 if ((args->flags & XFSMNT_DMAPI) && dmapi_implies_ikeep)
414 args->flags |= XFSMNT_IDELETE; 414 args->flags |= XFSMNT_IKEEP;
415 415
416 if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { 416 if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
417 if (dsunit) { 417 if (dsunit) {
@@ -446,6 +446,7 @@ xfs_showargs(
446{ 446{
447 static struct proc_xfs_info xfs_info_set[] = { 447 static struct proc_xfs_info xfs_info_set[] = {
448 /* the few simple ones we can get from the mount struct */ 448 /* the few simple ones we can get from the mount struct */
449 { XFS_MOUNT_IKEEP, "," MNTOPT_IKEEP },
449 { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC }, 450 { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC },
450 { XFS_MOUNT_INO64, "," MNTOPT_INO64 }, 451 { XFS_MOUNT_INO64, "," MNTOPT_INO64 },
451 { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN }, 452 { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN },
@@ -461,7 +462,6 @@ xfs_showargs(
461 }; 462 };
462 static struct proc_xfs_info xfs_info_unset[] = { 463 static struct proc_xfs_info xfs_info_unset[] = {
463 /* the few simple ones we can get from the mount struct */ 464 /* the few simple ones we can get from the mount struct */
464 { XFS_MOUNT_IDELETE, "," MNTOPT_IKEEP },
465 { XFS_MOUNT_COMPAT_IOSIZE, "," MNTOPT_LARGEIO }, 465 { XFS_MOUNT_COMPAT_IOSIZE, "," MNTOPT_LARGEIO },
466 { XFS_MOUNT_BARRIER, "," MNTOPT_NOBARRIER }, 466 { XFS_MOUNT_BARRIER, "," MNTOPT_NOBARRIER },
467 { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_64BITINODE }, 467 { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_64BITINODE },
@@ -896,7 +896,8 @@ xfs_fs_write_inode(
896 struct inode *inode, 896 struct inode *inode,
897 int sync) 897 int sync)
898{ 898{
899 int error = 0, flags = FLUSH_INODE; 899 int error = 0;
900 int flags = 0;
900 901
901 xfs_itrace_entry(XFS_I(inode)); 902 xfs_itrace_entry(XFS_I(inode));
902 if (sync) { 903 if (sync) {
@@ -934,7 +935,7 @@ xfs_fs_clear_inode(
934 xfs_inactive(ip); 935 xfs_inactive(ip);
935 xfs_iflags_clear(ip, XFS_IMODIFIED); 936 xfs_iflags_clear(ip, XFS_IMODIFIED);
936 if (xfs_reclaim(ip)) 937 if (xfs_reclaim(ip))
937 panic("%s: cannot reclaim 0x%p\n", __FUNCTION__, inode); 938 panic("%s: cannot reclaim 0x%p\n", __func__, inode);
938 } 939 }
939 940
940 ASSERT(XFS_I(inode) == NULL); 941 ASSERT(XFS_I(inode) == NULL);
@@ -1027,8 +1028,7 @@ xfs_sync_worker(
1027 int error; 1028 int error;
1028 1029
1029 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) 1030 if (!(mp->m_flags & XFS_MOUNT_RDONLY))
1030 error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR | 1031 error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR);
1031 SYNC_REFCACHE | SYNC_SUPER);
1032 mp->m_sync_seq++; 1032 mp->m_sync_seq++;
1033 wake_up(&mp->m_wait_single_sync_task); 1033 wake_up(&mp->m_wait_single_sync_task);
1034} 1034}
@@ -1306,7 +1306,7 @@ xfs_fs_fill_super(
1306 void *data, 1306 void *data,
1307 int silent) 1307 int silent)
1308{ 1308{
1309 struct inode *rootvp; 1309 struct inode *root;
1310 struct xfs_mount *mp = NULL; 1310 struct xfs_mount *mp = NULL;
1311 struct xfs_mount_args *args = xfs_args_allocate(sb, silent); 1311 struct xfs_mount_args *args = xfs_args_allocate(sb, silent);
1312 int error; 1312 int error;
@@ -1344,19 +1344,18 @@ xfs_fs_fill_super(
1344 sb->s_time_gran = 1; 1344 sb->s_time_gran = 1;
1345 set_posix_acl_flag(sb); 1345 set_posix_acl_flag(sb);
1346 1346
1347 rootvp = igrab(mp->m_rootip->i_vnode); 1347 root = igrab(mp->m_rootip->i_vnode);
1348 if (!rootvp) { 1348 if (!root) {
1349 error = ENOENT; 1349 error = ENOENT;
1350 goto fail_unmount; 1350 goto fail_unmount;
1351 } 1351 }
1352 1352 if (is_bad_inode(root)) {
1353 sb->s_root = d_alloc_root(vn_to_inode(rootvp)); 1353 error = EINVAL;
1354 if (!sb->s_root) {
1355 error = ENOMEM;
1356 goto fail_vnrele; 1354 goto fail_vnrele;
1357 } 1355 }
1358 if (is_bad_inode(sb->s_root->d_inode)) { 1356 sb->s_root = d_alloc_root(root);
1359 error = EINVAL; 1357 if (!sb->s_root) {
1358 error = ENOMEM;
1360 goto fail_vnrele; 1359 goto fail_vnrele;
1361 } 1360 }
1362 1361
@@ -1378,7 +1377,7 @@ fail_vnrele:
1378 dput(sb->s_root); 1377 dput(sb->s_root);
1379 sb->s_root = NULL; 1378 sb->s_root = NULL;
1380 } else { 1379 } else {
1381 VN_RELE(rootvp); 1380 iput(root);
1382 } 1381 }
1383 1382
1384fail_unmount: 1383fail_unmount:
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 3efcf45b14ab..3efb7c6d3303 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -50,13 +50,7 @@ extern void xfs_qm_exit(void);
50# define set_posix_acl_flag(sb) do { } while (0) 50# define set_posix_acl_flag(sb) do { } while (0)
51#endif 51#endif
52 52
53#ifdef CONFIG_XFS_SECURITY 53#define XFS_SECURITY_STRING "security attributes, "
54# define XFS_SECURITY_STRING "security attributes, "
55# define ENOSECURITY 0
56#else
57# define XFS_SECURITY_STRING
58# define ENOSECURITY EOPNOTSUPP
59#endif
60 54
61#ifdef CONFIG_XFS_RT 55#ifdef CONFIG_XFS_RT
62# define XFS_REALTIME_STRING "realtime, " 56# define XFS_REALTIME_STRING "realtime, "
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 4da03a4e3520..7e60c7776b1c 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -49,7 +49,6 @@ typedef struct bhv_vfs_sync_work {
49#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ 49#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
50#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ 50#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
51#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ 51#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */
52#define SYNC_SUPER 0x0200 /* flush superblock to disk */
53 52
54/* 53/*
55 * When remounting a filesystem read-only or freezing the filesystem, 54 * When remounting a filesystem read-only or freezing the filesystem,
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index b5ea418693b1..8b4d63ce8694 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -23,8 +23,6 @@ struct bhv_vattr;
23struct xfs_iomap; 23struct xfs_iomap;
24struct attrlist_cursor_kern; 24struct attrlist_cursor_kern;
25 25
26typedef struct dentry bhv_vname_t;
27typedef __u64 bhv_vnumber_t;
28typedef struct inode bhv_vnode_t; 26typedef struct inode bhv_vnode_t;
29 27
30#define VN_ISLNK(vp) S_ISLNK((vp)->i_mode) 28#define VN_ISLNK(vp) S_ISLNK((vp)->i_mode)
@@ -46,18 +44,6 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
46} 44}
47 45
48/* 46/*
49 * Values for the vop_rwlock/rwunlock flags parameter.
50 */
51typedef enum bhv_vrwlock {
52 VRWLOCK_NONE,
53 VRWLOCK_READ,
54 VRWLOCK_WRITE,
55 VRWLOCK_WRITE_DIRECT,
56 VRWLOCK_TRY_READ,
57 VRWLOCK_TRY_WRITE
58} bhv_vrwlock_t;
59
60/*
61 * Return values for xfs_inactive. A return value of 47 * Return values for xfs_inactive. A return value of
62 * VN_INACTIVE_NOCACHE implies that the file system behavior 48 * VN_INACTIVE_NOCACHE implies that the file system behavior
63 * has disassociated its state and bhv_desc_t from the vnode. 49 * has disassociated its state and bhv_desc_t from the vnode.
@@ -73,12 +59,9 @@ typedef enum bhv_vrwlock {
73#define IO_INVIS 0x00020 /* don't update inode timestamps */ 59#define IO_INVIS 0x00020 /* don't update inode timestamps */
74 60
75/* 61/*
76 * Flags for vop_iflush call 62 * Flags for xfs_inode_flush
77 */ 63 */
78#define FLUSH_SYNC 1 /* wait for flush to complete */ 64#define FLUSH_SYNC 1 /* wait for flush to complete */
79#define FLUSH_INODE 2 /* flush the inode itself */
80#define FLUSH_LOG 4 /* force the last log entry for
81 * this inode out to disk */
82 65
83/* 66/*
84 * Flush/Invalidate options for vop_toss/flush/flushinval_pages. 67 * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
@@ -226,13 +209,6 @@ static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp)
226} 209}
227 210
228/* 211/*
229 * Vname handling macros.
230 */
231#define VNAME(dentry) ((char *) (dentry)->d_name.name)
232#define VNAMELEN(dentry) ((dentry)->d_name.len)
233#define VNAME_TO_VNODE(dentry) (vn_from_inode((dentry)->d_inode))
234
235/*
236 * Dealing with bad inodes 212 * Dealing with bad inodes
237 */ 213 */
238static inline int VN_BAD(bhv_vnode_t *vp) 214static inline int VN_BAD(bhv_vnode_t *vp)
@@ -303,9 +279,9 @@ extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *);
303extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); 279extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *);
304extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); 280extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *);
305#define xfs_itrace_entry(ip) \ 281#define xfs_itrace_entry(ip) \
306 _xfs_itrace_entry(ip, __FUNCTION__, (inst_t *)__return_address) 282 _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address)
307#define xfs_itrace_exit(ip) \ 283#define xfs_itrace_exit(ip) \
308 _xfs_itrace_exit(ip, __FUNCTION__, (inst_t *)__return_address) 284 _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address)
309#define xfs_itrace_exit_tag(ip, tag) \ 285#define xfs_itrace_exit_tag(ip, tag) \
310 _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) 286 _xfs_itrace_exit(ip, tag, (inst_t *)__return_address)
311#define xfs_itrace_ref(ip) \ 287#define xfs_itrace_ref(ip) \
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 665babcca6a6..631ebb31b295 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -1291,7 +1291,7 @@ xfs_qm_dqflush(
1291 if (flags & XFS_QMOPT_DELWRI) { 1291 if (flags & XFS_QMOPT_DELWRI) {
1292 xfs_bdwrite(mp, bp); 1292 xfs_bdwrite(mp, bp);
1293 } else if (flags & XFS_QMOPT_ASYNC) { 1293 } else if (flags & XFS_QMOPT_ASYNC) {
1294 xfs_bawrite(mp, bp); 1294 error = xfs_bawrite(mp, bp);
1295 } else { 1295 } else {
1296 error = xfs_bwrite(mp, bp); 1296 error = xfs_bwrite(mp, bp);
1297 } 1297 }
@@ -1439,9 +1439,7 @@ xfs_qm_dqpurge(
1439 uint flags) 1439 uint flags)
1440{ 1440{
1441 xfs_dqhash_t *thishash; 1441 xfs_dqhash_t *thishash;
1442 xfs_mount_t *mp; 1442 xfs_mount_t *mp = dqp->q_mount;
1443
1444 mp = dqp->q_mount;
1445 1443
1446 ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); 1444 ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
1447 ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash)); 1445 ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash));
@@ -1485,6 +1483,7 @@ xfs_qm_dqpurge(
1485 * we're unmounting, we do care, so we flush it and wait. 1483 * we're unmounting, we do care, so we flush it and wait.
1486 */ 1484 */
1487 if (XFS_DQ_IS_DIRTY(dqp)) { 1485 if (XFS_DQ_IS_DIRTY(dqp)) {
1486 int error;
1488 xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY"); 1487 xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY");
1489 /* dqflush unlocks dqflock */ 1488 /* dqflush unlocks dqflock */
1490 /* 1489 /*
@@ -1495,7 +1494,10 @@ xfs_qm_dqpurge(
1495 * We don't care about getting disk errors here. We need 1494 * We don't care about getting disk errors here. We need
1496 * to purge this dquot anyway, so we go ahead regardless. 1495 * to purge this dquot anyway, so we go ahead regardless.
1497 */ 1496 */
1498 (void) xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); 1497 error = xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC);
1498 if (error)
1499 xfs_fs_cmn_err(CE_WARN, mp,
1500 "xfs_qm_dqpurge: dquot %p flush failed", dqp);
1499 xfs_dqflock(dqp); 1501 xfs_dqflock(dqp);
1500 } 1502 }
1501 ASSERT(dqp->q_pincount == 0); 1503 ASSERT(dqp->q_pincount == 0);
@@ -1580,12 +1582,18 @@ xfs_qm_dqflock_pushbuf_wait(
1580 XFS_INCORE_TRYLOCK); 1582 XFS_INCORE_TRYLOCK);
1581 if (bp != NULL) { 1583 if (bp != NULL) {
1582 if (XFS_BUF_ISDELAYWRITE(bp)) { 1584 if (XFS_BUF_ISDELAYWRITE(bp)) {
1585 int error;
1583 if (XFS_BUF_ISPINNED(bp)) { 1586 if (XFS_BUF_ISPINNED(bp)) {
1584 xfs_log_force(dqp->q_mount, 1587 xfs_log_force(dqp->q_mount,
1585 (xfs_lsn_t)0, 1588 (xfs_lsn_t)0,
1586 XFS_LOG_FORCE); 1589 XFS_LOG_FORCE);
1587 } 1590 }
1588 xfs_bawrite(dqp->q_mount, bp); 1591 error = xfs_bawrite(dqp->q_mount, bp);
1592 if (error)
1593 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
1594 "xfs_qm_dqflock_pushbuf_wait: "
1595 "pushbuf error %d on dqp %p, bp %p",
1596 error, dqp, bp);
1589 } else { 1597 } else {
1590 xfs_buf_relse(bp); 1598 xfs_buf_relse(bp);
1591 } 1599 }
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 1800e8d1f646..36e05ca78412 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -146,6 +146,7 @@ xfs_qm_dquot_logitem_push(
146 xfs_dq_logitem_t *logitem) 146 xfs_dq_logitem_t *logitem)
147{ 147{
148 xfs_dquot_t *dqp; 148 xfs_dquot_t *dqp;
149 int error;
149 150
150 dqp = logitem->qli_dquot; 151 dqp = logitem->qli_dquot;
151 152
@@ -161,7 +162,11 @@ xfs_qm_dquot_logitem_push(
161 * lock without sleeping, then there must not have been 162 * lock without sleeping, then there must not have been
162 * anyone in the process of flushing the dquot. 163 * anyone in the process of flushing the dquot.
163 */ 164 */
164 xfs_qm_dqflush(dqp, XFS_B_DELWRI); 165 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
166 if (error)
167 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
168 "xfs_qm_dquot_logitem_push: push error %d on dqp %p",
169 error, dqp);
165 xfs_dqunlock(dqp); 170 xfs_dqunlock(dqp);
166} 171}
167 172
@@ -262,11 +267,16 @@ xfs_qm_dquot_logitem_pushbuf(
262 XFS_LOG_FORCE); 267 XFS_LOG_FORCE);
263 } 268 }
264 if (dopush) { 269 if (dopush) {
270 int error;
265#ifdef XFSRACEDEBUG 271#ifdef XFSRACEDEBUG
266 delay_for_intr(); 272 delay_for_intr();
267 delay(300); 273 delay(300);
268#endif 274#endif
269 xfs_bawrite(mp, bp); 275 error = xfs_bawrite(mp, bp);
276 if (error)
277 xfs_fs_cmn_err(CE_WARN, mp,
278 "xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p",
279 error, qip, bp);
270 } else { 280 } else {
271 xfs_buf_relse(bp); 281 xfs_buf_relse(bp);
272 } 282 }
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 1f3da5b8657b..40ea56409561 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -304,8 +304,11 @@ xfs_qm_unmount_quotadestroy(
304 * necessary data structures like quotainfo. This is also responsible for 304 * necessary data structures like quotainfo. This is also responsible for
305 * running a quotacheck as necessary. We are guaranteed that the superblock 305 * running a quotacheck as necessary. We are guaranteed that the superblock
306 * is consistently read in at this point. 306 * is consistently read in at this point.
307 *
308 * If we fail here, the mount will continue with quota turned off. We don't
309 * need to inidicate success or failure at all.
307 */ 310 */
308int 311void
309xfs_qm_mount_quotas( 312xfs_qm_mount_quotas(
310 xfs_mount_t *mp, 313 xfs_mount_t *mp,
311 int mfsi_flags) 314 int mfsi_flags)
@@ -313,7 +316,6 @@ xfs_qm_mount_quotas(
313 int error = 0; 316 int error = 0;
314 uint sbf; 317 uint sbf;
315 318
316
317 /* 319 /*
318 * If quotas on realtime volumes is not supported, we disable 320 * If quotas on realtime volumes is not supported, we disable
319 * quotas immediately. 321 * quotas immediately.
@@ -332,7 +334,8 @@ xfs_qm_mount_quotas(
332 * Allocate the quotainfo structure inside the mount struct, and 334 * Allocate the quotainfo structure inside the mount struct, and
333 * create quotainode(s), and change/rev superblock if necessary. 335 * create quotainode(s), and change/rev superblock if necessary.
334 */ 336 */
335 if ((error = xfs_qm_init_quotainfo(mp))) { 337 error = xfs_qm_init_quotainfo(mp);
338 if (error) {
336 /* 339 /*
337 * We must turn off quotas. 340 * We must turn off quotas.
338 */ 341 */
@@ -344,12 +347,11 @@ xfs_qm_mount_quotas(
344 * If any of the quotas are not consistent, do a quotacheck. 347 * If any of the quotas are not consistent, do a quotacheck.
345 */ 348 */
346 if (XFS_QM_NEED_QUOTACHECK(mp) && 349 if (XFS_QM_NEED_QUOTACHECK(mp) &&
347 !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { 350 !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
348 if ((error = xfs_qm_quotacheck(mp))) { 351 error = xfs_qm_quotacheck(mp);
349 /* Quotacheck has failed and quotas have 352 if (error) {
350 * been disabled. 353 /* Quotacheck failed and disabled quotas. */
351 */ 354 return;
352 return XFS_ERROR(error);
353 } 355 }
354 } 356 }
355 /* 357 /*
@@ -357,12 +359,10 @@ xfs_qm_mount_quotas(
357 * quotachecked status, since we won't be doing accounting for 359 * quotachecked status, since we won't be doing accounting for
358 * that type anymore. 360 * that type anymore.
359 */ 361 */
360 if (!XFS_IS_UQUOTA_ON(mp)) { 362 if (!XFS_IS_UQUOTA_ON(mp))
361 mp->m_qflags &= ~XFS_UQUOTA_CHKD; 363 mp->m_qflags &= ~XFS_UQUOTA_CHKD;
362 } 364 if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
363 if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) {
364 mp->m_qflags &= ~XFS_OQUOTA_CHKD; 365 mp->m_qflags &= ~XFS_OQUOTA_CHKD;
365 }
366 366
367 write_changes: 367 write_changes:
368 /* 368 /*
@@ -392,7 +392,7 @@ xfs_qm_mount_quotas(
392 xfs_fs_cmn_err(CE_WARN, mp, 392 xfs_fs_cmn_err(CE_WARN, mp,
393 "Failed to initialize disk quotas."); 393 "Failed to initialize disk quotas.");
394 } 394 }
395 return XFS_ERROR(error); 395 return;
396} 396}
397 397
398/* 398/*
@@ -1405,13 +1405,13 @@ xfs_qm_qino_alloc(
1405#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) 1405#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1406 unsigned oldv = mp->m_sb.sb_versionnum; 1406 unsigned oldv = mp->m_sb.sb_versionnum;
1407#endif 1407#endif
1408 ASSERT(!XFS_SB_VERSION_HASQUOTA(&mp->m_sb)); 1408 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
1409 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | 1409 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1410 XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == 1410 XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
1411 (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | 1411 (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1412 XFS_SB_GQUOTINO | XFS_SB_QFLAGS)); 1412 XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
1413 1413
1414 XFS_SB_VERSION_ADDQUOTA(&mp->m_sb); 1414 xfs_sb_version_addquota(&mp->m_sb);
1415 mp->m_sb.sb_uquotino = NULLFSINO; 1415 mp->m_sb.sb_uquotino = NULLFSINO;
1416 mp->m_sb.sb_gquotino = NULLFSINO; 1416 mp->m_sb.sb_gquotino = NULLFSINO;
1417 1417
@@ -1438,7 +1438,7 @@ xfs_qm_qino_alloc(
1438} 1438}
1439 1439
1440 1440
1441STATIC int 1441STATIC void
1442xfs_qm_reset_dqcounts( 1442xfs_qm_reset_dqcounts(
1443 xfs_mount_t *mp, 1443 xfs_mount_t *mp,
1444 xfs_buf_t *bp, 1444 xfs_buf_t *bp,
@@ -1478,8 +1478,6 @@ xfs_qm_reset_dqcounts(
1478 ddq->d_rtbwarns = 0; 1478 ddq->d_rtbwarns = 0;
1479 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); 1479 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
1480 } 1480 }
1481
1482 return 0;
1483} 1481}
1484 1482
1485STATIC int 1483STATIC int
@@ -1520,7 +1518,7 @@ xfs_qm_dqiter_bufs(
1520 if (error) 1518 if (error)
1521 break; 1519 break;
1522 1520
1523 (void) xfs_qm_reset_dqcounts(mp, bp, firstid, type); 1521 xfs_qm_reset_dqcounts(mp, bp, firstid, type);
1524 xfs_bdwrite(mp, bp); 1522 xfs_bdwrite(mp, bp);
1525 /* 1523 /*
1526 * goto the next block. 1524 * goto the next block.
@@ -1810,7 +1808,7 @@ xfs_qm_dqusage_adjust(
1810 * Now release the inode. This will send it to 'inactive', and 1808 * Now release the inode. This will send it to 'inactive', and
1811 * possibly even free blocks. 1809 * possibly even free blocks.
1812 */ 1810 */
1813 VN_RELE(XFS_ITOV(ip)); 1811 IRELE(ip);
1814 1812
1815 /* 1813 /*
1816 * Goto next inode. 1814 * Goto next inode.
@@ -1880,6 +1878,14 @@ xfs_qm_quotacheck(
1880 } while (! done); 1878 } while (! done);
1881 1879
1882 /* 1880 /*
1881 * We've made all the changes that we need to make incore.
1882 * Flush them down to disk buffers if everything was updated
1883 * successfully.
1884 */
1885 if (!error)
1886 error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
1887
1888 /*
1883 * We can get this error if we couldn't do a dquot allocation inside 1889 * We can get this error if we couldn't do a dquot allocation inside
1884 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the 1890 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1885 * dirty dquots that might be cached, we just want to get rid of them 1891 * dirty dquots that might be cached, we just want to get rid of them
@@ -1890,11 +1896,6 @@ xfs_qm_quotacheck(
1890 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF); 1896 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
1891 goto error_return; 1897 goto error_return;
1892 } 1898 }
1893 /*
1894 * We've made all the changes that we need to make incore.
1895 * Now flush_them down to disk buffers.
1896 */
1897 xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
1898 1899
1899 /* 1900 /*
1900 * We didn't log anything, because if we crashed, we'll have to 1901 * We didn't log anything, because if we crashed, we'll have to
@@ -1926,7 +1927,10 @@ xfs_qm_quotacheck(
1926 ASSERT(mp->m_quotainfo != NULL); 1927 ASSERT(mp->m_quotainfo != NULL);
1927 ASSERT(xfs_Gqm != NULL); 1928 ASSERT(xfs_Gqm != NULL);
1928 xfs_qm_destroy_quotainfo(mp); 1929 xfs_qm_destroy_quotainfo(mp);
1929 (void)xfs_mount_reset_sbqflags(mp); 1930 if (xfs_mount_reset_sbqflags(mp)) {
1931 cmn_err(CE_WARN, "XFS quotacheck %s: "
1932 "Failed to reset quota flags.", mp->m_fsname);
1933 }
1930 } else { 1934 } else {
1931 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); 1935 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
1932 } 1936 }
@@ -1954,7 +1958,7 @@ xfs_qm_init_quotainos(
1954 /* 1958 /*
1955 * Get the uquota and gquota inodes 1959 * Get the uquota and gquota inodes
1956 */ 1960 */
1957 if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) { 1961 if (xfs_sb_version_hasquota(&mp->m_sb)) {
1958 if (XFS_IS_UQUOTA_ON(mp) && 1962 if (XFS_IS_UQUOTA_ON(mp) &&
1959 mp->m_sb.sb_uquotino != NULLFSINO) { 1963 mp->m_sb.sb_uquotino != NULLFSINO) {
1960 ASSERT(mp->m_sb.sb_uquotino > 0); 1964 ASSERT(mp->m_sb.sb_uquotino > 0);
@@ -1968,7 +1972,7 @@ xfs_qm_init_quotainos(
1968 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 1972 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1969 0, 0, &gip, 0))) { 1973 0, 0, &gip, 0))) {
1970 if (uip) 1974 if (uip)
1971 VN_RELE(XFS_ITOV(uip)); 1975 IRELE(uip);
1972 return XFS_ERROR(error); 1976 return XFS_ERROR(error);
1973 } 1977 }
1974 } 1978 }
@@ -1999,7 +2003,7 @@ xfs_qm_init_quotainos(
1999 sbflags | XFS_SB_GQUOTINO, flags); 2003 sbflags | XFS_SB_GQUOTINO, flags);
2000 if (error) { 2004 if (error) {
2001 if (uip) 2005 if (uip)
2002 VN_RELE(XFS_ITOV(uip)); 2006 IRELE(uip);
2003 2007
2004 return XFS_ERROR(error); 2008 return XFS_ERROR(error);
2005 } 2009 }
@@ -2093,12 +2097,17 @@ xfs_qm_shake_freelist(
2093 * dirty dquots. 2097 * dirty dquots.
2094 */ 2098 */
2095 if (XFS_DQ_IS_DIRTY(dqp)) { 2099 if (XFS_DQ_IS_DIRTY(dqp)) {
2100 int error;
2096 xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY"); 2101 xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY");
2097 /* 2102 /*
2098 * We flush it delayed write, so don't bother 2103 * We flush it delayed write, so don't bother
2099 * releasing the mplock. 2104 * releasing the mplock.
2100 */ 2105 */
2101 (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 2106 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
2107 if (error) {
2108 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2109 "xfs_qm_dqflush_all: dquot %p flush failed", dqp);
2110 }
2102 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ 2111 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2103 dqp = dqp->dq_flnext; 2112 dqp = dqp->dq_flnext;
2104 continue; 2113 continue;
@@ -2265,12 +2274,17 @@ xfs_qm_dqreclaim_one(void)
2265 * dirty dquots. 2274 * dirty dquots.
2266 */ 2275 */
2267 if (XFS_DQ_IS_DIRTY(dqp)) { 2276 if (XFS_DQ_IS_DIRTY(dqp)) {
2277 int error;
2268 xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY"); 2278 xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY");
2269 /* 2279 /*
2270 * We flush it delayed write, so don't bother 2280 * We flush it delayed write, so don't bother
2271 * releasing the freelist lock. 2281 * releasing the freelist lock.
2272 */ 2282 */
2273 (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 2283 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
2284 if (error) {
2285 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2286 "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
2287 }
2274 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ 2288 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2275 continue; 2289 continue;
2276 } 2290 }
@@ -2378,9 +2392,9 @@ xfs_qm_write_sb_changes(
2378 } 2392 }
2379 2393
2380 xfs_mod_sb(tp, flags); 2394 xfs_mod_sb(tp, flags);
2381 (void) xfs_trans_commit(tp, 0); 2395 error = xfs_trans_commit(tp, 0);
2382 2396
2383 return 0; 2397 return error;
2384} 2398}
2385 2399
2386 2400
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index baf537c1c177..cd2300e374af 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct {
165#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) 165#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--)
166 166
167extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); 167extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
168extern int xfs_qm_mount_quotas(xfs_mount_t *, int); 168extern void xfs_qm_mount_quotas(xfs_mount_t *, int);
169extern int xfs_qm_quotacheck(xfs_mount_t *); 169extern int xfs_qm_quotacheck(xfs_mount_t *);
170extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); 170extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *);
171extern int xfs_qm_unmount_quotas(xfs_mount_t *); 171extern int xfs_qm_unmount_quotas(xfs_mount_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 97bb32937585..f4f6c4c861d7 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -118,7 +118,7 @@ xfs_qm_newmount(
118 *quotaflags = 0; 118 *quotaflags = 0;
119 *needquotamount = B_FALSE; 119 *needquotamount = B_FALSE;
120 120
121 quotaondisk = XFS_SB_VERSION_HASQUOTA(&mp->m_sb) && 121 quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
122 (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); 122 (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
123 123
124 if (quotaondisk) { 124 if (quotaondisk) {
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h
index a50ffabcf554..5b964fc0dc09 100644
--- a/fs/xfs/quota/xfs_qm_stats.h
+++ b/fs/xfs/quota/xfs_qm_stats.h
@@ -45,8 +45,8 @@ extern void xfs_qm_cleanup_procfs(void);
45 45
46# define XQM_STATS_INC(count) do { } while (0) 46# define XQM_STATS_INC(count) do { } while (0)
47 47
48static __inline void xfs_qm_init_procfs(void) { }; 48static inline void xfs_qm_init_procfs(void) { };
49static __inline void xfs_qm_cleanup_procfs(void) { }; 49static inline void xfs_qm_cleanup_procfs(void) { };
50 50
51#endif 51#endif
52 52
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 2cc5886cfe85..8342823dbdc3 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -279,9 +279,12 @@ xfs_qm_scall_quotaoff(
279 279
280 /* 280 /*
281 * Write the LI_QUOTAOFF log record, and do SB changes atomically, 281 * Write the LI_QUOTAOFF log record, and do SB changes atomically,
282 * and synchronously. 282 * and synchronously. If we fail to write, we should abort the
283 * operation as it cannot be recovered safely if we crash.
283 */ 284 */
284 xfs_qm_log_quotaoff(mp, &qoffstart, flags); 285 error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
286 if (error)
287 goto out_error;
285 288
286 /* 289 /*
287 * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct 290 * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
@@ -337,7 +340,12 @@ xfs_qm_scall_quotaoff(
337 * So, we have QUOTAOFF start and end logitems; the start 340 * So, we have QUOTAOFF start and end logitems; the start
338 * logitem won't get overwritten until the end logitem appears... 341 * logitem won't get overwritten until the end logitem appears...
339 */ 342 */
340 xfs_qm_log_quotaoff_end(mp, qoffstart, flags); 343 error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
344 if (error) {
345 /* We're screwed now. Shutdown is the only option. */
346 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
347 goto out_error;
348 }
341 349
342 /* 350 /*
343 * If quotas is completely disabled, close shop. 351 * If quotas is completely disabled, close shop.
@@ -361,6 +369,7 @@ xfs_qm_scall_quotaoff(
361 XFS_PURGE_INODE(XFS_QI_GQIP(mp)); 369 XFS_PURGE_INODE(XFS_QI_GQIP(mp));
362 XFS_QI_GQIP(mp) = NULL; 370 XFS_QI_GQIP(mp) = NULL;
363 } 371 }
372out_error:
364 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 373 mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
365 374
366 return (error); 375 return (error);
@@ -371,35 +380,34 @@ xfs_qm_scall_trunc_qfiles(
371 xfs_mount_t *mp, 380 xfs_mount_t *mp,
372 uint flags) 381 uint flags)
373{ 382{
374 int error; 383 int error = 0, error2 = 0;
375 xfs_inode_t *qip; 384 xfs_inode_t *qip;
376 385
377 if (!capable(CAP_SYS_ADMIN)) 386 if (!capable(CAP_SYS_ADMIN))
378 return XFS_ERROR(EPERM); 387 return XFS_ERROR(EPERM);
379 error = 0; 388 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
380 if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) {
381 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); 389 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
382 return XFS_ERROR(EINVAL); 390 return XFS_ERROR(EINVAL);
383 } 391 }
384 392
385 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { 393 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
386 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); 394 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0);
387 if (! error) { 395 if (!error) {
388 (void) xfs_truncate_file(mp, qip); 396 error = xfs_truncate_file(mp, qip);
389 VN_RELE(XFS_ITOV(qip)); 397 IRELE(qip);
390 } 398 }
391 } 399 }
392 400
393 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && 401 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
394 mp->m_sb.sb_gquotino != NULLFSINO) { 402 mp->m_sb.sb_gquotino != NULLFSINO) {
395 error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); 403 error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0);
396 if (! error) { 404 if (!error2) {
397 (void) xfs_truncate_file(mp, qip); 405 error2 = xfs_truncate_file(mp, qip);
398 VN_RELE(XFS_ITOV(qip)); 406 IRELE(qip);
399 } 407 }
400 } 408 }
401 409
402 return (error); 410 return error ? error : error2;
403} 411}
404 412
405 413
@@ -522,7 +530,7 @@ xfs_qm_scall_getqstat(
522 memset(out, 0, sizeof(fs_quota_stat_t)); 530 memset(out, 0, sizeof(fs_quota_stat_t));
523 531
524 out->qs_version = FS_QSTAT_VERSION; 532 out->qs_version = FS_QSTAT_VERSION;
525 if (! XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) { 533 if (!xfs_sb_version_hasquota(&mp->m_sb)) {
526 out->qs_uquota.qfs_ino = NULLFSINO; 534 out->qs_uquota.qfs_ino = NULLFSINO;
527 out->qs_gquota.qfs_ino = NULLFSINO; 535 out->qs_gquota.qfs_ino = NULLFSINO;
528 return (0); 536 return (0);
@@ -552,13 +560,13 @@ xfs_qm_scall_getqstat(
552 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; 560 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
553 out->qs_uquota.qfs_nextents = uip->i_d.di_nextents; 561 out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
554 if (tempuqip) 562 if (tempuqip)
555 VN_RELE(XFS_ITOV(uip)); 563 IRELE(uip);
556 } 564 }
557 if (gip) { 565 if (gip) {
558 out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks; 566 out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
559 out->qs_gquota.qfs_nextents = gip->i_d.di_nextents; 567 out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
560 if (tempgqip) 568 if (tempgqip)
561 VN_RELE(XFS_ITOV(gip)); 569 IRELE(gip);
562 } 570 }
563 if (mp->m_quotainfo) { 571 if (mp->m_quotainfo) {
564 out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp); 572 out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp);
@@ -726,12 +734,12 @@ xfs_qm_scall_setqlim(
726 xfs_trans_log_dquot(tp, dqp); 734 xfs_trans_log_dquot(tp, dqp);
727 735
728 xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT"); 736 xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT");
729 xfs_trans_commit(tp, 0); 737 error = xfs_trans_commit(tp, 0);
730 xfs_qm_dqprint(dqp); 738 xfs_qm_dqprint(dqp);
731 xfs_qm_dqrele(dqp); 739 xfs_qm_dqrele(dqp);
732 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 740 mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
733 741
734 return (0); 742 return error;
735} 743}
736 744
737STATIC int 745STATIC int
@@ -1095,7 +1103,7 @@ again:
1095 * inactive code in hell. 1103 * inactive code in hell.
1096 */ 1104 */
1097 if (vnode_refd) 1105 if (vnode_refd)
1098 VN_RELE(vp); 1106 IRELE(ip);
1099 XFS_MOUNT_ILOCK(mp); 1107 XFS_MOUNT_ILOCK(mp);
1100 /* 1108 /*
1101 * If an inode was inserted or removed, we gotta 1109 * If an inode was inserted or removed, we gotta
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 129067cfcb86..0b75d302508f 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -24,7 +24,7 @@ static int ktrace_zentries;
24void __init 24void __init
25ktrace_init(int zentries) 25ktrace_init(int zentries)
26{ 26{
27 ktrace_zentries = zentries; 27 ktrace_zentries = roundup_pow_of_two(zentries);
28 28
29 ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t), 29 ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t),
30 "ktrace_hdr"); 30 "ktrace_hdr");
@@ -47,13 +47,16 @@ ktrace_uninit(void)
47 * ktrace_alloc() 47 * ktrace_alloc()
48 * 48 *
49 * Allocate a ktrace header and enough buffering for the given 49 * Allocate a ktrace header and enough buffering for the given
50 * number of entries. 50 * number of entries. Round the number of entries up to a
51 * power of 2 so we can do fast masking to get the index from
52 * the atomic index counter.
51 */ 53 */
52ktrace_t * 54ktrace_t *
53ktrace_alloc(int nentries, unsigned int __nocast sleep) 55ktrace_alloc(int nentries, unsigned int __nocast sleep)
54{ 56{
55 ktrace_t *ktp; 57 ktrace_t *ktp;
56 ktrace_entry_t *ktep; 58 ktrace_entry_t *ktep;
59 int entries;
57 60
58 ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep); 61 ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep);
59 62
@@ -70,11 +73,12 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
70 /* 73 /*
71 * Special treatment for buffers with the ktrace_zentries entries 74 * Special treatment for buffers with the ktrace_zentries entries
72 */ 75 */
73 if (nentries == ktrace_zentries) { 76 entries = roundup_pow_of_two(nentries);
77 if (entries == ktrace_zentries) {
74 ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone, 78 ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone,
75 sleep); 79 sleep);
76 } else { 80 } else {
77 ktep = (ktrace_entry_t*)kmem_zalloc((nentries * sizeof(*ktep)), 81 ktep = (ktrace_entry_t*)kmem_zalloc((entries * sizeof(*ktep)),
78 sleep | KM_LARGE); 82 sleep | KM_LARGE);
79 } 83 }
80 84
@@ -91,8 +95,10 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
91 } 95 }
92 96
93 ktp->kt_entries = ktep; 97 ktp->kt_entries = ktep;
94 ktp->kt_nentries = nentries; 98 ktp->kt_nentries = entries;
95 ktp->kt_index = 0; 99 ASSERT(is_power_of_2(entries));
100 ktp->kt_index_mask = entries - 1;
101 atomic_set(&ktp->kt_index, 0);
96 ktp->kt_rollover = 0; 102 ktp->kt_rollover = 0;
97 return ktp; 103 return ktp;
98} 104}
@@ -151,8 +157,6 @@ ktrace_enter(
151 void *val14, 157 void *val14,
152 void *val15) 158 void *val15)
153{ 159{
154 static DEFINE_SPINLOCK(wrap_lock);
155 unsigned long flags;
156 int index; 160 int index;
157 ktrace_entry_t *ktep; 161 ktrace_entry_t *ktep;
158 162
@@ -161,12 +165,8 @@ ktrace_enter(
161 /* 165 /*
162 * Grab an entry by pushing the index up to the next one. 166 * Grab an entry by pushing the index up to the next one.
163 */ 167 */
164 spin_lock_irqsave(&wrap_lock, flags); 168 index = atomic_add_return(1, &ktp->kt_index);
165 index = ktp->kt_index; 169 index = (index - 1) & ktp->kt_index_mask;
166 if (++ktp->kt_index == ktp->kt_nentries)
167 ktp->kt_index = 0;
168 spin_unlock_irqrestore(&wrap_lock, flags);
169
170 if (!ktp->kt_rollover && index == ktp->kt_nentries - 1) 170 if (!ktp->kt_rollover && index == ktp->kt_nentries - 1)
171 ktp->kt_rollover = 1; 171 ktp->kt_rollover = 1;
172 172
@@ -199,11 +199,12 @@ int
199ktrace_nentries( 199ktrace_nentries(
200 ktrace_t *ktp) 200 ktrace_t *ktp)
201{ 201{
202 if (ktp == NULL) { 202 int index;
203 if (ktp == NULL)
203 return 0; 204 return 0;
204 }
205 205
206 return (ktp->kt_rollover ? ktp->kt_nentries : ktp->kt_index); 206 index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
207 return (ktp->kt_rollover ? ktp->kt_nentries : index);
207} 208}
208 209
209/* 210/*
@@ -228,7 +229,7 @@ ktrace_first(ktrace_t *ktp, ktrace_snap_t *ktsp)
228 int nentries; 229 int nentries;
229 230
230 if (ktp->kt_rollover) 231 if (ktp->kt_rollover)
231 index = ktp->kt_index; 232 index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
232 else 233 else
233 index = 0; 234 index = 0;
234 235
diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h
index 56e72b40a859..741d6947ca60 100644
--- a/fs/xfs/support/ktrace.h
+++ b/fs/xfs/support/ktrace.h
@@ -30,7 +30,8 @@ typedef struct ktrace_entry {
30 */ 30 */
31typedef struct ktrace { 31typedef struct ktrace {
32 int kt_nentries; /* number of entries in trace buf */ 32 int kt_nentries; /* number of entries in trace buf */
33 int kt_index; /* current index in entries */ 33 atomic_t kt_index; /* current index in entries */
34 unsigned int kt_index_mask;
34 int kt_rollover; 35 int kt_rollover;
35 ktrace_entry_t *kt_entries; /* buffer of entries */ 36 ktrace_entry_t *kt_entries; /* buffer of entries */
36} ktrace_t; 37} ktrace_t;
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 540e4c989825..765aaf65e2d3 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -22,7 +22,7 @@
22#define STATIC 22#define STATIC
23#define DEBUG 1 23#define DEBUG 1
24#define XFS_BUF_LOCK_TRACKING 1 24#define XFS_BUF_LOCK_TRACKING 1
25/* #define QUOTADEBUG 1 */ 25#define QUOTADEBUG 1
26#endif 26#endif
27 27
28#ifdef CONFIG_XFS_TRACE 28#ifdef CONFIG_XFS_TRACE
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 7272fe39a92d..8e130b9720ae 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -307,12 +307,13 @@ xfs_acl_vset(
307 307
308 VN_HOLD(vp); 308 VN_HOLD(vp);
309 error = xfs_acl_allow_set(vp, kind); 309 error = xfs_acl_allow_set(vp, kind);
310 if (error)
311 goto out;
312 310
313 /* Incoming ACL exists, set file mode based on its value */ 311 /* Incoming ACL exists, set file mode based on its value */
314 if (kind == _ACL_TYPE_ACCESS) 312 if (!error && kind == _ACL_TYPE_ACCESS)
315 xfs_acl_setmode(vp, xfs_acl, &basicperms); 313 error = xfs_acl_setmode(vp, xfs_acl, &basicperms);
314
315 if (error)
316 goto out;
316 317
317 /* 318 /*
318 * If we have more than std unix permissions, set up the actual attr. 319 * If we have more than std unix permissions, set up the actual attr.
@@ -323,7 +324,7 @@ xfs_acl_vset(
323 if (!basicperms) { 324 if (!basicperms) {
324 xfs_acl_set_attr(vp, xfs_acl, kind, &error); 325 xfs_acl_set_attr(vp, xfs_acl, kind, &error);
325 } else { 326 } else {
326 xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); 327 error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
327 } 328 }
328 329
329out: 330out:
@@ -707,7 +708,9 @@ xfs_acl_inherit(
707 708
708 memcpy(cacl, pdaclp, sizeof(xfs_acl_t)); 709 memcpy(cacl, pdaclp, sizeof(xfs_acl_t));
709 xfs_acl_filter_mode(mode, cacl); 710 xfs_acl_filter_mode(mode, cacl);
710 xfs_acl_setmode(vp, cacl, &basicperms); 711 error = xfs_acl_setmode(vp, cacl, &basicperms);
712 if (error)
713 goto out_error;
711 714
712 /* 715 /*
713 * Set the Default and Access ACL on the file. The mode is already 716 * Set the Default and Access ACL on the file. The mode is already
@@ -720,6 +723,7 @@ xfs_acl_inherit(
720 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); 723 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
721 if (!error && !basicperms) 724 if (!error && !basicperms)
722 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); 725 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
726out_error:
723 _ACL_FREE(cacl); 727 _ACL_FREE(cacl);
724 return error; 728 return error;
725} 729}
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index bdbfbbee4959..1956f83489f1 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -45,7 +45,7 @@
45#define XFSA_FIXUP_BNO_OK 1 45#define XFSA_FIXUP_BNO_OK 1
46#define XFSA_FIXUP_CNT_OK 2 46#define XFSA_FIXUP_CNT_OK 2
47 47
48STATIC int 48STATIC void
49xfs_alloc_search_busy(xfs_trans_t *tp, 49xfs_alloc_search_busy(xfs_trans_t *tp,
50 xfs_agnumber_t agno, 50 xfs_agnumber_t agno,
51 xfs_agblock_t bno, 51 xfs_agblock_t bno,
@@ -55,24 +55,24 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
55ktrace_t *xfs_alloc_trace_buf; 55ktrace_t *xfs_alloc_trace_buf;
56 56
57#define TRACE_ALLOC(s,a) \ 57#define TRACE_ALLOC(s,a) \
58 xfs_alloc_trace_alloc(__FUNCTION__, s, a, __LINE__) 58 xfs_alloc_trace_alloc(__func__, s, a, __LINE__)
59#define TRACE_FREE(s,a,b,x,f) \ 59#define TRACE_FREE(s,a,b,x,f) \
60 xfs_alloc_trace_free(__FUNCTION__, s, mp, a, b, x, f, __LINE__) 60 xfs_alloc_trace_free(__func__, s, mp, a, b, x, f, __LINE__)
61#define TRACE_MODAGF(s,a,f) \ 61#define TRACE_MODAGF(s,a,f) \
62 xfs_alloc_trace_modagf(__FUNCTION__, s, mp, a, f, __LINE__) 62 xfs_alloc_trace_modagf(__func__, s, mp, a, f, __LINE__)
63#define TRACE_BUSY(__FUNCTION__,s,ag,agb,l,sl,tp) \ 63#define TRACE_BUSY(__func__,s,ag,agb,l,sl,tp) \
64 xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) 64 xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
65#define TRACE_UNBUSY(__FUNCTION__,s,ag,sl,tp) \ 65#define TRACE_UNBUSY(__func__,s,ag,sl,tp) \
66 xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) 66 xfs_alloc_trace_busy(__func__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
67#define TRACE_BUSYSEARCH(__FUNCTION__,s,ag,agb,l,sl,tp) \ 67#define TRACE_BUSYSEARCH(__func__,s,ag,agb,l,tp) \
68 xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) 68 xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, 0, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
69#else 69#else
70#define TRACE_ALLOC(s,a) 70#define TRACE_ALLOC(s,a)
71#define TRACE_FREE(s,a,b,x,f) 71#define TRACE_FREE(s,a,b,x,f)
72#define TRACE_MODAGF(s,a,f) 72#define TRACE_MODAGF(s,a,f)
73#define TRACE_BUSY(s,a,ag,agb,l,sl,tp) 73#define TRACE_BUSY(s,a,ag,agb,l,sl,tp)
74#define TRACE_UNBUSY(fname,s,ag,sl,tp) 74#define TRACE_UNBUSY(fname,s,ag,sl,tp)
75#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp) 75#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,tp)
76#endif /* XFS_ALLOC_TRACE */ 76#endif /* XFS_ALLOC_TRACE */
77 77
78/* 78/*
@@ -93,7 +93,7 @@ STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
93 * Compute aligned version of the found extent. 93 * Compute aligned version of the found extent.
94 * Takes alignment and min length into account. 94 * Takes alignment and min length into account.
95 */ 95 */
96STATIC int /* success (>= minlen) */ 96STATIC void
97xfs_alloc_compute_aligned( 97xfs_alloc_compute_aligned(
98 xfs_agblock_t foundbno, /* starting block in found extent */ 98 xfs_agblock_t foundbno, /* starting block in found extent */
99 xfs_extlen_t foundlen, /* length in found extent */ 99 xfs_extlen_t foundlen, /* length in found extent */
@@ -116,7 +116,6 @@ xfs_alloc_compute_aligned(
116 } 116 }
117 *resbno = bno; 117 *resbno = bno;
118 *reslen = len; 118 *reslen = len;
119 return len >= minlen;
120} 119}
121 120
122/* 121/*
@@ -837,9 +836,9 @@ xfs_alloc_ag_vextent_near(
837 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) 836 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
838 goto error0; 837 goto error0;
839 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 838 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
840 if (!xfs_alloc_compute_aligned(ltbno, ltlen, 839 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment,
841 args->alignment, args->minlen, 840 args->minlen, &ltbnoa, &ltlena);
842 &ltbnoa, &ltlena)) 841 if (ltlena < args->minlen)
843 continue; 842 continue;
844 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 843 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
845 xfs_alloc_fix_len(args); 844 xfs_alloc_fix_len(args);
@@ -958,9 +957,9 @@ xfs_alloc_ag_vextent_near(
958 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i))) 957 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
959 goto error0; 958 goto error0;
960 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 959 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
961 if (xfs_alloc_compute_aligned(ltbno, ltlen, 960 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment,
962 args->alignment, args->minlen, 961 args->minlen, &ltbnoa, &ltlena);
963 &ltbnoa, &ltlena)) 962 if (ltlena >= args->minlen)
964 break; 963 break;
965 if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i))) 964 if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i)))
966 goto error0; 965 goto error0;
@@ -974,9 +973,9 @@ xfs_alloc_ag_vextent_near(
974 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i))) 973 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
975 goto error0; 974 goto error0;
976 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 975 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
977 if (xfs_alloc_compute_aligned(gtbno, gtlen, 976 xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment,
978 args->alignment, args->minlen, 977 args->minlen, &gtbnoa, &gtlena);
979 &gtbnoa, &gtlena)) 978 if (gtlena >= args->minlen)
980 break; 979 break;
981 if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) 980 if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
982 goto error0; 981 goto error0;
@@ -2562,9 +2561,10 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
2562 2561
2563 2562
2564/* 2563/*
2565 * returns non-zero if any of (agno,bno):len is in a busy list 2564 * If we find the extent in the busy list, force the log out to get the
2565 * extent out of the busy list so the caller can use it straight away.
2566 */ 2566 */
2567STATIC int 2567STATIC void
2568xfs_alloc_search_busy(xfs_trans_t *tp, 2568xfs_alloc_search_busy(xfs_trans_t *tp,
2569 xfs_agnumber_t agno, 2569 xfs_agnumber_t agno,
2570 xfs_agblock_t bno, 2570 xfs_agblock_t bno,
@@ -2572,7 +2572,6 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2572{ 2572{
2573 xfs_mount_t *mp; 2573 xfs_mount_t *mp;
2574 xfs_perag_busy_t *bsy; 2574 xfs_perag_busy_t *bsy;
2575 int n;
2576 xfs_agblock_t uend, bend; 2575 xfs_agblock_t uend, bend;
2577 xfs_lsn_t lsn; 2576 xfs_lsn_t lsn;
2578 int cnt; 2577 int cnt;
@@ -2585,21 +2584,18 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2585 uend = bno + len - 1; 2584 uend = bno + len - 1;
2586 2585
2587 /* search pagb_list for this slot, skipping open slots */ 2586 /* search pagb_list for this slot, skipping open slots */
2588 for (bsy = mp->m_perag[agno].pagb_list, n = 0; 2587 for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) {
2589 cnt; bsy++, n++) {
2590 2588
2591 /* 2589 /*
2592 * (start1,length1) within (start2, length2) 2590 * (start1,length1) within (start2, length2)
2593 */ 2591 */
2594 if (bsy->busy_tp != NULL) { 2592 if (bsy->busy_tp != NULL) {
2595 bend = bsy->busy_start + bsy->busy_length - 1; 2593 bend = bsy->busy_start + bsy->busy_length - 1;
2596 if ((bno > bend) || 2594 if ((bno > bend) || (uend < bsy->busy_start)) {
2597 (uend < bsy->busy_start)) {
2598 cnt--; 2595 cnt--;
2599 } else { 2596 } else {
2600 TRACE_BUSYSEARCH("xfs_alloc_search_busy", 2597 TRACE_BUSYSEARCH("xfs_alloc_search_busy",
2601 "found1", agno, bno, len, n, 2598 "found1", agno, bno, len, tp);
2602 tp);
2603 break; 2599 break;
2604 } 2600 }
2605 } 2601 }
@@ -2610,15 +2606,12 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2610 * transaction that freed the block 2606 * transaction that freed the block
2611 */ 2607 */
2612 if (cnt) { 2608 if (cnt) {
2613 TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp); 2609 TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, tp);
2614 lsn = bsy->busy_tp->t_commit_lsn; 2610 lsn = bsy->busy_tp->t_commit_lsn;
2615 spin_unlock(&mp->m_perag[agno].pagb_lock); 2611 spin_unlock(&mp->m_perag[agno].pagb_lock);
2616 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); 2612 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
2617 } else { 2613 } else {
2618 TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, n, tp); 2614 TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, tp);
2619 n = -1;
2620 spin_unlock(&mp->m_perag[agno].pagb_lock); 2615 spin_unlock(&mp->m_perag[agno].pagb_lock);
2621 } 2616 }
2622
2623 return n;
2624} 2617}
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index e58f321fdae9..36d781ee5fcc 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2647,14 +2647,6 @@ attr_trusted_capable(
2647} 2647}
2648 2648
2649STATIC int 2649STATIC int
2650attr_secure_capable(
2651 bhv_vnode_t *vp,
2652 cred_t *cred)
2653{
2654 return -ENOSECURITY;
2655}
2656
2657STATIC int
2658attr_system_set( 2650attr_system_set(
2659 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) 2651 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2660{ 2652{
@@ -2724,7 +2716,7 @@ struct attrnames attr_secure = {
2724 .attr_get = attr_generic_get, 2716 .attr_get = attr_generic_get,
2725 .attr_set = attr_generic_set, 2717 .attr_set = attr_generic_set,
2726 .attr_remove = attr_generic_remove, 2718 .attr_remove = attr_generic_remove,
2727 .attr_capable = attr_secure_capable, 2719 .attr_capable = (attrcapable_t)fs_noerr,
2728}; 2720};
2729 2721
2730struct attrnames attr_user = { 2722struct attrnames attr_user = {
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index b08e2a2a8add..303d41e4217b 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -166,7 +166,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
166 166
167 if (!(mp->m_flags & XFS_MOUNT_ATTR2)) { 167 if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
168 if (bytes <= XFS_IFORK_ASIZE(dp)) 168 if (bytes <= XFS_IFORK_ASIZE(dp))
169 return mp->m_attroffset >> 3; 169 return dp->i_d.di_forkoff;
170 return 0; 170 return 0;
171 } 171 }
172 172
@@ -227,10 +227,10 @@ STATIC void
227xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp) 227xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
228{ 228{
229 if ((mp->m_flags & XFS_MOUNT_ATTR2) && 229 if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
230 !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) { 230 !(xfs_sb_version_hasattr2(&mp->m_sb))) {
231 spin_lock(&mp->m_sb_lock); 231 spin_lock(&mp->m_sb_lock);
232 if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) { 232 if (!xfs_sb_version_hasattr2(&mp->m_sb)) {
233 XFS_SB_VERSION_ADDATTR2(&mp->m_sb); 233 xfs_sb_version_addattr2(&mp->m_sb);
234 spin_unlock(&mp->m_sb_lock); 234 spin_unlock(&mp->m_sb_lock);
235 xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); 235 xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
236 } else 236 } else
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index 48228848f5ae..fab0b6d5a41b 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -25,6 +25,109 @@
25 * XFS bit manipulation routines, used in non-realtime code. 25 * XFS bit manipulation routines, used in non-realtime code.
26 */ 26 */
27 27
28#ifndef HAVE_ARCH_HIGHBIT
29/*
30 * Index of high bit number in byte, -1 for none set, 0..7 otherwise.
31 */
32static const char xfs_highbit[256] = {
33 -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */
34 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */
35 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */
36 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 1f */
37 5, 5, 5, 5, 5, 5, 5, 5, /* 20 .. 27 */
38 5, 5, 5, 5, 5, 5, 5, 5, /* 28 .. 2f */
39 5, 5, 5, 5, 5, 5, 5, 5, /* 30 .. 37 */
40 5, 5, 5, 5, 5, 5, 5, 5, /* 38 .. 3f */
41 6, 6, 6, 6, 6, 6, 6, 6, /* 40 .. 47 */
42 6, 6, 6, 6, 6, 6, 6, 6, /* 48 .. 4f */
43 6, 6, 6, 6, 6, 6, 6, 6, /* 50 .. 57 */
44 6, 6, 6, 6, 6, 6, 6, 6, /* 58 .. 5f */
45 6, 6, 6, 6, 6, 6, 6, 6, /* 60 .. 67 */
46 6, 6, 6, 6, 6, 6, 6, 6, /* 68 .. 6f */
47 6, 6, 6, 6, 6, 6, 6, 6, /* 70 .. 77 */
48 6, 6, 6, 6, 6, 6, 6, 6, /* 78 .. 7f */
49 7, 7, 7, 7, 7, 7, 7, 7, /* 80 .. 87 */
50 7, 7, 7, 7, 7, 7, 7, 7, /* 88 .. 8f */
51 7, 7, 7, 7, 7, 7, 7, 7, /* 90 .. 97 */
52 7, 7, 7, 7, 7, 7, 7, 7, /* 98 .. 9f */
53 7, 7, 7, 7, 7, 7, 7, 7, /* a0 .. a7 */
54 7, 7, 7, 7, 7, 7, 7, 7, /* a8 .. af */
55 7, 7, 7, 7, 7, 7, 7, 7, /* b0 .. b7 */
56 7, 7, 7, 7, 7, 7, 7, 7, /* b8 .. bf */
57 7, 7, 7, 7, 7, 7, 7, 7, /* c0 .. c7 */
58 7, 7, 7, 7, 7, 7, 7, 7, /* c8 .. cf */
59 7, 7, 7, 7, 7, 7, 7, 7, /* d0 .. d7 */
60 7, 7, 7, 7, 7, 7, 7, 7, /* d8 .. df */
61 7, 7, 7, 7, 7, 7, 7, 7, /* e0 .. e7 */
62 7, 7, 7, 7, 7, 7, 7, 7, /* e8 .. ef */
63 7, 7, 7, 7, 7, 7, 7, 7, /* f0 .. f7 */
64 7, 7, 7, 7, 7, 7, 7, 7, /* f8 .. ff */
65};
66#endif
67
68/*
69 * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
70 */
71inline int
72xfs_highbit32(
73 __uint32_t v)
74{
75#ifdef HAVE_ARCH_HIGHBIT
76 return highbit32(v);
77#else
78 int i;
79
80 if (v & 0xffff0000)
81 if (v & 0xff000000)
82 i = 24;
83 else
84 i = 16;
85 else if (v & 0x0000ffff)
86 if (v & 0x0000ff00)
87 i = 8;
88 else
89 i = 0;
90 else
91 return -1;
92 return i + xfs_highbit[(v >> i) & 0xff];
93#endif
94}
95
96/*
97 * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set.
98 */
99int
100xfs_lowbit64(
101 __uint64_t v)
102{
103 __uint32_t w = (__uint32_t)v;
104 int n = 0;
105
106 if (w) { /* lower bits */
107 n = ffs(w);
108 } else { /* upper bits */
109 w = (__uint32_t)(v >> 32);
110 if (w && (n = ffs(w)))
111 n += 32;
112 }
113 return n - 1;
114}
115
116/*
117 * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set.
118 */
119int
120xfs_highbit64(
121 __uint64_t v)
122{
123 __uint32_t h = (__uint32_t)(v >> 32);
124
125 if (h)
126 return xfs_highbit32(h) + 32;
127 return xfs_highbit32((__uint32_t)v);
128}
129
130
28/* 131/*
29 * Return whether bitmap is empty. 132 * Return whether bitmap is empty.
30 * Size is number of words in the bitmap, which is padded to word boundary 133 * Size is number of words in the bitmap, which is padded to word boundary
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 325a007dec91..082641a9782c 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -47,30 +47,13 @@ static inline __uint64_t xfs_mask64lo(int n)
47} 47}
48 48
49/* Get high bit set out of 32-bit argument, -1 if none set */ 49/* Get high bit set out of 32-bit argument, -1 if none set */
50static inline int xfs_highbit32(__uint32_t v) 50extern int xfs_highbit32(__uint32_t v);
51{
52 return fls(v) - 1;
53}
54
55/* Get high bit set out of 64-bit argument, -1 if none set */
56static inline int xfs_highbit64(__uint64_t v)
57{
58 return fls64(v) - 1;
59}
60
61/* Get low bit set out of 32-bit argument, -1 if none set */
62static inline int xfs_lowbit32(__uint32_t v)
63{
64 __uint32_t t = v;
65 return (t) ? find_first_bit((unsigned long *)&t, 32) : -1;
66}
67 51
68/* Get low bit set out of 64-bit argument, -1 if none set */ 52/* Get low bit set out of 64-bit argument, -1 if none set */
69static inline int xfs_lowbit64(__uint64_t v) 53extern int xfs_lowbit64(__uint64_t v);
70{ 54
71 __uint64_t t = v; 55/* Get high bit set out of 64-bit argument, -1 if none set */
72 return (t) ? find_first_bit((unsigned long *)&t, 64) : -1; 56extern int xfs_highbit64(__uint64_t);
73}
74 57
75/* Return whether bitmap is empty (1 == empty) */ 58/* Return whether bitmap is empty (1 == empty) */
76extern int xfs_bitmap_empty(uint *map, uint size); 59extern int xfs_bitmap_empty(uint *map, uint size);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 1c0a5a585a82..eb198c01c35d 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -323,13 +323,13 @@ xfs_bmap_trace_pre_update(
323 int whichfork); /* data or attr fork */ 323 int whichfork); /* data or attr fork */
324 324
325#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) \ 325#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) \
326 xfs_bmap_trace_delete(__FUNCTION__,d,ip,i,c,w) 326 xfs_bmap_trace_delete(__func__,d,ip,i,c,w)
327#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \ 327#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \
328 xfs_bmap_trace_insert(__FUNCTION__,d,ip,i,c,r1,r2,w) 328 xfs_bmap_trace_insert(__func__,d,ip,i,c,r1,r2,w)
329#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) \ 329#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) \
330 xfs_bmap_trace_post_update(__FUNCTION__,d,ip,i,w) 330 xfs_bmap_trace_post_update(__func__,d,ip,i,w)
331#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) \ 331#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) \
332 xfs_bmap_trace_pre_update(__FUNCTION__,d,ip,i,w) 332 xfs_bmap_trace_pre_update(__func__,d,ip,i,w)
333#else 333#else
334#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) 334#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)
335#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) 335#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w)
@@ -2402,7 +2402,7 @@ xfs_bmap_extsize_align(
2402 2402
2403#define XFS_ALLOC_GAP_UNITS 4 2403#define XFS_ALLOC_GAP_UNITS 4
2404 2404
2405STATIC int 2405STATIC void
2406xfs_bmap_adjacent( 2406xfs_bmap_adjacent(
2407 xfs_bmalloca_t *ap) /* bmap alloc argument struct */ 2407 xfs_bmalloca_t *ap) /* bmap alloc argument struct */
2408{ 2408{
@@ -2548,7 +2548,6 @@ xfs_bmap_adjacent(
2548 ap->rval = gotbno; 2548 ap->rval = gotbno;
2549 } 2549 }
2550#undef ISVALID 2550#undef ISVALID
2551 return 0;
2552} 2551}
2553 2552
2554STATIC int 2553STATIC int
@@ -4047,17 +4046,17 @@ xfs_bmap_add_attrfork(
4047 xfs_trans_log_inode(tp, ip, logflags); 4046 xfs_trans_log_inode(tp, ip, logflags);
4048 if (error) 4047 if (error)
4049 goto error2; 4048 goto error2;
4050 if (!XFS_SB_VERSION_HASATTR(&mp->m_sb) || 4049 if (!xfs_sb_version_hasattr(&mp->m_sb) ||
4051 (!XFS_SB_VERSION_HASATTR2(&mp->m_sb) && version == 2)) { 4050 (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
4052 __int64_t sbfields = 0; 4051 __int64_t sbfields = 0;
4053 4052
4054 spin_lock(&mp->m_sb_lock); 4053 spin_lock(&mp->m_sb_lock);
4055 if (!XFS_SB_VERSION_HASATTR(&mp->m_sb)) { 4054 if (!xfs_sb_version_hasattr(&mp->m_sb)) {
4056 XFS_SB_VERSION_ADDATTR(&mp->m_sb); 4055 xfs_sb_version_addattr(&mp->m_sb);
4057 sbfields |= XFS_SB_VERSIONNUM; 4056 sbfields |= XFS_SB_VERSIONNUM;
4058 } 4057 }
4059 if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb) && version == 2) { 4058 if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
4060 XFS_SB_VERSION_ADDATTR2(&mp->m_sb); 4059 xfs_sb_version_addattr2(&mp->m_sb);
4061 sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); 4060 sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
4062 } 4061 }
4063 if (sbfields) { 4062 if (sbfields) {
@@ -4154,16 +4153,21 @@ xfs_bmap_compute_maxlevels(
4154 * number of leaf entries, is controlled by the type of di_nextents 4153 * number of leaf entries, is controlled by the type of di_nextents
4155 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents 4154 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
4156 * (a signed 16-bit number, xfs_aextnum_t). 4155 * (a signed 16-bit number, xfs_aextnum_t).
4156 *
4157 * Note that we can no longer assume that if we are in ATTR1 that
4158 * the fork offset of all the inodes will be (m_attroffset >> 3)
4159 * because we could have mounted with ATTR2 and then mounted back
4160 * with ATTR1, keeping the di_forkoff's fixed but probably at
4161 * various positions. Therefore, for both ATTR1 and ATTR2
4162 * we have to assume the worst case scenario of a minimum size
4163 * available.
4157 */ 4164 */
4158 if (whichfork == XFS_DATA_FORK) { 4165 if (whichfork == XFS_DATA_FORK) {
4159 maxleafents = MAXEXTNUM; 4166 maxleafents = MAXEXTNUM;
4160 sz = (mp->m_flags & XFS_MOUNT_ATTR2) ? 4167 sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
4161 XFS_BMDR_SPACE_CALC(MINDBTPTRS) : mp->m_attroffset;
4162 } else { 4168 } else {
4163 maxleafents = MAXAEXTNUM; 4169 maxleafents = MAXAEXTNUM;
4164 sz = (mp->m_flags & XFS_MOUNT_ATTR2) ? 4170 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
4165 XFS_BMDR_SPACE_CALC(MINABTPTRS) :
4166 mp->m_sb.sb_inodesize - mp->m_attroffset;
4167 } 4171 }
4168 maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0); 4172 maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
4169 minleafrecs = mp->m_bmap_dmnr[0]; 4173 minleafrecs = mp->m_bmap_dmnr[0];
@@ -5043,7 +5047,7 @@ xfs_bmapi(
5043 * A wasdelay extent has been initialized, so 5047 * A wasdelay extent has been initialized, so
5044 * shouldn't be flagged as unwritten. 5048 * shouldn't be flagged as unwritten.
5045 */ 5049 */
5046 if (wr && XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { 5050 if (wr && xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5047 if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) 5051 if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
5048 got.br_state = XFS_EXT_UNWRITTEN; 5052 got.br_state = XFS_EXT_UNWRITTEN;
5049 } 5053 }
@@ -5483,7 +5487,7 @@ xfs_bunmapi(
5483 * get rid of part of a realtime extent. 5487 * get rid of part of a realtime extent.
5484 */ 5488 */
5485 if (del.br_state == XFS_EXT_UNWRITTEN || 5489 if (del.br_state == XFS_EXT_UNWRITTEN ||
5486 !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { 5490 !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5487 /* 5491 /*
5488 * This piece is unwritten, or we're not 5492 * This piece is unwritten, or we're not
5489 * using unwritten extents. Skip over it. 5493 * using unwritten extents. Skip over it.
@@ -5535,7 +5539,7 @@ xfs_bunmapi(
5535 } else if ((del.br_startoff == start && 5539 } else if ((del.br_startoff == start &&
5536 (del.br_state == XFS_EXT_UNWRITTEN || 5540 (del.br_state == XFS_EXT_UNWRITTEN ||
5537 xfs_trans_get_block_res(tp) == 0)) || 5541 xfs_trans_get_block_res(tp) == 0)) ||
5538 !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { 5542 !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5539 /* 5543 /*
5540 * Can't make it unwritten. There isn't 5544 * Can't make it unwritten. There isn't
5541 * a full extent here so just skip it. 5545 * a full extent here so just skip it.
@@ -5772,7 +5776,6 @@ xfs_getbmap(
5772 int error; /* return value */ 5776 int error; /* return value */
5773 __int64_t fixlen; /* length for -1 case */ 5777 __int64_t fixlen; /* length for -1 case */
5774 int i; /* extent number */ 5778 int i; /* extent number */
5775 bhv_vnode_t *vp; /* corresponding vnode */
5776 int lock; /* lock state */ 5779 int lock; /* lock state */
5777 xfs_bmbt_irec_t *map; /* buffer for user's data */ 5780 xfs_bmbt_irec_t *map; /* buffer for user's data */
5778 xfs_mount_t *mp; /* file system mount point */ 5781 xfs_mount_t *mp; /* file system mount point */
@@ -5789,7 +5792,6 @@ xfs_getbmap(
5789 int bmapi_flags; /* flags for xfs_bmapi */ 5792 int bmapi_flags; /* flags for xfs_bmapi */
5790 __int32_t oflags; /* getbmapx bmv_oflags field */ 5793 __int32_t oflags; /* getbmapx bmv_oflags field */
5791 5794
5792 vp = XFS_ITOV(ip);
5793 mp = ip->i_mount; 5795 mp = ip->i_mount;
5794 5796
5795 whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; 5797 whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
@@ -5811,7 +5813,7 @@ xfs_getbmap(
5811 if ((interface & BMV_IF_NO_DMAPI_READ) == 0 && 5813 if ((interface & BMV_IF_NO_DMAPI_READ) == 0 &&
5812 DM_EVENT_ENABLED(ip, DM_EVENT_READ) && 5814 DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
5813 whichfork == XFS_DATA_FORK) { 5815 whichfork == XFS_DATA_FORK) {
5814 error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 0, 0, 0, NULL); 5816 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
5815 if (error) 5817 if (error)
5816 return XFS_ERROR(error); 5818 return XFS_ERROR(error);
5817 } 5819 }
@@ -5869,6 +5871,10 @@ xfs_getbmap(
5869 /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ 5871 /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
5870 error = xfs_flush_pages(ip, (xfs_off_t)0, 5872 error = xfs_flush_pages(ip, (xfs_off_t)0,
5871 -1, 0, FI_REMAPF); 5873 -1, 0, FI_REMAPF);
5874 if (error) {
5875 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
5876 return error;
5877 }
5872 } 5878 }
5873 5879
5874 ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0); 5880 ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0);
@@ -6162,10 +6168,10 @@ xfs_check_block(
6162 } 6168 }
6163 if (*thispa == *pp) { 6169 if (*thispa == *pp) {
6164 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", 6170 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
6165 __FUNCTION__, j, i, 6171 __func__, j, i,
6166 (unsigned long long)be64_to_cpu(*thispa)); 6172 (unsigned long long)be64_to_cpu(*thispa));
6167 panic("%s: ptrs are equal in node\n", 6173 panic("%s: ptrs are equal in node\n",
6168 __FUNCTION__); 6174 __func__);
6169 } 6175 }
6170 } 6176 }
6171 } 6177 }
@@ -6192,7 +6198,7 @@ xfs_bmap_check_leaf_extents(
6192 xfs_mount_t *mp; /* file system mount structure */ 6198 xfs_mount_t *mp; /* file system mount structure */
6193 __be64 *pp; /* pointer to block address */ 6199 __be64 *pp; /* pointer to block address */
6194 xfs_bmbt_rec_t *ep; /* pointer to current extent */ 6200 xfs_bmbt_rec_t *ep; /* pointer to current extent */
6195 xfs_bmbt_rec_t *lastp; /* pointer to previous extent */ 6201 xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */
6196 xfs_bmbt_rec_t *nextp; /* pointer to next extent */ 6202 xfs_bmbt_rec_t *nextp; /* pointer to next extent */
6197 int bp_release = 0; 6203 int bp_release = 0;
6198 6204
@@ -6262,7 +6268,6 @@ xfs_bmap_check_leaf_extents(
6262 /* 6268 /*
6263 * Loop over all leaf nodes checking that all extents are in the right order. 6269 * Loop over all leaf nodes checking that all extents are in the right order.
6264 */ 6270 */
6265 lastp = NULL;
6266 for (;;) { 6271 for (;;) {
6267 xfs_fsblock_t nextbno; 6272 xfs_fsblock_t nextbno;
6268 xfs_extnum_t num_recs; 6273 xfs_extnum_t num_recs;
@@ -6283,18 +6288,16 @@ xfs_bmap_check_leaf_extents(
6283 */ 6288 */
6284 6289
6285 ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); 6290 ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1);
6291 if (i) {
6292 xfs_btree_check_rec(XFS_BTNUM_BMAP, &last, ep);
6293 }
6286 for (j = 1; j < num_recs; j++) { 6294 for (j = 1; j < num_recs; j++) {
6287 nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1); 6295 nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1);
6288 if (lastp) { 6296 xfs_btree_check_rec(XFS_BTNUM_BMAP, ep, nextp);
6289 xfs_btree_check_rec(XFS_BTNUM_BMAP,
6290 (void *)lastp, (void *)ep);
6291 }
6292 xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)ep,
6293 (void *)(nextp));
6294 lastp = ep;
6295 ep = nextp; 6297 ep = nextp;
6296 } 6298 }
6297 6299
6300 last = *ep;
6298 i += num_recs; 6301 i += num_recs;
6299 if (bp_release) { 6302 if (bp_release) {
6300 bp_release = 0; 6303 bp_release = 0;
@@ -6325,13 +6328,13 @@ xfs_bmap_check_leaf_extents(
6325 return; 6328 return;
6326 6329
6327error0: 6330error0:
6328 cmn_err(CE_WARN, "%s: at error0", __FUNCTION__); 6331 cmn_err(CE_WARN, "%s: at error0", __func__);
6329 if (bp_release) 6332 if (bp_release)
6330 xfs_trans_brelse(NULL, bp); 6333 xfs_trans_brelse(NULL, bp);
6331error_norelse: 6334error_norelse:
6332 cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents", 6335 cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents",
6333 __FUNCTION__, i); 6336 __func__, i);
6334 panic("%s: CORRUPTED BTREE OR SOMETHING", __FUNCTION__); 6337 panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
6335 return; 6338 return;
6336} 6339}
6337#endif 6340#endif
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 87224b7d7984..6ff70cda451c 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -151,7 +151,7 @@ xfs_bmap_trace_exlist(
151 xfs_extnum_t cnt, /* count of entries in list */ 151 xfs_extnum_t cnt, /* count of entries in list */
152 int whichfork); /* data or attr fork */ 152 int whichfork); /* data or attr fork */
153#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ 153#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
154 xfs_bmap_trace_exlist(__FUNCTION__,ip,c,w) 154 xfs_bmap_trace_exlist(__func__,ip,c,w)
155#else 155#else
156#define XFS_BMAP_TRACE_EXLIST(ip,c,w) 156#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
157#endif 157#endif
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index bd18987326a3..4f0e849d973e 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -275,21 +275,21 @@ xfs_bmbt_trace_cursor(
275} 275}
276 276
277#define XFS_BMBT_TRACE_ARGBI(c,b,i) \ 277#define XFS_BMBT_TRACE_ARGBI(c,b,i) \
278 xfs_bmbt_trace_argbi(__FUNCTION__, c, b, i, __LINE__) 278 xfs_bmbt_trace_argbi(__func__, c, b, i, __LINE__)
279#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ 279#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \
280 xfs_bmbt_trace_argbii(__FUNCTION__, c, b, i, j, __LINE__) 280 xfs_bmbt_trace_argbii(__func__, c, b, i, j, __LINE__)
281#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ 281#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \
282 xfs_bmbt_trace_argfffi(__FUNCTION__, c, o, b, i, j, __LINE__) 282 xfs_bmbt_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
283#define XFS_BMBT_TRACE_ARGI(c,i) \ 283#define XFS_BMBT_TRACE_ARGI(c,i) \
284 xfs_bmbt_trace_argi(__FUNCTION__, c, i, __LINE__) 284 xfs_bmbt_trace_argi(__func__, c, i, __LINE__)
285#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ 285#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \
286 xfs_bmbt_trace_argifk(__FUNCTION__, c, i, f, s, __LINE__) 286 xfs_bmbt_trace_argifk(__func__, c, i, f, s, __LINE__)
287#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ 287#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \
288 xfs_bmbt_trace_argifr(__FUNCTION__, c, i, f, r, __LINE__) 288 xfs_bmbt_trace_argifr(__func__, c, i, f, r, __LINE__)
289#define XFS_BMBT_TRACE_ARGIK(c,i,k) \ 289#define XFS_BMBT_TRACE_ARGIK(c,i,k) \
290 xfs_bmbt_trace_argik(__FUNCTION__, c, i, k, __LINE__) 290 xfs_bmbt_trace_argik(__func__, c, i, k, __LINE__)
291#define XFS_BMBT_TRACE_CURSOR(c,s) \ 291#define XFS_BMBT_TRACE_CURSOR(c,s) \
292 xfs_bmbt_trace_cursor(__FUNCTION__, c, s, __LINE__) 292 xfs_bmbt_trace_cursor(__func__, c, s, __LINE__)
293#else 293#else
294#define XFS_BMBT_TRACE_ARGBI(c,b,i) 294#define XFS_BMBT_TRACE_ARGBI(c,b,i)
295#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) 295#define XFS_BMBT_TRACE_ARGBII(c,b,i,j)
@@ -2027,6 +2027,24 @@ xfs_bmbt_increment(
2027 2027
2028/* 2028/*
2029 * Insert the current record at the point referenced by cur. 2029 * Insert the current record at the point referenced by cur.
2030 *
2031 * A multi-level split of the tree on insert will invalidate the original
2032 * cursor. It appears, however, that some callers assume that the cursor is
2033 * always valid. Hence if we do a multi-level split we need to revalidate the
2034 * cursor.
2035 *
2036 * When a split occurs, we will see a new cursor returned. Use that as a
2037 * trigger to determine if we need to revalidate the original cursor. If we get
2038 * a split, then use the original irec to lookup up the path of the record we
2039 * just inserted.
2040 *
2041 * Note that the fact that the btree root is in the inode means that we can
2042 * have the level of the tree change without a "split" occurring at the root
2043 * level. What happens is that the root is migrated to an allocated block and
2044 * the inode root is pointed to it. This means a single split can change the
2045 * level of the tree (level 2 -> level 3) and invalidate the old cursor. Hence
2046 * the level change should be accounted as a split so as to correctly trigger a
2047 * revalidation of the old cursor.
2030 */ 2048 */
2031int /* error */ 2049int /* error */
2032xfs_bmbt_insert( 2050xfs_bmbt_insert(
@@ -2039,11 +2057,14 @@ xfs_bmbt_insert(
2039 xfs_fsblock_t nbno; 2057 xfs_fsblock_t nbno;
2040 xfs_btree_cur_t *ncur; 2058 xfs_btree_cur_t *ncur;
2041 xfs_bmbt_rec_t nrec; 2059 xfs_bmbt_rec_t nrec;
2060 xfs_bmbt_irec_t oirec; /* original irec */
2042 xfs_btree_cur_t *pcur; 2061 xfs_btree_cur_t *pcur;
2062 int splits = 0;
2043 2063
2044 XFS_BMBT_TRACE_CURSOR(cur, ENTRY); 2064 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2045 level = 0; 2065 level = 0;
2046 nbno = NULLFSBLOCK; 2066 nbno = NULLFSBLOCK;
2067 oirec = cur->bc_rec.b;
2047 xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); 2068 xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
2048 ncur = NULL; 2069 ncur = NULL;
2049 pcur = cur; 2070 pcur = cur;
@@ -2052,11 +2073,13 @@ xfs_bmbt_insert(
2052 &i))) { 2073 &i))) {
2053 if (pcur != cur) 2074 if (pcur != cur)
2054 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 2075 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2055 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 2076 goto error0;
2056 return error;
2057 } 2077 }
2058 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 2078 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2059 if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) { 2079 if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
2080 /* allocating a new root is effectively a split */
2081 if (cur->bc_nlevels != pcur->bc_nlevels)
2082 splits++;
2060 cur->bc_nlevels = pcur->bc_nlevels; 2083 cur->bc_nlevels = pcur->bc_nlevels;
2061 cur->bc_private.b.allocated += 2084 cur->bc_private.b.allocated +=
2062 pcur->bc_private.b.allocated; 2085 pcur->bc_private.b.allocated;
@@ -2070,10 +2093,21 @@ xfs_bmbt_insert(
2070 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); 2093 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2071 } 2094 }
2072 if (ncur) { 2095 if (ncur) {
2096 splits++;
2073 pcur = ncur; 2097 pcur = ncur;
2074 ncur = NULL; 2098 ncur = NULL;
2075 } 2099 }
2076 } while (nbno != NULLFSBLOCK); 2100 } while (nbno != NULLFSBLOCK);
2101
2102 if (splits > 1) {
2103 /* revalidate the old cursor as we had a multi-level split */
2104 error = xfs_bmbt_lookup_eq(cur, oirec.br_startoff,
2105 oirec.br_startblock, oirec.br_blockcount, &i);
2106 if (error)
2107 goto error0;
2108 ASSERT(i == 1);
2109 }
2110
2077 XFS_BMBT_TRACE_CURSOR(cur, EXIT); 2111 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2078 *stat = i; 2112 *stat = i;
2079 return 0; 2113 return 0;
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 2d950e975918..cd0d4b4bb816 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -120,7 +120,7 @@ typedef enum {
120 * Extent state and extent format macros. 120 * Extent state and extent format macros.
121 */ 121 */
122#define XFS_EXTFMT_INODE(x) \ 122#define XFS_EXTFMT_INODE(x) \
123 (XFS_SB_VERSION_HASEXTFLGBIT(&((x)->i_mount->m_sb)) ? \ 123 (xfs_sb_version_hasextflgbit(&((x)->i_mount->m_sb)) ? \
124 XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE) 124 XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE)
125#define ISUNWRITTEN(x) ((x)->br_state == XFS_EXT_UNWRITTEN) 125#define ISUNWRITTEN(x) ((x)->br_state == XFS_EXT_UNWRITTEN)
126 126
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 63debd147eb5..53a71c62025d 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -645,7 +645,12 @@ xfs_buf_item_push(
645 bp = bip->bli_buf; 645 bp = bip->bli_buf;
646 646
647 if (XFS_BUF_ISDELAYWRITE(bp)) { 647 if (XFS_BUF_ISDELAYWRITE(bp)) {
648 xfs_bawrite(bip->bli_item.li_mountp, bp); 648 int error;
649 error = xfs_bawrite(bip->bli_item.li_mountp, bp);
650 if (error)
651 xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp,
652 "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p",
653 error, bip, bp);
649 } else { 654 } else {
650 xfs_buf_relse(bp); 655 xfs_buf_relse(bp);
651 } 656 }
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index d16c1b971074..d5d1e60ee224 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -86,7 +86,7 @@ struct xfs_mount_args {
86#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */ 86#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */
87#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */ 87#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */
88#define XFSMNT_BARRIER 0x04000000 /* use write barriers */ 88#define XFSMNT_BARRIER 0x04000000 /* use write barriers */
89#define XFSMNT_IDELETE 0x08000000 /* inode cluster delete */ 89#define XFSMNT_IKEEP 0x08000000 /* inode cluster delete */
90#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width 90#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width
91 * allocation */ 91 * allocation */
92#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename 92#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index be7c4251fa61..7cb26529766b 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -44,12 +44,13 @@
44#include "xfs_error.h" 44#include "xfs_error.h"
45#include "xfs_vnodeops.h" 45#include "xfs_vnodeops.h"
46 46
47struct xfs_name xfs_name_dotdot = {"..", 2};
47 48
48void 49void
49xfs_dir_mount( 50xfs_dir_mount(
50 xfs_mount_t *mp) 51 xfs_mount_t *mp)
51{ 52{
52 ASSERT(XFS_SB_VERSION_HASDIRV2(&mp->m_sb)); 53 ASSERT(xfs_sb_version_hasdirv2(&mp->m_sb));
53 ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= 54 ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
54 XFS_MAX_BLOCKSIZE); 55 XFS_MAX_BLOCKSIZE);
55 mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog); 56 mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
@@ -146,8 +147,7 @@ int
146xfs_dir_createname( 147xfs_dir_createname(
147 xfs_trans_t *tp, 148 xfs_trans_t *tp,
148 xfs_inode_t *dp, 149 xfs_inode_t *dp,
149 char *name, 150 struct xfs_name *name,
150 int namelen,
151 xfs_ino_t inum, /* new entry inode number */ 151 xfs_ino_t inum, /* new entry inode number */
152 xfs_fsblock_t *first, /* bmap's firstblock */ 152 xfs_fsblock_t *first, /* bmap's firstblock */
153 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 153 xfs_bmap_free_t *flist, /* bmap's freeblock list */
@@ -162,9 +162,9 @@ xfs_dir_createname(
162 return rval; 162 return rval;
163 XFS_STATS_INC(xs_dir_create); 163 XFS_STATS_INC(xs_dir_create);
164 164
165 args.name = name; 165 args.name = name->name;
166 args.namelen = namelen; 166 args.namelen = name->len;
167 args.hashval = xfs_da_hashname(name, namelen); 167 args.hashval = xfs_da_hashname(name->name, name->len);
168 args.inumber = inum; 168 args.inumber = inum;
169 args.dp = dp; 169 args.dp = dp;
170 args.firstblock = first; 170 args.firstblock = first;
@@ -197,8 +197,7 @@ int
197xfs_dir_lookup( 197xfs_dir_lookup(
198 xfs_trans_t *tp, 198 xfs_trans_t *tp,
199 xfs_inode_t *dp, 199 xfs_inode_t *dp,
200 char *name, 200 struct xfs_name *name,
201 int namelen,
202 xfs_ino_t *inum) /* out: inode number */ 201 xfs_ino_t *inum) /* out: inode number */
203{ 202{
204 xfs_da_args_t args; 203 xfs_da_args_t args;
@@ -207,18 +206,14 @@ xfs_dir_lookup(
207 206
208 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 207 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
209 XFS_STATS_INC(xs_dir_lookup); 208 XFS_STATS_INC(xs_dir_lookup);
209 memset(&args, 0, sizeof(xfs_da_args_t));
210 210
211 args.name = name; 211 args.name = name->name;
212 args.namelen = namelen; 212 args.namelen = name->len;
213 args.hashval = xfs_da_hashname(name, namelen); 213 args.hashval = xfs_da_hashname(name->name, name->len);
214 args.inumber = 0;
215 args.dp = dp; 214 args.dp = dp;
216 args.firstblock = NULL;
217 args.flist = NULL;
218 args.total = 0;
219 args.whichfork = XFS_DATA_FORK; 215 args.whichfork = XFS_DATA_FORK;
220 args.trans = tp; 216 args.trans = tp;
221 args.justcheck = args.addname = 0;
222 args.oknoent = 1; 217 args.oknoent = 1;
223 218
224 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 219 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
@@ -247,8 +242,7 @@ int
247xfs_dir_removename( 242xfs_dir_removename(
248 xfs_trans_t *tp, 243 xfs_trans_t *tp,
249 xfs_inode_t *dp, 244 xfs_inode_t *dp,
250 char *name, 245 struct xfs_name *name,
251 int namelen,
252 xfs_ino_t ino, 246 xfs_ino_t ino,
253 xfs_fsblock_t *first, /* bmap's firstblock */ 247 xfs_fsblock_t *first, /* bmap's firstblock */
254 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 248 xfs_bmap_free_t *flist, /* bmap's freeblock list */
@@ -261,9 +255,9 @@ xfs_dir_removename(
261 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 255 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
262 XFS_STATS_INC(xs_dir_remove); 256 XFS_STATS_INC(xs_dir_remove);
263 257
264 args.name = name; 258 args.name = name->name;
265 args.namelen = namelen; 259 args.namelen = name->len;
266 args.hashval = xfs_da_hashname(name, namelen); 260 args.hashval = xfs_da_hashname(name->name, name->len);
267 args.inumber = ino; 261 args.inumber = ino;
268 args.dp = dp; 262 args.dp = dp;
269 args.firstblock = first; 263 args.firstblock = first;
@@ -329,8 +323,7 @@ int
329xfs_dir_replace( 323xfs_dir_replace(
330 xfs_trans_t *tp, 324 xfs_trans_t *tp,
331 xfs_inode_t *dp, 325 xfs_inode_t *dp,
332 char *name, /* name of entry to replace */ 326 struct xfs_name *name, /* name of entry to replace */
333 int namelen,
334 xfs_ino_t inum, /* new inode number */ 327 xfs_ino_t inum, /* new inode number */
335 xfs_fsblock_t *first, /* bmap's firstblock */ 328 xfs_fsblock_t *first, /* bmap's firstblock */
336 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 329 xfs_bmap_free_t *flist, /* bmap's freeblock list */
@@ -345,9 +338,9 @@ xfs_dir_replace(
345 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) 338 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
346 return rval; 339 return rval;
347 340
348 args.name = name; 341 args.name = name->name;
349 args.namelen = namelen; 342 args.namelen = name->len;
350 args.hashval = xfs_da_hashname(name, namelen); 343 args.hashval = xfs_da_hashname(name->name, name->len);
351 args.inumber = inum; 344 args.inumber = inum;
352 args.dp = dp; 345 args.dp = dp;
353 args.firstblock = first; 346 args.firstblock = first;
@@ -374,28 +367,29 @@ xfs_dir_replace(
374 367
375/* 368/*
376 * See if this entry can be added to the directory without allocating space. 369 * See if this entry can be added to the directory without allocating space.
370 * First checks that the caller couldn't reserve enough space (resblks = 0).
377 */ 371 */
378int 372int
379xfs_dir_canenter( 373xfs_dir_canenter(
380 xfs_trans_t *tp, 374 xfs_trans_t *tp,
381 xfs_inode_t *dp, 375 xfs_inode_t *dp,
382 char *name, /* name of entry to add */ 376 struct xfs_name *name, /* name of entry to add */
383 int namelen) 377 uint resblks)
384{ 378{
385 xfs_da_args_t args; 379 xfs_da_args_t args;
386 int rval; 380 int rval;
387 int v; /* type-checking value */ 381 int v; /* type-checking value */
388 382
383 if (resblks)
384 return 0;
385
389 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 386 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
387 memset(&args, 0, sizeof(xfs_da_args_t));
390 388
391 args.name = name; 389 args.name = name->name;
392 args.namelen = namelen; 390 args.namelen = name->len;
393 args.hashval = xfs_da_hashname(name, namelen); 391 args.hashval = xfs_da_hashname(name->name, name->len);
394 args.inumber = 0;
395 args.dp = dp; 392 args.dp = dp;
396 args.firstblock = NULL;
397 args.flist = NULL;
398 args.total = 0;
399 args.whichfork = XFS_DATA_FORK; 393 args.whichfork = XFS_DATA_FORK;
400 args.trans = tp; 394 args.trans = tp;
401 args.justcheck = args.addname = args.oknoent = 1; 395 args.justcheck = args.addname = args.oknoent = 1;
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index b265197e74cf..6392f939029f 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -59,6 +59,8 @@ typedef __uint32_t xfs_dir2_db_t;
59 */ 59 */
60typedef xfs_off_t xfs_dir2_off_t; 60typedef xfs_off_t xfs_dir2_off_t;
61 61
62extern struct xfs_name xfs_name_dotdot;
63
62/* 64/*
63 * Generic directory interface routines 65 * Generic directory interface routines
64 */ 66 */
@@ -68,21 +70,21 @@ extern int xfs_dir_isempty(struct xfs_inode *dp);
68extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp, 70extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,
69 struct xfs_inode *pdp); 71 struct xfs_inode *pdp);
70extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, 72extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
71 char *name, int namelen, xfs_ino_t inum, 73 struct xfs_name *name, xfs_ino_t inum,
72 xfs_fsblock_t *first, 74 xfs_fsblock_t *first,
73 struct xfs_bmap_free *flist, xfs_extlen_t tot); 75 struct xfs_bmap_free *flist, xfs_extlen_t tot);
74extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, 76extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
75 char *name, int namelen, xfs_ino_t *inum); 77 struct xfs_name *name, xfs_ino_t *inum);
76extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, 78extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
77 char *name, int namelen, xfs_ino_t ino, 79 struct xfs_name *name, xfs_ino_t ino,
78 xfs_fsblock_t *first, 80 xfs_fsblock_t *first,
79 struct xfs_bmap_free *flist, xfs_extlen_t tot); 81 struct xfs_bmap_free *flist, xfs_extlen_t tot);
80extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, 82extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
81 char *name, int namelen, xfs_ino_t inum, 83 struct xfs_name *name, xfs_ino_t inum,
82 xfs_fsblock_t *first, 84 xfs_fsblock_t *first,
83 struct xfs_bmap_free *flist, xfs_extlen_t tot); 85 struct xfs_bmap_free *flist, xfs_extlen_t tot);
84extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, 86extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
85 char *name, int namelen); 87 struct xfs_name *name, uint resblks);
86extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); 88extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
87 89
88/* 90/*
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index eb03eab5ca52..3f3785b10804 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -73,7 +73,7 @@ xfs_filestreams_trace(
73#define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0) 73#define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0)
74#define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0) 74#define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0)
75#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ 75#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \
76 xfs_filestreams_trace(mp, t, __FUNCTION__, __LINE__, \ 76 xfs_filestreams_trace(mp, t, __func__, __LINE__, \
77 (__psunsigned_t)a0, (__psunsigned_t)a1, \ 77 (__psunsigned_t)a0, (__psunsigned_t)a1, \
78 (__psunsigned_t)a2, (__psunsigned_t)a3, \ 78 (__psunsigned_t)a2, (__psunsigned_t)a3, \
79 (__psunsigned_t)a4, (__psunsigned_t)a5) 79 (__psunsigned_t)a4, (__psunsigned_t)a5)
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index eadc1591c795..d3a0f538d6a6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -77,36 +77,36 @@ xfs_fs_geometry(
77 if (new_version >= 3) { 77 if (new_version >= 3) {
78 geo->version = XFS_FSOP_GEOM_VERSION; 78 geo->version = XFS_FSOP_GEOM_VERSION;
79 geo->flags = 79 geo->flags =
80 (XFS_SB_VERSION_HASATTR(&mp->m_sb) ? 80 (xfs_sb_version_hasattr(&mp->m_sb) ?
81 XFS_FSOP_GEOM_FLAGS_ATTR : 0) | 81 XFS_FSOP_GEOM_FLAGS_ATTR : 0) |
82 (XFS_SB_VERSION_HASNLINK(&mp->m_sb) ? 82 (xfs_sb_version_hasnlink(&mp->m_sb) ?
83 XFS_FSOP_GEOM_FLAGS_NLINK : 0) | 83 XFS_FSOP_GEOM_FLAGS_NLINK : 0) |
84 (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) ? 84 (xfs_sb_version_hasquota(&mp->m_sb) ?
85 XFS_FSOP_GEOM_FLAGS_QUOTA : 0) | 85 XFS_FSOP_GEOM_FLAGS_QUOTA : 0) |
86 (XFS_SB_VERSION_HASALIGN(&mp->m_sb) ? 86 (xfs_sb_version_hasalign(&mp->m_sb) ?
87 XFS_FSOP_GEOM_FLAGS_IALIGN : 0) | 87 XFS_FSOP_GEOM_FLAGS_IALIGN : 0) |
88 (XFS_SB_VERSION_HASDALIGN(&mp->m_sb) ? 88 (xfs_sb_version_hasdalign(&mp->m_sb) ?
89 XFS_FSOP_GEOM_FLAGS_DALIGN : 0) | 89 XFS_FSOP_GEOM_FLAGS_DALIGN : 0) |
90 (XFS_SB_VERSION_HASSHARED(&mp->m_sb) ? 90 (xfs_sb_version_hasshared(&mp->m_sb) ?
91 XFS_FSOP_GEOM_FLAGS_SHARED : 0) | 91 XFS_FSOP_GEOM_FLAGS_SHARED : 0) |
92 (XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) ? 92 (xfs_sb_version_hasextflgbit(&mp->m_sb) ?
93 XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) | 93 XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) |
94 (XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ? 94 (xfs_sb_version_hasdirv2(&mp->m_sb) ?
95 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) | 95 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
96 (XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ? 96 (xfs_sb_version_hassector(&mp->m_sb) ?
97 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | 97 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
98 (xfs_sb_version_haslazysbcount(&mp->m_sb) ? 98 (xfs_sb_version_haslazysbcount(&mp->m_sb) ?
99 XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | 99 XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
100 (XFS_SB_VERSION_HASATTR2(&mp->m_sb) ? 100 (xfs_sb_version_hasattr2(&mp->m_sb) ?
101 XFS_FSOP_GEOM_FLAGS_ATTR2 : 0); 101 XFS_FSOP_GEOM_FLAGS_ATTR2 : 0);
102 geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ? 102 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
103 mp->m_sb.sb_logsectsize : BBSIZE; 103 mp->m_sb.sb_logsectsize : BBSIZE;
104 geo->rtsectsize = mp->m_sb.sb_blocksize; 104 geo->rtsectsize = mp->m_sb.sb_blocksize;
105 geo->dirblocksize = mp->m_dirblksize; 105 geo->dirblocksize = mp->m_dirblksize;
106 } 106 }
107 if (new_version >= 4) { 107 if (new_version >= 4) {
108 geo->flags |= 108 geo->flags |=
109 (XFS_SB_VERSION_HASLOGV2(&mp->m_sb) ? 109 (xfs_sb_version_haslogv2(&mp->m_sb) ?
110 XFS_FSOP_GEOM_FLAGS_LOGV2 : 0); 110 XFS_FSOP_GEOM_FLAGS_LOGV2 : 0);
111 geo->logsunit = mp->m_sb.sb_logsunit; 111 geo->logsunit = mp->m_sb.sb_logsunit;
112 } 112 }
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index c5836b951d0c..a64dfbd565a5 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -107,6 +107,16 @@ xfs_ialloc_log_di(
107/* 107/*
108 * Allocation group level functions. 108 * Allocation group level functions.
109 */ 109 */
110static inline int
111xfs_ialloc_cluster_alignment(
112 xfs_alloc_arg_t *args)
113{
114 if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
115 args->mp->m_sb.sb_inoalignmt >=
116 XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp)))
117 return args->mp->m_sb.sb_inoalignmt;
118 return 1;
119}
110 120
111/* 121/*
112 * Allocate new inodes in the allocation group specified by agbp. 122 * Allocate new inodes in the allocation group specified by agbp.
@@ -167,10 +177,24 @@ xfs_ialloc_ag_alloc(
167 args.mod = args.total = args.wasdel = args.isfl = 177 args.mod = args.total = args.wasdel = args.isfl =
168 args.userdata = args.minalignslop = 0; 178 args.userdata = args.minalignslop = 0;
169 args.prod = 1; 179 args.prod = 1;
170 args.alignment = 1; 180
171 /* 181 /*
172 * Allow space for the inode btree to split. 182 * We need to take into account alignment here to ensure that
183 * we don't modify the free list if we fail to have an exact
184 * block. If we don't have an exact match, and every oher
185 * attempt allocation attempt fails, we'll end up cancelling
186 * a dirty transaction and shutting down.
187 *
188 * For an exact allocation, alignment must be 1,
189 * however we need to take cluster alignment into account when
190 * fixing up the freelist. Use the minalignslop field to
191 * indicate that extra blocks might be required for alignment,
192 * but not to use them in the actual exact allocation.
173 */ 193 */
194 args.alignment = 1;
195 args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
196
197 /* Allow space for the inode btree to split. */
174 args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; 198 args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
175 if ((error = xfs_alloc_vextent(&args))) 199 if ((error = xfs_alloc_vextent(&args)))
176 return error; 200 return error;
@@ -191,13 +215,8 @@ xfs_ialloc_ag_alloc(
191 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); 215 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
192 args.alignment = args.mp->m_dalign; 216 args.alignment = args.mp->m_dalign;
193 isaligned = 1; 217 isaligned = 1;
194 } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && 218 } else
195 args.mp->m_sb.sb_inoalignmt >= 219 args.alignment = xfs_ialloc_cluster_alignment(&args);
196 XFS_B_TO_FSBT(args.mp,
197 XFS_INODE_CLUSTER_SIZE(args.mp)))
198 args.alignment = args.mp->m_sb.sb_inoalignmt;
199 else
200 args.alignment = 1;
201 /* 220 /*
202 * Need to figure out where to allocate the inode blocks. 221 * Need to figure out where to allocate the inode blocks.
203 * Ideally they should be spaced out through the a.g. 222 * Ideally they should be spaced out through the a.g.
@@ -230,12 +249,7 @@ xfs_ialloc_ag_alloc(
230 args.agbno = be32_to_cpu(agi->agi_root); 249 args.agbno = be32_to_cpu(agi->agi_root);
231 args.fsbno = XFS_AGB_TO_FSB(args.mp, 250 args.fsbno = XFS_AGB_TO_FSB(args.mp,
232 be32_to_cpu(agi->agi_seqno), args.agbno); 251 be32_to_cpu(agi->agi_seqno), args.agbno);
233 if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && 252 args.alignment = xfs_ialloc_cluster_alignment(&args);
234 args.mp->m_sb.sb_inoalignmt >=
235 XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
236 args.alignment = args.mp->m_sb.sb_inoalignmt;
237 else
238 args.alignment = 1;
239 if ((error = xfs_alloc_vextent(&args))) 253 if ((error = xfs_alloc_vextent(&args)))
240 return error; 254 return error;
241 } 255 }
@@ -271,7 +285,7 @@ xfs_ialloc_ag_alloc(
271 * use the old version so that old kernels will continue to be 285 * use the old version so that old kernels will continue to be
272 * able to use the file system. 286 * able to use the file system.
273 */ 287 */
274 if (XFS_SB_VERSION_HASNLINK(&args.mp->m_sb)) 288 if (xfs_sb_version_hasnlink(&args.mp->m_sb))
275 version = XFS_DINODE_VERSION_2; 289 version = XFS_DINODE_VERSION_2;
276 else 290 else
277 version = XFS_DINODE_VERSION_1; 291 version = XFS_DINODE_VERSION_1;
@@ -1053,7 +1067,7 @@ xfs_difree(
1053 /* 1067 /*
1054 * When an inode cluster is free, it becomes eligible for removal 1068 * When an inode cluster is free, it becomes eligible for removal
1055 */ 1069 */
1056 if ((mp->m_flags & XFS_MOUNT_IDELETE) && 1070 if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
1057 (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { 1071 (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
1058 1072
1059 *delete = 1; 1073 *delete = 1;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index f01b07687faf..e657c5128460 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -78,7 +78,6 @@ xfs_iget_core(
78 xfs_inode_t *ip; 78 xfs_inode_t *ip;
79 xfs_inode_t *iq; 79 xfs_inode_t *iq;
80 int error; 80 int error;
81 xfs_icluster_t *icl, *new_icl = NULL;
82 unsigned long first_index, mask; 81 unsigned long first_index, mask;
83 xfs_perag_t *pag; 82 xfs_perag_t *pag;
84 xfs_agino_t agino; 83 xfs_agino_t agino;
@@ -229,29 +228,17 @@ finish_inode:
229 } 228 }
230 229
231 /* 230 /*
232 * This is a bit messy - we preallocate everything we _might_ 231 * Preload the radix tree so we can insert safely under the
233 * need before we pick up the ici lock. That way we don't have to 232 * write spinlock.
234 * juggle locks and go all the way back to the start.
235 */ 233 */
236 new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP);
237 if (radix_tree_preload(GFP_KERNEL)) { 234 if (radix_tree_preload(GFP_KERNEL)) {
235 xfs_idestroy(ip);
238 delay(1); 236 delay(1);
239 goto again; 237 goto again;
240 } 238 }
241 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 239 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
242 first_index = agino & mask; 240 first_index = agino & mask;
243 write_lock(&pag->pag_ici_lock); 241 write_lock(&pag->pag_ici_lock);
244
245 /*
246 * Find the cluster if it exists
247 */
248 icl = NULL;
249 if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq,
250 first_index, 1)) {
251 if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index)
252 icl = iq->i_cluster;
253 }
254
255 /* 242 /*
256 * insert the new inode 243 * insert the new inode
257 */ 244 */
@@ -266,30 +253,13 @@ finish_inode:
266 } 253 }
267 254
268 /* 255 /*
269 * These values _must_ be set before releasing ihlock! 256 * These values _must_ be set before releasing the radix tree lock!
270 */ 257 */
271 ip->i_udquot = ip->i_gdquot = NULL; 258 ip->i_udquot = ip->i_gdquot = NULL;
272 xfs_iflags_set(ip, XFS_INEW); 259 xfs_iflags_set(ip, XFS_INEW);
273 260
274 ASSERT(ip->i_cluster == NULL);
275
276 if (!icl) {
277 spin_lock_init(&new_icl->icl_lock);
278 INIT_HLIST_HEAD(&new_icl->icl_inodes);
279 icl = new_icl;
280 new_icl = NULL;
281 } else {
282 ASSERT(!hlist_empty(&icl->icl_inodes));
283 }
284 spin_lock(&icl->icl_lock);
285 hlist_add_head(&ip->i_cnode, &icl->icl_inodes);
286 ip->i_cluster = icl;
287 spin_unlock(&icl->icl_lock);
288
289 write_unlock(&pag->pag_ici_lock); 261 write_unlock(&pag->pag_ici_lock);
290 radix_tree_preload_end(); 262 radix_tree_preload_end();
291 if (new_icl)
292 kmem_zone_free(xfs_icluster_zone, new_icl);
293 263
294 /* 264 /*
295 * Link ip to its mount and thread it on the mount's inode list. 265 * Link ip to its mount and thread it on the mount's inode list.
@@ -528,18 +498,6 @@ xfs_iextract(
528 xfs_put_perag(mp, pag); 498 xfs_put_perag(mp, pag);
529 499
530 /* 500 /*
531 * Remove from cluster list
532 */
533 mp = ip->i_mount;
534 spin_lock(&ip->i_cluster->icl_lock);
535 hlist_del(&ip->i_cnode);
536 spin_unlock(&ip->i_cluster->icl_lock);
537
538 /* was last inode in cluster? */
539 if (hlist_empty(&ip->i_cluster->icl_inodes))
540 kmem_zone_free(xfs_icluster_zone, ip->i_cluster);
541
542 /*
543 * Remove from mount's inode list. 501 * Remove from mount's inode list.
544 */ 502 */
545 XFS_MOUNT_ILOCK(mp); 503 XFS_MOUNT_ILOCK(mp);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a550546a7083..ca12acb90394 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -55,7 +55,6 @@
55 55
56kmem_zone_t *xfs_ifork_zone; 56kmem_zone_t *xfs_ifork_zone;
57kmem_zone_t *xfs_inode_zone; 57kmem_zone_t *xfs_inode_zone;
58kmem_zone_t *xfs_icluster_zone;
59 58
60/* 59/*
61 * Used in xfs_itruncate(). This is the maximum number of extents 60 * Used in xfs_itruncate(). This is the maximum number of extents
@@ -126,6 +125,90 @@ xfs_inobp_check(
126#endif 125#endif
127 126
128/* 127/*
128 * Find the buffer associated with the given inode map
129 * We do basic validation checks on the buffer once it has been
130 * retrieved from disk.
131 */
132STATIC int
133xfs_imap_to_bp(
134 xfs_mount_t *mp,
135 xfs_trans_t *tp,
136 xfs_imap_t *imap,
137 xfs_buf_t **bpp,
138 uint buf_flags,
139 uint imap_flags)
140{
141 int error;
142 int i;
143 int ni;
144 xfs_buf_t *bp;
145
146 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
147 (int)imap->im_len, buf_flags, &bp);
148 if (error) {
149 if (error != EAGAIN) {
150 cmn_err(CE_WARN,
151 "xfs_imap_to_bp: xfs_trans_read_buf()returned "
152 "an error %d on %s. Returning error.",
153 error, mp->m_fsname);
154 } else {
155 ASSERT(buf_flags & XFS_BUF_TRYLOCK);
156 }
157 return error;
158 }
159
160 /*
161 * Validate the magic number and version of every inode in the buffer
162 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
163 */
164#ifdef DEBUG
165 ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog;
166#else /* usual case */
167 ni = 1;
168#endif
169
170 for (i = 0; i < ni; i++) {
171 int di_ok;
172 xfs_dinode_t *dip;
173
174 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
175 (i << mp->m_sb.sb_inodelog));
176 di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
177 XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
178 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
179 XFS_ERRTAG_ITOBP_INOTOBP,
180 XFS_RANDOM_ITOBP_INOTOBP))) {
181 if (imap_flags & XFS_IMAP_BULKSTAT) {
182 xfs_trans_brelse(tp, bp);
183 return XFS_ERROR(EINVAL);
184 }
185 XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
186 XFS_ERRLEVEL_HIGH, mp, dip);
187#ifdef DEBUG
188 cmn_err(CE_PANIC,
189 "Device %s - bad inode magic/vsn "
190 "daddr %lld #%d (magic=%x)",
191 XFS_BUFTARG_NAME(mp->m_ddev_targp),
192 (unsigned long long)imap->im_blkno, i,
193 be16_to_cpu(dip->di_core.di_magic));
194#endif
195 xfs_trans_brelse(tp, bp);
196 return XFS_ERROR(EFSCORRUPTED);
197 }
198 }
199
200 xfs_inobp_check(mp, bp);
201
202 /*
203 * Mark the buffer as an inode buffer now that it looks good
204 */
205 XFS_BUF_SET_VTYPE(bp, B_FS_INO);
206
207 *bpp = bp;
208 return 0;
209}
210
211/*
129 * This routine is called to map an inode number within a file 212 * This routine is called to map an inode number within a file
130 * system to the buffer containing the on-disk version of the 213 * system to the buffer containing the on-disk version of the
131 * inode. It returns a pointer to the buffer containing the 214 * inode. It returns a pointer to the buffer containing the
@@ -147,72 +230,19 @@ xfs_inotobp(
147 xfs_buf_t **bpp, 230 xfs_buf_t **bpp,
148 int *offset) 231 int *offset)
149{ 232{
150 int di_ok;
151 xfs_imap_t imap; 233 xfs_imap_t imap;
152 xfs_buf_t *bp; 234 xfs_buf_t *bp;
153 int error; 235 int error;
154 xfs_dinode_t *dip;
155 236
156 /*
157 * Call the space management code to find the location of the
158 * inode on disk.
159 */
160 imap.im_blkno = 0; 237 imap.im_blkno = 0;
161 error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); 238 error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP);
162 if (error != 0) { 239 if (error)
163 cmn_err(CE_WARN,
164 "xfs_inotobp: xfs_imap() returned an "
165 "error %d on %s. Returning error.", error, mp->m_fsname);
166 return error; 240 return error;
167 }
168 241
169 /* 242 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0);
170 * If the inode number maps to a block outside the bounds of the 243 if (error)
171 * file system then return NULL rather than calling read_buf
172 * and panicing when we get an error from the driver.
173 */
174 if ((imap.im_blkno + imap.im_len) >
175 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
176 cmn_err(CE_WARN,
177 "xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds "
178 "of the file system %s. Returning EINVAL.",
179 (unsigned long long)imap.im_blkno,
180 imap.im_len, mp->m_fsname);
181 return XFS_ERROR(EINVAL);
182 }
183
184 /*
185 * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will
186 * default to just a read_buf() call.
187 */
188 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
189 (int)imap.im_len, XFS_BUF_LOCK, &bp);
190
191 if (error) {
192 cmn_err(CE_WARN,
193 "xfs_inotobp: xfs_trans_read_buf() returned an "
194 "error %d on %s. Returning error.", error, mp->m_fsname);
195 return error; 244 return error;
196 }
197 dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0);
198 di_ok =
199 be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
200 XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
201 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
202 XFS_RANDOM_ITOBP_INOTOBP))) {
203 XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip);
204 xfs_trans_brelse(tp, bp);
205 cmn_err(CE_WARN,
206 "xfs_inotobp: XFS_TEST_ERROR() returned an "
207 "error on %s. Returning EFSCORRUPTED.", mp->m_fsname);
208 return XFS_ERROR(EFSCORRUPTED);
209 }
210 245
211 xfs_inobp_check(mp, bp);
212
213 /*
214 * Set *dipp to point to the on-disk inode in the buffer.
215 */
216 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 246 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
217 *bpp = bp; 247 *bpp = bp;
218 *offset = imap.im_boffset; 248 *offset = imap.im_boffset;
@@ -248,46 +278,21 @@ xfs_itobp(
248 xfs_dinode_t **dipp, 278 xfs_dinode_t **dipp,
249 xfs_buf_t **bpp, 279 xfs_buf_t **bpp,
250 xfs_daddr_t bno, 280 xfs_daddr_t bno,
251 uint imap_flags) 281 uint imap_flags,
282 uint buf_flags)
252{ 283{
253 xfs_imap_t imap; 284 xfs_imap_t imap;
254 xfs_buf_t *bp; 285 xfs_buf_t *bp;
255 int error; 286 int error;
256 int i;
257 int ni;
258 287
259 if (ip->i_blkno == (xfs_daddr_t)0) { 288 if (ip->i_blkno == (xfs_daddr_t)0) {
260 /*
261 * Call the space management code to find the location of the
262 * inode on disk.
263 */
264 imap.im_blkno = bno; 289 imap.im_blkno = bno;
265 if ((error = xfs_imap(mp, tp, ip->i_ino, &imap, 290 error = xfs_imap(mp, tp, ip->i_ino, &imap,
266 XFS_IMAP_LOOKUP | imap_flags))) 291 XFS_IMAP_LOOKUP | imap_flags);
292 if (error)
267 return error; 293 return error;
268 294
269 /* 295 /*
270 * If the inode number maps to a block outside the bounds
271 * of the file system then return NULL rather than calling
272 * read_buf and panicing when we get an error from the
273 * driver.
274 */
275 if ((imap.im_blkno + imap.im_len) >
276 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
277#ifdef DEBUG
278 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
279 "(imap.im_blkno (0x%llx) "
280 "+ imap.im_len (0x%llx)) > "
281 " XFS_FSB_TO_BB(mp, "
282 "mp->m_sb.sb_dblocks) (0x%llx)",
283 (unsigned long long) imap.im_blkno,
284 (unsigned long long) imap.im_len,
285 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
286#endif /* DEBUG */
287 return XFS_ERROR(EINVAL);
288 }
289
290 /*
291 * Fill in the fields in the inode that will be used to 296 * Fill in the fields in the inode that will be used to
292 * map the inode to its buffer from now on. 297 * map the inode to its buffer from now on.
293 */ 298 */
@@ -305,76 +310,17 @@ xfs_itobp(
305 } 310 }
306 ASSERT(bno == 0 || bno == imap.im_blkno); 311 ASSERT(bno == 0 || bno == imap.im_blkno);
307 312
308 /* 313 error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags);
309 * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will 314 if (error)
310 * default to just a read_buf() call.
311 */
312 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
313 (int)imap.im_len, XFS_BUF_LOCK, &bp);
314 if (error) {
315#ifdef DEBUG
316 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
317 "xfs_trans_read_buf() returned error %d, "
318 "imap.im_blkno 0x%llx, imap.im_len 0x%llx",
319 error, (unsigned long long) imap.im_blkno,
320 (unsigned long long) imap.im_len);
321#endif /* DEBUG */
322 return error; 315 return error;
323 }
324
325 /*
326 * Validate the magic number and version of every inode in the buffer
327 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
328 * No validation is done here in userspace (xfs_repair).
329 */
330#if !defined(__KERNEL__)
331 ni = 0;
332#elif defined(DEBUG)
333 ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog;
334#else /* usual case */
335 ni = 1;
336#endif
337
338 for (i = 0; i < ni; i++) {
339 int di_ok;
340 xfs_dinode_t *dip;
341 316
342 dip = (xfs_dinode_t *)xfs_buf_offset(bp, 317 if (!bp) {
343 (i << mp->m_sb.sb_inodelog)); 318 ASSERT(buf_flags & XFS_BUF_TRYLOCK);
344 di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && 319 ASSERT(tp == NULL);
345 XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); 320 *bpp = NULL;
346 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 321 return EAGAIN;
347 XFS_ERRTAG_ITOBP_INOTOBP,
348 XFS_RANDOM_ITOBP_INOTOBP))) {
349 if (imap_flags & XFS_IMAP_BULKSTAT) {
350 xfs_trans_brelse(tp, bp);
351 return XFS_ERROR(EINVAL);
352 }
353#ifdef DEBUG
354 cmn_err(CE_ALERT,
355 "Device %s - bad inode magic/vsn "
356 "daddr %lld #%d (magic=%x)",
357 XFS_BUFTARG_NAME(mp->m_ddev_targp),
358 (unsigned long long)imap.im_blkno, i,
359 be16_to_cpu(dip->di_core.di_magic));
360#endif
361 XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH,
362 mp, dip);
363 xfs_trans_brelse(tp, bp);
364 return XFS_ERROR(EFSCORRUPTED);
365 }
366 } 322 }
367 323
368 xfs_inobp_check(mp, bp);
369
370 /*
371 * Mark the buffer as an inode buffer now that it looks good
372 */
373 XFS_BUF_SET_VTYPE(bp, B_FS_INO);
374
375 /*
376 * Set *dipp to point to the on-disk inode in the buffer.
377 */
378 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 324 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
379 *bpp = bp; 325 *bpp = bp;
380 return 0; 326 return 0;
@@ -878,7 +824,7 @@ xfs_iread(
878 * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will 824 * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will
879 * know that this is a new incore inode. 825 * know that this is a new incore inode.
880 */ 826 */
881 error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags); 827 error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK);
882 if (error) { 828 if (error) {
883 kmem_zone_free(xfs_inode_zone, ip); 829 kmem_zone_free(xfs_inode_zone, ip);
884 return error; 830 return error;
@@ -1147,7 +1093,7 @@ xfs_ialloc(
1147 * the inode version number now. This way we only do the conversion 1093 * the inode version number now. This way we only do the conversion
1148 * here rather than here and in the flush/logging code. 1094 * here rather than here and in the flush/logging code.
1149 */ 1095 */
1150 if (XFS_SB_VERSION_HASNLINK(&tp->t_mountp->m_sb) && 1096 if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) &&
1151 ip->i_d.di_version == XFS_DINODE_VERSION_1) { 1097 ip->i_d.di_version == XFS_DINODE_VERSION_1) {
1152 ip->i_d.di_version = XFS_DINODE_VERSION_2; 1098 ip->i_d.di_version = XFS_DINODE_VERSION_2;
1153 /* 1099 /*
@@ -1518,51 +1464,50 @@ xfs_itruncate_start(
1518} 1464}
1519 1465
1520/* 1466/*
1521 * Shrink the file to the given new_size. The new 1467 * Shrink the file to the given new_size. The new size must be smaller than
1522 * size must be smaller than the current size. 1468 * the current size. This will free up the underlying blocks in the removed
1523 * This will free up the underlying blocks 1469 * range after a call to xfs_itruncate_start() or xfs_atruncate_start().
1524 * in the removed range after a call to xfs_itruncate_start()
1525 * or xfs_atruncate_start().
1526 * 1470 *
1527 * The transaction passed to this routine must have made 1471 * The transaction passed to this routine must have made a permanent log
1528 * a permanent log reservation of at least XFS_ITRUNCATE_LOG_RES. 1472 * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
1529 * This routine may commit the given transaction and 1473 * given transaction and start new ones, so make sure everything involved in
1530 * start new ones, so make sure everything involved in 1474 * the transaction is tidy before calling here. Some transaction will be
1531 * the transaction is tidy before calling here. 1475 * returned to the caller to be committed. The incoming transaction must
1532 * Some transaction will be returned to the caller to be 1476 * already include the inode, and both inode locks must be held exclusively.
1533 * committed. The incoming transaction must already include 1477 * The inode must also be "held" within the transaction. On return the inode
1534 * the inode, and both inode locks must be held exclusively. 1478 * will be "held" within the returned transaction. This routine does NOT
1535 * The inode must also be "held" within the transaction. On 1479 * require any disk space to be reserved for it within the transaction.
1536 * return the inode will be "held" within the returned transaction.
1537 * This routine does NOT require any disk space to be reserved
1538 * for it within the transaction.
1539 * 1480 *
1540 * The fork parameter must be either xfs_attr_fork or xfs_data_fork, 1481 * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it
1541 * and it indicates the fork which is to be truncated. For the 1482 * indicates the fork which is to be truncated. For the attribute fork we only
1542 * attribute fork we only support truncation to size 0. 1483 * support truncation to size 0.
1543 * 1484 *
1544 * We use the sync parameter to indicate whether or not the first 1485 * We use the sync parameter to indicate whether or not the first transaction
1545 * transaction we perform might have to be synchronous. For the attr fork, 1486 * we perform might have to be synchronous. For the attr fork, it needs to be
1546 * it needs to be so if the unlink of the inode is not yet known to be 1487 * so if the unlink of the inode is not yet known to be permanent in the log.
1547 * permanent in the log. This keeps us from freeing and reusing the 1488 * This keeps us from freeing and reusing the blocks of the attribute fork
1548 * blocks of the attribute fork before the unlink of the inode becomes 1489 * before the unlink of the inode becomes permanent.
1549 * permanent.
1550 * 1490 *
1551 * For the data fork, we normally have to run synchronously if we're 1491 * For the data fork, we normally have to run synchronously if we're being
1552 * being called out of the inactive path or we're being called 1492 * called out of the inactive path or we're being called out of the create path
1553 * out of the create path where we're truncating an existing file. 1493 * where we're truncating an existing file. Either way, the truncate needs to
1554 * Either way, the truncate needs to be sync so blocks don't reappear 1494 * be sync so blocks don't reappear in the file with altered data in case of a
1555 * in the file with altered data in case of a crash. wsync filesystems 1495 * crash. wsync filesystems can run the first case async because anything that
1556 * can run the first case async because anything that shrinks the inode 1496 * shrinks the inode has to run sync so by the time we're called here from
1557 * has to run sync so by the time we're called here from inactive, the 1497 * inactive, the inode size is permanently set to 0.
1558 * inode size is permanently set to 0.
1559 * 1498 *
1560 * Calls from the truncate path always need to be sync unless we're 1499 * Calls from the truncate path always need to be sync unless we're in a wsync
1561 * in a wsync filesystem and the file has already been unlinked. 1500 * filesystem and the file has already been unlinked.
1562 * 1501 *
1563 * The caller is responsible for correctly setting the sync parameter. 1502 * The caller is responsible for correctly setting the sync parameter. It gets
1564 * It gets too hard for us to guess here which path we're being called 1503 * too hard for us to guess here which path we're being called out of just
1565 * out of just based on inode state. 1504 * based on inode state.
1505 *
1506 * If we get an error, we must return with the inode locked and linked into the
1507 * current transaction. This keeps things simple for the higher level code,
1508 * because it always knows that the inode is locked and held in the transaction
1509 * that returns to it whether errors occur or not. We don't mark the inode
1510 * dirty on error so that transactions can be easily aborted if possible.
1566 */ 1511 */
1567int 1512int
1568xfs_itruncate_finish( 1513xfs_itruncate_finish(
@@ -1741,65 +1686,51 @@ xfs_itruncate_finish(
1741 */ 1686 */
1742 error = xfs_bmap_finish(tp, &free_list, &committed); 1687 error = xfs_bmap_finish(tp, &free_list, &committed);
1743 ntp = *tp; 1688 ntp = *tp;
1689 if (committed) {
1690 /* link the inode into the next xact in the chain */
1691 xfs_trans_ijoin(ntp, ip,
1692 XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1693 xfs_trans_ihold(ntp, ip);
1694 }
1695
1744 if (error) { 1696 if (error) {
1745 /* 1697 /*
1746 * If the bmap finish call encounters an error, 1698 * If the bmap finish call encounters an error, return
1747 * return to the caller where the transaction 1699 * to the caller where the transaction can be properly
1748 * can be properly aborted. We just need to 1700 * aborted. We just need to make sure we're not
1749 * make sure we're not holding any resources 1701 * holding any resources that we were not when we came
1750 * that we were not when we came in. 1702 * in.
1751 * 1703 *
1752 * Aborting from this point might lose some 1704 * Aborting from this point might lose some blocks in
1753 * blocks in the file system, but oh well. 1705 * the file system, but oh well.
1754 */ 1706 */
1755 xfs_bmap_cancel(&free_list); 1707 xfs_bmap_cancel(&free_list);
1756 if (committed) {
1757 /*
1758 * If the passed in transaction committed
1759 * in xfs_bmap_finish(), then we want to
1760 * add the inode to this one before returning.
1761 * This keeps things simple for the higher
1762 * level code, because it always knows that
1763 * the inode is locked and held in the
1764 * transaction that returns to it whether
1765 * errors occur or not. We don't mark the
1766 * inode dirty so that this transaction can
1767 * be easily aborted if possible.
1768 */
1769 xfs_trans_ijoin(ntp, ip,
1770 XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1771 xfs_trans_ihold(ntp, ip);
1772 }
1773 return error; 1708 return error;
1774 } 1709 }
1775 1710
1776 if (committed) { 1711 if (committed) {
1777 /* 1712 /*
1778 * The first xact was committed, 1713 * Mark the inode dirty so it will be logged and
1779 * so add the inode to the new one. 1714 * moved forward in the log as part of every commit.
1780 * Mark it dirty so it will be logged
1781 * and moved forward in the log as
1782 * part of every commit.
1783 */ 1715 */
1784 xfs_trans_ijoin(ntp, ip,
1785 XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1786 xfs_trans_ihold(ntp, ip);
1787 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1716 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1788 } 1717 }
1718
1789 ntp = xfs_trans_dup(ntp); 1719 ntp = xfs_trans_dup(ntp);
1790 (void) xfs_trans_commit(*tp, 0); 1720 error = xfs_trans_commit(*tp, 0);
1791 *tp = ntp; 1721 *tp = ntp;
1792 error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 1722
1793 XFS_TRANS_PERM_LOG_RES, 1723 /* link the inode into the next transaction in the chain */
1794 XFS_ITRUNCATE_LOG_COUNT);
1795 /*
1796 * Add the inode being truncated to the next chained
1797 * transaction.
1798 */
1799 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1724 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1800 xfs_trans_ihold(ntp, ip); 1725 xfs_trans_ihold(ntp, ip);
1726
1727 if (!error)
1728 error = xfs_trans_reserve(ntp, 0,
1729 XFS_ITRUNCATE_LOG_RES(mp), 0,
1730 XFS_TRANS_PERM_LOG_RES,
1731 XFS_ITRUNCATE_LOG_COUNT);
1801 if (error) 1732 if (error)
1802 return (error); 1733 return error;
1803 } 1734 }
1804 /* 1735 /*
1805 * Only update the size in the case of the data fork, but 1736 * Only update the size in the case of the data fork, but
@@ -1967,7 +1898,7 @@ xfs_iunlink(
1967 * Here we put the head pointer into our next pointer, 1898 * Here we put the head pointer into our next pointer,
1968 * and then we fall through to point the head at us. 1899 * and then we fall through to point the head at us.
1969 */ 1900 */
1970 error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); 1901 error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
1971 if (error) 1902 if (error)
1972 return error; 1903 return error;
1973 1904
@@ -2075,7 +2006,7 @@ xfs_iunlink_remove(
2075 * of dealing with the buffer when there is no need to 2006 * of dealing with the buffer when there is no need to
2076 * change it. 2007 * change it.
2077 */ 2008 */
2078 error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); 2009 error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
2079 if (error) { 2010 if (error) {
2080 cmn_err(CE_WARN, 2011 cmn_err(CE_WARN,
2081 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 2012 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -2137,7 +2068,7 @@ xfs_iunlink_remove(
2137 * Now last_ibp points to the buffer previous to us on 2068 * Now last_ibp points to the buffer previous to us on
2138 * the unlinked list. Pull us from the list. 2069 * the unlinked list. Pull us from the list.
2139 */ 2070 */
2140 error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); 2071 error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
2141 if (error) { 2072 if (error) {
2142 cmn_err(CE_WARN, 2073 cmn_err(CE_WARN,
2143 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 2074 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -2172,13 +2103,6 @@ xfs_iunlink_remove(
2172 return 0; 2103 return 0;
2173} 2104}
2174 2105
2175STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip)
2176{
2177 return (((ip->i_itemp == NULL) ||
2178 !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
2179 (ip->i_update_core == 0));
2180}
2181
2182STATIC void 2106STATIC void
2183xfs_ifree_cluster( 2107xfs_ifree_cluster(
2184 xfs_inode_t *free_ip, 2108 xfs_inode_t *free_ip,
@@ -2400,7 +2324,7 @@ xfs_ifree(
2400 2324
2401 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2325 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2402 2326
2403 error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0); 2327 error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
2404 if (error) 2328 if (error)
2405 return error; 2329 return error;
2406 2330
@@ -2678,14 +2602,31 @@ xfs_imap(
2678 fsbno = imap->im_blkno ? 2602 fsbno = imap->im_blkno ?
2679 XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; 2603 XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK;
2680 error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); 2604 error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags);
2681 if (error != 0) { 2605 if (error)
2682 return error; 2606 return error;
2683 } 2607
2684 imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); 2608 imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno);
2685 imap->im_len = XFS_FSB_TO_BB(mp, len); 2609 imap->im_len = XFS_FSB_TO_BB(mp, len);
2686 imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); 2610 imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno);
2687 imap->im_ioffset = (ushort)off; 2611 imap->im_ioffset = (ushort)off;
2688 imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); 2612 imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog);
2613
2614 /*
2615 * If the inode number maps to a block outside the bounds
2616 * of the file system then return NULL rather than calling
2617 * read_buf and panicing when we get an error from the
2618 * driver.
2619 */
2620 if ((imap->im_blkno + imap->im_len) >
2621 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
2622 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
2623 "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
2624 " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
2625 (unsigned long long) imap->im_blkno,
2626 (unsigned long long) imap->im_len,
2627 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
2628 return EINVAL;
2629 }
2689 return 0; 2630 return 0;
2690} 2631}
2691 2632
@@ -2826,38 +2767,41 @@ xfs_iunpin(
2826} 2767}
2827 2768
2828/* 2769/*
2829 * This is called to wait for the given inode to be unpinned. 2770 * This is called to unpin an inode. It can be directed to wait or to return
2830 * It will sleep until this happens. The caller must have the 2771 * immediately without waiting for the inode to be unpinned. The caller must
2831 * inode locked in at least shared mode so that the buffer cannot 2772 * have the inode locked in at least shared mode so that the buffer cannot be
2832 * be subsequently pinned once someone is waiting for it to be 2773 * subsequently pinned once someone is waiting for it to be unpinned.
2833 * unpinned.
2834 */ 2774 */
2835STATIC void 2775STATIC void
2836xfs_iunpin_wait( 2776__xfs_iunpin_wait(
2837 xfs_inode_t *ip) 2777 xfs_inode_t *ip,
2778 int wait)
2838{ 2779{
2839 xfs_inode_log_item_t *iip; 2780 xfs_inode_log_item_t *iip = ip->i_itemp;
2840 xfs_lsn_t lsn;
2841 2781
2842 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); 2782 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS));
2843 2783 if (atomic_read(&ip->i_pincount) == 0)
2844 if (atomic_read(&ip->i_pincount) == 0) {
2845 return; 2784 return;
2846 }
2847 2785
2848 iip = ip->i_itemp; 2786 /* Give the log a push to start the unpinning I/O */
2849 if (iip && iip->ili_last_lsn) { 2787 xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ?
2850 lsn = iip->ili_last_lsn; 2788 iip->ili_last_lsn : 0, XFS_LOG_FORCE);
2851 } else { 2789 if (wait)
2852 lsn = (xfs_lsn_t)0; 2790 wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
2853 } 2791}
2854 2792
2855 /* 2793static inline void
2856 * Give the log a push so we don't wait here too long. 2794xfs_iunpin_wait(
2857 */ 2795 xfs_inode_t *ip)
2858 xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE); 2796{
2797 __xfs_iunpin_wait(ip, 1);
2798}
2859 2799
2860 wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); 2800static inline void
2801xfs_iunpin_nowait(
2802 xfs_inode_t *ip)
2803{
2804 __xfs_iunpin_wait(ip, 0);
2861} 2805}
2862 2806
2863 2807
@@ -2932,7 +2876,7 @@ xfs_iextents_copy(
2932 * format indicates the current state of the fork. 2876 * format indicates the current state of the fork.
2933 */ 2877 */
2934/*ARGSUSED*/ 2878/*ARGSUSED*/
2935STATIC int 2879STATIC void
2936xfs_iflush_fork( 2880xfs_iflush_fork(
2937 xfs_inode_t *ip, 2881 xfs_inode_t *ip,
2938 xfs_dinode_t *dip, 2882 xfs_dinode_t *dip,
@@ -2953,16 +2897,16 @@ xfs_iflush_fork(
2953 static const short extflag[2] = 2897 static const short extflag[2] =
2954 { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; 2898 { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
2955 2899
2956 if (iip == NULL) 2900 if (!iip)
2957 return 0; 2901 return;
2958 ifp = XFS_IFORK_PTR(ip, whichfork); 2902 ifp = XFS_IFORK_PTR(ip, whichfork);
2959 /* 2903 /*
2960 * This can happen if we gave up in iformat in an error path, 2904 * This can happen if we gave up in iformat in an error path,
2961 * for the attribute fork. 2905 * for the attribute fork.
2962 */ 2906 */
2963 if (ifp == NULL) { 2907 if (!ifp) {
2964 ASSERT(whichfork == XFS_ATTR_FORK); 2908 ASSERT(whichfork == XFS_ATTR_FORK);
2965 return 0; 2909 return;
2966 } 2910 }
2967 cp = XFS_DFORK_PTR(dip, whichfork); 2911 cp = XFS_DFORK_PTR(dip, whichfork);
2968 mp = ip->i_mount; 2912 mp = ip->i_mount;
@@ -3023,8 +2967,145 @@ xfs_iflush_fork(
3023 ASSERT(0); 2967 ASSERT(0);
3024 break; 2968 break;
3025 } 2969 }
2970}
2971
2972STATIC int
2973xfs_iflush_cluster(
2974 xfs_inode_t *ip,
2975 xfs_buf_t *bp)
2976{
2977 xfs_mount_t *mp = ip->i_mount;
2978 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
2979 unsigned long first_index, mask;
2980 int ilist_size;
2981 xfs_inode_t **ilist;
2982 xfs_inode_t *iq;
2983 int nr_found;
2984 int clcount = 0;
2985 int bufwasdelwri;
2986 int i;
2987
2988 ASSERT(pag->pagi_inodeok);
2989 ASSERT(pag->pag_ici_init);
2990
2991 ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *);
2992 ilist = kmem_alloc(ilist_size, KM_MAYFAIL);
2993 if (!ilist)
2994 return 0;
2995
2996 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
2997 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
2998 read_lock(&pag->pag_ici_lock);
2999 /* really need a gang lookup range call here */
3000 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
3001 first_index,
3002 XFS_INODE_CLUSTER_SIZE(mp));
3003 if (nr_found == 0)
3004 goto out_free;
3005
3006 for (i = 0; i < nr_found; i++) {
3007 iq = ilist[i];
3008 if (iq == ip)
3009 continue;
3010 /* if the inode lies outside this cluster, we're done. */
3011 if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index)
3012 break;
3013 /*
3014 * Do an un-protected check to see if the inode is dirty and
3015 * is a candidate for flushing. These checks will be repeated
3016 * later after the appropriate locks are acquired.
3017 */
3018 if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
3019 continue;
3020
3021 /*
3022 * Try to get locks. If any are unavailable or it is pinned,
3023 * then this inode cannot be flushed and is skipped.
3024 */
3025
3026 if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
3027 continue;
3028 if (!xfs_iflock_nowait(iq)) {
3029 xfs_iunlock(iq, XFS_ILOCK_SHARED);
3030 continue;
3031 }
3032 if (xfs_ipincount(iq)) {
3033 xfs_ifunlock(iq);
3034 xfs_iunlock(iq, XFS_ILOCK_SHARED);
3035 continue;
3036 }
3037
3038 /*
3039 * arriving here means that this inode can be flushed. First
3040 * re-check that it's dirty before flushing.
3041 */
3042 if (!xfs_inode_clean(iq)) {
3043 int error;
3044 error = xfs_iflush_int(iq, bp);
3045 if (error) {
3046 xfs_iunlock(iq, XFS_ILOCK_SHARED);
3047 goto cluster_corrupt_out;
3048 }
3049 clcount++;
3050 } else {
3051 xfs_ifunlock(iq);
3052 }
3053 xfs_iunlock(iq, XFS_ILOCK_SHARED);
3054 }
3055
3056 if (clcount) {
3057 XFS_STATS_INC(xs_icluster_flushcnt);
3058 XFS_STATS_ADD(xs_icluster_flushinode, clcount);
3059 }
3026 3060
3061out_free:
3062 read_unlock(&pag->pag_ici_lock);
3063 kmem_free(ilist, ilist_size);
3027 return 0; 3064 return 0;
3065
3066
3067cluster_corrupt_out:
3068 /*
3069 * Corruption detected in the clustering loop. Invalidate the
3070 * inode buffer and shut down the filesystem.
3071 */
3072 read_unlock(&pag->pag_ici_lock);
3073 /*
3074 * Clean up the buffer. If it was B_DELWRI, just release it --
3075 * brelse can handle it with no problems. If not, shut down the
3076 * filesystem before releasing the buffer.
3077 */
3078 bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
3079 if (bufwasdelwri)
3080 xfs_buf_relse(bp);
3081
3082 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
3083
3084 if (!bufwasdelwri) {
3085 /*
3086 * Just like incore_relse: if we have b_iodone functions,
3087 * mark the buffer as an error and call them. Otherwise
3088 * mark it as stale and brelse.
3089 */
3090 if (XFS_BUF_IODONE_FUNC(bp)) {
3091 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
3092 XFS_BUF_UNDONE(bp);
3093 XFS_BUF_STALE(bp);
3094 XFS_BUF_SHUT(bp);
3095 XFS_BUF_ERROR(bp,EIO);
3096 xfs_biodone(bp);
3097 } else {
3098 XFS_BUF_STALE(bp);
3099 xfs_buf_relse(bp);
3100 }
3101 }
3102
3103 /*
3104 * Unlocks the flush lock
3105 */
3106 xfs_iflush_abort(iq);
3107 kmem_free(ilist, ilist_size);
3108 return XFS_ERROR(EFSCORRUPTED);
3028} 3109}
3029 3110
3030/* 3111/*
@@ -3046,11 +3127,7 @@ xfs_iflush(
3046 xfs_dinode_t *dip; 3127 xfs_dinode_t *dip;
3047 xfs_mount_t *mp; 3128 xfs_mount_t *mp;
3048 int error; 3129 int error;
3049 /* REFERENCED */ 3130 int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
3050 xfs_inode_t *iq;
3051 int clcount; /* count of inodes clustered */
3052 int bufwasdelwri;
3053 struct hlist_node *entry;
3054 enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; 3131 enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
3055 3132
3056 XFS_STATS_INC(xs_iflush_count); 3133 XFS_STATS_INC(xs_iflush_count);
@@ -3067,8 +3144,7 @@ xfs_iflush(
3067 * If the inode isn't dirty, then just release the inode 3144 * If the inode isn't dirty, then just release the inode
3068 * flush lock and do nothing. 3145 * flush lock and do nothing.
3069 */ 3146 */
3070 if ((ip->i_update_core == 0) && 3147 if (xfs_inode_clean(ip)) {
3071 ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
3072 ASSERT((iip != NULL) ? 3148 ASSERT((iip != NULL) ?
3073 !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1); 3149 !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
3074 xfs_ifunlock(ip); 3150 xfs_ifunlock(ip);
@@ -3076,11 +3152,21 @@ xfs_iflush(
3076 } 3152 }
3077 3153
3078 /* 3154 /*
3079 * We can't flush the inode until it is unpinned, so 3155 * We can't flush the inode until it is unpinned, so wait for it if we
3080 * wait for it. We know noone new can pin it, because 3156 * are allowed to block. We know noone new can pin it, because we are
3081 * we are holding the inode lock shared and you need 3157 * holding the inode lock shared and you need to hold it exclusively to
3082 * to hold it exclusively to pin the inode. 3158 * pin the inode.
3159 *
3160 * If we are not allowed to block, force the log out asynchronously so
3161 * that when we come back the inode will be unpinned. If other inodes
3162 * in the same cluster are dirty, they will probably write the inode
3163 * out for us if they occur after the log force completes.
3083 */ 3164 */
3165 if (noblock && xfs_ipincount(ip)) {
3166 xfs_iunpin_nowait(ip);
3167 xfs_ifunlock(ip);
3168 return EAGAIN;
3169 }
3084 xfs_iunpin_wait(ip); 3170 xfs_iunpin_wait(ip);
3085 3171
3086 /* 3172 /*
@@ -3097,15 +3183,6 @@ xfs_iflush(
3097 } 3183 }
3098 3184
3099 /* 3185 /*
3100 * Get the buffer containing the on-disk inode.
3101 */
3102 error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0);
3103 if (error) {
3104 xfs_ifunlock(ip);
3105 return error;
3106 }
3107
3108 /*
3109 * Decide how buffer will be flushed out. This is done before 3186 * Decide how buffer will be flushed out. This is done before
3110 * the call to xfs_iflush_int because this field is zeroed by it. 3187 * the call to xfs_iflush_int because this field is zeroed by it.
3111 */ 3188 */
@@ -3121,6 +3198,7 @@ xfs_iflush(
3121 case XFS_IFLUSH_DELWRI_ELSE_SYNC: 3198 case XFS_IFLUSH_DELWRI_ELSE_SYNC:
3122 flags = 0; 3199 flags = 0;
3123 break; 3200 break;
3201 case XFS_IFLUSH_ASYNC_NOBLOCK:
3124 case XFS_IFLUSH_ASYNC: 3202 case XFS_IFLUSH_ASYNC:
3125 case XFS_IFLUSH_DELWRI_ELSE_ASYNC: 3203 case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
3126 flags = INT_ASYNC; 3204 flags = INT_ASYNC;
@@ -3140,6 +3218,7 @@ xfs_iflush(
3140 case XFS_IFLUSH_DELWRI: 3218 case XFS_IFLUSH_DELWRI:
3141 flags = INT_DELWRI; 3219 flags = INT_DELWRI;
3142 break; 3220 break;
3221 case XFS_IFLUSH_ASYNC_NOBLOCK:
3143 case XFS_IFLUSH_ASYNC: 3222 case XFS_IFLUSH_ASYNC:
3144 flags = INT_ASYNC; 3223 flags = INT_ASYNC;
3145 break; 3224 break;
@@ -3154,94 +3233,41 @@ xfs_iflush(
3154 } 3233 }
3155 3234
3156 /* 3235 /*
3157 * First flush out the inode that xfs_iflush was called with. 3236 * Get the buffer containing the on-disk inode.
3158 */ 3237 */
3159 error = xfs_iflush_int(ip, bp); 3238 error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0,
3160 if (error) { 3239 noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK);
3161 goto corrupt_out; 3240 if (error || !bp) {
3241 xfs_ifunlock(ip);
3242 return error;
3162 } 3243 }
3163 3244
3164 /* 3245 /*
3165 * inode clustering: 3246 * First flush out the inode that xfs_iflush was called with.
3166 * see if other inodes can be gathered into this write
3167 */ 3247 */
3168 spin_lock(&ip->i_cluster->icl_lock); 3248 error = xfs_iflush_int(ip, bp);
3169 ip->i_cluster->icl_buf = bp; 3249 if (error)
3170 3250 goto corrupt_out;
3171 clcount = 0;
3172 hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) {
3173 if (iq == ip)
3174 continue;
3175
3176 /*
3177 * Do an un-protected check to see if the inode is dirty and
3178 * is a candidate for flushing. These checks will be repeated
3179 * later after the appropriate locks are acquired.
3180 */
3181 iip = iq->i_itemp;
3182 if ((iq->i_update_core == 0) &&
3183 ((iip == NULL) ||
3184 !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
3185 xfs_ipincount(iq) == 0) {
3186 continue;
3187 }
3188
3189 /*
3190 * Try to get locks. If any are unavailable,
3191 * then this inode cannot be flushed and is skipped.
3192 */
3193
3194 /* get inode locks (just i_lock) */
3195 if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) {
3196 /* get inode flush lock */
3197 if (xfs_iflock_nowait(iq)) {
3198 /* check if pinned */
3199 if (xfs_ipincount(iq) == 0) {
3200 /* arriving here means that
3201 * this inode can be flushed.
3202 * first re-check that it's
3203 * dirty
3204 */
3205 iip = iq->i_itemp;
3206 if ((iq->i_update_core != 0)||
3207 ((iip != NULL) &&
3208 (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
3209 clcount++;
3210 error = xfs_iflush_int(iq, bp);
3211 if (error) {
3212 xfs_iunlock(iq,
3213 XFS_ILOCK_SHARED);
3214 goto cluster_corrupt_out;
3215 }
3216 } else {
3217 xfs_ifunlock(iq);
3218 }
3219 } else {
3220 xfs_ifunlock(iq);
3221 }
3222 }
3223 xfs_iunlock(iq, XFS_ILOCK_SHARED);
3224 }
3225 }
3226 spin_unlock(&ip->i_cluster->icl_lock);
3227
3228 if (clcount) {
3229 XFS_STATS_INC(xs_icluster_flushcnt);
3230 XFS_STATS_ADD(xs_icluster_flushinode, clcount);
3231 }
3232 3251
3233 /* 3252 /*
3234 * If the buffer is pinned then push on the log so we won't 3253 * If the buffer is pinned then push on the log now so we won't
3235 * get stuck waiting in the write for too long. 3254 * get stuck waiting in the write for too long.
3236 */ 3255 */
3237 if (XFS_BUF_ISPINNED(bp)){ 3256 if (XFS_BUF_ISPINNED(bp))
3238 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 3257 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
3239 } 3258
3259 /*
3260 * inode clustering:
3261 * see if other inodes can be gathered into this write
3262 */
3263 error = xfs_iflush_cluster(ip, bp);
3264 if (error)
3265 goto cluster_corrupt_out;
3240 3266
3241 if (flags & INT_DELWRI) { 3267 if (flags & INT_DELWRI) {
3242 xfs_bdwrite(mp, bp); 3268 xfs_bdwrite(mp, bp);
3243 } else if (flags & INT_ASYNC) { 3269 } else if (flags & INT_ASYNC) {
3244 xfs_bawrite(mp, bp); 3270 error = xfs_bawrite(mp, bp);
3245 } else { 3271 } else {
3246 error = xfs_bwrite(mp, bp); 3272 error = xfs_bwrite(mp, bp);
3247 } 3273 }
@@ -3250,52 +3276,11 @@ xfs_iflush(
3250corrupt_out: 3276corrupt_out:
3251 xfs_buf_relse(bp); 3277 xfs_buf_relse(bp);
3252 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 3278 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
3253 xfs_iflush_abort(ip);
3254 /*
3255 * Unlocks the flush lock
3256 */
3257 return XFS_ERROR(EFSCORRUPTED);
3258
3259cluster_corrupt_out: 3279cluster_corrupt_out:
3260 /* Corruption detected in the clustering loop. Invalidate the
3261 * inode buffer and shut down the filesystem.
3262 */
3263 spin_unlock(&ip->i_cluster->icl_lock);
3264
3265 /*
3266 * Clean up the buffer. If it was B_DELWRI, just release it --
3267 * brelse can handle it with no problems. If not, shut down the
3268 * filesystem before releasing the buffer.
3269 */
3270 if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) {
3271 xfs_buf_relse(bp);
3272 }
3273
3274 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
3275
3276 if(!bufwasdelwri) {
3277 /*
3278 * Just like incore_relse: if we have b_iodone functions,
3279 * mark the buffer as an error and call them. Otherwise
3280 * mark it as stale and brelse.
3281 */
3282 if (XFS_BUF_IODONE_FUNC(bp)) {
3283 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
3284 XFS_BUF_UNDONE(bp);
3285 XFS_BUF_STALE(bp);
3286 XFS_BUF_SHUT(bp);
3287 XFS_BUF_ERROR(bp,EIO);
3288 xfs_biodone(bp);
3289 } else {
3290 XFS_BUF_STALE(bp);
3291 xfs_buf_relse(bp);
3292 }
3293 }
3294
3295 xfs_iflush_abort(iq);
3296 /* 3280 /*
3297 * Unlocks the flush lock 3281 * Unlocks the flush lock
3298 */ 3282 */
3283 xfs_iflush_abort(ip);
3299 return XFS_ERROR(EFSCORRUPTED); 3284 return XFS_ERROR(EFSCORRUPTED);
3300} 3285}
3301 3286
@@ -3325,8 +3310,7 @@ xfs_iflush_int(
3325 * If the inode isn't dirty, then just release the inode 3310 * If the inode isn't dirty, then just release the inode
3326 * flush lock and do nothing. 3311 * flush lock and do nothing.
3327 */ 3312 */
3328 if ((ip->i_update_core == 0) && 3313 if (xfs_inode_clean(ip)) {
3329 ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
3330 xfs_ifunlock(ip); 3314 xfs_ifunlock(ip);
3331 return 0; 3315 return 0;
3332 } 3316 }
@@ -3434,9 +3418,9 @@ xfs_iflush_int(
3434 * has been updated, then make the conversion permanent. 3418 * has been updated, then make the conversion permanent.
3435 */ 3419 */
3436 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || 3420 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
3437 XFS_SB_VERSION_HASNLINK(&mp->m_sb)); 3421 xfs_sb_version_hasnlink(&mp->m_sb));
3438 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { 3422 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
3439 if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { 3423 if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
3440 /* 3424 /*
3441 * Convert it back. 3425 * Convert it back.
3442 */ 3426 */
@@ -3459,16 +3443,9 @@ xfs_iflush_int(
3459 } 3443 }
3460 } 3444 }
3461 3445
3462 if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) { 3446 xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp);
3463 goto corrupt_out; 3447 if (XFS_IFORK_Q(ip))
3464 } 3448 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
3465
3466 if (XFS_IFORK_Q(ip)) {
3467 /*
3468 * The only error from xfs_iflush_fork is on the data fork.
3469 */
3470 (void) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
3471 }
3472 xfs_inobp_check(mp, bp); 3449 xfs_inobp_check(mp, bp);
3473 3450
3474 /* 3451 /*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index bfcd72cbaeea..93c37697a72c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -133,19 +133,6 @@ typedef struct dm_attrs_s {
133} dm_attrs_t; 133} dm_attrs_t;
134 134
135/* 135/*
136 * This is the xfs inode cluster structure. This structure is used by
137 * xfs_iflush to find inodes that share a cluster and can be flushed to disk at
138 * the same time.
139 */
140typedef struct xfs_icluster {
141 struct hlist_head icl_inodes; /* list of inodes on cluster */
142 xfs_daddr_t icl_blkno; /* starting block number of
143 * the cluster */
144 struct xfs_buf *icl_buf; /* the inode buffer */
145 spinlock_t icl_lock; /* inode list lock */
146} xfs_icluster_t;
147
148/*
149 * This is the xfs in-core inode structure. 136 * This is the xfs in-core inode structure.
150 * Most of the on-disk inode is embedded in the i_d field. 137 * Most of the on-disk inode is embedded in the i_d field.
151 * 138 *
@@ -240,10 +227,6 @@ typedef struct xfs_inode {
240 atomic_t i_pincount; /* inode pin count */ 227 atomic_t i_pincount; /* inode pin count */
241 wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ 228 wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
242 spinlock_t i_flags_lock; /* inode i_flags lock */ 229 spinlock_t i_flags_lock; /* inode i_flags lock */
243#ifdef HAVE_REFCACHE
244 struct xfs_inode **i_refcache; /* ptr to entry in ref cache */
245 struct xfs_inode *i_release; /* inode to unref */
246#endif
247 /* Miscellaneous state. */ 230 /* Miscellaneous state. */
248 unsigned short i_flags; /* see defined flags below */ 231 unsigned short i_flags; /* see defined flags below */
249 unsigned char i_update_core; /* timestamps/size is dirty */ 232 unsigned char i_update_core; /* timestamps/size is dirty */
@@ -252,8 +235,6 @@ typedef struct xfs_inode {
252 unsigned int i_delayed_blks; /* count of delay alloc blks */ 235 unsigned int i_delayed_blks; /* count of delay alloc blks */
253 236
254 xfs_icdinode_t i_d; /* most of ondisk inode */ 237 xfs_icdinode_t i_d; /* most of ondisk inode */
255 xfs_icluster_t *i_cluster; /* cluster list header */
256 struct hlist_node i_cnode; /* cluster link node */
257 238
258 xfs_fsize_t i_size; /* in-memory size */ 239 xfs_fsize_t i_size; /* in-memory size */
259 xfs_fsize_t i_new_size; /* size when write completes */ 240 xfs_fsize_t i_new_size; /* size when write completes */
@@ -461,6 +442,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
461#define XFS_IFLUSH_SYNC 3 442#define XFS_IFLUSH_SYNC 3
462#define XFS_IFLUSH_ASYNC 4 443#define XFS_IFLUSH_ASYNC 4
463#define XFS_IFLUSH_DELWRI 5 444#define XFS_IFLUSH_DELWRI 5
445#define XFS_IFLUSH_ASYNC_NOBLOCK 6
464 446
465/* 447/*
466 * Flags for xfs_itruncate_start(). 448 * Flags for xfs_itruncate_start().
@@ -515,7 +497,7 @@ int xfs_finish_reclaim_all(struct xfs_mount *, int);
515 */ 497 */
516int xfs_itobp(struct xfs_mount *, struct xfs_trans *, 498int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
517 xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **, 499 xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **,
518 xfs_daddr_t, uint); 500 xfs_daddr_t, uint, uint);
519int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 501int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
520 xfs_inode_t **, xfs_daddr_t, uint); 502 xfs_inode_t **, xfs_daddr_t, uint);
521int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); 503int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
@@ -597,7 +579,6 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
597#define xfs_inobp_check(mp, bp) 579#define xfs_inobp_check(mp, bp)
598#endif /* DEBUG */ 580#endif /* DEBUG */
599 581
600extern struct kmem_zone *xfs_icluster_zone;
601extern struct kmem_zone *xfs_ifork_zone; 582extern struct kmem_zone *xfs_ifork_zone;
602extern struct kmem_zone *xfs_inode_zone; 583extern struct kmem_zone *xfs_inode_zone;
603extern struct kmem_zone *xfs_ili_zone; 584extern struct kmem_zone *xfs_ili_zone;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 034ca7202295..93b5db453ea2 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -40,6 +40,7 @@
40#include "xfs_btree.h" 40#include "xfs_btree.h"
41#include "xfs_ialloc.h" 41#include "xfs_ialloc.h"
42#include "xfs_rw.h" 42#include "xfs_rw.h"
43#include "xfs_error.h"
43 44
44 45
45kmem_zone_t *xfs_ili_zone; /* inode log item zone */ 46kmem_zone_t *xfs_ili_zone; /* inode log item zone */
@@ -296,9 +297,9 @@ xfs_inode_item_format(
296 */ 297 */
297 mp = ip->i_mount; 298 mp = ip->i_mount;
298 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || 299 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
299 XFS_SB_VERSION_HASNLINK(&mp->m_sb)); 300 xfs_sb_version_hasnlink(&mp->m_sb));
300 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { 301 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
301 if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { 302 if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
302 /* 303 /*
303 * Convert it back. 304 * Convert it back.
304 */ 305 */
@@ -813,7 +814,12 @@ xfs_inode_item_pushbuf(
813 XFS_LOG_FORCE); 814 XFS_LOG_FORCE);
814 } 815 }
815 if (dopush) { 816 if (dopush) {
816 xfs_bawrite(mp, bp); 817 int error;
818 error = xfs_bawrite(mp, bp);
819 if (error)
820 xfs_fs_cmn_err(CE_WARN, mp,
821 "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p",
822 error, iip, bp);
817 } else { 823 } else {
818 xfs_buf_relse(bp); 824 xfs_buf_relse(bp);
819 } 825 }
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index bfe92ea17952..40513077ab36 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -168,6 +168,14 @@ static inline int xfs_ilog_fext(int w)
168 return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT); 168 return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
169} 169}
170 170
171static inline int xfs_inode_clean(xfs_inode_t *ip)
172{
173 return (!ip->i_itemp ||
174 !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
175 !ip->i_update_core;
176}
177
178
171#ifdef __KERNEL__ 179#ifdef __KERNEL__
172 180
173extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); 181extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fde37f87d52f..fb3cf1191419 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -802,8 +802,11 @@ xfs_iomap_write_allocate(
802 */ 802 */
803 nimaps = 1; 803 nimaps = 1;
804 end_fsb = XFS_B_TO_FSB(mp, ip->i_size); 804 end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
805 xfs_bmap_last_offset(NULL, ip, &last_block, 805 error = xfs_bmap_last_offset(NULL, ip, &last_block,
806 XFS_DATA_FORK); 806 XFS_DATA_FORK);
807 if (error)
808 goto trans_cancel;
809
807 last_block = XFS_FILEOFF_MAX(last_block, end_fsb); 810 last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
808 if ((map_start_fsb + count_fsb) > last_block) { 811 if ((map_start_fsb + count_fsb) > last_block) {
809 count_fsb = last_block - map_start_fsb; 812 count_fsb = last_block - map_start_fsb;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 658aab6b1bbf..eb85bdedad0c 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -45,7 +45,7 @@ xfs_internal_inum(
45 xfs_ino_t ino) 45 xfs_ino_t ino)
46{ 46{
47 return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || 47 return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
48 (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) && 48 (xfs_sb_version_hasquota(&mp->m_sb) &&
49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); 49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
50} 50}
51 51
@@ -129,7 +129,7 @@ xfs_bulkstat_one_iget(
129 return error; 129 return error;
130} 130}
131 131
132STATIC int 132STATIC void
133xfs_bulkstat_one_dinode( 133xfs_bulkstat_one_dinode(
134 xfs_mount_t *mp, /* mount point for filesystem */ 134 xfs_mount_t *mp, /* mount point for filesystem */
135 xfs_ino_t ino, /* inode number to get data for */ 135 xfs_ino_t ino, /* inode number to get data for */
@@ -198,8 +198,6 @@ xfs_bulkstat_one_dinode(
198 buf->bs_blocks = be64_to_cpu(dic->di_nblocks); 198 buf->bs_blocks = be64_to_cpu(dic->di_nblocks);
199 break; 199 break;
200 } 200 }
201
202 return 0;
203} 201}
204 202
205STATIC int 203STATIC int
@@ -614,7 +612,8 @@ xfs_bulkstat(
614 xfs_buf_relse(bp); 612 xfs_buf_relse(bp);
615 error = xfs_itobp(mp, NULL, ip, 613 error = xfs_itobp(mp, NULL, ip,
616 &dip, &bp, bno, 614 &dip, &bp, bno,
617 XFS_IMAP_BULKSTAT); 615 XFS_IMAP_BULKSTAT,
616 XFS_BUF_LOCK);
618 if (!error) 617 if (!error)
619 clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; 618 clustidx = ip->i_boffset / mp->m_sb.sb_inodesize;
620 kmem_zone_free(xfs_inode_zone, ip); 619 kmem_zone_free(xfs_inode_zone, ip);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a75edca1860f..afaee301b0ee 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -41,6 +41,7 @@
41#include "xfs_inode.h" 41#include "xfs_inode.h"
42#include "xfs_rw.h" 42#include "xfs_rw.h"
43 43
44kmem_zone_t *xfs_log_ticket_zone;
44 45
45#define xlog_write_adv_cnt(ptr, len, off, bytes) \ 46#define xlog_write_adv_cnt(ptr, len, off, bytes) \
46 { (ptr) += (bytes); \ 47 { (ptr) += (bytes); \
@@ -73,8 +74,6 @@ STATIC int xlog_state_get_iclog_space(xlog_t *log,
73 xlog_ticket_t *ticket, 74 xlog_ticket_t *ticket,
74 int *continued_write, 75 int *continued_write,
75 int *logoffsetp); 76 int *logoffsetp);
76STATIC void xlog_state_put_ticket(xlog_t *log,
77 xlog_ticket_t *tic);
78STATIC int xlog_state_release_iclog(xlog_t *log, 77STATIC int xlog_state_release_iclog(xlog_t *log,
79 xlog_in_core_t *iclog); 78 xlog_in_core_t *iclog);
80STATIC void xlog_state_switch_iclogs(xlog_t *log, 79STATIC void xlog_state_switch_iclogs(xlog_t *log,
@@ -101,7 +100,6 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
101 100
102 101
103/* local ticket functions */ 102/* local ticket functions */
104STATIC void xlog_state_ticket_alloc(xlog_t *log);
105STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, 103STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log,
106 int unit_bytes, 104 int unit_bytes,
107 int count, 105 int count,
@@ -330,7 +328,7 @@ xfs_log_done(xfs_mount_t *mp,
330 */ 328 */
331 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); 329 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
332 xlog_ungrant_log_space(log, ticket); 330 xlog_ungrant_log_space(log, ticket);
333 xlog_state_put_ticket(log, ticket); 331 xlog_ticket_put(log, ticket);
334 } else { 332 } else {
335 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); 333 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
336 xlog_regrant_reserve_log_space(log, ticket); 334 xlog_regrant_reserve_log_space(log, ticket);
@@ -384,7 +382,27 @@ _xfs_log_force(
384 return xlog_state_sync_all(log, flags, log_flushed); 382 return xlog_state_sync_all(log, flags, log_flushed);
385 else 383 else
386 return xlog_state_sync(log, lsn, flags, log_flushed); 384 return xlog_state_sync(log, lsn, flags, log_flushed);
387} /* xfs_log_force */ 385} /* _xfs_log_force */
386
387/*
388 * Wrapper for _xfs_log_force(), to be used when caller doesn't care
389 * about errors or whether the log was flushed or not. This is the normal
390 * interface to use when trying to unpin items or move the log forward.
391 */
392void
393xfs_log_force(
394 xfs_mount_t *mp,
395 xfs_lsn_t lsn,
396 uint flags)
397{
398 int error;
399 error = _xfs_log_force(mp, lsn, flags, NULL);
400 if (error) {
401 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
402 "error %d returned.", error);
403 }
404}
405
388 406
389/* 407/*
390 * Attaches a new iclog I/O completion callback routine during 408 * Attaches a new iclog I/O completion callback routine during
@@ -397,12 +415,10 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
397 void *iclog_hndl, /* iclog to hang callback off */ 415 void *iclog_hndl, /* iclog to hang callback off */
398 xfs_log_callback_t *cb) 416 xfs_log_callback_t *cb)
399{ 417{
400 xlog_t *log = mp->m_log;
401 xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; 418 xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
402 int abortflg; 419 int abortflg;
403 420
404 cb->cb_next = NULL; 421 spin_lock(&iclog->ic_callback_lock);
405 spin_lock(&log->l_icloglock);
406 abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); 422 abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
407 if (!abortflg) { 423 if (!abortflg) {
408 ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || 424 ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
@@ -411,7 +427,7 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
411 *(iclog->ic_callback_tail) = cb; 427 *(iclog->ic_callback_tail) = cb;
412 iclog->ic_callback_tail = &(cb->cb_next); 428 iclog->ic_callback_tail = &(cb->cb_next);
413 } 429 }
414 spin_unlock(&log->l_icloglock); 430 spin_unlock(&iclog->ic_callback_lock);
415 return abortflg; 431 return abortflg;
416} /* xfs_log_notify */ 432} /* xfs_log_notify */
417 433
@@ -471,6 +487,8 @@ xfs_log_reserve(xfs_mount_t *mp,
471 /* may sleep if need to allocate more tickets */ 487 /* may sleep if need to allocate more tickets */
472 internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, 488 internal_ticket = xlog_ticket_get(log, unit_bytes, cnt,
473 client, flags); 489 client, flags);
490 if (!internal_ticket)
491 return XFS_ERROR(ENOMEM);
474 internal_ticket->t_trans_type = t_type; 492 internal_ticket->t_trans_type = t_type;
475 *ticket = internal_ticket; 493 *ticket = internal_ticket;
476 xlog_trace_loggrant(log, internal_ticket, 494 xlog_trace_loggrant(log, internal_ticket,
@@ -636,7 +654,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
636 if (mp->m_flags & XFS_MOUNT_RDONLY) 654 if (mp->m_flags & XFS_MOUNT_RDONLY)
637 return 0; 655 return 0;
638 656
639 xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); 657 error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL);
658 ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
640 659
641#ifdef DEBUG 660#ifdef DEBUG
642 first_iclog = iclog = log->l_iclog; 661 first_iclog = iclog = log->l_iclog;
@@ -675,10 +694,10 @@ xfs_log_unmount_write(xfs_mount_t *mp)
675 694
676 spin_lock(&log->l_icloglock); 695 spin_lock(&log->l_icloglock);
677 iclog = log->l_iclog; 696 iclog = log->l_iclog;
678 iclog->ic_refcnt++; 697 atomic_inc(&iclog->ic_refcnt);
679 spin_unlock(&log->l_icloglock); 698 spin_unlock(&log->l_icloglock);
680 xlog_state_want_sync(log, iclog); 699 xlog_state_want_sync(log, iclog);
681 (void) xlog_state_release_iclog(log, iclog); 700 error = xlog_state_release_iclog(log, iclog);
682 701
683 spin_lock(&log->l_icloglock); 702 spin_lock(&log->l_icloglock);
684 if (!(iclog->ic_state == XLOG_STATE_ACTIVE || 703 if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
@@ -695,7 +714,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
695 if (tic) { 714 if (tic) {
696 xlog_trace_loggrant(log, tic, "unmount rec"); 715 xlog_trace_loggrant(log, tic, "unmount rec");
697 xlog_ungrant_log_space(log, tic); 716 xlog_ungrant_log_space(log, tic);
698 xlog_state_put_ticket(log, tic); 717 xlog_ticket_put(log, tic);
699 } 718 }
700 } else { 719 } else {
701 /* 720 /*
@@ -713,11 +732,11 @@ xfs_log_unmount_write(xfs_mount_t *mp)
713 */ 732 */
714 spin_lock(&log->l_icloglock); 733 spin_lock(&log->l_icloglock);
715 iclog = log->l_iclog; 734 iclog = log->l_iclog;
716 iclog->ic_refcnt++; 735 atomic_inc(&iclog->ic_refcnt);
717 spin_unlock(&log->l_icloglock); 736 spin_unlock(&log->l_icloglock);
718 737
719 xlog_state_want_sync(log, iclog); 738 xlog_state_want_sync(log, iclog);
720 (void) xlog_state_release_iclog(log, iclog); 739 error = xlog_state_release_iclog(log, iclog);
721 740
722 spin_lock(&log->l_icloglock); 741 spin_lock(&log->l_icloglock);
723 742
@@ -732,7 +751,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
732 } 751 }
733 } 752 }
734 753
735 return 0; 754 return error;
736} /* xfs_log_unmount_write */ 755} /* xfs_log_unmount_write */
737 756
738/* 757/*
@@ -1090,7 +1109,7 @@ xlog_get_iclog_buffer_size(xfs_mount_t *mp,
1090 size >>= 1; 1109 size >>= 1;
1091 } 1110 }
1092 1111
1093 if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) { 1112 if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1094 /* # headers = size / 32K 1113 /* # headers = size / 32K
1095 * one header holds cycles from 32K of data 1114 * one header holds cycles from 32K of data
1096 */ 1115 */
@@ -1186,13 +1205,13 @@ xlog_alloc_log(xfs_mount_t *mp,
1186 log->l_grant_reserve_cycle = 1; 1205 log->l_grant_reserve_cycle = 1;
1187 log->l_grant_write_cycle = 1; 1206 log->l_grant_write_cycle = 1;
1188 1207
1189 if (XFS_SB_VERSION_HASSECTOR(&mp->m_sb)) { 1208 if (xfs_sb_version_hassector(&mp->m_sb)) {
1190 log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; 1209 log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
1191 ASSERT(log->l_sectbb_log <= mp->m_sectbb_log); 1210 ASSERT(log->l_sectbb_log <= mp->m_sectbb_log);
1192 /* for larger sector sizes, must have v2 or external log */ 1211 /* for larger sector sizes, must have v2 or external log */
1193 ASSERT(log->l_sectbb_log == 0 || 1212 ASSERT(log->l_sectbb_log == 0 ||
1194 log->l_logBBstart == 0 || 1213 log->l_logBBstart == 0 ||
1195 XFS_SB_VERSION_HASLOGV2(&mp->m_sb)); 1214 xfs_sb_version_haslogv2(&mp->m_sb));
1196 ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT); 1215 ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT);
1197 } 1216 }
1198 log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; 1217 log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;
@@ -1210,7 +1229,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1210 spin_lock_init(&log->l_icloglock); 1229 spin_lock_init(&log->l_icloglock);
1211 spin_lock_init(&log->l_grant_lock); 1230 spin_lock_init(&log->l_grant_lock);
1212 initnsema(&log->l_flushsema, 0, "ic-flush"); 1231 initnsema(&log->l_flushsema, 0, "ic-flush");
1213 xlog_state_ticket_alloc(log); /* wait until after icloglock inited */
1214 1232
1215 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1233 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
1216 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); 1234 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1240,23 +1258,24 @@ xlog_alloc_log(xfs_mount_t *mp,
1240 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1258 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1241 iclog->ic_bp = bp; 1259 iclog->ic_bp = bp;
1242 iclog->hic_data = bp->b_addr; 1260 iclog->hic_data = bp->b_addr;
1243 1261#ifdef DEBUG
1244 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); 1262 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
1245 1263#endif
1246 head = &iclog->ic_header; 1264 head = &iclog->ic_header;
1247 memset(head, 0, sizeof(xlog_rec_header_t)); 1265 memset(head, 0, sizeof(xlog_rec_header_t));
1248 head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); 1266 head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
1249 head->h_version = cpu_to_be32( 1267 head->h_version = cpu_to_be32(
1250 XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1); 1268 xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
1251 head->h_size = cpu_to_be32(log->l_iclog_size); 1269 head->h_size = cpu_to_be32(log->l_iclog_size);
1252 /* new fields */ 1270 /* new fields */
1253 head->h_fmt = cpu_to_be32(XLOG_FMT); 1271 head->h_fmt = cpu_to_be32(XLOG_FMT);
1254 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); 1272 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
1255 1273
1256
1257 iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; 1274 iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
1258 iclog->ic_state = XLOG_STATE_ACTIVE; 1275 iclog->ic_state = XLOG_STATE_ACTIVE;
1259 iclog->ic_log = log; 1276 iclog->ic_log = log;
1277 atomic_set(&iclog->ic_refcnt, 0);
1278 spin_lock_init(&iclog->ic_callback_lock);
1260 iclog->ic_callback_tail = &(iclog->ic_callback); 1279 iclog->ic_callback_tail = &(iclog->ic_callback);
1261 iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; 1280 iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
1262 1281
@@ -1402,10 +1421,10 @@ xlog_sync(xlog_t *log,
1402 int roundoff; /* roundoff to BB or stripe */ 1421 int roundoff; /* roundoff to BB or stripe */
1403 int split = 0; /* split write into two regions */ 1422 int split = 0; /* split write into two regions */
1404 int error; 1423 int error;
1405 int v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb); 1424 int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
1406 1425
1407 XFS_STATS_INC(xs_log_writes); 1426 XFS_STATS_INC(xs_log_writes);
1408 ASSERT(iclog->ic_refcnt == 0); 1427 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
1409 1428
1410 /* Add for LR header */ 1429 /* Add for LR header */
1411 count_init = log->l_iclog_hsize + iclog->ic_offset; 1430 count_init = log->l_iclog_hsize + iclog->ic_offset;
@@ -1538,7 +1557,6 @@ STATIC void
1538xlog_dealloc_log(xlog_t *log) 1557xlog_dealloc_log(xlog_t *log)
1539{ 1558{
1540 xlog_in_core_t *iclog, *next_iclog; 1559 xlog_in_core_t *iclog, *next_iclog;
1541 xlog_ticket_t *tic, *next_tic;
1542 int i; 1560 int i;
1543 1561
1544 iclog = log->l_iclog; 1562 iclog = log->l_iclog;
@@ -1559,22 +1577,6 @@ xlog_dealloc_log(xlog_t *log)
1559 spinlock_destroy(&log->l_icloglock); 1577 spinlock_destroy(&log->l_icloglock);
1560 spinlock_destroy(&log->l_grant_lock); 1578 spinlock_destroy(&log->l_grant_lock);
1561 1579
1562 /* XXXsup take a look at this again. */
1563 if ((log->l_ticket_cnt != log->l_ticket_tcnt) &&
1564 !XLOG_FORCED_SHUTDOWN(log)) {
1565 xfs_fs_cmn_err(CE_WARN, log->l_mp,
1566 "xlog_dealloc_log: (cnt: %d, total: %d)",
1567 log->l_ticket_cnt, log->l_ticket_tcnt);
1568 /* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */
1569
1570 } else {
1571 tic = log->l_unmount_free;
1572 while (tic) {
1573 next_tic = tic->t_next;
1574 kmem_free(tic, PAGE_SIZE);
1575 tic = next_tic;
1576 }
1577 }
1578 xfs_buf_free(log->l_xbuf); 1580 xfs_buf_free(log->l_xbuf);
1579#ifdef XFS_LOG_TRACE 1581#ifdef XFS_LOG_TRACE
1580 if (log->l_trace != NULL) { 1582 if (log->l_trace != NULL) {
@@ -1987,7 +1989,7 @@ xlog_state_clean_log(xlog_t *log)
1987 if (iclog->ic_state == XLOG_STATE_DIRTY) { 1989 if (iclog->ic_state == XLOG_STATE_DIRTY) {
1988 iclog->ic_state = XLOG_STATE_ACTIVE; 1990 iclog->ic_state = XLOG_STATE_ACTIVE;
1989 iclog->ic_offset = 0; 1991 iclog->ic_offset = 0;
1990 iclog->ic_callback = NULL; /* don't need to free */ 1992 ASSERT(iclog->ic_callback == NULL);
1991 /* 1993 /*
1992 * If the number of ops in this iclog indicate it just 1994 * If the number of ops in this iclog indicate it just
1993 * contains the dummy transaction, we can 1995 * contains the dummy transaction, we can
@@ -2190,37 +2192,40 @@ xlog_state_do_callback(
2190 be64_to_cpu(iclog->ic_header.h_lsn); 2192 be64_to_cpu(iclog->ic_header.h_lsn);
2191 spin_unlock(&log->l_grant_lock); 2193 spin_unlock(&log->l_grant_lock);
2192 2194
2193 /*
2194 * Keep processing entries in the callback list
2195 * until we come around and it is empty. We
2196 * need to atomically see that the list is
2197 * empty and change the state to DIRTY so that
2198 * we don't miss any more callbacks being added.
2199 */
2200 spin_lock(&log->l_icloglock);
2201 } else { 2195 } else {
2196 spin_unlock(&log->l_icloglock);
2202 ioerrors++; 2197 ioerrors++;
2203 } 2198 }
2204 cb = iclog->ic_callback;
2205 2199
2200 /*
2201 * Keep processing entries in the callback list until
2202 * we come around and it is empty. We need to
2203 * atomically see that the list is empty and change the
2204 * state to DIRTY so that we don't miss any more
2205 * callbacks being added.
2206 */
2207 spin_lock(&iclog->ic_callback_lock);
2208 cb = iclog->ic_callback;
2206 while (cb) { 2209 while (cb) {
2207 iclog->ic_callback_tail = &(iclog->ic_callback); 2210 iclog->ic_callback_tail = &(iclog->ic_callback);
2208 iclog->ic_callback = NULL; 2211 iclog->ic_callback = NULL;
2209 spin_unlock(&log->l_icloglock); 2212 spin_unlock(&iclog->ic_callback_lock);
2210 2213
2211 /* perform callbacks in the order given */ 2214 /* perform callbacks in the order given */
2212 for (; cb; cb = cb_next) { 2215 for (; cb; cb = cb_next) {
2213 cb_next = cb->cb_next; 2216 cb_next = cb->cb_next;
2214 cb->cb_func(cb->cb_arg, aborted); 2217 cb->cb_func(cb->cb_arg, aborted);
2215 } 2218 }
2216 spin_lock(&log->l_icloglock); 2219 spin_lock(&iclog->ic_callback_lock);
2217 cb = iclog->ic_callback; 2220 cb = iclog->ic_callback;
2218 } 2221 }
2219 2222
2220 loopdidcallbacks++; 2223 loopdidcallbacks++;
2221 funcdidcallbacks++; 2224 funcdidcallbacks++;
2222 2225
2226 spin_lock(&log->l_icloglock);
2223 ASSERT(iclog->ic_callback == NULL); 2227 ASSERT(iclog->ic_callback == NULL);
2228 spin_unlock(&iclog->ic_callback_lock);
2224 if (!(iclog->ic_state & XLOG_STATE_IOERROR)) 2229 if (!(iclog->ic_state & XLOG_STATE_IOERROR))
2225 iclog->ic_state = XLOG_STATE_DIRTY; 2230 iclog->ic_state = XLOG_STATE_DIRTY;
2226 2231
@@ -2241,7 +2246,7 @@ xlog_state_do_callback(
2241 repeats = 0; 2246 repeats = 0;
2242 xfs_fs_cmn_err(CE_WARN, log->l_mp, 2247 xfs_fs_cmn_err(CE_WARN, log->l_mp,
2243 "%s: possible infinite loop (%d iterations)", 2248 "%s: possible infinite loop (%d iterations)",
2244 __FUNCTION__, flushcnt); 2249 __func__, flushcnt);
2245 } 2250 }
2246 } while (!ioerrors && loopdidcallbacks); 2251 } while (!ioerrors && loopdidcallbacks);
2247 2252
@@ -2309,7 +2314,7 @@ xlog_state_done_syncing(
2309 2314
2310 ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || 2315 ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
2311 iclog->ic_state == XLOG_STATE_IOERROR); 2316 iclog->ic_state == XLOG_STATE_IOERROR);
2312 ASSERT(iclog->ic_refcnt == 0); 2317 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
2313 ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); 2318 ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
2314 2319
2315 2320
@@ -2391,7 +2396,7 @@ restart:
2391 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); 2396 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
2392 head = &iclog->ic_header; 2397 head = &iclog->ic_header;
2393 2398
2394 iclog->ic_refcnt++; /* prevents sync */ 2399 atomic_inc(&iclog->ic_refcnt); /* prevents sync */
2395 log_offset = iclog->ic_offset; 2400 log_offset = iclog->ic_offset;
2396 2401
2397 /* On the 1st write to an iclog, figure out lsn. This works 2402 /* On the 1st write to an iclog, figure out lsn. This works
@@ -2423,12 +2428,12 @@ restart:
2423 xlog_state_switch_iclogs(log, iclog, iclog->ic_size); 2428 xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
2424 2429
2425 /* If I'm the only one writing to this iclog, sync it to disk */ 2430 /* If I'm the only one writing to this iclog, sync it to disk */
2426 if (iclog->ic_refcnt == 1) { 2431 if (atomic_read(&iclog->ic_refcnt) == 1) {
2427 spin_unlock(&log->l_icloglock); 2432 spin_unlock(&log->l_icloglock);
2428 if ((error = xlog_state_release_iclog(log, iclog))) 2433 if ((error = xlog_state_release_iclog(log, iclog)))
2429 return error; 2434 return error;
2430 } else { 2435 } else {
2431 iclog->ic_refcnt--; 2436 atomic_dec(&iclog->ic_refcnt);
2432 spin_unlock(&log->l_icloglock); 2437 spin_unlock(&log->l_icloglock);
2433 } 2438 }
2434 goto restart; 2439 goto restart;
@@ -2792,18 +2797,6 @@ xlog_ungrant_log_space(xlog_t *log,
2792 2797
2793 2798
2794/* 2799/*
2795 * Atomically put back used ticket.
2796 */
2797STATIC void
2798xlog_state_put_ticket(xlog_t *log,
2799 xlog_ticket_t *tic)
2800{
2801 spin_lock(&log->l_icloglock);
2802 xlog_ticket_put(log, tic);
2803 spin_unlock(&log->l_icloglock);
2804} /* xlog_state_put_ticket */
2805
2806/*
2807 * Flush iclog to disk if this is the last reference to the given iclog and 2800 * Flush iclog to disk if this is the last reference to the given iclog and
2808 * the WANT_SYNC bit is set. 2801 * the WANT_SYNC bit is set.
2809 * 2802 *
@@ -2813,33 +2806,35 @@ xlog_state_put_ticket(xlog_t *log,
2813 * 2806 *
2814 */ 2807 */
2815STATIC int 2808STATIC int
2816xlog_state_release_iclog(xlog_t *log, 2809xlog_state_release_iclog(
2817 xlog_in_core_t *iclog) 2810 xlog_t *log,
2811 xlog_in_core_t *iclog)
2818{ 2812{
2819 int sync = 0; /* do we sync? */ 2813 int sync = 0; /* do we sync? */
2820 2814
2821 xlog_assign_tail_lsn(log->l_mp); 2815 if (iclog->ic_state & XLOG_STATE_IOERROR)
2816 return XFS_ERROR(EIO);
2822 2817
2823 spin_lock(&log->l_icloglock); 2818 ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
2819 if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
2820 return 0;
2824 2821
2825 if (iclog->ic_state & XLOG_STATE_IOERROR) { 2822 if (iclog->ic_state & XLOG_STATE_IOERROR) {
2826 spin_unlock(&log->l_icloglock); 2823 spin_unlock(&log->l_icloglock);
2827 return XFS_ERROR(EIO); 2824 return XFS_ERROR(EIO);
2828 } 2825 }
2829
2830 ASSERT(iclog->ic_refcnt > 0);
2831 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || 2826 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
2832 iclog->ic_state == XLOG_STATE_WANT_SYNC); 2827 iclog->ic_state == XLOG_STATE_WANT_SYNC);
2833 2828
2834 if (--iclog->ic_refcnt == 0 && 2829 if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
2835 iclog->ic_state == XLOG_STATE_WANT_SYNC) { 2830 /* update tail before writing to iclog */
2831 xlog_assign_tail_lsn(log->l_mp);
2836 sync++; 2832 sync++;
2837 iclog->ic_state = XLOG_STATE_SYNCING; 2833 iclog->ic_state = XLOG_STATE_SYNCING;
2838 iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); 2834 iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn);
2839 xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); 2835 xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn);
2840 /* cycle incremented when incrementing curr_block */ 2836 /* cycle incremented when incrementing curr_block */
2841 } 2837 }
2842
2843 spin_unlock(&log->l_icloglock); 2838 spin_unlock(&log->l_icloglock);
2844 2839
2845 /* 2840 /*
@@ -2849,11 +2844,9 @@ xlog_state_release_iclog(xlog_t *log,
2849 * this iclog has consistent data, so we ignore IOERROR 2844 * this iclog has consistent data, so we ignore IOERROR
2850 * flags after this point. 2845 * flags after this point.
2851 */ 2846 */
2852 if (sync) { 2847 if (sync)
2853 return xlog_sync(log, iclog); 2848 return xlog_sync(log, iclog);
2854 }
2855 return 0; 2849 return 0;
2856
2857} /* xlog_state_release_iclog */ 2850} /* xlog_state_release_iclog */
2858 2851
2859 2852
@@ -2881,7 +2874,7 @@ xlog_state_switch_iclogs(xlog_t *log,
2881 log->l_curr_block += BTOBB(eventual_size)+BTOBB(log->l_iclog_hsize); 2874 log->l_curr_block += BTOBB(eventual_size)+BTOBB(log->l_iclog_hsize);
2882 2875
2883 /* Round up to next log-sunit */ 2876 /* Round up to next log-sunit */
2884 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) && 2877 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
2885 log->l_mp->m_sb.sb_logsunit > 1) { 2878 log->l_mp->m_sb.sb_logsunit > 1) {
2886 __uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit); 2879 __uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
2887 log->l_curr_block = roundup(log->l_curr_block, sunit_bb); 2880 log->l_curr_block = roundup(log->l_curr_block, sunit_bb);
@@ -2953,7 +2946,8 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
2953 * previous iclog and go to sleep. 2946 * previous iclog and go to sleep.
2954 */ 2947 */
2955 if (iclog->ic_state == XLOG_STATE_DIRTY || 2948 if (iclog->ic_state == XLOG_STATE_DIRTY ||
2956 (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) { 2949 (atomic_read(&iclog->ic_refcnt) == 0
2950 && iclog->ic_offset == 0)) {
2957 iclog = iclog->ic_prev; 2951 iclog = iclog->ic_prev;
2958 if (iclog->ic_state == XLOG_STATE_ACTIVE || 2952 if (iclog->ic_state == XLOG_STATE_ACTIVE ||
2959 iclog->ic_state == XLOG_STATE_DIRTY) 2953 iclog->ic_state == XLOG_STATE_DIRTY)
@@ -2961,14 +2955,14 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
2961 else 2955 else
2962 goto maybe_sleep; 2956 goto maybe_sleep;
2963 } else { 2957 } else {
2964 if (iclog->ic_refcnt == 0) { 2958 if (atomic_read(&iclog->ic_refcnt) == 0) {
2965 /* We are the only one with access to this 2959 /* We are the only one with access to this
2966 * iclog. Flush it out now. There should 2960 * iclog. Flush it out now. There should
2967 * be a roundoff of zero to show that someone 2961 * be a roundoff of zero to show that someone
2968 * has already taken care of the roundoff from 2962 * has already taken care of the roundoff from
2969 * the previous sync. 2963 * the previous sync.
2970 */ 2964 */
2971 iclog->ic_refcnt++; 2965 atomic_inc(&iclog->ic_refcnt);
2972 lsn = be64_to_cpu(iclog->ic_header.h_lsn); 2966 lsn = be64_to_cpu(iclog->ic_header.h_lsn);
2973 xlog_state_switch_iclogs(log, iclog, 0); 2967 xlog_state_switch_iclogs(log, iclog, 0);
2974 spin_unlock(&log->l_icloglock); 2968 spin_unlock(&log->l_icloglock);
@@ -3100,7 +3094,7 @@ try_again:
3100 already_slept = 1; 3094 already_slept = 1;
3101 goto try_again; 3095 goto try_again;
3102 } else { 3096 } else {
3103 iclog->ic_refcnt++; 3097 atomic_inc(&iclog->ic_refcnt);
3104 xlog_state_switch_iclogs(log, iclog, 0); 3098 xlog_state_switch_iclogs(log, iclog, 0);
3105 spin_unlock(&log->l_icloglock); 3099 spin_unlock(&log->l_icloglock);
3106 if (xlog_state_release_iclog(log, iclog)) 3100 if (xlog_state_release_iclog(log, iclog))
@@ -3172,92 +3166,19 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3172 */ 3166 */
3173 3167
3174/* 3168/*
3175 * Algorithm doesn't take into account page size. ;-( 3169 * Free a used ticket.
3176 */
3177STATIC void
3178xlog_state_ticket_alloc(xlog_t *log)
3179{
3180 xlog_ticket_t *t_list;
3181 xlog_ticket_t *next;
3182 xfs_caddr_t buf;
3183 uint i = (PAGE_SIZE / sizeof(xlog_ticket_t)) - 2;
3184
3185 /*
3186 * The kmem_zalloc may sleep, so we shouldn't be holding the
3187 * global lock. XXXmiken: may want to use zone allocator.
3188 */
3189 buf = (xfs_caddr_t) kmem_zalloc(PAGE_SIZE, KM_SLEEP);
3190
3191 spin_lock(&log->l_icloglock);
3192
3193 /* Attach 1st ticket to Q, so we can keep track of allocated memory */
3194 t_list = (xlog_ticket_t *)buf;
3195 t_list->t_next = log->l_unmount_free;
3196 log->l_unmount_free = t_list++;
3197 log->l_ticket_cnt++;
3198 log->l_ticket_tcnt++;
3199
3200 /* Next ticket becomes first ticket attached to ticket free list */
3201 if (log->l_freelist != NULL) {
3202 ASSERT(log->l_tail != NULL);
3203 log->l_tail->t_next = t_list;
3204 } else {
3205 log->l_freelist = t_list;
3206 }
3207 log->l_ticket_cnt++;
3208 log->l_ticket_tcnt++;
3209
3210 /* Cycle through rest of alloc'ed memory, building up free Q */
3211 for ( ; i > 0; i--) {
3212 next = t_list + 1;
3213 t_list->t_next = next;
3214 t_list = next;
3215 log->l_ticket_cnt++;
3216 log->l_ticket_tcnt++;
3217 }
3218 t_list->t_next = NULL;
3219 log->l_tail = t_list;
3220 spin_unlock(&log->l_icloglock);
3221} /* xlog_state_ticket_alloc */
3222
3223
3224/*
3225 * Put ticket into free list
3226 *
3227 * Assumption: log lock is held around this call.
3228 */ 3170 */
3229STATIC void 3171STATIC void
3230xlog_ticket_put(xlog_t *log, 3172xlog_ticket_put(xlog_t *log,
3231 xlog_ticket_t *ticket) 3173 xlog_ticket_t *ticket)
3232{ 3174{
3233 sv_destroy(&ticket->t_sema); 3175 sv_destroy(&ticket->t_sema);
3234 3176 kmem_zone_free(xfs_log_ticket_zone, ticket);
3235 /*
3236 * Don't think caching will make that much difference. It's
3237 * more important to make debug easier.
3238 */
3239#if 0
3240 /* real code will want to use LIFO for caching */
3241 ticket->t_next = log->l_freelist;
3242 log->l_freelist = ticket;
3243 /* no need to clear fields */
3244#else
3245 /* When we debug, it is easier if tickets are cycled */
3246 ticket->t_next = NULL;
3247 if (log->l_tail) {
3248 log->l_tail->t_next = ticket;
3249 } else {
3250 ASSERT(log->l_freelist == NULL);
3251 log->l_freelist = ticket;
3252 }
3253 log->l_tail = ticket;
3254#endif /* DEBUG */
3255 log->l_ticket_cnt++;
3256} /* xlog_ticket_put */ 3177} /* xlog_ticket_put */
3257 3178
3258 3179
3259/* 3180/*
3260 * Grab ticket off freelist or allocation some more 3181 * Allocate and initialise a new log ticket.
3261 */ 3182 */
3262STATIC xlog_ticket_t * 3183STATIC xlog_ticket_t *
3263xlog_ticket_get(xlog_t *log, 3184xlog_ticket_get(xlog_t *log,
@@ -3269,21 +3190,9 @@ xlog_ticket_get(xlog_t *log,
3269 xlog_ticket_t *tic; 3190 xlog_ticket_t *tic;
3270 uint num_headers; 3191 uint num_headers;
3271 3192
3272 alloc: 3193 tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL);
3273 if (log->l_freelist == NULL) 3194 if (!tic)
3274 xlog_state_ticket_alloc(log); /* potentially sleep */ 3195 return NULL;
3275
3276 spin_lock(&log->l_icloglock);
3277 if (log->l_freelist == NULL) {
3278 spin_unlock(&log->l_icloglock);
3279 goto alloc;
3280 }
3281 tic = log->l_freelist;
3282 log->l_freelist = tic->t_next;
3283 if (log->l_freelist == NULL)
3284 log->l_tail = NULL;
3285 log->l_ticket_cnt--;
3286 spin_unlock(&log->l_icloglock);
3287 3196
3288 /* 3197 /*
3289 * Permanent reservations have up to 'cnt'-1 active log operations 3198 * Permanent reservations have up to 'cnt'-1 active log operations
@@ -3334,7 +3243,7 @@ xlog_ticket_get(xlog_t *log,
3334 unit_bytes += sizeof(xlog_op_header_t) * num_headers; 3243 unit_bytes += sizeof(xlog_op_header_t) * num_headers;
3335 3244
3336 /* for roundoff padding for transaction data and one for commit record */ 3245 /* for roundoff padding for transaction data and one for commit record */
3337 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) && 3246 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
3338 log->l_mp->m_sb.sb_logsunit > 1) { 3247 log->l_mp->m_sb.sb_logsunit > 1) {
3339 /* log su roundoff */ 3248 /* log su roundoff */
3340 unit_bytes += 2*log->l_mp->m_sb.sb_logsunit; 3249 unit_bytes += 2*log->l_mp->m_sb.sb_logsunit;
@@ -3611,8 +3520,8 @@ xfs_log_force_umount(
3611 * before we mark the filesystem SHUTDOWN and wake 3520 * before we mark the filesystem SHUTDOWN and wake
3612 * everybody up to tell the bad news. 3521 * everybody up to tell the bad news.
3613 */ 3522 */
3614 spin_lock(&log->l_grant_lock);
3615 spin_lock(&log->l_icloglock); 3523 spin_lock(&log->l_icloglock);
3524 spin_lock(&log->l_grant_lock);
3616 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; 3525 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
3617 XFS_BUF_DONE(mp->m_sb_bp); 3526 XFS_BUF_DONE(mp->m_sb_bp);
3618 /* 3527 /*
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 4cdac048df5e..d1d678ecb63e 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -142,8 +142,9 @@ int _xfs_log_force(struct xfs_mount *mp,
142 xfs_lsn_t lsn, 142 xfs_lsn_t lsn,
143 uint flags, 143 uint flags,
144 int *log_forced); 144 int *log_forced);
145#define xfs_log_force(mp, lsn, flags) \ 145void xfs_log_force(struct xfs_mount *mp,
146 _xfs_log_force(mp, lsn, flags, NULL); 146 xfs_lsn_t lsn,
147 uint flags);
147int xfs_log_mount(struct xfs_mount *mp, 148int xfs_log_mount(struct xfs_mount *mp,
148 struct xfs_buftarg *log_target, 149 struct xfs_buftarg *log_target,
149 xfs_daddr_t start_block, 150 xfs_daddr_t start_block,
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index e008233ee249..8952a392b5f3 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -49,10 +49,10 @@ struct xfs_mount;
49#define XLOG_HEADER_SIZE 512 49#define XLOG_HEADER_SIZE 512
50 50
51#define XLOG_REC_SHIFT(log) \ 51#define XLOG_REC_SHIFT(log) \
52 BTOBB(1 << (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? \ 52 BTOBB(1 << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
53 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) 53 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
54#define XLOG_TOTAL_REC_SHIFT(log) \ 54#define XLOG_TOTAL_REC_SHIFT(log) \
55 BTOBB(XLOG_MAX_ICLOGS << (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? \ 55 BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
56 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) 56 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
57 57
58 58
@@ -242,7 +242,7 @@ typedef struct xlog_res {
242 242
243typedef struct xlog_ticket { 243typedef struct xlog_ticket {
244 sv_t t_sema; /* sleep on this semaphore : 20 */ 244 sv_t t_sema; /* sleep on this semaphore : 20 */
245 struct xlog_ticket *t_next; /* :4|8 */ 245 struct xlog_ticket *t_next; /* :4|8 */
246 struct xlog_ticket *t_prev; /* :4|8 */ 246 struct xlog_ticket *t_prev; /* :4|8 */
247 xlog_tid_t t_tid; /* transaction identifier : 4 */ 247 xlog_tid_t t_tid; /* transaction identifier : 4 */
248 int t_curr_res; /* current reservation in bytes : 4 */ 248 int t_curr_res; /* current reservation in bytes : 4 */
@@ -324,6 +324,19 @@ typedef struct xlog_rec_ext_header {
324 * - ic_offset is the current number of bytes written to in this iclog. 324 * - ic_offset is the current number of bytes written to in this iclog.
325 * - ic_refcnt is bumped when someone is writing to the log. 325 * - ic_refcnt is bumped when someone is writing to the log.
326 * - ic_state is the state of the iclog. 326 * - ic_state is the state of the iclog.
327 *
328 * Because of cacheline contention on large machines, we need to separate
329 * various resources onto different cachelines. To start with, make the
330 * structure cacheline aligned. The following fields can be contended on
331 * by independent processes:
332 *
333 * - ic_callback_*
334 * - ic_refcnt
335 * - fields protected by the global l_icloglock
336 *
337 * so we need to ensure that these fields are located in separate cachelines.
338 * We'll put all the read-only and l_icloglock fields in the first cacheline,
339 * and move everything else out to subsequent cachelines.
327 */ 340 */
328typedef struct xlog_iclog_fields { 341typedef struct xlog_iclog_fields {
329 sv_t ic_forcesema; 342 sv_t ic_forcesema;
@@ -332,17 +345,22 @@ typedef struct xlog_iclog_fields {
332 struct xlog_in_core *ic_prev; 345 struct xlog_in_core *ic_prev;
333 struct xfs_buf *ic_bp; 346 struct xfs_buf *ic_bp;
334 struct log *ic_log; 347 struct log *ic_log;
335 xfs_log_callback_t *ic_callback;
336 xfs_log_callback_t **ic_callback_tail;
337#ifdef XFS_LOG_TRACE
338 struct ktrace *ic_trace;
339#endif
340 int ic_size; 348 int ic_size;
341 int ic_offset; 349 int ic_offset;
342 int ic_refcnt;
343 int ic_bwritecnt; 350 int ic_bwritecnt;
344 ushort_t ic_state; 351 ushort_t ic_state;
345 char *ic_datap; /* pointer to iclog data */ 352 char *ic_datap; /* pointer to iclog data */
353#ifdef XFS_LOG_TRACE
354 struct ktrace *ic_trace;
355#endif
356
357 /* Callback structures need their own cacheline */
358 spinlock_t ic_callback_lock ____cacheline_aligned_in_smp;
359 xfs_log_callback_t *ic_callback;
360 xfs_log_callback_t **ic_callback_tail;
361
362 /* reference counts need their own cacheline */
363 atomic_t ic_refcnt ____cacheline_aligned_in_smp;
346} xlog_iclog_fields_t; 364} xlog_iclog_fields_t;
347 365
348typedef union xlog_in_core2 { 366typedef union xlog_in_core2 {
@@ -366,6 +384,7 @@ typedef struct xlog_in_core {
366#define ic_bp hic_fields.ic_bp 384#define ic_bp hic_fields.ic_bp
367#define ic_log hic_fields.ic_log 385#define ic_log hic_fields.ic_log
368#define ic_callback hic_fields.ic_callback 386#define ic_callback hic_fields.ic_callback
387#define ic_callback_lock hic_fields.ic_callback_lock
369#define ic_callback_tail hic_fields.ic_callback_tail 388#define ic_callback_tail hic_fields.ic_callback_tail
370#define ic_trace hic_fields.ic_trace 389#define ic_trace hic_fields.ic_trace
371#define ic_size hic_fields.ic_size 390#define ic_size hic_fields.ic_size
@@ -383,43 +402,46 @@ typedef struct xlog_in_core {
383 * that round off problems won't occur when releasing partial reservations. 402 * that round off problems won't occur when releasing partial reservations.
384 */ 403 */
385typedef struct log { 404typedef struct log {
405 /* The following fields don't need locking */
406 struct xfs_mount *l_mp; /* mount point */
407 struct xfs_buf *l_xbuf; /* extra buffer for log
408 * wrapping */
409 struct xfs_buftarg *l_targ; /* buftarg of log */
410 uint l_flags;
411 uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
412 struct xfs_buf_cancel **l_buf_cancel_table;
413 int l_iclog_hsize; /* size of iclog header */
414 int l_iclog_heads; /* # of iclog header sectors */
415 uint l_sectbb_log; /* log2 of sector size in BBs */
416 uint l_sectbb_mask; /* sector size (in BBs)
417 * alignment mask */
418 int l_iclog_size; /* size of log in bytes */
419 int l_iclog_size_log; /* log power size of log */
420 int l_iclog_bufs; /* number of iclog buffers */
421 xfs_daddr_t l_logBBstart; /* start block of log */
422 int l_logsize; /* size of log in bytes */
423 int l_logBBsize; /* size of log in BB chunks */
424
386 /* The following block of fields are changed while holding icloglock */ 425 /* The following block of fields are changed while holding icloglock */
387 sema_t l_flushsema; /* iclog flushing semaphore */ 426 sema_t l_flushsema ____cacheline_aligned_in_smp;
427 /* iclog flushing semaphore */
388 int l_flushcnt; /* # of procs waiting on this 428 int l_flushcnt; /* # of procs waiting on this
389 * sema */ 429 * sema */
390 int l_ticket_cnt; /* free ticket count */
391 int l_ticket_tcnt; /* total ticket count */
392 int l_covered_state;/* state of "covering disk 430 int l_covered_state;/* state of "covering disk
393 * log entries" */ 431 * log entries" */
394 xlog_ticket_t *l_freelist; /* free list of tickets */
395 xlog_ticket_t *l_unmount_free;/* kmem_free these addresses */
396 xlog_ticket_t *l_tail; /* free list of tickets */
397 xlog_in_core_t *l_iclog; /* head log queue */ 432 xlog_in_core_t *l_iclog; /* head log queue */
398 spinlock_t l_icloglock; /* grab to change iclog state */ 433 spinlock_t l_icloglock; /* grab to change iclog state */
399 xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed 434 xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed
400 * buffers */ 435 * buffers */
401 xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ 436 xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */
402 struct xfs_mount *l_mp; /* mount point */
403 struct xfs_buf *l_xbuf; /* extra buffer for log
404 * wrapping */
405 struct xfs_buftarg *l_targ; /* buftarg of log */
406 xfs_daddr_t l_logBBstart; /* start block of log */
407 int l_logsize; /* size of log in bytes */
408 int l_logBBsize; /* size of log in BB chunks */
409 int l_curr_cycle; /* Cycle number of log writes */ 437 int l_curr_cycle; /* Cycle number of log writes */
410 int l_prev_cycle; /* Cycle number before last 438 int l_prev_cycle; /* Cycle number before last
411 * block increment */ 439 * block increment */
412 int l_curr_block; /* current logical log block */ 440 int l_curr_block; /* current logical log block */
413 int l_prev_block; /* previous logical log block */ 441 int l_prev_block; /* previous logical log block */
414 int l_iclog_size; /* size of log in bytes */
415 int l_iclog_size_log; /* log power size of log */
416 int l_iclog_bufs; /* number of iclog buffers */
417
418 /* The following field are used for debugging; need to hold icloglock */
419 char *l_iclog_bak[XLOG_MAX_ICLOGS];
420 442
421 /* The following block of fields are changed while holding grant_lock */ 443 /* The following block of fields are changed while holding grant_lock */
422 spinlock_t l_grant_lock; 444 spinlock_t l_grant_lock ____cacheline_aligned_in_smp;
423 xlog_ticket_t *l_reserve_headq; 445 xlog_ticket_t *l_reserve_headq;
424 xlog_ticket_t *l_write_headq; 446 xlog_ticket_t *l_write_headq;
425 int l_grant_reserve_cycle; 447 int l_grant_reserve_cycle;
@@ -427,19 +449,16 @@ typedef struct log {
427 int l_grant_write_cycle; 449 int l_grant_write_cycle;
428 int l_grant_write_bytes; 450 int l_grant_write_bytes;
429 451
430 /* The following fields don't need locking */
431#ifdef XFS_LOG_TRACE 452#ifdef XFS_LOG_TRACE
432 struct ktrace *l_trace; 453 struct ktrace *l_trace;
433 struct ktrace *l_grant_trace; 454 struct ktrace *l_grant_trace;
434#endif 455#endif
435 uint l_flags; 456
436 uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ 457 /* The following field are used for debugging; need to hold icloglock */
437 struct xfs_buf_cancel **l_buf_cancel_table; 458#ifdef DEBUG
438 int l_iclog_hsize; /* size of iclog header */ 459 char *l_iclog_bak[XLOG_MAX_ICLOGS];
439 int l_iclog_heads; /* # of iclog header sectors */ 460#endif
440 uint l_sectbb_log; /* log2 of sector size in BBs */ 461
441 uint l_sectbb_mask; /* sector size (in BBs)
442 * alignment mask */
443} xlog_t; 462} xlog_t;
444 463
445#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) 464#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
@@ -459,6 +478,8 @@ extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
459extern void xlog_put_bp(struct xfs_buf *); 478extern void xlog_put_bp(struct xfs_buf *);
460extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); 479extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
461 480
481extern kmem_zone_t *xfs_log_ticket_zone;
482
462/* iclog tracing */ 483/* iclog tracing */
463#define XLOG_TRACE_GRAB_FLUSH 1 484#define XLOG_TRACE_GRAB_FLUSH 1
464#define XLOG_TRACE_REL_FLUSH 2 485#define XLOG_TRACE_REL_FLUSH 2
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b82d5d4d2462..e65ab4af0955 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -46,6 +46,7 @@
46#include "xfs_trans_priv.h" 46#include "xfs_trans_priv.h"
47#include "xfs_quota.h" 47#include "xfs_quota.h"
48#include "xfs_rw.h" 48#include "xfs_rw.h"
49#include "xfs_utils.h"
49 50
50STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); 51STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
51STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); 52STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
@@ -120,7 +121,8 @@ xlog_bread(
120 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); 121 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
121 122
122 xfsbdstrat(log->l_mp, bp); 123 xfsbdstrat(log->l_mp, bp);
123 if ((error = xfs_iowait(bp))) 124 error = xfs_iowait(bp);
125 if (error)
124 xfs_ioerror_alert("xlog_bread", log->l_mp, 126 xfs_ioerror_alert("xlog_bread", log->l_mp,
125 bp, XFS_BUF_ADDR(bp)); 127 bp, XFS_BUF_ADDR(bp));
126 return error; 128 return error;
@@ -191,7 +193,7 @@ xlog_header_check_dump(
191{ 193{
192 int b; 194 int b;
193 195
194 cmn_err(CE_DEBUG, "%s: SB : uuid = ", __FUNCTION__); 196 cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__);
195 for (b = 0; b < 16; b++) 197 for (b = 0; b < 16; b++)
196 cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); 198 cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]);
197 cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); 199 cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT);
@@ -478,7 +480,7 @@ xlog_find_verify_log_record(
478 * reset last_blk. Only when last_blk points in the middle of a log 480 * reset last_blk. Only when last_blk points in the middle of a log
479 * record do we update last_blk. 481 * record do we update last_blk.
480 */ 482 */
481 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { 483 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
482 uint h_size = be32_to_cpu(head->h_size); 484 uint h_size = be32_to_cpu(head->h_size);
483 485
484 xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; 486 xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
@@ -888,7 +890,7 @@ xlog_find_tail(
888 * unmount record if there is one, so we pass the lsn of the 890 * unmount record if there is one, so we pass the lsn of the
889 * unmount record rather than the block after it. 891 * unmount record rather than the block after it.
890 */ 892 */
891 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { 893 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
892 int h_size = be32_to_cpu(rhead->h_size); 894 int h_size = be32_to_cpu(rhead->h_size);
893 int h_version = be32_to_cpu(rhead->h_version); 895 int h_version = be32_to_cpu(rhead->h_version);
894 896
@@ -1101,7 +1103,7 @@ xlog_add_record(
1101 recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); 1103 recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
1102 recp->h_cycle = cpu_to_be32(cycle); 1104 recp->h_cycle = cpu_to_be32(cycle);
1103 recp->h_version = cpu_to_be32( 1105 recp->h_version = cpu_to_be32(
1104 XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1); 1106 xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
1105 recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block)); 1107 recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block));
1106 recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block)); 1108 recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block));
1107 recp->h_fmt = cpu_to_be32(XLOG_FMT); 1109 recp->h_fmt = cpu_to_be32(XLOG_FMT);
@@ -1160,10 +1162,14 @@ xlog_write_log_records(
1160 if (j == 0 && (start_block + endcount > ealign)) { 1162 if (j == 0 && (start_block + endcount > ealign)) {
1161 offset = XFS_BUF_PTR(bp); 1163 offset = XFS_BUF_PTR(bp);
1162 balign = BBTOB(ealign - start_block); 1164 balign = BBTOB(ealign - start_block);
1163 XFS_BUF_SET_PTR(bp, offset + balign, BBTOB(sectbb)); 1165 error = XFS_BUF_SET_PTR(bp, offset + balign,
1164 if ((error = xlog_bread(log, ealign, sectbb, bp))) 1166 BBTOB(sectbb));
1167 if (!error)
1168 error = xlog_bread(log, ealign, sectbb, bp);
1169 if (!error)
1170 error = XFS_BUF_SET_PTR(bp, offset, bufblks);
1171 if (error)
1165 break; 1172 break;
1166 XFS_BUF_SET_PTR(bp, offset, bufblks);
1167 } 1173 }
1168 1174
1169 offset = xlog_align(log, start_block, endcount, bp); 1175 offset = xlog_align(log, start_block, endcount, bp);
@@ -2280,7 +2286,9 @@ xlog_recover_do_inode_trans(
2280 * invalidate the buffer when we write it out below. 2286 * invalidate the buffer when we write it out below.
2281 */ 2287 */
2282 imap.im_blkno = 0; 2288 imap.im_blkno = 0;
2283 xfs_imap(log->l_mp, NULL, ino, &imap, 0); 2289 error = xfs_imap(log->l_mp, NULL, ino, &imap, 0);
2290 if (error)
2291 goto error;
2284 } 2292 }
2285 2293
2286 /* 2294 /*
@@ -2964,7 +2972,7 @@ xlog_recover_process_data(
2964 * Process an extent free intent item that was recovered from 2972 * Process an extent free intent item that was recovered from
2965 * the log. We need to free the extents that it describes. 2973 * the log. We need to free the extents that it describes.
2966 */ 2974 */
2967STATIC void 2975STATIC int
2968xlog_recover_process_efi( 2976xlog_recover_process_efi(
2969 xfs_mount_t *mp, 2977 xfs_mount_t *mp,
2970 xfs_efi_log_item_t *efip) 2978 xfs_efi_log_item_t *efip)
@@ -2972,6 +2980,7 @@ xlog_recover_process_efi(
2972 xfs_efd_log_item_t *efdp; 2980 xfs_efd_log_item_t *efdp;
2973 xfs_trans_t *tp; 2981 xfs_trans_t *tp;
2974 int i; 2982 int i;
2983 int error = 0;
2975 xfs_extent_t *extp; 2984 xfs_extent_t *extp;
2976 xfs_fsblock_t startblock_fsb; 2985 xfs_fsblock_t startblock_fsb;
2977 2986
@@ -2995,23 +3004,32 @@ xlog_recover_process_efi(
2995 * free the memory associated with it. 3004 * free the memory associated with it.
2996 */ 3005 */
2997 xfs_efi_release(efip, efip->efi_format.efi_nextents); 3006 xfs_efi_release(efip, efip->efi_format.efi_nextents);
2998 return; 3007 return XFS_ERROR(EIO);
2999 } 3008 }
3000 } 3009 }
3001 3010
3002 tp = xfs_trans_alloc(mp, 0); 3011 tp = xfs_trans_alloc(mp, 0);
3003 xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); 3012 error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0);
3013 if (error)
3014 goto abort_error;
3004 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); 3015 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
3005 3016
3006 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 3017 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
3007 extp = &(efip->efi_format.efi_extents[i]); 3018 extp = &(efip->efi_format.efi_extents[i]);
3008 xfs_free_extent(tp, extp->ext_start, extp->ext_len); 3019 error = xfs_free_extent(tp, extp->ext_start, extp->ext_len);
3020 if (error)
3021 goto abort_error;
3009 xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, 3022 xfs_trans_log_efd_extent(tp, efdp, extp->ext_start,
3010 extp->ext_len); 3023 extp->ext_len);
3011 } 3024 }
3012 3025
3013 efip->efi_flags |= XFS_EFI_RECOVERED; 3026 efip->efi_flags |= XFS_EFI_RECOVERED;
3014 xfs_trans_commit(tp, 0); 3027 error = xfs_trans_commit(tp, 0);
3028 return error;
3029
3030abort_error:
3031 xfs_trans_cancel(tp, XFS_TRANS_ABORT);
3032 return error;
3015} 3033}
3016 3034
3017/* 3035/*
@@ -3059,7 +3077,7 @@ xlog_recover_check_ail(
3059 * everything already in the AIL, we stop processing as soon as 3077 * everything already in the AIL, we stop processing as soon as
3060 * we see something other than an EFI in the AIL. 3078 * we see something other than an EFI in the AIL.
3061 */ 3079 */
3062STATIC void 3080STATIC int
3063xlog_recover_process_efis( 3081xlog_recover_process_efis(
3064 xlog_t *log) 3082 xlog_t *log)
3065{ 3083{
@@ -3067,6 +3085,7 @@ xlog_recover_process_efis(
3067 xfs_efi_log_item_t *efip; 3085 xfs_efi_log_item_t *efip;
3068 int gen; 3086 int gen;
3069 xfs_mount_t *mp; 3087 xfs_mount_t *mp;
3088 int error = 0;
3070 3089
3071 mp = log->l_mp; 3090 mp = log->l_mp;
3072 spin_lock(&mp->m_ail_lock); 3091 spin_lock(&mp->m_ail_lock);
@@ -3091,11 +3110,14 @@ xlog_recover_process_efis(
3091 } 3110 }
3092 3111
3093 spin_unlock(&mp->m_ail_lock); 3112 spin_unlock(&mp->m_ail_lock);
3094 xlog_recover_process_efi(mp, efip); 3113 error = xlog_recover_process_efi(mp, efip);
3114 if (error)
3115 return error;
3095 spin_lock(&mp->m_ail_lock); 3116 spin_lock(&mp->m_ail_lock);
3096 lip = xfs_trans_next_ail(mp, lip, &gen, NULL); 3117 lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
3097 } 3118 }
3098 spin_unlock(&mp->m_ail_lock); 3119 spin_unlock(&mp->m_ail_lock);
3120 return error;
3099} 3121}
3100 3122
3101/* 3123/*
@@ -3115,21 +3137,18 @@ xlog_recover_clear_agi_bucket(
3115 int error; 3137 int error;
3116 3138
3117 tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); 3139 tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET);
3118 xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0); 3140 error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0);
3119 3141 if (!error)
3120 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 3142 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
3121 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), 3143 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
3122 XFS_FSS_TO_BB(mp, 1), 0, &agibp); 3144 XFS_FSS_TO_BB(mp, 1), 0, &agibp);
3123 if (error) { 3145 if (error)
3124 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3146 goto out_abort;
3125 return;
3126 }
3127 3147
3148 error = EINVAL;
3128 agi = XFS_BUF_TO_AGI(agibp); 3149 agi = XFS_BUF_TO_AGI(agibp);
3129 if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) { 3150 if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC)
3130 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3151 goto out_abort;
3131 return;
3132 }
3133 3152
3134 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); 3153 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
3135 offset = offsetof(xfs_agi_t, agi_unlinked) + 3154 offset = offsetof(xfs_agi_t, agi_unlinked) +
@@ -3137,7 +3156,17 @@ xlog_recover_clear_agi_bucket(
3137 xfs_trans_log_buf(tp, agibp, offset, 3156 xfs_trans_log_buf(tp, agibp, offset,
3138 (offset + sizeof(xfs_agino_t) - 1)); 3157 (offset + sizeof(xfs_agino_t) - 1));
3139 3158
3140 (void) xfs_trans_commit(tp, 0); 3159 error = xfs_trans_commit(tp, 0);
3160 if (error)
3161 goto out_error;
3162 return;
3163
3164out_abort:
3165 xfs_trans_cancel(tp, XFS_TRANS_ABORT);
3166out_error:
3167 xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: "
3168 "failed to clear agi %d. Continuing.", agno);
3169 return;
3141} 3170}
3142 3171
3143/* 3172/*
@@ -3214,7 +3243,8 @@ xlog_recover_process_iunlinks(
3214 * next inode in the bucket. 3243 * next inode in the bucket.
3215 */ 3244 */
3216 error = xfs_itobp(mp, NULL, ip, &dip, 3245 error = xfs_itobp(mp, NULL, ip, &dip,
3217 &ibp, 0, 0); 3246 &ibp, 0, 0,
3247 XFS_BUF_LOCK);
3218 ASSERT(error || (dip != NULL)); 3248 ASSERT(error || (dip != NULL));
3219 } 3249 }
3220 3250
@@ -3247,7 +3277,7 @@ xlog_recover_process_iunlinks(
3247 if (ip->i_d.di_mode == 0) 3277 if (ip->i_d.di_mode == 0)
3248 xfs_iput_new(ip, 0); 3278 xfs_iput_new(ip, 0);
3249 else 3279 else
3250 VN_RELE(XFS_ITOV(ip)); 3280 IRELE(ip);
3251 } else { 3281 } else {
3252 /* 3282 /*
3253 * We can't read in the inode 3283 * We can't read in the inode
@@ -3348,7 +3378,7 @@ xlog_pack_data(
3348 dp += BBSIZE; 3378 dp += BBSIZE;
3349 } 3379 }
3350 3380
3351 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { 3381 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3352 xhdr = (xlog_in_core_2_t *)&iclog->ic_header; 3382 xhdr = (xlog_in_core_2_t *)&iclog->ic_header;
3353 for ( ; i < BTOBB(size); i++) { 3383 for ( ; i < BTOBB(size); i++) {
3354 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3384 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -3388,7 +3418,7 @@ xlog_unpack_data_checksum(
3388 be32_to_cpu(rhead->h_chksum), chksum); 3418 be32_to_cpu(rhead->h_chksum), chksum);
3389 cmn_err(CE_DEBUG, 3419 cmn_err(CE_DEBUG,
3390"XFS: Disregard message if filesystem was created with non-DEBUG kernel"); 3420"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
3391 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { 3421 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3392 cmn_err(CE_DEBUG, 3422 cmn_err(CE_DEBUG,
3393 "XFS: LogR this is a LogV2 filesystem\n"); 3423 "XFS: LogR this is a LogV2 filesystem\n");
3394 } 3424 }
@@ -3415,7 +3445,7 @@ xlog_unpack_data(
3415 dp += BBSIZE; 3445 dp += BBSIZE;
3416 } 3446 }
3417 3447
3418 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { 3448 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3419 xhdr = (xlog_in_core_2_t *)rhead; 3449 xhdr = (xlog_in_core_2_t *)rhead;
3420 for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { 3450 for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
3421 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3451 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -3445,7 +3475,7 @@ xlog_valid_rec_header(
3445 (!rhead->h_version || 3475 (!rhead->h_version ||
3446 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { 3476 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
3447 xlog_warn("XFS: %s: unrecognised log version (%d).", 3477 xlog_warn("XFS: %s: unrecognised log version (%d).",
3448 __FUNCTION__, be32_to_cpu(rhead->h_version)); 3478 __func__, be32_to_cpu(rhead->h_version));
3449 return XFS_ERROR(EIO); 3479 return XFS_ERROR(EIO);
3450 } 3480 }
3451 3481
@@ -3494,7 +3524,7 @@ xlog_do_recovery_pass(
3494 * Read the header of the tail block and get the iclog buffer size from 3524 * Read the header of the tail block and get the iclog buffer size from
3495 * h_size. Use this to tell how many sectors make up the log header. 3525 * h_size. Use this to tell how many sectors make up the log header.
3496 */ 3526 */
3497 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { 3527 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3498 /* 3528 /*
3499 * When using variable length iclogs, read first sector of 3529 * When using variable length iclogs, read first sector of
3500 * iclog header and extract the header size from it. Get a 3530 * iclog header and extract the header size from it. Get a
@@ -3604,15 +3634,19 @@ xlog_do_recovery_pass(
3604 * _first_, then the log start (LR header end) 3634 * _first_, then the log start (LR header end)
3605 * - order is important. 3635 * - order is important.
3606 */ 3636 */
3637 wrapped_hblks = hblks - split_hblks;
3607 bufaddr = XFS_BUF_PTR(hbp); 3638 bufaddr = XFS_BUF_PTR(hbp);
3608 XFS_BUF_SET_PTR(hbp, 3639 error = XFS_BUF_SET_PTR(hbp,
3609 bufaddr + BBTOB(split_hblks), 3640 bufaddr + BBTOB(split_hblks),
3610 BBTOB(hblks - split_hblks)); 3641 BBTOB(hblks - split_hblks));
3611 wrapped_hblks = hblks - split_hblks; 3642 if (!error)
3612 error = xlog_bread(log, 0, wrapped_hblks, hbp); 3643 error = xlog_bread(log, 0,
3644 wrapped_hblks, hbp);
3645 if (!error)
3646 error = XFS_BUF_SET_PTR(hbp, bufaddr,
3647 BBTOB(hblks));
3613 if (error) 3648 if (error)
3614 goto bread_err2; 3649 goto bread_err2;
3615 XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks));
3616 if (!offset) 3650 if (!offset)
3617 offset = xlog_align(log, 0, 3651 offset = xlog_align(log, 0,
3618 wrapped_hblks, hbp); 3652 wrapped_hblks, hbp);
@@ -3664,13 +3698,18 @@ xlog_do_recovery_pass(
3664 * - order is important. 3698 * - order is important.
3665 */ 3699 */
3666 bufaddr = XFS_BUF_PTR(dbp); 3700 bufaddr = XFS_BUF_PTR(dbp);
3667 XFS_BUF_SET_PTR(dbp, 3701 error = XFS_BUF_SET_PTR(dbp,
3668 bufaddr + BBTOB(split_bblks), 3702 bufaddr + BBTOB(split_bblks),
3669 BBTOB(bblks - split_bblks)); 3703 BBTOB(bblks - split_bblks));
3670 if ((error = xlog_bread(log, wrapped_hblks, 3704 if (!error)
3671 bblks - split_bblks, dbp))) 3705 error = xlog_bread(log, wrapped_hblks,
3706 bblks - split_bblks,
3707 dbp);
3708 if (!error)
3709 error = XFS_BUF_SET_PTR(dbp, bufaddr,
3710 h_size);
3711 if (error)
3672 goto bread_err2; 3712 goto bread_err2;
3673 XFS_BUF_SET_PTR(dbp, bufaddr, h_size);
3674 if (!offset) 3713 if (!offset)
3675 offset = xlog_align(log, wrapped_hblks, 3714 offset = xlog_align(log, wrapped_hblks,
3676 bblks - split_bblks, dbp); 3715 bblks - split_bblks, dbp);
@@ -3826,7 +3865,8 @@ xlog_do_recover(
3826 XFS_BUF_READ(bp); 3865 XFS_BUF_READ(bp);
3827 XFS_BUF_UNASYNC(bp); 3866 XFS_BUF_UNASYNC(bp);
3828 xfsbdstrat(log->l_mp, bp); 3867 xfsbdstrat(log->l_mp, bp);
3829 if ((error = xfs_iowait(bp))) { 3868 error = xfs_iowait(bp);
3869 if (error) {
3830 xfs_ioerror_alert("xlog_do_recover", 3870 xfs_ioerror_alert("xlog_do_recover",
3831 log->l_mp, bp, XFS_BUF_ADDR(bp)); 3871 log->l_mp, bp, XFS_BUF_ADDR(bp));
3832 ASSERT(0); 3872 ASSERT(0);
@@ -3838,7 +3878,7 @@ xlog_do_recover(
3838 sbp = &log->l_mp->m_sb; 3878 sbp = &log->l_mp->m_sb;
3839 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); 3879 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
3840 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); 3880 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
3841 ASSERT(XFS_SB_GOOD_VERSION(sbp)); 3881 ASSERT(xfs_sb_good_version(sbp));
3842 xfs_buf_relse(bp); 3882 xfs_buf_relse(bp);
3843 3883
3844 /* We've re-read the superblock so re-initialize per-cpu counters */ 3884 /* We've re-read the superblock so re-initialize per-cpu counters */
@@ -3917,7 +3957,14 @@ xlog_recover_finish(
3917 * rather than accepting new requests. 3957 * rather than accepting new requests.
3918 */ 3958 */
3919 if (log->l_flags & XLOG_RECOVERY_NEEDED) { 3959 if (log->l_flags & XLOG_RECOVERY_NEEDED) {
3920 xlog_recover_process_efis(log); 3960 int error;
3961 error = xlog_recover_process_efis(log);
3962 if (error) {
3963 cmn_err(CE_ALERT,
3964 "Failed to recover EFIs on filesystem: %s",
3965 log->l_mp->m_fsname);
3966 return error;
3967 }
3921 /* 3968 /*
3922 * Sync the log to get all the EFIs out of the AIL. 3969 * Sync the log to get all the EFIs out of the AIL.
3923 * This isn't absolutely necessary, but it helps in 3970 * This isn't absolutely necessary, but it helps in
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 6409b3762995..2fec452afbcc 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,8 +43,9 @@
43#include "xfs_rw.h" 43#include "xfs_rw.h"
44#include "xfs_quota.h" 44#include "xfs_quota.h"
45#include "xfs_fsops.h" 45#include "xfs_fsops.h"
46#include "xfs_utils.h"
46 47
47STATIC void xfs_mount_log_sbunit(xfs_mount_t *, __int64_t); 48STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t);
48STATIC int xfs_uuid_mount(xfs_mount_t *); 49STATIC int xfs_uuid_mount(xfs_mount_t *);
49STATIC void xfs_uuid_unmount(xfs_mount_t *mp); 50STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
50STATIC void xfs_unmountfs_wait(xfs_mount_t *); 51STATIC void xfs_unmountfs_wait(xfs_mount_t *);
@@ -57,7 +58,7 @@ STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
57STATIC void xfs_icsb_sync_counters(xfs_mount_t *); 58STATIC void xfs_icsb_sync_counters(xfs_mount_t *);
58STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, 59STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
59 int64_t, int); 60 int64_t, int);
60STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); 61STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
61 62
62#else 63#else
63 64
@@ -119,6 +120,7 @@ static const struct {
119 { offsetof(xfs_sb_t, sb_logsectsize),0 }, 120 { offsetof(xfs_sb_t, sb_logsectsize),0 },
120 { offsetof(xfs_sb_t, sb_logsunit), 0 }, 121 { offsetof(xfs_sb_t, sb_logsunit), 0 },
121 { offsetof(xfs_sb_t, sb_features2), 0 }, 122 { offsetof(xfs_sb_t, sb_features2), 0 },
123 { offsetof(xfs_sb_t, sb_bad_features2), 0 },
122 { sizeof(xfs_sb_t), 0 } 124 { sizeof(xfs_sb_t), 0 }
123}; 125};
124 126
@@ -225,7 +227,7 @@ xfs_mount_validate_sb(
225 return XFS_ERROR(EWRONGFS); 227 return XFS_ERROR(EWRONGFS);
226 } 228 }
227 229
228 if (!XFS_SB_GOOD_VERSION(sbp)) { 230 if (!xfs_sb_good_version(sbp)) {
229 xfs_fs_mount_cmn_err(flags, "bad version"); 231 xfs_fs_mount_cmn_err(flags, "bad version");
230 return XFS_ERROR(EWRONGFS); 232 return XFS_ERROR(EWRONGFS);
231 } 233 }
@@ -300,7 +302,7 @@ xfs_mount_validate_sb(
300 /* 302 /*
301 * Version 1 directory format has never worked on Linux. 303 * Version 1 directory format has never worked on Linux.
302 */ 304 */
303 if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) { 305 if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
304 xfs_fs_mount_cmn_err(flags, 306 xfs_fs_mount_cmn_err(flags,
305 "file system using version 1 directory format"); 307 "file system using version 1 directory format");
306 return XFS_ERROR(ENOSYS); 308 return XFS_ERROR(ENOSYS);
@@ -449,6 +451,7 @@ xfs_sb_from_disk(
449 to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize); 451 to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize);
450 to->sb_logsunit = be32_to_cpu(from->sb_logsunit); 452 to->sb_logsunit = be32_to_cpu(from->sb_logsunit);
451 to->sb_features2 = be32_to_cpu(from->sb_features2); 453 to->sb_features2 = be32_to_cpu(from->sb_features2);
454 to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2);
452} 455}
453 456
454/* 457/*
@@ -781,7 +784,7 @@ xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags)
781 * Update superblock with new values 784 * Update superblock with new values
782 * and log changes 785 * and log changes
783 */ 786 */
784 if (XFS_SB_VERSION_HASDALIGN(sbp)) { 787 if (xfs_sb_version_hasdalign(sbp)) {
785 if (sbp->sb_unit != mp->m_dalign) { 788 if (sbp->sb_unit != mp->m_dalign) {
786 sbp->sb_unit = mp->m_dalign; 789 sbp->sb_unit = mp->m_dalign;
787 *update_flags |= XFS_SB_UNIT; 790 *update_flags |= XFS_SB_UNIT;
@@ -792,7 +795,7 @@ xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags)
792 } 795 }
793 } 796 }
794 } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && 797 } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
795 XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) { 798 xfs_sb_version_hasdalign(&mp->m_sb)) {
796 mp->m_dalign = sbp->sb_unit; 799 mp->m_dalign = sbp->sb_unit;
797 mp->m_swidth = sbp->sb_width; 800 mp->m_swidth = sbp->sb_width;
798 } 801 }
@@ -869,7 +872,7 @@ xfs_set_rw_sizes(xfs_mount_t *mp)
869STATIC void 872STATIC void
870xfs_set_inoalignment(xfs_mount_t *mp) 873xfs_set_inoalignment(xfs_mount_t *mp)
871{ 874{
872 if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) && 875 if (xfs_sb_version_hasalign(&mp->m_sb) &&
873 mp->m_sb.sb_inoalignmt >= 876 mp->m_sb.sb_inoalignmt >=
874 XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) 877 XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
875 mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; 878 mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
@@ -954,7 +957,6 @@ xfs_mountfs(
954{ 957{
955 xfs_sb_t *sbp = &(mp->m_sb); 958 xfs_sb_t *sbp = &(mp->m_sb);
956 xfs_inode_t *rip; 959 xfs_inode_t *rip;
957 bhv_vnode_t *rvp = NULL;
958 __uint64_t resblks; 960 __uint64_t resblks;
959 __int64_t update_flags = 0LL; 961 __int64_t update_flags = 0LL;
960 uint quotamount, quotaflags; 962 uint quotamount, quotaflags;
@@ -962,14 +964,41 @@ xfs_mountfs(
962 int uuid_mounted = 0; 964 int uuid_mounted = 0;
963 int error = 0; 965 int error = 0;
964 966
965 if (mp->m_sb_bp == NULL) {
966 error = xfs_readsb(mp, mfsi_flags);
967 if (error)
968 return error;
969 }
970 xfs_mount_common(mp, sbp); 967 xfs_mount_common(mp, sbp);
971 968
972 /* 969 /*
970 * Check for a mismatched features2 values. Older kernels
971 * read & wrote into the wrong sb offset for sb_features2
972 * on some platforms due to xfs_sb_t not being 64bit size aligned
973 * when sb_features2 was added, which made older superblock
974 * reading/writing routines swap it as a 64-bit value.
975 *
976 * For backwards compatibility, we make both slots equal.
977 *
978 * If we detect a mismatched field, we OR the set bits into the
979 * existing features2 field in case it has already been modified; we
980 * don't want to lose any features. We then update the bad location
981 * with the ORed value so that older kernels will see any features2
982 * flags, and mark the two fields as needing updates once the
983 * transaction subsystem is online.
984 */
985 if (xfs_sb_has_mismatched_features2(sbp)) {
986 cmn_err(CE_WARN,
987 "XFS: correcting sb_features alignment problem");
988 sbp->sb_features2 |= sbp->sb_bad_features2;
989 sbp->sb_bad_features2 = sbp->sb_features2;
990 update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2;
991
992 /*
993 * Re-check for ATTR2 in case it was found in bad_features2
994 * slot.
995 */
996 if (xfs_sb_version_hasattr2(&mp->m_sb))
997 mp->m_flags |= XFS_MOUNT_ATTR2;
998
999 }
1000
1001 /*
973 * Check if sb_agblocks is aligned at stripe boundary 1002 * Check if sb_agblocks is aligned at stripe boundary
974 * If sb_agblocks is NOT aligned turn off m_dalign since 1003 * If sb_agblocks is NOT aligned turn off m_dalign since
975 * allocator alignment is within an ag, therefore ag has 1004 * allocator alignment is within an ag, therefore ag has
@@ -1129,7 +1158,6 @@ xfs_mountfs(
1129 } 1158 }
1130 1159
1131 ASSERT(rip != NULL); 1160 ASSERT(rip != NULL);
1132 rvp = XFS_ITOV(rip);
1133 1161
1134 if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { 1162 if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
1135 cmn_err(CE_WARN, "XFS: corrupted root inode"); 1163 cmn_err(CE_WARN, "XFS: corrupted root inode");
@@ -1159,11 +1187,15 @@ xfs_mountfs(
1159 } 1187 }
1160 1188
1161 /* 1189 /*
1162 * If fs is not mounted readonly, then update the superblock 1190 * If fs is not mounted readonly, then update the superblock changes.
1163 * unit and width changes.
1164 */ 1191 */
1165 if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) 1192 if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
1166 xfs_mount_log_sbunit(mp, update_flags); 1193 error = xfs_mount_log_sb(mp, update_flags);
1194 if (error) {
1195 cmn_err(CE_WARN, "XFS: failed to write sb changes");
1196 goto error4;
1197 }
1198 }
1167 1199
1168 /* 1200 /*
1169 * Initialise the XFS quota management subsystem for this mount 1201 * Initialise the XFS quota management subsystem for this mount
@@ -1200,12 +1232,15 @@ xfs_mountfs(
1200 * 1232 *
1201 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. 1233 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
1202 * This may drive us straight to ENOSPC on mount, but that implies 1234 * This may drive us straight to ENOSPC on mount, but that implies
1203 * we were already there on the last unmount. 1235 * we were already there on the last unmount. Warn if this occurs.
1204 */ 1236 */
1205 resblks = mp->m_sb.sb_dblocks; 1237 resblks = mp->m_sb.sb_dblocks;
1206 do_div(resblks, 20); 1238 do_div(resblks, 20);
1207 resblks = min_t(__uint64_t, resblks, 1024); 1239 resblks = min_t(__uint64_t, resblks, 1024);
1208 xfs_reserve_blocks(mp, &resblks, NULL); 1240 error = xfs_reserve_blocks(mp, &resblks, NULL);
1241 if (error)
1242 cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. "
1243 "Continuing without a reserve pool.");
1209 1244
1210 return 0; 1245 return 0;
1211 1246
@@ -1213,7 +1248,7 @@ xfs_mountfs(
1213 /* 1248 /*
1214 * Free up the root inode. 1249 * Free up the root inode.
1215 */ 1250 */
1216 VN_RELE(rvp); 1251 IRELE(rip);
1217 error3: 1252 error3:
1218 xfs_log_unmount_dealloc(mp); 1253 xfs_log_unmount_dealloc(mp);
1219 error2: 1254 error2:
@@ -1241,6 +1276,7 @@ int
1241xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) 1276xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1242{ 1277{
1243 __uint64_t resblks; 1278 __uint64_t resblks;
1279 int error = 0;
1244 1280
1245 /* 1281 /*
1246 * We can potentially deadlock here if we have an inode cluster 1282 * We can potentially deadlock here if we have an inode cluster
@@ -1284,9 +1320,15 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1284 * value does not matter.... 1320 * value does not matter....
1285 */ 1321 */
1286 resblks = 0; 1322 resblks = 0;
1287 xfs_reserve_blocks(mp, &resblks, NULL); 1323 error = xfs_reserve_blocks(mp, &resblks, NULL);
1324 if (error)
1325 cmn_err(CE_WARN, "XFS: Unable to free reserved block pool. "
1326 "Freespace may not be correct on next mount.");
1288 1327
1289 xfs_log_sbcount(mp, 1); 1328 error = xfs_log_sbcount(mp, 1);
1329 if (error)
1330 cmn_err(CE_WARN, "XFS: Unable to update superblock counters. "
1331 "Freespace may not be correct on next mount.");
1290 xfs_unmountfs_writesb(mp); 1332 xfs_unmountfs_writesb(mp);
1291 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1333 xfs_unmountfs_wait(mp); /* wait for async bufs */
1292 xfs_log_unmount(mp); /* Done! No more fs ops. */ 1334 xfs_log_unmount(mp); /* Done! No more fs ops. */
@@ -1378,9 +1420,8 @@ xfs_log_sbcount(
1378 xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); 1420 xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
1379 if (sync) 1421 if (sync)
1380 xfs_trans_set_sync(tp); 1422 xfs_trans_set_sync(tp);
1381 xfs_trans_commit(tp, 0); 1423 error = xfs_trans_commit(tp, 0);
1382 1424 return error;
1383 return 0;
1384} 1425}
1385 1426
1386STATIC void 1427STATIC void
@@ -1429,7 +1470,6 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1429 XFS_BUF_UNASYNC(sbp); 1470 XFS_BUF_UNASYNC(sbp);
1430 ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); 1471 ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
1431 xfsbdstrat(mp, sbp); 1472 xfsbdstrat(mp, sbp);
1432 /* Nevermind errors we might get here. */
1433 error = xfs_iowait(sbp); 1473 error = xfs_iowait(sbp);
1434 if (error) 1474 if (error)
1435 xfs_ioerror_alert("xfs_unmountfs_writesb", 1475 xfs_ioerror_alert("xfs_unmountfs_writesb",
@@ -1875,25 +1915,30 @@ xfs_uuid_unmount(
1875 1915
1876/* 1916/*
1877 * Used to log changes to the superblock unit and width fields which could 1917 * Used to log changes to the superblock unit and width fields which could
1878 * be altered by the mount options. Only the first superblock is updated. 1918 * be altered by the mount options, as well as any potential sb_features2
1919 * fixup. Only the first superblock is updated.
1879 */ 1920 */
1880STATIC void 1921STATIC int
1881xfs_mount_log_sbunit( 1922xfs_mount_log_sb(
1882 xfs_mount_t *mp, 1923 xfs_mount_t *mp,
1883 __int64_t fields) 1924 __int64_t fields)
1884{ 1925{
1885 xfs_trans_t *tp; 1926 xfs_trans_t *tp;
1927 int error;
1886 1928
1887 ASSERT(fields & (XFS_SB_UNIT|XFS_SB_WIDTH|XFS_SB_UUID)); 1929 ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
1930 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2));
1888 1931
1889 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); 1932 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
1890 if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1933 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
1891 XFS_DEFAULT_LOG_COUNT)) { 1934 XFS_DEFAULT_LOG_COUNT);
1935 if (error) {
1892 xfs_trans_cancel(tp, 0); 1936 xfs_trans_cancel(tp, 0);
1893 return; 1937 return error;
1894 } 1938 }
1895 xfs_mod_sb(tp, fields); 1939 xfs_mod_sb(tp, fields);
1896 xfs_trans_commit(tp, 0); 1940 error = xfs_trans_commit(tp, 0);
1941 return error;
1897} 1942}
1898 1943
1899 1944
@@ -2154,7 +2199,7 @@ xfs_icsb_counter_disabled(
2154 return test_bit(field, &mp->m_icsb_counters); 2199 return test_bit(field, &mp->m_icsb_counters);
2155} 2200}
2156 2201
2157STATIC int 2202STATIC void
2158xfs_icsb_disable_counter( 2203xfs_icsb_disable_counter(
2159 xfs_mount_t *mp, 2204 xfs_mount_t *mp,
2160 xfs_sb_field_t field) 2205 xfs_sb_field_t field)
@@ -2172,7 +2217,7 @@ xfs_icsb_disable_counter(
2172 * the m_icsb_mutex. 2217 * the m_icsb_mutex.
2173 */ 2218 */
2174 if (xfs_icsb_counter_disabled(mp, field)) 2219 if (xfs_icsb_counter_disabled(mp, field))
2175 return 0; 2220 return;
2176 2221
2177 xfs_icsb_lock_all_counters(mp); 2222 xfs_icsb_lock_all_counters(mp);
2178 if (!test_and_set_bit(field, &mp->m_icsb_counters)) { 2223 if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
@@ -2195,8 +2240,6 @@ xfs_icsb_disable_counter(
2195 } 2240 }
2196 2241
2197 xfs_icsb_unlock_all_counters(mp); 2242 xfs_icsb_unlock_all_counters(mp);
2198
2199 return 0;
2200} 2243}
2201 2244
2202STATIC void 2245STATIC void
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f7c620ec6e69..1ed575110ff0 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -66,17 +66,17 @@ struct xfs_mru_cache;
66 * Prototypes and functions for the Data Migration subsystem. 66 * Prototypes and functions for the Data Migration subsystem.
67 */ 67 */
68 68
69typedef int (*xfs_send_data_t)(int, bhv_vnode_t *, 69typedef int (*xfs_send_data_t)(int, struct xfs_inode *,
70 xfs_off_t, size_t, int, bhv_vrwlock_t *); 70 xfs_off_t, size_t, int, int *);
71typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint); 71typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint);
72typedef int (*xfs_send_destroy_t)(bhv_vnode_t *, dm_right_t); 72typedef int (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t);
73typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *, 73typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *,
74 bhv_vnode_t *, 74 struct xfs_inode *, dm_right_t,
75 dm_right_t, bhv_vnode_t *, dm_right_t, 75 struct xfs_inode *, dm_right_t,
76 char *, char *, mode_t, int, int); 76 const char *, const char *, mode_t, int, int);
77typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t, 77typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t,
78 char *, char *); 78 char *, char *);
79typedef void (*xfs_send_unmount_t)(struct xfs_mount *, bhv_vnode_t *, 79typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *,
80 dm_right_t, mode_t, int, int); 80 dm_right_t, mode_t, int, int);
81 81
82typedef struct xfs_dmops { 82typedef struct xfs_dmops {
@@ -88,20 +88,20 @@ typedef struct xfs_dmops {
88 xfs_send_unmount_t xfs_send_unmount; 88 xfs_send_unmount_t xfs_send_unmount;
89} xfs_dmops_t; 89} xfs_dmops_t;
90 90
91#define XFS_SEND_DATA(mp, ev,vp,off,len,fl,lock) \ 91#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \
92 (*(mp)->m_dm_ops->xfs_send_data)(ev,vp,off,len,fl,lock) 92 (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock)
93#define XFS_SEND_MMAP(mp, vma,fl) \ 93#define XFS_SEND_MMAP(mp, vma,fl) \
94 (*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl) 94 (*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl)
95#define XFS_SEND_DESTROY(mp, vp,right) \ 95#define XFS_SEND_DESTROY(mp, ip,right) \
96 (*(mp)->m_dm_ops->xfs_send_destroy)(vp,right) 96 (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right)
97#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ 97#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
98 (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl) 98 (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl)
99#define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ 99#define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
100 (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) 100 (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl)
101#define XFS_SEND_MOUNT(mp,right,path,name) \ 101#define XFS_SEND_MOUNT(mp,right,path,name) \
102 (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name) 102 (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name)
103#define XFS_SEND_UNMOUNT(mp, vp,right,mode,rval,fl) \ 103#define XFS_SEND_UNMOUNT(mp, ip,right,mode,rval,fl) \
104 (*(mp)->m_dm_ops->xfs_send_unmount)(mp,vp,right,mode,rval,fl) 104 (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl)
105 105
106 106
107/* 107/*
@@ -220,7 +220,7 @@ extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int);
220#endif 220#endif
221 221
222typedef struct xfs_ail { 222typedef struct xfs_ail {
223 xfs_ail_entry_t xa_ail; 223 struct list_head xa_ail;
224 uint xa_gen; 224 uint xa_gen;
225 struct task_struct *xa_task; 225 struct task_struct *xa_task;
226 xfs_lsn_t xa_target; 226 xfs_lsn_t xa_target;
@@ -366,7 +366,7 @@ typedef struct xfs_mount {
366#define XFS_MOUNT_SMALL_INUMS (1ULL << 15) /* users wants 32bit inodes */ 366#define XFS_MOUNT_SMALL_INUMS (1ULL << 15) /* users wants 32bit inodes */
367#define XFS_MOUNT_NOUUID (1ULL << 16) /* ignore uuid during mount */ 367#define XFS_MOUNT_NOUUID (1ULL << 16) /* ignore uuid during mount */
368#define XFS_MOUNT_BARRIER (1ULL << 17) 368#define XFS_MOUNT_BARRIER (1ULL << 17)
369#define XFS_MOUNT_IDELETE (1ULL << 18) /* delete empty inode clusters*/ 369#define XFS_MOUNT_IKEEP (1ULL << 18) /* keep empty inode clusters*/
370#define XFS_MOUNT_SWALLOC (1ULL << 19) /* turn on stripe width 370#define XFS_MOUNT_SWALLOC (1ULL << 19) /* turn on stripe width
371 * allocation */ 371 * allocation */
372#define XFS_MOUNT_RDONLY (1ULL << 20) /* read-only fs */ 372#define XFS_MOUNT_RDONLY (1ULL << 20) /* read-only fs */
@@ -401,7 +401,7 @@ typedef struct xfs_mount {
401 401
402/* 402/*
403 * Allow large block sizes to be reported to userspace programs if the 403 * Allow large block sizes to be reported to userspace programs if the
404 * "largeio" mount option is used. 404 * "largeio" mount option is used.
405 * 405 *
406 * If compatibility mode is specified, simply return the basic unit of caching 406 * If compatibility mode is specified, simply return the basic unit of caching
407 * so that we don't get inefficient read/modify/write I/O from user apps. 407 * so that we don't get inefficient read/modify/write I/O from user apps.
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 7eb157a59f9e..ee371890d85d 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -36,7 +36,6 @@
36#include "xfs_bmap.h" 36#include "xfs_bmap.h"
37#include "xfs_error.h" 37#include "xfs_error.h"
38#include "xfs_quota.h" 38#include "xfs_quota.h"
39#include "xfs_refcache.h"
40#include "xfs_utils.h" 39#include "xfs_utils.h"
41#include "xfs_trans_space.h" 40#include "xfs_trans_space.h"
42#include "xfs_vnodeops.h" 41#include "xfs_vnodeops.h"
@@ -84,25 +83,23 @@ int xfs_rename_skip, xfs_rename_nskip;
84 */ 83 */
85STATIC int 84STATIC int
86xfs_lock_for_rename( 85xfs_lock_for_rename(
87 xfs_inode_t *dp1, /* old (source) directory inode */ 86 xfs_inode_t *dp1, /* in: old (source) directory inode */
88 xfs_inode_t *dp2, /* new (target) directory inode */ 87 xfs_inode_t *dp2, /* in: new (target) directory inode */
89 bhv_vname_t *vname1,/* old entry name */ 88 xfs_inode_t *ip1, /* in: inode of old entry */
90 bhv_vname_t *vname2,/* new entry name */ 89 struct xfs_name *name2, /* in: new entry name */
91 xfs_inode_t **ipp1, /* inode of old entry */ 90 xfs_inode_t **ipp2, /* out: inode of new entry, if it
92 xfs_inode_t **ipp2, /* inode of new entry, if it
93 already exists, NULL otherwise. */ 91 already exists, NULL otherwise. */
94 xfs_inode_t **i_tab,/* array of inode returned, sorted */ 92 xfs_inode_t **i_tab,/* out: array of inode returned, sorted */
95 int *num_inodes) /* number of inodes in array */ 93 int *num_inodes) /* out: number of inodes in array */
96{ 94{
97 xfs_inode_t *ip1, *ip2, *temp; 95 xfs_inode_t *ip2 = NULL;
96 xfs_inode_t *temp;
98 xfs_ino_t inum1, inum2; 97 xfs_ino_t inum1, inum2;
99 int error; 98 int error;
100 int i, j; 99 int i, j;
101 uint lock_mode; 100 uint lock_mode;
102 int diff_dirs = (dp1 != dp2); 101 int diff_dirs = (dp1 != dp2);
103 102
104 ip2 = NULL;
105
106 /* 103 /*
107 * First, find out the current inums of the entries so that we 104 * First, find out the current inums of the entries so that we
108 * can determine the initial locking order. We'll have to 105 * can determine the initial locking order. We'll have to
@@ -110,27 +107,20 @@ xfs_lock_for_rename(
110 * to see if we still have the right inodes, directories, etc. 107 * to see if we still have the right inodes, directories, etc.
111 */ 108 */
112 lock_mode = xfs_ilock_map_shared(dp1); 109 lock_mode = xfs_ilock_map_shared(dp1);
113 error = xfs_get_dir_entry(vname1, &ip1); 110 IHOLD(ip1);
114 if (error) { 111 xfs_itrace_ref(ip1);
115 xfs_iunlock_map_shared(dp1, lock_mode);
116 return error;
117 }
118 112
119 inum1 = ip1->i_ino; 113 inum1 = ip1->i_ino;
120 114
121 ASSERT(ip1);
122 xfs_itrace_ref(ip1);
123
124 /* 115 /*
125 * Unlock dp1 and lock dp2 if they are different. 116 * Unlock dp1 and lock dp2 if they are different.
126 */ 117 */
127
128 if (diff_dirs) { 118 if (diff_dirs) {
129 xfs_iunlock_map_shared(dp1, lock_mode); 119 xfs_iunlock_map_shared(dp1, lock_mode);
130 lock_mode = xfs_ilock_map_shared(dp2); 120 lock_mode = xfs_ilock_map_shared(dp2);
131 } 121 }
132 122
133 error = xfs_dir_lookup_int(dp2, lock_mode, vname2, &inum2, &ip2); 123 error = xfs_dir_lookup_int(dp2, lock_mode, name2, &inum2, &ip2);
134 if (error == ENOENT) { /* target does not need to exist. */ 124 if (error == ENOENT) { /* target does not need to exist. */
135 inum2 = 0; 125 inum2 = 0;
136 } else if (error) { 126 } else if (error) {
@@ -162,6 +152,7 @@ xfs_lock_for_rename(
162 *num_inodes = 4; 152 *num_inodes = 4;
163 i_tab[3] = ip2; 153 i_tab[3] = ip2;
164 } 154 }
155 *ipp2 = i_tab[3];
165 156
166 /* 157 /*
167 * Sort the elements via bubble sort. (Remember, there are at 158 * Sort the elements via bubble sort. (Remember, there are at
@@ -199,21 +190,6 @@ xfs_lock_for_rename(
199 xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED); 190 xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED);
200 } 191 }
201 192
202 /*
203 * Set the return value. Null out any unused entries in i_tab.
204 */
205 *ipp1 = *ipp2 = NULL;
206 for (i=0; i < *num_inodes; i++) {
207 if (i_tab[i]->i_ino == inum1) {
208 *ipp1 = i_tab[i];
209 }
210 if (i_tab[i]->i_ino == inum2) {
211 *ipp2 = i_tab[i];
212 }
213 }
214 for (;i < 4; i++) {
215 i_tab[i] = NULL;
216 }
217 return 0; 193 return 0;
218} 194}
219 195
@@ -223,13 +199,13 @@ xfs_lock_for_rename(
223int 199int
224xfs_rename( 200xfs_rename(
225 xfs_inode_t *src_dp, 201 xfs_inode_t *src_dp,
226 bhv_vname_t *src_vname, 202 struct xfs_name *src_name,
227 bhv_vnode_t *target_dir_vp, 203 xfs_inode_t *src_ip,
228 bhv_vname_t *target_vname) 204 xfs_inode_t *target_dp,
205 struct xfs_name *target_name)
229{ 206{
230 bhv_vnode_t *src_dir_vp = XFS_ITOV(src_dp);
231 xfs_trans_t *tp; 207 xfs_trans_t *tp;
232 xfs_inode_t *target_dp, *src_ip, *target_ip; 208 xfs_inode_t *target_ip;
233 xfs_mount_t *mp = src_dp->i_mount; 209 xfs_mount_t *mp = src_dp->i_mount;
234 int new_parent; /* moving to a new dir */ 210 int new_parent; /* moving to a new dir */
235 int src_is_directory; /* src_name is a directory */ 211 int src_is_directory; /* src_name is a directory */
@@ -243,29 +219,16 @@ xfs_rename(
243 int spaceres; 219 int spaceres;
244 int target_link_zero = 0; 220 int target_link_zero = 0;
245 int num_inodes; 221 int num_inodes;
246 char *src_name = VNAME(src_vname);
247 char *target_name = VNAME(target_vname);
248 int src_namelen = VNAMELEN(src_vname);
249 int target_namelen = VNAMELEN(target_vname);
250 222
251 xfs_itrace_entry(src_dp); 223 xfs_itrace_entry(src_dp);
252 xfs_itrace_entry(xfs_vtoi(target_dir_vp)); 224 xfs_itrace_entry(target_dp);
253
254 /*
255 * Find the XFS behavior descriptor for the target directory
256 * vnode since it was not handed to us.
257 */
258 target_dp = xfs_vtoi(target_dir_vp);
259 if (target_dp == NULL) {
260 return XFS_ERROR(EXDEV);
261 }
262 225
263 if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) || 226 if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) ||
264 DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) { 227 DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) {
265 error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME, 228 error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME,
266 src_dir_vp, DM_RIGHT_NULL, 229 src_dp, DM_RIGHT_NULL,
267 target_dir_vp, DM_RIGHT_NULL, 230 target_dp, DM_RIGHT_NULL,
268 src_name, target_name, 231 src_name->name, target_name->name,
269 0, 0, 0); 232 0, 0, 0);
270 if (error) { 233 if (error) {
271 return error; 234 return error;
@@ -282,10 +245,8 @@ xfs_rename(
282 * does not exist in the source directory. 245 * does not exist in the source directory.
283 */ 246 */
284 tp = NULL; 247 tp = NULL;
285 error = xfs_lock_for_rename(src_dp, target_dp, src_vname, 248 error = xfs_lock_for_rename(src_dp, target_dp, src_ip, target_name,
286 target_vname, &src_ip, &target_ip, inodes, 249 &target_ip, inodes, &num_inodes);
287 &num_inodes);
288
289 if (error) { 250 if (error) {
290 /* 251 /*
291 * We have nothing locked, no inode references, and 252 * We have nothing locked, no inode references, and
@@ -331,7 +292,7 @@ xfs_rename(
331 XFS_BMAP_INIT(&free_list, &first_block); 292 XFS_BMAP_INIT(&free_list, &first_block);
332 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); 293 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
333 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 294 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
334 spaceres = XFS_RENAME_SPACE_RES(mp, target_namelen); 295 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
335 error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0, 296 error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0,
336 XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); 297 XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT);
337 if (error == ENOSPC) { 298 if (error == ENOSPC) {
@@ -365,10 +326,10 @@ xfs_rename(
365 * them when they unlock the inodes. Also, we need to be careful 326 * them when they unlock the inodes. Also, we need to be careful
366 * not to add an inode to the transaction more than once. 327 * not to add an inode to the transaction more than once.
367 */ 328 */
368 VN_HOLD(src_dir_vp); 329 IHOLD(src_dp);
369 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 330 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
370 if (new_parent) { 331 if (new_parent) {
371 VN_HOLD(target_dir_vp); 332 IHOLD(target_dp);
372 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 333 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
373 } 334 }
374 if ((src_ip != src_dp) && (src_ip != target_dp)) { 335 if ((src_ip != src_dp) && (src_ip != target_dp)) {
@@ -389,9 +350,8 @@ xfs_rename(
389 * If there's no space reservation, check the entry will 350 * If there's no space reservation, check the entry will
390 * fit before actually inserting it. 351 * fit before actually inserting it.
391 */ 352 */
392 if (spaceres == 0 && 353 error = xfs_dir_canenter(tp, target_dp, target_name, spaceres);
393 (error = xfs_dir_canenter(tp, target_dp, target_name, 354 if (error)
394 target_namelen)))
395 goto error_return; 355 goto error_return;
396 /* 356 /*
397 * If target does not exist and the rename crosses 357 * If target does not exist and the rename crosses
@@ -399,8 +359,8 @@ xfs_rename(
399 * to account for the ".." reference from the new entry. 359 * to account for the ".." reference from the new entry.
400 */ 360 */
401 error = xfs_dir_createname(tp, target_dp, target_name, 361 error = xfs_dir_createname(tp, target_dp, target_name,
402 target_namelen, src_ip->i_ino, 362 src_ip->i_ino, &first_block,
403 &first_block, &free_list, spaceres); 363 &free_list, spaceres);
404 if (error == ENOSPC) 364 if (error == ENOSPC)
405 goto error_return; 365 goto error_return;
406 if (error) 366 if (error)
@@ -439,7 +399,7 @@ xfs_rename(
439 * name at the destination directory, remove it first. 399 * name at the destination directory, remove it first.
440 */ 400 */
441 error = xfs_dir_replace(tp, target_dp, target_name, 401 error = xfs_dir_replace(tp, target_dp, target_name,
442 target_namelen, src_ip->i_ino, 402 src_ip->i_ino,
443 &first_block, &free_list, spaceres); 403 &first_block, &free_list, spaceres);
444 if (error) 404 if (error)
445 goto abort_return; 405 goto abort_return;
@@ -476,7 +436,8 @@ xfs_rename(
476 * Rewrite the ".." entry to point to the new 436 * Rewrite the ".." entry to point to the new
477 * directory. 437 * directory.
478 */ 438 */
479 error = xfs_dir_replace(tp, src_ip, "..", 2, target_dp->i_ino, 439 error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
440 target_dp->i_ino,
480 &first_block, &free_list, spaceres); 441 &first_block, &free_list, spaceres);
481 ASSERT(error != EEXIST); 442 ASSERT(error != EEXIST);
482 if (error) 443 if (error)
@@ -512,8 +473,8 @@ xfs_rename(
512 goto abort_return; 473 goto abort_return;
513 } 474 }
514 475
515 error = xfs_dir_removename(tp, src_dp, src_name, src_namelen, 476 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
516 src_ip->i_ino, &first_block, &free_list, spaceres); 477 &first_block, &free_list, spaceres);
517 if (error) 478 if (error)
518 goto abort_return; 479 goto abort_return;
519 xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 480 xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -580,10 +541,8 @@ xfs_rename(
580 * the vnode references. 541 * the vnode references.
581 */ 542 */
582 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 543 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
583 if (target_ip != NULL) { 544 if (target_ip != NULL)
584 xfs_refcache_purge_ip(target_ip);
585 IRELE(target_ip); 545 IRELE(target_ip);
586 }
587 /* 546 /*
588 * Let interposed file systems know about removed links. 547 * Let interposed file systems know about removed links.
589 */ 548 */
@@ -598,9 +557,9 @@ std_return:
598 if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) || 557 if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) ||
599 DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) { 558 DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) {
600 (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME, 559 (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME,
601 src_dir_vp, DM_RIGHT_NULL, 560 src_dp, DM_RIGHT_NULL,
602 target_dir_vp, DM_RIGHT_NULL, 561 target_dp, DM_RIGHT_NULL,
603 src_name, target_name, 562 src_name->name, target_name->name,
604 0, error, 0); 563 0, error, 0);
605 } 564 }
606 return error; 565 return error;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ca83ddf72af4..a0dc6e5bc5b9 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -44,6 +44,7 @@
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_inode_item.h" 45#include "xfs_inode_item.h"
46#include "xfs_trans_space.h" 46#include "xfs_trans_space.h"
47#include "xfs_utils.h"
47 48
48 49
49/* 50/*
@@ -73,6 +74,18 @@ STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int,
73 */ 74 */
74 75
75/* 76/*
77 * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set.
78 */
79STATIC int
80xfs_lowbit32(
81 __uint32_t v)
82{
83 if (v)
84 return ffs(v) - 1;
85 return -1;
86}
87
88/*
76 * Allocate space to the bitmap or summary file, and zero it, for growfs. 89 * Allocate space to the bitmap or summary file, and zero it, for growfs.
77 */ 90 */
78STATIC int /* error */ 91STATIC int /* error */
@@ -111,14 +124,14 @@ xfs_growfs_rt_alloc(
111 XFS_GROWRTALLOC_LOG_RES(mp), 0, 124 XFS_GROWRTALLOC_LOG_RES(mp), 0,
112 XFS_TRANS_PERM_LOG_RES, 125 XFS_TRANS_PERM_LOG_RES,
113 XFS_DEFAULT_PERM_LOG_COUNT))) 126 XFS_DEFAULT_PERM_LOG_COUNT)))
114 goto error_exit; 127 goto error_cancel;
115 cancelflags = XFS_TRANS_RELEASE_LOG_RES; 128 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
116 /* 129 /*
117 * Lock the inode. 130 * Lock the inode.
118 */ 131 */
119 if ((error = xfs_trans_iget(mp, tp, ino, 0, 132 if ((error = xfs_trans_iget(mp, tp, ino, 0,
120 XFS_ILOCK_EXCL, &ip))) 133 XFS_ILOCK_EXCL, &ip)))
121 goto error_exit; 134 goto error_cancel;
122 XFS_BMAP_INIT(&flist, &firstblock); 135 XFS_BMAP_INIT(&flist, &firstblock);
123 /* 136 /*
124 * Allocate blocks to the bitmap file. 137 * Allocate blocks to the bitmap file.
@@ -131,14 +144,16 @@ xfs_growfs_rt_alloc(
131 if (!error && nmap < 1) 144 if (!error && nmap < 1)
132 error = XFS_ERROR(ENOSPC); 145 error = XFS_ERROR(ENOSPC);
133 if (error) 146 if (error)
134 goto error_exit; 147 goto error_cancel;
135 /* 148 /*
136 * Free any blocks freed up in the transaction, then commit. 149 * Free any blocks freed up in the transaction, then commit.
137 */ 150 */
138 error = xfs_bmap_finish(&tp, &flist, &committed); 151 error = xfs_bmap_finish(&tp, &flist, &committed);
139 if (error) 152 if (error)
140 goto error_exit; 153 goto error_cancel;
141 xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 154 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
155 if (error)
156 goto error;
142 /* 157 /*
143 * Now we need to clear the allocated blocks. 158 * Now we need to clear the allocated blocks.
144 * Do this one block per transaction, to keep it simple. 159 * Do this one block per transaction, to keep it simple.
@@ -153,13 +168,13 @@ xfs_growfs_rt_alloc(
153 */ 168 */
154 if ((error = xfs_trans_reserve(tp, 0, 169 if ((error = xfs_trans_reserve(tp, 0,
155 XFS_GROWRTZERO_LOG_RES(mp), 0, 0, 0))) 170 XFS_GROWRTZERO_LOG_RES(mp), 0, 0, 0)))
156 goto error_exit; 171 goto error_cancel;
157 /* 172 /*
158 * Lock the bitmap inode. 173 * Lock the bitmap inode.
159 */ 174 */
160 if ((error = xfs_trans_iget(mp, tp, ino, 0, 175 if ((error = xfs_trans_iget(mp, tp, ino, 0,
161 XFS_ILOCK_EXCL, &ip))) 176 XFS_ILOCK_EXCL, &ip)))
162 goto error_exit; 177 goto error_cancel;
163 /* 178 /*
164 * Get a buffer for the block. 179 * Get a buffer for the block.
165 */ 180 */
@@ -168,14 +183,16 @@ xfs_growfs_rt_alloc(
168 mp->m_bsize, 0); 183 mp->m_bsize, 0);
169 if (bp == NULL) { 184 if (bp == NULL) {
170 error = XFS_ERROR(EIO); 185 error = XFS_ERROR(EIO);
171 goto error_exit; 186 goto error_cancel;
172 } 187 }
173 memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize); 188 memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize);
174 xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); 189 xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
175 /* 190 /*
176 * Commit the transaction. 191 * Commit the transaction.
177 */ 192 */
178 xfs_trans_commit(tp, 0); 193 error = xfs_trans_commit(tp, 0);
194 if (error)
195 goto error;
179 } 196 }
180 /* 197 /*
181 * Go on to the next extent, if any. 198 * Go on to the next extent, if any.
@@ -183,8 +200,9 @@ xfs_growfs_rt_alloc(
183 oblocks = map.br_startoff + map.br_blockcount; 200 oblocks = map.br_startoff + map.br_blockcount;
184 } 201 }
185 return 0; 202 return 0;
186error_exit: 203error_cancel:
187 xfs_trans_cancel(tp, cancelflags); 204 xfs_trans_cancel(tp, cancelflags);
205error:
188 return error; 206 return error;
189} 207}
190 208
@@ -432,7 +450,6 @@ xfs_rtallocate_extent_near(
432 } 450 }
433 bbno = XFS_BITTOBLOCK(mp, bno); 451 bbno = XFS_BITTOBLOCK(mp, bno);
434 i = 0; 452 i = 0;
435 ASSERT(minlen != 0);
436 log2len = xfs_highbit32(minlen); 453 log2len = xfs_highbit32(minlen);
437 /* 454 /*
438 * Loop over all bitmap blocks (bbno + i is current block). 455 * Loop over all bitmap blocks (bbno + i is current block).
@@ -601,8 +618,6 @@ xfs_rtallocate_extent_size(
601 xfs_suminfo_t sum; /* summary information for extents */ 618 xfs_suminfo_t sum; /* summary information for extents */
602 619
603 ASSERT(minlen % prod == 0 && maxlen % prod == 0); 620 ASSERT(minlen % prod == 0 && maxlen % prod == 0);
604 ASSERT(maxlen != 0);
605
606 /* 621 /*
607 * Loop over all the levels starting with maxlen. 622 * Loop over all the levels starting with maxlen.
608 * At each level, look at all the bitmap blocks, to see if there 623 * At each level, look at all the bitmap blocks, to see if there
@@ -660,9 +675,6 @@ xfs_rtallocate_extent_size(
660 *rtblock = NULLRTBLOCK; 675 *rtblock = NULLRTBLOCK;
661 return 0; 676 return 0;
662 } 677 }
663 ASSERT(minlen != 0);
664 ASSERT(maxlen != 0);
665
666 /* 678 /*
667 * Loop over sizes, from maxlen down to minlen. 679 * Loop over sizes, from maxlen down to minlen.
668 * This time, when we do the allocations, allow smaller ones 680 * This time, when we do the allocations, allow smaller ones
@@ -1869,6 +1881,7 @@ xfs_growfs_rt(
1869 xfs_trans_t *tp; /* transaction pointer */ 1881 xfs_trans_t *tp; /* transaction pointer */
1870 1882
1871 sbp = &mp->m_sb; 1883 sbp = &mp->m_sb;
1884 cancelflags = 0;
1872 /* 1885 /*
1873 * Initial error checking. 1886 * Initial error checking.
1874 */ 1887 */
@@ -1948,7 +1961,6 @@ xfs_growfs_rt(
1948 nsbp->sb_blocksize * nsbp->sb_rextsize); 1961 nsbp->sb_blocksize * nsbp->sb_rextsize);
1949 nsbp->sb_rextents = nsbp->sb_rblocks; 1962 nsbp->sb_rextents = nsbp->sb_rblocks;
1950 do_div(nsbp->sb_rextents, nsbp->sb_rextsize); 1963 do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
1951 ASSERT(nsbp->sb_rextents != 0);
1952 nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents); 1964 nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents);
1953 nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; 1965 nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
1954 nrsumsize = 1966 nrsumsize =
@@ -2036,13 +2048,15 @@ xfs_growfs_rt(
2036 */ 2048 */
2037 mp->m_rsumlevels = nrsumlevels; 2049 mp->m_rsumlevels = nrsumlevels;
2038 mp->m_rsumsize = nrsumsize; 2050 mp->m_rsumsize = nrsumsize;
2039 /* 2051
2040 * Commit the transaction. 2052 error = xfs_trans_commit(tp, 0);
2041 */ 2053 if (error) {
2042 xfs_trans_commit(tp, 0); 2054 tp = NULL;
2055 break;
2056 }
2043 } 2057 }
2044 2058
2045 if (error) 2059 if (error && tp)
2046 xfs_trans_cancel(tp, cancelflags); 2060 xfs_trans_cancel(tp, cancelflags);
2047 2061
2048 /* 2062 /*
@@ -2273,7 +2287,7 @@ xfs_rtmount_inodes(
2273 ASSERT(sbp->sb_rsumino != NULLFSINO); 2287 ASSERT(sbp->sb_rsumino != NULLFSINO);
2274 error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0); 2288 error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0);
2275 if (error) { 2289 if (error) {
2276 VN_RELE(XFS_ITOV(mp->m_rbmip)); 2290 IRELE(mp->m_rbmip);
2277 return error; 2291 return error;
2278 } 2292 }
2279 ASSERT(mp->m_rsumip != NULL); 2293 ASSERT(mp->m_rsumip != NULL);
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index cd3ece6cc918..b0f31c09a76d 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -126,11 +126,11 @@ xfs_write_sync_logforce(
126 * when we return. 126 * when we return.
127 */ 127 */
128 if (iip && iip->ili_last_lsn) { 128 if (iip && iip->ili_last_lsn) {
129 xfs_log_force(mp, iip->ili_last_lsn, 129 error = _xfs_log_force(mp, iip->ili_last_lsn,
130 XFS_LOG_FORCE | XFS_LOG_SYNC); 130 XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
131 } else if (xfs_ipincount(ip) > 0) { 131 } else if (xfs_ipincount(ip) > 0) {
132 xfs_log_force(mp, (xfs_lsn_t)0, 132 error = _xfs_log_force(mp, (xfs_lsn_t)0,
133 XFS_LOG_FORCE | XFS_LOG_SYNC); 133 XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
134 } 134 }
135 135
136 } else { 136 } else {
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 94660b1a6ccc..d904efe7f871 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -89,6 +89,7 @@ struct xfs_mount;
89 89
90/* 90/*
91 * Superblock - in core version. Must match the ondisk version below. 91 * Superblock - in core version. Must match the ondisk version below.
92 * Must be padded to 64 bit alignment.
92 */ 93 */
93typedef struct xfs_sb { 94typedef struct xfs_sb {
94 __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */ 95 __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
@@ -145,10 +146,21 @@ typedef struct xfs_sb {
145 __uint16_t sb_logsectsize; /* sector size for the log, bytes */ 146 __uint16_t sb_logsectsize; /* sector size for the log, bytes */
146 __uint32_t sb_logsunit; /* stripe unit size for the log */ 147 __uint32_t sb_logsunit; /* stripe unit size for the log */
147 __uint32_t sb_features2; /* additional feature bits */ 148 __uint32_t sb_features2; /* additional feature bits */
149
150 /*
151 * bad features2 field as a result of failing to pad the sb
152 * structure to 64 bits. Some machines will be using this field
153 * for features2 bits. Easiest just to mark it bad and not use
154 * it for anything else.
155 */
156 __uint32_t sb_bad_features2;
157
158 /* must be padded to 64 bit alignment */
148} xfs_sb_t; 159} xfs_sb_t;
149 160
150/* 161/*
151 * Superblock - on disk version. Must match the in core version below. 162 * Superblock - on disk version. Must match the in core version above.
163 * Must be padded to 64 bit alignment.
152 */ 164 */
153typedef struct xfs_dsb { 165typedef struct xfs_dsb {
154 __be32 sb_magicnum; /* magic number == XFS_SB_MAGIC */ 166 __be32 sb_magicnum; /* magic number == XFS_SB_MAGIC */
@@ -205,6 +217,15 @@ typedef struct xfs_dsb {
205 __be16 sb_logsectsize; /* sector size for the log, bytes */ 217 __be16 sb_logsectsize; /* sector size for the log, bytes */
206 __be32 sb_logsunit; /* stripe unit size for the log */ 218 __be32 sb_logsunit; /* stripe unit size for the log */
207 __be32 sb_features2; /* additional feature bits */ 219 __be32 sb_features2; /* additional feature bits */
220 /*
221 * bad features2 field as a result of failing to pad the sb
222 * structure to 64 bits. Some machines will be using this field
223 * for features2 bits. Easiest just to mark it bad and not use
224 * it for anything else.
225 */
226 __be32 sb_bad_features2;
227
228 /* must be padded to 64 bit alignment */
208} xfs_dsb_t; 229} xfs_dsb_t;
209 230
210/* 231/*
@@ -223,7 +244,7 @@ typedef enum {
223 XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN, 244 XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
224 XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG, 245 XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
225 XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT, 246 XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
226 XFS_SBS_FEATURES2, 247 XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2,
227 XFS_SBS_FIELDCOUNT 248 XFS_SBS_FIELDCOUNT
228} xfs_sb_field_t; 249} xfs_sb_field_t;
229 250
@@ -248,13 +269,15 @@ typedef enum {
248#define XFS_SB_IFREE XFS_SB_MVAL(IFREE) 269#define XFS_SB_IFREE XFS_SB_MVAL(IFREE)
249#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS) 270#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
250#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2) 271#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2)
272#define XFS_SB_BAD_FEATURES2 XFS_SB_MVAL(BAD_FEATURES2)
251#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT) 273#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
252#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1) 274#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
253#define XFS_SB_MOD_BITS \ 275#define XFS_SB_MOD_BITS \
254 (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \ 276 (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
255 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ 277 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
256 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ 278 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
257 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2) 279 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
280 XFS_SB_BAD_FEATURES2)
258 281
259 282
260/* 283/*
@@ -271,7 +294,6 @@ typedef enum {
271 294
272#define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) 295#define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
273 296
274#define XFS_SB_GOOD_VERSION(sbp) xfs_sb_good_version(sbp)
275#ifdef __KERNEL__ 297#ifdef __KERNEL__
276static inline int xfs_sb_good_version(xfs_sb_t *sbp) 298static inline int xfs_sb_good_version(xfs_sb_t *sbp)
277{ 299{
@@ -297,7 +319,15 @@ static inline int xfs_sb_good_version(xfs_sb_t *sbp)
297} 319}
298#endif /* __KERNEL__ */ 320#endif /* __KERNEL__ */
299 321
300#define XFS_SB_VERSION_TONEW(v) xfs_sb_version_tonew(v) 322/*
323 * Detect a mismatched features2 field. Older kernels read/wrote
324 * this into the wrong slot, so to be safe we keep them in sync.
325 */
326static inline int xfs_sb_has_mismatched_features2(xfs_sb_t *sbp)
327{
328 return (sbp->sb_bad_features2 != sbp->sb_features2);
329}
330
301static inline unsigned xfs_sb_version_tonew(unsigned v) 331static inline unsigned xfs_sb_version_tonew(unsigned v)
302{ 332{
303 return ((((v) == XFS_SB_VERSION_1) ? \ 333 return ((((v) == XFS_SB_VERSION_1) ? \
@@ -308,7 +338,6 @@ static inline unsigned xfs_sb_version_tonew(unsigned v)
308 XFS_SB_VERSION_4); 338 XFS_SB_VERSION_4);
309} 339}
310 340
311#define XFS_SB_VERSION_TOOLD(v) xfs_sb_version_toold(v)
312static inline unsigned xfs_sb_version_toold(unsigned v) 341static inline unsigned xfs_sb_version_toold(unsigned v)
313{ 342{
314 return (((v) & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) ? \ 343 return (((v) & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) ? \
@@ -320,7 +349,6 @@ static inline unsigned xfs_sb_version_toold(unsigned v)
320 XFS_SB_VERSION_1))); 349 XFS_SB_VERSION_1)));
321} 350}
322 351
323#define XFS_SB_VERSION_HASATTR(sbp) xfs_sb_version_hasattr(sbp)
324static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp) 352static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
325{ 353{
326 return ((sbp)->sb_versionnum == XFS_SB_VERSION_2) || \ 354 return ((sbp)->sb_versionnum == XFS_SB_VERSION_2) || \
@@ -329,7 +357,6 @@ static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
329 ((sbp)->sb_versionnum & XFS_SB_VERSION_ATTRBIT)); 357 ((sbp)->sb_versionnum & XFS_SB_VERSION_ATTRBIT));
330} 358}
331 359
332#define XFS_SB_VERSION_ADDATTR(sbp) xfs_sb_version_addattr(sbp)
333static inline void xfs_sb_version_addattr(xfs_sb_t *sbp) 360static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
334{ 361{
335 (sbp)->sb_versionnum = (((sbp)->sb_versionnum == XFS_SB_VERSION_1) ? \ 362 (sbp)->sb_versionnum = (((sbp)->sb_versionnum == XFS_SB_VERSION_1) ? \
@@ -339,7 +366,6 @@ static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
339 (XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT))); 366 (XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT)));
340} 367}
341 368
342#define XFS_SB_VERSION_HASNLINK(sbp) xfs_sb_version_hasnlink(sbp)
343static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp) 369static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
344{ 370{
345 return ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \ 371 return ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \
@@ -347,7 +373,6 @@ static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
347 ((sbp)->sb_versionnum & XFS_SB_VERSION_NLINKBIT)); 373 ((sbp)->sb_versionnum & XFS_SB_VERSION_NLINKBIT));
348} 374}
349 375
350#define XFS_SB_VERSION_ADDNLINK(sbp) xfs_sb_version_addnlink(sbp)
351static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp) 376static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
352{ 377{
353 (sbp)->sb_versionnum = ((sbp)->sb_versionnum <= XFS_SB_VERSION_2 ? \ 378 (sbp)->sb_versionnum = ((sbp)->sb_versionnum <= XFS_SB_VERSION_2 ? \
@@ -355,115 +380,63 @@ static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
355 ((sbp)->sb_versionnum | XFS_SB_VERSION_NLINKBIT)); 380 ((sbp)->sb_versionnum | XFS_SB_VERSION_NLINKBIT));
356} 381}
357 382
358#define XFS_SB_VERSION_HASQUOTA(sbp) xfs_sb_version_hasquota(sbp)
359static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp) 383static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp)
360{ 384{
361 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 385 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
362 ((sbp)->sb_versionnum & XFS_SB_VERSION_QUOTABIT); 386 ((sbp)->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
363} 387}
364 388
365#define XFS_SB_VERSION_ADDQUOTA(sbp) xfs_sb_version_addquota(sbp)
366static inline void xfs_sb_version_addquota(xfs_sb_t *sbp) 389static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
367{ 390{
368 (sbp)->sb_versionnum = \ 391 (sbp)->sb_versionnum = \
369 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 ? \ 392 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 ? \
370 ((sbp)->sb_versionnum | XFS_SB_VERSION_QUOTABIT) : \ 393 ((sbp)->sb_versionnum | XFS_SB_VERSION_QUOTABIT) : \
371 (XFS_SB_VERSION_TONEW((sbp)->sb_versionnum) | \ 394 (xfs_sb_version_tonew((sbp)->sb_versionnum) | \
372 XFS_SB_VERSION_QUOTABIT)); 395 XFS_SB_VERSION_QUOTABIT));
373} 396}
374 397
375#define XFS_SB_VERSION_HASALIGN(sbp) xfs_sb_version_hasalign(sbp)
376static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp) 398static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp)
377{ 399{
378 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 400 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
379 ((sbp)->sb_versionnum & XFS_SB_VERSION_ALIGNBIT); 401 ((sbp)->sb_versionnum & XFS_SB_VERSION_ALIGNBIT);
380} 402}
381 403
382#define XFS_SB_VERSION_SUBALIGN(sbp) xfs_sb_version_subalign(sbp)
383static inline void xfs_sb_version_subalign(xfs_sb_t *sbp)
384{
385 (sbp)->sb_versionnum = \
386 XFS_SB_VERSION_TOOLD((sbp)->sb_versionnum & ~XFS_SB_VERSION_ALIGNBIT);
387}
388
389#define XFS_SB_VERSION_HASDALIGN(sbp) xfs_sb_version_hasdalign(sbp)
390static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp) 404static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp)
391{ 405{
392 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 406 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
393 ((sbp)->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); 407 ((sbp)->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
394} 408}
395 409
396#define XFS_SB_VERSION_ADDDALIGN(sbp) xfs_sb_version_adddalign(sbp)
397static inline int xfs_sb_version_adddalign(xfs_sb_t *sbp)
398{
399 return (sbp)->sb_versionnum = \
400 ((sbp)->sb_versionnum | XFS_SB_VERSION_DALIGNBIT);
401}
402
403#define XFS_SB_VERSION_HASSHARED(sbp) xfs_sb_version_hasshared(sbp)
404static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp) 410static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp)
405{ 411{
406 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 412 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
407 ((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT); 413 ((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT);
408} 414}
409 415
410#define XFS_SB_VERSION_ADDSHARED(sbp) xfs_sb_version_addshared(sbp)
411static inline int xfs_sb_version_addshared(xfs_sb_t *sbp)
412{
413 return (sbp)->sb_versionnum = \
414 ((sbp)->sb_versionnum | XFS_SB_VERSION_SHAREDBIT);
415}
416
417#define XFS_SB_VERSION_SUBSHARED(sbp) xfs_sb_version_subshared(sbp)
418static inline int xfs_sb_version_subshared(xfs_sb_t *sbp)
419{
420 return (sbp)->sb_versionnum = \
421 ((sbp)->sb_versionnum & ~XFS_SB_VERSION_SHAREDBIT);
422}
423
424#define XFS_SB_VERSION_HASDIRV2(sbp) xfs_sb_version_hasdirv2(sbp)
425static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp) 416static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp)
426{ 417{
427 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 418 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
428 ((sbp)->sb_versionnum & XFS_SB_VERSION_DIRV2BIT); 419 ((sbp)->sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
429} 420}
430 421
431#define XFS_SB_VERSION_HASLOGV2(sbp) xfs_sb_version_haslogv2(sbp)
432static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp) 422static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp)
433{ 423{
434 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 424 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
435 ((sbp)->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); 425 ((sbp)->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
436} 426}
437 427
438#define XFS_SB_VERSION_HASEXTFLGBIT(sbp) xfs_sb_version_hasextflgbit(sbp)
439static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp) 428static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp)
440{ 429{
441 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 430 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
442 ((sbp)->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); 431 ((sbp)->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);
443} 432}
444 433
445#define XFS_SB_VERSION_ADDEXTFLGBIT(sbp) xfs_sb_version_addextflgbit(sbp)
446static inline int xfs_sb_version_addextflgbit(xfs_sb_t *sbp)
447{
448 return (sbp)->sb_versionnum = \
449 ((sbp)->sb_versionnum | XFS_SB_VERSION_EXTFLGBIT);
450}
451
452#define XFS_SB_VERSION_SUBEXTFLGBIT(sbp) xfs_sb_version_subextflgbit(sbp)
453static inline int xfs_sb_version_subextflgbit(xfs_sb_t *sbp)
454{
455 return (sbp)->sb_versionnum = \
456 ((sbp)->sb_versionnum & ~XFS_SB_VERSION_EXTFLGBIT);
457}
458
459#define XFS_SB_VERSION_HASSECTOR(sbp) xfs_sb_version_hassector(sbp)
460static inline int xfs_sb_version_hassector(xfs_sb_t *sbp) 434static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
461{ 435{
462 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 436 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
463 ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT); 437 ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
464} 438}
465 439
466#define XFS_SB_VERSION_HASMOREBITS(sbp) xfs_sb_version_hasmorebits(sbp)
467static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) 440static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
468{ 441{
469 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 442 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
@@ -476,24 +449,22 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
476 * For example, for a bit defined as XFS_SB_VERSION2_FUNBIT, has a macro: 449 * For example, for a bit defined as XFS_SB_VERSION2_FUNBIT, has a macro:
477 * 450 *
478 * SB_VERSION_HASFUNBIT(xfs_sb_t *sbp) 451 * SB_VERSION_HASFUNBIT(xfs_sb_t *sbp)
479 * ((XFS_SB_VERSION_HASMOREBITS(sbp) && 452 * ((xfs_sb_version_hasmorebits(sbp) &&
480 * ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT) 453 * ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT)
481 */ 454 */
482 455
483static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp) 456static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
484{ 457{
485 return (XFS_SB_VERSION_HASMOREBITS(sbp) && \ 458 return (xfs_sb_version_hasmorebits(sbp) && \
486 ((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)); 459 ((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
487} 460}
488 461
489#define XFS_SB_VERSION_HASATTR2(sbp) xfs_sb_version_hasattr2(sbp)
490static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) 462static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
491{ 463{
492 return (XFS_SB_VERSION_HASMOREBITS(sbp)) && \ 464 return (xfs_sb_version_hasmorebits(sbp)) && \
493 ((sbp)->sb_features2 & XFS_SB_VERSION2_ATTR2BIT); 465 ((sbp)->sb_features2 & XFS_SB_VERSION2_ATTR2BIT);
494} 466}
495 467
496#define XFS_SB_VERSION_ADDATTR2(sbp) xfs_sb_version_addattr2(sbp)
497static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp) 468static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
498{ 469{
499 ((sbp)->sb_versionnum = \ 470 ((sbp)->sb_versionnum = \
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7f40628d85c7..0804207c7391 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -113,13 +113,8 @@ struct xfs_mount;
113struct xfs_trans; 113struct xfs_trans;
114struct xfs_dquot_acct; 114struct xfs_dquot_acct;
115 115
116typedef struct xfs_ail_entry {
117 struct xfs_log_item *ail_forw; /* AIL forw pointer */
118 struct xfs_log_item *ail_back; /* AIL back pointer */
119} xfs_ail_entry_t;
120
121typedef struct xfs_log_item { 116typedef struct xfs_log_item {
122 xfs_ail_entry_t li_ail; /* AIL pointers */ 117 struct list_head li_ail; /* AIL pointers */
123 xfs_lsn_t li_lsn; /* last on-disk lsn */ 118 xfs_lsn_t li_lsn; /* last on-disk lsn */
124 struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ 119 struct xfs_log_item_desc *li_desc; /* ptr to current desc*/
125 struct xfs_mount *li_mountp; /* ptr to fs mount */ 120 struct xfs_mount *li_mountp; /* ptr to fs mount */
@@ -341,7 +336,6 @@ typedef struct xfs_trans {
341 unsigned int t_rtx_res; /* # of rt extents resvd */ 336 unsigned int t_rtx_res; /* # of rt extents resvd */
342 unsigned int t_rtx_res_used; /* # of resvd rt extents used */ 337 unsigned int t_rtx_res_used; /* # of resvd rt extents used */
343 xfs_log_ticket_t t_ticket; /* log mgr ticket */ 338 xfs_log_ticket_t t_ticket; /* log mgr ticket */
344 sema_t t_sema; /* sema for commit completion */
345 xfs_lsn_t t_lsn; /* log seq num of start of 339 xfs_lsn_t t_lsn; /* log seq num of start of
346 * transaction. */ 340 * transaction. */
347 xfs_lsn_t t_commit_lsn; /* log seq num of end of 341 xfs_lsn_t t_commit_lsn; /* log seq num of end of
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 4d6330eddc8d..1f77c00af566 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,13 +28,13 @@
28#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
29#include "xfs_error.h" 29#include "xfs_error.h"
30 30
31STATIC void xfs_ail_insert(xfs_ail_entry_t *, xfs_log_item_t *); 31STATIC void xfs_ail_insert(xfs_ail_t *, xfs_log_item_t *);
32STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_entry_t *, xfs_log_item_t *); 32STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_t *, xfs_log_item_t *);
33STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_entry_t *); 33STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_t *);
34STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_entry_t *, xfs_log_item_t *); 34STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_t *, xfs_log_item_t *);
35 35
36#ifdef DEBUG 36#ifdef DEBUG
37STATIC void xfs_ail_check(xfs_ail_entry_t *, xfs_log_item_t *); 37STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *);
38#else 38#else
39#define xfs_ail_check(a,l) 39#define xfs_ail_check(a,l)
40#endif /* DEBUG */ 40#endif /* DEBUG */
@@ -57,7 +57,7 @@ xfs_trans_tail_ail(
57 xfs_log_item_t *lip; 57 xfs_log_item_t *lip;
58 58
59 spin_lock(&mp->m_ail_lock); 59 spin_lock(&mp->m_ail_lock);
60 lip = xfs_ail_min(&(mp->m_ail.xa_ail)); 60 lip = xfs_ail_min(&mp->m_ail);
61 if (lip == NULL) { 61 if (lip == NULL) {
62 lsn = (xfs_lsn_t)0; 62 lsn = (xfs_lsn_t)0;
63 } else { 63 } else {
@@ -91,7 +91,7 @@ xfs_trans_push_ail(
91{ 91{
92 xfs_log_item_t *lip; 92 xfs_log_item_t *lip;
93 93
94 lip = xfs_ail_min(&mp->m_ail.xa_ail); 94 lip = xfs_ail_min(&mp->m_ail);
95 if (lip && !XFS_FORCED_SHUTDOWN(mp)) { 95 if (lip && !XFS_FORCED_SHUTDOWN(mp)) {
96 if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0) 96 if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0)
97 xfsaild_wakeup(mp, threshold_lsn); 97 xfsaild_wakeup(mp, threshold_lsn);
@@ -111,15 +111,17 @@ xfs_trans_first_push_ail(
111{ 111{
112 xfs_log_item_t *lip; 112 xfs_log_item_t *lip;
113 113
114 lip = xfs_ail_min(&(mp->m_ail.xa_ail)); 114 lip = xfs_ail_min(&mp->m_ail);
115 *gen = (int)mp->m_ail.xa_gen; 115 *gen = (int)mp->m_ail.xa_gen;
116 if (lsn == 0) 116 if (lsn == 0)
117 return lip; 117 return lip;
118 118
119 while (lip && (XFS_LSN_CMP(lip->li_lsn, lsn) < 0)) 119 list_for_each_entry(lip, &mp->m_ail.xa_ail, li_ail) {
120 lip = lip->li_ail.ail_forw; 120 if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
121 return lip;
122 }
121 123
122 return lip; 124 return NULL;
123} 125}
124 126
125/* 127/*
@@ -261,16 +263,19 @@ xfsaild_push(
261 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 263 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
262 } 264 }
263 265
264 /* 266 if (!count) {
265 * We reached the target so wait a bit longer for I/O to complete and 267 /* We're past our target or empty, so idle */
266 * remove pushed items from the AIL before we start the next scan from 268 tout = 1000;
267 * the start of the AIL. 269 } else if (XFS_LSN_CMP(lsn, target) >= 0) {
268 */ 270 /*
269 if ((XFS_LSN_CMP(lsn, target) >= 0)) { 271 * We reached the target so wait a bit longer for I/O to
272 * complete and remove pushed items from the AIL before we
273 * start the next scan from the start of the AIL.
274 */
270 tout += 20; 275 tout += 20;
271 last_pushed_lsn = 0; 276 last_pushed_lsn = 0;
272 } else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) || 277 } else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) ||
273 (count && ((stuck * 100) / count > 90))) { 278 ((stuck * 100) / count > 90)) {
274 /* 279 /*
275 * Either there is a lot of contention on the AIL or we 280 * Either there is a lot of contention on the AIL or we
276 * are stuck due to operations in progress. "Stuck" in this 281 * are stuck due to operations in progress. "Stuck" in this
@@ -326,7 +331,7 @@ xfs_trans_unlocked_item(
326 * the call to xfs_log_move_tail() doesn't do anything if there's 331 * the call to xfs_log_move_tail() doesn't do anything if there's
327 * not enough free space to wake people up so we're safe calling it. 332 * not enough free space to wake people up so we're safe calling it.
328 */ 333 */
329 min_lip = xfs_ail_min(&mp->m_ail.xa_ail); 334 min_lip = xfs_ail_min(&mp->m_ail);
330 335
331 if (min_lip == lip) 336 if (min_lip == lip)
332 xfs_log_move_tail(mp, 1); 337 xfs_log_move_tail(mp, 1);
@@ -354,15 +359,13 @@ xfs_trans_update_ail(
354 xfs_log_item_t *lip, 359 xfs_log_item_t *lip,
355 xfs_lsn_t lsn) __releases(mp->m_ail_lock) 360 xfs_lsn_t lsn) __releases(mp->m_ail_lock)
356{ 361{
357 xfs_ail_entry_t *ailp;
358 xfs_log_item_t *dlip=NULL; 362 xfs_log_item_t *dlip=NULL;
359 xfs_log_item_t *mlip; /* ptr to minimum lip */ 363 xfs_log_item_t *mlip; /* ptr to minimum lip */
360 364
361 ailp = &(mp->m_ail.xa_ail); 365 mlip = xfs_ail_min(&mp->m_ail);
362 mlip = xfs_ail_min(ailp);
363 366
364 if (lip->li_flags & XFS_LI_IN_AIL) { 367 if (lip->li_flags & XFS_LI_IN_AIL) {
365 dlip = xfs_ail_delete(ailp, lip); 368 dlip = xfs_ail_delete(&mp->m_ail, lip);
366 ASSERT(dlip == lip); 369 ASSERT(dlip == lip);
367 } else { 370 } else {
368 lip->li_flags |= XFS_LI_IN_AIL; 371 lip->li_flags |= XFS_LI_IN_AIL;
@@ -370,11 +373,11 @@ xfs_trans_update_ail(
370 373
371 lip->li_lsn = lsn; 374 lip->li_lsn = lsn;
372 375
373 xfs_ail_insert(ailp, lip); 376 xfs_ail_insert(&mp->m_ail, lip);
374 mp->m_ail.xa_gen++; 377 mp->m_ail.xa_gen++;
375 378
376 if (mlip == dlip) { 379 if (mlip == dlip) {
377 mlip = xfs_ail_min(&(mp->m_ail.xa_ail)); 380 mlip = xfs_ail_min(&mp->m_ail);
378 spin_unlock(&mp->m_ail_lock); 381 spin_unlock(&mp->m_ail_lock);
379 xfs_log_move_tail(mp, mlip->li_lsn); 382 xfs_log_move_tail(mp, mlip->li_lsn);
380 } else { 383 } else {
@@ -404,14 +407,12 @@ xfs_trans_delete_ail(
404 xfs_mount_t *mp, 407 xfs_mount_t *mp,
405 xfs_log_item_t *lip) __releases(mp->m_ail_lock) 408 xfs_log_item_t *lip) __releases(mp->m_ail_lock)
406{ 409{
407 xfs_ail_entry_t *ailp;
408 xfs_log_item_t *dlip; 410 xfs_log_item_t *dlip;
409 xfs_log_item_t *mlip; 411 xfs_log_item_t *mlip;
410 412
411 if (lip->li_flags & XFS_LI_IN_AIL) { 413 if (lip->li_flags & XFS_LI_IN_AIL) {
412 ailp = &(mp->m_ail.xa_ail); 414 mlip = xfs_ail_min(&mp->m_ail);
413 mlip = xfs_ail_min(ailp); 415 dlip = xfs_ail_delete(&mp->m_ail, lip);
414 dlip = xfs_ail_delete(ailp, lip);
415 ASSERT(dlip == lip); 416 ASSERT(dlip == lip);
416 417
417 418
@@ -420,7 +421,7 @@ xfs_trans_delete_ail(
420 mp->m_ail.xa_gen++; 421 mp->m_ail.xa_gen++;
421 422
422 if (mlip == dlip) { 423 if (mlip == dlip) {
423 mlip = xfs_ail_min(&(mp->m_ail.xa_ail)); 424 mlip = xfs_ail_min(&mp->m_ail);
424 spin_unlock(&mp->m_ail_lock); 425 spin_unlock(&mp->m_ail_lock);
425 xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); 426 xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0));
426 } else { 427 } else {
@@ -437,7 +438,7 @@ xfs_trans_delete_ail(
437 else { 438 else {
438 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, 439 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
439 "%s: attempting to delete a log item that is not in the AIL", 440 "%s: attempting to delete a log item that is not in the AIL",
440 __FUNCTION__); 441 __func__);
441 spin_unlock(&mp->m_ail_lock); 442 spin_unlock(&mp->m_ail_lock);
442 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 443 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
443 } 444 }
@@ -458,7 +459,7 @@ xfs_trans_first_ail(
458{ 459{
459 xfs_log_item_t *lip; 460 xfs_log_item_t *lip;
460 461
461 lip = xfs_ail_min(&(mp->m_ail.xa_ail)); 462 lip = xfs_ail_min(&mp->m_ail);
462 *gen = (int)mp->m_ail.xa_gen; 463 *gen = (int)mp->m_ail.xa_gen;
463 464
464 return lip; 465 return lip;
@@ -482,9 +483,9 @@ xfs_trans_next_ail(
482 483
483 ASSERT(mp && lip && gen); 484 ASSERT(mp && lip && gen);
484 if (mp->m_ail.xa_gen == *gen) { 485 if (mp->m_ail.xa_gen == *gen) {
485 nlip = xfs_ail_next(&(mp->m_ail.xa_ail), lip); 486 nlip = xfs_ail_next(&mp->m_ail, lip);
486 } else { 487 } else {
487 nlip = xfs_ail_min(&(mp->m_ail).xa_ail); 488 nlip = xfs_ail_min(&mp->m_ail);
488 *gen = (int)mp->m_ail.xa_gen; 489 *gen = (int)mp->m_ail.xa_gen;
489 if (restarts != NULL) { 490 if (restarts != NULL) {
490 XFS_STATS_INC(xs_push_ail_restarts); 491 XFS_STATS_INC(xs_push_ail_restarts);
@@ -514,8 +515,7 @@ int
514xfs_trans_ail_init( 515xfs_trans_ail_init(
515 xfs_mount_t *mp) 516 xfs_mount_t *mp)
516{ 517{
517 mp->m_ail.xa_ail.ail_forw = (xfs_log_item_t*)&mp->m_ail.xa_ail; 518 INIT_LIST_HEAD(&mp->m_ail.xa_ail);
518 mp->m_ail.xa_ail.ail_back = (xfs_log_item_t*)&mp->m_ail.xa_ail;
519 return xfsaild_start(mp); 519 return xfsaild_start(mp);
520} 520}
521 521
@@ -534,7 +534,7 @@ xfs_trans_ail_destroy(
534 */ 534 */
535STATIC void 535STATIC void
536xfs_ail_insert( 536xfs_ail_insert(
537 xfs_ail_entry_t *base, 537 xfs_ail_t *ailp,
538 xfs_log_item_t *lip) 538 xfs_log_item_t *lip)
539/* ARGSUSED */ 539/* ARGSUSED */
540{ 540{
@@ -543,27 +543,22 @@ xfs_ail_insert(
543 /* 543 /*
544 * If the list is empty, just insert the item. 544 * If the list is empty, just insert the item.
545 */ 545 */
546 if (base->ail_back == (xfs_log_item_t*)base) { 546 if (list_empty(&ailp->xa_ail)) {
547 base->ail_forw = lip; 547 list_add(&lip->li_ail, &ailp->xa_ail);
548 base->ail_back = lip;
549 lip->li_ail.ail_forw = (xfs_log_item_t*)base;
550 lip->li_ail.ail_back = (xfs_log_item_t*)base;
551 return; 548 return;
552 } 549 }
553 550
554 next_lip = base->ail_back; 551 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
555 while ((next_lip != (xfs_log_item_t*)base) && 552 if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)
556 (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) > 0)) { 553 break;
557 next_lip = next_lip->li_ail.ail_back;
558 } 554 }
559 ASSERT((next_lip == (xfs_log_item_t*)base) || 555
556 ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
560 (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); 557 (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0));
561 lip->li_ail.ail_forw = next_lip->li_ail.ail_forw;
562 lip->li_ail.ail_back = next_lip;
563 next_lip->li_ail.ail_forw = lip;
564 lip->li_ail.ail_forw->li_ail.ail_back = lip;
565 558
566 xfs_ail_check(base, lip); 559 list_add(&lip->li_ail, &next_lip->li_ail);
560
561 xfs_ail_check(ailp, lip);
567 return; 562 return;
568} 563}
569 564
@@ -573,15 +568,13 @@ xfs_ail_insert(
573/*ARGSUSED*/ 568/*ARGSUSED*/
574STATIC xfs_log_item_t * 569STATIC xfs_log_item_t *
575xfs_ail_delete( 570xfs_ail_delete(
576 xfs_ail_entry_t *base, 571 xfs_ail_t *ailp,
577 xfs_log_item_t *lip) 572 xfs_log_item_t *lip)
578/* ARGSUSED */ 573/* ARGSUSED */
579{ 574{
580 xfs_ail_check(base, lip); 575 xfs_ail_check(ailp, lip);
581 lip->li_ail.ail_forw->li_ail.ail_back = lip->li_ail.ail_back; 576
582 lip->li_ail.ail_back->li_ail.ail_forw = lip->li_ail.ail_forw; 577 list_del(&lip->li_ail);
583 lip->li_ail.ail_forw = NULL;
584 lip->li_ail.ail_back = NULL;
585 578
586 return lip; 579 return lip;
587} 580}
@@ -592,14 +585,13 @@ xfs_ail_delete(
592 */ 585 */
593STATIC xfs_log_item_t * 586STATIC xfs_log_item_t *
594xfs_ail_min( 587xfs_ail_min(
595 xfs_ail_entry_t *base) 588 xfs_ail_t *ailp)
596/* ARGSUSED */ 589/* ARGSUSED */
597{ 590{
598 register xfs_log_item_t *forw = base->ail_forw; 591 if (list_empty(&ailp->xa_ail))
599 if (forw == (xfs_log_item_t*)base) {
600 return NULL; 592 return NULL;
601 } 593
602 return forw; 594 return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
603} 595}
604 596
605/* 597/*
@@ -609,15 +601,14 @@ xfs_ail_min(
609 */ 601 */
610STATIC xfs_log_item_t * 602STATIC xfs_log_item_t *
611xfs_ail_next( 603xfs_ail_next(
612 xfs_ail_entry_t *base, 604 xfs_ail_t *ailp,
613 xfs_log_item_t *lip) 605 xfs_log_item_t *lip)
614/* ARGSUSED */ 606/* ARGSUSED */
615{ 607{
616 if (lip->li_ail.ail_forw == (xfs_log_item_t*)base) { 608 if (lip->li_ail.next == &ailp->xa_ail)
617 return NULL; 609 return NULL;
618 }
619 return lip->li_ail.ail_forw;
620 610
611 return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
621} 612}
622 613
623#ifdef DEBUG 614#ifdef DEBUG
@@ -626,57 +617,40 @@ xfs_ail_next(
626 */ 617 */
627STATIC void 618STATIC void
628xfs_ail_check( 619xfs_ail_check(
629 xfs_ail_entry_t *base, 620 xfs_ail_t *ailp,
630 xfs_log_item_t *lip) 621 xfs_log_item_t *lip)
631{ 622{
632 xfs_log_item_t *prev_lip; 623 xfs_log_item_t *prev_lip;
633 624
634 prev_lip = base->ail_forw; 625 if (list_empty(&ailp->xa_ail))
635 if (prev_lip == (xfs_log_item_t*)base) {
636 /*
637 * Make sure the pointers are correct when the list
638 * is empty.
639 */
640 ASSERT(base->ail_back == (xfs_log_item_t*)base);
641 return; 626 return;
642 }
643 627
644 /* 628 /*
645 * Check the next and previous entries are valid. 629 * Check the next and previous entries are valid.
646 */ 630 */
647 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); 631 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
648 prev_lip = lip->li_ail.ail_back; 632 prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
649 if (prev_lip != (xfs_log_item_t*)base) { 633 if (&prev_lip->li_ail != &ailp->xa_ail)
650 ASSERT(prev_lip->li_ail.ail_forw == lip);
651 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); 634 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
652 } 635
653 prev_lip = lip->li_ail.ail_forw; 636 prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
654 if (prev_lip != (xfs_log_item_t*)base) { 637 if (&prev_lip->li_ail != &ailp->xa_ail)
655 ASSERT(prev_lip->li_ail.ail_back == lip);
656 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); 638 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
657 }
658 639
659 640
660#ifdef XFS_TRANS_DEBUG 641#ifdef XFS_TRANS_DEBUG
661 /* 642 /*
662 * Walk the list checking forward and backward pointers, 643 * Walk the list checking lsn ordering, and that every entry has the
663 * lsn ordering, and that every entry has the XFS_LI_IN_AIL 644 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
664 * flag set. This is really expensive, so only do it when 645 * when specifically debugging the transaction subsystem.
665 * specifically debugging the transaction subsystem.
666 */ 646 */
667 prev_lip = (xfs_log_item_t*)base; 647 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
668 while (lip != (xfs_log_item_t*)base) { 648 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
669 if (prev_lip != (xfs_log_item_t*)base) { 649 if (&prev_lip->li_ail != &ailp->xa_ail)
670 ASSERT(prev_lip->li_ail.ail_forw == lip);
671 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); 650 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
672 }
673 ASSERT(lip->li_ail.ail_back == prev_lip);
674 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); 651 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
675 prev_lip = lip; 652 prev_lip = lip;
676 lip = lip->li_ail.ail_forw;
677 } 653 }
678 ASSERT(lip == (xfs_log_item_t*)base);
679 ASSERT(base->ail_back == prev_lip);
680#endif /* XFS_TRANS_DEBUG */ 654#endif /* XFS_TRANS_DEBUG */
681} 655}
682#endif /* DEBUG */ 656#endif /* DEBUG */
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 60b6b898022b..cb0c5839154b 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -304,7 +304,8 @@ xfs_trans_read_buf(
304 if (tp == NULL) { 304 if (tp == NULL) {
305 bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); 305 bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
306 if (!bp) 306 if (!bp)
307 return XFS_ERROR(ENOMEM); 307 return (flags & XFS_BUF_TRYLOCK) ?
308 EAGAIN : XFS_ERROR(ENOMEM);
308 309
309 if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) { 310 if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) {
310 xfs_ioerror_alert("xfs_trans_read_buf", mp, 311 xfs_ioerror_alert("xfs_trans_read_buf", mp,
@@ -353,17 +354,15 @@ xfs_trans_read_buf(
353 ASSERT(!XFS_BUF_ISASYNC(bp)); 354 ASSERT(!XFS_BUF_ISASYNC(bp));
354 XFS_BUF_READ(bp); 355 XFS_BUF_READ(bp);
355 xfsbdstrat(tp->t_mountp, bp); 356 xfsbdstrat(tp->t_mountp, bp);
356 xfs_iowait(bp); 357 error = xfs_iowait(bp);
357 if (XFS_BUF_GETERROR(bp) != 0) { 358 if (error) {
358 xfs_ioerror_alert("xfs_trans_read_buf", mp, 359 xfs_ioerror_alert("xfs_trans_read_buf", mp,
359 bp, blkno); 360 bp, blkno);
360 error = XFS_BUF_GETERROR(bp);
361 xfs_buf_relse(bp); 361 xfs_buf_relse(bp);
362 /* 362 /*
363 * We can gracefully recover from most 363 * We can gracefully recover from most read
364 * read errors. Ones we can't are those 364 * errors. Ones we can't are those that happen
365 * that happen after the transaction's 365 * after the transaction's already dirty.
366 * already dirty.
367 */ 366 */
368 if (tp->t_flags & XFS_TRANS_DIRTY) 367 if (tp->t_flags & XFS_TRANS_DIRTY)
369 xfs_force_shutdown(tp->t_mountp, 368 xfs_force_shutdown(tp->t_mountp,
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 5c89be475464..0f5191644ab2 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -160,4 +160,9 @@ typedef enum {
160 XFS_BTNUM_MAX 160 XFS_BTNUM_MAX
161} xfs_btnum_t; 161} xfs_btnum_t;
162 162
163struct xfs_name {
164 const char *name;
165 int len;
166};
167
163#endif /* __XFS_TYPES_H__ */ 168#endif /* __XFS_TYPES_H__ */
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 45d740df53b7..2b8dc7e40772 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -40,34 +40,12 @@
40#include "xfs_itable.h" 40#include "xfs_itable.h"
41#include "xfs_utils.h" 41#include "xfs_utils.h"
42 42
43/*
44 * xfs_get_dir_entry is used to get a reference to an inode given
45 * its parent directory inode and the name of the file. It does
46 * not lock the child inode, and it unlocks the directory before
47 * returning. The directory's generation number is returned for
48 * use by a later call to xfs_lock_dir_and_entry.
49 */
50int
51xfs_get_dir_entry(
52 bhv_vname_t *dentry,
53 xfs_inode_t **ipp)
54{
55 bhv_vnode_t *vp;
56
57 vp = VNAME_TO_VNODE(dentry);
58
59 *ipp = xfs_vtoi(vp);
60 if (!*ipp)
61 return XFS_ERROR(ENOENT);
62 VN_HOLD(vp);
63 return 0;
64}
65 43
66int 44int
67xfs_dir_lookup_int( 45xfs_dir_lookup_int(
68 xfs_inode_t *dp, 46 xfs_inode_t *dp,
69 uint lock_mode, 47 uint lock_mode,
70 bhv_vname_t *dentry, 48 struct xfs_name *name,
71 xfs_ino_t *inum, 49 xfs_ino_t *inum,
72 xfs_inode_t **ipp) 50 xfs_inode_t **ipp)
73{ 51{
@@ -75,7 +53,7 @@ xfs_dir_lookup_int(
75 53
76 xfs_itrace_entry(dp); 54 xfs_itrace_entry(dp);
77 55
78 error = xfs_dir_lookup(NULL, dp, VNAME(dentry), VNAMELEN(dentry), inum); 56 error = xfs_dir_lookup(NULL, dp, name, inum);
79 if (!error) { 57 if (!error) {
80 /* 58 /*
81 * Unlock the directory. We do this because we can't 59 * Unlock the directory. We do this because we can't
@@ -339,10 +317,10 @@ xfs_bump_ino_vers2(
339 ip->i_d.di_onlink = 0; 317 ip->i_d.di_onlink = 0;
340 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 318 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
341 mp = tp->t_mountp; 319 mp = tp->t_mountp;
342 if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { 320 if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
343 spin_lock(&mp->m_sb_lock); 321 spin_lock(&mp->m_sb_lock);
344 if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { 322 if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
345 XFS_SB_VERSION_ADDNLINK(&mp->m_sb); 323 xfs_sb_version_addnlink(&mp->m_sb);
346 spin_unlock(&mp->m_sb_lock); 324 spin_unlock(&mp->m_sb_lock);
347 xfs_mod_sb(tp, XFS_SB_VERSIONNUM); 325 xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
348 } else { 326 } else {
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index f857fcccb723..175b126d2cab 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -21,15 +21,14 @@
21#define IRELE(ip) VN_RELE(XFS_ITOV(ip)) 21#define IRELE(ip) VN_RELE(XFS_ITOV(ip))
22#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) 22#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip))
23 23
24extern int xfs_get_dir_entry (bhv_vname_t *, xfs_inode_t **); 24extern int xfs_dir_lookup_int(xfs_inode_t *, uint, struct xfs_name *,
25extern int xfs_dir_lookup_int (xfs_inode_t *, uint, bhv_vname_t *, xfs_ino_t *, 25 xfs_ino_t *, xfs_inode_t **);
26 xfs_inode_t **); 26extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
27extern int xfs_truncate_file (xfs_mount_t *, xfs_inode_t *); 27extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
28extern int xfs_dir_ialloc (xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
29 xfs_dev_t, cred_t *, prid_t, int, 28 xfs_dev_t, cred_t *, prid_t, int,
30 xfs_inode_t **, int *); 29 xfs_inode_t **, int *);
31extern int xfs_droplink (xfs_trans_t *, xfs_inode_t *); 30extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *);
32extern int xfs_bumplink (xfs_trans_t *, xfs_inode_t *); 31extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *);
33extern void xfs_bump_ino_vers2 (xfs_trans_t *, xfs_inode_t *); 32extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *);
34 33
35#endif /* __XFS_UTILS_H__ */ 34#endif /* __XFS_UTILS_H__ */
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 413587f02155..fc48158fe479 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -43,7 +43,6 @@
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_bmap.h" 44#include "xfs_bmap.h"
45#include "xfs_rw.h" 45#include "xfs_rw.h"
46#include "xfs_refcache.h"
47#include "xfs_buf_item.h" 46#include "xfs_buf_item.h"
48#include "xfs_log_priv.h" 47#include "xfs_log_priv.h"
49#include "xfs_dir2_trace.h" 48#include "xfs_dir2_trace.h"
@@ -56,6 +55,7 @@
56#include "xfs_fsops.h" 55#include "xfs_fsops.h"
57#include "xfs_vnodeops.h" 56#include "xfs_vnodeops.h"
58#include "xfs_vfsops.h" 57#include "xfs_vfsops.h"
58#include "xfs_utils.h"
59 59
60 60
61int __init 61int __init
@@ -69,15 +69,17 @@ xfs_init(void)
69 /* 69 /*
70 * Initialize all of the zone allocators we use. 70 * Initialize all of the zone allocators we use.
71 */ 71 */
72 xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
73 "xfs_log_ticket");
72 xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), 74 xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
73 "xfs_bmap_free_item"); 75 "xfs_bmap_free_item");
74 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), 76 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
75 "xfs_btree_cur"); 77 "xfs_btree_cur");
76 xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); 78 xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
77 xfs_da_state_zone = 79 "xfs_da_state");
78 kmem_zone_init(sizeof(xfs_da_state_t), "xfs_da_state");
79 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); 80 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
80 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); 81 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
82 xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
81 xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); 83 xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
82 xfs_mru_cache_init(); 84 xfs_mru_cache_init();
83 xfs_filestream_init(); 85 xfs_filestream_init();
@@ -113,9 +115,6 @@ xfs_init(void)
113 xfs_ili_zone = 115 xfs_ili_zone =
114 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", 116 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
115 KM_ZONE_SPREAD, NULL); 117 KM_ZONE_SPREAD, NULL);
116 xfs_icluster_zone =
117 kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster",
118 KM_ZONE_SPREAD, NULL);
119 118
120 /* 119 /*
121 * Allocate global trace buffers. 120 * Allocate global trace buffers.
@@ -153,11 +152,9 @@ xfs_cleanup(void)
153 extern kmem_zone_t *xfs_inode_zone; 152 extern kmem_zone_t *xfs_inode_zone;
154 extern kmem_zone_t *xfs_efd_zone; 153 extern kmem_zone_t *xfs_efd_zone;
155 extern kmem_zone_t *xfs_efi_zone; 154 extern kmem_zone_t *xfs_efi_zone;
156 extern kmem_zone_t *xfs_icluster_zone;
157 155
158 xfs_cleanup_procfs(); 156 xfs_cleanup_procfs();
159 xfs_sysctl_unregister(); 157 xfs_sysctl_unregister();
160 xfs_refcache_destroy();
161 xfs_filestream_uninit(); 158 xfs_filestream_uninit();
162 xfs_mru_cache_uninit(); 159 xfs_mru_cache_uninit();
163 xfs_acl_zone_destroy(xfs_acl_zone); 160 xfs_acl_zone_destroy(xfs_acl_zone);
@@ -189,7 +186,6 @@ xfs_cleanup(void)
189 kmem_zone_destroy(xfs_efi_zone); 186 kmem_zone_destroy(xfs_efi_zone);
190 kmem_zone_destroy(xfs_ifork_zone); 187 kmem_zone_destroy(xfs_ifork_zone);
191 kmem_zone_destroy(xfs_ili_zone); 188 kmem_zone_destroy(xfs_ili_zone);
192 kmem_zone_destroy(xfs_icluster_zone);
193} 189}
194 190
195/* 191/*
@@ -281,8 +277,8 @@ xfs_start_flags(
281 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; 277 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
282 } 278 }
283 279
284 if (ap->flags & XFSMNT_IDELETE) 280 if (ap->flags & XFSMNT_IKEEP)
285 mp->m_flags |= XFS_MOUNT_IDELETE; 281 mp->m_flags |= XFS_MOUNT_IKEEP;
286 if (ap->flags & XFSMNT_DIRSYNC) 282 if (ap->flags & XFSMNT_DIRSYNC)
287 mp->m_flags |= XFS_MOUNT_DIRSYNC; 283 mp->m_flags |= XFS_MOUNT_DIRSYNC;
288 if (ap->flags & XFSMNT_ATTR2) 284 if (ap->flags & XFSMNT_ATTR2)
@@ -330,7 +326,7 @@ xfs_finish_flags(
330 int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); 326 int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
331 327
332 /* Fail a mount where the logbuf is smaller then the log stripe */ 328 /* Fail a mount where the logbuf is smaller then the log stripe */
333 if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) { 329 if (xfs_sb_version_haslogv2(&mp->m_sb)) {
334 if ((ap->logbufsize <= 0) && 330 if ((ap->logbufsize <= 0) &&
335 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) { 331 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
336 mp->m_logbsize = mp->m_sb.sb_logsunit; 332 mp->m_logbsize = mp->m_sb.sb_logsunit;
@@ -349,9 +345,8 @@ xfs_finish_flags(
349 } 345 }
350 } 346 }
351 347
352 if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) { 348 if (xfs_sb_version_hasattr2(&mp->m_sb))
353 mp->m_flags |= XFS_MOUNT_ATTR2; 349 mp->m_flags |= XFS_MOUNT_ATTR2;
354 }
355 350
356 /* 351 /*
357 * prohibit r/w mounts of read-only filesystems 352 * prohibit r/w mounts of read-only filesystems
@@ -366,7 +361,7 @@ xfs_finish_flags(
366 * check for shared mount. 361 * check for shared mount.
367 */ 362 */
368 if (ap->flags & XFSMNT_SHARED) { 363 if (ap->flags & XFSMNT_SHARED) {
369 if (!XFS_SB_VERSION_HASSHARED(&mp->m_sb)) 364 if (!xfs_sb_version_hasshared(&mp->m_sb))
370 return XFS_ERROR(EINVAL); 365 return XFS_ERROR(EINVAL);
371 366
372 /* 367 /*
@@ -512,7 +507,7 @@ xfs_mount(
512 if (!error && logdev && logdev != ddev) { 507 if (!error && logdev && logdev != ddev) {
513 unsigned int log_sector_size = BBSIZE; 508 unsigned int log_sector_size = BBSIZE;
514 509
515 if (XFS_SB_VERSION_HASSECTOR(&mp->m_sb)) 510 if (xfs_sb_version_hassector(&mp->m_sb))
516 log_sector_size = mp->m_sb.sb_logsectsize; 511 log_sector_size = mp->m_sb.sb_logsectsize;
517 error = xfs_setsize_buftarg(mp->m_logdev_targp, 512 error = xfs_setsize_buftarg(mp->m_logdev_targp,
518 mp->m_sb.sb_blocksize, 513 mp->m_sb.sb_blocksize,
@@ -574,7 +569,7 @@ xfs_unmount(
574#ifdef HAVE_DMAPI 569#ifdef HAVE_DMAPI
575 if (mp->m_flags & XFS_MOUNT_DMAPI) { 570 if (mp->m_flags & XFS_MOUNT_DMAPI) {
576 error = XFS_SEND_PREUNMOUNT(mp, 571 error = XFS_SEND_PREUNMOUNT(mp,
577 rvp, DM_RIGHT_NULL, rvp, DM_RIGHT_NULL, 572 rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
578 NULL, NULL, 0, 0, 573 NULL, NULL, 0, 0,
579 (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))? 574 (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
580 0:DM_FLAGS_UNWANTED); 575 0:DM_FLAGS_UNWANTED);
@@ -585,11 +580,6 @@ xfs_unmount(
585 0 : DM_FLAGS_UNWANTED; 580 0 : DM_FLAGS_UNWANTED;
586 } 581 }
587#endif 582#endif
588 /*
589 * First blow any referenced inode from this file system
590 * out of the reference cache, and delete the timer.
591 */
592 xfs_refcache_purge_mp(mp);
593 583
594 /* 584 /*
595 * Blow away any referenced inode in the filestreams cache. 585 * Blow away any referenced inode in the filestreams cache.
@@ -608,7 +598,7 @@ xfs_unmount(
608 /* 598 /*
609 * Drop the reference count 599 * Drop the reference count
610 */ 600 */
611 VN_RELE(rvp); 601 IRELE(rip);
612 602
613 /* 603 /*
614 * If we're forcing a shutdown, typically because of a media error, 604 * If we're forcing a shutdown, typically because of a media error,
@@ -630,7 +620,7 @@ out:
630 /* Note: mp structure must still exist for 620 /* Note: mp structure must still exist for
631 * XFS_SEND_UNMOUNT() call. 621 * XFS_SEND_UNMOUNT() call.
632 */ 622 */
633 XFS_SEND_UNMOUNT(mp, error == 0 ? rvp : NULL, 623 XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL,
634 DM_RIGHT_NULL, 0, error, unmount_event_flags); 624 DM_RIGHT_NULL, 0, error, unmount_event_flags);
635 } 625 }
636 if (xfs_unmountfs_needed) { 626 if (xfs_unmountfs_needed) {
@@ -647,13 +637,12 @@ out:
647 return XFS_ERROR(error); 637 return XFS_ERROR(error);
648} 638}
649 639
650STATIC int 640STATIC void
651xfs_quiesce_fs( 641xfs_quiesce_fs(
652 xfs_mount_t *mp) 642 xfs_mount_t *mp)
653{ 643{
654 int count = 0, pincount; 644 int count = 0, pincount;
655 645
656 xfs_refcache_purge_mp(mp);
657 xfs_flush_buftarg(mp->m_ddev_targp, 0); 646 xfs_flush_buftarg(mp->m_ddev_targp, 0);
658 xfs_finish_reclaim_all(mp, 0); 647 xfs_finish_reclaim_all(mp, 0);
659 648
@@ -672,8 +661,6 @@ xfs_quiesce_fs(
672 count++; 661 count++;
673 } 662 }
674 } while (count < 2); 663 } while (count < 2);
675
676 return 0;
677} 664}
678 665
679/* 666/*
@@ -685,6 +672,8 @@ void
685xfs_attr_quiesce( 672xfs_attr_quiesce(
686 xfs_mount_t *mp) 673 xfs_mount_t *mp)
687{ 674{
675 int error = 0;
676
688 /* wait for all modifications to complete */ 677 /* wait for all modifications to complete */
689 while (atomic_read(&mp->m_active_trans) > 0) 678 while (atomic_read(&mp->m_active_trans) > 0)
690 delay(100); 679 delay(100);
@@ -695,7 +684,11 @@ xfs_attr_quiesce(
695 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); 684 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
696 685
697 /* Push the superblock and write an unmount record */ 686 /* Push the superblock and write an unmount record */
698 xfs_log_sbcount(mp, 1); 687 error = xfs_log_sbcount(mp, 1);
688 if (error)
689 xfs_fs_cmn_err(CE_WARN, mp,
690 "xfs_attr_quiesce: failed to log sb changes. "
691 "Frozen image may not be consistent.");
699 xfs_log_unmount_write(mp); 692 xfs_log_unmount_write(mp);
700 xfs_unmountfs_writesb(mp); 693 xfs_unmountfs_writesb(mp);
701} 694}
@@ -791,8 +784,8 @@ xfs_unmount_flush(
791 goto fscorrupt_out2; 784 goto fscorrupt_out2;
792 785
793 if (rbmip) { 786 if (rbmip) {
794 VN_RELE(XFS_ITOV(rbmip)); 787 IRELE(rbmip);
795 VN_RELE(XFS_ITOV(rsumip)); 788 IRELE(rsumip);
796 } 789 }
797 790
798 xfs_iunlock(rip, XFS_ILOCK_EXCL); 791 xfs_iunlock(rip, XFS_ILOCK_EXCL);
@@ -1170,10 +1163,10 @@ xfs_sync_inodes(
1170 * above, then wait until after we've unlocked 1163 * above, then wait until after we've unlocked
1171 * the inode to release the reference. This is 1164 * the inode to release the reference. This is
1172 * because we can be already holding the inode 1165 * because we can be already holding the inode
1173 * lock when VN_RELE() calls xfs_inactive(). 1166 * lock when IRELE() calls xfs_inactive().
1174 * 1167 *
1175 * Make sure to drop the mount lock before calling 1168 * Make sure to drop the mount lock before calling
1176 * VN_RELE() so that we don't trip over ourselves if 1169 * IRELE() so that we don't trip over ourselves if
1177 * we have to go for the mount lock again in the 1170 * we have to go for the mount lock again in the
1178 * inactive code. 1171 * inactive code.
1179 */ 1172 */
@@ -1181,7 +1174,7 @@ xfs_sync_inodes(
1181 IPOINTER_INSERT(ip, mp); 1174 IPOINTER_INSERT(ip, mp);
1182 } 1175 }
1183 1176
1184 VN_RELE(vp); 1177 IRELE(ip);
1185 1178
1186 vnode_refed = B_FALSE; 1179 vnode_refed = B_FALSE;
1187 } 1180 }
@@ -1324,30 +1317,8 @@ xfs_syncsub(
1324 } 1317 }
1325 1318
1326 /* 1319 /*
1327 * If this is the periodic sync, then kick some entries out of
1328 * the reference cache. This ensures that idle entries are
1329 * eventually kicked out of the cache.
1330 */
1331 if (flags & SYNC_REFCACHE) {
1332 if (flags & SYNC_WAIT)
1333 xfs_refcache_purge_mp(mp);
1334 else
1335 xfs_refcache_purge_some(mp);
1336 }
1337
1338 /*
1339 * If asked, update the disk superblock with incore counter values if we
1340 * are using non-persistent counters so that they don't get too far out
1341 * of sync if we crash or get a forced shutdown. We don't want to force
1342 * this to disk, just get a transaction into the iclogs....
1343 */
1344 if (flags & SYNC_SUPER)
1345 xfs_log_sbcount(mp, 0);
1346
1347 /*
1348 * Now check to see if the log needs a "dummy" transaction. 1320 * Now check to see if the log needs a "dummy" transaction.
1349 */ 1321 */
1350
1351 if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { 1322 if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) {
1352 xfs_trans_t *tp; 1323 xfs_trans_t *tp;
1353 xfs_inode_t *ip; 1324 xfs_inode_t *ip;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 51305242ff8c..6650601c64f7 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -48,7 +48,6 @@
48#include "xfs_quota.h" 48#include "xfs_quota.h"
49#include "xfs_utils.h" 49#include "xfs_utils.h"
50#include "xfs_rtalloc.h" 50#include "xfs_rtalloc.h"
51#include "xfs_refcache.h"
52#include "xfs_trans_space.h" 51#include "xfs_trans_space.h"
53#include "xfs_log_priv.h" 52#include "xfs_log_priv.h"
54#include "xfs_filestream.h" 53#include "xfs_filestream.h"
@@ -327,7 +326,7 @@ xfs_setattr(
327 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && 326 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
328 !(flags & ATTR_DMI)) { 327 !(flags & ATTR_DMI)) {
329 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; 328 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
330 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp, 329 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
331 vap->va_size, 0, dmflags, NULL); 330 vap->va_size, 0, dmflags, NULL);
332 if (code) { 331 if (code) {
333 lock_flags = 0; 332 lock_flags = 0;
@@ -634,6 +633,15 @@ xfs_setattr(
634 * Truncate file. Must have write permission and not be a directory. 633 * Truncate file. Must have write permission and not be a directory.
635 */ 634 */
636 if (mask & XFS_AT_SIZE) { 635 if (mask & XFS_AT_SIZE) {
636 /*
637 * Only change the c/mtime if we are changing the size
638 * or we are explicitly asked to change it. This handles
639 * the semantic difference between truncate() and ftruncate()
640 * as implemented in the VFS.
641 */
642 if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME))
643 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
644
637 if (vap->va_size > ip->i_size) { 645 if (vap->va_size > ip->i_size) {
638 xfs_igrow_finish(tp, ip, vap->va_size, 646 xfs_igrow_finish(tp, ip, vap->va_size,
639 !(flags & ATTR_DMI)); 647 !(flags & ATTR_DMI));
@@ -662,10 +670,6 @@ xfs_setattr(
662 */ 670 */
663 xfs_iflags_set(ip, XFS_ITRUNCATED); 671 xfs_iflags_set(ip, XFS_ITRUNCATED);
664 } 672 }
665 /*
666 * Have to do this even if the file's size doesn't change.
667 */
668 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
669 } 673 }
670 674
671 /* 675 /*
@@ -877,7 +881,7 @@ xfs_setattr(
877 881
878 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && 882 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
879 !(flags & ATTR_DMI)) { 883 !(flags & ATTR_DMI)) {
880 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL, 884 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
881 NULL, DM_RIGHT_NULL, NULL, NULL, 885 NULL, DM_RIGHT_NULL, NULL, NULL,
882 0, 0, AT_DELAY_FLAG(flags)); 886 0, 0, AT_DELAY_FLAG(flags));
883 } 887 }
@@ -1443,28 +1447,22 @@ xfs_inactive_attrs(
1443 tp = *tpp; 1447 tp = *tpp;
1444 mp = ip->i_mount; 1448 mp = ip->i_mount;
1445 ASSERT(ip->i_d.di_forkoff != 0); 1449 ASSERT(ip->i_d.di_forkoff != 0);
1446 xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1450 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1447 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1451 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1452 if (error)
1453 goto error_unlock;
1448 1454
1449 error = xfs_attr_inactive(ip); 1455 error = xfs_attr_inactive(ip);
1450 if (error) { 1456 if (error)
1451 *tpp = NULL; 1457 goto error_unlock;
1452 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1453 return error; /* goto out */
1454 }
1455 1458
1456 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1459 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
1457 error = xfs_trans_reserve(tp, 0, 1460 error = xfs_trans_reserve(tp, 0,
1458 XFS_IFREE_LOG_RES(mp), 1461 XFS_IFREE_LOG_RES(mp),
1459 0, XFS_TRANS_PERM_LOG_RES, 1462 0, XFS_TRANS_PERM_LOG_RES,
1460 XFS_INACTIVE_LOG_COUNT); 1463 XFS_INACTIVE_LOG_COUNT);
1461 if (error) { 1464 if (error)
1462 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1465 goto error_cancel;
1463 xfs_trans_cancel(tp, 0);
1464 *tpp = NULL;
1465 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1466 return error;
1467 }
1468 1466
1469 xfs_ilock(ip, XFS_ILOCK_EXCL); 1467 xfs_ilock(ip, XFS_ILOCK_EXCL);
1470 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1468 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
@@ -1475,6 +1473,14 @@ xfs_inactive_attrs(
1475 1473
1476 *tpp = tp; 1474 *tpp = tp;
1477 return 0; 1475 return 0;
1476
1477error_cancel:
1478 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1479 xfs_trans_cancel(tp, 0);
1480error_unlock:
1481 *tpp = NULL;
1482 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1483 return error;
1478} 1484}
1479 1485
1480int 1486int
@@ -1520,12 +1526,6 @@ xfs_release(
1520 xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); 1526 xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
1521 } 1527 }
1522 1528
1523#ifdef HAVE_REFCACHE
1524 /* If we are in the NFS reference cache then don't do this now */
1525 if (ip->i_refcache)
1526 return 0;
1527#endif
1528
1529 if (ip->i_d.di_nlink != 0) { 1529 if (ip->i_d.di_nlink != 0) {
1530 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1530 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1531 ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || 1531 ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
@@ -1588,9 +1588,8 @@ xfs_inactive(
1588 1588
1589 mp = ip->i_mount; 1589 mp = ip->i_mount;
1590 1590
1591 if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY)) { 1591 if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY))
1592 (void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL); 1592 XFS_SEND_DESTROY(mp, ip, DM_RIGHT_NULL);
1593 }
1594 1593
1595 error = 0; 1594 error = 0;
1596 1595
@@ -1744,11 +1743,18 @@ xfs_inactive(
1744 XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1); 1743 XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
1745 1744
1746 /* 1745 /*
1747 * Just ignore errors at this point. There is 1746 * Just ignore errors at this point. There is nothing we can
1748 * nothing we can do except to try to keep going. 1747 * do except to try to keep going. Make sure it's not a silent
1748 * error.
1749 */ 1749 */
1750 (void) xfs_bmap_finish(&tp, &free_list, &committed); 1750 error = xfs_bmap_finish(&tp, &free_list, &committed);
1751 (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1751 if (error)
1752 xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
1753 "xfs_bmap_finish() returned error %d", error);
1754 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1755 if (error)
1756 xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
1757 "xfs_trans_commit() returned error %d", error);
1752 } 1758 }
1753 /* 1759 /*
1754 * Release the dquots held by inode, if any. 1760 * Release the dquots held by inode, if any.
@@ -1765,8 +1771,8 @@ xfs_inactive(
1765int 1771int
1766xfs_lookup( 1772xfs_lookup(
1767 xfs_inode_t *dp, 1773 xfs_inode_t *dp,
1768 bhv_vname_t *dentry, 1774 struct xfs_name *name,
1769 bhv_vnode_t **vpp) 1775 xfs_inode_t **ipp)
1770{ 1776{
1771 xfs_inode_t *ip; 1777 xfs_inode_t *ip;
1772 xfs_ino_t e_inum; 1778 xfs_ino_t e_inum;
@@ -1779,9 +1785,9 @@ xfs_lookup(
1779 return XFS_ERROR(EIO); 1785 return XFS_ERROR(EIO);
1780 1786
1781 lock_mode = xfs_ilock_map_shared(dp); 1787 lock_mode = xfs_ilock_map_shared(dp);
1782 error = xfs_dir_lookup_int(dp, lock_mode, dentry, &e_inum, &ip); 1788 error = xfs_dir_lookup_int(dp, lock_mode, name, &e_inum, &ip);
1783 if (!error) { 1789 if (!error) {
1784 *vpp = XFS_ITOV(ip); 1790 *ipp = ip;
1785 xfs_itrace_ref(ip); 1791 xfs_itrace_ref(ip);
1786 } 1792 }
1787 xfs_iunlock_map_shared(dp, lock_mode); 1793 xfs_iunlock_map_shared(dp, lock_mode);
@@ -1791,19 +1797,16 @@ xfs_lookup(
1791int 1797int
1792xfs_create( 1798xfs_create(
1793 xfs_inode_t *dp, 1799 xfs_inode_t *dp,
1794 bhv_vname_t *dentry, 1800 struct xfs_name *name,
1795 mode_t mode, 1801 mode_t mode,
1796 xfs_dev_t rdev, 1802 xfs_dev_t rdev,
1797 bhv_vnode_t **vpp, 1803 xfs_inode_t **ipp,
1798 cred_t *credp) 1804 cred_t *credp)
1799{ 1805{
1800 char *name = VNAME(dentry); 1806 xfs_mount_t *mp = dp->i_mount;
1801 xfs_mount_t *mp = dp->i_mount;
1802 bhv_vnode_t *dir_vp = XFS_ITOV(dp);
1803 xfs_inode_t *ip; 1807 xfs_inode_t *ip;
1804 bhv_vnode_t *vp = NULL;
1805 xfs_trans_t *tp; 1808 xfs_trans_t *tp;
1806 int error; 1809 int error;
1807 xfs_bmap_free_t free_list; 1810 xfs_bmap_free_t free_list;
1808 xfs_fsblock_t first_block; 1811 xfs_fsblock_t first_block;
1809 boolean_t unlock_dp_on_error = B_FALSE; 1812 boolean_t unlock_dp_on_error = B_FALSE;
@@ -1813,17 +1816,14 @@ xfs_create(
1813 xfs_prid_t prid; 1816 xfs_prid_t prid;
1814 struct xfs_dquot *udqp, *gdqp; 1817 struct xfs_dquot *udqp, *gdqp;
1815 uint resblks; 1818 uint resblks;
1816 int namelen;
1817 1819
1818 ASSERT(!*vpp); 1820 ASSERT(!*ipp);
1819 xfs_itrace_entry(dp); 1821 xfs_itrace_entry(dp);
1820 1822
1821 namelen = VNAMELEN(dentry);
1822
1823 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { 1823 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
1824 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 1824 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
1825 dir_vp, DM_RIGHT_NULL, NULL, 1825 dp, DM_RIGHT_NULL, NULL,
1826 DM_RIGHT_NULL, name, NULL, 1826 DM_RIGHT_NULL, name->name, NULL,
1827 mode, 0, 0); 1827 mode, 0, 0);
1828 1828
1829 if (error) 1829 if (error)
@@ -1855,7 +1855,7 @@ xfs_create(
1855 1855
1856 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); 1856 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
1857 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1857 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1858 resblks = XFS_CREATE_SPACE_RES(mp, namelen); 1858 resblks = XFS_CREATE_SPACE_RES(mp, name->len);
1859 /* 1859 /*
1860 * Initially assume that the file does not exist and 1860 * Initially assume that the file does not exist and
1861 * reserve the resources for that case. If that is not 1861 * reserve the resources for that case. If that is not
@@ -1888,7 +1888,8 @@ xfs_create(
1888 if (error) 1888 if (error)
1889 goto error_return; 1889 goto error_return;
1890 1890
1891 if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen))) 1891 error = xfs_dir_canenter(tp, dp, name, resblks);
1892 if (error)
1892 goto error_return; 1893 goto error_return;
1893 error = xfs_dir_ialloc(&tp, dp, mode, 1, 1894 error = xfs_dir_ialloc(&tp, dp, mode, 1,
1894 rdev, credp, prid, resblks > 0, 1895 rdev, credp, prid, resblks > 0,
@@ -1914,11 +1915,11 @@ xfs_create(
1914 * the transaction cancel unlocking dp so don't do it explicitly in the 1915 * the transaction cancel unlocking dp so don't do it explicitly in the
1915 * error path. 1916 * error path.
1916 */ 1917 */
1917 VN_HOLD(dir_vp); 1918 IHOLD(dp);
1918 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1919 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1919 unlock_dp_on_error = B_FALSE; 1920 unlock_dp_on_error = B_FALSE;
1920 1921
1921 error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino, 1922 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
1922 &first_block, &free_list, resblks ? 1923 &first_block, &free_list, resblks ?
1923 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 1924 resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
1924 if (error) { 1925 if (error) {
@@ -1952,7 +1953,6 @@ xfs_create(
1952 * vnode to the caller, we bump the vnode ref count now. 1953 * vnode to the caller, we bump the vnode ref count now.
1953 */ 1954 */
1954 IHOLD(ip); 1955 IHOLD(ip);
1955 vp = XFS_ITOV(ip);
1956 1956
1957 error = xfs_bmap_finish(&tp, &free_list, &committed); 1957 error = xfs_bmap_finish(&tp, &free_list, &committed);
1958 if (error) { 1958 if (error) {
@@ -1970,17 +1970,17 @@ xfs_create(
1970 XFS_QM_DQRELE(mp, udqp); 1970 XFS_QM_DQRELE(mp, udqp);
1971 XFS_QM_DQRELE(mp, gdqp); 1971 XFS_QM_DQRELE(mp, gdqp);
1972 1972
1973 *vpp = vp; 1973 *ipp = ip;
1974 1974
1975 /* Fallthrough to std_return with error = 0 */ 1975 /* Fallthrough to std_return with error = 0 */
1976 1976
1977std_return: 1977std_return:
1978 if ((*vpp || (error != 0 && dm_event_sent != 0)) && 1978 if ((*ipp || (error != 0 && dm_event_sent != 0)) &&
1979 DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { 1979 DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
1980 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, 1980 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
1981 dir_vp, DM_RIGHT_NULL, 1981 dp, DM_RIGHT_NULL,
1982 *vpp ? vp:NULL, 1982 *ipp ? ip : NULL,
1983 DM_RIGHT_NULL, name, NULL, 1983 DM_RIGHT_NULL, name->name, NULL,
1984 mode, error, 0); 1984 mode, error, 0);
1985 } 1985 }
1986 return error; 1986 return error;
@@ -2272,46 +2272,32 @@ int remove_which_error_return = 0;
2272int 2272int
2273xfs_remove( 2273xfs_remove(
2274 xfs_inode_t *dp, 2274 xfs_inode_t *dp,
2275 bhv_vname_t *dentry) 2275 struct xfs_name *name,
2276 xfs_inode_t *ip)
2276{ 2277{
2277 bhv_vnode_t *dir_vp = XFS_ITOV(dp);
2278 char *name = VNAME(dentry);
2279 xfs_mount_t *mp = dp->i_mount; 2278 xfs_mount_t *mp = dp->i_mount;
2280 xfs_inode_t *ip;
2281 xfs_trans_t *tp = NULL; 2279 xfs_trans_t *tp = NULL;
2282 int error = 0; 2280 int error = 0;
2283 xfs_bmap_free_t free_list; 2281 xfs_bmap_free_t free_list;
2284 xfs_fsblock_t first_block; 2282 xfs_fsblock_t first_block;
2285 int cancel_flags; 2283 int cancel_flags;
2286 int committed; 2284 int committed;
2287 int dm_di_mode = 0;
2288 int link_zero; 2285 int link_zero;
2289 uint resblks; 2286 uint resblks;
2290 int namelen;
2291 2287
2292 xfs_itrace_entry(dp); 2288 xfs_itrace_entry(dp);
2293 2289
2294 if (XFS_FORCED_SHUTDOWN(mp)) 2290 if (XFS_FORCED_SHUTDOWN(mp))
2295 return XFS_ERROR(EIO); 2291 return XFS_ERROR(EIO);
2296 2292
2297 namelen = VNAMELEN(dentry);
2298
2299 if (!xfs_get_dir_entry(dentry, &ip)) {
2300 dm_di_mode = ip->i_d.di_mode;
2301 IRELE(ip);
2302 }
2303
2304 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { 2293 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
2305 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, 2294 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dp, DM_RIGHT_NULL,
2306 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 2295 NULL, DM_RIGHT_NULL, name->name, NULL,
2307 name, NULL, dm_di_mode, 0, 0); 2296 ip->i_d.di_mode, 0, 0);
2308 if (error) 2297 if (error)
2309 return error; 2298 return error;
2310 } 2299 }
2311 2300
2312 /* From this point on, return through std_return */
2313 ip = NULL;
2314
2315 /* 2301 /*
2316 * We need to get a reference to ip before we get our log 2302 * We need to get a reference to ip before we get our log
2317 * reservation. The reason for this is that we cannot call 2303 * reservation. The reason for this is that we cannot call
@@ -2324,13 +2310,7 @@ xfs_remove(
2324 * when we call xfs_iget. Instead we get an unlocked reference 2310 * when we call xfs_iget. Instead we get an unlocked reference
2325 * to the inode before getting our log reservation. 2311 * to the inode before getting our log reservation.
2326 */ 2312 */
2327 error = xfs_get_dir_entry(dentry, &ip); 2313 IHOLD(ip);
2328 if (error) {
2329 REMOVE_DEBUG_TRACE(__LINE__);
2330 goto std_return;
2331 }
2332
2333 dm_di_mode = ip->i_d.di_mode;
2334 2314
2335 xfs_itrace_entry(ip); 2315 xfs_itrace_entry(ip);
2336 xfs_itrace_ref(ip); 2316 xfs_itrace_ref(ip);
@@ -2398,7 +2378,7 @@ xfs_remove(
2398 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2378 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
2399 */ 2379 */
2400 XFS_BMAP_INIT(&free_list, &first_block); 2380 XFS_BMAP_INIT(&free_list, &first_block);
2401 error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino, 2381 error = xfs_dir_removename(tp, dp, name, ip->i_ino,
2402 &first_block, &free_list, 0); 2382 &first_block, &free_list, 0);
2403 if (error) { 2383 if (error) {
2404 ASSERT(error != ENOENT); 2384 ASSERT(error != ENOENT);
@@ -2449,14 +2429,6 @@ xfs_remove(
2449 } 2429 }
2450 2430
2451 /* 2431 /*
2452 * Before we drop our extra reference to the inode, purge it
2453 * from the refcache if it is there. By waiting until afterwards
2454 * to do the IRELE, we ensure that we won't go inactive in the
2455 * xfs_refcache_purge_ip routine (although that would be OK).
2456 */
2457 xfs_refcache_purge_ip(ip);
2458
2459 /*
2460 * If we are using filestreams, kill the stream association. 2432 * If we are using filestreams, kill the stream association.
2461 * If the file is still open it may get a new one but that 2433 * If the file is still open it may get a new one but that
2462 * will get killed on last close in xfs_close() so we don't 2434 * will get killed on last close in xfs_close() so we don't
@@ -2472,9 +2444,9 @@ xfs_remove(
2472 std_return: 2444 std_return:
2473 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 2445 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
2474 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 2446 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
2475 dir_vp, DM_RIGHT_NULL, 2447 dp, DM_RIGHT_NULL,
2476 NULL, DM_RIGHT_NULL, 2448 NULL, DM_RIGHT_NULL,
2477 name, NULL, dm_di_mode, error, 0); 2449 name->name, NULL, ip->i_d.di_mode, error, 0);
2478 } 2450 }
2479 return error; 2451 return error;
2480 2452
@@ -2495,14 +2467,6 @@ xfs_remove(
2495 cancel_flags |= XFS_TRANS_ABORT; 2467 cancel_flags |= XFS_TRANS_ABORT;
2496 xfs_trans_cancel(tp, cancel_flags); 2468 xfs_trans_cancel(tp, cancel_flags);
2497 2469
2498 /*
2499 * Before we drop our extra reference to the inode, purge it
2500 * from the refcache if it is there. By waiting until afterwards
2501 * to do the IRELE, we ensure that we won't go inactive in the
2502 * xfs_refcache_purge_ip routine (although that would be OK).
2503 */
2504 xfs_refcache_purge_ip(ip);
2505
2506 IRELE(ip); 2470 IRELE(ip);
2507 2471
2508 goto std_return; 2472 goto std_return;
@@ -2511,12 +2475,10 @@ xfs_remove(
2511int 2475int
2512xfs_link( 2476xfs_link(
2513 xfs_inode_t *tdp, 2477 xfs_inode_t *tdp,
2514 bhv_vnode_t *src_vp, 2478 xfs_inode_t *sip,
2515 bhv_vname_t *dentry) 2479 struct xfs_name *target_name)
2516{ 2480{
2517 bhv_vnode_t *target_dir_vp = XFS_ITOV(tdp);
2518 xfs_mount_t *mp = tdp->i_mount; 2481 xfs_mount_t *mp = tdp->i_mount;
2519 xfs_inode_t *sip = xfs_vtoi(src_vp);
2520 xfs_trans_t *tp; 2482 xfs_trans_t *tp;
2521 xfs_inode_t *ips[2]; 2483 xfs_inode_t *ips[2];
2522 int error; 2484 int error;
@@ -2525,23 +2487,20 @@ xfs_link(
2525 int cancel_flags; 2487 int cancel_flags;
2526 int committed; 2488 int committed;
2527 int resblks; 2489 int resblks;
2528 char *target_name = VNAME(dentry);
2529 int target_namelen;
2530 2490
2531 xfs_itrace_entry(tdp); 2491 xfs_itrace_entry(tdp);
2532 xfs_itrace_entry(xfs_vtoi(src_vp)); 2492 xfs_itrace_entry(sip);
2533 2493
2534 target_namelen = VNAMELEN(dentry); 2494 ASSERT(!S_ISDIR(sip->i_d.di_mode));
2535 ASSERT(!VN_ISDIR(src_vp));
2536 2495
2537 if (XFS_FORCED_SHUTDOWN(mp)) 2496 if (XFS_FORCED_SHUTDOWN(mp))
2538 return XFS_ERROR(EIO); 2497 return XFS_ERROR(EIO);
2539 2498
2540 if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) { 2499 if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) {
2541 error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, 2500 error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK,
2542 target_dir_vp, DM_RIGHT_NULL, 2501 tdp, DM_RIGHT_NULL,
2543 src_vp, DM_RIGHT_NULL, 2502 sip, DM_RIGHT_NULL,
2544 target_name, NULL, 0, 0, 0); 2503 target_name->name, NULL, 0, 0, 0);
2545 if (error) 2504 if (error)
2546 return error; 2505 return error;
2547 } 2506 }
@@ -2556,7 +2515,7 @@ xfs_link(
2556 2515
2557 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); 2516 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
2558 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2517 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2559 resblks = XFS_LINK_SPACE_RES(mp, target_namelen); 2518 resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
2560 error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0, 2519 error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0,
2561 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 2520 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
2562 if (error == ENOSPC) { 2521 if (error == ENOSPC) {
@@ -2584,8 +2543,8 @@ xfs_link(
2584 * xfs_trans_cancel will both unlock the inodes and 2543 * xfs_trans_cancel will both unlock the inodes and
2585 * decrement the associated ref counts. 2544 * decrement the associated ref counts.
2586 */ 2545 */
2587 VN_HOLD(src_vp); 2546 IHOLD(sip);
2588 VN_HOLD(target_dir_vp); 2547 IHOLD(tdp);
2589 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 2548 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
2590 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); 2549 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
2591 2550
@@ -2608,15 +2567,14 @@ xfs_link(
2608 goto error_return; 2567 goto error_return;
2609 } 2568 }
2610 2569
2611 if (resblks == 0 && 2570 error = xfs_dir_canenter(tp, tdp, target_name, resblks);
2612 (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen))) 2571 if (error)
2613 goto error_return; 2572 goto error_return;
2614 2573
2615 XFS_BMAP_INIT(&free_list, &first_block); 2574 XFS_BMAP_INIT(&free_list, &first_block);
2616 2575
2617 error = xfs_dir_createname(tp, tdp, target_name, target_namelen, 2576 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
2618 sip->i_ino, &first_block, &free_list, 2577 &first_block, &free_list, resblks);
2619 resblks);
2620 if (error) 2578 if (error)
2621 goto abort_return; 2579 goto abort_return;
2622 xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2580 xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2650,9 +2608,9 @@ xfs_link(
2650std_return: 2608std_return:
2651 if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) { 2609 if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) {
2652 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK, 2610 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK,
2653 target_dir_vp, DM_RIGHT_NULL, 2611 tdp, DM_RIGHT_NULL,
2654 src_vp, DM_RIGHT_NULL, 2612 sip, DM_RIGHT_NULL,
2655 target_name, NULL, 0, error, 0); 2613 target_name->name, NULL, 0, error, 0);
2656 } 2614 }
2657 return error; 2615 return error;
2658 2616
@@ -2669,17 +2627,13 @@ std_return:
2669int 2627int
2670xfs_mkdir( 2628xfs_mkdir(
2671 xfs_inode_t *dp, 2629 xfs_inode_t *dp,
2672 bhv_vname_t *dentry, 2630 struct xfs_name *dir_name,
2673 mode_t mode, 2631 mode_t mode,
2674 bhv_vnode_t **vpp, 2632 xfs_inode_t **ipp,
2675 cred_t *credp) 2633 cred_t *credp)
2676{ 2634{
2677 bhv_vnode_t *dir_vp = XFS_ITOV(dp);
2678 char *dir_name = VNAME(dentry);
2679 int dir_namelen = VNAMELEN(dentry);
2680 xfs_mount_t *mp = dp->i_mount; 2635 xfs_mount_t *mp = dp->i_mount;
2681 xfs_inode_t *cdp; /* inode of created dir */ 2636 xfs_inode_t *cdp; /* inode of created dir */
2682 bhv_vnode_t *cvp; /* vnode of created dir */
2683 xfs_trans_t *tp; 2637 xfs_trans_t *tp;
2684 int cancel_flags; 2638 int cancel_flags;
2685 int error; 2639 int error;
@@ -2700,8 +2654,8 @@ xfs_mkdir(
2700 2654
2701 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { 2655 if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
2702 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 2656 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
2703 dir_vp, DM_RIGHT_NULL, NULL, 2657 dp, DM_RIGHT_NULL, NULL,
2704 DM_RIGHT_NULL, dir_name, NULL, 2658 DM_RIGHT_NULL, dir_name->name, NULL,
2705 mode, 0, 0); 2659 mode, 0, 0);
2706 if (error) 2660 if (error)
2707 return error; 2661 return error;
@@ -2730,7 +2684,7 @@ xfs_mkdir(
2730 2684
2731 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); 2685 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
2732 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2686 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2733 resblks = XFS_MKDIR_SPACE_RES(mp, dir_namelen); 2687 resblks = XFS_MKDIR_SPACE_RES(mp, dir_name->len);
2734 error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0, 2688 error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0,
2735 XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT); 2689 XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
2736 if (error == ENOSPC) { 2690 if (error == ENOSPC) {
@@ -2762,8 +2716,8 @@ xfs_mkdir(
2762 if (error) 2716 if (error)
2763 goto error_return; 2717 goto error_return;
2764 2718
2765 if (resblks == 0 && 2719 error = xfs_dir_canenter(tp, dp, dir_name, resblks);
2766 (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen))) 2720 if (error)
2767 goto error_return; 2721 goto error_return;
2768 /* 2722 /*
2769 * create the directory inode. 2723 * create the directory inode.
@@ -2786,15 +2740,15 @@ xfs_mkdir(
2786 * from here on will result in the transaction cancel 2740 * from here on will result in the transaction cancel
2787 * unlocking dp so don't do it explicitly in the error path. 2741 * unlocking dp so don't do it explicitly in the error path.
2788 */ 2742 */
2789 VN_HOLD(dir_vp); 2743 IHOLD(dp);
2790 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2744 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2791 unlock_dp_on_error = B_FALSE; 2745 unlock_dp_on_error = B_FALSE;
2792 2746
2793 XFS_BMAP_INIT(&free_list, &first_block); 2747 XFS_BMAP_INIT(&free_list, &first_block);
2794 2748
2795 error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino, 2749 error = xfs_dir_createname(tp, dp, dir_name, cdp->i_ino,
2796 &first_block, &free_list, resblks ? 2750 &first_block, &free_list, resblks ?
2797 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 2751 resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
2798 if (error) { 2752 if (error) {
2799 ASSERT(error != ENOSPC); 2753 ASSERT(error != ENOSPC);
2800 goto error1; 2754 goto error1;
@@ -2817,11 +2771,9 @@ xfs_mkdir(
2817 if (error) 2771 if (error)
2818 goto error2; 2772 goto error2;
2819 2773
2820 cvp = XFS_ITOV(cdp);
2821
2822 created = B_TRUE; 2774 created = B_TRUE;
2823 2775
2824 *vpp = cvp; 2776 *ipp = cdp;
2825 IHOLD(cdp); 2777 IHOLD(cdp);
2826 2778
2827 /* 2779 /*
@@ -2858,10 +2810,10 @@ std_return:
2858 if ((created || (error != 0 && dm_event_sent != 0)) && 2810 if ((created || (error != 0 && dm_event_sent != 0)) &&
2859 DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { 2811 DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
2860 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, 2812 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
2861 dir_vp, DM_RIGHT_NULL, 2813 dp, DM_RIGHT_NULL,
2862 created ? XFS_ITOV(cdp):NULL, 2814 created ? cdp : NULL,
2863 DM_RIGHT_NULL, 2815 DM_RIGHT_NULL,
2864 dir_name, NULL, 2816 dir_name->name, NULL,
2865 mode, error, 0); 2817 mode, error, 0);
2866 } 2818 }
2867 return error; 2819 return error;
@@ -2885,20 +2837,17 @@ std_return:
2885int 2837int
2886xfs_rmdir( 2838xfs_rmdir(
2887 xfs_inode_t *dp, 2839 xfs_inode_t *dp,
2888 bhv_vname_t *dentry) 2840 struct xfs_name *name,
2841 xfs_inode_t *cdp)
2889{ 2842{
2890 bhv_vnode_t *dir_vp = XFS_ITOV(dp); 2843 bhv_vnode_t *dir_vp = XFS_ITOV(dp);
2891 char *name = VNAME(dentry);
2892 int namelen = VNAMELEN(dentry);
2893 xfs_mount_t *mp = dp->i_mount; 2844 xfs_mount_t *mp = dp->i_mount;
2894 xfs_inode_t *cdp; /* child directory */
2895 xfs_trans_t *tp; 2845 xfs_trans_t *tp;
2896 int error; 2846 int error;
2897 xfs_bmap_free_t free_list; 2847 xfs_bmap_free_t free_list;
2898 xfs_fsblock_t first_block; 2848 xfs_fsblock_t first_block;
2899 int cancel_flags; 2849 int cancel_flags;
2900 int committed; 2850 int committed;
2901 int dm_di_mode = S_IFDIR;
2902 int last_cdp_link; 2851 int last_cdp_link;
2903 uint resblks; 2852 uint resblks;
2904 2853
@@ -2907,24 +2856,15 @@ xfs_rmdir(
2907 if (XFS_FORCED_SHUTDOWN(mp)) 2856 if (XFS_FORCED_SHUTDOWN(mp))
2908 return XFS_ERROR(EIO); 2857 return XFS_ERROR(EIO);
2909 2858
2910 if (!xfs_get_dir_entry(dentry, &cdp)) {
2911 dm_di_mode = cdp->i_d.di_mode;
2912 IRELE(cdp);
2913 }
2914
2915 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { 2859 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
2916 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, 2860 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
2917 dir_vp, DM_RIGHT_NULL, 2861 dp, DM_RIGHT_NULL,
2918 NULL, DM_RIGHT_NULL, 2862 NULL, DM_RIGHT_NULL, name->name,
2919 name, NULL, dm_di_mode, 0, 0); 2863 NULL, cdp->i_d.di_mode, 0, 0);
2920 if (error) 2864 if (error)
2921 return XFS_ERROR(error); 2865 return XFS_ERROR(error);
2922 } 2866 }
2923 2867
2924 /* Return through std_return after this point. */
2925
2926 cdp = NULL;
2927
2928 /* 2868 /*
2929 * We need to get a reference to cdp before we get our log 2869 * We need to get a reference to cdp before we get our log
2930 * reservation. The reason for this is that we cannot call 2870 * reservation. The reason for this is that we cannot call
@@ -2937,13 +2877,7 @@ xfs_rmdir(
2937 * when we call xfs_iget. Instead we get an unlocked reference 2877 * when we call xfs_iget. Instead we get an unlocked reference
2938 * to the inode before getting our log reservation. 2878 * to the inode before getting our log reservation.
2939 */ 2879 */
2940 error = xfs_get_dir_entry(dentry, &cdp); 2880 IHOLD(cdp);
2941 if (error) {
2942 REMOVE_DEBUG_TRACE(__LINE__);
2943 goto std_return;
2944 }
2945 mp = dp->i_mount;
2946 dm_di_mode = cdp->i_d.di_mode;
2947 2881
2948 /* 2882 /*
2949 * Get the dquots for the inodes. 2883 * Get the dquots for the inodes.
@@ -3020,7 +2954,7 @@ xfs_rmdir(
3020 goto error_return; 2954 goto error_return;
3021 } 2955 }
3022 2956
3023 error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino, 2957 error = xfs_dir_removename(tp, dp, name, cdp->i_ino,
3024 &first_block, &free_list, resblks); 2958 &first_block, &free_list, resblks);
3025 if (error) 2959 if (error)
3026 goto error1; 2960 goto error1;
@@ -3098,9 +3032,9 @@ xfs_rmdir(
3098 std_return: 3032 std_return:
3099 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 3033 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
3100 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 3034 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
3101 dir_vp, DM_RIGHT_NULL, 3035 dp, DM_RIGHT_NULL,
3102 NULL, DM_RIGHT_NULL, 3036 NULL, DM_RIGHT_NULL,
3103 name, NULL, dm_di_mode, 3037 name->name, NULL, cdp->i_d.di_mode,
3104 error, 0); 3038 error, 0);
3105 } 3039 }
3106 return error; 3040 return error;
@@ -3118,13 +3052,12 @@ xfs_rmdir(
3118int 3052int
3119xfs_symlink( 3053xfs_symlink(
3120 xfs_inode_t *dp, 3054 xfs_inode_t *dp,
3121 bhv_vname_t *dentry, 3055 struct xfs_name *link_name,
3122 char *target_path, 3056 const char *target_path,
3123 mode_t mode, 3057 mode_t mode,
3124 bhv_vnode_t **vpp, 3058 xfs_inode_t **ipp,
3125 cred_t *credp) 3059 cred_t *credp)
3126{ 3060{
3127 bhv_vnode_t *dir_vp = XFS_ITOV(dp);
3128 xfs_mount_t *mp = dp->i_mount; 3061 xfs_mount_t *mp = dp->i_mount;
3129 xfs_trans_t *tp; 3062 xfs_trans_t *tp;
3130 xfs_inode_t *ip; 3063 xfs_inode_t *ip;
@@ -3140,17 +3073,15 @@ xfs_symlink(
3140 int nmaps; 3073 int nmaps;
3141 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 3074 xfs_bmbt_irec_t mval[SYMLINK_MAPS];
3142 xfs_daddr_t d; 3075 xfs_daddr_t d;
3143 char *cur_chunk; 3076 const char *cur_chunk;
3144 int byte_cnt; 3077 int byte_cnt;
3145 int n; 3078 int n;
3146 xfs_buf_t *bp; 3079 xfs_buf_t *bp;
3147 xfs_prid_t prid; 3080 xfs_prid_t prid;
3148 struct xfs_dquot *udqp, *gdqp; 3081 struct xfs_dquot *udqp, *gdqp;
3149 uint resblks; 3082 uint resblks;
3150 char *link_name = VNAME(dentry);
3151 int link_namelen;
3152 3083
3153 *vpp = NULL; 3084 *ipp = NULL;
3154 error = 0; 3085 error = 0;
3155 ip = NULL; 3086 ip = NULL;
3156 tp = NULL; 3087 tp = NULL;
@@ -3160,44 +3091,17 @@ xfs_symlink(
3160 if (XFS_FORCED_SHUTDOWN(mp)) 3091 if (XFS_FORCED_SHUTDOWN(mp))
3161 return XFS_ERROR(EIO); 3092 return XFS_ERROR(EIO);
3162 3093
3163 link_namelen = VNAMELEN(dentry);
3164
3165 /* 3094 /*
3166 * Check component lengths of the target path name. 3095 * Check component lengths of the target path name.
3167 */ 3096 */
3168 pathlen = strlen(target_path); 3097 pathlen = strlen(target_path);
3169 if (pathlen >= MAXPATHLEN) /* total string too long */ 3098 if (pathlen >= MAXPATHLEN) /* total string too long */
3170 return XFS_ERROR(ENAMETOOLONG); 3099 return XFS_ERROR(ENAMETOOLONG);
3171 if (pathlen >= MAXNAMELEN) { /* is any component too long? */
3172 int len, total;
3173 char *path;
3174
3175 for (total = 0, path = target_path; total < pathlen;) {
3176 /*
3177 * Skip any slashes.
3178 */
3179 while(*path == '/') {
3180 total++;
3181 path++;
3182 }
3183
3184 /*
3185 * Count up to the next slash or end of path.
3186 * Error out if the component is bigger than MAXNAMELEN.
3187 */
3188 for(len = 0; *path != '/' && total < pathlen;total++, path++) {
3189 if (++len >= MAXNAMELEN) {
3190 error = ENAMETOOLONG;
3191 return error;
3192 }
3193 }
3194 }
3195 }
3196 3100
3197 if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { 3101 if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) {
3198 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp, 3102 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp,
3199 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 3103 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
3200 link_name, target_path, 0, 0, 0); 3104 link_name->name, target_path, 0, 0, 0);
3201 if (error) 3105 if (error)
3202 return error; 3106 return error;
3203 } 3107 }
@@ -3229,7 +3133,7 @@ xfs_symlink(
3229 fs_blocks = 0; 3133 fs_blocks = 0;
3230 else 3134 else
3231 fs_blocks = XFS_B_TO_FSB(mp, pathlen); 3135 fs_blocks = XFS_B_TO_FSB(mp, pathlen);
3232 resblks = XFS_SYMLINK_SPACE_RES(mp, link_namelen, fs_blocks); 3136 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
3233 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, 3137 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
3234 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 3138 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
3235 if (error == ENOSPC && fs_blocks == 0) { 3139 if (error == ENOSPC && fs_blocks == 0) {
@@ -3263,8 +3167,8 @@ xfs_symlink(
3263 /* 3167 /*
3264 * Check for ability to enter directory entry, if no space reserved. 3168 * Check for ability to enter directory entry, if no space reserved.
3265 */ 3169 */
3266 if (resblks == 0 && 3170 error = xfs_dir_canenter(tp, dp, link_name, resblks);
3267 (error = xfs_dir_canenter(tp, dp, link_name, link_namelen))) 3171 if (error)
3268 goto error_return; 3172 goto error_return;
3269 /* 3173 /*
3270 * Initialize the bmap freelist prior to calling either 3174 * Initialize the bmap freelist prior to calling either
@@ -3289,7 +3193,7 @@ xfs_symlink(
3289 * transaction cancel unlocking dp so don't do it explicitly in the 3193 * transaction cancel unlocking dp so don't do it explicitly in the
3290 * error path. 3194 * error path.
3291 */ 3195 */
3292 VN_HOLD(dir_vp); 3196 IHOLD(dp);
3293 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 3197 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
3294 unlock_dp_on_error = B_FALSE; 3198 unlock_dp_on_error = B_FALSE;
3295 3199
@@ -3356,8 +3260,8 @@ xfs_symlink(
3356 /* 3260 /*
3357 * Create the directory entry for the symlink. 3261 * Create the directory entry for the symlink.
3358 */ 3262 */
3359 error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino, 3263 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
3360 &first_block, &free_list, resblks); 3264 &first_block, &free_list, resblks);
3361 if (error) 3265 if (error)
3362 goto error1; 3266 goto error1;
3363 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3267 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -3399,19 +3303,14 @@ xfs_symlink(
3399std_return: 3303std_return:
3400 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) { 3304 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) {
3401 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK, 3305 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK,
3402 dir_vp, DM_RIGHT_NULL, 3306 dp, DM_RIGHT_NULL,
3403 error ? NULL : XFS_ITOV(ip), 3307 error ? NULL : ip,
3404 DM_RIGHT_NULL, link_name, target_path, 3308 DM_RIGHT_NULL, link_name->name,
3405 0, error, 0); 3309 target_path, 0, error, 0);
3406 } 3310 }
3407 3311
3408 if (!error) { 3312 if (!error)
3409 bhv_vnode_t *vp; 3313 *ipp = ip;
3410
3411 ASSERT(ip);
3412 vp = XFS_ITOV(ip);
3413 *vpp = vp;
3414 }
3415 return error; 3314 return error;
3416 3315
3417 error2: 3316 error2:
@@ -3431,60 +3330,11 @@ std_return:
3431} 3330}
3432 3331
3433int 3332int
3434xfs_rwlock(
3435 xfs_inode_t *ip,
3436 bhv_vrwlock_t locktype)
3437{
3438 if (S_ISDIR(ip->i_d.di_mode))
3439 return 1;
3440 if (locktype == VRWLOCK_WRITE) {
3441 xfs_ilock(ip, XFS_IOLOCK_EXCL);
3442 } else if (locktype == VRWLOCK_TRY_READ) {
3443 return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED);
3444 } else if (locktype == VRWLOCK_TRY_WRITE) {
3445 return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL);
3446 } else {
3447 ASSERT((locktype == VRWLOCK_READ) ||
3448 (locktype == VRWLOCK_WRITE_DIRECT));
3449 xfs_ilock(ip, XFS_IOLOCK_SHARED);
3450 }
3451
3452 return 1;
3453}
3454
3455
3456void
3457xfs_rwunlock(
3458 xfs_inode_t *ip,
3459 bhv_vrwlock_t locktype)
3460{
3461 if (S_ISDIR(ip->i_d.di_mode))
3462 return;
3463 if (locktype == VRWLOCK_WRITE) {
3464 /*
3465 * In the write case, we may have added a new entry to
3466 * the reference cache. This might store a pointer to
3467 * an inode to be released in this inode. If it is there,
3468 * clear the pointer and release the inode after unlocking
3469 * this one.
3470 */
3471 xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL);
3472 } else {
3473 ASSERT((locktype == VRWLOCK_READ) ||
3474 (locktype == VRWLOCK_WRITE_DIRECT));
3475 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
3476 }
3477 return;
3478}
3479
3480
3481int
3482xfs_inode_flush( 3333xfs_inode_flush(
3483 xfs_inode_t *ip, 3334 xfs_inode_t *ip,
3484 int flags) 3335 int flags)
3485{ 3336{
3486 xfs_mount_t *mp = ip->i_mount; 3337 xfs_mount_t *mp = ip->i_mount;
3487 xfs_inode_log_item_t *iip = ip->i_itemp;
3488 int error = 0; 3338 int error = 0;
3489 3339
3490 if (XFS_FORCED_SHUTDOWN(mp)) 3340 if (XFS_FORCED_SHUTDOWN(mp))
@@ -3494,33 +3344,9 @@ xfs_inode_flush(
3494 * Bypass inodes which have already been cleaned by 3344 * Bypass inodes which have already been cleaned by
3495 * the inode flush clustering code inside xfs_iflush 3345 * the inode flush clustering code inside xfs_iflush
3496 */ 3346 */
3497 if ((ip->i_update_core == 0) && 3347 if (xfs_inode_clean(ip))
3498 ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)))
3499 return 0; 3348 return 0;
3500 3349
3501 if (flags & FLUSH_LOG) {
3502 if (iip && iip->ili_last_lsn) {
3503 xlog_t *log = mp->m_log;
3504 xfs_lsn_t sync_lsn;
3505 int log_flags = XFS_LOG_FORCE;
3506
3507 spin_lock(&log->l_grant_lock);
3508 sync_lsn = log->l_last_sync_lsn;
3509 spin_unlock(&log->l_grant_lock);
3510
3511 if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
3512 if (flags & FLUSH_SYNC)
3513 log_flags |= XFS_LOG_SYNC;
3514 error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
3515 if (error)
3516 return error;
3517 }
3518
3519 if (ip->i_update_core == 0)
3520 return 0;
3521 }
3522 }
3523
3524 /* 3350 /*
3525 * We make this non-blocking if the inode is contended, 3351 * We make this non-blocking if the inode is contended,
3526 * return EAGAIN to indicate to the caller that they 3352 * return EAGAIN to indicate to the caller that they
@@ -3528,30 +3354,22 @@ xfs_inode_flush(
3528 * blocking on inodes inside another operation right 3354 * blocking on inodes inside another operation right
3529 * now, they get caught later by xfs_sync. 3355 * now, they get caught later by xfs_sync.
3530 */ 3356 */
3531 if (flags & FLUSH_INODE) { 3357 if (flags & FLUSH_SYNC) {
3532 int flush_flags; 3358 xfs_ilock(ip, XFS_ILOCK_SHARED);
3533 3359 xfs_iflock(ip);
3534 if (flags & FLUSH_SYNC) { 3360 } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
3535 xfs_ilock(ip, XFS_ILOCK_SHARED); 3361 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
3536 xfs_iflock(ip); 3362 xfs_iunlock(ip, XFS_ILOCK_SHARED);
3537 } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
3538 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
3539 xfs_iunlock(ip, XFS_ILOCK_SHARED);
3540 return EAGAIN;
3541 }
3542 } else {
3543 return EAGAIN; 3363 return EAGAIN;
3544 } 3364 }
3545 3365 } else {
3546 if (flags & FLUSH_SYNC) 3366 return EAGAIN;
3547 flush_flags = XFS_IFLUSH_SYNC;
3548 else
3549 flush_flags = XFS_IFLUSH_ASYNC;
3550
3551 error = xfs_iflush(ip, flush_flags);
3552 xfs_iunlock(ip, XFS_ILOCK_SHARED);
3553 } 3367 }
3554 3368
3369 error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC
3370 : XFS_IFLUSH_ASYNC_NOBLOCK);
3371 xfs_iunlock(ip, XFS_ILOCK_SHARED);
3372
3555 return error; 3373 return error;
3556} 3374}
3557 3375
@@ -3694,12 +3512,12 @@ xfs_finish_reclaim(
3694 * We get the flush lock regardless, though, just to make sure 3512 * We get the flush lock regardless, though, just to make sure
3695 * we don't free it while it is being flushed. 3513 * we don't free it while it is being flushed.
3696 */ 3514 */
3697 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 3515 if (!locked) {
3698 if (!locked) { 3516 xfs_ilock(ip, XFS_ILOCK_EXCL);
3699 xfs_ilock(ip, XFS_ILOCK_EXCL); 3517 xfs_iflock(ip);
3700 xfs_iflock(ip); 3518 }
3701 }
3702 3519
3520 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
3703 if (ip->i_update_core || 3521 if (ip->i_update_core ||
3704 ((ip->i_itemp != NULL) && 3522 ((ip->i_itemp != NULL) &&
3705 (ip->i_itemp->ili_format.ilf_fields != 0))) { 3523 (ip->i_itemp->ili_format.ilf_fields != 0))) {
@@ -3719,17 +3537,11 @@ xfs_finish_reclaim(
3719 ASSERT(ip->i_update_core == 0); 3537 ASSERT(ip->i_update_core == 0);
3720 ASSERT(ip->i_itemp == NULL || 3538 ASSERT(ip->i_itemp == NULL ||
3721 ip->i_itemp->ili_format.ilf_fields == 0); 3539 ip->i_itemp->ili_format.ilf_fields == 0);
3722 xfs_iunlock(ip, XFS_ILOCK_EXCL);
3723 } else if (locked) {
3724 /*
3725 * We are not interested in doing an iflush if we're
3726 * in the process of shutting down the filesystem forcibly.
3727 * So, just reclaim the inode.
3728 */
3729 xfs_ifunlock(ip);
3730 xfs_iunlock(ip, XFS_ILOCK_EXCL);
3731 } 3540 }
3732 3541
3542 xfs_ifunlock(ip);
3543 xfs_iunlock(ip, XFS_ILOCK_EXCL);
3544
3733 reclaim: 3545 reclaim:
3734 xfs_ireclaim(ip); 3546 xfs_ireclaim(ip);
3735 return 0; 3547 return 0;
@@ -3845,9 +3657,8 @@ xfs_alloc_file_space(
3845 end_dmi_offset = offset+len; 3657 end_dmi_offset = offset+len;
3846 if (end_dmi_offset > ip->i_size) 3658 if (end_dmi_offset > ip->i_size)
3847 end_dmi_offset = ip->i_size; 3659 end_dmi_offset = ip->i_size;
3848 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), 3660 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, offset,
3849 offset, end_dmi_offset - offset, 3661 end_dmi_offset - offset, 0, NULL);
3850 0, NULL);
3851 if (error) 3662 if (error)
3852 return error; 3663 return error;
3853 } 3664 }
@@ -3956,8 +3767,8 @@ dmapi_enospc_check:
3956 if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 && 3767 if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 &&
3957 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { 3768 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
3958 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, 3769 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
3959 XFS_ITOV(ip), DM_RIGHT_NULL, 3770 ip, DM_RIGHT_NULL,
3960 XFS_ITOV(ip), DM_RIGHT_NULL, 3771 ip, DM_RIGHT_NULL,
3961 NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ 3772 NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */
3962 if (error == 0) 3773 if (error == 0)
3963 goto retry; /* Maybe DMAPI app. has made space */ 3774 goto retry; /* Maybe DMAPI app. has made space */
@@ -4021,7 +3832,8 @@ xfs_zero_remaining_bytes(
4021 XFS_BUF_READ(bp); 3832 XFS_BUF_READ(bp);
4022 XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock)); 3833 XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock));
4023 xfsbdstrat(mp, bp); 3834 xfsbdstrat(mp, bp);
4024 if ((error = xfs_iowait(bp))) { 3835 error = xfs_iowait(bp);
3836 if (error) {
4025 xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", 3837 xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
4026 mp, bp, XFS_BUF_ADDR(bp)); 3838 mp, bp, XFS_BUF_ADDR(bp));
4027 break; 3839 break;
@@ -4033,7 +3845,8 @@ xfs_zero_remaining_bytes(
4033 XFS_BUF_UNREAD(bp); 3845 XFS_BUF_UNREAD(bp);
4034 XFS_BUF_WRITE(bp); 3846 XFS_BUF_WRITE(bp);
4035 xfsbdstrat(mp, bp); 3847 xfsbdstrat(mp, bp);
4036 if ((error = xfs_iowait(bp))) { 3848 error = xfs_iowait(bp);
3849 if (error) {
4037 xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", 3850 xfs_ioerror_alert("xfs_zero_remaining_bytes(write)",
4038 mp, bp, XFS_BUF_ADDR(bp)); 3851 mp, bp, XFS_BUF_ADDR(bp));
4039 break; 3852 break;
@@ -4102,7 +3915,7 @@ xfs_free_file_space(
4102 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { 3915 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
4103 if (end_dmi_offset > ip->i_size) 3916 if (end_dmi_offset > ip->i_size)
4104 end_dmi_offset = ip->i_size; 3917 end_dmi_offset = ip->i_size;
4105 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, 3918 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip,
4106 offset, end_dmi_offset - offset, 3919 offset, end_dmi_offset - offset,
4107 AT_DELAY_FLAG(attr_flags), NULL); 3920 AT_DELAY_FLAG(attr_flags), NULL);
4108 if (error) 3921 if (error)
@@ -4132,7 +3945,7 @@ xfs_free_file_space(
4132 * actually need to zero the extent edges. Otherwise xfs_bunmapi 3945 * actually need to zero the extent edges. Otherwise xfs_bunmapi
4133 * will take care of it for us. 3946 * will take care of it for us.
4134 */ 3947 */
4135 if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { 3948 if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
4136 nimap = 1; 3949 nimap = 1;
4137 error = xfs_bmapi(NULL, ip, startoffset_fsb, 3950 error = xfs_bmapi(NULL, ip, startoffset_fsb,
4138 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 3951 1, 0, NULL, 0, &imap, &nimap, NULL, NULL);
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 4e3970f0e5e3..24c53923dc2c 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -23,31 +23,32 @@ int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start,
23 xfs_off_t stop); 23 xfs_off_t stop);
24int xfs_release(struct xfs_inode *ip); 24int xfs_release(struct xfs_inode *ip);
25int xfs_inactive(struct xfs_inode *ip); 25int xfs_inactive(struct xfs_inode *ip);
26int xfs_lookup(struct xfs_inode *dp, bhv_vname_t *dentry, 26int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
27 bhv_vnode_t **vpp); 27 struct xfs_inode **ipp);
28int xfs_create(struct xfs_inode *dp, bhv_vname_t *dentry, mode_t mode, 28int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
29 xfs_dev_t rdev, bhv_vnode_t **vpp, struct cred *credp); 29 xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp);
30int xfs_remove(struct xfs_inode *dp, bhv_vname_t *dentry); 30int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
31int xfs_link(struct xfs_inode *tdp, bhv_vnode_t *src_vp, 31 struct xfs_inode *ip);
32 bhv_vname_t *dentry); 32int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
33int xfs_mkdir(struct xfs_inode *dp, bhv_vname_t *dentry, 33 struct xfs_name *target_name);
34 mode_t mode, bhv_vnode_t **vpp, struct cred *credp); 34int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
35int xfs_rmdir(struct xfs_inode *dp, bhv_vname_t *dentry); 35 mode_t mode, struct xfs_inode **ipp, struct cred *credp);
36int xfs_rmdir(struct xfs_inode *dp, struct xfs_name *name,
37 struct xfs_inode *cdp);
36int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, 38int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
37 xfs_off_t *offset, filldir_t filldir); 39 xfs_off_t *offset, filldir_t filldir);
38int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry, 40int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
39 char *target_path, mode_t mode, bhv_vnode_t **vpp, 41 const char *target_path, mode_t mode, struct xfs_inode **ipp,
40 struct cred *credp); 42 struct cred *credp);
41int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
42void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
43int xfs_inode_flush(struct xfs_inode *ip, int flags); 43int xfs_inode_flush(struct xfs_inode *ip, int flags);
44int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); 44int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
45int xfs_reclaim(struct xfs_inode *ip); 45int xfs_reclaim(struct xfs_inode *ip);
46int xfs_change_file_space(struct xfs_inode *ip, int cmd, 46int xfs_change_file_space(struct xfs_inode *ip, int cmd,
47 xfs_flock64_t *bf, xfs_off_t offset, 47 xfs_flock64_t *bf, xfs_off_t offset,
48 struct cred *credp, int attr_flags); 48 struct cred *credp, int attr_flags);
49int xfs_rename(struct xfs_inode *src_dp, bhv_vname_t *src_vname, 49int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
50 bhv_vnode_t *target_dir_vp, bhv_vname_t *target_vname); 50 struct xfs_inode *src_ip, struct xfs_inode *target_dp,
51 struct xfs_name *target_name);
51int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, 52int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value,
52 int *valuelenp, int flags, cred_t *cred); 53 int *valuelenp, int flags, cred_t *cred);
53int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, 54int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,